You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-08-08 14:22:09 +03:00
MCOL-641 Work of Ivan Zuniga on basic read and write support for Binary16
This commit is contained in:
committed by
Roman Nozdrin
parent
d943beb445
commit
32f6167067
@@ -91,7 +91,8 @@ CPPUNIT_TEST(setUp);
|
||||
// Extent & dict related testing
|
||||
CPPUNIT_TEST( testExtensionWOPrealloc );
|
||||
CPPUNIT_TEST( testDictExtensionWOPrealloc );
|
||||
// Semaphore related testing
|
||||
CPPUNIT_TEST( testExtentCrWOPreallocBin );
|
||||
// Semaphore related testing
|
||||
// CPPUNIT_TEST( testSem );
|
||||
|
||||
// Log related testing
|
||||
@@ -1542,7 +1543,180 @@ public:
|
||||
|
||||
}
|
||||
*/
|
||||
template<uint8_t W> struct binary;
typedef binary<16> binary16;
typedef binary<32> binary32;

/**
 * @brief Fixed-width, W-byte opaque value used by the tests to exercise
 *        BINARY columns.
 *
 * The value is a plain byte buffer; the accessors below expose it either
 * byte-by-byte or as consecutive 8-byte words.
 */
template<uint8_t W>
struct binary {
    unsigned char data[W]; // May be ok for empty value ?

    /**
     * @brief Store @p v in the leading bytes and zero the remaining bytes.
     *
     * The bytes are copied with memcpy rather than written through a cast
     * uint64_t pointer, so the store does not depend on the alignment of
     * `data`. At most W bytes of the object representation of @p v are
     * copied, so a buffer narrower than 8 bytes is never overrun (the
     * previous `W - 8` length wrapped around for W < 8).
     *
     * @return *this, so assignments can be chained.
     */
    binary& operator=(uint64_t v)
    {
        const size_t copied = (W < sizeof(v)) ? W : sizeof(v);
        memcpy(data, &v, copied);

        if (W > copied)
            memset(data + copied, 0, W - copied);

        return *this;
    }

    /** @brief Mutable access to the byte at position @p index (not range-checked). */
    inline uint8_t& operator[](const int index)
    {
        return *reinterpret_cast<uint8_t*>(data + index);
    }

    /**
     * @brief Mutable access to the @p index-th 8-byte word of the buffer
     *        (byte offset index * 8, not range-checked).
     *
     * NOTE(review): this hands out a uint64_t reference into a byte array, so
     * it relies on the object being suitably aligned for uint64_t access on
     * the target platform - confirm for non-x86 targets.
     */
    inline uint64_t& uint64(const int index)
    {
        return *reinterpret_cast<uint64_t*>(data + (index << 3));
    }
};
|
||||
|
||||
void testExtentCrWOPreallocBin() {
|
||||
IDBDataFile* pFile = NULL;
|
||||
ColumnOpCompress1 fileOp;
|
||||
BlockOp blockOp;
|
||||
char fileName[20];
|
||||
int rc;
|
||||
char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];
|
||||
int dbRoot = 1;
|
||||
|
||||
idbdatafile::IDBPolicy::init(true, false, "", 0);
|
||||
// Set to versionbuffer to satisfy IDBPolicy::getType
|
||||
strcpy(fileName, "versionbuffer");
|
||||
fileOp.compressionType(1);
|
||||
|
||||
fileOp.deleteFile(fileName);
|
||||
CPPUNIT_ASSERT(fileOp.exists(fileName) == false);
|
||||
|
||||
//binary16 emptyVal = blockOp.getEmptyBinRowValue( execplan::CalpontSystemCatalog::BINARY, 16 );
|
||||
uint64_t emptyVal = blockOp.getEmptyRowValue(execplan::CalpontSystemCatalog::BIGINT, 8);
|
||||
int width = blockOp.getCorrectRowWidth(execplan::CalpontSystemCatalog::BINARY, sizeof (binary16));
|
||||
int nBlocks = INITIAL_EXTENT_ROWS_TO_DISK / BYTE_PER_BLOCK * width;
|
||||
|
||||
// createFile runs IDBDataFile::open + initAbrevCompColumnExtent
|
||||
// under the hood
|
||||
// bigint column file
|
||||
rc = fileOp.createFile(fileName,
|
||||
nBlocks, // number of blocks
|
||||
emptyVal, // NULL value
|
||||
width, // width
|
||||
dbRoot); // dbroot
|
||||
CPPUNIT_ASSERT(rc == NO_ERROR);
|
||||
|
||||
fileOp.closeFile(pFile);
|
||||
|
||||
// open created compressed file and check its header
|
||||
pFile = IDBDataFile::open(IDBPolicy::getType(fileName,
|
||||
IDBPolicy::WRITEENG), fileName, "rb", dbRoot);
|
||||
|
||||
rc = pFile->seek(0, 0);
|
||||
CPPUNIT_ASSERT(rc == NO_ERROR);
|
||||
rc = fileOp.readHeaders(pFile, hdrs);
|
||||
CPPUNIT_ASSERT(rc == NO_ERROR);
|
||||
// Couldn't use IDBDataFile->close() here w/o excplicit cast
|
||||
fileOp.closeFile(pFile);
|
||||
|
||||
// Extend the extent up to 64MB
|
||||
pFile = IDBDataFile::open(IDBPolicy::getType(fileName,
|
||||
IDBPolicy::WRITEENG), fileName, "rb", dbRoot);
|
||||
|
||||
// disable disk space preallocation
|
||||
idbdatafile::IDBPolicy::setPreallocSpace(dbRoot);
|
||||
rc = fileOp.initColumnExtent(pFile,
|
||||
dbRoot,
|
||||
BYTE_PER_BLOCK - nBlocks, // number of blocks
|
||||
emptyVal,
|
||||
width,
|
||||
false, // use existing file
|
||||
false, // don't expand; new extent
|
||||
false, // add full (not abbreviated) extent
|
||||
true); // optimize extention
|
||||
|
||||
CPPUNIT_ASSERT(rc == NO_ERROR);
|
||||
fileOp.closeFile(pFile);
|
||||
// file has been extended
|
||||
cout << endl << "file has been extended";
|
||||
|
||||
// write up to INITIAL_EXTENT_ROWS_TO_DISK + 1 rows into the file
|
||||
|
||||
Column curCol;
|
||||
binary16 valArray[INITIAL_EXTENT_ROWS_TO_DISK + 1];
|
||||
RID rowIdArray[INITIAL_EXTENT_ROWS_TO_DISK + 1];
|
||||
// This is the magic for the stub in FileOp::oid2FileName
|
||||
int fid = 42;
|
||||
|
||||
for (uint64_t it = 0; it <= INITIAL_EXTENT_ROWS_TO_DISK; it++) {
|
||||
rowIdArray[it] = it;
|
||||
valArray[it].uint64(0) = it + 3;
|
||||
valArray[it].uint64(1) = it + 5;
|
||||
}
|
||||
|
||||
fileOp.initColumn(curCol);
|
||||
fileOp.setColParam(curCol,
|
||||
1, // column number
|
||||
width,
|
||||
execplan::CalpontSystemCatalog::BINARY,
|
||||
WriteEngine::WR_BINARY,
|
||||
fid,
|
||||
1); //compression type
|
||||
|
||||
string segFile;
|
||||
// openColumnFile uses DBRM's oid server but we
|
||||
// have to get the chunks' pointers from the header.
|
||||
curCol.dataFile.pFile = fileOp.openFile(
|
||||
curCol,
|
||||
dbRoot,
|
||||
0,
|
||||
0,
|
||||
segFile,
|
||||
false,
|
||||
"r+b",
|
||||
BYTE_PER_BLOCK * BYTE_PER_BLOCK); // buffer size is 64MB
|
||||
|
||||
CPPUNIT_ASSERT(rc == NO_ERROR);
|
||||
|
||||
rc = fileOp.writeRow(curCol, INITIAL_EXTENT_ROWS_TO_DISK + 1,
|
||||
(RID*) rowIdArray, valArray);
|
||||
CPPUNIT_ASSERT_EQUAL(NO_ERROR, rc); // I prefer this way as it prints values
|
||||
|
||||
// flush and close the file used for reading
|
||||
fileOp.clearColumn(curCol);
|
||||
|
||||
std::map<uint32_t, uint32_t> oids;
|
||||
oids[fid] = fid;
|
||||
|
||||
// flush changed chunks from the Manager
|
||||
int rtn1 = fileOp.chunkManager()->flushChunks(rc, oids);
|
||||
|
||||
// read back the file
|
||||
cout << endl << "Read file ";
|
||||
DataBlock block;
|
||||
binary16* bin16 = (binary16*) block.data;
|
||||
|
||||
fileOp.initColumn(curCol);
|
||||
fileOp.setColParam(curCol,
|
||||
1, // column number
|
||||
width,
|
||||
execplan::CalpontSystemCatalog::BINARY,
|
||||
WriteEngine::WR_BINARY,
|
||||
fid,
|
||||
1); //compression type
|
||||
|
||||
curCol.dataFile.pFile = fileOp.openFile(
|
||||
curCol,
|
||||
dbRoot,
|
||||
0,
|
||||
0,
|
||||
segFile,
|
||||
false,
|
||||
"r+b",
|
||||
BYTE_PER_BLOCK * BYTE_PER_BLOCK); // buffer size is 64MB
|
||||
|
||||
CPPUNIT_ASSERT_EQUAL(NO_ERROR, rc);
|
||||
|
||||
int blocks = fileOp.blocksInFile(curCol.dataFile.pFile);
|
||||
|
||||
for (int b = 0; b < blocks; b++) {
|
||||
rc = fileOp.chunkManager()->readBlock(curCol.dataFile.pFile, block.data, b); // ColumnOpCompress1.readBlock() is protected so ...
|
||||
CPPUNIT_ASSERT_EQUAL(NO_ERROR, rc);
|
||||
//cout << endl << bin16[0].uint64(0);
|
||||
CPPUNIT_ASSERT_EQUAL(b * 512UL + 3, bin16[0].uint64(0)); // Checking just first value of each block as it was written before
|
||||
CPPUNIT_ASSERT_EQUAL(b * 512UL + 5, bin16[0].uint64(1));
|
||||
}
|
||||
|
||||
fileOp.clearColumn(curCol);
|
||||
fileOp.closeFile(curCol.dataFile.pFile); // Seems done by clearColumn, but anyways...
|
||||
|
||||
cout << endl << "Delete file ";
|
||||
|
||||
fileOp.deleteFile(fileName);
|
||||
CPPUNIT_ASSERT(fileOp.exists(fileName) == false);
|
||||
cout << endl << "End of test";
|
||||
}
|
||||
|
||||
void testCleanup()
|
||||
{
|
||||
// shutdown
|
||||
|
@@ -159,6 +159,10 @@ uint64_t BlockOp::getEmptyRowValue(
|
||||
case CalpontSystemCatalog::UBIGINT :
|
||||
emptyVal = joblist::UBIGINTEMPTYROW;
|
||||
break;
|
||||
|
||||
case CalpontSystemCatalog::BINARY :
|
||||
emptyVal = joblist::BINARYEMPTYROW;
|
||||
break;
|
||||
|
||||
case CalpontSystemCatalog::CHAR :
|
||||
case CalpontSystemCatalog::VARCHAR :
|
||||
@@ -267,9 +271,11 @@ void BlockOp::setEmptyBuf(
|
||||
// Optimize buffer initialization by constructing and copying in an array
|
||||
// instead of individual values. This reduces the number of calls to
|
||||
// memcpy().
|
||||
for (int j = 0; j < ARRAY_COUNT; j++)
|
||||
|
||||
int w = width > 8 ? 8: width;
|
||||
for(uint8_t* pos = emptyValArray, * end = pos + NBYTES_IN_ARRAY; pos < end; pos += w) //FIXME for no loop
|
||||
{
|
||||
memcpy(emptyValArray + (j * width), &emptyVal, width);
|
||||
memcpy(pos, &emptyVal, w);
|
||||
}
|
||||
|
||||
int countFull128 = (bufSize / width) / ARRAY_COUNT;
|
||||
|
@@ -434,6 +434,11 @@ void Convertor::convertColType(CalpontSystemCatalog::ColDataType dataType,
|
||||
case CalpontSystemCatalog::UBIGINT:
|
||||
internalType = WriteEngine::WR_ULONGLONG;
|
||||
break;
|
||||
|
||||
// Map BINARY to WR_BINARY
|
||||
case CalpontSystemCatalog::BINARY:
|
||||
internalType = WriteEngine::WR_BINARY;
|
||||
break;
|
||||
|
||||
default:
|
||||
internalType = WriteEngine::WR_CHAR;
|
||||
@@ -682,6 +687,11 @@ void Convertor::convertColType(ColStruct* curStruct)
|
||||
case CalpontSystemCatalog::UBIGINT:
|
||||
*internalType = WriteEngine::WR_ULONGLONG;
|
||||
break;
|
||||
|
||||
// Map BINARY to WR_BINARY
|
||||
case CalpontSystemCatalog::BINARY:
|
||||
*internalType = WriteEngine::WR_BINARY;
|
||||
break;
|
||||
|
||||
default:
|
||||
*internalType = WriteEngine::WR_CHAR;
|
||||
@@ -772,7 +782,11 @@ int Convertor::getCorrectRowWidth(CalpontSystemCatalog::ColDataType dataType, in
|
||||
case CalpontSystemCatalog::TIMESTAMP:
|
||||
newWidth = 8;
|
||||
break;
|
||||
|
||||
|
||||
case CalpontSystemCatalog::BINARY:
|
||||
newWidth = width;
|
||||
break;
|
||||
|
||||
case CalpontSystemCatalog::CHAR:
|
||||
case CalpontSystemCatalog::VARCHAR:
|
||||
case CalpontSystemCatalog::VARBINARY: // treat same as varchar for now
|
||||
|
@@ -108,9 +108,10 @@ enum ColType /** @brief Column type enumeration*/
|
||||
WR_USHORT = 14, /** @brief Unsigned Short */
|
||||
WR_UINT = 15, /** @brief Unsigned Int */
|
||||
WR_ULONGLONG = 16, /** @brief Unsigned Long long*/
|
||||
WR_TEXT = 17, /** @brief TEXT */
|
||||
WR_TEXT = 17, /** @brief TEXT */
|
||||
WR_MEDINT = 18, /** @brief Medium Int */
|
||||
WR_UMEDINT = 19 /** @brief Unsigned Medium Int */
|
||||
WR_UMEDINT = 19, /** @brief Unsigned Medium Int */
|
||||
WR_BINARY = 20 /** @brief BINARY */
|
||||
};
|
||||
|
||||
// Describes relation of field to column for a bulk load
|
||||
|
Reference in New Issue
Block a user