
MCOL-641 Work of Ivan Zuniga on basic read and write support for Binary16

Gagan Goel
2019-10-24 14:01:47 -04:00
committed by Roman Nozdrin
parent d943beb445
commit 32f6167067
48 changed files with 1114 additions and 75 deletions

View File

@@ -91,7 +91,8 @@ CPPUNIT_TEST(setUp);
// Extent & dict related testing
CPPUNIT_TEST( testExtensionWOPrealloc );
CPPUNIT_TEST( testDictExtensionWOPrealloc );
CPPUNIT_TEST( testExtentCrWOPreallocBin );
// Semaphore related testing
// CPPUNIT_TEST( testSem );
// Log related testing
@@ -1542,7 +1543,180 @@ public:
}
*/
template<uint8_t W> struct binary;
typedef binary<16> binary16;
typedef binary<32> binary32;
template<uint8_t W>
struct binary {
unsigned char data[W]; // May be ok for empty value ?
void operator=(uint64_t v) {*((uint64_t *) data) = v; memset(data + 8, 0, W - 8);}
inline uint8_t& operator[](const int index) {return *((uint8_t*) (data + index));}
inline uint64_t& uint64(const int index) {return *((uint64_t*) (data + (index << 3)));}
};
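// Usage sketch (assumed values, little-endian host): the 16 bytes of a
// binary16 are addressed as two 64-bit lanes, e.g.
//   binary16 b;
//   b = 42;          // operator= fills bytes 0..7 with 42 and zeroes bytes 8..15
//   b.uint64(0);     // low lane  -> 42
//   b.uint64(1);     // high lane -> 0
//   b[0];            // byte-level view via operator[] -> 0x2A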
void testExtentCrWOPreallocBin() {
IDBDataFile* pFile = NULL;
ColumnOpCompress1 fileOp;
BlockOp blockOp;
char fileName[20];
int rc;
char hdrs[ IDBCompressInterface::HDR_BUF_LEN * 2 ];
int dbRoot = 1;
idbdatafile::IDBPolicy::init(true, false, "", 0);
// Set to versionbuffer to satisfy IDBPolicy::getType
strcpy(fileName, "versionbuffer");
fileOp.compressionType(1);
fileOp.deleteFile(fileName);
CPPUNIT_ASSERT(fileOp.exists(fileName) == false);
//binary16 emptyVal = blockOp.getEmptyBinRowValue( execplan::CalpontSystemCatalog::BINARY, 16 );
uint64_t emptyVal = blockOp.getEmptyRowValue(execplan::CalpontSystemCatalog::BIGINT, 8);
int width = blockOp.getCorrectRowWidth(execplan::CalpontSystemCatalog::BINARY, sizeof (binary16));
int nBlocks = INITIAL_EXTENT_ROWS_TO_DISK / BYTE_PER_BLOCK * width;
// createFile runs IDBDataFile::open + initAbrevCompColumnExtent
// under the hood
// bigint column file
rc = fileOp.createFile(fileName,
nBlocks, // number of blocks
emptyVal, // NULL value
width, // width
dbRoot); // dbroot
CPPUNIT_ASSERT(rc == NO_ERROR);
fileOp.closeFile(pFile);
// open created compressed file and check its header
pFile = IDBDataFile::open(IDBPolicy::getType(fileName,
IDBPolicy::WRITEENG), fileName, "rb", dbRoot);
rc = pFile->seek(0, 0);
CPPUNIT_ASSERT(rc == NO_ERROR);
rc = fileOp.readHeaders(pFile, hdrs);
CPPUNIT_ASSERT(rc == NO_ERROR);
// Couldn't use IDBDataFile->close() here w/o explicit cast
fileOp.closeFile(pFile);
// Extend the extent up to 64MB
pFile = IDBDataFile::open(IDBPolicy::getType(fileName,
IDBPolicy::WRITEENG), fileName, "rb", dbRoot);
// disable disk space preallocation
idbdatafile::IDBPolicy::setPreallocSpace(dbRoot);
rc = fileOp.initColumnExtent(pFile,
dbRoot,
BYTE_PER_BLOCK - nBlocks, // number of blocks
emptyVal,
width,
false, // use existing file
false, // don't expand; new extent
false, // add full (not abbreviated) extent
true); // optimize extension
CPPUNIT_ASSERT(rc == NO_ERROR);
fileOp.closeFile(pFile);
// file has been extended
cout << endl << "file has been extended";
// write up to INITIAL_EXTENT_ROWS_TO_DISK + 1 rows into the file
Column curCol;
binary16 valArray[INITIAL_EXTENT_ROWS_TO_DISK + 1];
RID rowIdArray[INITIAL_EXTENT_ROWS_TO_DISK + 1];
// This is the magic for the stub in FileOp::oid2FileName
int fid = 42;
for (uint64_t it = 0; it <= INITIAL_EXTENT_ROWS_TO_DISK; it++) {
rowIdArray[it] = it;
valArray[it].uint64(0) = it + 3;
valArray[it].uint64(1) = it + 5;
}
fileOp.initColumn(curCol);
fileOp.setColParam(curCol,
1, // column number
width,
execplan::CalpontSystemCatalog::BINARY,
WriteEngine::WR_BINARY,
fid,
1); //compression type
string segFile;
// openColumnFile uses DBRM's oid server but we
// have to get the chunks' pointers from the header.
curCol.dataFile.pFile = fileOp.openFile(
curCol,
dbRoot,
0,
0,
segFile,
false,
"r+b",
BYTE_PER_BLOCK * BYTE_PER_BLOCK); // buffer size is 64MB
CPPUNIT_ASSERT(curCol.dataFile.pFile != NULL);
rc = fileOp.writeRow(curCol, INITIAL_EXTENT_ROWS_TO_DISK + 1,
(RID*) rowIdArray, valArray);
CPPUNIT_ASSERT_EQUAL(NO_ERROR, rc); // I prefer this way as it prints values
// flush and close the file used for reading
fileOp.clearColumn(curCol);
std::map<uint32_t, uint32_t> oids;
oids[fid] = fid;
// flush changed chunks from the Manager
int rtn1 = fileOp.chunkManager()->flushChunks(rc, oids);
// read back the file
cout << endl << "Read file ";
DataBlock block;
binary16* bin16 = (binary16*) block.data;
fileOp.initColumn(curCol);
fileOp.setColParam(curCol,
1, // column number
width,
execplan::CalpontSystemCatalog::BINARY,
WriteEngine::WR_BINARY,
fid,
1); //compression type
curCol.dataFile.pFile = fileOp.openFile(
curCol,
dbRoot,
0,
0,
segFile,
false,
"r+b",
BYTE_PER_BLOCK * BYTE_PER_BLOCK); // buffer size is 64MB
CPPUNIT_ASSERT(curCol.dataFile.pFile != NULL);
int blocks = fileOp.blocksInFile(curCol.dataFile.pFile);
for (int b = 0; b < blocks; b++) {
rc = fileOp.chunkManager()->readBlock(curCol.dataFile.pFile, block.data, b); // ColumnOpCompress1.readBlock() is protected so ...
CPPUNIT_ASSERT_EQUAL(NO_ERROR, rc);
//cout << endl << bin16[0].uint64(0);
CPPUNIT_ASSERT_EQUAL(b * 512UL + 3, bin16[0].uint64(0)); // Checking just first value of each block as it was written before
CPPUNIT_ASSERT_EQUAL(b * 512UL + 5, bin16[0].uint64(1));
}
fileOp.clearColumn(curCol);
fileOp.closeFile(curCol.dataFile.pFile); // Seems done by clearColumn, but anyways...
cout << endl << "Delete file ";
fileOp.deleteFile(fileName);
CPPUNIT_ASSERT(fileOp.exists(fileName) == false);
cout << endl << "End of test";
}
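// For reference, with BYTE_PER_BLOCK assumed to be 8192 and
// INITIAL_EXTENT_ROWS_TO_DISK assumed to be 256 * 1024 (the usual write engine
// defaults), the numbers behind the assertions above work out as:
//   rows per block = BYTE_PER_BLOCK / sizeof(binary16) = 8192 / 16 = 512
//   nBlocks        = INITIAL_EXTENT_ROWS_TO_DISK / BYTE_PER_BLOCK * width
//                  = (262144 / 8192) * 16 = 512
// so block b starts at row b * 512, whose two lanes were written as
// b * 512 + 3 and b * 512 + 5, which is exactly what the read-back loop checks.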
void testCleanup()
{
// shutdown

View File

@@ -159,6 +159,10 @@ uint64_t BlockOp::getEmptyRowValue(
case CalpontSystemCatalog::UBIGINT :
emptyVal = joblist::UBIGINTEMPTYROW;
break;
case CalpontSystemCatalog::BINARY :
emptyVal = joblist::BINARYEMPTYROW;
break;
case CalpontSystemCatalog::CHAR :
case CalpontSystemCatalog::VARCHAR :
@@ -267,9 +271,11 @@ void BlockOp::setEmptyBuf(
// Optimize buffer initialization by constructing and copying in an array
// instead of individual values. This reduces the number of calls to
// memcpy().
int w = width > 8 ? 8 : width;
for (uint8_t* pos = emptyValArray, * end = pos + NBYTES_IN_ARRAY; pos < end; pos += w) //FIXME for no loop
{
    memcpy(pos, &emptyVal, w);
}
int countFull128 = (bufSize / width) / ARRAY_COUNT;
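
The rewritten loop steps through the staging array in chunks of w = min(width, 8) bytes, because emptyVal is still a single 64-bit value even when the column itself is wider; a binary16 column simply gets tiled with the 8-byte pattern twice per row. A minimal standalone sketch of that fill pattern, using stand-in values for NBYTES_IN_ARRAY and the empty-value magic (the real constants live in the write engine headers):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main()
{
    const int NBYTES_IN_ARRAY = 128;                 // stand-in for the real constant
    const int width = 16;                            // e.g. a binary16 column
    std::uint64_t emptyVal = 0xFEFEFEFEFEFEFEFEULL;  // stand-in 8-byte empty-value pattern

    std::uint8_t emptyValArray[NBYTES_IN_ARRAY];
    int w = width > 8 ? 8 : width;                   // emptyVal is only 8 bytes wide

    // Same stepping as the new loop in BlockOp::setEmptyBuf: tile the staging
    // array with w-byte copies of the empty value.
    for (std::uint8_t* pos = emptyValArray, * end = pos + NBYTES_IN_ARRAY; pos < end; pos += w)
        std::memcpy(pos, &emptyVal, w);

    std::printf("%02x ... %02x\n", (unsigned) emptyValArray[0],
                (unsigned) emptyValArray[NBYTES_IN_ARRAY - 1]);
    return 0;
}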

View File

@@ -434,6 +434,11 @@ void Convertor::convertColType(CalpontSystemCatalog::ColDataType dataType,
case CalpontSystemCatalog::UBIGINT:
internalType = WriteEngine::WR_ULONGLONG;
break;
// Map BINARY to WR_BINARY
case CalpontSystemCatalog::BINARY:
internalType = WriteEngine::WR_BINARY;
break;
default:
internalType = WriteEngine::WR_CHAR;
@@ -682,6 +687,11 @@ void Convertor::convertColType(ColStruct* curStruct)
case CalpontSystemCatalog::UBIGINT:
*internalType = WriteEngine::WR_ULONGLONG;
break;
// Map BINARY to WR_BINARY
case CalpontSystemCatalog::BINARY:
*internalType = WriteEngine::WR_BINARY;
break;
default:
*internalType = WriteEngine::WR_CHAR;
@@ -772,7 +782,11 @@ int Convertor::getCorrectRowWidth(CalpontSystemCatalog::ColDataType dataType, in
case CalpontSystemCatalog::TIMESTAMP:
newWidth = 8;
break;
case CalpontSystemCatalog::BINARY:
newWidth = width;
break;
case CalpontSystemCatalog::CHAR:
case CalpontSystemCatalog::VARCHAR:
case CalpontSystemCatalog::VARBINARY: // treat same as varchar for now

View File

@@ -108,9 +108,10 @@ enum ColType /** @brief Column type enumeration*/
WR_USHORT = 14, /** @brief Unsigned Short */
WR_UINT = 15, /** @brief Unsigned Int */
WR_ULONGLONG = 16, /** @brief Unsigned Long long*/
WR_TEXT = 17, /** @brief TEXT */
WR_MEDINT = 18, /** @brief Medium Int */
WR_UMEDINT = 19, /** @brief Unsigned Medium Int */
WR_BINARY = 20 /** @brief BINARY */
};
// Describes relation of field to column for a bulk load