1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-08 14:22:09 +03:00

MCOL-4685: Eliminate some irrelevant settings (uncompressed data and extents per file).

This patch:
1. Removes the option to declare uncompressed columns (set columnstore_compression_type = 0).
2. Ignores the `COMMENT 'compression=0'` option at table or column level (no error message is raised; the option is silently disregarded).
3. Removes the option to set more than 2 extents per file (ExtentsPerSegmentFile).
4. Updates the rebuildEM tool to support up to 10 dictionary extents per dictionary segment file.
5. Adds a check of `DBRootStorageType` to the rebuildEM tool.
6. Renames rebuildEM to mcsRebuildEM.
This commit is contained in:
Denis Khalikov
2021-05-05 15:02:08 +03:00
parent ddf19a9cc3
commit 606194e6e4
23 changed files with 170 additions and 143 deletions

View File

@@ -132,7 +132,7 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
auto colDataType = compressor.getColDataType(fileHeader);
auto colWidth = compressor.getColumnWidth(fileHeader);
auto blockCount = compressor.getBlockCount(fileHeader);
auto lbid = compressor.getLBID0(fileHeader);
auto lbidCount = compressor.getLBIDCount(fileHeader);
if (colDataType == execplan::CalpontSystemCatalog::UNDEFINED)
{
@@ -146,7 +146,6 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
auto isDict = isDictFile(colDataType, colWidth);
if (isDict)
colWidth = 8;
uint64_t hwm = 0;
if (doVerbose())
{
@@ -154,48 +153,55 @@ int32_t EMReBuilder::collectExtent(const std::string& fullFileName)
std::cout << "Block count: " << blockCount << std::endl;
}
rc =
searchHWMInSegmentFile(oid, getDBRoot(), partition, segment,
colDataType, colWidth, blockCount, isDict, hwm);
uint64_t hwm = 0;
rc = searchHWMInSegmentFile(oid, getDBRoot(), partition, segment, colDataType, colWidth,
blockCount, isDict, hwm);
if (rc != 0)
{
return rc;
}
const uint32_t extentMaxBlockCount =
getEM().getExtentRows() * colWidth / BLOCK_SIZE;
// We found multiple extents per one segment file.
if (hwm >= extentMaxBlockCount)
{
auto lbid = compressor.getLBID1(fileHeader);
FileId fileId(oid, partition, segment, colWidth, colDataType, lbid,
hwm, isDict);
extentMap.push_back(fileId);
// Update HWM.
hwm = extentMaxBlockCount - 1;
if (doVerbose())
{
std::cout << "Found multiple extents per segment file "
<< std::endl;
std::cout << "FileId is collected " << fileId << std::endl;
}
}
if (doVerbose())
{
std::cout << "HWM is: " << hwm << std::endl;
}
FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, hwm,
isDict);
extentMap.push_back(fileId);
if (doVerbose())
const uint32_t extentMaxBlockCount = getEM().getExtentRows() * colWidth / BLOCK_SIZE;
// We found multiple extents per one segment file.
if (hwm >= extentMaxBlockCount)
{
std::cout << "FileId is collected " << fileId << std::endl;
for (uint32_t lbidIndex = 0; lbidIndex < lbidCount - 1; ++lbidIndex)
{
auto lbid = compressor.getLBIDByIndex(fileHeader, lbidIndex);
FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, /*hwm*/ 0, isDict);
extentMap.push_back(fileId);
}
// Last one has an actual HWM.
auto lbid = compressor.getLBIDByIndex(fileHeader, lbidCount - 1);
FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, hwm, isDict);
extentMap.push_back(fileId);
if (doVerbose())
{
std::cout << "Found multiple extents per segment file "
<< std::endl;
std::cout << "FileId is collected " << fileId << std::endl;
}
}
else
{
// One extent per segment file.
auto lbid = compressor.getLBIDByIndex(fileHeader, 0);
FileId fileId(oid, partition, segment, colWidth, colDataType, lbid, hwm, isDict);
extentMap.push_back(fileId);
if (doVerbose())
{
std::cout << "FileId is collected " << fileId << std::endl;
}
}
return 0;
}