1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-5021 Add an option in Columnstore.xml, FastDelete (disabled
by default), which, when enabled, indiscriminately invalidates all
column extents and performs the actual DELETE only on the AUX
column. The trade-off with this approach is that the first SELECT
for certain query patterns (those containing a WHERE predicate)
after the DELETE operation will be slower, because the invalidated
column extents need to be scanned again to set the min/max values.
This commit is contained in:
Gagan Goel
2022-05-26 15:13:19 -04:00
parent 2280b1dd25
commit c8b6b154bf
6 changed files with 96 additions and 32 deletions

View File

@ -137,6 +137,7 @@
<BulkRollbackDir>/var/lib/columnstore/data1/systemFiles/bulkRollback</BulkRollbackDir>
<MaxFileSystemDiskUsagePct>98</MaxFileSystemDiskUsagePct>
<CompressedPaddingBlocks>1</CompressedPaddingBlocks> <!-- Number of blocks used to pad compressed chunks -->
<FastDelete>false</FastDelete>
</WriteEngine>
<DBRM_Controller>
<NumWorkers>1</NumWorkers>

View File

@ -1655,9 +1655,8 @@ void vectorizedFiltering(NewColRequestHeader* in, ColResultHeader* out, const T*
for (uint16_t i = 0; i < iterNumberAux; ++i)
{
dataVecAux = simdDataLoadTemplate<VTAux, SimdWrapperTypeAux, HAS_INPUT_RIDS, uint8_t>(simdProcessorAux, blockAux,
origBlockAux, ridArray, i)
.v;
dataVecAux = simdDataLoad<VTAux, SimdWrapperTypeAux, HAS_INPUT_RIDS, uint8_t>(simdProcessorAux, blockAux,
origBlockAux, ridArray, i).v;
nonEmptyMaskAux[i] = simdProcessorAux.nullEmptyCmpNe(dataVecAux, emptyFilterArgVecAux);
blockAux += VECTOR_SIZE_AUX;
ridArray += VECTOR_SIZE_AUX;

View File

@ -136,7 +136,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan1Byte)
in->NVALS = 0;
pp.setBlockPtr((int*)readBlockFromLiteralArray("col1block.cdf", block));
pp.columnScanAndFilter<IntegralType>(in, out);
pp.columnScanAndFilter<IntegralType>(in, out, false);
results = getValuesArrayPosition<UT>(getFirstValueArrayPosition(out), 0);
EXPECT_EQ(out->NVALS, 8160);
@ -160,7 +160,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan1ByteVectorized)
in->NVALS = 0;
pp.setBlockPtr((int*)readBlockFromLiteralArray("col1block.cdf", block));
pp.columnScanAndFilter<IntegralType>(in, out);
pp.columnScanAndFilter<IntegralType>(in, out, false);
results = getValuesArrayPosition<UT>(getFirstValueArrayPosition(out), 0);
EXPECT_EQ(out->NVALS, 8160);
@ -196,7 +196,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan2Bytes)
in->NVALS = 0;
pp.setBlockPtr((int*)readBlockFromLiteralArray("col2block.cdf", block));
pp.columnScanAndFilter<IntegralType>(in, out);
pp.columnScanAndFilter<IntegralType>(in, out, false);
results = getValuesArrayPosition<UT>(getFirstValueArrayPosition(out), 0);
EXPECT_EQ(out->NVALS, 4096);
@ -223,7 +223,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan4Bytes)
in->NVALS = 0;
pp.setBlockPtr((int*)readBlockFromLiteralArray("col4block.cdf", block));
pp.columnScanAndFilter<IntegralType>(in, out);
pp.columnScanAndFilter<IntegralType>(in, out, false);
results = getValuesArrayPosition<UT>(getFirstValueArrayPosition(out), 0);
EXPECT_EQ(out->NVALS, 2048);
@ -251,7 +251,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan8Bytes)
in->NVALS = 0;
pp.setBlockPtr((int*)readBlockFromLiteralArray("col8block.cdf", block));
pp.columnScanAndFilter<IntegralType>(in, out);
pp.columnScanAndFilter<IntegralType>(in, out, false);
results = getValuesArrayPosition<UT>(getFirstValueArrayPosition(out), 0);
ASSERT_EQ(out->NVALS, 1024);
@ -287,7 +287,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan2Bytes1EqFilter)
&input[sizeof(NewColRequestHeader) + sizeof(ColArgs) + in->colType.DataSize]);
pp.setBlockPtr((int*)readBlockFromLiteralArray("col2block.cdf", block));
pp.columnScanAndFilter<IntegralType>(in, out);
pp.columnScanAndFilter<IntegralType>(in, out, false);
results = getValuesArrayPosition<UT>(getFirstValueArrayPosition(out), 0);
ASSERT_EQ(out->NVALS, 51);
@ -315,7 +315,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan1ByteUsingRID)
rids[1] = 17;
pp.setBlockPtr((int*)readBlockFromLiteralArray("col1block.cdf", block));
pp.columnScanAndFilter<IntegralType>(in, out);
pp.columnScanAndFilter<IntegralType>(in, out, false);
results = getValuesArrayPosition<UT>(getFirstValueArrayPosition(out), 0);
ASSERT_EQ(out->NVALS, 2);
@ -344,7 +344,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan1ByteUsingMultipleRIDs)
rids[126] = 8189;
pp.setBlockPtr((int*)readBlockFromLiteralArray("col1block.cdf", block));
pp.columnScanAndFilter<IntegralType>(in, out);
pp.columnScanAndFilter<IntegralType>(in, out, false);
results = getValuesArrayPosition<UT>(getFirstValueArrayPosition(out), 0);
ASSERT_EQ(out->NVALS, expectedNVALS);
@ -376,7 +376,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan4Bytes1EqFilter)
&input[sizeof(NewColRequestHeader) + sizeof(ColArgs) + in->colType.DataSize]);
pp.setBlockPtr((int*)readBlockFromLiteralArray("col4block.cdf", block));
pp.columnScanAndFilter<IntegralType>(in, out);
pp.columnScanAndFilter<IntegralType>(in, out, false);
results = getValuesArrayPosition<UT>(getFirstValueArrayPosition(out), 0);
ASSERT_EQ(out->NVALS, 8);
@ -404,7 +404,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan4BytesUsingMultipleRIDs)
rids[126] = 1020;
pp.setBlockPtr((int*)readBlockFromLiteralArray("col4block.cdf", block));
pp.columnScanAndFilter<IntegralType>(in, out);
pp.columnScanAndFilter<IntegralType>(in, out, false);
results = getValuesArrayPosition<UT>(getFirstValueArrayPosition(out), 0);
@ -438,7 +438,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan4Bytes2Filters)
memcpy(args->val, &tmp, in->colType.DataSize);
pp.setBlockPtr((int*)readBlockFromLiteralArray("col4block.cdf", block));
pp.columnScanAndFilter<IntegralType>(in, out);
pp.columnScanAndFilter<IntegralType>(in, out, false);
results = getValuesArrayPosition<UT>(getFirstValueArrayPosition(out), 0);
ASSERT_EQ(out->NVALS, 9);
@ -477,7 +477,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan8Bytes1EqFilter)
&input[sizeof(NewColRequestHeader) + sizeof(ColArgs) + in->colType.DataSize]);
pp.setBlockPtr((int*)readBlockFromLiteralArray("col8block.cdf", block));
pp.columnScanAndFilter<IntegralType>(in, out);
pp.columnScanAndFilter<IntegralType>(in, out, false);
results = getValuesArrayPosition<UT>(getFirstValueArrayPosition(out), 0);
ASSERT_EQ(out->NVALS, 11);
@ -511,7 +511,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan8BytesUsingMultipleRIDs)
rids[126] = 1020;
pp.setBlockPtr((int*)readBlockFromLiteralArray("col8block.cdf", block));
pp.columnScanAndFilter<IntegralType>(in, out);
pp.columnScanAndFilter<IntegralType>(in, out, false);
results = getValuesArrayPosition<UT>(getFirstValueArrayPosition(out), 0);
@ -547,7 +547,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan8Bytes2CompFilters)
memcpy(args->val, &tmp, in->colType.DataSize);
pp.setBlockPtr((int*)readBlockFromLiteralArray("col8block.cdf", block));
pp.columnScanAndFilter<IntegralType>(in, out);
pp.columnScanAndFilter<IntegralType>(in, out, false);
results = getValuesArrayPosition<UT>(getFirstValueArrayPosition(out), 0);
ASSERT_EQ(out->NVALS, 33);
@ -587,7 +587,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan8Bytes2EqFilters)
memcpy(args->val, &tmp, in->colType.DataSize);
pp.setBlockPtr((int*)readBlockFromLiteralArray("col8block.cdf", block));
pp.columnScanAndFilter<IntegralType>(in, out);
pp.columnScanAndFilter<IntegralType>(in, out, false);
results = getValuesArrayPosition<UT>(getFirstValueArrayPosition(out), 0);
@ -631,7 +631,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan8Bytes2EqFiltersRID)
rids[1] = 100;
pp.setBlockPtr((int*)readBlockFromLiteralArray("col8block.cdf", block));
pp.columnScanAndFilter<IntegralType>(in, out);
pp.columnScanAndFilter<IntegralType>(in, out, false);
results = getValuesArrayPosition<UT>(getFirstValueArrayPosition(out), 0);
ASSERT_EQ(out->NVALS, 1);
@ -662,7 +662,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan8Bytes2FiltersRIDOutputRid)
memcpy(args->val, &tmp, in->colType.DataSize);
pp.setBlockPtr((int*)readBlockFromLiteralArray("col8block.cdf", block));
pp.columnScanAndFilter<IntegralType>(in, out);
pp.columnScanAndFilter<IntegralType>(in, out, false);
results = getValuesArrayPosition<RIDType>(getFirstRIDArrayPosition(out), 0);
ASSERT_EQ(out->NVALS, 33);
@ -701,7 +701,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan8Bytes2EqFiltersRIDOutputBoth)
memcpy(args->val, &tmp, in->colType.DataSize);
pp.setBlockPtr((int*)readBlockFromLiteralArray("col8block.cdf", block));
pp.columnScanAndFilter<IntegralType>(in, out);
pp.columnScanAndFilter<IntegralType>(in, out, false);
ASSERT_EQ(out->NVALS, 33);
@ -742,7 +742,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan1Byte2CompFilters)
args->val[0] = '4';
pp.setBlockPtr((int*)readBlockFromLiteralArray("col1block.cdf", block));
pp.columnScanAndFilter<IntegralType>(in, out);
pp.columnScanAndFilter<IntegralType>(in, out, false);
results = getValuesArrayPosition<UT>(getFirstValueArrayPosition(out), 0);
ASSERT_EQ(out->NVALS, 32);
@ -791,7 +791,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan4Bytes2CompFiltersOutputRID)
memcpy(&args->val[in->colType.DataSize], &ridTmp, 2);
pp.setBlockPtr((int*) readBlockFromLiteralArray("col4block.cdf", block));
pp.columnScanAndFilter<IntegralType>(in, out);
pp.columnScanAndFilter<IntegralType>(in, out, false);
results = reinterpret_cast<int16_t*>(&output[sizeof(ColResultHeader)]);
ASSERT_EQ(out->NVALS, 2);
@ -823,7 +823,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan8BytesDouble2CompFilters)
memcpy(args->val, &tmp, sizeof(tmp));
pp.setBlockPtr((int*)readBlockFromLiteralArray("col_double_block.cdf", block));
pp.columnScanAndFilter<int64_t>(in, out);
pp.columnScanAndFilter<int64_t>(in, out, false);
results = getValuesArrayPosition<IntegralType>(getFirstValueArrayPosition(out), 0);
ASSERT_EQ(out->NVALS, 8);
@ -857,7 +857,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan4BytesFloat2CompFiltersOutputBoth)
memcpy(args->val, &tmp, sizeof(tmp));
pp.setBlockPtr((int*)readBlockFromLiteralArray("col_float_block.cdf", block));
pp.columnScanAndFilter<int32_t>(in, out);
pp.columnScanAndFilter<int32_t>(in, out, false);
ASSERT_EQ(out->NVALS, 8);
@ -892,7 +892,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan4BytesNegFloat2CompFiltersOutputBoth)
memcpy(args->val, &tmp, sizeof(tmp));
pp.setBlockPtr((int*)readBlockFromLiteralArray("col_neg_float.cdf", block));
pp.columnScanAndFilter<int32_t>(in, out);
pp.columnScanAndFilter<int32_t>(in, out, false);
ASSERT_EQ(out->NVALS, 19);
for (i = 0; i < out->NVALS; i++)
@ -926,7 +926,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan8BytesNegDouble2CompFilters)
memcpy(args->val, &tmp, sizeof(tmp));
pp.setBlockPtr((int*)readBlockFromLiteralArray("col_neg_double.cdf", block));
pp.columnScanAndFilter<int64_t>(in, out);
pp.columnScanAndFilter<int64_t>(in, out, false);
ASSERT_EQ(out->NVALS, 19);
@ -950,7 +950,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan16Bytes)
in->NVALS = 0;
pp.setBlockPtr((int*)readBlockFromLiteralArray("col16block.cdf", block));
pp.columnScanAndFilter<IntegralType>(in, out);
pp.columnScanAndFilter<IntegralType>(in, out, false);
results = getValuesArrayPosition<IntegralType>(getFirstValueArrayPosition(out), 0);
@ -992,7 +992,7 @@ TEST_F(ColumnScanFilterTest, ColumnScan16Bytes2CompFilters)
memcpy(args->val, &tmp, in->colType.DataSize);
pp.setBlockPtr((int*)readBlockFromLiteralArray("col16block.cdf", block));
pp.columnScanAndFilter<IntegralType>(in, out);
pp.columnScanAndFilter<IntegralType>(in, out, false);
results = getValuesArrayPosition<IntegralType>(getFirstValueArrayPosition(out), 0);

View File

@ -68,6 +68,7 @@ unsigned Config::m_FilesPerColumnPartition = DEFAULT_FILES_PER_COLUMN_PARTITION;
unsigned Config::m_ExtentsPerSegmentFile = DEFAULT_EXTENTS_PER_SEGMENT_FILE;
int Config::m_BulkProcessPriority = DEFAULT_BULK_PROCESS_PRIORITY;
string Config::m_BulkRollbackDir;
bool Config::m_FastDelete;
unsigned Config::m_MaxFileSystemDiskUsage = DEFAULT_MAX_FILESYSTEM_DISK_USAGE;
unsigned Config::m_NumCompressedPadBlks = DEFAULT_COMPRESSED_PADDING_BLKS;
bool Config::m_ParentOAMModuleFlag = DEFAULT_PARENT_OAM;
@ -185,6 +186,17 @@ void Config::checkReload()
m_BulkRollbackDir += "/rollback";
}
const std::string fastDeleteTemp = cf->getConfig("WriteEngine", "FastDelete");
if (fastDeleteTemp.length() == 0 || boost::iequals(fastDeleteTemp, "false"))
{
m_FastDelete = false;
}
else
{
m_FastDelete = true;
}
//--------------------------------------------------------------------------
// Initialize max disk usage
//--------------------------------------------------------------------------
@ -516,6 +528,20 @@ std::string Config::getBulkRollbackDir()
return m_BulkRollbackDir;
}
/*******************************************************************************
 * DESCRIPTION:
 *    Report whether the fast delete option is currently turned on.
 *    checkReload() is invoked under fCacheLock before the flag is read.
 * PARAMETERS:
 *    none
 * RETURN:
 *    current value of m_FastDelete
 ******************************************************************************/
bool Config::getFastDelete()
{
  // Keep the cache lock held across both the reload check and the read.
  boost::mutex::scoped_lock cacheGuard(fCacheLock);
  checkReload();

  const bool fastDeleteEnabled = m_FastDelete;
  return fastDeleteEnabled;
}
/*******************************************************************************
* DESCRIPTION:
* Get Max percentage of allowable file system disk usage for each DBRoot

View File

@ -122,6 +122,14 @@ class Config
*/
EXPORT static std::string getBulkRollbackDir();
/**
* @brief MCOL-5021 Option to enable/disable fast deletes.
* When enabled (option is disabled by default),
* all column extents are indiscriminately invalidated
* and actual delete is only performed on the AUX column.
* @return false when the WriteEngine/FastDelete setting is empty or
* equals "false" (compared case-insensitively); true for any other value.
*/
EXPORT static bool getFastDelete();
/**
* @brief Max percentage of allowable file system disk usage for each DBRoot
*/
@ -187,6 +195,7 @@ class Config
static unsigned m_ExtentsPerSegmentFile; // # extents per segment file
static int m_BulkProcessPriority; // cpimport.bin proc priority
static std::string m_BulkRollbackDir; // bulk rollback meta data dir
static bool m_FastDelete; // fast delete option
static unsigned m_MaxFileSystemDiskUsage; // max file system % disk usage
static unsigned m_NumCompressedPadBlks; // num blks to pad comp chunks
static bool m_ParentOAMModuleFlag; // are we running on parent PM

View File

@ -4601,9 +4601,38 @@ int WriteEngineWrapper::updateColumnRec(const TxnID& txnid, const vector<CSCType
// timer.start("markExtentsInvalid");
//#endif
bool hasFastDelete = false;
if (m_opType == DELETE && hasAUXCol)
{
hasFastDelete = Config::getFastDelete();
}
if (hasFastDelete)
{
ColStructList colStructListAUX(1, colStructList.back());
WriteEngine::CSCTypesList cscColTypeListAUX(1, cscColTypeList.back());
ColValueList colValueListAUX(1, colValueList.back());
std::vector<ExtCPInfo*> currentExtentRangesPtrsAUX(1, currentExtentRangesPtrs.back());
rc = writeColumnRecUpdate(txnid, cscColTypeListAUX, colStructListAUX, colValueListAUX, colOldValueList,
ridLists[extent], tableOid, true, ridLists[extent].size(),
&currentExtentRangesPtrsAUX, hasAUXCol);
for (auto& cpInfoPtr : currentExtentRangesPtrs)
{
if (cpInfoPtr)
{
cpInfoPtr->toInvalid();
}
}
}
else
{
rc = writeColumnRecUpdate(txnid, cscColTypeList, colStructList, colValueList, colOldValueList,
ridLists[extent], tableOid, true, ridLists[extent].size(),
&currentExtentRangesPtrs, hasAUXCol);
}
if (rc != NO_ERROR)
break;