1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-01 06:46:55 +03:00

feat(primproc,aggregegation)!: Changes for ROLLUP with single-phase aggregation (#3025)

The fix is simple: enable subtotals in single-phase aggregation and
disable parallel processing when there are subtotals and aggregation is
single-phase.
This commit is contained in:
Sergey Zefirov
2023-11-28 17:33:02 +03:00
committed by GitHub
parent 9a84aa8d99
commit 8632c85ecf
5 changed files with 29 additions and 4 deletions

View File

@ -1503,7 +1503,7 @@ void TupleAggregateStep::prep1PhaseAggregate(JobInfo& jobInfo, vector<RowGroup>&
RowGroup aggRG(oidsAgg.size(), posAgg, oidsAgg, keysAgg, typeAgg, csNumAgg, scaleAgg, precisionAgg,
jobInfo.stringTableThreshold);
SP_ROWAGG_UM_t rowAgg(new RowAggregationUM(groupBy, functionVec, jobInfo.rm, jobInfo.umMemLimit, false));
SP_ROWAGG_UM_t rowAgg(new RowAggregationUM(groupBy, functionVec, jobInfo.rm, jobInfo.umMemLimit, jobInfo.hasRollup));
rowAgg->timeZone(jobInfo.timeZone);
rowgroups.push_back(aggRG);
aggregators.push_back(rowAgg);
@ -5443,7 +5443,6 @@ void TupleAggregateStep::threadedAggregateRowGroups(uint32_t threadID)
for (uint32_t i = 0; i < fNumOfBuckets; i++)
{
fAggregators[i].reset(fAggregator->clone());
fAggregators[i]->clearRollup();
fAggregators[i]->setInputOutput(fRowGroupIn, &fRowGroupOuts[i]);
}
}
@ -5497,7 +5496,19 @@ void TupleAggregateStep::threadedAggregateRowGroups(uint32_t threadID)
// The key is the groupby columns, which are the leading columns.
// TBD This approach could potential
// put all values in on bucket.
uint64_t hash = rowgroup::hashRow(rowIn, hashLens[0] - 1);
// The fAggregator->hasRollup() is true when we perform one-phase
// aggregation and also are doing subtotals' computations.
// Subtotals produce new keys whose hash values may not be in
// the processing bucket. Consider case for key tuples (1,2) and (1,3).
// Their subtotals's keys will be (1, NULL) and (1, NULL)
// but they will be left in their processing buckets and never
// gets aggregated properly.
// Due to this, we put all rows into the same bucket 0 when perfoming
// single-phase aggregation with subtotals.
// For all other cases (single-phase without subtotals and two-phase
// aggregation with and without subtotals) fAggregator->hasRollup() is false.
// In these cases we have full parallel processing as expected.
uint64_t hash = fAggregator->hasRollup() ? 0 : rowgroup::hashRow(rowIn, hashLens[0] - 1);
int bucketID = hash % fNumOfBuckets;
rowBucketVecs[bucketID][0].emplace_back(rowIn.getPointer(), hash);
rowIn.nextRow();