feat(runtime)!: MCOL-678 Add "GROUP BY ... WITH ROLLUP" support

Adds a special column that differentiates ordinary data rows from rollup rows
of various depths, plus simple logic in row aggregation to produce the
subtotals.
Author: Sergey Zefirov
Date: 2023-09-26 17:01:53 +03:00
Committed by: GitHub
Parent: 5013717730
Commit: 920607520c
20 changed files with 650 additions and 74 deletions
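
For context, GROUP BY a, b WITH ROLLUP returns the regular groups plus subtotal rows in which trailing group-by keys are replaced with NULL, ending in a single grand-total row. The "special column" from the commit message is a marker that distinguishes ordinary data rows from subtotals of the various depths. The following is a minimal, self-contained sketch of that idea only, assuming a depth marker of 0 for detail rows and N for rows with N trailing keys rolled up; the names RollupRow and emitRollupRows are invented for illustration and are not the RowAggregation code touched by this commit.

#include <cstdint>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

// Illustration only: a row with group-by keys, an aggregated measure and a
// rollup-depth marker (the "special column" that tells subtotals apart).
struct RollupRow
{
  std::vector<std::optional<std::string>> keys;  // nullopt = key rolled up
  int64_t rollupDepth = 0;                       // 0 = detail row, N = N trailing keys rolled up
  int64_t sum = 0;
};

// For one input row, produce the detail row plus one subtotal variant per
// rollup level, each tagged with its depth.
std::vector<RollupRow> emitRollupRows(const std::vector<std::string>& keys, int64_t value)
{
  std::vector<RollupRow> out;
  for (size_t depth = 0; depth <= keys.size(); ++depth)
  {
    RollupRow r;
    r.rollupDepth = static_cast<int64_t>(depth);
    r.sum = value;
    for (size_t i = 0; i < keys.size(); ++i)
    {
      if (i < keys.size() - depth)
        r.keys.push_back(keys[i]);
      else
        r.keys.push_back(std::nullopt);
    }
    out.push_back(r);
  }
  return out;
}

int main()
{
  // One input row of SELECT region, city, SUM(x) ... GROUP BY region, city WITH ROLLUP
  for (const auto& r : emitRollupRows({"EU", "Berlin"}, 42))
  {
    std::cout << "depth=" << r.rollupDepth << " keys=";
    for (const auto& k : r.keys)
      std::cout << (k ? *k : std::string("NULL")) << ' ';
    std::cout << "sum=" << r.sum << '\n';
  }
  return 0;
}

A real aggregator would feed each generated variant into its hash table so that subtotals with identical keys are merged; the point here is only the depth marker that lets later stages tell the row kinds apart.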

@@ -904,7 +904,9 @@ SJSTEP TupleAggregateStep::prepAggregate(SJSTEP& step, JobInfo& jobInfo)
     if (distinctAgg == true)
       prep2PhasesDistinctAggregate(jobInfo, rgs, aggs);
     else
+    {
       prep2PhasesAggregate(jobInfo, rgs, aggs);
+    }
   }

   if (tbps != NULL)
@@ -1501,7 +1503,7 @@ void TupleAggregateStep::prep1PhaseAggregate(JobInfo& jobInfo, vector<RowGroup>&
   RowGroup aggRG(oidsAgg.size(), posAgg, oidsAgg, keysAgg, typeAgg, csNumAgg, scaleAgg, precisionAgg,
                  jobInfo.stringTableThreshold);
-  SP_ROWAGG_UM_t rowAgg(new RowAggregationUM(groupBy, functionVec, jobInfo.rm, jobInfo.umMemLimit));
+  SP_ROWAGG_UM_t rowAgg(new RowAggregationUM(groupBy, functionVec, jobInfo.rm, jobInfo.umMemLimit, false));
   rowAgg->timeZone(jobInfo.timeZone);

   rowgroups.push_back(aggRG);
   aggregators.push_back(rowAgg);
@@ -2594,7 +2596,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vector<Ro
   RowGroup aggRG(oidsAgg.size(), posAgg, oidsAgg, keysAgg, typeAgg, csNumAgg, scaleAgg, precisionAgg,
                  jobInfo.stringTableThreshold);
-  SP_ROWAGG_UM_t rowAgg(new RowAggregationUM(groupBy, functionVec1, jobInfo.rm, jobInfo.umMemLimit));
+  SP_ROWAGG_UM_t rowAgg(new RowAggregationUM(groupBy, functionVec1, jobInfo.rm, jobInfo.umMemLimit, false));
   rowAgg->timeZone(jobInfo.timeZone);

   posAggDist.push_back(2);  // rid
@@ -2861,7 +2863,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vector<Ro
       // construct sub-aggregator
       SP_ROWAGG_UM_t subAgg(
-          new RowAggregationUM(groupBySubNoDist, functionSub1, jobInfo.rm, jobInfo.umMemLimit));
+          new RowAggregationUM(groupBySubNoDist, functionSub1, jobInfo.rm, jobInfo.umMemLimit, false));
       subAgg->timeZone(jobInfo.timeZone);

       subAgg->groupConcat(jobInfo.groupConcatInfo.groupConcat());
@@ -3177,11 +3179,14 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector<RowGroup>
           colAggPm++;
         }

-        // PM: put the count column for avg next to the sum
-        // let fall through to add a count column for average function
-        if (aggOp != ROWAGG_AVG)
-          break;
-        /* fall through */
+        // PM: put the count column for avg next to the sum
+        // let fall through to add a count column for average function
+        if (aggOp != ROWAGG_AVG)
+          break;
+        // The AVG aggregation has a special treatment everywhere.
+        // This is so because AVG(column) is a SUM(column)/COUNT(column)
+        // and these aggregations can be utilized by AVG, if present.
+        /* fall through */

       case ROWAGG_COUNT_ASTERISK:
       case ROWAGG_COUNT_COL_NAME:
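
The new comments spell out the reasoning the fall-through relies on: AVG is not accumulated directly, it is finalized from the SUM and COUNT columns, so the AVG case reuses the COUNT handling below it. A one-function sketch of that finalization step follows; the name finalizeAvg is hypothetical and not part of this codebase.

#include <cstdint>
#include <optional>

// Hypothetical helper: AVG(col) is finalized as SUM(col) / COUNT(col);
// an empty group yields NULL instead of dividing by zero.
std::optional<double> finalizeAvg(double sum, uint64_t count)
{
  if (count == 0)
    return std::nullopt;
  return sum / static_cast<double>(count);
}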
@@ -3559,8 +3564,10 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector<RowGroup>
         // a duplicate group by column
         if (dupGroupbyIndex != -1)
+        {
           functionVecUm.push_back(SP_ROWAGG_FUNC_t(
               new RowAggFunctionCol(ROWAGG_DUP_FUNCT, ROWAGG_FUNCT_UNDEFINE, -1, outIdx, dupGroupbyIndex)));
+        }
       }
       else
       {
@@ -3706,7 +3713,7 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector<RowGroup>
   RowGroup aggRgUm(oidsAggUm.size(), posAggUm, oidsAggUm, keysAggUm, typeAggUm, csNumAggUm, scaleAggUm,
                    precisionAggUm, jobInfo.stringTableThreshold);
-  SP_ROWAGG_UM_t rowAggUm(new RowAggregationUMP2(groupByUm, functionVecUm, jobInfo.rm, jobInfo.umMemLimit));
+  SP_ROWAGG_UM_t rowAggUm(new RowAggregationUMP2(groupByUm, functionVecUm, jobInfo.rm, jobInfo.umMemLimit, false));
   rowAggUm->timeZone(jobInfo.timeZone);

   rowgroups.push_back(aggRgUm);
   aggregators.push_back(rowAggUm);
@@ -3718,7 +3725,7 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector<RowGroup>
   RowGroup aggRgPm(oidsAggPm.size(), posAggPm, oidsAggPm, keysAggPm, typeAggPm, csNumAggPm, scaleAggPm,
                    precisionAggPm, jobInfo.stringTableThreshold);
-  SP_ROWAGG_PM_t rowAggPm(new RowAggregation(groupByPm, functionVecPm));
+  SP_ROWAGG_PM_t rowAggPm(new RowAggregation(groupByPm, functionVecPm, nullptr, nullptr, jobInfo.hasRollup));
   rowAggPm->timeZone(jobInfo.timeZone);

   rowgroups.push_back(aggRgPm);
   aggregators.push_back(rowAggPm);
@@ -4803,7 +4810,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(JobInfo& jobInfo, vector<R
   RowGroup aggRgUm(oidsAggUm.size(), posAggUm, oidsAggUm, keysAggUm, typeAggUm, csNumAggUm, scaleAggUm,
                    precisionAggUm, jobInfo.stringTableThreshold);
   SP_ROWAGG_UM_t rowAggUm(
-      new RowAggregationUMP2(groupByUm, functionNoDistVec, jobInfo.rm, jobInfo.umMemLimit));
+      new RowAggregationUMP2(groupByUm, functionNoDistVec, jobInfo.rm, jobInfo.umMemLimit, false));
   rowAggUm->timeZone(jobInfo.timeZone);

   posAggDist.push_back(2);  // rid
@@ -5061,7 +5068,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(JobInfo& jobInfo, vector<R
       // construct sub-aggregator
       SP_ROWAGG_UM_t subAgg(
-          new RowAggregationUMP2(groupBySubNoDist, functionSub1, jobInfo.rm, jobInfo.umMemLimit));
+          new RowAggregationUMP2(groupBySubNoDist, functionSub1, jobInfo.rm, jobInfo.umMemLimit, false));
       subAgg->timeZone(jobInfo.timeZone);

       // add to rowAggDist
@@ -5308,7 +5315,6 @@ void TupleAggregateStep::threadedAggregateRowGroups(uint32_t threadID)
   uint32_t rgVecShift = float(fNumOfBuckets) / fNumOfThreads * threadID;

   RowAggregationMultiDistinct* multiDist = nullptr;
-
   if (!fDoneAggregate)
   {
     if (fInputJobStepAssociation.outSize() == 0)
@@ -5437,6 +5443,7 @@ void TupleAggregateStep::threadedAggregateRowGroups(uint32_t threadID)
       for (uint32_t i = 0; i < fNumOfBuckets; i++)
       {
         fAggregators[i].reset(fAggregator->clone());
+        fAggregators[i]->clearRollup();
         fAggregators[i]->setInputOutput(fRowGroupIn, &fRowGroupOuts[i]);
       }
     }
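
Each bucket gets a clone of the prototype aggregator, and the clone immediately has its rollup behaviour cleared, presumably so that subtotal rows are produced in one place rather than once per bucket. Below is a minimal sketch of that clone-then-clear pattern, assuming a simple boolean flag on the aggregator; only clearRollup() comes from the diff above, the rest of the class is illustrative and not the actual RowAggregation interface.

#include <memory>
#include <vector>

// Illustrative stand-in for an aggregator that carries a rollup flag.
class Agg
{
 public:
  explicit Agg(bool hasRollup) : fRollup(hasRollup) {}

  std::unique_ptr<Agg> clone() const { return std::make_unique<Agg>(*this); }

  void clearRollup() { fRollup = false; }  // switch rollup processing off on a copy
  bool hasRollup() const { return fRollup; }

 private:
  bool fRollup;
};

int main()
{
  Agg prototype(true);  // the prototype aggregator carries the rollup flag

  std::vector<std::unique_ptr<Agg>> perBucket;
  for (int i = 0; i < 4; ++i)
  {
    perBucket.push_back(prototype.clone());
    perBucket.back()->clearRollup();  // per-bucket clones do not handle rollup themselves
  }
  return 0;
}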
@@ -5522,7 +5529,9 @@ void TupleAggregateStep::threadedAggregateRowGroups(uint32_t threadID)
             dynamic_cast<RowAggregationMultiDistinct*>(fAggregators[c].get())
                 ->addRowGroup(&fRowGroupIns[threadID], rowBucketVecs[c]);
           else
+          {
             fAggregators[c]->addRowGroup(&fRowGroupIns[threadID], rowBucketVecs[c][0]);
+          }
         }
         catch (...)
         {