You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
MCOL-3738 Allow COUNT(DISTINCT) to have multiple parms (#2002)
* MCOL-3738 Allow COUNT(DISTINCT) to take multiple parameters. Changes the way TupleAggregateStep sets up the aggregate arrays. * MCOL-3738 mtr test
This commit is contained in:
@ -1782,13 +1782,19 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// vectors for aggregate functions
|
// vectors for aggregate functions
|
||||||
|
RowAggFunctionType aggOp = ROWAGG_FUNCT_UNDEFINE;
|
||||||
|
RowAggFunctionType prevAggOp = ROWAGG_FUNCT_UNDEFINE;
|
||||||
for (uint64_t i = 0; i < aggColVec.size(); i++)
|
for (uint64_t i = 0; i < aggColVec.size(); i++)
|
||||||
{
|
{
|
||||||
pUDAFFunc = NULL;
|
pUDAFFunc = NULL;
|
||||||
uint32_t aggKey = aggColVec[i].first;
|
uint32_t aggKey = aggColVec[i].first;
|
||||||
RowAggFunctionType aggOp = functionIdMap(aggColVec[i].second);
|
aggOp = functionIdMap(aggColVec[i].second);
|
||||||
RowAggFunctionType stats = statsFuncIdMap(aggColVec[i].second);
|
RowAggFunctionType stats = statsFuncIdMap(aggColVec[i].second);
|
||||||
|
|
||||||
|
// Save the op for MULTI_PARM exclusion when COUNT(DISTINCT)
|
||||||
|
if (aggOp != ROWAGG_MULTI_PARM)
|
||||||
|
prevAggOp = aggOp;
|
||||||
|
|
||||||
// skip if this is a constant
|
// skip if this is a constant
|
||||||
if (aggOp == ROWAGG_CONSTANT)
|
if (aggOp == ROWAGG_CONSTANT)
|
||||||
continue;
|
continue;
|
||||||
@ -1829,10 +1835,13 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
|
|||||||
throw logic_error(emsg.str());
|
throw logic_error(emsg.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We skip distinct aggs, including extra parms. These are handled by adding them to group by list above.
|
||||||
if (aggOp == ROWAGG_DISTINCT_SUM ||
|
if (aggOp == ROWAGG_DISTINCT_SUM ||
|
||||||
aggOp == ROWAGG_DISTINCT_AVG ||
|
aggOp == ROWAGG_DISTINCT_AVG ||
|
||||||
aggOp == ROWAGG_COUNT_DISTINCT_COL_NAME)
|
aggOp == ROWAGG_COUNT_DISTINCT_COL_NAME)
|
||||||
continue;
|
continue;
|
||||||
|
if (aggOp == ROWAGG_MULTI_PARM && prevAggOp == ROWAGG_COUNT_DISTINCT_COL_NAME)
|
||||||
|
continue;
|
||||||
|
|
||||||
uint64_t colProj = projColPosMap[aggKey];
|
uint64_t colProj = projColPosMap[aggKey];
|
||||||
|
|
||||||
@ -2103,7 +2112,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
|
|||||||
funct->fpConstCol = udafc->aggParms()[udafcParamIdx];
|
funct->fpConstCol = udafc->aggParms()[udafcParamIdx];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else if (prevAggOp != ROWAGG_COUNT_DISTINCT_COL_NAME)
|
||||||
{
|
{
|
||||||
throw QueryDataExcept("prep1PhaseDistinctAggregate: UDAF multi function with no parms", aggregateFuncErr);
|
throw QueryDataExcept("prep1PhaseDistinctAggregate: UDAF multi function with no parms", aggregateFuncErr);
|
||||||
}
|
}
|
||||||
@ -2522,6 +2531,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
|
|||||||
funct->fAggFunction = ROWAGG_DUP_STATS;
|
funct->fAggFunction = ROWAGG_DUP_STATS;
|
||||||
else if (funct->fAggFunction == ROWAGG_UDAF)
|
else if (funct->fAggFunction == ROWAGG_UDAF)
|
||||||
funct->fAggFunction = ROWAGG_DUP_UDAF;
|
funct->fAggFunction = ROWAGG_DUP_UDAF;
|
||||||
|
else if (funct->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME) // Don't track dup for this one. Gets confused when multi-parm.
|
||||||
|
{}
|
||||||
else
|
else
|
||||||
funct->fAggFunction = ROWAGG_DUP_FUNCT;
|
funct->fAggFunction = ROWAGG_DUP_FUNCT;
|
||||||
|
|
||||||
@ -2724,13 +2735,36 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
|
|||||||
// for distinct, each column requires separate rowgroup
|
// for distinct, each column requires separate rowgroup
|
||||||
vector<SP_ROWAGG_DIST> rowAggSubDistVec;
|
vector<SP_ROWAGG_DIST> rowAggSubDistVec;
|
||||||
|
|
||||||
for (uint64_t i = 0; i < jobInfo.distinctColVec.size(); i++)
|
uint32_t distinctColKey;
|
||||||
|
int64_t j;
|
||||||
|
uint64_t k;
|
||||||
|
uint64_t outIdx = 0;
|
||||||
|
for (uint64_t i = 0; i < returnedColVec.size(); i++)
|
||||||
{
|
{
|
||||||
uint32_t distinctColKey = jobInfo.distinctColVec[i];
|
if (returnedColVec[i].second == 0)
|
||||||
uint64_t j = -1;
|
{
|
||||||
|
++outIdx;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
j = -1;
|
||||||
|
|
||||||
|
distinctColKey = -1;
|
||||||
|
// Find the entry in distinctColVec, if any
|
||||||
|
for (k = 0; k < jobInfo.distinctColVec.size(); k++)
|
||||||
|
{
|
||||||
|
distinctColKey = jobInfo.distinctColVec[k];
|
||||||
|
if (returnedColVec[i].first == distinctColKey)
|
||||||
|
break;
|
||||||
|
|
||||||
|
}
|
||||||
|
if (distinctColKey == (uint32_t)-1)
|
||||||
|
{
|
||||||
|
++outIdx;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
// locate the distinct key in the row group
|
// locate the distinct key in the row group
|
||||||
for (uint64_t k = 0; k < keysAgg.size(); k++)
|
for (k = 0; k < keysAgg.size(); k++)
|
||||||
{
|
{
|
||||||
if (keysProj[k] == distinctColKey)
|
if (keysProj[k] == distinctColKey)
|
||||||
{
|
{
|
||||||
@ -2739,7 +2773,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
idbassert(j != (uint64_t) - 1);
|
idbassert(j != -1);
|
||||||
|
|
||||||
oidsAggSub = oidsAggGb;
|
oidsAggSub = oidsAggGb;
|
||||||
keysAggSub = keysAggGb;
|
keysAggSub = keysAggGb;
|
||||||
@ -2757,20 +2791,9 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
|
|||||||
csNumAggSub.push_back(csNumProj[j]);
|
csNumAggSub.push_back(csNumProj[j]);
|
||||||
widthAggSub.push_back(widthProj[j]);
|
widthAggSub.push_back(widthProj[j]);
|
||||||
|
|
||||||
// construct sub-rowgroup
|
|
||||||
posAggSub.clear();
|
|
||||||
posAggSub.push_back(2); // rid
|
|
||||||
|
|
||||||
for (uint64_t k = 0; k < oidsAggSub.size(); k++)
|
|
||||||
posAggSub.push_back(posAggSub[k] + widthAggSub[k]);
|
|
||||||
|
|
||||||
RowGroup subRg(oidsAggSub.size(), posAggSub, oidsAggSub, keysAggSub, typeAggSub,
|
|
||||||
csNumAggSub, scaleAggSub, precisionAggSub, jobInfo.stringTableThreshold);
|
|
||||||
subRgVec.push_back(subRg);
|
|
||||||
|
|
||||||
// construct groupby vector
|
// construct groupby vector
|
||||||
vector<SP_ROWAGG_GRPBY_t> groupBySub;
|
vector<SP_ROWAGG_GRPBY_t> groupBySub;
|
||||||
uint64_t k = 0;
|
k = 0;
|
||||||
|
|
||||||
while (k < jobInfo.groupByColVec.size())
|
while (k < jobInfo.groupByColVec.size())
|
||||||
{
|
{
|
||||||
@ -2778,11 +2801,60 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
|
|||||||
groupBySub.push_back(groupby);
|
groupBySub.push_back(groupby);
|
||||||
k++;
|
k++;
|
||||||
}
|
}
|
||||||
|
|
||||||
// add the distinct column as groupby
|
// add the distinct column as groupby
|
||||||
SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k));
|
SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k));
|
||||||
groupBySub.push_back(groupby);
|
groupBySub.push_back(groupby);
|
||||||
|
|
||||||
|
// Add multi parm distinct
|
||||||
|
while ((i+1) < returnedColVec.size() && functionIdMap(returnedColVec[i+1].second) == ROWAGG_MULTI_PARM)
|
||||||
|
{
|
||||||
|
++i;
|
||||||
|
uint32_t dColKey = -1;
|
||||||
|
j = -1;
|
||||||
|
|
||||||
|
// Find the entry in distinctColVec, if any
|
||||||
|
for (k = 0; k < jobInfo.distinctColVec.size(); k++)
|
||||||
|
{
|
||||||
|
dColKey = jobInfo.distinctColVec[k];
|
||||||
|
if (returnedColVec[i].first == dColKey)
|
||||||
|
break;
|
||||||
|
|
||||||
|
}
|
||||||
|
idbassert(dColKey != (uint32_t)-1);
|
||||||
|
// locate the distinct key in the row group
|
||||||
|
for (k = 0; k < keysAgg.size(); k++)
|
||||||
|
{
|
||||||
|
if (keysProj[k] == dColKey)
|
||||||
|
{
|
||||||
|
j = k;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
idbassert(j != -1);
|
||||||
|
|
||||||
|
oidsAggSub.push_back(oidsProj[j]);
|
||||||
|
keysAggSub.push_back(keysProj[j]);
|
||||||
|
scaleAggSub.push_back(scaleProj[j]);
|
||||||
|
precisionAggSub.push_back(precisionProj[j]);
|
||||||
|
typeAggSub.push_back(typeProj[j]);
|
||||||
|
csNumAggSub.push_back(csNumProj[j]);
|
||||||
|
widthAggSub.push_back(widthProj[j]);
|
||||||
|
|
||||||
|
SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k));
|
||||||
|
groupBySub.push_back(groupby);
|
||||||
|
}
|
||||||
|
|
||||||
|
// construct sub-rowgroup
|
||||||
|
posAggSub.clear();
|
||||||
|
posAggSub.push_back(2); // rid
|
||||||
|
|
||||||
|
for ( k = 0; k < oidsAggSub.size(); k++)
|
||||||
|
posAggSub.push_back(posAggSub[k] + widthAggSub[k]);
|
||||||
|
|
||||||
|
RowGroup subRg(oidsAggSub.size(), posAggSub, oidsAggSub, keysAggSub, typeAggSub,
|
||||||
|
csNumAggSub, scaleAggSub, precisionAggSub, jobInfo.stringTableThreshold);
|
||||||
|
subRgVec.push_back(subRg);
|
||||||
|
|
||||||
// Keep a count of the parms after the first for any aggregate.
|
// Keep a count of the parms after the first for any aggregate.
|
||||||
// These will be skipped and the count needs to be subtracted
|
// These will be skipped and the count needs to be subtracted
|
||||||
// from where the aux column will be.
|
// from where the aux column will be.
|
||||||
@ -2792,37 +2864,26 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
|
|||||||
// -- dummy function vector for sub-aggregator, which does distinct only
|
// -- dummy function vector for sub-aggregator, which does distinct only
|
||||||
// -- aggregate function on this distinct column for rowAggDist
|
// -- aggregate function on this distinct column for rowAggDist
|
||||||
vector<SP_ROWAGG_FUNC_t> functionSub1, functionSub2;
|
vector<SP_ROWAGG_FUNC_t> functionSub1, functionSub2;
|
||||||
|
// search the function in functionVec
|
||||||
|
vector<SP_ROWAGG_FUNC_t>::iterator it = functionVec2.begin();
|
||||||
|
|
||||||
for (uint64_t k = 0; k < returnedColVec.size(); k++)
|
while (it != functionVec2.end())
|
||||||
{
|
{
|
||||||
if (functionIdMap(returnedColVec[i].second) == ROWAGG_MULTI_PARM)
|
SP_ROWAGG_FUNC_t f = *it++;
|
||||||
|
|
||||||
|
if ((f->fOutputColumnIndex == outIdx) &&
|
||||||
|
(f->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME ||
|
||||||
|
f->fAggFunction == ROWAGG_DISTINCT_SUM ||
|
||||||
|
f->fAggFunction == ROWAGG_DISTINCT_AVG))
|
||||||
{
|
{
|
||||||
++multiParms;
|
SP_ROWAGG_FUNC_t funct(
|
||||||
continue;
|
new RowAggFunctionCol(
|
||||||
}
|
f->fAggFunction,
|
||||||
if (returnedColVec[k].first != distinctColKey)
|
f->fStatsFunction,
|
||||||
continue;
|
groupBySub.size() - 1,
|
||||||
|
f->fOutputColumnIndex,
|
||||||
// search the function in functionVec
|
f->fAuxColumnIndex-multiParms));
|
||||||
vector<SP_ROWAGG_FUNC_t>::iterator it = functionVec2.begin();
|
functionSub2.push_back(funct);
|
||||||
|
|
||||||
while (it != functionVec2.end())
|
|
||||||
{
|
|
||||||
SP_ROWAGG_FUNC_t f = *it++;
|
|
||||||
|
|
||||||
if ((f->fOutputColumnIndex == k) &&
|
|
||||||
(f->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME ||
|
|
||||||
f->fAggFunction == ROWAGG_DISTINCT_SUM ||
|
|
||||||
f->fAggFunction == ROWAGG_DISTINCT_AVG))
|
|
||||||
{
|
|
||||||
SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol(
|
|
||||||
f->fAggFunction,
|
|
||||||
f->fStatsFunction,
|
|
||||||
groupBySub.size() - 1,
|
|
||||||
f->fOutputColumnIndex,
|
|
||||||
f->fAuxColumnIndex-multiParms));
|
|
||||||
functionSub2.push_back(funct);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2834,6 +2895,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
|
|||||||
|
|
||||||
// add to rowAggDist
|
// add to rowAggDist
|
||||||
multiDistinctAggregator->addSubAggregator(subAgg, subRg, functionSub2);
|
multiDistinctAggregator->addSubAggregator(subAgg, subRg, functionSub2);
|
||||||
|
|
||||||
|
++outIdx;
|
||||||
}
|
}
|
||||||
|
|
||||||
// cover any non-distinct column functions
|
// cover any non-distinct column functions
|
||||||
@ -3968,11 +4031,17 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// vectors for aggregate functions
|
// vectors for aggregate functions
|
||||||
|
RowAggFunctionType aggOp = ROWAGG_FUNCT_UNDEFINE;
|
||||||
|
RowAggFunctionType prevAggOp = ROWAGG_FUNCT_UNDEFINE;
|
||||||
for (uint64_t i = 0; i < aggColVec.size(); i++)
|
for (uint64_t i = 0; i < aggColVec.size(); i++)
|
||||||
{
|
{
|
||||||
// skip on PM if this is a constant
|
aggOp = functionIdMap(aggColVec[i].second);
|
||||||
RowAggFunctionType aggOp = functionIdMap(aggColVec[i].second);
|
|
||||||
|
|
||||||
|
// Save the op for MULTI_PARM exclusion when COUNT(DISTINCT)
|
||||||
|
if (aggOp != ROWAGG_MULTI_PARM)
|
||||||
|
prevAggOp = aggOp;
|
||||||
|
|
||||||
|
// skip on PM if this is a constant
|
||||||
if (aggOp == ROWAGG_CONSTANT)
|
if (aggOp == ROWAGG_CONSTANT)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
@ -3994,17 +4063,21 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
|
|||||||
throw logic_error(emsg.str());
|
throw logic_error(emsg.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
RowAggFunctionType stats = statsFuncIdMap(aggColVec[i].second);
|
RowAggFunctionType stats = statsFuncIdMap(aggOp);
|
||||||
|
|
||||||
// skip sum / count(column) if avg is also selected
|
// skip sum / count(column) if avg is also selected
|
||||||
if ((aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) &&
|
if ((aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) &&
|
||||||
(avgSet.find(aggKey) != avgSet.end()))
|
(avgSet.find(aggKey) != avgSet.end()))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
// We skip distinct aggs, including extra parms. These are handled by adding them to group by list above.
|
||||||
if (aggOp == ROWAGG_DISTINCT_SUM ||
|
if (aggOp == ROWAGG_DISTINCT_SUM ||
|
||||||
aggOp == ROWAGG_DISTINCT_AVG ||
|
aggOp == ROWAGG_DISTINCT_AVG ||
|
||||||
aggOp == ROWAGG_COUNT_DISTINCT_COL_NAME)
|
aggOp == ROWAGG_COUNT_DISTINCT_COL_NAME)
|
||||||
continue;
|
continue;
|
||||||
|
if (aggOp == ROWAGG_MULTI_PARM && prevAggOp == ROWAGG_COUNT_DISTINCT_COL_NAME)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
|
||||||
uint64_t colProj = projColPosMap[aggKey];
|
uint64_t colProj = projColPosMap[aggKey];
|
||||||
SP_ROWAGG_FUNC_t funct;
|
SP_ROWAGG_FUNC_t funct;
|
||||||
@ -4273,7 +4346,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
|
|||||||
funct->fpConstCol = udafc->aggParms()[udafcParamIdx];
|
funct->fpConstCol = udafc->aggParms()[udafcParamIdx];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else if (prevAggOp != ROWAGG_COUNT_DISTINCT_COL_NAME)
|
||||||
{
|
{
|
||||||
throw QueryDataExcept("prep2PhasesDistinctAggregate: UDAF multi function with no parms", aggregateFuncErr);
|
throw QueryDataExcept("prep2PhasesDistinctAggregate: UDAF multi function with no parms", aggregateFuncErr);
|
||||||
}
|
}
|
||||||
@ -4682,6 +4755,8 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
|
|||||||
funct->fAggFunction = ROWAGG_DUP_STATS;
|
funct->fAggFunction = ROWAGG_DUP_STATS;
|
||||||
else if (funct->fAggFunction == ROWAGG_UDAF)
|
else if (funct->fAggFunction == ROWAGG_UDAF)
|
||||||
funct->fAggFunction = ROWAGG_DUP_UDAF;
|
funct->fAggFunction = ROWAGG_DUP_UDAF;
|
||||||
|
else if (funct->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME) // Don't track dup for this one. Gets confused when multi-parm.
|
||||||
|
{}
|
||||||
else
|
else
|
||||||
funct->fAggFunction = ROWAGG_DUP_FUNCT;
|
funct->fAggFunction = ROWAGG_DUP_FUNCT;
|
||||||
|
|
||||||
@ -4874,16 +4949,39 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
|
|||||||
widthAggGb.push_back(widthAggUm[i]);
|
widthAggGb.push_back(widthAggUm[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// for distinct, each column requires seperate rowgroup
|
// for distinct, each column requires a separate rowgroup
|
||||||
vector<SP_ROWAGG_DIST> rowAggSubDistVec;
|
vector<SP_ROWAGG_DIST> rowAggSubDistVec;
|
||||||
|
|
||||||
for (uint64_t i = 0; i < jobInfo.distinctColVec.size(); i++)
|
uint32_t distinctColKey;
|
||||||
|
int64_t j;
|
||||||
|
uint64_t k;
|
||||||
|
uint64_t outIdx = 0;
|
||||||
|
for (uint64_t i = 0; i < returnedColVec.size(); i++)
|
||||||
{
|
{
|
||||||
uint32_t distinctColKey = jobInfo.distinctColVec[i];
|
if (returnedColVec[i].second == 0)
|
||||||
uint64_t j = -1;
|
{
|
||||||
|
++outIdx;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
j = -1;
|
||||||
|
|
||||||
|
distinctColKey = -1;
|
||||||
|
// Find the entry in distinctColVec, if any
|
||||||
|
for (k = 0; k < jobInfo.distinctColVec.size(); k++)
|
||||||
|
{
|
||||||
|
distinctColKey = jobInfo.distinctColVec[k];
|
||||||
|
if (returnedColVec[i].first == distinctColKey)
|
||||||
|
break;
|
||||||
|
|
||||||
|
}
|
||||||
|
if (distinctColKey == (uint32_t)-1)
|
||||||
|
{
|
||||||
|
++outIdx;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
// locate the distinct key in the row group
|
// locate the distinct key in the row group
|
||||||
for (uint64_t k = 0; k < keysAggUm.size(); k++)
|
for (k = 0; k < keysAggUm.size(); k++)
|
||||||
{
|
{
|
||||||
if (keysAggUm[k] == distinctColKey)
|
if (keysAggUm[k] == distinctColKey)
|
||||||
{
|
{
|
||||||
@ -4892,7 +4990,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
idbassert(j != (uint64_t) - 1);
|
idbassert(j != -1);
|
||||||
|
|
||||||
oidsAggSub = oidsAggGb;
|
oidsAggSub = oidsAggGb;
|
||||||
keysAggSub = keysAggGb;
|
keysAggSub = keysAggGb;
|
||||||
@ -4907,23 +5005,12 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
|
|||||||
scaleAggSub.push_back(scaleAggUm[j]);
|
scaleAggSub.push_back(scaleAggUm[j]);
|
||||||
precisionAggSub.push_back(precisionAggUm[j]);
|
precisionAggSub.push_back(precisionAggUm[j]);
|
||||||
typeAggSub.push_back(typeAggUm[j]);
|
typeAggSub.push_back(typeAggUm[j]);
|
||||||
csNumAggSub.push_back(csNumAggUm[i]);
|
csNumAggSub.push_back(csNumAggUm[j]);
|
||||||
widthAggSub.push_back(widthAggUm[j]);
|
widthAggSub.push_back(widthAggUm[j]);
|
||||||
|
|
||||||
// construct sub-rowgroup
|
|
||||||
posAggSub.clear();
|
|
||||||
posAggSub.push_back(2); // rid
|
|
||||||
|
|
||||||
for (uint64_t k = 0; k < oidsAggSub.size(); k++)
|
|
||||||
posAggSub.push_back(posAggSub[k] + widthAggSub[k]);
|
|
||||||
|
|
||||||
RowGroup subRg(oidsAggSub.size(), posAggSub, oidsAggSub, keysAggSub, typeAggSub,
|
|
||||||
csNumAggSub, scaleAggSub, precisionAggSub, jobInfo.stringTableThreshold);
|
|
||||||
subRgVec.push_back(subRg);
|
|
||||||
|
|
||||||
// construct groupby vector
|
// construct groupby vector
|
||||||
vector<SP_ROWAGG_GRPBY_t> groupBySub;
|
vector<SP_ROWAGG_GRPBY_t> groupBySub;
|
||||||
uint64_t k = 0;
|
k = 0;
|
||||||
|
|
||||||
while (k < jobInfo.groupByColVec.size())
|
while (k < jobInfo.groupByColVec.size())
|
||||||
{
|
{
|
||||||
@ -4931,11 +5018,60 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
|
|||||||
groupBySub.push_back(groupby);
|
groupBySub.push_back(groupby);
|
||||||
k++;
|
k++;
|
||||||
}
|
}
|
||||||
|
|
||||||
// add the distinct column as groupby
|
// add the distinct column as groupby
|
||||||
SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k));
|
SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k));
|
||||||
groupBySub.push_back(groupby);
|
groupBySub.push_back(groupby);
|
||||||
|
|
||||||
|
// Add multi parm distinct
|
||||||
|
while ((i+1) < returnedColVec.size() && functionIdMap(returnedColVec[i+1].second) == ROWAGG_MULTI_PARM)
|
||||||
|
{
|
||||||
|
++i;
|
||||||
|
uint32_t dColKey = -1;
|
||||||
|
j = -1;
|
||||||
|
|
||||||
|
// Find the entry in distinctColVec, if any
|
||||||
|
for (k = 0; k < jobInfo.distinctColVec.size(); k++)
|
||||||
|
{
|
||||||
|
dColKey = jobInfo.distinctColVec[k];
|
||||||
|
if (returnedColVec[i].first == dColKey)
|
||||||
|
break;
|
||||||
|
|
||||||
|
}
|
||||||
|
idbassert(dColKey != (uint32_t)-1);
|
||||||
|
// locate the distinct key in the row group
|
||||||
|
for (k = 0; k < keysAggUm.size(); k++)
|
||||||
|
{
|
||||||
|
if (keysAggUm[k] == dColKey)
|
||||||
|
{
|
||||||
|
j = k;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
idbassert(j != -1);
|
||||||
|
|
||||||
|
oidsAggSub.push_back(oidsAggUm[j]);
|
||||||
|
keysAggSub.push_back(keysAggUm[j]);
|
||||||
|
scaleAggSub.push_back(scaleAggUm[j]);
|
||||||
|
precisionAggSub.push_back(precisionAggUm[j]);
|
||||||
|
typeAggSub.push_back(typeAggUm[j]);
|
||||||
|
csNumAggSub.push_back(csNumAggUm[j]);
|
||||||
|
widthAggSub.push_back(widthAggUm[j]);
|
||||||
|
|
||||||
|
SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k));
|
||||||
|
groupBySub.push_back(groupby);
|
||||||
|
}
|
||||||
|
|
||||||
|
// construct sub-rowgroup
|
||||||
|
posAggSub.clear();
|
||||||
|
posAggSub.push_back(2); // rid
|
||||||
|
|
||||||
|
for ( k = 0; k < oidsAggSub.size(); k++)
|
||||||
|
posAggSub.push_back(posAggSub[k] + widthAggSub[k]);
|
||||||
|
|
||||||
|
RowGroup subRg(oidsAggSub.size(), posAggSub, oidsAggSub, keysAggSub, typeAggSub,
|
||||||
|
csNumAggSub, scaleAggSub, precisionAggSub, jobInfo.stringTableThreshold);
|
||||||
|
subRgVec.push_back(subRg);
|
||||||
|
|
||||||
// Keep a count of the parms after the first for any aggregate.
|
// Keep a count of the parms after the first for any aggregate.
|
||||||
// These will be skipped and the count needs to be subtracted
|
// These will be skipped and the count needs to be subtracted
|
||||||
// from where the aux column will be.
|
// from where the aux column will be.
|
||||||
@ -4945,38 +5081,26 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
|
|||||||
// -- dummy function vector for sub-aggregator, which does distinct only
|
// -- dummy function vector for sub-aggregator, which does distinct only
|
||||||
// -- aggregate function on this distinct column for rowAggDist
|
// -- aggregate function on this distinct column for rowAggDist
|
||||||
vector<SP_ROWAGG_FUNC_t> functionSub1, functionSub2;
|
vector<SP_ROWAGG_FUNC_t> functionSub1, functionSub2;
|
||||||
|
// search the function in functionVec
|
||||||
|
vector<SP_ROWAGG_FUNC_t>::iterator it = functionVecUm.begin();
|
||||||
|
|
||||||
for (uint64_t k = 0; k < returnedColVec.size(); k++)
|
while (it != functionVecUm.end())
|
||||||
{
|
{
|
||||||
if (functionIdMap(returnedColVec[i].second) == ROWAGG_MULTI_PARM)
|
SP_ROWAGG_FUNC_t f = *it++;
|
||||||
|
|
||||||
|
if ((f->fOutputColumnIndex == outIdx) &&
|
||||||
|
(f->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME ||
|
||||||
|
f->fAggFunction == ROWAGG_DISTINCT_SUM ||
|
||||||
|
f->fAggFunction == ROWAGG_DISTINCT_AVG))
|
||||||
{
|
{
|
||||||
++multiParms;
|
SP_ROWAGG_FUNC_t funct(
|
||||||
continue;
|
new RowAggFunctionCol(
|
||||||
}
|
f->fAggFunction,
|
||||||
if (returnedColVec[k].first != distinctColKey)
|
f->fStatsFunction,
|
||||||
continue;
|
groupBySub.size() - 1,
|
||||||
|
f->fOutputColumnIndex,
|
||||||
// search the function in functionVec
|
f->fAuxColumnIndex-multiParms));
|
||||||
vector<SP_ROWAGG_FUNC_t>::iterator it = functionVecUm.begin();
|
functionSub2.push_back(funct);
|
||||||
|
|
||||||
while (it != functionVecUm.end())
|
|
||||||
{
|
|
||||||
SP_ROWAGG_FUNC_t f = *it++;
|
|
||||||
|
|
||||||
if ((f->fOutputColumnIndex == k) &&
|
|
||||||
(f->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME ||
|
|
||||||
f->fAggFunction == ROWAGG_DISTINCT_SUM ||
|
|
||||||
f->fAggFunction == ROWAGG_DISTINCT_AVG))
|
|
||||||
{
|
|
||||||
SP_ROWAGG_FUNC_t funct(
|
|
||||||
new RowAggFunctionCol(
|
|
||||||
f->fAggFunction,
|
|
||||||
f->fStatsFunction,
|
|
||||||
groupBySub.size() - 1,
|
|
||||||
f->fOutputColumnIndex,
|
|
||||||
f->fAuxColumnIndex-multiParms));
|
|
||||||
functionSub2.push_back(funct);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4986,6 +5110,8 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
|
|||||||
|
|
||||||
// add to rowAggDist
|
// add to rowAggDist
|
||||||
multiDistinctAggregator->addSubAggregator(subAgg, subRg, functionSub2);
|
multiDistinctAggregator->addSubAggregator(subAgg, subRg, functionSub2);
|
||||||
|
|
||||||
|
++outIdx;
|
||||||
}
|
}
|
||||||
|
|
||||||
// cover any non-distinct column functions
|
// cover any non-distinct column functions
|
||||||
|
@ -4737,10 +4737,11 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
|
|||||||
gwi.aggOnSelect = true;
|
gwi.aggOnSelect = true;
|
||||||
|
|
||||||
// Argument_count() is the # of formal parms to the agg fcn. Columnstore
|
// Argument_count() is the # of formal parms to the agg fcn. Columnstore
|
||||||
// only supports 1 argument except UDAnF and GROUP_CONCAT
|
// only supports 1 argument except UDAnF, COUNT(DISTINCT) and GROUP_CONCAT
|
||||||
// TODO: Support more than one parm for COUNT(DISTINCT)
|
if (isp->argument_count() != 1
|
||||||
if (isp->argument_count() != 1 && isp->sum_func() != Item_sum::GROUP_CONCAT_FUNC
|
&& isp->sum_func() != Item_sum::COUNT_DISTINCT_FUNC
|
||||||
&& isp->sum_func() != Item_sum::UDF_SUM_FUNC)
|
&& isp->sum_func() != Item_sum::GROUP_CONCAT_FUNC
|
||||||
|
&& isp->sum_func() != Item_sum::UDF_SUM_FUNC)
|
||||||
{
|
{
|
||||||
gwi.fatalParseError = true;
|
gwi.fatalParseError = true;
|
||||||
gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_MUL_ARG_AGG);
|
gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_MUL_ARG_AGG);
|
||||||
|
@ -0,0 +1,38 @@
|
|||||||
|
DROP DATABASE IF EXISTS mcol_3738_db;
|
||||||
|
CREATE DATABASE mcol_3738_db;
|
||||||
|
USE mcol_3738_db;
|
||||||
|
CREATE TABLE `t1` (
|
||||||
|
idx int(11),
|
||||||
|
c1 int(11),
|
||||||
|
c2 int(11),
|
||||||
|
c3 int(11),
|
||||||
|
char1 varchar(28)
|
||||||
|
) ENGINE=Columnstore DEFAULT CHARSET=utf8mb4;
|
||||||
|
insert into t1 values (1, 2, 2, 1, "something this way comes"),
|
||||||
|
(1, 2, 3, 1, "elsewhere this way comes"),
|
||||||
|
(1, 2, 4, 1, "something this way comes"),
|
||||||
|
(1, 3, 2, 2, "something this way comes"),
|
||||||
|
(1, 3, 3, 2, "elsewhere this way comes"),
|
||||||
|
(1, 3, 4, 2, "elsewhere this way comes"),
|
||||||
|
(2, 2, 2, 3, "something this way comes"),
|
||||||
|
(2, 2, 3, 3, "elsewhere this way comes"),
|
||||||
|
(2, 2, 4, 3, "something this way comes"),
|
||||||
|
(3, 3, 2, 4, "something this way comes"),
|
||||||
|
(3, 3, 3, 4, "elsewhere this way comes"),
|
||||||
|
(4, 3, 4, 5, "elsewhere this way comes");
|
||||||
|
select count(distinct c1, c2), count(distinct char1) from t1;
|
||||||
|
count(distinct c1, c2) count(distinct char1)
|
||||||
|
6 2
|
||||||
|
select idx, count(distinct c1, c2), count(distinct c1, c3, char1) from t1 group by idx order by idx;
|
||||||
|
idx count(distinct c1, c2) count(distinct c1, c3, char1)
|
||||||
|
1 6 4
|
||||||
|
2 3 2
|
||||||
|
3 2 2
|
||||||
|
4 1 1
|
||||||
|
select idx, sum(c3), count(distinct c1, c2), count(distinct c1, c3, char1), group_concat("ls_", char1) from t1 group by idx order by idx;
|
||||||
|
idx sum(c3) count(distinct c1, c2) count(distinct c1, c3, char1) group_concat("ls_", char1)
|
||||||
|
1 9 6 4 ls_something this way comes,ls_elsewhere this way comes,ls_something this way comes,ls_something this way comes,ls_elsewhere this way comes,ls_elsewhere this way comes
|
||||||
|
2 9 3 2 ls_something this way comes,ls_elsewhere this way comes,ls_something this way comes
|
||||||
|
3 8 2 2 ls_something this way comes,ls_elsewhere this way comes
|
||||||
|
4 5 1 1 ls_elsewhere this way comes
|
||||||
|
DROP DATABASE mcol_3738_db;
|
@ -0,0 +1,40 @@
|
|||||||
|
#
|
||||||
|
# Test COUNT(DISTINCT n, m, ...)
|
||||||
|
# Author: David Hall, david.hall@mariadb.com
|
||||||
|
#
|
||||||
|
-- source ../include/have_columnstore.inc
|
||||||
|
|
||||||
|
--disable_warnings
|
||||||
|
DROP DATABASE IF EXISTS mcol_3738_db;
|
||||||
|
--enable_warnings
|
||||||
|
|
||||||
|
CREATE DATABASE mcol_3738_db;
|
||||||
|
USE mcol_3738_db;
|
||||||
|
CREATE TABLE `t1` (
|
||||||
|
idx int(11),
|
||||||
|
c1 int(11),
|
||||||
|
c2 int(11),
|
||||||
|
c3 int(11),
|
||||||
|
char1 varchar(28)
|
||||||
|
) ENGINE=Columnstore DEFAULT CHARSET=utf8mb4;
|
||||||
|
|
||||||
|
insert into t1 values (1, 2, 2, 1, "something this way comes"),
|
||||||
|
(1, 2, 3, 1, "elsewhere this way comes"),
|
||||||
|
(1, 2, 4, 1, "something this way comes"),
|
||||||
|
(1, 3, 2, 2, "something this way comes"),
|
||||||
|
(1, 3, 3, 2, "elsewhere this way comes"),
|
||||||
|
(1, 3, 4, 2, "elsewhere this way comes"),
|
||||||
|
(2, 2, 2, 3, "something this way comes"),
|
||||||
|
(2, 2, 3, 3, "elsewhere this way comes"),
|
||||||
|
(2, 2, 4, 3, "something this way comes"),
|
||||||
|
(3, 3, 2, 4, "something this way comes"),
|
||||||
|
(3, 3, 3, 4, "elsewhere this way comes"),
|
||||||
|
(4, 3, 4, 5, "elsewhere this way comes");
|
||||||
|
|
||||||
|
select count(distinct c1, c2), count(distinct char1) from t1;
|
||||||
|
select idx, count(distinct c1, c2), count(distinct c1, c3, char1) from t1 group by idx order by idx;
|
||||||
|
# group_concat causes the aggregation to be performed on UM only.
|
||||||
|
select idx, sum(c3), count(distinct c1, c2), count(distinct c1, c3, char1), group_concat("ls_", char1) from t1 group by idx order by idx;
|
||||||
|
|
||||||
|
# Clean UP
|
||||||
|
DROP DATABASE mcol_3738_db;
|
Reference in New Issue
Block a user