You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-29 08:21:15 +03:00
Merge pull request #2044 from dhall-MariaDB/MCOL-3738
MCOL-3738 COUNT(DISTINCT) with multiple parms
This commit is contained in:
@ -2184,6 +2184,9 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
|
||||
|
||||
// locate the return column position in aggregated rowgroup
|
||||
uint64_t outIdx = 0;
|
||||
RowAggFunctionType prevAggOp = ROWAGG_FUNCT_UNDEFINE;
|
||||
uint32_t prevRetKey = 0;
|
||||
|
||||
for (uint64_t i = 0; i < returnedColVec.size(); i++)
|
||||
{
|
||||
udafc = NULL;
|
||||
@ -2195,10 +2198,31 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
|
||||
|
||||
if (aggOp == ROWAGG_MULTI_PARM)
|
||||
{
|
||||
// Skip on final agg.: Extra parms for an aggregate have no work there.
|
||||
// Duplicate detection doesn't work for multi-parm`
|
||||
|
||||
// If this function was earlier detected as a duplicate, unduplicate it.
|
||||
SP_ROWAGG_FUNC_t funct = functionVec2.back();
|
||||
if (funct->fAggFunction == ROWAGG_DUP_FUNCT)
|
||||
funct->fAggFunction = prevAggOp;
|
||||
|
||||
// Remove it from aggDupFuncMap if it's in there.
|
||||
funct->hasMultiParm = true;
|
||||
AGG_MAP::iterator it = aggDupFuncMap.find(boost::make_tuple(prevRetKey, prevAggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL));
|
||||
if (it != aggDupFuncMap.end())
|
||||
{
|
||||
aggDupFuncMap.erase(it);
|
||||
}
|
||||
|
||||
// Skip on final agg.: Extra parms for an aggregate have no work there.
|
||||
++multiParms;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Save the op for MULTI_PARM exclusion when COUNT(DISTINCT)
|
||||
prevAggOp = aggOp;
|
||||
prevRetKey = returnedColVec[i].first;
|
||||
}
|
||||
|
||||
if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) !=
|
||||
jobInfo.distinctColVec.end() )
|
||||
@ -2543,8 +2567,6 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
|
||||
funct->fAggFunction = ROWAGG_DUP_STATS;
|
||||
else if (funct->fAggFunction == ROWAGG_UDAF)
|
||||
funct->fAggFunction = ROWAGG_DUP_UDAF;
|
||||
else if (funct->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME) // Don't track dup for this one. Gets confused when multi-parm.
|
||||
{}
|
||||
else
|
||||
funct->fAggFunction = ROWAGG_DUP_FUNCT;
|
||||
|
||||
@ -2599,7 +2621,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
|
||||
// now fix the AVG distinct function, locate the count(distinct column) position
|
||||
for (uint64_t i = 0; i < functionVec2.size(); i++)
|
||||
{
|
||||
if (functionVec2[i]->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME)
|
||||
if (functionVec2[i]->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME && !functionVec2[i]->hasMultiParm)
|
||||
{
|
||||
// if the count(distinct k) can be associated with an avg(distinct k)
|
||||
map<uint32_t, SP_ROWAGG_FUNC_t>::iterator k =
|
||||
@ -2919,11 +2941,6 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
|
||||
|
||||
for (uint64_t k = 0; k < returnedColVec.size(); k++)
|
||||
{
|
||||
if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM)
|
||||
{
|
||||
++multiParms;
|
||||
continue;
|
||||
}
|
||||
// search non-distinct functions in functionVec
|
||||
vector<SP_ROWAGG_FUNC_t>::iterator it = functionVec2.begin();
|
||||
|
||||
@ -4471,6 +4488,9 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
|
||||
// locate the return column position in aggregated rowgroup from PM
|
||||
// outIdx is i without the multi-columns,
|
||||
uint64_t outIdx = 0;
|
||||
RowAggFunctionType prevAggOp = ROWAGG_FUNCT_UNDEFINE;
|
||||
uint32_t prevRetKey = 0;
|
||||
|
||||
for (uint64_t i = 0; i < returnedColVec.size(); i++)
|
||||
{
|
||||
pUDAFFunc = NULL;
|
||||
@ -4483,10 +4503,30 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
|
||||
|
||||
if (aggOp == ROWAGG_MULTI_PARM)
|
||||
{
|
||||
// Skip on UM: Extra parms for an aggregate have no work on the UM
|
||||
// Duplicate detection doesn't work for multi-parm`
|
||||
|
||||
// If this function was earlier detected as a duplicate, unduplicate it.
|
||||
SP_ROWAGG_FUNC_t funct = functionVecUm.back();
|
||||
if (funct->fAggFunction == ROWAGG_DUP_FUNCT)
|
||||
funct->fAggFunction = prevAggOp;
|
||||
|
||||
// Remove it from aggDupFuncMap if it's in there.
|
||||
funct->hasMultiParm = true;
|
||||
AGG_MAP::iterator it = aggDupFuncMap.find(boost::make_tuple(prevRetKey, prevAggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL));
|
||||
if (it != aggDupFuncMap.end())
|
||||
{
|
||||
aggDupFuncMap.erase(it);
|
||||
}
|
||||
// Skip further UM porocessing of the multi-parm: Extra parms for an aggregate have no work on the UM
|
||||
++multiParms;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Save the op for MULTI_PARM exclusion when COUNT(DISTINCT)
|
||||
prevAggOp = aggOp;
|
||||
prevRetKey = returnedColVec[i].first;
|
||||
}
|
||||
|
||||
if (aggOp == ROWAGG_UDAF)
|
||||
{
|
||||
@ -4763,8 +4803,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
|
||||
funct->fAggFunction = ROWAGG_DUP_STATS;
|
||||
else if (funct->fAggFunction == ROWAGG_UDAF)
|
||||
funct->fAggFunction = ROWAGG_DUP_UDAF;
|
||||
else if (funct->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME) // Don't track dup for this one. Gets confused when multi-parm.
|
||||
{}
|
||||
else
|
||||
funct->fAggFunction = ROWAGG_DUP_FUNCT;
|
||||
|
||||
@ -4819,7 +4857,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
|
||||
//distinct avg
|
||||
for (uint64_t i = 0; i < functionVecUm.size(); i++)
|
||||
{
|
||||
if (functionVecUm[i]->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME)
|
||||
if (functionVecUm[i]->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME && !functionVecUm[i]->hasMultiParm)
|
||||
{
|
||||
map<uint32_t, SP_ROWAGG_FUNC_t>::iterator k =
|
||||
avgDistFuncMap.find(keysAggDist[functionVecUm[i]->fOutputColumnIndex]);
|
||||
@ -5130,11 +5168,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
|
||||
|
||||
for (uint64_t k = 0; k < returnedColVec.size(); k++)
|
||||
{
|
||||
if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM)
|
||||
{
|
||||
++multiParms;
|
||||
continue;
|
||||
}
|
||||
// search non-distinct functions in functionVec
|
||||
vector<SP_ROWAGG_FUNC_t>::iterator it = functionVecUm.begin();
|
||||
|
||||
|
@ -29,10 +29,35 @@ idx count(distinct c1, c2) count(distinct c1, c3, char1)
|
||||
2 3 2
|
||||
3 2 2
|
||||
4 1 1
|
||||
select avg(distinct c2), count(c2), count( distinct c2, c3), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3) from t1;
|
||||
avg(distinct c2) count(c2) count( distinct c2, c3) count(c2) sum(distinct c2) avg(distinct c3) count(distinct c2, c3)
|
||||
3.0000 12 12 12 9 3.0000 12
|
||||
select avg(distinct c2), count(c2), count( distinct c2, c3), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3) from t1 group by c1;
|
||||
avg(distinct c2) count(c2) count( distinct c2, c3) count(c2) sum(distinct c2) avg(distinct c3) count(distinct c2, c3)
|
||||
3.0000 6 6 6 9 2.0000 6
|
||||
3.0000 6 6 6 9 3.6667 6
|
||||
select avg(distinct c2), count(c2), count( distinct c2, c3), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2) from t1 group by c1;
|
||||
avg(distinct c2) count(c2) count( distinct c2, c3) count(c2) sum(distinct c2) avg(distinct c3) count(distinct c2)
|
||||
3.0000 6 6 6 9 2.0000 3
|
||||
3.0000 6 6 6 9 3.6667 3
|
||||
select avg(distinct c2), count(c2), count( distinct c2), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3) from t1 group by c1;
|
||||
avg(distinct c2) count(c2) count( distinct c2) count(c2) sum(distinct c2) avg(distinct c3) count(distinct c2, c3)
|
||||
3.0000 6 3 6 9 2.0000 6
|
||||
3.0000 6 3 6 9 3.6667 6
|
||||
select group_concat(distinct char1), avg(distinct c2), count(c2), count( distinct c2), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3, c1) from t1;
|
||||
group_concat(distinct char1) avg(distinct c2) count(c2) count( distinct c2) count(c2) sum(distinct c2) avg(distinct c3) count(distinct c2, c3, c1)
|
||||
elsewhere this way comes,something this way comes 3.0000 12 3 12 9 3.0000 12
|
||||
select group_concat(distinct char1), avg(distinct c2), count(c2), count( distinct c2), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3) from t1 group by c1;
|
||||
group_concat(distinct char1) avg(distinct c2) count(c2) count( distinct c2) count(c2) sum(distinct c2) avg(distinct c3) count(distinct c2, c3)
|
||||
elsewhere this way comes,something this way comes 3.0000 6 3 6 9 2.0000 6
|
||||
elsewhere this way comes,something this way comes 3.0000 6 3 6 9 3.6667 6
|
||||
select idx, sum(c3), count(distinct c1, c2), count(distinct c1, c3, char1), group_concat("ls_", char1) from t1 group by idx order by idx;
|
||||
idx sum(c3) count(distinct c1, c2) count(distinct c1, c3, char1) group_concat("ls_", char1)
|
||||
1 9 6 4 ls_something this way comes,ls_elsewhere this way comes,ls_something this way comes,ls_something this way comes,ls_elsewhere this way comes,ls_elsewhere this way comes
|
||||
2 9 3 2 ls_something this way comes,ls_elsewhere this way comes,ls_something this way comes
|
||||
3 8 2 2 ls_something this way comes,ls_elsewhere this way comes
|
||||
4 5 1 1 ls_elsewhere this way comes
|
||||
select group_concat(distinct char1), avg(distinct c2), count(c2), count( distinct c2), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3, c1) from t1;
|
||||
group_concat(distinct char1) avg(distinct c2) count(c2) count( distinct c2) count(c2) sum(distinct c2) avg(distinct c3) count(distinct c2, c3, c1)
|
||||
elsewhere this way comes,something this way comes 3.0000 12 3 12 9 3.0000 12
|
||||
DROP DATABASE mcol_3738_db;
|
||||
|
@ -33,8 +33,16 @@ insert into t1 values (1, 2, 2, 1, "something this way comes"),
|
||||
|
||||
select count(distinct c1, c2), count(distinct char1) from t1;
|
||||
select idx, count(distinct c1, c2), count(distinct c1, c3, char1) from t1 group by idx order by idx;
|
||||
select avg(distinct c2), count(c2), count( distinct c2, c3), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3) from t1;
|
||||
select avg(distinct c2), count(c2), count( distinct c2, c3), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3) from t1 group by c1;
|
||||
select avg(distinct c2), count(c2), count( distinct c2, c3), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2) from t1 group by c1;
|
||||
select avg(distinct c2), count(c2), count( distinct c2), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3) from t1 group by c1;
|
||||
select group_concat(distinct char1), avg(distinct c2), count(c2), count( distinct c2), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3, c1) from t1;
|
||||
|
||||
# group_concat causes the aggregation to be performed on UM only.
|
||||
select group_concat(distinct char1), avg(distinct c2), count(c2), count( distinct c2), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3) from t1 group by c1;
|
||||
select idx, sum(c3), count(distinct c1, c2), count(distinct c1, c3, char1), group_concat("ls_", char1) from t1 group by idx order by idx;
|
||||
select group_concat(distinct char1), avg(distinct c2), count(c2), count( distinct c2), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3, c1) from t1;
|
||||
|
||||
# Clean UP
|
||||
DROP DATABASE mcol_3738_db;
|
||||
|
@ -176,7 +176,7 @@ struct RowAggFunctionCol
|
||||
RowAggFunctionCol(RowAggFunctionType aggFunction, RowAggFunctionType stats,
|
||||
int32_t inputColIndex, int32_t outputColIndex, int32_t auxColIndex = -1) :
|
||||
fAggFunction(aggFunction), fStatsFunction(stats), fInputColumnIndex(inputColIndex),
|
||||
fOutputColumnIndex(outputColIndex), fAuxColumnIndex(auxColIndex) {}
|
||||
fOutputColumnIndex(outputColIndex), fAuxColumnIndex(auxColIndex), hasMultiParm(false) {}
|
||||
virtual ~RowAggFunctionCol() = default;
|
||||
|
||||
virtual void serialize(messageqcpp::ByteStream& bs) const;
|
||||
@ -203,6 +203,8 @@ struct RowAggFunctionCol
|
||||
// with fAggFunction == ROWAGG_MULTI_PARM. Order is important.
|
||||
// If this parameter is constant, that value is here.
|
||||
execplan::SRCP fpConstCol;
|
||||
|
||||
bool hasMultiParm;
|
||||
};
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user