From 76607be63a1c1d614bd5e731761d440dd226ac83 Mon Sep 17 00:00:00 2001 From: David Hall Date: Fri, 9 Jul 2021 09:07:03 -0500 Subject: [PATCH] MCOL-3738 COUNT(DISTINCT) with multiple parms Fixed regression Added a few more mtr tests --- dbcon/joblist/tupleaggregatestep.cpp | 69 ++++++++++++++----- .../mcol3738_count_distinct_multiparm.result | 25 +++++++ .../t/mcol3738_count_distinct_multiparm.test | 8 +++ utils/rowgroup/rowaggregation.h | 4 +- 4 files changed, 87 insertions(+), 19 deletions(-) diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index d8c17fa07..e83256d31 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -2170,6 +2170,9 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // locate the return column position in aggregated rowgroup uint64_t outIdx = 0; + RowAggFunctionType prevAggOp = ROWAGG_FUNCT_UNDEFINE; + uint32_t prevRetKey = 0; + for (uint64_t i = 0; i < returnedColVec.size(); i++) { udafc = NULL; @@ -2181,10 +2184,31 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( if (aggOp == ROWAGG_MULTI_PARM) { - // Skip on final agg.: Extra parms for an aggregate have no work there. + // Duplicate detection doesn't work for multi-parm` + + // If this function was earlier detected as a duplicate, unduplicate it. + SP_ROWAGG_FUNC_t funct = functionVec2.back(); + if (funct->fAggFunction == ROWAGG_DUP_FUNCT) + funct->fAggFunction = prevAggOp; + + // Remove it from aggDupFuncMap if it's in there. + funct->hasMultiParm = true; + AGG_MAP::iterator it = aggDupFuncMap.find(boost::make_tuple(prevRetKey, prevAggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)); + if (it != aggDupFuncMap.end()) + { + aggDupFuncMap.erase(it); + } + + // Skip on final agg.: Extra parms for an aggregate have no work there. ++multiParms; continue; } + else + { + // Save the op for MULTI_PARM exclusion when COUNT(DISTINCT) + prevAggOp = aggOp; + prevRetKey = returnedColVec[i].first; + } if (find(jobInfo.distinctColVec.begin(), jobInfo.distinctColVec.end(), retKey) != jobInfo.distinctColVec.end() ) @@ -2531,8 +2555,6 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( funct->fAggFunction = ROWAGG_DUP_STATS; else if (funct->fAggFunction == ROWAGG_UDAF) funct->fAggFunction = ROWAGG_DUP_UDAF; - else if (funct->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME) // Don't track dup for this one. Gets confused when multi-parm. - {} else funct->fAggFunction = ROWAGG_DUP_FUNCT; @@ -2587,7 +2609,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // now fix the AVG distinct function, locate the count(distinct column) position for (uint64_t i = 0; i < functionVec2.size(); i++) { - if (functionVec2[i]->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME) + if (functionVec2[i]->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME && !functionVec2[i]->hasMultiParm) { // if the count(distinct k) can be associated with an avg(distinct k) map::iterator k = @@ -2907,11 +2929,6 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( for (uint64_t k = 0; k < returnedColVec.size(); k++) { - if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) - { - ++multiParms; - continue; - } // search non-distinct functions in functionVec vector::iterator it = functionVec2.begin(); @@ -4460,6 +4477,9 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // locate the return column position in aggregated rowgroup from PM // outIdx is i without the multi-columns, uint64_t outIdx = 0; + RowAggFunctionType prevAggOp = ROWAGG_FUNCT_UNDEFINE; + uint32_t prevRetKey = 0; + for (uint64_t i = 0; i < returnedColVec.size(); i++) { pUDAFFunc = NULL; @@ -4472,10 +4492,30 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( if (aggOp == ROWAGG_MULTI_PARM) { - // Skip on UM: Extra parms for an aggregate have no work on the UM + // Duplicate detection doesn't work for multi-parm` + + // If this function was earlier detected as a duplicate, unduplicate it. + SP_ROWAGG_FUNC_t funct = functionVecUm.back(); + if (funct->fAggFunction == ROWAGG_DUP_FUNCT) + funct->fAggFunction = prevAggOp; + + // Remove it from aggDupFuncMap if it's in there. + funct->hasMultiParm = true; + AGG_MAP::iterator it = aggDupFuncMap.find(boost::make_tuple(prevRetKey, prevAggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)); + if (it != aggDupFuncMap.end()) + { + aggDupFuncMap.erase(it); + } + // Skip further UM porocessing of the multi-parm: Extra parms for an aggregate have no work on the UM ++multiParms; continue; } + else + { + // Save the op for MULTI_PARM exclusion when COUNT(DISTINCT) + prevAggOp = aggOp; + prevRetKey = returnedColVec[i].first; + } if (aggOp == ROWAGG_UDAF) { @@ -4754,8 +4794,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funct->fAggFunction = ROWAGG_DUP_STATS; else if (funct->fAggFunction == ROWAGG_UDAF) funct->fAggFunction = ROWAGG_DUP_UDAF; - else if (funct->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME) // Don't track dup for this one. Gets confused when multi-parm. - {} else funct->fAggFunction = ROWAGG_DUP_FUNCT; @@ -4810,7 +4848,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( //distinct avg for (uint64_t i = 0; i < functionVecUm.size(); i++) { - if (functionVecUm[i]->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME) + if (functionVecUm[i]->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME && !functionVecUm[i]->hasMultiParm) { map::iterator k = avgDistFuncMap.find(keysAggDist[functionVecUm[i]->fOutputColumnIndex]); @@ -5121,11 +5159,6 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( for (uint64_t k = 0; k < returnedColVec.size(); k++) { - if (functionIdMap(returnedColVec[k].second) == ROWAGG_MULTI_PARM) - { - ++multiParms; - continue; - } // search non-distinct functions in functionVec vector::iterator it = functionVecUm.begin(); diff --git a/mysql-test/columnstore/basic/r/mcol3738_count_distinct_multiparm.result b/mysql-test/columnstore/basic/r/mcol3738_count_distinct_multiparm.result index bbfc2c975..2c7174513 100644 --- a/mysql-test/columnstore/basic/r/mcol3738_count_distinct_multiparm.result +++ b/mysql-test/columnstore/basic/r/mcol3738_count_distinct_multiparm.result @@ -29,10 +29,35 @@ idx count(distinct c1, c2) count(distinct c1, c3, char1) 2 3 2 3 2 2 4 1 1 +select avg(distinct c2), count(c2), count( distinct c2, c3), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3) from t1; +avg(distinct c2) count(c2) count( distinct c2, c3) count(c2) sum(distinct c2) avg(distinct c3) count(distinct c2, c3) +3.0000 12 12 12 9 3.0000 12 +select avg(distinct c2), count(c2), count( distinct c2, c3), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3) from t1 group by c1; +avg(distinct c2) count(c2) count( distinct c2, c3) count(c2) sum(distinct c2) avg(distinct c3) count(distinct c2, c3) +3.0000 6 6 6 9 2.0000 6 +3.0000 6 6 6 9 3.6667 6 +select avg(distinct c2), count(c2), count( distinct c2, c3), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2) from t1 group by c1; +avg(distinct c2) count(c2) count( distinct c2, c3) count(c2) sum(distinct c2) avg(distinct c3) count(distinct c2) +3.0000 6 6 6 9 2.0000 3 +3.0000 6 6 6 9 3.6667 3 +select avg(distinct c2), count(c2), count( distinct c2), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3) from t1 group by c1; +avg(distinct c2) count(c2) count( distinct c2) count(c2) sum(distinct c2) avg(distinct c3) count(distinct c2, c3) +3.0000 6 3 6 9 2.0000 6 +3.0000 6 3 6 9 3.6667 6 +select group_concat(distinct char1), avg(distinct c2), count(c2), count( distinct c2), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3, c1) from t1; +group_concat(distinct char1) avg(distinct c2) count(c2) count( distinct c2) count(c2) sum(distinct c2) avg(distinct c3) count(distinct c2, c3, c1) +elsewhere this way comes,something this way comes 3.0000 12 3 12 9 3.0000 12 +select group_concat(distinct char1), avg(distinct c2), count(c2), count( distinct c2), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3) from t1 group by c1; +group_concat(distinct char1) avg(distinct c2) count(c2) count( distinct c2) count(c2) sum(distinct c2) avg(distinct c3) count(distinct c2, c3) +elsewhere this way comes,something this way comes 3.0000 6 3 6 9 2.0000 6 +elsewhere this way comes,something this way comes 3.0000 6 3 6 9 3.6667 6 select idx, sum(c3), count(distinct c1, c2), count(distinct c1, c3, char1), group_concat("ls_", char1) from t1 group by idx order by idx; idx sum(c3) count(distinct c1, c2) count(distinct c1, c3, char1) group_concat("ls_", char1) 1 9 6 4 ls_something this way comes,ls_elsewhere this way comes,ls_something this way comes,ls_something this way comes,ls_elsewhere this way comes,ls_elsewhere this way comes 2 9 3 2 ls_something this way comes,ls_elsewhere this way comes,ls_something this way comes 3 8 2 2 ls_something this way comes,ls_elsewhere this way comes 4 5 1 1 ls_elsewhere this way comes +select group_concat(distinct char1), avg(distinct c2), count(c2), count( distinct c2), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3, c1) from t1; +group_concat(distinct char1) avg(distinct c2) count(c2) count( distinct c2) count(c2) sum(distinct c2) avg(distinct c3) count(distinct c2, c3, c1) +elsewhere this way comes,something this way comes 3.0000 12 3 12 9 3.0000 12 DROP DATABASE mcol_3738_db; diff --git a/mysql-test/columnstore/basic/t/mcol3738_count_distinct_multiparm.test b/mysql-test/columnstore/basic/t/mcol3738_count_distinct_multiparm.test index ae58ffa8f..dca6f57db 100644 --- a/mysql-test/columnstore/basic/t/mcol3738_count_distinct_multiparm.test +++ b/mysql-test/columnstore/basic/t/mcol3738_count_distinct_multiparm.test @@ -33,8 +33,16 @@ insert into t1 values (1, 2, 2, 1, "something this way comes"), select count(distinct c1, c2), count(distinct char1) from t1; select idx, count(distinct c1, c2), count(distinct c1, c3, char1) from t1 group by idx order by idx; +select avg(distinct c2), count(c2), count( distinct c2, c3), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3) from t1; +select avg(distinct c2), count(c2), count( distinct c2, c3), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3) from t1 group by c1; +select avg(distinct c2), count(c2), count( distinct c2, c3), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2) from t1 group by c1; +select avg(distinct c2), count(c2), count( distinct c2), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3) from t1 group by c1; +select group_concat(distinct char1), avg(distinct c2), count(c2), count( distinct c2), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3, c1) from t1; + # group_concat causes the aggregation to be performed on UM only. +select group_concat(distinct char1), avg(distinct c2), count(c2), count( distinct c2), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3) from t1 group by c1; select idx, sum(c3), count(distinct c1, c2), count(distinct c1, c3, char1), group_concat("ls_", char1) from t1 group by idx order by idx; +select group_concat(distinct char1), avg(distinct c2), count(c2), count( distinct c2), count(c2),sum(distinct c2), avg(distinct c3), count(distinct c2, c3, c1) from t1; # Clean UP DROP DATABASE mcol_3738_db; diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h index 9537460e2..be785fcff 100644 --- a/utils/rowgroup/rowaggregation.h +++ b/utils/rowgroup/rowaggregation.h @@ -176,7 +176,7 @@ struct RowAggFunctionCol RowAggFunctionCol(RowAggFunctionType aggFunction, RowAggFunctionType stats, int32_t inputColIndex, int32_t outputColIndex, int32_t auxColIndex = -1) : fAggFunction(aggFunction), fStatsFunction(stats), fInputColumnIndex(inputColIndex), - fOutputColumnIndex(outputColIndex), fAuxColumnIndex(auxColIndex) {} + fOutputColumnIndex(outputColIndex), fAuxColumnIndex(auxColIndex), hasMultiParm(false) {} virtual ~RowAggFunctionCol() = default; virtual void serialize(messageqcpp::ByteStream& bs) const; @@ -203,6 +203,8 @@ struct RowAggFunctionCol // with fAggFunction == ROWAGG_MULTI_PARM. Order is important. // If this parameter is constant, that value is here. execplan::SRCP fpConstCol; + + bool hasMultiParm; };