From 132146b9c8a2bf765045b97c28fa5c52f300854f Mon Sep 17 00:00:00 2001 From: "David.Hall" Date: Mon, 28 Jun 2021 12:14:44 -0500 Subject: [PATCH] Mcol 3738 Allow COUNT(DISTINCT to have multiple parms) (#2002) * MCOL-3738 allow COUNT(DISTINCT) multiple parameters Changes in the way tupleaggregatestep sets up the aggregate arrays. * MCOL-3738 mtr test --- dbcon/joblist/tupleaggregatestep.cpp | 328 ++++++++++++------ dbcon/mysql/ha_mcs_execplan.cpp | 9 +- .../mcol3738_count_distinct_multiparm.result | 38 ++ .../t/mcol3738_count_distinct_multiparm.test | 40 +++ 4 files changed, 310 insertions(+), 105 deletions(-) create mode 100644 mysql-test/columnstore/basic/r/mcol3738_count_distinct_multiparm.result create mode 100644 mysql-test/columnstore/basic/t/mcol3738_count_distinct_multiparm.test diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index eefcab950..2cd020cfc 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -1782,13 +1782,19 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } // vectors for aggregate functions + RowAggFunctionType aggOp = ROWAGG_FUNCT_UNDEFINE; + RowAggFunctionType prevAggOp = ROWAGG_FUNCT_UNDEFINE; for (uint64_t i = 0; i < aggColVec.size(); i++) { pUDAFFunc = NULL; uint32_t aggKey = aggColVec[i].first; - RowAggFunctionType aggOp = functionIdMap(aggColVec[i].second); + aggOp = functionIdMap(aggColVec[i].second); RowAggFunctionType stats = statsFuncIdMap(aggColVec[i].second); + // Save the op for MULTI_PARM exclusion when COUNT(DISTINCT) + if (aggOp != ROWAGG_MULTI_PARM) + prevAggOp = aggOp; + // skip if this is a constant if (aggOp == ROWAGG_CONSTANT) continue; @@ -1829,10 +1835,13 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( throw logic_error(emsg.str()); } + // We skip distinct aggs, including extra parms. These are handled by adding them to group by list above. if (aggOp == ROWAGG_DISTINCT_SUM || aggOp == ROWAGG_DISTINCT_AVG || aggOp == ROWAGG_COUNT_DISTINCT_COL_NAME) continue; + if (aggOp == ROWAGG_MULTI_PARM && prevAggOp == ROWAGG_COUNT_DISTINCT_COL_NAME) + continue; uint64_t colProj = projColPosMap[aggKey]; @@ -2103,7 +2112,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; } } - else + else if (prevAggOp != ROWAGG_COUNT_DISTINCT_COL_NAME) { throw QueryDataExcept("prep1PhaseDistinctAggregate: UDAF multi function with no parms", aggregateFuncErr); } @@ -2522,6 +2531,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( funct->fAggFunction = ROWAGG_DUP_STATS; else if (funct->fAggFunction == ROWAGG_UDAF) funct->fAggFunction = ROWAGG_DUP_UDAF; + else if (funct->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME) // Don't track dup for this one. Gets confused when multi-parm. + {} else funct->fAggFunction = ROWAGG_DUP_FUNCT; @@ -2724,13 +2735,36 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // for distinct, each column requires seperate rowgroup vector rowAggSubDistVec; - for (uint64_t i = 0; i < jobInfo.distinctColVec.size(); i++) + uint32_t distinctColKey; + int64_t j; + uint64_t k; + uint64_t outIdx = 0; + for (uint64_t i = 0; i < returnedColVec.size(); i++) { - uint32_t distinctColKey = jobInfo.distinctColVec[i]; - uint64_t j = -1; + if (returnedColVec[i].second == 0) + { + ++outIdx; + continue; + } + j = -1; + + distinctColKey = -1; + // Find the entry in distinctColVec, if any + for (k = 0; k < jobInfo.distinctColVec.size(); k++) + { + distinctColKey = jobInfo.distinctColVec[k]; + if (returnedColVec[i].first == distinctColKey) + break; + + } + if (distinctColKey == (uint32_t)-1) + { + ++outIdx; + continue; + } // locate the distinct key in the row group - for (uint64_t k = 0; k < keysAgg.size(); k++) + for (k = 0; k < keysAgg.size(); k++) { if (keysProj[k] == distinctColKey) { @@ -2739,7 +2773,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( } } - idbassert(j != (uint64_t) - 1); + idbassert(j != -1); oidsAggSub = oidsAggGb; keysAggSub = keysAggGb; @@ -2757,20 +2791,9 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( csNumAggSub.push_back(csNumProj[j]); widthAggSub.push_back(widthProj[j]); - // construct sub-rowgroup - posAggSub.clear(); - posAggSub.push_back(2); // rid - - for (uint64_t k = 0; k < oidsAggSub.size(); k++) - posAggSub.push_back(posAggSub[k] + widthAggSub[k]); - - RowGroup subRg(oidsAggSub.size(), posAggSub, oidsAggSub, keysAggSub, typeAggSub, - csNumAggSub, scaleAggSub, precisionAggSub, jobInfo.stringTableThreshold); - subRgVec.push_back(subRg); - // construct groupby vector vector groupBySub; - uint64_t k = 0; + k = 0; while (k < jobInfo.groupByColVec.size()) { @@ -2778,11 +2801,60 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( groupBySub.push_back(groupby); k++; } - // add the distinct column as groupby SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k)); groupBySub.push_back(groupby); + // Add multi parm distinct + while ((i+1) < returnedColVec.size() && functionIdMap(returnedColVec[i+1].second) == ROWAGG_MULTI_PARM) + { + ++i; + uint32_t dColKey = -1; + j = -1; + + // Find the entry in distinctColVec, if any + for (k = 0; k < jobInfo.distinctColVec.size(); k++) + { + dColKey = jobInfo.distinctColVec[k]; + if (returnedColVec[i].first == dColKey) + break; + + } + idbassert(dColKey != (uint32_t)-1); + // locate the distinct key in the row group + for (k = 0; k < keysAgg.size(); k++) + { + if (keysProj[k] == dColKey) + { + j = k; + break; + } + } + idbassert(j != -1); + + oidsAggSub.push_back(oidsProj[j]); + keysAggSub.push_back(keysProj[j]); + scaleAggSub.push_back(scaleProj[j]); + precisionAggSub.push_back(precisionProj[j]); + typeAggSub.push_back(typeProj[j]); + csNumAggSub.push_back(csNumProj[j]); + widthAggSub.push_back(widthProj[j]); + + SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k)); + groupBySub.push_back(groupby); + } + + // construct sub-rowgroup + posAggSub.clear(); + posAggSub.push_back(2); // rid + + for ( k = 0; k < oidsAggSub.size(); k++) + posAggSub.push_back(posAggSub[k] + widthAggSub[k]); + + RowGroup subRg(oidsAggSub.size(), posAggSub, oidsAggSub, keysAggSub, typeAggSub, + csNumAggSub, scaleAggSub, precisionAggSub, jobInfo.stringTableThreshold); + subRgVec.push_back(subRg); + // Keep a count of the parms after the first for any aggregate. // These will be skipped and the count needs to be subtracted // from where the aux column will be. @@ -2792,37 +2864,26 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // -- dummy function vector for sub-aggregator, which does distinct only // -- aggregate function on this distinct column for rowAggDist vector functionSub1, functionSub2; + // search the function in functionVec + vector::iterator it = functionVec2.begin(); - for (uint64_t k = 0; k < returnedColVec.size(); k++) + while (it != functionVec2.end()) { - if (functionIdMap(returnedColVec[i].second) == ROWAGG_MULTI_PARM) + SP_ROWAGG_FUNC_t f = *it++; + + if ((f->fOutputColumnIndex == outIdx) && + (f->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME || + f->fAggFunction == ROWAGG_DISTINCT_SUM || + f->fAggFunction == ROWAGG_DISTINCT_AVG)) { - ++multiParms; - continue; - } - if (returnedColVec[k].first != distinctColKey) - continue; - - // search the function in functionVec - vector::iterator it = functionVec2.begin(); - - while (it != functionVec2.end()) - { - SP_ROWAGG_FUNC_t f = *it++; - - if ((f->fOutputColumnIndex == k) && - (f->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME || - f->fAggFunction == ROWAGG_DISTINCT_SUM || - f->fAggFunction == ROWAGG_DISTINCT_AVG)) - { - SP_ROWAGG_FUNC_t funct(new RowAggFunctionCol( - f->fAggFunction, - f->fStatsFunction, - groupBySub.size() - 1, - f->fOutputColumnIndex, - f->fAuxColumnIndex-multiParms)); - functionSub2.push_back(funct); - } + SP_ROWAGG_FUNC_t funct( + new RowAggFunctionCol( + f->fAggFunction, + f->fStatsFunction, + groupBySub.size() - 1, + f->fOutputColumnIndex, + f->fAuxColumnIndex-multiParms)); + functionSub2.push_back(funct); } } @@ -2834,6 +2895,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate( // add to rowAggDist multiDistinctAggregator->addSubAggregator(subAgg, subRg, functionSub2); + + ++outIdx; } // cover any non-distinct column functions @@ -3968,11 +4031,17 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } // vectors for aggregate functions + RowAggFunctionType aggOp = ROWAGG_FUNCT_UNDEFINE; + RowAggFunctionType prevAggOp = ROWAGG_FUNCT_UNDEFINE; for (uint64_t i = 0; i < aggColVec.size(); i++) { - // skip on PM if this is a constant - RowAggFunctionType aggOp = functionIdMap(aggColVec[i].second); + aggOp = functionIdMap(aggColVec[i].second); + // Save the op for MULTI_PARM exclusion when COUNT(DISTINCT) + if (aggOp != ROWAGG_MULTI_PARM) + prevAggOp = aggOp; + + // skip on PM if this is a constant if (aggOp == ROWAGG_CONSTANT) continue; @@ -3994,17 +4063,21 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( throw logic_error(emsg.str()); } - RowAggFunctionType stats = statsFuncIdMap(aggColVec[i].second); + RowAggFunctionType stats = statsFuncIdMap(aggOp); // skip sum / count(column) if avg is also selected if ((aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) && (avgSet.find(aggKey) != avgSet.end())) continue; + // We skip distinct aggs, including extra parms. These are handled by adding them to group by list above. if (aggOp == ROWAGG_DISTINCT_SUM || aggOp == ROWAGG_DISTINCT_AVG || aggOp == ROWAGG_COUNT_DISTINCT_COL_NAME) continue; + if (aggOp == ROWAGG_MULTI_PARM && prevAggOp == ROWAGG_COUNT_DISTINCT_COL_NAME) + continue; + uint64_t colProj = projColPosMap[aggKey]; SP_ROWAGG_FUNC_t funct; @@ -4273,7 +4346,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funct->fpConstCol = udafc->aggParms()[udafcParamIdx]; } } - else + else if (prevAggOp != ROWAGG_COUNT_DISTINCT_COL_NAME) { throw QueryDataExcept("prep2PhasesDistinctAggregate: UDAF multi function with no parms", aggregateFuncErr); } @@ -4682,6 +4755,8 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( funct->fAggFunction = ROWAGG_DUP_STATS; else if (funct->fAggFunction == ROWAGG_UDAF) funct->fAggFunction = ROWAGG_DUP_UDAF; + else if (funct->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME) // Don't track dup for this one. Gets confused when multi-parm. + {} else funct->fAggFunction = ROWAGG_DUP_FUNCT; @@ -4874,16 +4949,39 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( widthAggGb.push_back(widthAggUm[i]); } - // for distinct, each column requires seperate rowgroup + // for distinct, each column requires a seperate rowgroup vector rowAggSubDistVec; - for (uint64_t i = 0; i < jobInfo.distinctColVec.size(); i++) + uint32_t distinctColKey; + int64_t j; + uint64_t k; + uint64_t outIdx = 0; + for (uint64_t i = 0; i < returnedColVec.size(); i++) { - uint32_t distinctColKey = jobInfo.distinctColVec[i]; - uint64_t j = -1; + if (returnedColVec[i].second == 0) + { + ++outIdx; + continue; + } + j = -1; + + distinctColKey = -1; + // Find the entry in distinctColVec, if any + for (k = 0; k < jobInfo.distinctColVec.size(); k++) + { + distinctColKey = jobInfo.distinctColVec[k]; + if (returnedColVec[i].first == distinctColKey) + break; + + } + if (distinctColKey == (uint32_t)-1) + { + ++outIdx; + continue; + } // locate the distinct key in the row group - for (uint64_t k = 0; k < keysAggUm.size(); k++) + for (k = 0; k < keysAggUm.size(); k++) { if (keysAggUm[k] == distinctColKey) { @@ -4892,7 +4990,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( } } - idbassert(j != (uint64_t) - 1); + idbassert(j != -1); oidsAggSub = oidsAggGb; keysAggSub = keysAggGb; @@ -4907,23 +5005,12 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( scaleAggSub.push_back(scaleAggUm[j]); precisionAggSub.push_back(precisionAggUm[j]); typeAggSub.push_back(typeAggUm[j]); - csNumAggSub.push_back(csNumAggUm[i]); + csNumAggSub.push_back(csNumAggUm[j]); widthAggSub.push_back(widthAggUm[j]); - // construct sub-rowgroup - posAggSub.clear(); - posAggSub.push_back(2); // rid - - for (uint64_t k = 0; k < oidsAggSub.size(); k++) - posAggSub.push_back(posAggSub[k] + widthAggSub[k]); - - RowGroup subRg(oidsAggSub.size(), posAggSub, oidsAggSub, keysAggSub, typeAggSub, - csNumAggSub, scaleAggSub, precisionAggSub, jobInfo.stringTableThreshold); - subRgVec.push_back(subRg); - // construct groupby vector vector groupBySub; - uint64_t k = 0; + k = 0; while (k < jobInfo.groupByColVec.size()) { @@ -4931,11 +5018,60 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( groupBySub.push_back(groupby); k++; } - // add the distinct column as groupby SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k)); groupBySub.push_back(groupby); + // Add multi parm distinct + while ((i+1) < returnedColVec.size() && functionIdMap(returnedColVec[i+1].second) == ROWAGG_MULTI_PARM) + { + ++i; + uint32_t dColKey = -1; + j = -1; + + // Find the entry in distinctColVec, if any + for (k = 0; k < jobInfo.distinctColVec.size(); k++) + { + dColKey = jobInfo.distinctColVec[k]; + if (returnedColVec[i].first == dColKey) + break; + + } + idbassert(dColKey != (uint32_t)-1); + // locate the distinct key in the row group + for (k = 0; k < keysAggUm.size(); k++) + { + if (keysAggUm[k] == dColKey) + { + j = k; + break; + } + } + idbassert(j != -1); + + oidsAggSub.push_back(oidsAggUm[j]); + keysAggSub.push_back(keysAggUm[j]); + scaleAggSub.push_back(scaleAggUm[j]); + precisionAggSub.push_back(precisionAggUm[j]); + typeAggSub.push_back(typeAggUm[j]); + csNumAggSub.push_back(csNumAggUm[j]); + widthAggSub.push_back(widthAggUm[j]); + + SP_ROWAGG_GRPBY_t groupby(new RowAggGroupByCol(j, k)); + groupBySub.push_back(groupby); + } + + // construct sub-rowgroup + posAggSub.clear(); + posAggSub.push_back(2); // rid + + for ( k = 0; k < oidsAggSub.size(); k++) + posAggSub.push_back(posAggSub[k] + widthAggSub[k]); + + RowGroup subRg(oidsAggSub.size(), posAggSub, oidsAggSub, keysAggSub, typeAggSub, + csNumAggSub, scaleAggSub, precisionAggSub, jobInfo.stringTableThreshold); + subRgVec.push_back(subRg); + // Keep a count of the parms after the first for any aggregate. // These will be skipped and the count needs to be subtracted // from where the aux column will be. @@ -4945,38 +5081,26 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // -- dummy function vector for sub-aggregator, which does distinct only // -- aggregate function on this distinct column for rowAggDist vector functionSub1, functionSub2; + // search the function in functionVec + vector::iterator it = functionVecUm.begin(); - for (uint64_t k = 0; k < returnedColVec.size(); k++) + while (it != functionVecUm.end()) { - if (functionIdMap(returnedColVec[i].second) == ROWAGG_MULTI_PARM) + SP_ROWAGG_FUNC_t f = *it++; + + if ((f->fOutputColumnIndex == outIdx) && + (f->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME || + f->fAggFunction == ROWAGG_DISTINCT_SUM || + f->fAggFunction == ROWAGG_DISTINCT_AVG)) { - ++multiParms; - continue; - } - if (returnedColVec[k].first != distinctColKey) - continue; - - // search the function in functionVec - vector::iterator it = functionVecUm.begin(); - - while (it != functionVecUm.end()) - { - SP_ROWAGG_FUNC_t f = *it++; - - if ((f->fOutputColumnIndex == k) && - (f->fAggFunction == ROWAGG_COUNT_DISTINCT_COL_NAME || - f->fAggFunction == ROWAGG_DISTINCT_SUM || - f->fAggFunction == ROWAGG_DISTINCT_AVG)) - { - SP_ROWAGG_FUNC_t funct( - new RowAggFunctionCol( - f->fAggFunction, - f->fStatsFunction, - groupBySub.size() - 1, - f->fOutputColumnIndex, - f->fAuxColumnIndex-multiParms)); - functionSub2.push_back(funct); - } + SP_ROWAGG_FUNC_t funct( + new RowAggFunctionCol( + f->fAggFunction, + f->fStatsFunction, + groupBySub.size() - 1, + f->fOutputColumnIndex, + f->fAuxColumnIndex-multiParms)); + functionSub2.push_back(funct); } } @@ -4986,6 +5110,8 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate( // add to rowAggDist multiDistinctAggregator->addSubAggregator(subAgg, subRg, functionSub2); + + ++outIdx; } // cover any non-distinct column functions diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index 9cd882762..e4746ebf8 100755 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -4737,10 +4737,11 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) gwi.aggOnSelect = true; // Argument_count() is the # of formal parms to the agg fcn. Columnstore - // only supports 1 argument except UDAnF and GROUP_CONCAT - // TODO: Support more than one parm for COUNT(DISTINCT) - if (isp->argument_count() != 1 && isp->sum_func() != Item_sum::GROUP_CONCAT_FUNC - && isp->sum_func() != Item_sum::UDF_SUM_FUNC) + // only supports 1 argument except UDAnF, COUNT(DISTINC) and GROUP_CONCAT + if (isp->argument_count() != 1 + && isp->sum_func() != Item_sum::COUNT_DISTINCT_FUNC + && isp->sum_func() != Item_sum::GROUP_CONCAT_FUNC + && isp->sum_func() != Item_sum::UDF_SUM_FUNC) { gwi.fatalParseError = true; gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_MUL_ARG_AGG); diff --git a/mysql-test/columnstore/basic/r/mcol3738_count_distinct_multiparm.result b/mysql-test/columnstore/basic/r/mcol3738_count_distinct_multiparm.result new file mode 100644 index 000000000..bbfc2c975 --- /dev/null +++ b/mysql-test/columnstore/basic/r/mcol3738_count_distinct_multiparm.result @@ -0,0 +1,38 @@ +DROP DATABASE IF EXISTS mcol_3738_db; +CREATE DATABASE mcol_3738_db; +USE mcol_3738_db; +CREATE TABLE `t1` ( +idx int(11), +c1 int(11), +c2 int(11), +c3 int(11), +char1 varchar(28) +) ENGINE=Columnstore DEFAULT CHARSET=utf8mb4; +insert into t1 values (1, 2, 2, 1, "something this way comes"), +(1, 2, 3, 1, "elsewhere this way comes"), +(1, 2, 4, 1, "something this way comes"), +(1, 3, 2, 2, "something this way comes"), +(1, 3, 3, 2, "elsewhere this way comes"), +(1, 3, 4, 2, "elsewhere this way comes"), +(2, 2, 2, 3, "something this way comes"), +(2, 2, 3, 3, "elsewhere this way comes"), +(2, 2, 4, 3, "something this way comes"), +(3, 3, 2, 4, "something this way comes"), +(3, 3, 3, 4, "elsewhere this way comes"), +(4, 3, 4, 5, "elsewhere this way comes"); +select count(distinct c1, c2), count(distinct char1) from t1; +count(distinct c1, c2) count(distinct char1) +6 2 +select idx, count(distinct c1, c2), count(distinct c1, c3, char1) from t1 group by idx order by idx; +idx count(distinct c1, c2) count(distinct c1, c3, char1) +1 6 4 +2 3 2 +3 2 2 +4 1 1 +select idx, sum(c3), count(distinct c1, c2), count(distinct c1, c3, char1), group_concat("ls_", char1) from t1 group by idx order by idx; +idx sum(c3) count(distinct c1, c2) count(distinct c1, c3, char1) group_concat("ls_", char1) +1 9 6 4 ls_something this way comes,ls_elsewhere this way comes,ls_something this way comes,ls_something this way comes,ls_elsewhere this way comes,ls_elsewhere this way comes +2 9 3 2 ls_something this way comes,ls_elsewhere this way comes,ls_something this way comes +3 8 2 2 ls_something this way comes,ls_elsewhere this way comes +4 5 1 1 ls_elsewhere this way comes +DROP DATABASE mcol_3738_db; diff --git a/mysql-test/columnstore/basic/t/mcol3738_count_distinct_multiparm.test b/mysql-test/columnstore/basic/t/mcol3738_count_distinct_multiparm.test new file mode 100644 index 000000000..ae58ffa8f --- /dev/null +++ b/mysql-test/columnstore/basic/t/mcol3738_count_distinct_multiparm.test @@ -0,0 +1,40 @@ +# +# Test COUNT(DISTINCT n, m, ...) +# Author: David Hall, david.hall@mariadb.com +# +-- source ../include/have_columnstore.inc + +--disable_warnings +DROP DATABASE IF EXISTS mcol_3738_db; +--enable_warnings + +CREATE DATABASE mcol_3738_db; +USE mcol_3738_db; +CREATE TABLE `t1` ( + idx int(11), + c1 int(11), + c2 int(11), + c3 int(11), + char1 varchar(28) +) ENGINE=Columnstore DEFAULT CHARSET=utf8mb4; + +insert into t1 values (1, 2, 2, 1, "something this way comes"), + (1, 2, 3, 1, "elsewhere this way comes"), + (1, 2, 4, 1, "something this way comes"), + (1, 3, 2, 2, "something this way comes"), + (1, 3, 3, 2, "elsewhere this way comes"), + (1, 3, 4, 2, "elsewhere this way comes"), + (2, 2, 2, 3, "something this way comes"), + (2, 2, 3, 3, "elsewhere this way comes"), + (2, 2, 4, 3, "something this way comes"), + (3, 3, 2, 4, "something this way comes"), + (3, 3, 3, 4, "elsewhere this way comes"), + (4, 3, 4, 5, "elsewhere this way comes"); + +select count(distinct c1, c2), count(distinct char1) from t1; +select idx, count(distinct c1, c2), count(distinct c1, c3, char1) from t1 group by idx order by idx; +# group_concat causes the aggregation to be performed on UM only. +select idx, sum(c3), count(distinct c1, c2), count(distinct c1, c3, char1), group_concat("ls_", char1) from t1 group by idx order by idx; + +# Clean UP +DROP DATABASE mcol_3738_db;