diff --git a/dbcon/joblist/jlf_common.h b/dbcon/joblist/jlf_common.h index 640757379..427b5bfed 100644 --- a/dbcon/joblist/jlf_common.h +++ b/dbcon/joblist/jlf_common.h @@ -93,6 +93,20 @@ struct TupleInfo uint32_t csNum; // For collations }; +// This struct holds information about `FunctionColumn`. +struct FunctionColumnInfo +{ + // Function argument. + uint64_t associatedColumnOid; + // Function name. + std::string functionName; + + FunctionColumnInfo(uint64_t colOid, std::string funcName) + : associatedColumnOid(colOid), functionName(funcName) + { + } +}; + // for compound join struct JoinData { @@ -383,6 +397,8 @@ struct JobInfo std::map, int64_t> joinEdgesToRestore; // Represents a pair of `table` to be on a large side and weight associated with that table. std::unordered_map tablesForLargeSide; + // Represents a pair of `tupleId` and `FunctionColumnInfo`. + std::unordered_map functionColumnMap; private: // defaults okay diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index 2054a7597..7289d0b5f 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -172,7 +172,7 @@ void projectSimpleColumn(const SimpleColumn* sc, JobStepVector& jsv, JobInfo& jo // This is a double-step step // if (jobInfo.trace) // cout << "doProject Emit pGetSignature for SimpleColumn " << dictOid << - //endl; + // endl; pds = new pDictionaryStep(dictOid, tbl_oid, ct, jobInfo); jobInfo.keyInfo->dictOidToColOid[dictOid] = oid; @@ -557,9 +557,9 @@ void checkGroupByCols(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo) if (dynamic_cast(i->get()) != NULL) { if (csep->withRollup()) - { + { throw runtime_error("constant GROUP BY columns are not supported when WITH ROLLUP is used"); - } + } continue; } @@ -927,7 +927,9 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo { if (jobInfo.hasRollup) { - throw runtime_error("GROUP_CONCAT and JSONARRAYAGG aggregations are not supported when WITH ROLLUP 
modifier is used"); + throw runtime_error( + "GROUP_CONCAT and JSONARRAYAGG aggregations are not supported when WITH ROLLUP modifier is " + "used"); } jobInfo.groupConcatCols.push_back(retCols[i]); @@ -1246,6 +1248,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo const WindowFunctionColumn* wc = NULL; bool hasAggCols = false; bool hasWndCols = false; + bool hasFuncColsWithOneArgument = false; if ((ac = dynamic_cast(srcp.get())) != NULL) { @@ -1263,6 +1266,9 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo hasAggCols = true; if (fc->windowfunctionColumnList().size() > 0) hasWndCols = true; + // MCOL-5476 Currently support function with only one argument for group by list. + if (fc->simpleColumnList().size() == 1) + hasFuncColsWithOneArgument = true; } else if (dynamic_cast(srcp.get()) != NULL) { @@ -1291,6 +1297,13 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo { jobInfo.expressionVec.push_back(tupleKey); } + + if (hasFuncColsWithOneArgument) + { + FunctionColumnInfo fcInfo(fc->simpleColumnList().front()->oid(), + fc->functionName()); + jobInfo.functionColumnMap.insert({tupleKey, fcInfo}); + } } // add to project list @@ -1704,7 +1717,7 @@ void parseExecutionPlan(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo, JobS } // special case, select without a table, like: select 1; - if (jobInfo.constantCol == CONST_COL_ONLY) // XXX: WITH ROLLUP + if (jobInfo.constantCol == CONST_COL_ONLY) // XXX: WITH ROLLUP return; // If there are no filters (select * from table;) then add one simple scan diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index c234a7271..ab7f9027e 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -75,67 +75,9 @@ using namespace querytele; namespace { -struct cmpTuple -{ - bool operator()(boost::tuple*> a, - boost::tuple*> b) const - 
{ - uint32_t keya = boost::get<0>(a); - uint32_t keyb = boost::get<0>(b); - int opa; - int opb; - mcsv1sdk::mcsv1_UDAF* pUDAFa; - mcsv1sdk::mcsv1_UDAF* pUDAFb; - - // If key is less than - if (keya < keyb) - return true; - if (keya == keyb) - { - // test Op - opa = boost::get<1>(a); - opb = boost::get<1>(b); - if (opa < opb) - return true; - if (opa == opb) - { - // look at the UDAF object - pUDAFa = boost::get<2>(a); - pUDAFb = boost::get<2>(b); - if (pUDAFa < pUDAFb) - return true; - if (pUDAFa == pUDAFb) - { - std::vector* paramKeysa = boost::get<3>(a); - std::vector* paramKeysb = boost::get<3>(b); - if (paramKeysa == NULL || paramKeysb == NULL) - return false; - if (paramKeysa->size() < paramKeysb->size()) - return true; - if (paramKeysa->size() == paramKeysb->size()) - { - for (uint64_t i = 0; i < paramKeysa->size(); ++i) - { - if ((*paramKeysa)[i] < (*paramKeysb)[i]) - return true; - } - } - } - } - } - return false; - } -}; - typedef vector> RowBucket; typedef vector RowBucketVec; -// The AGG_MAP type is used to maintain a list of aggregate functions in order to -// detect duplicates. Since all UDAF have the same op type (ROWAGG_UDAF), we add in -// the function pointer in order to ensure uniqueness. 
-typedef map*>, uint64_t, cmpTuple> - AGG_MAP; - inline RowAggFunctionType functionIdMap(int planFuncId) { switch (planFuncId) @@ -1190,13 +1132,31 @@ void TupleAggregateStep::prep1PhaseAggregate(JobInfo& jobInfo, vector& } else { - Message::Args args; - args.add(keyName(i, key, jobInfo)); - string emsg = IDBErrorInfo::instance()->errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); - cerr << "prep1PhaseAggregate: " << emsg << " oid=" << (int)jobInfo.keyInfo->tupleKeyVec[key].fId - << ", alias=" << jobInfo.keyInfo->tupleKeyVec[key].fTable - << ", view=" << jobInfo.keyInfo->tupleKeyVec[key].fView << ", function=" << (int)aggOp << endl; - throw IDBExcept(emsg, ERR_NOT_GROUPBY_EXPRESSION); + uint32_t foundTupleKey{0}; + if (tryToFindEqualFunctionColumnByTupleKey(jobInfo, groupbyMap, key, foundTupleKey)) + { + oidsAgg.push_back(oidsProj[colProj]); + keysAgg.push_back(key); + scaleAgg.push_back(scaleProj[colProj]); + precisionAgg.push_back(precisionProj[colProj]); + typeAgg.push_back(typeProj[colProj]); + csNumAgg.push_back(csNumProj[colProj]); + widthAgg.push_back(width[colProj]); + // Update key. 
+ key = foundTupleKey; + ++outIdx; + continue; + } + else + { + Message::Args args; + args.add(keyName(i, key, jobInfo)); + string emsg = IDBErrorInfo::instance()->errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); + cerr << "prep1PhaseAggregate: " << emsg << " oid=" << (int)jobInfo.keyInfo->tupleKeyVec[key].fId + << ", alias=" << jobInfo.keyInfo->tupleKeyVec[key].fTable + << ", view=" << jobInfo.keyInfo->tupleKeyVec[key].fView << ", function=" << (int)aggOp << endl; + throw IDBExcept(emsg, ERR_NOT_GROUPBY_EXPRESSION); + } } } @@ -1503,7 +1463,8 @@ void TupleAggregateStep::prep1PhaseAggregate(JobInfo& jobInfo, vector& RowGroup aggRG(oidsAgg.size(), posAgg, oidsAgg, keysAgg, typeAgg, csNumAgg, scaleAgg, precisionAgg, jobInfo.stringTableThreshold); - SP_ROWAGG_UM_t rowAgg(new RowAggregationUM(groupBy, functionVec, jobInfo.rm, jobInfo.umMemLimit, jobInfo.hasRollup)); + SP_ROWAGG_UM_t rowAgg( + new RowAggregationUM(groupBy, functionVec, jobInfo.rm, jobInfo.umMemLimit, jobInfo.hasRollup)); rowAgg->timeZone(jobInfo.timeZone); rowgroups.push_back(aggRG); aggregators.push_back(rowAgg); @@ -2245,81 +2206,108 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vectorgetContext().getParamKeys() : NULL)); - - if (it != aggFuncMap.end()) + AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple( + foundTupleKey, aggOp, pUDAFFunc, udafc ? 
udafc->getContext().getParamKeys() : NULL)); + colAgg = it->second; + oidsAggDist.push_back(oidsAgg[colAgg]); + keysAggDist.push_back(keysAgg[colAgg]); + scaleAggDist.push_back(scaleAgg[colAgg]); + precisionAggDist.push_back(precisionAgg[colAgg]); + typeAggDist.push_back(typeAgg[colAgg]); + csNumAggDist.push_back(csNumAgg[colAgg]); + uint32_t width = widthAgg[colAgg]; + if (aggOp == ROWAGG_GROUP_CONCAT || aggOp == ROWAGG_JSON_ARRAY) { - // false alarm - returnColMissing = false; + TupleInfo ti = getTupleInfo(retKey, jobInfo); - colAgg = it->second; + if (ti.width > width) + width = ti.width; + } + widthAggDist.push_back(width); - if (aggOp == ROWAGG_SUM) + // Update the `retKey` to specify that this column is a duplicate. + retKey = foundTupleKey; + } + else + { + bool returnColMissing = true; + + // check if a SUM or COUNT covered by AVG + if (aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) + { + it = aggFuncMap.find(boost::make_tuple(returnedColVec[i].first, ROWAGG_AVG, pUDAFFunc, + udafc ? 
udafc->getContext().getParamKeys() : NULL)); + + if (it != aggFuncMap.end()) { - oidsAggDist.push_back(oidsAgg[colAgg]); - keysAggDist.push_back(retKey); - csNumAggDist.push_back(8); - wideDecimalOrLongDouble(colAgg, typeAgg[colAgg], precisionAgg, scaleAgg, widthAgg, - typeAggDist, scaleAggDist, precisionAggDist, widthAggDist); - } - else - { - // leave the count() to avg - aggOp = ROWAGG_COUNT_NO_OP; + // false alarm + returnColMissing = false; - oidsAggDist.push_back(oidsAgg[colAgg]); - keysAggDist.push_back(retKey); - scaleAggDist.push_back(0); + colAgg = it->second; - if (isUnsigned(typeAgg[colAgg])) + if (aggOp == ROWAGG_SUM) { - typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); - precisionAggDist.push_back(20); + oidsAggDist.push_back(oidsAgg[colAgg]); + keysAggDist.push_back(retKey); + csNumAggDist.push_back(8); + wideDecimalOrLongDouble(colAgg, typeAgg[colAgg], precisionAgg, scaleAgg, widthAgg, + typeAggDist, scaleAggDist, precisionAggDist, widthAggDist); } else { - typeAggDist.push_back(CalpontSystemCatalog::BIGINT); - precisionAggDist.push_back(19); + // leave the count() to avg + aggOp = ROWAGG_COUNT_NO_OP; + + oidsAggDist.push_back(oidsAgg[colAgg]); + keysAggDist.push_back(retKey); + scaleAggDist.push_back(0); + + if (isUnsigned(typeAgg[colAgg])) + { + typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); + precisionAggDist.push_back(20); + } + else + { + typeAggDist.push_back(CalpontSystemCatalog::BIGINT); + precisionAggDist.push_back(19); + } + csNumAggDist.push_back(8); + widthAggDist.push_back(bigIntWidth); } - csNumAggDist.push_back(8); - widthAggDist.push_back(bigIntWidth); } } - } - else if (find(jobInfo.expressionVec.begin(), jobInfo.expressionVec.end(), retKey) != - jobInfo.expressionVec.end()) - { - // a function on aggregation - TupleInfo ti = getTupleInfo(retKey, jobInfo); - oidsAggDist.push_back(ti.oid); - keysAggDist.push_back(retKey); - scaleAggDist.push_back(ti.scale); - precisionAggDist.push_back(ti.precision); - 
typeAggDist.push_back(ti.dtype); - csNumAggDist.push_back(ti.csNum); - widthAggDist.push_back(ti.width); + else if (find(jobInfo.expressionVec.begin(), jobInfo.expressionVec.end(), retKey) != + jobInfo.expressionVec.end()) + { + // a function on aggregation + TupleInfo ti = getTupleInfo(retKey, jobInfo); + oidsAggDist.push_back(ti.oid); + keysAggDist.push_back(retKey); + scaleAggDist.push_back(ti.scale); + precisionAggDist.push_back(ti.precision); + typeAggDist.push_back(ti.dtype); + csNumAggDist.push_back(ti.csNum); + widthAggDist.push_back(ti.width); - returnColMissing = false; - } - else if (aggOp == ROWAGG_CONSTANT) - { - TupleInfo ti = getTupleInfo(retKey, jobInfo); - oidsAggDist.push_back(ti.oid); - keysAggDist.push_back(retKey); - scaleAggDist.push_back(ti.scale); - precisionAggDist.push_back(ti.precision); - typeAggDist.push_back(ti.dtype); - csNumAggDist.push_back(ti.csNum); - widthAggDist.push_back(ti.width); + returnColMissing = false; + } + else if (aggOp == ROWAGG_CONSTANT) + { + TupleInfo ti = getTupleInfo(retKey, jobInfo); + oidsAggDist.push_back(ti.oid); + keysAggDist.push_back(retKey); + scaleAggDist.push_back(ti.scale); + precisionAggDist.push_back(ti.precision); + typeAggDist.push_back(ti.dtype); + csNumAggDist.push_back(ti.csNum); + widthAggDist.push_back(ti.width); - returnColMissing = false; - } + returnColMissing = false; + } #if 0 else if (aggOp == ROWAGG_GROUP_CONCAT || aggOp == ROWAGG_JSON_ARRAY) @@ -2337,63 +2325,64 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vector> 8); - precisionAggDist.push_back(precisionProj[k]); - typeAggDist.push_back(typeProj[k]); - csNumAggDist.push_back(csNumProj[k]); - widthAggDist.push_back(widthProj[k]); + if (retKey == keysProj[k]) + { + oidsAggDist.push_back(oidsProj[k]); + keysAggDist.push_back(retKey); + scaleAggDist.push_back(scaleProj[k] >> 8); + precisionAggDist.push_back(precisionProj[k]); + typeAggDist.push_back(typeProj[k]); + csNumAggDist.push_back(csNumProj[k]); + 
widthAggDist.push_back(widthProj[k]); - returnColMissing = false; - break; + returnColMissing = false; + break; + } } } - } - else if (jobInfo.windowSet.find(retKey) != jobInfo.windowSet.end()) - { - // skip window columns/expression, which are computed later - for (uint64_t k = 0; k < keysProj.size(); k++) + else if (jobInfo.windowSet.find(retKey) != jobInfo.windowSet.end()) { - if (retKey == keysProj[k]) + // skip window columns/expression, which are computed later + for (uint64_t k = 0; k < keysProj.size(); k++) { - oidsAggDist.push_back(oidsProj[k]); - keysAggDist.push_back(retKey); - scaleAggDist.push_back(scaleProj[k] >> 8); - precisionAggDist.push_back(precisionProj[k]); - typeAggDist.push_back(typeProj[k]); - csNumAggDist.push_back(csNumProj[k]); - widthAggDist.push_back(widthProj[k]); + if (retKey == keysProj[k]) + { + oidsAggDist.push_back(oidsProj[k]); + keysAggDist.push_back(retKey); + scaleAggDist.push_back(scaleProj[k] >> 8); + precisionAggDist.push_back(precisionProj[k]); + typeAggDist.push_back(typeProj[k]); + csNumAggDist.push_back(csNumProj[k]); + widthAggDist.push_back(widthProj[k]); - returnColMissing = false; - break; + returnColMissing = false; + break; + } } } - } - if (returnColMissing) - { - Message::Args args; - args.add(keyName(outIdx, retKey, jobInfo)); - string emsg = IDBErrorInfo::instance()->errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); - cerr << "prep1PhaseDistinctAggregate: " << emsg - << " oid=" << (int)jobInfo.keyInfo->tupleKeyVec[retKey].fId - << ", alias=" << jobInfo.keyInfo->tupleKeyVec[retKey].fTable - << ", view=" << jobInfo.keyInfo->tupleKeyVec[retKey].fView << ", function=" << (int)aggOp - << endl; - throw IDBExcept(emsg, ERR_NOT_GROUPBY_EXPRESSION); - } - } // else - } // switch + if (returnColMissing) + { + Message::Args args; + args.add(keyName(outIdx, retKey, jobInfo)); + string emsg = IDBErrorInfo::instance()->errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); + cerr << "prep1PhaseDistinctAggregate: " << emsg + << " oid=" << 
(int)jobInfo.keyInfo->tupleKeyVec[retKey].fId + << ", alias=" << jobInfo.keyInfo->tupleKeyVec[retKey].fTable + << ", view=" << jobInfo.keyInfo->tupleKeyVec[retKey].fView << ", function=" << (int)aggOp + << endl; + throw IDBExcept(emsg, ERR_NOT_GROUPBY_EXPRESSION); + } + } // else + } // switch + } } // update groupby vector if the groupby column is a returned column @@ -3179,14 +3168,14 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector colAggPm++; } - // PM: put the count column for avg next to the sum - // let fall through to add a count column for average function - if (aggOp != ROWAGG_AVG) - break; - // The AVG aggregation has a special treatment everywhere. - // This is so because AVG(column) is a SUM(column)/COUNT(column) - // and these aggregations can be utilized by AVG, if present. - /* fall through */ + // PM: put the count column for avg next to the sum + // let fall through to add a count column for average function + if (aggOp != ROWAGG_AVG) + break; + // The AVG aggregation has a special treatment everywhere. + // This is so because AVG(column) is a SUM(column)/COUNT(column) + // and these aggregations can be utilized by AVG, if present. + /* fall through */ case ROWAGG_COUNT_ASTERISK: case ROWAGG_COUNT_COL_NAME: @@ -3439,99 +3428,120 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector // not a direct hit -- a returned column is not already in the RG from PMs else { - bool returnColMissing = true; - - // check if a SUM or COUNT covered by AVG - if (aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) + // MCOL-5476. + uint32_t foundTupleKey{0}; + if (tryToFindEqualFunctionColumnByTupleKey(jobInfo, aggFuncMap, retKey, foundTupleKey)) { - it = aggFuncMap.find(boost::make_tuple(returnedColVec[i].first, ROWAGG_AVG, pUDAFFunc, - udafc ? udafc->getContext().getParamKeys() : NULL)); + AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple( + foundTupleKey, aggOp, pUDAFFunc, udafc ? 
udafc->getContext().getParamKeys() : NULL)); + colPm = it->second; + oidsAggUm.push_back(oidsAggPm[colPm]); + keysAggUm.push_back(retKey); + scaleAggUm.push_back(scaleAggPm[colPm]); + precisionAggUm.push_back(precisionAggPm[colPm]); + typeAggUm.push_back(typeAggPm[colPm]); + csNumAggUm.push_back(csNumAggPm[colPm]); + widthAggUm.push_back(widthAggPm[colPm]); + // Update the `retKey` to specify that this column is a duplicate. + retKey = foundTupleKey; + } + else + { + bool returnColMissing = true; - if (it != aggFuncMap.end()) + // check if a SUM or COUNT covered by AVG + if (aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) { - // false alarm - returnColMissing = false; + it = aggFuncMap.find(boost::make_tuple(returnedColVec[i].first, ROWAGG_AVG, pUDAFFunc, + udafc ? udafc->getContext().getParamKeys() : NULL)); - colPm = it->second; - - if (aggOp == ROWAGG_SUM) + if (it != aggFuncMap.end()) { - wideDecimalOrLongDouble(colPm, typeAggPm[colPm], precisionAggPm, scaleAggPm, widthAggPm, - typeAggUm, scaleAggUm, precisionAggUm, widthAggUm); + // false alarm + returnColMissing = false; - oidsAggUm.push_back(oidsAggPm[colPm]); - keysAggUm.push_back(retKey); - csNumAggUm.push_back(8); - } - else - { - // leave the count() to avg - aggOp = ROWAGG_COUNT_NO_OP; + colPm = it->second; - colPm++; - oidsAggUm.push_back(oidsAggPm[colPm]); - keysAggUm.push_back(retKey); - scaleAggUm.push_back(0); - precisionAggUm.push_back(19); - typeAggUm.push_back(CalpontSystemCatalog::UBIGINT); - csNumAggUm.push_back(8); - widthAggUm.push_back(bigIntWidth); + if (aggOp == ROWAGG_SUM) + { + wideDecimalOrLongDouble(colPm, typeAggPm[colPm], precisionAggPm, scaleAggPm, widthAggPm, + typeAggUm, scaleAggUm, precisionAggUm, widthAggUm); + + oidsAggUm.push_back(oidsAggPm[colPm]); + keysAggUm.push_back(retKey); + csNumAggUm.push_back(8); + } + else + { + // leave the count() to avg + aggOp = ROWAGG_COUNT_NO_OP; + + colPm++; + oidsAggUm.push_back(oidsAggPm[colPm]); + keysAggUm.push_back(retKey); + 
scaleAggUm.push_back(0); + precisionAggUm.push_back(19); + typeAggUm.push_back(CalpontSystemCatalog::UBIGINT); + csNumAggUm.push_back(8); + widthAggUm.push_back(bigIntWidth); + } } } - } - else if (find(jobInfo.expressionVec.begin(), jobInfo.expressionVec.end(), retKey) != - jobInfo.expressionVec.end()) - { - // a function on aggregation - TupleInfo ti = getTupleInfo(retKey, jobInfo); - oidsAggUm.push_back(ti.oid); - keysAggUm.push_back(retKey); - scaleAggUm.push_back(ti.scale); - precisionAggUm.push_back(ti.precision); - typeAggUm.push_back(ti.dtype); - csNumAggUm.push_back(ti.csNum); - widthAggUm.push_back(ti.width); + else if (find(jobInfo.expressionVec.begin(), jobInfo.expressionVec.end(), retKey) != + jobInfo.expressionVec.end()) + { + // a function on aggregation + TupleInfo ti = getTupleInfo(retKey, jobInfo); + oidsAggUm.push_back(ti.oid); + keysAggUm.push_back(retKey); + scaleAggUm.push_back(ti.scale); + precisionAggUm.push_back(ti.precision); + typeAggUm.push_back(ti.dtype); + csNumAggUm.push_back(ti.csNum); + widthAggUm.push_back(ti.width); - returnColMissing = false; - } - else if (jobInfo.windowSet.find(retKey) != jobInfo.windowSet.end()) - { - // an window function - TupleInfo ti = getTupleInfo(retKey, jobInfo); - oidsAggUm.push_back(ti.oid); - keysAggUm.push_back(retKey); - scaleAggUm.push_back(ti.scale); - precisionAggUm.push_back(ti.precision); - typeAggUm.push_back(ti.dtype); - csNumAggUm.push_back(ti.csNum); - widthAggUm.push_back(ti.width); + returnColMissing = false; + } + else if (jobInfo.windowSet.find(retKey) != jobInfo.windowSet.end()) + { + // an window function + TupleInfo ti = getTupleInfo(retKey, jobInfo); + oidsAggUm.push_back(ti.oid); + keysAggUm.push_back(retKey); + scaleAggUm.push_back(ti.scale); + precisionAggUm.push_back(ti.precision); + typeAggUm.push_back(ti.dtype); + csNumAggUm.push_back(ti.csNum); + widthAggUm.push_back(ti.width); - returnColMissing = false; - } - else if (aggOp == ROWAGG_CONSTANT) - { - TupleInfo ti = 
getTupleInfo(retKey, jobInfo); - oidsAggUm.push_back(ti.oid); - keysAggUm.push_back(retKey); - scaleAggUm.push_back(ti.scale); - precisionAggUm.push_back(ti.precision); - typeAggUm.push_back(ti.dtype); - csNumAggUm.push_back(ti.csNum); - widthAggUm.push_back(ti.width); + returnColMissing = false; + } + else if (aggOp == ROWAGG_CONSTANT) + { + TupleInfo ti = getTupleInfo(retKey, jobInfo); + oidsAggUm.push_back(ti.oid); + keysAggUm.push_back(retKey); + scaleAggUm.push_back(ti.scale); + precisionAggUm.push_back(ti.precision); + typeAggUm.push_back(ti.dtype); + csNumAggUm.push_back(ti.csNum); + widthAggUm.push_back(ti.width); - returnColMissing = false; - } + returnColMissing = false; + } - if (returnColMissing) - { - Message::Args args; - args.add(keyName(outIdx, retKey, jobInfo)); - string emsg = IDBErrorInfo::instance()->errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); - cerr << "prep2PhasesAggregate: " << emsg << " oid=" << (int)jobInfo.keyInfo->tupleKeyVec[retKey].fId - << ", alias=" << jobInfo.keyInfo->tupleKeyVec[retKey].fTable - << ", view=" << jobInfo.keyInfo->tupleKeyVec[retKey].fView << ", function=" << (int)aggOp - << endl; - throw IDBExcept(emsg, ERR_NOT_GROUPBY_EXPRESSION); + if (returnColMissing) + { + Message::Args args; + args.add(keyName(outIdx, retKey, jobInfo)); + string emsg = IDBErrorInfo::instance()->errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); + cerr << "prep2PhasesAggregate: " << emsg + << " oid=" << (int)jobInfo.keyInfo->tupleKeyVec[retKey].fId + << ", alias=" << jobInfo.keyInfo->tupleKeyVec[retKey].fTable + << ", view=" << jobInfo.keyInfo->tupleKeyVec[retKey].fView << ", function=" << (int)aggOp + << endl; + throw IDBExcept(emsg, ERR_NOT_GROUPBY_EXPRESSION); + } } } @@ -3713,7 +3723,8 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector RowGroup aggRgUm(oidsAggUm.size(), posAggUm, oidsAggUm, keysAggUm, typeAggUm, csNumAggUm, scaleAggUm, precisionAggUm, jobInfo.stringTableThreshold); - SP_ROWAGG_UM_t rowAggUm(new 
RowAggregationUMP2(groupByUm, functionVecUm, jobInfo.rm, jobInfo.umMemLimit, false)); + SP_ROWAGG_UM_t rowAggUm( + new RowAggregationUMP2(groupByUm, functionVecUm, jobInfo.rm, jobInfo.umMemLimit, false)); rowAggUm->timeZone(jobInfo.timeZone); rowgroups.push_back(aggRgUm); aggregators.push_back(rowAggUm); @@ -4505,109 +4516,130 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(JobInfo& jobInfo, vectorgetContext().getParamKeys() : NULL)); + AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple( + foundTupleKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)); + colUm = it->second; + oidsAggDist.push_back(oidsAggUm[colUm]); + keysAggDist.push_back(keysAggUm[colUm]); + scaleAggDist.push_back(scaleAggUm[colUm]); + precisionAggDist.push_back(precisionAggUm[colUm]); + typeAggDist.push_back(typeAggUm[colUm]); + csNumAggDist.push_back(csNumAggUm[colUm]); + widthAggDist.push_back(widthAggUm[colUm]); + // Update the `retKey` to specify that this column is a duplicate. + retKey = foundTupleKey; + } + else + { + // here + bool returnColMissing = true; - if (it != aggFuncMap.end()) + // check if a SUM or COUNT covered by AVG + if (aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) { - // false alarm - returnColMissing = false; + it = aggFuncMap.find(boost::make_tuple(returnedColVec[i].first, ROWAGG_AVG, pUDAFFunc, + udafc ? 
udafc->getContext().getParamKeys() : NULL)); - colUm = it->second; - - if (aggOp == ROWAGG_SUM) + if (it != aggFuncMap.end()) { - oidsAggDist.push_back(oidsAggUm[colUm]); - keysAggDist.push_back(retKey); - csNumAggDist.push_back(8); - wideDecimalOrLongDouble(colUm, typeAggUm[colUm], precisionAggUm, scaleAggUm, widthAggUm, - typeAggDist, scaleAggDist, precisionAggDist, widthAggDist); - } - else - { - // leave the count() to avg - aggOp = ROWAGG_COUNT_NO_OP; + // false alarm + returnColMissing = false; - oidsAggDist.push_back(oidsAggUm[colUm]); - keysAggDist.push_back(retKey); - scaleAggDist.push_back(0); - if (isUnsigned(typeAggUm[colUm])) + colUm = it->second; + + if (aggOp == ROWAGG_SUM) { - precisionAggDist.push_back(20); - typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); + oidsAggDist.push_back(oidsAggUm[colUm]); + keysAggDist.push_back(retKey); + csNumAggDist.push_back(8); + wideDecimalOrLongDouble(colUm, typeAggUm[colUm], precisionAggUm, scaleAggUm, widthAggUm, + typeAggDist, scaleAggDist, precisionAggDist, widthAggDist); } else { - precisionAggDist.push_back(19); - typeAggDist.push_back(CalpontSystemCatalog::BIGINT); + // leave the count() to avg + aggOp = ROWAGG_COUNT_NO_OP; + + oidsAggDist.push_back(oidsAggUm[colUm]); + keysAggDist.push_back(retKey); + scaleAggDist.push_back(0); + if (isUnsigned(typeAggUm[colUm])) + { + precisionAggDist.push_back(20); + typeAggDist.push_back(CalpontSystemCatalog::UBIGINT); + } + else + { + precisionAggDist.push_back(19); + typeAggDist.push_back(CalpontSystemCatalog::BIGINT); + } + csNumAggDist.push_back(8); + widthAggDist.push_back(bigIntWidth); } - csNumAggDist.push_back(8); - widthAggDist.push_back(bigIntWidth); } } - } - else if (find(jobInfo.expressionVec.begin(), jobInfo.expressionVec.end(), retKey) != - jobInfo.expressionVec.end()) - { - // a function on aggregation - TupleInfo ti = getTupleInfo(retKey, jobInfo); - oidsAggDist.push_back(ti.oid); - keysAggDist.push_back(retKey); - scaleAggDist.push_back(ti.scale); 
- precisionAggDist.push_back(ti.precision); - typeAggDist.push_back(ti.dtype); - csNumAggDist.push_back(ti.csNum); - widthAggDist.push_back(ti.width); + else if (find(jobInfo.expressionVec.begin(), jobInfo.expressionVec.end(), retKey) != + jobInfo.expressionVec.end()) + { + // a function on aggregation + TupleInfo ti = getTupleInfo(retKey, jobInfo); + oidsAggDist.push_back(ti.oid); + keysAggDist.push_back(retKey); + scaleAggDist.push_back(ti.scale); + precisionAggDist.push_back(ti.precision); + typeAggDist.push_back(ti.dtype); + csNumAggDist.push_back(ti.csNum); + widthAggDist.push_back(ti.width); - returnColMissing = false; - } - else if (jobInfo.windowSet.find(retKey) != jobInfo.windowSet.end()) - { - // a window function - TupleInfo ti = getTupleInfo(retKey, jobInfo); - oidsAggDist.push_back(ti.oid); - keysAggDist.push_back(retKey); - scaleAggDist.push_back(ti.scale); - precisionAggDist.push_back(ti.precision); - typeAggDist.push_back(ti.dtype); - csNumAggDist.push_back(ti.csNum); - widthAggDist.push_back(ti.width); + returnColMissing = false; + } + else if (jobInfo.windowSet.find(retKey) != jobInfo.windowSet.end()) + { + // a window function + TupleInfo ti = getTupleInfo(retKey, jobInfo); + oidsAggDist.push_back(ti.oid); + keysAggDist.push_back(retKey); + scaleAggDist.push_back(ti.scale); + precisionAggDist.push_back(ti.precision); + typeAggDist.push_back(ti.dtype); + csNumAggDist.push_back(ti.csNum); + widthAggDist.push_back(ti.width); - returnColMissing = false; - } - else if (aggOp == ROWAGG_CONSTANT) - { - TupleInfo ti = getTupleInfo(retKey, jobInfo); - oidsAggDist.push_back(ti.oid); - keysAggDist.push_back(retKey); - scaleAggDist.push_back(ti.scale); - precisionAggDist.push_back(ti.precision); - typeAggDist.push_back(ti.dtype); - csNumAggDist.push_back(ti.csNum); - widthAggDist.push_back(ti.width); + returnColMissing = false; + } + else if (aggOp == ROWAGG_CONSTANT) + { + TupleInfo ti = getTupleInfo(retKey, jobInfo); + oidsAggDist.push_back(ti.oid); + 
keysAggDist.push_back(retKey); + scaleAggDist.push_back(ti.scale); + precisionAggDist.push_back(ti.precision); + typeAggDist.push_back(ti.dtype); + csNumAggDist.push_back(ti.csNum); + widthAggDist.push_back(ti.width); - returnColMissing = false; - } + returnColMissing = false; + } - if (returnColMissing) - { - Message::Args args; - args.add(keyName(outIdx, retKey, jobInfo)); - string emsg = IDBErrorInfo::instance()->errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); - cerr << "prep2PhasesDistinctAggregate: " << emsg - << " oid=" << (int)jobInfo.keyInfo->tupleKeyVec[retKey].fId - << ", alias=" << jobInfo.keyInfo->tupleKeyVec[retKey].fTable - << ", view=" << jobInfo.keyInfo->tupleKeyVec[retKey].fView << ", function=" << (int)aggOp - << endl; - throw IDBExcept(emsg, ERR_NOT_GROUPBY_EXPRESSION); - } - } // else not a direct hit - } // else not a DISTINCT + if (returnColMissing) + { + Message::Args args; + args.add(keyName(outIdx, retKey, jobInfo)); + string emsg = IDBErrorInfo::instance()->errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); + cerr << "prep2PhasesDistinctAggregate: " << emsg + << " oid=" << (int)jobInfo.keyInfo->tupleKeyVec[retKey].fId + << ", alias=" << jobInfo.keyInfo->tupleKeyVec[retKey].fTable + << ", view=" << jobInfo.keyInfo->tupleKeyVec[retKey].fView << ", function=" << (int)aggOp + << endl; + throw IDBExcept(emsg, ERR_NOT_GROUPBY_EXPRESSION); + } + } // else not a direct hit + } + } // else not a DISTINCT // update groupby vector if the groupby column is a returned column if (returnedColVec[i].second == 0) @@ -5496,18 +5528,18 @@ void TupleAggregateStep::threadedAggregateRowGroups(uint32_t threadID) // The key is the groupby columns, which are the leading columns. // TBD This approach could potential // put all values in on bucket. - // The fAggregator->hasRollup() is true when we perform one-phase - // aggregation and also are doing subtotals' computations. - // Subtotals produce new keys whose hash values may not be in - // the processing bucket. 
Consider case for key tuples (1,2) and (1,3). - // Their subtotals's keys will be (1, NULL) and (1, NULL) - // but they will be left in their processing buckets and never - // gets aggregated properly. - // Due to this, we put all rows into the same bucket 0 when perfoming - // single-phase aggregation with subtotals. - // For all other cases (single-phase without subtotals and two-phase - // aggregation with and without subtotals) fAggregator->hasRollup() is false. - // In these cases we have full parallel processing as expected. + // The fAggregator->hasRollup() is true when we perform one-phase + // aggregation and also are doing subtotals' computations. + // Subtotals produce new keys whose hash values may not be in + // the processing bucket. Consider case for key tuples (1,2) and (1,3). + // Their subtotals's keys will be (1, NULL) and (1, NULL) + // but they will be left in their processing buckets and never + // gets aggregated properly. + // Due to this, we put all rows into the same bucket 0 when perfoming + // single-phase aggregation with subtotals. + // For all other cases (single-phase without subtotals and two-phase + // aggregation with and without subtotals) fAggregator->hasRollup() is false. + // In these cases we have full parallel processing as expected. uint64_t hash = fAggregator->hasRollup() ? 
0 : rowgroup::hashRow(rowIn, hashLens[0] - 1); int bucketID = hash % fNumOfBuckets; rowBucketVecs[bucketID][0].emplace_back(rowIn.getPointer(), hash); @@ -5953,4 +5985,44 @@ void TupleAggregateStep::formatMiniStats() fMiniInfo += oss.str(); } -} // namespace joblist +uint32_t TupleAggregateStep::getTupleKeyFromTuple( + const boost::tuple*>& tuple) +{ + return tuple.get<0>(); +} + +uint32_t TupleAggregateStep::getTupleKeyFromTuple(uint32_t key) +{ + return key; +} + +template +bool TupleAggregateStep::tryToFindEqualFunctionColumnByTupleKey(JobInfo& jobInfo, GroupByMap& groupByMap, + const uint32_t tupleKey, uint32_t& foundKey) +{ + auto funcMapIt = jobInfo.functionColumnMap.find(tupleKey); + if (funcMapIt != jobInfo.functionColumnMap.end()) + { + const auto& rFunctionInfo = funcMapIt->second; + // Try to match given `tupleKey` in `groupByMap`. + for (const auto& groupByMapPair : groupByMap) + { + const auto currentTupleKey = getTupleKeyFromTuple(groupByMapPair.first); + auto currentFuncMapIt = jobInfo.functionColumnMap.find(currentTupleKey); + // Skip if the keys are the same. + if (currentFuncMapIt != jobInfo.functionColumnMap.end() && currentTupleKey != tupleKey) + { + const auto& lFunctionInfo = currentFuncMapIt->second; + // Oid and function name should be the same. 
+ if (lFunctionInfo.associatedColumnOid == rFunctionInfo.associatedColumnOid && + lFunctionInfo.functionName == rFunctionInfo.functionName) + { + foundKey = currentTupleKey; + return true; + } + } + } + } + return false; +} +} // namespace joblist \ No newline at end of file diff --git a/dbcon/joblist/tupleaggregatestep.h b/dbcon/joblist/tupleaggregatestep.h index 80d8e9da3..a0ddcfdfb 100644 --- a/dbcon/joblist/tupleaggregatestep.h +++ b/dbcon/joblist/tupleaggregatestep.h @@ -32,6 +32,64 @@ namespace joblist // forward reference struct JobInfo; +struct cmpTuple +{ + bool operator()(boost::tuple*> a, + boost::tuple*> b) const + { + uint32_t keya = boost::get<0>(a); + uint32_t keyb = boost::get<0>(b); + int opa; + int opb; + mcsv1sdk::mcsv1_UDAF* pUDAFa; + mcsv1sdk::mcsv1_UDAF* pUDAFb; + + // If key is less than + if (keya < keyb) + return true; + if (keya == keyb) + { + // test Op + opa = boost::get<1>(a); + opb = boost::get<1>(b); + if (opa < opb) + return true; + if (opa == opb) + { + // look at the UDAF object + pUDAFa = boost::get<2>(a); + pUDAFb = boost::get<2>(b); + if (pUDAFa < pUDAFb) + return true; + if (pUDAFa == pUDAFb) + { + std::vector* paramKeysa = boost::get<3>(a); + std::vector* paramKeysb = boost::get<3>(b); + if (paramKeysa == NULL || paramKeysb == NULL) + return false; + if (paramKeysa->size() < paramKeysb->size()) + return true; + if (paramKeysa->size() == paramKeysb->size()) + { + for (uint64_t i = 0; i < paramKeysa->size(); ++i) + { + if ((*paramKeysa)[i] < (*paramKeysb)[i]) + return true; + } + } + } + } + } + return false; + } +}; + +// The AGG_MAP type is used to maintain a list of aggregate functions in order to +// detect duplicates. Since all UDAF have the same op type (ROWAGG_UDAF), we add in +// the function pointer in order to ensure uniqueness. 
+using AGG_MAP = + map*>, uint64_t, cmpTuple>; + /** @brief class TupleAggregateStep * */ @@ -105,6 +163,13 @@ class TupleAggregateStep : public JobStep, public TupleDeliveryStep void pruneAuxColumns(); void formatMiniStats(); void printCalTrace(); + template + static bool tryToFindEqualFunctionColumnByTupleKey(JobInfo& jobInfo, GroupByMap& groupByMap, + const uint32_t tupleKey, uint32_t& foundTypleKey); + // This functions are workaround for the function above. For some reason different parts of the code with same + // semantics use different containers. + static uint32_t getTupleKeyFromTuple(const boost::tuple*>& tuple); + static uint32_t getTupleKeyFromTuple(uint32_t key); boost::shared_ptr fCatalog; uint64_t fRowsReturned; @@ -226,4 +291,3 @@ class TupleAggregateStep : public JobStep, public TupleDeliveryStep }; } // namespace joblist - diff --git a/mysql-test/columnstore/bugfixes/mcol-5476.result b/mysql-test/columnstore/bugfixes/mcol-5476.result new file mode 100644 index 000000000..7188b9007 --- /dev/null +++ b/mysql-test/columnstore/bugfixes/mcol-5476.result @@ -0,0 +1,71 @@ +DROP DATABASE IF EXISTS `mcol-5476`; +CREATE DATABASE `mcol-5476`; +USE `mcol-5476`; +create table t1 (a int, b int) engine=columnstore; +insert into t1 values (1, 1), (2, 1), (3, 1), (4, 2), (5, 2); +select sum(a), abs(b), abs(b) from t1 group by abs(b), abs(b); +sum(a) abs(b) abs(b) +6 1 1 +9 2 2 +select sum(a), abs(b), abs(b) from t1 group by abs(b); +sum(a) abs(b) abs(b) +6 1 1 +9 2 2 +select sum(distinct a), abs(b), abs(b) from t1 group by abs(b), abs(b); +sum(distinct a) abs(b) abs(b) +6 1 1 +9 2 2 +select sum(distinct a), abs(b), abs(b) from t1 group by abs(b); +sum(distinct a) abs(b) abs(b) +6 1 1 +9 2 2 +create table t2 (a int, b int, c varchar(20)) engine=columnstore; +insert into t2 values (1, 1, "abc"), (2, 1, "abc"), (1, 2, "abcd"), (3, 2, "abcd"); +select sum(a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), length(c); +sum(a) abs(b) length(c) abs(b) 
length(c) +3 1 3 1 3 +4 2 4 2 4 +select sum(a), abs(b), abs(b), length(c), length(c) from t2 group by abs(b), length(c); +sum(a) abs(b) abs(b) length(c) length(c) +3 1 1 3 3 +4 2 2 4 4 +select sum(a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), abs(b), length(c), length(c); +sum(a) abs(b) length(c) abs(b) length(c) +3 1 3 1 3 +4 2 4 2 4 +select sum(a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), length(c), length(c), abs(b); +sum(a) abs(b) length(c) abs(b) length(c) +3 1 3 1 3 +4 2 4 2 4 +select sum(distinct a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), length(c) order by abs(b); +sum(distinct a) abs(b) length(c) abs(b) length(c) +3 1 3 1 3 +4 2 4 2 4 +select sum(distinct a), abs(b), abs(b), length(c), length(c) from t2 group by abs(b), length(c) order by abs(b); +sum(distinct a) abs(b) abs(b) length(c) length(c) +3 1 1 3 3 +4 2 2 4 4 +select sum(distinct a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), abs(b), length(c), length(c); +sum(distinct a) abs(b) length(c) abs(b) length(c) +3 1 3 1 3 +4 2 4 2 4 +select sum(distinct a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), length(c), length(c), abs(b); +sum(distinct a) abs(b) length(c) abs(b) length(c) +3 1 3 1 3 +4 2 4 2 4 +select sum(distinct t1.a), abs(t2.b), abs(t2.b) from t1 join t2 on t1.a = t2.a group by abs(t2.b); +sum(distinct t1.a) abs(t2.b) abs(t2.b) +3 1 1 +4 2 2 +select sum(t1.a), abs(t2.b), abs(t2.b) from t1 join t2 on t1.a = t2.a group by abs(t2.b); +sum(t1.a) abs(t2.b) abs(t2.b) +3 1 1 +4 2 2 +create table t3 (a datetime, b int) engine=columnstore; +insert into t3 values ("2007-01-30 21:31:07", 1), ("2007-01-30 21:31:07", 3), ("2007-01-29 21:31:07", 1), ("2007-01-29 21:31:07", 2); +select distinct DAYOFWEEK(a) as C1, DAYOFWEEK(a) as C2, SUM(b) from t3 group by DAYOFWEEK(a), DAYOFWEEK(a); +C1 C2 SUM(b) +2 2 3 +3 3 4 +DROP TABLE t1, t2, t3; +DROP DATABASE `mcol-5476`; diff --git 
a/mysql-test/columnstore/bugfixes/mcol-5476.test b/mysql-test/columnstore/bugfixes/mcol-5476.test new file mode 100644 index 000000000..6eb42b0c4 --- /dev/null +++ b/mysql-test/columnstore/bugfixes/mcol-5476.test @@ -0,0 +1,59 @@ +-- source ../include/have_columnstore.inc + +--disable_warnings +DROP DATABASE IF EXISTS `mcol-5476`; +--enable_warnings +CREATE DATABASE `mcol-5476`; +USE `mcol-5476`; + +create table t1 (a int, b int) engine=columnstore; +insert into t1 values (1, 1), (2, 1), (3, 1), (4, 2), (5, 2); +#prep2aggregate +sorted_result; +select sum(a), abs(b), abs(b) from t1 group by abs(b), abs(b); +sorted_result; +select sum(a), abs(b), abs(b) from t1 group by abs(b); +#prep2distinctaggregate +sorted_result; +select sum(distinct a), abs(b), abs(b) from t1 group by abs(b), abs(b); +sorted_result; +select sum(distinct a), abs(b), abs(b) from t1 group by abs(b); + + +create table t2 (a int, b int, c varchar(20)) engine=columnstore; +insert into t2 values (1, 1, "abc"), (2, 1, "abc"), (1, 2, "abcd"), (3, 2, "abcd"); +#prep2aggregate +sorted_result; +select sum(a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), length(c); +sorted_result; +select sum(a), abs(b), abs(b), length(c), length(c) from t2 group by abs(b), length(c); +sorted_result; +select sum(a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), abs(b), length(c), length(c); +sorted_result; +select sum(a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), length(c), length(c), abs(b); +#prep2distinctaggregate +sorted_result; +select sum(distinct a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), length(c) order by abs(b); +sorted_result; +select sum(distinct a), abs(b), abs(b), length(c), length(c) from t2 group by abs(b), length(c) order by abs(b); +sorted_result; +select sum(distinct a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), abs(b), length(c), length(c); +sorted_result; +select sum(distinct a), abs(b), length(c), abs(b), 
length(c) from t2 group by abs(b), length(c), length(c), abs(b); + +#Joins +#prep1distinctaggregate +sorted_result; +select sum(distinct t1.a), abs(t2.b), abs(t2.b) from t1 join t2 on t1.a = t2.a group by abs(t2.b); +#prep1aggregate +sorted_result; +select sum(t1.a), abs(t2.b), abs(t2.b) from t1 join t2 on t1.a = t2.a group by abs(t2.b); + +#User test case +create table t3 (a datetime, b int) engine=columnstore; +insert into t3 values ("2007-01-30 21:31:07", 1), ("2007-01-30 21:31:07", 3), ("2007-01-29 21:31:07", 1), ("2007-01-29 21:31:07", 2); +sorted_result; +select distinct DAYOFWEEK(a) as C1, DAYOFWEEK(a) as C2, SUM(b) from t3 group by DAYOFWEEK(a), DAYOFWEEK(a); + +DROP TABLE t1, t2, t3; +DROP DATABASE `mcol-5476`;