mariadb-columnstore-engine (mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git)
fix(aggregation): MCOL-5467 Add support for duplicate expressions in group by. (#3052)
This patch adds support for duplicate expressions (built-in functions) with a single argument in the SELECT list and the GROUP BY clause.
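For illustration, the mcol-5476 test added by this commit exercises exactly this case: the same single-argument function appears more than once across the SELECT list and the GROUP BY clause.

create table t1 (a int, b int) engine=columnstore;
insert into t1 values (1, 1), (2, 1), (3, 1), (4, 2), (5, 2);
select sum(a), abs(b), abs(b) from t1 group by abs(b), abs(b);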
@@ -93,6 +93,20 @@ struct TupleInfo
  uint32_t csNum;  // For collations
};

// This struct holds information about `FunctionColumn`.
struct FunctionColumnInfo
{
  // Function argument.
  uint64_t associatedColumnOid;
  // Function name.
  std::string functionName;

  FunctionColumnInfo(uint64_t colOid, std::string funcName)
   : associatedColumnOid(colOid), functionName(funcName)
  {
  }
};

// for compound join
struct JoinData
{
@@ -383,6 +397,8 @@ struct JobInfo
  std::map<std::pair<uint32_t, uint32_t>, int64_t> joinEdgesToRestore;
  // Represents a pair of `table` to be on a large side and weight associated with that table.
  std::unordered_map<uint32_t, int64_t> tablesForLargeSide;
  // Represents a pair of `tupleId` and `FunctionColumnInfo`.
  std::unordered_map<uint32_t, FunctionColumnInfo> functionColumnMap;

 private:
  // defaults okay
@@ -172,7 +172,7 @@ void projectSimpleColumn(const SimpleColumn* sc, JobStepVector& jsv, JobInfo& jo
    // This is a double-step step
    // if (jobInfo.trace)
    //   cout << "doProject Emit pGetSignature for SimpleColumn " << dictOid <<
    //endl;
    //   endl;

    pds = new pDictionaryStep(dictOid, tbl_oid, ct, jobInfo);
    jobInfo.keyInfo->dictOidToColOid[dictOid] = oid;
@@ -927,7 +927,9 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo
      {
        if (jobInfo.hasRollup)
        {
          throw runtime_error("GROUP_CONCAT and JSONARRAYAGG aggregations are not supported when WITH ROLLUP modifier is used");
          throw runtime_error(
              "GROUP_CONCAT and JSONARRAYAGG aggregations are not supported when WITH ROLLUP modifier is "
              "used");
        }
        jobInfo.groupConcatCols.push_back(retCols[i]);
@@ -1246,6 +1248,7 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo
    const WindowFunctionColumn* wc = NULL;
    bool hasAggCols = false;
    bool hasWndCols = false;
    bool hasFuncColsWithOneArgument = false;

    if ((ac = dynamic_cast<const ArithmeticColumn*>(srcp.get())) != NULL)
    {
@@ -1263,6 +1266,9 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo
        hasAggCols = true;
      if (fc->windowfunctionColumnList().size() > 0)
        hasWndCols = true;
      // MCOL-5476: currently only functions with a single argument are supported in the group by list.
      if (fc->simpleColumnList().size() == 1)
        hasFuncColsWithOneArgument = true;
    }
    else if (dynamic_cast<const AggregateColumn*>(srcp.get()) != NULL)
    {
@@ -1291,6 +1297,13 @@ const JobStepVector doAggProject(const CalpontSelectExecutionPlan* csep, JobInfo
      {
        jobInfo.expressionVec.push_back(tupleKey);
      }

      if (hasFuncColsWithOneArgument)
      {
        FunctionColumnInfo fcInfo(fc->simpleColumnList().front()->oid(), fc->functionName());
        jobInfo.functionColumnMap.insert({tupleKey, fcInfo});
      }
    }

    // add to project list
@@ -75,67 +75,9 @@ using namespace querytele;
namespace
{
struct cmpTuple
{
  bool operator()(boost::tuple<uint32_t, int, mcsv1sdk::mcsv1_UDAF*, std::vector<uint32_t>*> a,
                  boost::tuple<uint32_t, int, mcsv1sdk::mcsv1_UDAF*, std::vector<uint32_t>*> b) const
  {
    uint32_t keya = boost::get<0>(a);
    uint32_t keyb = boost::get<0>(b);
    int opa;
    int opb;
    mcsv1sdk::mcsv1_UDAF* pUDAFa;
    mcsv1sdk::mcsv1_UDAF* pUDAFb;

    // If key is less than
    if (keya < keyb)
      return true;
    if (keya == keyb)
    {
      // test Op
      opa = boost::get<1>(a);
      opb = boost::get<1>(b);
      if (opa < opb)
        return true;
      if (opa == opb)
      {
        // look at the UDAF object
        pUDAFa = boost::get<2>(a);
        pUDAFb = boost::get<2>(b);
        if (pUDAFa < pUDAFb)
          return true;
        if (pUDAFa == pUDAFb)
        {
          std::vector<uint32_t>* paramKeysa = boost::get<3>(a);
          std::vector<uint32_t>* paramKeysb = boost::get<3>(b);
          if (paramKeysa == NULL || paramKeysb == NULL)
            return false;
          if (paramKeysa->size() < paramKeysb->size())
            return true;
          if (paramKeysa->size() == paramKeysb->size())
          {
            for (uint64_t i = 0; i < paramKeysa->size(); ++i)
            {
              if ((*paramKeysa)[i] < (*paramKeysb)[i])
                return true;
            }
          }
        }
      }
    }
    return false;
  }
};

typedef vector<std::pair<Row::Pointer, uint64_t>> RowBucket;
typedef vector<RowBucket> RowBucketVec;

// The AGG_MAP type is used to maintain a list of aggregate functions in order to
// detect duplicates. Since all UDAF have the same op type (ROWAGG_UDAF), we add in
// the function pointer in order to ensure uniqueness.
typedef map<boost::tuple<uint32_t, int, mcsv1sdk::mcsv1_UDAF*, std::vector<uint32_t>*>, uint64_t, cmpTuple>
    AGG_MAP;

inline RowAggFunctionType functionIdMap(int planFuncId)
{
  switch (planFuncId)
@@ -1189,6 +1131,23 @@ void TupleAggregateStep::prep1PhaseAggregate(JobInfo& jobInfo, vector<RowGroup>&
        continue;
      }
      else
      {
        uint32_t foundTupleKey{0};
        if (tryToFindEqualFunctionColumnByTupleKey(jobInfo, groupbyMap, key, foundTupleKey))
        {
          oidsAgg.push_back(oidsProj[colProj]);
          keysAgg.push_back(key);
          scaleAgg.push_back(scaleProj[colProj]);
          precisionAgg.push_back(precisionProj[colProj]);
          typeAgg.push_back(typeProj[colProj]);
          csNumAgg.push_back(csNumProj[colProj]);
          widthAgg.push_back(width[colProj]);
          // Update key.
          key = foundTupleKey;
          ++outIdx;
          continue;
        }
        else
        {
          Message::Args args;
          args.add(keyName(i, key, jobInfo));
@@ -1199,6 +1158,7 @@ void TupleAggregateStep::prep1PhaseAggregate(JobInfo& jobInfo, vector<RowGroup>&
          throw IDBExcept(emsg, ERR_NOT_GROUPBY_EXPRESSION);
        }
      }
    }

    SP_ROWAGG_FUNC_t funct;
@@ -2244,6 +2204,33 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vector<Ro
      // not a direct hit -- a returned column is not already in the RG from PMs
      else
      {
        uint32_t foundTupleKey{0};
        if (tryToFindEqualFunctionColumnByTupleKey(jobInfo, aggFuncMap, retKey, foundTupleKey))
        {
          AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(
              foundTupleKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL));
          colAgg = it->second;
          oidsAggDist.push_back(oidsAgg[colAgg]);
          keysAggDist.push_back(keysAgg[colAgg]);
          scaleAggDist.push_back(scaleAgg[colAgg]);
          precisionAggDist.push_back(precisionAgg[colAgg]);
          typeAggDist.push_back(typeAgg[colAgg]);
          csNumAggDist.push_back(csNumAgg[colAgg]);
          uint32_t width = widthAgg[colAgg];
          if (aggOp == ROWAGG_GROUP_CONCAT || aggOp == ROWAGG_JSON_ARRAY)
          {
            TupleInfo ti = getTupleInfo(retKey, jobInfo);

            if (ti.width > width)
              width = ti.width;
          }
          widthAggDist.push_back(width);

          // Update the `retKey` to specify that this column is a duplicate.
          retKey = foundTupleKey;
        }
        else
        {
          bool returnColMissing = true;
@@ -2395,6 +2382,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vector<Ro
          }  // else
        }  // switch
      }
    }

    // update groupby vector if the groupby column is a returned column
    if (returnedColVec[i].second == 0)
@@ -3438,6 +3426,25 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector<RowGroup>
      // not a direct hit -- a returned column is not already in the RG from PMs
      else
      {
        // MCOL-5476.
        uint32_t foundTupleKey{0};
        if (tryToFindEqualFunctionColumnByTupleKey(jobInfo, aggFuncMap, retKey, foundTupleKey))
        {
          AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(
              foundTupleKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL));
          colPm = it->second;
          oidsAggUm.push_back(oidsAggPm[colPm]);
          keysAggUm.push_back(retKey);
          scaleAggUm.push_back(scaleAggPm[colPm]);
          precisionAggUm.push_back(precisionAggPm[colPm]);
          typeAggUm.push_back(typeAggPm[colPm]);
          csNumAggUm.push_back(csNumAggPm[colPm]);
          widthAggUm.push_back(widthAggPm[colPm]);
          // Update the `retKey` to specify that this column is a duplicate.
          retKey = foundTupleKey;
        }
        else
        {
          bool returnColMissing = true;
@@ -3527,13 +3534,15 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector<RowGroup>
          Message::Args args;
          args.add(keyName(outIdx, retKey, jobInfo));
          string emsg = IDBErrorInfo::instance()->errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args);
          cerr << "prep2PhasesAggregate: " << emsg << " oid=" << (int)jobInfo.keyInfo->tupleKeyVec[retKey].fId
          cerr << "prep2PhasesAggregate: " << emsg
               << " oid=" << (int)jobInfo.keyInfo->tupleKeyVec[retKey].fId
               << ", alias=" << jobInfo.keyInfo->tupleKeyVec[retKey].fTable
               << ", view=" << jobInfo.keyInfo->tupleKeyVec[retKey].fView << ", function=" << (int)aggOp
               << endl;
          throw IDBExcept(emsg, ERR_NOT_GROUPBY_EXPRESSION);
        }
      }
    }

    // update groupby vector if the groupby column is a returned column
    if (returnedColVec[i].second == 0)
@@ -3713,7 +3722,8 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector<RowGroup>
  RowGroup aggRgUm(oidsAggUm.size(), posAggUm, oidsAggUm, keysAggUm, typeAggUm, csNumAggUm, scaleAggUm,
                   precisionAggUm, jobInfo.stringTableThreshold);
  SP_ROWAGG_UM_t rowAggUm(new RowAggregationUMP2(groupByUm, functionVecUm, jobInfo.rm, jobInfo.umMemLimit, false));
  SP_ROWAGG_UM_t rowAggUm(
      new RowAggregationUMP2(groupByUm, functionVecUm, jobInfo.rm, jobInfo.umMemLimit, false));
  rowAggUm->timeZone(jobInfo.timeZone);
  rowgroups.push_back(aggRgUm);
  aggregators.push_back(rowAggUm);
@@ -4505,6 +4515,26 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(JobInfo& jobInfo, vector<R
      // not a direct hit -- a returned column is not already in the RG from PMs
      else
      {
        // MCOL-5476.
        uint32_t foundTupleKey{0};
        if (tryToFindEqualFunctionColumnByTupleKey(jobInfo, aggFuncMap, retKey, foundTupleKey))
        {
          AGG_MAP::iterator it = aggFuncMap.find(boost::make_tuple(
              foundTupleKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL));
          colUm = it->second;
          oidsAggDist.push_back(oidsAggUm[colUm]);
          keysAggDist.push_back(keysAggUm[colUm]);
          scaleAggDist.push_back(scaleAggUm[colUm]);
          precisionAggDist.push_back(precisionAggUm[colUm]);
          typeAggDist.push_back(typeAggUm[colUm]);
          csNumAggDist.push_back(csNumAggUm[colUm]);
          widthAggDist.push_back(widthAggUm[colUm]);
          // Update the `retKey` to specify that this column is a duplicate.
          retKey = foundTupleKey;
        }
        else
        {
          // here
          bool returnColMissing = true;

          // check if a SUM or COUNT covered by AVG
@@ -4607,6 +4637,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(JobInfo& jobInfo, vector<R
          throw IDBExcept(emsg, ERR_NOT_GROUPBY_EXPRESSION);
        }
      }  // else not a direct hit
    }
  }  // else not a DISTINCT

  // update groupby vector if the groupby column is a returned column
@@ -5942,4 +5973,44 @@ void TupleAggregateStep::formatMiniStats()
  fMiniInfo += oss.str();
}

uint32_t TupleAggregateStep::getTupleKeyFromTuple(
    const boost::tuple<uint32_t, int, mcsv1sdk::mcsv1_UDAF*, std::vector<uint32_t>*>& tuple)
{
  return tuple.get<0>();
}

uint32_t TupleAggregateStep::getTupleKeyFromTuple(uint32_t key)
{
  return key;
}

template <class GroupByMap>
bool TupleAggregateStep::tryToFindEqualFunctionColumnByTupleKey(JobInfo& jobInfo, GroupByMap& groupByMap,
                                                                const uint32_t tupleKey, uint32_t& foundKey)
{
  auto funcMapIt = jobInfo.functionColumnMap.find(tupleKey);
  if (funcMapIt != jobInfo.functionColumnMap.end())
  {
    const auto& rFunctionInfo = funcMapIt->second;
    // Try to match given `tupleKey` in `groupByMap`.
    for (const auto& groupByMapPair : groupByMap)
    {
      const auto currentTupleKey = getTupleKeyFromTuple(groupByMapPair.first);
      auto currentFuncMapIt = jobInfo.functionColumnMap.find(currentTupleKey);
      // Skip if the keys are the same.
      if (currentFuncMapIt != jobInfo.functionColumnMap.end() && currentTupleKey != tupleKey)
      {
        const auto& lFunctionInfo = currentFuncMapIt->second;
        // Oid and function name should be the same.
        if (lFunctionInfo.associatedColumnOid == rFunctionInfo.associatedColumnOid &&
            lFunctionInfo.functionName == rFunctionInfo.functionName)
        {
          foundKey = currentTupleKey;
          return true;
        }
      }
    }
  }
  return false;
}
}  // namespace joblist
@@ -32,6 +32,64 @@ namespace joblist
// forward reference
struct JobInfo;

struct cmpTuple
{
  bool operator()(boost::tuple<uint32_t, int, mcsv1sdk::mcsv1_UDAF*, std::vector<uint32_t>*> a,
                  boost::tuple<uint32_t, int, mcsv1sdk::mcsv1_UDAF*, std::vector<uint32_t>*> b) const
  {
    uint32_t keya = boost::get<0>(a);
    uint32_t keyb = boost::get<0>(b);
    int opa;
    int opb;
    mcsv1sdk::mcsv1_UDAF* pUDAFa;
    mcsv1sdk::mcsv1_UDAF* pUDAFb;

    // If key is less than
    if (keya < keyb)
      return true;
    if (keya == keyb)
    {
      // test Op
      opa = boost::get<1>(a);
      opb = boost::get<1>(b);
      if (opa < opb)
        return true;
      if (opa == opb)
      {
        // look at the UDAF object
        pUDAFa = boost::get<2>(a);
        pUDAFb = boost::get<2>(b);
        if (pUDAFa < pUDAFb)
          return true;
        if (pUDAFa == pUDAFb)
        {
          std::vector<uint32_t>* paramKeysa = boost::get<3>(a);
          std::vector<uint32_t>* paramKeysb = boost::get<3>(b);
          if (paramKeysa == NULL || paramKeysb == NULL)
            return false;
          if (paramKeysa->size() < paramKeysb->size())
            return true;
          if (paramKeysa->size() == paramKeysb->size())
          {
            for (uint64_t i = 0; i < paramKeysa->size(); ++i)
            {
              if ((*paramKeysa)[i] < (*paramKeysb)[i])
                return true;
            }
          }
        }
      }
    }
    return false;
  }
};

// The AGG_MAP type is used to maintain a list of aggregate functions in order to
// detect duplicates. Since all UDAF have the same op type (ROWAGG_UDAF), we add in
// the function pointer in order to ensure uniqueness.
using AGG_MAP =
    map<boost::tuple<uint32_t, int, mcsv1sdk::mcsv1_UDAF*, std::vector<uint32_t>*>, uint64_t, cmpTuple>;

/** @brief class TupleAggregateStep
 *
 */
@@ -105,6 +163,13 @@ class TupleAggregateStep : public JobStep, public TupleDeliveryStep
  void pruneAuxColumns();
  void formatMiniStats();
  void printCalTrace();
  template <class GroupByMap>
  static bool tryToFindEqualFunctionColumnByTupleKey(JobInfo& jobInfo, GroupByMap& groupByMap,
                                                     const uint32_t tupleKey, uint32_t& foundTupleKey);
  // These functions are a workaround for the function above: for some reason, different parts of the code
  // with the same semantics use different containers.
  static uint32_t getTupleKeyFromTuple(
      const boost::tuple<uint32_t, int, mcsv1sdk::mcsv1_UDAF*, std::vector<uint32_t>*>& tuple);
  static uint32_t getTupleKeyFromTuple(uint32_t key);

  boost::shared_ptr<execplan::CalpontSystemCatalog> fCatalog;
  uint64_t fRowsReturned;
@@ -226,4 +291,3 @@ class TupleAggregateStep : public JobStep, public TupleDeliveryStep
};

}  // namespace joblist
mysql-test/columnstore/bugfixes/mcol-5476.result (new file, 71 lines)
@@ -0,0 +1,71 @@
DROP DATABASE IF EXISTS `mcol-5476`;
CREATE DATABASE `mcol-5476`;
USE `mcol-5476`;
create table t1 (a int, b int) engine=columnstore;
insert into t1 values (1, 1), (2, 1), (3, 1), (4, 2), (5, 2);
select sum(a), abs(b), abs(b) from t1 group by abs(b), abs(b);
sum(a) abs(b) abs(b)
6 1 1
9 2 2
select sum(a), abs(b), abs(b) from t1 group by abs(b);
sum(a) abs(b) abs(b)
6 1 1
9 2 2
select sum(distinct a), abs(b), abs(b) from t1 group by abs(b), abs(b);
sum(distinct a) abs(b) abs(b)
6 1 1
9 2 2
select sum(distinct a), abs(b), abs(b) from t1 group by abs(b);
sum(distinct a) abs(b) abs(b)
6 1 1
9 2 2
create table t2 (a int, b int, c varchar(20)) engine=columnstore;
insert into t2 values (1, 1, "abc"), (2, 1, "abc"), (1, 2, "abcd"), (3, 2, "abcd");
select sum(a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), length(c);
sum(a) abs(b) length(c) abs(b) length(c)
3 1 3 1 3
4 2 4 2 4
select sum(a), abs(b), abs(b), length(c), length(c) from t2 group by abs(b), length(c);
sum(a) abs(b) abs(b) length(c) length(c)
3 1 1 3 3
4 2 2 4 4
select sum(a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), abs(b), length(c), length(c);
sum(a) abs(b) length(c) abs(b) length(c)
3 1 3 1 3
4 2 4 2 4
select sum(a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), length(c), length(c), abs(b);
sum(a) abs(b) length(c) abs(b) length(c)
3 1 3 1 3
4 2 4 2 4
select sum(distinct a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), length(c) order by abs(b);
sum(distinct a) abs(b) length(c) abs(b) length(c)
3 1 3 1 3
4 2 4 2 4
select sum(distinct a), abs(b), abs(b), length(c), length(c) from t2 group by abs(b), length(c) order by abs(b);
sum(distinct a) abs(b) abs(b) length(c) length(c)
3 1 1 3 3
4 2 2 4 4
select sum(distinct a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), abs(b), length(c), length(c);
sum(distinct a) abs(b) length(c) abs(b) length(c)
3 1 3 1 3
4 2 4 2 4
select sum(distinct a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), length(c), length(c), abs(b);
sum(distinct a) abs(b) length(c) abs(b) length(c)
3 1 3 1 3
4 2 4 2 4
select sum(distinct t1.a), abs(t2.b), abs(t2.b) from t1 join t2 on t1.a = t2.a group by abs(t2.b);
sum(distinct t1.a) abs(t2.b) abs(t2.b)
3 1 1
4 2 2
select sum(t1.a), abs(t2.b), abs(t2.b) from t1 join t2 on t1.a = t2.a group by abs(t2.b);
sum(t1.a) abs(t2.b) abs(t2.b)
3 1 1
4 2 2
create table t3 (a datetime, b int) engine=columnstore;
insert into t3 values ("2007-01-30 21:31:07", 1), ("2007-01-30 21:31:07", 3), ("2007-01-29 21:31:07", 1), ("2007-01-29 21:31:07", 2);
select distinct DAYOFWEEK(a) as C1, DAYOFWEEK(a) as C2, SUM(b) from t3 group by DAYOFWEEK(a), DAYOFWEEK(a);
C1 C2 SUM(b)
2 2 3
3 3 4
DROP TABLE t1, t2, t3;
DROP DATABASE `mcol-5476`;
mysql-test/columnstore/bugfixes/mcol-5476.test (new file, 59 lines)
@@ -0,0 +1,59 @@
-- source ../include/have_columnstore.inc

--disable_warnings
DROP DATABASE IF EXISTS `mcol-5476`;
--enable_warnings
CREATE DATABASE `mcol-5476`;
USE `mcol-5476`;

create table t1 (a int, b int) engine=columnstore;
insert into t1 values (1, 1), (2, 1), (3, 1), (4, 2), (5, 2);
#prep2aggregate
sorted_result;
select sum(a), abs(b), abs(b) from t1 group by abs(b), abs(b);
sorted_result;
select sum(a), abs(b), abs(b) from t1 group by abs(b);
#prep2distinctaggregate
sorted_result;
select sum(distinct a), abs(b), abs(b) from t1 group by abs(b), abs(b);
sorted_result;
select sum(distinct a), abs(b), abs(b) from t1 group by abs(b);

create table t2 (a int, b int, c varchar(20)) engine=columnstore;
insert into t2 values (1, 1, "abc"), (2, 1, "abc"), (1, 2, "abcd"), (3, 2, "abcd");
#prep2aggregate
sorted_result;
select sum(a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), length(c);
sorted_result;
select sum(a), abs(b), abs(b), length(c), length(c) from t2 group by abs(b), length(c);
sorted_result;
select sum(a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), abs(b), length(c), length(c);
sorted_result;
select sum(a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), length(c), length(c), abs(b);
#prep2distinctaggregate
sorted_result;
select sum(distinct a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), length(c) order by abs(b);
sorted_result;
select sum(distinct a), abs(b), abs(b), length(c), length(c) from t2 group by abs(b), length(c) order by abs(b);
sorted_result;
select sum(distinct a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), abs(b), length(c), length(c);
sorted_result;
select sum(distinct a), abs(b), length(c), abs(b), length(c) from t2 group by abs(b), length(c), length(c), abs(b);

#Joins
#prep1distinctaggregate
sorted_result;
select sum(distinct t1.a), abs(t2.b), abs(t2.b) from t1 join t2 on t1.a = t2.a group by abs(t2.b);
#prep1aggregate
sorted_result;
select sum(t1.a), abs(t2.b), abs(t2.b) from t1 join t2 on t1.a = t2.a group by abs(t2.b);

#User test case
create table t3 (a datetime, b int) engine=columnstore;
insert into t3 values ("2007-01-30 21:31:07", 1), ("2007-01-30 21:31:07", 3), ("2007-01-29 21:31:07", 1), ("2007-01-29 21:31:07", 2);
sorted_result;
select distinct DAYOFWEEK(a) as C1, DAYOFWEEK(a) as C2, SUM(b) from t3 group by DAYOFWEEK(a), DAYOFWEEK(a);

DROP TABLE t1, t2, t3;
DROP DATABASE `mcol-5476`;