MCOL-4234 and MCOL-5772 cherry-picked into [stable 23.10] (#3226)

* MCOL-4234: improve GROUP BY and ORDER BY interaction (#3194) This patch fixes the problem in MCOL-4234 and also generally improves the behavior of GROUP BY. It does so by introducing a "dummy" aggregate and by wrapping columns into it. This allows for columns that are not in GROUP BY to be used more freely, for example, in SELECT * FROM tbl GROUP BY col - all columns that are not "col" will be wrapped into an aggregate and the query will proceed to execution. The dummy aggregate itself does nothing more than remember the last value passed into it. There is also an additional error message that tries to explain what types of expressions can be wrapped into an aggregate. * MCOL-5772: incorrect ORDER BY ordering for columns not in GROUP BY (#3214) When the ORDER BY column is not in GROUP BY, is not an aggregate and there is a SELECT column that is also not an aggregate, there was a problem: ordering happened on the SELECTed column, not the ORDERed one. This patch fixes that particular problem and also performs some tidying around the newly added aggregate. --------- Co-authored-by: Leonid Fedorov <79837786+mariadb-LeonidFedorov@users.noreply.github.com>
2025-07-29 08:21:15 +03:00 · 2024-06-27 23:31:53 +03:00
parent 9f4231f87f
commit db4cb1d657
18 changed files with 370 additions and 24 deletions
--- a/dbcon/execplan/aggregatecolumn.h
+++ b/dbcon/execplan/aggregatecolumn.h
@ -75,6 +75,7 @@ class AggregateColumn : public ReturnedColumn
    BIT_XOR,
    GROUP_CONCAT,
    JSON_ARRAYAGG,
+    SELECT_SOME,
    UDAF,
    MULTI_PARM
  };
--- a/dbcon/joblist/joblistfactory.cpp
+++ b/dbcon/joblist/joblistfactory.cpp
@ -541,10 +541,12 @@ void checkGroupByCols(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo)
        // Not an aggregate column and not an expression of aggregation.
        if (dynamic_cast<AggregateColumn*>(orderByCols[i].get()) == NULL &&
            orderByCols[i]->aggColumnList().empty())
+	{
          csep->groupByCols().push_back(orderByCols[i]);
 	}
      }
    }
+  }

  if (csep->groupByCols().size() > 0)
  {
@ -604,8 +606,10 @@ void checkGroupByCols(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo)
    }

    if (csep->groupByCols().size() != uniqGbCols.size())
+    {
      (csep)->groupByCols(uniqGbCols);
    }
+  }
 }

 void checkAggregation(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo)
--- a/dbcon/joblist/tupleaggregatestep.cpp
+++ b/dbcon/joblist/tupleaggregatestep.cpp
@ -124,6 +124,8 @@ inline RowAggFunctionType functionIdMap(int planFuncId)

    case AggregateColumn::MULTI_PARM: return ROWAGG_MULTI_PARM;

+    case AggregateColumn::SELECT_SOME: return ROWAGG_SELECT_SOME;
+
    default: return ROWAGG_FUNCT_UNDEFINE;
  }
 }
@ -1204,6 +1206,7 @@ void TupleAggregateStep::prep1PhaseAggregate(JobInfo& jobInfo, vector<RowGroup>&
    {
      case ROWAGG_MIN:
      case ROWAGG_MAX:
+      case ROWAGG_SELECT_SOME:
      {
        oidsAgg.push_back(oidsProj[colProj]);
        keysAgg.push_back(key);
@ -1766,6 +1769,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vector<Ro
      {
        case ROWAGG_MIN:
        case ROWAGG_MAX:
+        case ROWAGG_SELECT_SOME:
        {
          oidsAgg.push_back(oidsProj[colProj]);
          keysAgg.push_back(aggKey);
@ -2176,6 +2180,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vector<Ro
        case ROWAGG_BIT_AND:
        case ROWAGG_BIT_OR:
        case ROWAGG_BIT_XOR:
+        case ROWAGG_SELECT_SOME:
        default:
        {
          AGG_MAP::iterator it = aggFuncMap.find(
@ -2832,7 +2837,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vector<Ro
                    f->fAggFunction == ROWAGG_STATS || f->fAggFunction == ROWAGG_BIT_AND ||
                    f->fAggFunction == ROWAGG_BIT_OR || f->fAggFunction == ROWAGG_BIT_XOR ||
                    f->fAggFunction == ROWAGG_CONSTANT || f->fAggFunction == ROWAGG_GROUP_CONCAT ||
-                    f->fAggFunction == ROWAGG_JSON_ARRAY))
+                    f->fAggFunction == ROWAGG_JSON_ARRAY || f->fAggFunction == ROWAGG_SELECT_SOME))
          {
            funct.reset(new RowAggFunctionCol(f->fAggFunction, f->fStatsFunction, f->fInputColumnIndex,
                                              f->fOutputColumnIndex, f->fAuxColumnIndex - multiParms));
@ -3127,6 +3132,7 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector<RowGroup>
      {
        case ROWAGG_MIN:
        case ROWAGG_MAX:
+        case ROWAGG_SELECT_SOME:
        {
          oidsAggPm.push_back(oidsProj[colProj]);
          keysAggPm.push_back(aggKey);
@ -4044,6 +4050,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(JobInfo& jobInfo, vector<R
      {
        case ROWAGG_MIN:
        case ROWAGG_MAX:
+        case ROWAGG_SELECT_SOME:
        {
          oidsAggPm.push_back(oidsProj[colProj]);
          keysAggPm.push_back(aggKey);
@ -5079,7 +5086,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(JobInfo& jobInfo, vector<R
                     f->fAggFunction == ROWAGG_MIN || f->fAggFunction == ROWAGG_MAX ||
                     f->fAggFunction == ROWAGG_STATS || f->fAggFunction == ROWAGG_BIT_AND ||
                     f->fAggFunction == ROWAGG_BIT_OR || f->fAggFunction == ROWAGG_BIT_XOR ||
-                     f->fAggFunction == ROWAGG_CONSTANT)
+                     f->fAggFunction == ROWAGG_CONSTANT || f->fAggFunction == ROWAGG_SELECT_SOME)
            {
              funct.reset(new RowAggFunctionCol(f->fAggFunction, f->fStatsFunction, f->fInputColumnIndex,
                                                f->fOutputColumnIndex, f->fAuxColumnIndex - multiParms));
--- a/dbcon/mysql/ha_mcs_execplan.cpp
+++ b/dbcon/mysql/ha_mcs_execplan.cpp
@ -474,6 +474,22 @@ bool sortItemIsInGrouping(Item* sort_item, ORDER* groupcol)
  if (sort_item->type() == Item::SUM_FUNC_ITEM)
  {
    found = true;
+    return found;
+  }
+
+  {
+    // as we now can wrap ORDER BY or SELECT expression into
+    // an aggregate, we can pass FIELD_ITEM as "found" as well.
+    Item* item = sort_item;
+    while (item->type() == Item::REF_ITEM)
+    {
+      const Item_ref* ref_item = static_cast<const Item_ref*>(item);
+      item = (Item*)*ref_item->ref;
+    }
+    if (item->type() == Item::FIELD_ITEM || item->type() == Item::CONST_ITEM || item->type() == Item::NULL_ITEM)
+    {
+      return true;
+    }
  }

  // A function that contains an aggregate function
@ -3189,6 +3205,9 @@ CalpontSystemCatalog::ColType colType_MysqlToIDB(const Item* item)

        if (item->field_type() == MYSQL_TYPE_BLOB)
        {
+          // We default to BLOB, but then try to correct type,
+          // because many textual types in server have type_handler_blob
+          // (and variants) as their type.
          ct.colDataType = CalpontSystemCatalog::BLOB;
        }
      }
@ -4902,6 +4921,70 @@ static void processAggregateColumnConstArg(gp_walk_info& gwi, SRCP& parm, Aggreg
  }
 }

+// Scans one SELECT / ORDER BY item for an aggregate and, if one is found,
+// raises gwi.implicitExplicitGroupBy.
+// item - item to inspect; Item_ref wrappers are followed to the referenced item.
+// gwi  - walk info; only its implicitExplicitGroupBy flag is read and written.
+// Only SUM_FUNC_ITEM nodes and the arguments of FUNC_ITEM nodes are examined;
+// every other item type leaves the flag untouched.
+void analyzeForImplicitGroupBy(Item* item, gp_walk_info& gwi)
+{
+  // The flag is sticky: once raised there is nothing more to learn.
+  if (gwi.implicitExplicitGroupBy)
+    return;
+
+  // Follow reference wrappers down to the item they point at.
+  for (; item->type() == Item::REF_ITEM; item = *static_cast<Item_ref*>(item)->ref)
+  {
+  }
+
+  switch (item->type())
+  {
+    case Item::SUM_FUNC_ITEM:
+      // An aggregate on its own implies grouping.
+      gwi.implicitExplicitGroupBy = true;
+      return;
+
+    case Item::FUNC_ITEM:
+    {
+      // A plain function may hide an aggregate among its arguments;
+      // recurse and stop as soon as one argument raises the flag.
+      Item_func* func = static_cast<Item_func*>(item);
+      for (uint32_t argNo = 0; argNo < func->argument_count() && !gwi.implicitExplicitGroupBy; ++argNo)
+      {
+        analyzeForImplicitGroupBy(func->arguments()[argNo], gwi);
+      }
+      return;
+    }
+
+    default:
+      return;
+  }
+}
+
+// Wraps a returned column into the dummy SELECT_SOME aggregate when the query
+// groups (gwi.implicitExplicitGroupBy is set) and the column is not one of the
+// GROUP BY items.
+// rc         - column built for baseItem; may be null when building failed,
+//              in which case it is returned as is for the caller to handle.
+// gwi        - walk info; supplies the grouping flag, session id and time zone.
+// select_lex - its group_list is searched for an item equal to baseItem.
+// baseItem   - the server item rc was built from.
+// Returns rc itself when no wrapping is needed; otherwise a newly allocated
+// AggregateColumn whose single parameter is rc (rc is handed to an SRCP stored
+// in the aggregate's parameter list).
+ReturnedColumn* wrapIntoAggregate(ReturnedColumn* rc, gp_walk_info& gwi, SELECT_LEX& select_lex, Item* baseItem)
+{
+  // Nothing to wrap: either the column could not be built, or the query
+  // does not group at all.
+  if (rc == nullptr || !gwi.implicitExplicitGroupBy)
+  {
+    return rc;
+  }
+
+  // Aggregates and constants are already valid in a grouped query.
+  if (dynamic_cast<AggregateColumn*>(rc) != nullptr || dynamic_cast<ConstantColumn*>(rc) != nullptr)
+  {
+    return rc;
+  }
+
+  // Columns that appear in GROUP BY stay as they are.
+  ORDER* groupcol = static_cast<ORDER*>(select_lex.group_list.first);
+
+  while (groupcol)
+  {
+    if (baseItem->eq(*groupcol->item, false))
+    {
+      return rc;
+    }
+    groupcol = groupcol->next;
+  }
+
+  cal_connection_info* ci = static_cast<cal_connection_info*>(get_fe_conn_info_ptr());
+
+  // Build the wrapper, copying the presentation attributes of the wrapped
+  // column and giving the wrapper a fresh expression id.
+  AggregateColumn* ac = new AggregateColumn(gwi.sessionid);
+  ac->timeZone(gwi.timeZone);
+  ac->alias(rc->alias());
+  ac->aggOp(AggregateColumn::SELECT_SOME);
+  ac->asc(rc->asc());
+  ac->charsetNumber(rc->charsetNumber());
+  ac->expressionId(ci->expressionId++);
+
+  ac->aggParms().push_back(SRCP(rc));
+  return ac;
+}
+
 ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
 {
  // MCOL-1201 For UDAnF multiple parameters
@ -5009,8 +5092,9 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
        if (ord_col->type() == Item::CONST_ITEM && ord_col->cmp_type() == INT_RESULT)
        {
          Item_int* id = (Item_int*)ord_col;
+          int64_t index = id->val_int();

-          if (id->val_int() > (int)selCols.size())
+          if (index > (int)selCols.size() || index < 1)
          {
            gwi.fatalParseError = true;

@ -5020,8 +5104,8 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
            return NULL;
          }

-          rc = selCols[id->val_int() - 1]->clone();
-          rc->orderPos(id->val_int() - 1);
+          rc = selCols[index - 1]->clone();
+          rc->orderPos(index - 1);
        }
        else
        {
@ -7483,6 +7567,32 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
  }
 #endif

+  // analyze SELECT and ORDER BY parts - do they have implicit GROUP BY induced by aggregates?
+  {
+    if (select_lex.group_list.first)
+    {
+      // we have an explicit GROUP BY.
+      gwi.implicitExplicitGroupBy = true;
+    }
+    else
+    {
+      // do we have an implicit GROUP BY?
+      List_iterator_fast<Item> it(select_lex.item_list);
+      Item* item;
+
+      while ((item = it++))
+      {
+        analyzeForImplicitGroupBy(item, gwi);
+      }
+      SQL_I_List<ORDER> order_list = select_lex.order_list;
+      ORDER* ordercol = static_cast<ORDER*>(order_list.first);
+
+      for (; ordercol; ordercol = ordercol->next)
+      {
+        analyzeForImplicitGroupBy(*(ordercol->item), gwi);
+      }
+    }
+  }
  // populate returnedcolumnlist and columnmap
  List_iterator_fast<Item> it(select_lex.item_list);
  Item* item;
@ -7509,6 +7619,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i

    // @bug 5916. Need to keep checking until getting concret item in case
    // of nested view.
+    Item* baseItem = item;
    while (item->type() == Item::REF_ITEM)
    {
      Item_ref* ref = (Item_ref*)item;
@ -7533,8 +7644,6 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i

        if (sc)
        {
-          boost::shared_ptr<SimpleColumn> spsc(sc);
-
          string fullname;
          String str;
          ifp->print(&str, QT_ORDINARY);
@ -7550,10 +7659,14 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
              sc->alias(itemAlias);
          }

-          gwi.returnedCols.push_back(spsc);
+          // We need to look into GROUP BY columns to decide if we need to wrap a column.
+          ReturnedColumn* rc = wrapIntoAggregate(sc, gwi, select_lex, baseItem);
+
+          SRCP sprc(rc);
+          gwi.returnedCols.push_back(sprc);

          gwi.columnMap.insert(
-              CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), spsc));
+              CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), sprc));
          TABLE_LIST* tmp = 0;

          if (ifp->cached_table)
@ -8400,6 +8513,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
    {
      if ((*(ordercol->item))->type() == Item::WINDOW_FUNC_ITEM)
        gwi.hasWindowFunc = true;
+      // XXX: TODO: implement a proper analysis of what we support.
      // MCOL-2166 Looking for this sorting item in GROUP_BY items list.
      // Shouldn't look into this if query doesn't have GROUP BY or
      // aggregations
@ -8411,10 +8525,10 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
        getColNameFromItem(osr, *ordercol->item);
        Message::Args args;
        args.add(ostream.str());
-        string emsg = IDBErrorInfo::instance()->errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args);
+        string emsg = IDBErrorInfo::instance()->errorMsg(ERR_NOT_SUPPORTED_GROUPBY_ORDERBY_EXPRESSION, args);
        gwi.parseErrorText = emsg;
        setError(gwi.thd, ER_INTERNAL_ERROR, emsg, gwi);
-        return ERR_NOT_GROUPBY_EXPRESSION;
+        return ERR_NOT_SUPPORTED_GROUPBY_ORDERBY_EXPRESSION;
      }
    }

@ -8464,6 +8578,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
          else
          {
            rc = buildReturnedColumn(ord_item, gwi, gwi.fatalParseError);
+
+            rc = wrapIntoAggregate(rc, gwi, select_lex, ord_item);
          }
          // @bug5501 try item_ptr if item can not be fixed. For some
          // weird dml statement state, item can not be fixed but the
@ -8495,6 +8611,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
        gwi.orderByCols.push_back(SRCP(rc));
      }
    }
+
    // make sure columnmap, returnedcols and count(*) arg_list are not empty
    TableMap::iterator tb_iter = gwi.tableMap.begin();

--- a/dbcon/mysql/ha_mcs_impl_if.h
+++ b/dbcon/mysql/ha_mcs_impl_if.h
@ -141,6 +141,9 @@ struct gp_walk_info
  std::vector<execplan::CalpontSystemCatalog::TableAliasName> correlatedTbNameVec;
  ClauseType clauseType;
  execplan::CalpontSystemCatalog::TableAliasName viewName;
+  // we can have explicit GROUP BY and implicit one, triggered by aggregate in projection or ORDER BY.
+  // this flag tells us whether we have either case.
+  bool implicitExplicitGroupBy;
  bool aggOnSelect;
  bool hasWindowFunc;
  bool hasSubSelect;
@ -186,6 +189,7 @@ struct gp_walk_info
   , subSelectType(uint64_t(-1))
   , subQuery(0)
   , clauseType(INIT)
+   , implicitExplicitGroupBy(false)
   , aggOnSelect(false)
   , hasWindowFunc(false)
   , hasSubSelect(false)
--- a/mysql-test/columnstore/basic/r/MCOL-5772-hidden-order-by.result
+++ b/mysql-test/columnstore/basic/r/MCOL-5772-hidden-order-by.result
@ -0,0 +1,23 @@
+DROP DATABASE IF EXISTS MCOL5772;
+CREATE DATABASE MCOL5772;
+USE MCOL5772;
+CREATE TABLE products (
+product_id INT,
+product_name VARCHAR(100),
+category VARCHAR(50),
+unit_price DECIMAL(10, 2),
+stock_quantity INT
+) ENGINE=Columnstore;
+INSERT INTO products VALUES
+(1, 'Laptop', 'Electronics', 1200.00, 50),
+(2, 'Smartphone', 'Electronics', 800.00, 100),
+(3, 'Coffee Maker', 'Appliances', 50.00, 30),
+(4, 'Backpack', 'Fashion', 40.00, 80),
+(5, 'Desk Chair', 'Furniture', 150.00, 20);
+SELECT product_name, SUM(stock_quantity) AS total_stock FROM products GROUP BY category ORDER BY stock_quantity;
+product_name	total_stock
+Desk Chair	20
+Coffee Maker	30
+Backpack	80
+Smartphone	150
+DROP DATABASE MCOL5772;
--- a/mysql-test/columnstore/basic/r/mcol-4525.result
+++ b/mysql-test/columnstore/basic/r/mcol-4525.result
@ -80,7 +80,8 @@ SET columnstore_select_handler=ON;
 SELECT SUM(col1) AS col2 FROM t1 GROUP BY col2;
 ERROR 42000: The storage engine for the table doesn't support MCS-2016: Non supported item 'col2' on the GROUP BY list.
 SELECT col1 c FROM t1 ORDER BY AVG(col1);
-ERROR HY000: Internal error: MCS-2021: 'c' is not in GROUP BY clause. All non-aggregate columns in the SELECT and ORDER BY clause must be included in the GROUP BY clause.
+c
+10
 SET columnstore_select_handler=AUTO;
 SELECT SUM(col1) AS col2 FROM t1 GROUP BY col2;
 col2
--- a/mysql-test/columnstore/basic/r/mcs35_select_group_by.result
+++ b/mysql-test/columnstore/basic/r/mcs35_select_group_by.result
@ -56,7 +56,9 @@ NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL
 NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL
 NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL
 SELECT * FROM t1 GROUP BY t1_tinyint;
-ERROR HY000: Internal error: MCS-2021: '`mcs35_db1`.`t1`.`t1_int`' is not in GROUP BY clause. All non-aggregate columns in the SELECT and ORDER BY clause must be included in the GROUP BY clause.
+t1_tinyint	t1_int	t1_bigint	t1_double	t1_float	t1_blob	t1_text	t1_char	t1_varchar	t1_datetime
+0	NULL	403685477580676	54.797693231	8.40287	222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222	dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd	d	hello world!	4387-11-08 11:22:30
+NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL
 SELECT COUNT(*) FROM t1 GROUP BY t1_tinyint;
 COUNT(*)
 11
--- a/mysql-test/columnstore/basic/r/mcs36_select_order_by_group_by.result
+++ b/mysql-test/columnstore/basic/r/mcs36_select_order_by_group_by.result
@ -69,7 +69,11 @@ spID	userid	MIN(t1.score)
 3	3	3
 SELECT t1.spID, t2.userid, MIN(t1.score) 
 FROM t1, t2 WHERE t1.userID=t2.userID GROUP BY t2.userid, t1.spID ORDER BY t1.date;
-ERROR HY000: Internal error: MCS-2021: 'mcs36_db1.t1.date' is not in GROUP BY clause. All non-aggregate columns in the SELECT and ORDER BY clause must be included in the GROUP BY clause.
+spID	userid	MIN(t1.score)
+1	1	1
+2	1	1
+2	2	2
+3	3	3
 SELECT t2.userid, MIN(t1.score)
 FROM t1, t2 WHERE t1.userID=t2.userID GROUP BY unknown ORDER BY t2.userid;
 ERROR 42S22: Unknown column 'unknown' in 'group statement'
@ -78,5 +82,20 @@ FROM t1, t2 WHERE t1.userID=t2.userID GROUP BY t2.userid ORDER BY unknown;
 ERROR 42S22: Unknown column 'unknown' in 'order clause'
 SELECT t2.userid, MIN(t1.score)
 FROM t1, t2 WHERE t1.userID=t2.userID GROUP BY t2.userid ORDER BY NULL;
-ERROR HY000: Internal error: MCS-2021: 'unknown db.unknown table.unknown field' is not in GROUP BY clause. All non-aggregate columns in the SELECT and ORDER BY clause must be included in the GROUP BY clause.
+userid	MIN(t1.score)
+1	1
+2	2
+3	3
+SELECT * FROM t1 GROUP BY spID, userID ORDER BY score ASC, spID, userID;
+spID	userID	score	lsg	date
+1	1	1		0000-00-00
+2	1	1		0000-00-00
+2	2	2		0000-00-00
+3	3	3		0000-00-00
+SELECT * FROM t1 GROUP BY spID, userID ORDER BY score DESC, spID, userID;
+spID	userID	score	lsg	date
+3	3	3		0000-00-00
+2	2	2		0000-00-00
+1	1	1		0000-00-00
+2	1	1		0000-00-00
 DROP DATABASE IF EXISTS mcs36_db1;
--- a/mysql-test/columnstore/basic/r/mcs76_having.result
+++ b/mysql-test/columnstore/basic/r/mcs76_having.result
@ -23,7 +23,9 @@ col1	col2
 3	sss
 4	ooo
 SELECT col1, col2, SUM(LENGTH(col2)) FROM t1 GROUP BY col1 HAVING col1 > 1 AND col2 LIKE '%o%' ORDER BY col1;
-ERROR HY000: Internal error: MCS-2021: '`mcs76_db`.`t1`.`col2`' is not in GROUP BY clause. All non-aggregate columns in the SELECT and ORDER BY clause must be included in the GROUP BY clause.
+col1	col2	SUM(LENGTH(col2))
+2	oooooooooooooooooooo	40
+4	ooo	6
 CREATE TABLE t2(col1 INT, col2 DATETIME)ENGINE=Columnstore;
 INSERT INTO t2 VALUES(1, '2020-2-2'),(2, '2020-3-3'),(5,'2020-6-6'),(6, '2020-7-7');
 SELECT t1.col1, SUM(t1.col1*t2.col1) AS a FROM t1 JOIN t2 ON t1.col1 = t2.col1 GROUP BY t1.col1 HAVING a>1 ORDER BY t1.col1;
--- a/mysql-test/columnstore/basic/t/MCOL-5772-hidden-order-by.test
+++ b/mysql-test/columnstore/basic/t/MCOL-5772-hidden-order-by.test
@ -0,0 +1,28 @@
+# Order by a column that is not in GROUP BY and SELECT parts
+# should be correct.
+--disable_warnings
+DROP DATABASE IF EXISTS MCOL5772;
+--enable_warnings
+
+CREATE DATABASE MCOL5772;
+
+USE MCOL5772;
+
+CREATE TABLE products (
+product_id INT,
+product_name VARCHAR(100),
+category VARCHAR(50),
+unit_price DECIMAL(10, 2),
+stock_quantity INT
+) ENGINE=Columnstore;
+
+INSERT INTO products VALUES
+(1, 'Laptop', 'Electronics', 1200.00, 50),
+(2, 'Smartphone', 'Electronics', 800.00, 100),
+(3, 'Coffee Maker', 'Appliances', 50.00, 30),
+(4, 'Backpack', 'Fashion', 40.00, 80),
+(5, 'Desk Chair', 'Furniture', 150.00, 20);
+
+SELECT product_name, SUM(stock_quantity) AS total_stock FROM products GROUP BY category ORDER BY stock_quantity;
+
+DROP DATABASE MCOL5772;
--- a/mysql-test/columnstore/basic/t/mcol-4525.test
+++ b/mysql-test/columnstore/basic/t/mcol-4525.test
@ -85,7 +85,6 @@ INSERT INTO t1 VALUES(10,'bob',10);
 SET columnstore_select_handler=ON;
 --error 1178
 SELECT SUM(col1) AS col2 FROM t1 GROUP BY col2;
--error 1815
 SELECT col1 c FROM t1 ORDER BY AVG(col1);
 SET columnstore_select_handler=AUTO;
 SELECT SUM(col1) AS col2 FROM t1 GROUP BY col2;
--- a/mysql-test/columnstore/basic/t/mcs35_select_group_by.test
+++ b/mysql-test/columnstore/basic/t/mcs35_select_group_by.test
@ -52,7 +52,7 @@ INSERT INTO t1  VALUES(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NUL
 --sorted_result
 SELECT * FROM t1;
 #Without aggregate function
--error 1815
+--sorted_result
 SELECT * FROM t1 GROUP BY t1_tinyint;

 --sorted_result
--- a/mysql-test/columnstore/basic/t/mcs36_select_order_by_group_by.test
+++ b/mysql-test/columnstore/basic/t/mcs36_select_order_by_group_by.test
@ -60,8 +60,7 @@ SELECT t1.spID, t2.userid, MIN(t1.score)
 FROM t1, t2 WHERE t1.userID=t2.userID GROUP BY t2.userid, t1.spID ORDER BY t1.spID, t2.userid;

 # columns in ORDER BY clause not included in the GROUP BY clause.
-#Suspected bug. Innodb succeeds.
--error 1815
+--sorted_result
 SELECT t1.spID, t2.userid, MIN(t1.score) 
 FROM t1, t2 WHERE t1.userID=t2.userID GROUP BY t2.userid, t1.spID ORDER BY t1.date;

@ -76,10 +75,13 @@ SELECT t2.userid, MIN(t1.score)
 FROM t1, t2 WHERE t1.userID=t2.userID GROUP BY t2.userid ORDER BY unknown;

 # order by null
-#Suspected bug. Innodb succeeds.
--error 1815
+# Innodb succeeds - it is a workaround to speed up GROUP BY.
+--sorted_result
 SELECT t2.userid, MIN(t1.score)
 FROM t1, t2 WHERE t1.userID=t2.userID GROUP BY t2.userid ORDER BY NULL;

+SELECT * FROM t1 GROUP BY spID, userID ORDER BY score ASC, spID, userID;
+SELECT * FROM t1 GROUP BY spID, userID ORDER BY score DESC, spID, userID;
+
 #Clean up
 DROP DATABASE IF EXISTS mcs36_db1;
--- a/mysql-test/columnstore/basic/t/mcs76_having.test
+++ b/mysql-test/columnstore/basic/t/mcs76_having.test
@ -18,7 +18,7 @@ SELECT col2 FROM t1 GROUP BY col2 HAVING col2 LIKE '%o%' ORDER BY col2;
 SELECT col1 FROM t1 GROUP BY col1 HAVING col1 > 1 ORDER BY col1;
 SELECT col1, col2 FROM t1 GROUP BY col1, col2 HAVING col1 > 1 AND col2 LIKE '%o%' ORDER BY col1;
 SELECT col1, col2 FROM t1 GROUP BY col1, col2 HAVING col1 > 1 OR col2 LIKE '%o%' ORDER BY col1;
--error ER_INTERNAL_ERROR
+--sorted_result
 SELECT col1, col2, SUM(LENGTH(col2)) FROM t1 GROUP BY col1 HAVING col1 > 1 AND col2 LIKE '%o%' ORDER BY col1;

 CREATE TABLE t2(col1 INT, col2 DATETIME)ENGINE=Columnstore;
--- a/utils/loggingcpp/ErrorMessage.txt
+++ b/utils/loggingcpp/ErrorMessage.txt
@ -108,6 +108,8 @@

 2060	ERR_UNION_DECIMAL_OVERFLOW	Union operation exceeds maximum DECIMAL precision of 38.

+2061	ERR_NOT_SUPPORTED_GROUPBY_ORDERBY_EXPRESSION	%1% is not in GROUP BY clause, not a column or an expression that contains function.
+
 # Sub-query errors
 3001	ERR_NON_SUPPORT_SUB_QUERY_TYPE	This subquery type is not supported yet.
 3002	ERR_MORE_THAN_1_ROW	Subquery returns more than 1 row.
--- a/utils/rowgroup/rowaggregation.cpp
+++ b/utils/rowgroup/rowaggregation.cpp
@ -1254,6 +1254,108 @@ void RowAggregation::doMinMax(const Row& rowIn, int64_t colIn, int64_t colOut, i
    }
  }
 }
+//------------------------------------------------------------------------------
+// "Select some" dummy aggregate: copy the input field into the output field,
+// overwriting whatever was stored there by a previous row. No null filtering
+// or comparison is done here; the value is transferred as is.
+// The copy routine is chosen by the type of column colIn in fRowGroupIn.
+// rowIn(in)    - Row to be included in aggregation.
+// colIn(in)    - column in the input row group
+// colOut(in)   - column in the output row group
+// Throws std::logic_error for a DECIMAL column of unexpected width; an
+// unhandled column type trips idbassert_s.
+//------------------------------------------------------------------------------
+void RowAggregation::doSelectSome(const Row& rowIn, int64_t colIn, int64_t colOut)
+{
+  int colDataType = (fRowGroupIn.getColTypes())[colIn];
+
+  switch (colDataType)
+  {
+    case execplan::CalpontSystemCatalog::UTINYINT:
+    case execplan::CalpontSystemCatalog::USMALLINT:
+    case execplan::CalpontSystemCatalog::UMEDINT:
+    case execplan::CalpontSystemCatalog::UINT:
+    case execplan::CalpontSystemCatalog::UBIGINT:
+    case execplan::CalpontSystemCatalog::TINYINT:
+    case execplan::CalpontSystemCatalog::SMALLINT:
+    case execplan::CalpontSystemCatalog::MEDINT:
+    case execplan::CalpontSystemCatalog::INT:
+    case execplan::CalpontSystemCatalog::BIGINT:
+    {
+      fRow.setIntField(rowIn.getIntField(colIn), colOut);
+      break;
+    }
+
+    case execplan::CalpontSystemCatalog::DECIMAL:
+    case execplan::CalpontSystemCatalog::UDECIMAL:
+    {
+      // Wide decimals are copied as 128-bit values, legacy-width ones
+      // through the integer accessors.
+      if (LIKELY(rowIn.getColumnWidth(colIn) == datatypes::MAXDECIMALWIDTH))
+      {
+        fRow.setInt128Field(rowIn.getTSInt128Field(colIn).getValue(), colOut);
+      }
+      else if (rowIn.getColumnWidth(colIn) <= datatypes::MAXLEGACYWIDTH)
+      {
+        fRow.setIntField(rowIn.getIntField(colIn), colOut);
+      }
+      else
+      {
+        idbassert(0);
+        throw std::logic_error("RowAggregation::doSelectSome(): DECIMAL bad length.");
+      }
+
+      break;
+    }
+
+    case execplan::CalpontSystemCatalog::CHAR:
+    case execplan::CalpontSystemCatalog::VARCHAR:
+    case execplan::CalpontSystemCatalog::TEXT:
+    {
+      auto valIn = rowIn.getStringField(colIn);
+      fRow.setStringField(valIn, colOut);
+      break;
+    }
+
+    case execplan::CalpontSystemCatalog::DOUBLE:
+    case execplan::CalpontSystemCatalog::UDOUBLE:
+    {
+      double valIn = rowIn.getDoubleField(colIn);
+      fRow.setDoubleField(valIn, colOut);
+      break;
+    }
+
+    case execplan::CalpontSystemCatalog::FLOAT:
+    case execplan::CalpontSystemCatalog::UFLOAT:
+    {
+      float valIn = rowIn.getFloatField(colIn);
+      fRow.setFloatField(valIn, colOut);
+      break;
+    }
+
+    case execplan::CalpontSystemCatalog::DATE:
+    case execplan::CalpontSystemCatalog::DATETIME:
+    case execplan::CalpontSystemCatalog::TIMESTAMP:
+    case execplan::CalpontSystemCatalog::TIME:
+    {
+      uint64_t valIn = rowIn.getUintField(colIn);
+      fRow.setUintField(valIn, colOut);
+      break;
+    }
+
+    case execplan::CalpontSystemCatalog::LONGDOUBLE:
+    {
+      long double valIn = rowIn.getLongDoubleField(colIn);
+      fRow.setLongDoubleField(valIn, colOut);
+      break;
+    }
+
+    case execplan::CalpontSystemCatalog::CLOB:
+    case execplan::CalpontSystemCatalog::BLOB:
+    {
+      fRow.setVarBinaryField(rowIn.getVarBinaryField(colIn), rowIn.getVarBinaryLength(colIn), colOut);
+      break;
+    }
+    default:
+    {
+      idbassert_s(0, "unknown data type in doSelectSome()");
+      break;
+    }
+  }
+}

 //------------------------------------------------------------------------------
 // Update the sum fields if input is not null.
@ -1723,6 +1825,11 @@ void RowAggregation::updateEntry(const Row& rowIn, std::vector<mcsv1sdk::mcsv1Co
        doUDAF(rowIn, colIn, colOut, colOut + 1, i, rgContextColl);
        break;
      }
+      case ROWAGG_SELECT_SOME:
+      {
+        doSelectSome(rowIn, colIn, colOut);
+        break;
+      }

      default:
      {
@ -1783,6 +1890,12 @@ void RowAggregation::mergeEntries(const Row& rowIn)

      case ROWAGG_UDAF: doUDAF(rowIn, colOut, colOut, colOut + 1, i); break;

+      case ROWAGG_SELECT_SOME:
+      {
+        doSelectSome(rowIn, colOut, colOut);
+        break;
+      }
+
      default:
        std::ostringstream errmsg;
        errmsg << "RowAggregation: function (id = " << (uint64_t)fFunctionCols[i]->fAggFunction
@ -2617,6 +2730,12 @@ void RowAggregationUM::updateEntry(const Row& rowIn, std::vector<mcsv1sdk::mcsv1
        break;
      }

+      case ROWAGG_SELECT_SOME:
+      {
+        doSelectSome(rowIn, colIn, colOut);
+        break;
+      }
+
      default:
      {
        // need a exception to show the value
@ -4211,6 +4330,12 @@ void RowAggregationUMP2::updateEntry(const Row& rowIn, std::vector<mcsv1sdk::mcs
        break;
      }

+      case ROWAGG_SELECT_SOME:
+      {
+        doSelectSome(rowIn, colIn, colOut);
+        break;
+      }
+
      default:
      {
        std::ostringstream errmsg;
@ -4703,6 +4828,12 @@ void RowAggregationDistinct::updateEntry(const Row& rowIn, std::vector<mcsv1sdk:
        break;
      }

+      case ROWAGG_SELECT_SOME:
+      {
+        doSelectSome(rowIn, colIn, colOut);
+        break;
+      }
+
      default:
      {
        std::ostringstream errmsg;
--- a/utils/rowgroup/rowaggregation.h
+++ b/utils/rowgroup/rowaggregation.h
@ -113,7 +113,10 @@ enum RowAggFunctionType
  ROWAGG_DUP_FUNCT,    // duplicate aggregate Function(), except AVG and UDAF, in select
  ROWAGG_DUP_AVG,      // duplicate AVG(column_name) in select
  ROWAGG_DUP_STATS,    // duplicate statistics functions in select
-  ROWAGG_DUP_UDAF      // duplicate UDAF function in select
+  ROWAGG_DUP_UDAF,     // duplicate UDAF function in select
+
+  // a dummy "select some" aggregate needed for non-group-by values in SELECT's with GROUP BY's
+  ROWAGG_SELECT_SOME
 };

 //------------------------------------------------------------------------------
@ -538,6 +541,7 @@ class RowAggregation : public messageqcpp::Serializeable
  virtual void updateEntry(const Row& row, std::vector<mcsv1sdk::mcsv1Context>* rgContextColl = nullptr);
  void mergeEntries(const Row& row);
  virtual void doMinMax(const Row&, int64_t, int64_t, int);
+  virtual void doSelectSome(const Row& rowIn, int64_t colIn, int64_t colOut);
  virtual void doSum(const Row&, int64_t, int64_t, int);
  virtual void doAvg(const Row&, int64_t, int64_t, int64_t, bool merge = false);
  virtual void doStatistics(const Row&, int64_t, int64_t, int64_t);