fix(PrimProc): MCOL-5394 JSON_ARRAYAGG in MCS works differently than in InnoDB

2025-07-29 08:21:15 +03:00 · 2025-03-27 03:38:42 +01:00
parent 21ebd1ac20
commit c618fa284d
4 changed files with 315 additions and 43 deletions
--- a/dbcon/joblist/groupconcat.cpp
+++ b/dbcon/joblist/groupconcat.cpp
@ -498,9 +498,16 @@ void GroupConcator::outputRow(std::ostringstream& oss, const rowgroup::Row& row)
      case CalpontSystemCatalog::INT:
      case CalpontSystemCatalog::BIGINT:
      {
-        int64_t intVal = row.getIntField(*i);
+        if (fIsJsonArrayAgg && row.isNullValue(*i))
+        {
+          oss << "null";
+        }
+        else
+        {
+          int64_t intVal = row.getIntField(*i);

-        oss << intVal;
+          oss << intVal;
+        }

        break;
      }
@ -508,7 +515,14 @@ void GroupConcator::outputRow(std::ostringstream& oss, const rowgroup::Row& row)
      case CalpontSystemCatalog::DECIMAL:
      case CalpontSystemCatalog::UDECIMAL:
      {
-        oss << fixed << row.getDecimalField(*i);
+        if (fIsJsonArrayAgg && row.isNullValue(*i))
+        {
+          oss << "null";
+        }
+        else
+        {
+          oss << fixed << row.getDecimalField(*i);
+        }
        break;
      }

@ -518,18 +532,25 @@ void GroupConcator::outputRow(std::ostringstream& oss, const rowgroup::Row& row)
      case CalpontSystemCatalog::UINT:
      case CalpontSystemCatalog::UBIGINT:
      {
-        uint64_t uintVal = row.getUintField(*i);
-        int scale = (int)row.getScale(*i);
-
-        if (scale == 0)
+        if (fIsJsonArrayAgg && row.isNullValue(*i))
        {
-          oss << uintVal;
+          oss << "null";
        }
        else
        {
-          oss << fixed
-              << datatypes::Decimal(datatypes::TSInt128((int128_t)uintVal), scale,
-                                    datatypes::INT128MAXPRECISION);
+          uint64_t uintVal = row.getUintField(*i);
+          int scale = (int)row.getScale(*i);
+
+          if (scale == 0)
+          {
+            oss << uintVal;
+          }
+          else
+          {
+            oss << fixed
+                << datatypes::Decimal(datatypes::TSInt128((int128_t)uintVal), scale,
+                                      datatypes::INT128MAXPRECISION);
+          }
        }

        break;
@ -541,7 +562,8 @@ void GroupConcator::outputRow(std::ostringstream& oss, const rowgroup::Row& row)
      {
        if (fIsJsonArrayAgg)
        {
-          auto maybeJson = row.getStringField(*i).safeString("");  // XXX: MULL??? it is not checked anywhere.
+          auto maybeJson =
+              row.getStringField(*i).safeString("null");  // NULL field -> "null" text, parsed as JSON null
          const auto j = json::parse(maybeJson, nullptr, false);
          if (j.is_discarded())
          {
@ -562,56 +584,121 @@ void GroupConcator::outputRow(std::ostringstream& oss, const rowgroup::Row& row)
      case CalpontSystemCatalog::DOUBLE:
      case CalpontSystemCatalog::UDOUBLE:
      {
-        oss << setprecision(15) << row.getDoubleField(*i);
+        if (fIsJsonArrayAgg && row.isNullValue(*i))
+        {
+          oss << "null";
+        }
+        else
+        {
+          oss << setprecision(15) << row.getDoubleField(*i);
+        }
        break;
      }

      case CalpontSystemCatalog::LONGDOUBLE:
      {
-        oss << setprecision(15) << row.getLongDoubleField(*i);
+        if (fIsJsonArrayAgg && row.isNullValue(*i))
+        {
+          oss << "null";
+        }
+        else
+        {
+          oss << setprecision(15) << row.getLongDoubleField(*i);
+        }
        break;
      }

      case CalpontSystemCatalog::FLOAT:
      case CalpontSystemCatalog::UFLOAT:
      {
-        oss << row.getFloatField(*i);
+        if (fIsJsonArrayAgg && row.isNullValue(*i))
+        {
+          oss << "null";
+        }
+        else
+        {
+          oss << row.getFloatField(*i);
+        }
        break;
      }

      case CalpontSystemCatalog::DATE:
      {
        if (fIsJsonArrayAgg)
-          oss << std::quoted(DataConvert::dateToString(row.getUintField(*i)));
+        {
+          if (row.isNullValue(*i))
+          {
+            oss << "null";
+          }
+          else
+          {
+            oss << std::quoted(DataConvert::dateToString(row.getUintField(*i)));
+          }
+        }
        else
+        {
          oss << DataConvert::dateToString(row.getUintField(*i));
+        }
        break;
      }

      case CalpontSystemCatalog::DATETIME:
      {
        if (fIsJsonArrayAgg)
-          oss << std::quoted(DataConvert::datetimeToString(row.getUintField(*i)));
+        {
+          if (row.isNullValue(*i))
+          {
+            oss << "null";
+          }
+          else
+          {
+            oss << std::quoted(DataConvert::datetimeToString(row.getUintField(*i)));
+          }
+        }
        else
+        {
          oss << DataConvert::datetimeToString(row.getUintField(*i));
+        }
        break;
      }

      case CalpontSystemCatalog::TIMESTAMP:
      {
        if (fIsJsonArrayAgg)
-          oss << std::quoted(DataConvert::timestampToString(row.getUintField(*i), fTimeZone));
+        {
+          if (row.isNullValue(*i))
+          {
+            oss << "null";
+          }
+          else
+          {
+            oss << std::quoted(DataConvert::timestampToString(row.getUintField(*i), fTimeZone));
+          }
+        }
        else
+        {
          oss << DataConvert::timestampToString(row.getUintField(*i), fTimeZone);
+        }
        break;
      }

      case CalpontSystemCatalog::TIME:
      {
        if (fIsJsonArrayAgg)
-          oss << std::quoted(DataConvert::timeToString(row.getUintField(*i)));
+        {
+          if (row.isNullValue(*i))
+          {
+            oss << "null";
+          }
+          else
+          {
+            oss << std::quoted(DataConvert::timeToString(row.getUintField(*i)));
+          }
+        }
        else
+        {
          oss << DataConvert::timeToString(row.getUintField(*i));
+        }
        break;
      }

@ -621,7 +708,7 @@ void GroupConcator::outputRow(std::ostringstream& oss, const rowgroup::Row& row)
      }
    }

-    i++;
+    ++i;
  }
 }

@ -832,10 +919,12 @@ class GroupConcatOrderByRow
  ordering::CompareRule* fRule;
 };

-class GroupConcatOrderBy::SortingPQ : public priority_queue<GroupConcatOrderByRow, vector<GroupConcatOrderByRow>, less<GroupConcatOrderByRow>>
+class GroupConcatOrderBy::SortingPQ
+ : public priority_queue<GroupConcatOrderByRow, vector<GroupConcatOrderByRow>, less<GroupConcatOrderByRow>>
 {
 public:
-  using BaseType = std::priority_queue<GroupConcatOrderByRow, vector<GroupConcatOrderByRow>, less<GroupConcatOrderByRow>>;
+  using BaseType =
+      std::priority_queue<GroupConcatOrderByRow, vector<GroupConcatOrderByRow>, less<GroupConcatOrderByRow>>;
  using size_type = BaseType::size_type;

  SortingPQ(size_type capacity) : BaseType()
@ -1071,9 +1160,10 @@ void GroupConcatOrderBy::createNewRGData()

 rowgroup::RGDataSizeType GroupConcatOrderBy::getDataSize() const
 {
-  return fMemSize
-      + fOrderByQueue->capacity() * sizeof(GroupConcatOrderByRow)
-      + (fDistinct ? fDistinctMap->size() : 0) * 32 /* TODO: speculative unordered_map memory consumption per item, replace it with counting allocator */;
+  return fMemSize + fOrderByQueue->capacity() * sizeof(GroupConcatOrderByRow) +
+         (fDistinct ? fDistinctMap->size() : 0) * 32 /* TODO: 32 bytes per item is a speculative estimate of
+                                                        unordered_map overhead; use a counting allocator */
+      ;
 }

 void GroupConcatOrderBy::processRow(const rowgroup::Row& row)
@ -1083,7 +1173,7 @@ void GroupConcatOrderBy::processRow(const rowgroup::Row& row)
    return;

  // this row is skipped if any concatenated column is null.
-  if (concatColIsNull(row))
+  if (!fIsJsonArrayAgg && concatColIsNull(row))
    return;

  // if the row count is less than the limit
@ -1356,7 +1446,7 @@ void GroupConcatNoOrder::initialize(const rowgroup::SP_GroupConcat& gcc)
 void GroupConcatNoOrder::processRow(const rowgroup::Row& row)
 {
  // if the row count is less than the limit
-  if (fCurrentLength < fGroupConcatLen && concatColIsNull(row) == false)
+  if (fCurrentLength < fGroupConcatLen && (fIsJsonArrayAgg || concatColIsNull(row) == false))
  {
    copyRow(row, &fRow);

--- a/mysql-test/columnstore/basic/r/func_jsonarrayagg.result
+++ b/mysql-test/columnstore/basic/r/func_jsonarrayagg.result
@ -73,47 +73,47 @@ JSON_VALID(JSON_ARRAYAGG(b))
 1
 SELECT JSON_ARRAYAGG(a), JSON_ARRAYAGG(b) FROM t1;
 JSON_ARRAYAGG(a)	JSON_ARRAYAGG(b)
-[1,1,2,2,2,2,3,1,1,2,2,2,2,3]	["Hello","World","This","Will","Work","!","Hello","World","This","Will","Work","!"]
+[1,1,2,2,2,2,3,1,1,2,2,2,2,3]	["Hello","World","This","Will","Work","!",null,"Hello","World","This","Will","Work","!",null]
 SELECT JSON_ARRAYAGG(a), JSON_ARRAYAGG(b) FROM t1 GROUP BY a;
 JSON_ARRAYAGG(a)	JSON_ARRAYAGG(b)
 [1,1,1,1]	["Hello","World","Hello","World"]
 [2,2,2,2,2,2,2,2]	["This","Will","Work","!","This","Will","Work","!"]
-[3,3]	
+[3,3]	[null,null]
 #
 # DISTINCT and LIMIT
 #
 SELECT JSON_ARRAYAGG(b LIMIT 1) FROM t1;
 JSON_ARRAYAGG(b LIMIT 1)
-["Hello","World","This","Will","Work","!","Hello","World","This","Will","Work","!"]
+["Hello","World","This","Will","Work","!",null,"Hello","World","This","Will","Work","!",null]
 SELECT JSON_ARRAYAGG(b LIMIT 2) FROM t1;
 JSON_ARRAYAGG(b LIMIT 2)
-["Hello","World","This","Will","Work","!","Hello","World","This","Will","Work","!"]
+["Hello","World","This","Will","Work","!",null,"Hello","World","This","Will","Work","!",null]
 SELECT JSON_ARRAYAGG(b LIMIT 1) FROM t1 GROUP BY b;
 JSON_ARRAYAGG(b LIMIT 1)
-
 ["!","!"]
 ["Hello","Hello"]
 ["This","This"]
 ["Will","Will"]
 ["Work","Work"]
 ["World","World"]
+[null,null]
 SELECT JSON_ARRAYAGG(b LIMIT 2) FROM t1 GROUP BY a;
 JSON_ARRAYAGG(b LIMIT 2)
-
 ["Hello","World","Hello","World"]
 ["This","Will","Work","!","This","Will","Work","!"]
+[null,null]
 SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
 JSON_ARRAYAGG(DISTINCT a)
 [3,2,1]
 SELECT JSON_ARRAYAGG(DISTINCT b) FROM t1;
 JSON_ARRAYAGG(DISTINCT b)
-["Will","World","Work","!","This","Hello"]
+["Will","World","Work",null,"!","This","Hello"]
 SELECT JSON_ARRAYAGG(DISTINCT a LIMIT 2) FROM t1;
 JSON_ARRAYAGG(DISTINCT a LIMIT 2)
 [3,2,1]
 SELECT JSON_ARRAYAGG(DISTINCT b LIMIT 2) FROM t1;
 JSON_ARRAYAGG(DISTINCT b LIMIT 2)
-["Will","World","Work","!","This","Hello"]
+["Will","World","Work",null,"!","This","Hello"]
 #
 # JSON aggregation
 #
@ -156,7 +156,7 @@ DROP TABLE t1;
 CREATE TABLE t1 (a INT)ENGINE=COLUMNSTORE;
 SELECT JSON_ARRAYAGG(a) FROM t1;
 JSON_ARRAYAGG(a)
-
+NULL
 DROP TABLE t1;
 #
 #
@ -195,10 +195,10 @@ JSON_ARRAYAGG(DISTINCT a)
 INSERT INTO t1 VALUES (NULL,NULL), (NULL,NULL);
 SELECT JSON_ARRAYAGG(a) FROM t1;
 JSON_ARRAYAGG(a)
-[1,2,3,1,2,3]
+[1,2,3,1,2,3,null,null]
 SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
 JSON_ARRAYAGG(DISTINCT a)
-[3,2,1]
+[null,2,3,1]
 DROP TABLE t1;
 CREATE TABLE t1(a VARCHAR(10), b INT)ENGINE=COLUMNSTORE;
 INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
@ -212,10 +212,10 @@ JSON_ARRAYAGG(DISTINCT a)
 INSERT INTO t1 VALUES (NULL,NULL), (NULL,NULL);
 SELECT JSON_ARRAYAGG(a) FROM t1;
 JSON_ARRAYAGG(a)
-[1,2,3,1,2,3]
+[1,2,3,1,2,3,null,null]
 SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
 JSON_ARRAYAGG(DISTINCT a)
-[3,2,1]
+[null,2,3,1]
 DROP TABLE t1;
 #
 #
@ -233,13 +233,13 @@ JSON_ARRAYAGG(a ORDER BY a ASC)
 INSERT INTO t1 VALUES (NULL);
 SELECT JSON_ARRAYAGG(a) FROM t1;
 JSON_ARRAYAGG(a)
-["red","blue"]
+["red","blue",null]
 SELECT JSON_ARRAYAGG(a ORDER BY a DESC) FROM t1;
 JSON_ARRAYAGG(a ORDER BY a DESC)
-["red","blue"]
+["red","blue",null]
 SELECT JSON_ARRAYAGG(a ORDER BY a ASC) FROM t1;
 JSON_ARRAYAGG(a ORDER BY a ASC)
-["blue","red"]
+[null,"blue","red"]
 DROP TABLE t1;
 set group_concat_max_len=64;
 create table t1 (a varchar(254))ENGINE=COLUMNSTORE;
--- a/mysql-test/columnstore/bugfixes/MCOL-5394-json_arrayagg.result
+++ b/mysql-test/columnstore/bugfixes/MCOL-5394-json_arrayagg.result
@ -0,0 +1,100 @@
+DROP DATABASE IF EXISTS mcol_5394 ;
+CREATE DATABASE mcol_5394 ;
+USE mcol_5394 ;
+CREATE TABLE t1 (a int, k int, b VARCHAR(10)) engine=columnstore;
+INSERT INTO t1 VALUES
+(1, 1, "alfa"),
+(1, 2, null),
+(2, 3, "doi"),
+(1, 4, "unu"),
+(3, 5, "trei"),
+(4, 6, null),
+(4, 7, null),
+(1, 8, "one");
+SELECT a, JSON_ARRAYAGG(b) FROM t1 GROUP BY a;
+a	JSON_ARRAYAGG(b)
+1	["alfa",null,"unu","one"]
+2	["doi"]
+3	["trei"]
+4	[null,null]
+SELECT JSON_ARRAYAGG(b) FROM t1;
+JSON_ARRAYAGG(b)
+["alfa",null,"doi","unu","trei",null,null,"one"]
+PREPARE p1 FROM "SELECT a, JSON_ARRAYAGG(b) FROM t1 GROUP BY a";
+EXECUTE p1;
+a	JSON_ARRAYAGG(b)
+1	["alfa",null,"unu","one"]
+2	["doi"]
+3	["trei"]
+4	[null,null]
+EXECUTE p1;
+a	JSON_ARRAYAGG(b)
+1	["alfa",null,"unu","one"]
+2	["doi"]
+3	["trei"]
+4	[null,null]
+deallocate prepare p1;
+PREPARE p3 FROM
+"SELECT SQL_BUFFER_RESULT  a, JSON_ARRAYAGG(b) FROM t1 GROUP BY a";
+EXECUTE p3;
+a	JSON_ARRAYAGG(b)
+1	["alfa",null,"unu","one"]
+2	["doi"]
+3	["trei"]
+4	[null,null]
+EXECUTE p3;
+a	JSON_ARRAYAGG(b)
+1	["alfa",null,"unu","one"]
+2	["doi"]
+3	["trei"]
+4	[null,null]
+deallocate prepare p3;
+PREPARE p4 FROM "SELECT JSON_ARRAYAGG(b) FROM t1";
+EXECUTE p4;
+JSON_ARRAYAGG(b)
+["alfa",null,"doi","unu","trei",null,null,"one"]
+EXECUTE p4;
+JSON_ARRAYAGG(b)
+["alfa",null,"doi","unu","trei",null,null,"one"]
+deallocate prepare p4;
+SELECT JSON_MERGE_PRESERVE(JSON_ARRAYAGG(b), '[true, false]') FROM t1;
+JSON_MERGE_PRESERVE(JSON_ARRAYAGG(b), '[true, false]')
+["alfa", null, "doi", "unu", "trei", null, null, "one", true, false]
+PREPARE p1 FROM
+"SELECT a, JSON_MERGE_PRESERVE(JSON_ARRAYAGG(b), '[true, false]') FROM t1 GROUP BY a";
+EXECUTE p1;
+a	JSON_MERGE_PRESERVE(JSON_ARRAYAGG(b), '[true, false]')
+1	["alfa", null, "unu", "one", true, false]
+2	["doi", true, false]
+3	["trei", true, false]
+4	[null, null, true, false]
+EXECUTE p1;
+a	JSON_MERGE_PRESERVE(JSON_ARRAYAGG(b), '[true, false]')
+1	["alfa", null, "unu", "one", true, false]
+2	["doi", true, false]
+3	["trei", true, false]
+4	[null, null, true, false]
+deallocate prepare p1;
+PREPARE p4 FROM
+"SELECT JSON_MERGE_PRESERVE(JSON_ARRAYAGG(b), '[true, false]') FROM t1";
+EXECUTE p4;
+JSON_MERGE_PRESERVE(JSON_ARRAYAGG(b), '[true, false]')
+["alfa", null, "doi", "unu", "trei", null, null, "one", true, false]
+EXECUTE p4;
+JSON_MERGE_PRESERVE(JSON_ARRAYAGG(b), '[true, false]')
+["alfa", null, "doi", "unu", "trei", null, null, "one", true, false]
+deallocate prepare p4;
+SELECT a, JSON_ARRAYAGG(b) as jarray
+FROM t1
+GROUP BY a
+HAVING jarray= JSON_ARRAY("trei");
+a	jarray
+3	["trei"]
+TRUNCATE TABLE t1;
+SELECT a, JSON_ARRAYAGG(b) FROM t1 GROUP BY a;
+a	JSON_ARRAYAGG(b)
+SELECT JSON_ARRAYAGG(b) FROM t1;
+JSON_ARRAYAGG(b)
+NULL
+DROP TABLE t1;
+DROP DATABASE mcol_5394;
--- a/mysql-test/columnstore/bugfixes/MCOL-5394-json_arrayagg.test
+++ b/mysql-test/columnstore/bugfixes/MCOL-5394-json_arrayagg.test
@ -0,0 +1,82 @@
+--source ../include/have_columnstore.inc
+--disable_warnings
+DROP DATABASE IF EXISTS mcol_5394 ;
+--enable_warnings
+CREATE DATABASE mcol_5394 ;
+USE mcol_5394 ;
+
+CREATE TABLE t1 (a int, k int, b VARCHAR(10)) engine=columnstore;
+INSERT INTO t1 VALUES
+(1, 1, "alfa"),
+(1, 2, null),
+(2, 3, "doi"),
+(1, 4, "unu"),
+(3, 5, "trei"),
+(4, 6, null),
+(4, 7, null),
+(1, 8, "one");
+
+--sorted_result
+SELECT a, JSON_ARRAYAGG(b) FROM t1 GROUP BY a;
+--sorted_result
+SELECT JSON_ARRAYAGG(b) FROM t1;
+
+PREPARE p1 FROM "SELECT a, JSON_ARRAYAGG(b) FROM t1 GROUP BY a";
+--sorted_result
+EXECUTE p1;
+--sorted_result
+EXECUTE p1;
+deallocate prepare p1;
+
+PREPARE p3 FROM
+"SELECT SQL_BUFFER_RESULT  a, JSON_ARRAYAGG(b) FROM t1 GROUP BY a";
+--sorted_result
+EXECUTE p3;
+--sorted_result
+EXECUTE p3;
+deallocate prepare p3;
+
+PREPARE p4 FROM "SELECT JSON_ARRAYAGG(b) FROM t1";
+--sorted_result
+EXECUTE p4;
+--sorted_result
+EXECUTE p4;
+deallocate prepare p4;
+
+
+--sorted_result
+SELECT JSON_MERGE_PRESERVE(JSON_ARRAYAGG(b), '[true, false]') FROM t1;
+
+PREPARE p1 FROM
+"SELECT a, JSON_MERGE_PRESERVE(JSON_ARRAYAGG(b), '[true, false]') FROM t1 GROUP BY a";
+--sorted_result
+EXECUTE p1;
+--sorted_result
+EXECUTE p1;
+deallocate prepare p1;
+
+
+PREPARE p4 FROM
+"SELECT JSON_MERGE_PRESERVE(JSON_ARRAYAGG(b), '[true, false]') FROM t1";
+--sorted_result
+EXECUTE p4;
+--sorted_result
+EXECUTE p4;
+deallocate prepare p4;
+
+
+--sorted_result
+SELECT a, JSON_ARRAYAGG(b) as jarray
+FROM t1
+GROUP BY a
+HAVING jarray= JSON_ARRAY("trei");
+
+TRUNCATE TABLE t1;
+
+SELECT a, JSON_ARRAYAGG(b) FROM t1 GROUP BY a;
+SELECT JSON_ARRAYAGG(b) FROM t1;
+
+
+DROP TABLE t1;
+
+DROP DATABASE mcol_5394;