From 786b9da5b0f4e35c56b68821baa69fb93847b812 Mon Sep 17 00:00:00 2001
From: Roman Nozdrin <rnozdrin@mariadb.com>
Date: Thu, 2 Mar 2023 18:37:09 +0000
Subject: [PATCH] MCOL-5438 COUNT() in math causes SEGV

---
 datatypes/mcs_decimal.h                       | 21 ++++++++++++
 dbcon/execplan/aggregatecolumn.h              |  1 -
 dbcon/joblist/tupleaggregatestep.cpp          | 21 ++++++------
 .../columnstore/bugfixes/mcol-5438.result     |  9 +++++
 .../columnstore/bugfixes/mcol-5438.test       | 13 +++++++
 utils/dataconvert/dataconvert.cpp             | 34 ++++++-------------
 utils/dataconvert/dataconvert.h               | 26 +++-----------
 utils/funcexp/func_cast.cpp                   | 32 +++++------------
 utils/loggingcpp/errorcodes.cpp               |  1 +
 utils/loggingcpp/errorcodes.h                 |  1 +
 utils/rowgroup/rowgroup.cpp                   |  5 ++-
 utils/rowgroup/rowgroup.h                     | 11 +++---
 12 files changed, 85 insertions(+), 90 deletions(-)
 create mode 100644 mysql-test/columnstore/bugfixes/mcol-5438.result
 create mode 100644 mysql-test/columnstore/bugfixes/mcol-5438.test

diff --git a/datatypes/mcs_decimal.h b/datatypes/mcs_decimal.h
index 91bdbe55f..6acd70605 100644
--- a/datatypes/mcs_decimal.h
+++ b/datatypes/mcs_decimal.h
@@ -164,6 +164,27 @@ const int128_t mcs_pow_10_128[20] = {
     100000000000000000000000000000000000000_xxl,
 };
 
+const int128_t ConversionRangeMaxValue[20] = {9999999999999999999_xxl,
+                                              99999999999999999999_xxl,
+                                              999999999999999999999_xxl,
+                                              9999999999999999999999_xxl,
+                                              99999999999999999999999_xxl,
+                                              999999999999999999999999_xxl,
+                                              9999999999999999999999999_xxl,
+                                              99999999999999999999999999_xxl,
+                                              999999999999999999999999999_xxl,
+                                              9999999999999999999999999999_xxl,
+                                              99999999999999999999999999999_xxl,
+                                              999999999999999999999999999999_xxl,
+                                              9999999999999999999999999999999_xxl,
+                                              99999999999999999999999999999999_xxl,
+                                              999999999999999999999999999999999_xxl,
+                                              9999999999999999999999999999999999_xxl,
+                                              99999999999999999999999999999999999_xxl,
+                                              999999999999999999999999999999999999_xxl,
+                                              9999999999999999999999999999999999999_xxl,
+                                              99999999999999999999999999999999999999_xxl};
+
 constexpr uint32_t maxPowOf10 = sizeof(mcs_pow_10) / sizeof(mcs_pow_10[0]) - 1;
 constexpr int128_t Decimal128Null = TSInt128::NullValue;
 constexpr int128_t Decimal128Empty = TSInt128::EmptyValue;
diff --git a/dbcon/execplan/aggregatecolumn.h b/dbcon/execplan/aggregatecolumn.h
index cb33b9468..8f6b8b6c5 100644
--- a/dbcon/execplan/aggregatecolumn.h
+++ b/dbcon/execplan/aggregatecolumn.h
@@ -78,7 +78,6 @@ class AggregateColumn : public ReturnedColumn
     UDAF,
     MULTI_PARM
   };
-
   /**
    * typedef
    */
diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp
index 89286f3e0..6890bf9b6 100644
--- a/dbcon/joblist/tupleaggregatestep.cpp
+++ b/dbcon/joblist/tupleaggregatestep.cpp
@@ -18,8 +18,8 @@
 
 //  $Id: tupleaggregatestep.cpp 9732 2013-08-02 15:56:15Z pleblanc $
 
-//#define NDEBUG
-// Cross engine needs to be at top due to MySQL includes
+// #define NDEBUG
+//  Cross engine needs to be at top due to MySQL includes
 #define PREFER_MY_CONFIG_H
 #include "crossenginestep.h"
 
@@ -69,7 +69,7 @@ using namespace querytele;
 #include "tuplehashjoin.h"
 #include "tupleaggregatestep.h"
 
-//#include "stopwatch.cpp"
+// #include "stopwatch.cpp"
 
 // Stopwatch timer;
 
@@ -1290,7 +1290,7 @@ void TupleAggregateStep::prep1PhaseAggregate(JobInfo& jobInfo, vector<RowGroup>&
         keysAgg.push_back(key);
         scaleAgg.push_back(0);
         // work around count() in select subquery
-        precisionAgg.push_back(9999);
+        precisionAgg.push_back(rowgroup::MagicPrecisionForCountAgg);
         typeAgg.push_back(CalpontSystemCatalog::UBIGINT);
         csNumAgg.push_back(csNumProj[colProj]);
         widthAgg.push_back(bigIntWidth);
@@ -1858,7 +1858,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vector<Ro
           keysAgg.push_back(aggKey);
           scaleAgg.push_back(0);
           // work around count() in select subquery
-          precisionAgg.push_back(9999);
+          precisionAgg.push_back(rowgroup::MagicPrecisionForCountAgg);
 
           if (isUnsigned(typeProj[colProj]))
           {
@@ -2196,7 +2196,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vector<Ro
           keysAggDist.push_back(retKey);
           scaleAggDist.push_back(0);
           // work around count() in select subquery
-          precisionAggDist.push_back(9999);
+          precisionAggDist.push_back(rowgroup::MagicPrecisionForCountAgg);
           typeAggDist.push_back(CalpontSystemCatalog::UBIGINT);
           csNumAggDist.push_back(8);
           widthAggDist.push_back(bigIntWidth);
@@ -2840,7 +2840,8 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vector<Ro
                     f->fAggFunction == ROWAGG_MIN || f->fAggFunction == ROWAGG_MAX ||
                     f->fAggFunction == ROWAGG_STATS || f->fAggFunction == ROWAGG_BIT_AND ||
                     f->fAggFunction == ROWAGG_BIT_OR || f->fAggFunction == ROWAGG_BIT_XOR ||
-                    f->fAggFunction == ROWAGG_CONSTANT || f->fAggFunction == ROWAGG_GROUP_CONCAT || f->fAggFunction == ROWAGG_JSON_ARRAY))
+                    f->fAggFunction == ROWAGG_CONSTANT || f->fAggFunction == ROWAGG_GROUP_CONCAT ||
+                    f->fAggFunction == ROWAGG_JSON_ARRAY))
           {
             funct.reset(new RowAggFunctionCol(f->fAggFunction, f->fStatsFunction, f->fInputColumnIndex,
                                               f->fOutputColumnIndex, f->fAuxColumnIndex - multiParms));
@@ -3189,7 +3190,7 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector<RowGroup>
           keysAggPm.push_back(aggKey);
           scaleAggPm.push_back(0);
           // work around count() in select subquery
-          precisionAggPm.push_back(9999);
+          precisionAggPm.push_back(rowgroup::MagicPrecisionForCountAgg);
           typeAggPm.push_back(CalpontSystemCatalog::UBIGINT);
           csNumAggPm.push_back(8);
           widthAggPm.push_back(bigIntWidth);
@@ -4080,7 +4081,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(JobInfo& jobInfo, vector<R
           keysAggPm.push_back(aggKey);
           scaleAggPm.push_back(0);
           // work around count() in select subquery
-          precisionAggPm.push_back(9999);
+          precisionAggPm.push_back(rowgroup::MagicPrecisionForCountAgg);
 
           if (isUnsigned(typeProj[colProj]))
           {
@@ -4463,7 +4464,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(JobInfo& jobInfo, vector<R
             keysAggDist.push_back(retKey);
             scaleAggDist.push_back(0);
             // work around count() in select subquery
-            precisionAggDist.push_back(9999);
+            precisionAggDist.push_back(rowgroup::MagicPrecisionForCountAgg);
             typeAggDist.push_back(CalpontSystemCatalog::UBIGINT);
             csNumAggDist.push_back(8);
             widthAggDist.push_back(bigIntWidth);
diff --git a/mysql-test/columnstore/bugfixes/mcol-5438.result b/mysql-test/columnstore/bugfixes/mcol-5438.result
new file mode 100644
index 000000000..caae08fe5
--- /dev/null
+++ b/mysql-test/columnstore/bugfixes/mcol-5438.result
@@ -0,0 +1,9 @@
+DROP DATABASE IF EXISTS `mcol-5438`;
+CREATE DATABASE `mcol-5438`;
+USE `mcol-5438`;
+CREATE TABLE cs1 (d DECIMAL(38))ENGINE=COLUMNSTORE;
+INSERT INTO cs1 VALUES (42);
+SELECT (COUNT(CASE WHEN d IN(42,4) THEN 1 ELSE 0 END) * 100/SUM(1))*0 from cs1;
+(COUNT(CASE WHEN d IN(42,4) THEN 1 ELSE 0 END) * 100/SUM(1))*0
+0.0000
+DROP DATABASE `mcol-5438`;
diff --git a/mysql-test/columnstore/bugfixes/mcol-5438.test b/mysql-test/columnstore/bugfixes/mcol-5438.test
new file mode 100644
index 000000000..3dc22dfbe
--- /dev/null
+++ b/mysql-test/columnstore/bugfixes/mcol-5438.test
@@ -0,0 +1,13 @@
+-- source ../include/have_columnstore.inc
+
+--disable_warnings
+DROP DATABASE IF EXISTS `mcol-5438`;
+--enable_warnings
+CREATE DATABASE `mcol-5438`;
+USE `mcol-5438`;
+
+CREATE TABLE cs1 (d DECIMAL(38))ENGINE=COLUMNSTORE;
+INSERT INTO cs1 VALUES (42);
+SELECT (COUNT(CASE WHEN d IN(42,4) THEN 1 ELSE 0 END) * 100/SUM(1))*0 from cs1;
+
+DROP DATABASE `mcol-5438`;
diff --git a/utils/dataconvert/dataconvert.cpp b/utils/dataconvert/dataconvert.cpp
index 93533601b..fefca375f 100644
--- a/utils/dataconvert/dataconvert.cpp
+++ b/utils/dataconvert/dataconvert.cpp
@@ -29,6 +29,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <type_traits>
+#include "mcs_decimal.h"
 using namespace std;
 #include <boost/algorithm/string/case_conv.hpp>
 #include <boost/algorithm/string.hpp>
@@ -43,7 +44,6 @@ using namespace boost::algorithm;
 #include "dataconvert.h"
 #undef DATACONVERT_DLLEXPORT
 
-
 using namespace logging;
 
 namespace
@@ -246,24 +246,6 @@ void number_int_value(const string& data, cscDataType typeCode,
         ++dpos;
         leftStr = valStr.substr(dpos);
       }
-
-// add above to keep the old behavior, to comply with tdriver
-// uncomment code below to support negative scale
-#if 0
-            // check if enough digits in the integer part
-            size_t spos = intPart.find_first_of("0123456789");
-
-            if (string::npos == spos)
-                spos = intPart.length();
-
-            size_t len = intPart.substr(spos).length();
-
-            if (len < scale)
-                intPart.insert(spos, scale - len, '0');  // padding digit 0, not null.
-
-            leftStr = intPart.substr(intPart.length() - scale) + leftStr;
-            intPart.erase(intPart.length() - scale, scale);
-#endif
     }
 
     valStr = intPart;
@@ -501,9 +483,14 @@ void number_int_value(const string& data, cscDataType typeCode,
     }
     else
     {
-      bool dummy = false;
-      char* ep = NULL;
-      rangeUp = (T)dataconvert::strtoll128(columnstore_big_precision[ct.precision - 19].c_str(), dummy, &ep);
+      auto precision =
+          ct.precision == rowgroup::MagicPrecisionForCountAgg ? datatypes::INT128MAXPRECISION : ct.precision;
+      if (precision > datatypes::INT128MAXPRECISION || precision < 0)
+      {
+        throw QueryDataExcept("Unsupported precision " + std::to_string(precision) + " converting DECIMAL ",
+                              dataTypeErr);
+      }
+      rangeUp = datatypes::ConversionRangeMaxValue[ct.precision - 19];
     }
 
     rangeLow = -rangeUp;
@@ -2902,8 +2889,7 @@ int64_t DataConvert::stringToTime(const string& data)
 }
 
 void DataConvert::joinColTypeForUnion(datatypes::SystemCatalog::TypeHolderStd& unionedType,
-                                      const datatypes::SystemCatalog::TypeHolderStd& type,
-                                      unsigned int& rc)
+                                      const datatypes::SystemCatalog::TypeHolderStd& type, unsigned int& rc)
 {
   // limited support for VARBINARY, no implicit conversion.
   if (type.colDataType == datatypes::SystemCatalog::VARBINARY ||
diff --git a/utils/dataconvert/dataconvert.h b/utils/dataconvert/dataconvert.h
index e3dfb9a9a..bddcb6d2a 100644
--- a/utils/dataconvert/dataconvert.h
+++ b/utils/dataconvert/dataconvert.h
@@ -77,6 +77,11 @@ using cscDataType = datatypes::SystemCatalog::ColDataType;
 
 #define EXPORT
 
+namespace rowgroup
+{
+const uint32_t MagicPrecisionForCountAgg = 9999;
+}
+
 const int64_t IDB_pow[19] = {1,
                              10,
                              100,
@@ -97,27 +102,6 @@ const int64_t IDB_pow[19] = {1,
                              100000000000000000LL,
                              1000000000000000000LL};
 
-const std::string columnstore_big_precision[20] = {"9999999999999999999",
-                                                   "99999999999999999999",
-                                                   "999999999999999999999",
-                                                   "9999999999999999999999",
-                                                   "99999999999999999999999",
-                                                   "999999999999999999999999",
-                                                   "9999999999999999999999999",
-                                                   "99999999999999999999999999",
-                                                   "999999999999999999999999999",
-                                                   "9999999999999999999999999999",
-                                                   "99999999999999999999999999999",
-                                                   "999999999999999999999999999999",
-                                                   "9999999999999999999999999999999",
-                                                   "99999999999999999999999999999999",
-                                                   "999999999999999999999999999999999",
-                                                   "9999999999999999999999999999999999",
-                                                   "99999999999999999999999999999999999",
-                                                   "999999999999999999999999999999999999",
-                                                   "9999999999999999999999999999999999999",
-                                                   "99999999999999999999999999999999999999"};
-
 const int32_t SECS_PER_MIN = 60;
 const int32_t MINS_PER_HOUR = 60;
 const int32_t HOURS_PER_DAY = 24;
diff --git a/utils/funcexp/func_cast.cpp b/utils/funcexp/func_cast.cpp
index ef0e2f515..98a2aec76 100644
--- a/utils/funcexp/func_cast.cpp
+++ b/utils/funcexp/func_cast.cpp
@@ -484,7 +484,6 @@ IDB_Decimal Func_cast_date::getDecimalVal(Row& row, FunctionParm& parm, bool& is
                                           CalpontSystemCatalog::ColType& operationColType)
 {
   IDB_Decimal decimal;
-
   if (parm[0]->data()->resultType().isWideDecimalType())
     decimal.s128Value = Func_cast_date::getDatetimeIntVal(row, parm, isNull, operationColType);
   else
@@ -1007,6 +1006,9 @@ IDB_Decimal Func_cast_decimal::getDecimalVal(Row& row, FunctionParm& parm, bool&
   int32_t decimals = parm[1]->data()->getIntVal(row, isNull);
   int64_t max_length = parm[2]->data()->getIntVal(row, isNull);
 
+  assert(max_length == rowgroup::MagicPrecisionForCountAgg ||
+         (max_length <= datatypes::INT128MAXPRECISION || max_length >= 0));
+
   if (max_length > datatypes::INT128MAXPRECISION || max_length <= 0)
     max_length = datatypes::INT128MAXPRECISION;
 
@@ -1022,10 +1024,7 @@ IDB_Decimal Func_cast_decimal::getDecimalVal(Row& row, FunctionParm& parm, bool&
     {
       if (decimal.isTSInt128ByPrecision())
       {
-        bool dummy = false;
-        char* ep = NULL;
-        int128_t max_number_decimal =
-            dataconvert::strtoll128(columnstore_big_precision[max_length - 19].c_str(), dummy, &ep);
+        int128_t max_number_decimal = datatypes::ConversionRangeMaxValue[max_length - 19];
         decimal.s128Value = parm[0]->data()->getIntVal(row, isNull);
         decimal.scale = 0;
         int128_t scaleDivisor;
@@ -1072,10 +1071,7 @@ IDB_Decimal Func_cast_decimal::getDecimalVal(Row& row, FunctionParm& parm, bool&
     {
       if (decimal.isTSInt128ByPrecision())
       {
-        bool dummy = false;
-        char* ep = NULL;
-        int128_t max_number_decimal =
-            dataconvert::strtoll128(columnstore_big_precision[max_length - 19].c_str(), dummy, &ep);
+        int128_t max_number_decimal = datatypes::ConversionRangeMaxValue[max_length - 19];
 
         uint128_t uval = parm[0]->data()->getUintVal(row, isNull);
 
@@ -1127,11 +1123,7 @@ IDB_Decimal Func_cast_decimal::getDecimalVal(Row& row, FunctionParm& parm, bool&
     {
       if (decimal.isTSInt128ByPrecision())
       {
-        bool dummy = false;
-        char* ep = NULL;
-        int128_t max_number_decimal =
-            dataconvert::strtoll128(columnstore_big_precision[max_length - 19].c_str(), dummy, &ep);
-
+        int128_t max_number_decimal = datatypes::ConversionRangeMaxValue[max_length - 19];
         float128_t value = parm[0]->data()->getDoubleVal(row, isNull);
 
         int128_t scaleDivisor;
@@ -1178,11 +1170,7 @@ IDB_Decimal Func_cast_decimal::getDecimalVal(Row& row, FunctionParm& parm, bool&
     {
       if (decimal.isTSInt128ByPrecision())
       {
-        bool dummy = false;
-        char* ep = NULL;
-        int128_t max_number_decimal =
-            dataconvert::strtoll128(columnstore_big_precision[max_length - 19].c_str(), dummy, &ep);
-
+        int128_t max_number_decimal = datatypes::ConversionRangeMaxValue[max_length - 19];
         float128_t value = parm[0]->data()->getLongDoubleVal(row, isNull);
 
         int128_t scaleDivisor;
@@ -1230,11 +1218,7 @@ IDB_Decimal Func_cast_decimal::getDecimalVal(Row& row, FunctionParm& parm, bool&
     {
       if (decimal.isTSInt128ByPrecision())
       {
-        bool dummy = false;
-        char* ep = NULL;
-        int128_t max_number_decimal =
-            dataconvert::strtoll128(columnstore_big_precision[max_length - 19].c_str(), dummy, &ep);
-
+        int128_t max_number_decimal = datatypes::ConversionRangeMaxValue[max_length - 19];
         decimal = parm[0]->data()->getDecimalVal(row, isNull);
 
         int128_t scaleDivisor;
diff --git a/utils/loggingcpp/errorcodes.cpp b/utils/loggingcpp/errorcodes.cpp
index c8e879c02..fbf9eb8d2 100644
--- a/utils/loggingcpp/errorcodes.cpp
+++ b/utils/loggingcpp/errorcodes.cpp
@@ -81,6 +81,7 @@ ErrorCodes::ErrorCodes()
   fErrorCodes[dataTypeErr] = "data type unknown.";
   fErrorCodes[incompatJoinCols] = "incompatible column types specified for join condition.";
   fErrorCodes[incompatFilterCols] = "incompatible column types specified for filter condition.";
+  fErrorCodes[dataConvertUnsupportedPrecisionValue] = "data conversion gets an unsupported precision value.";
 }
 
 string ErrorCodes::errorString(uint16_t code) const
diff --git a/utils/loggingcpp/errorcodes.h b/utils/loggingcpp/errorcodes.h
index 444bfc6b3..7b9263409 100644
--- a/utils/loggingcpp/errorcodes.h
+++ b/utils/loggingcpp/errorcodes.h
@@ -76,6 +76,7 @@ enum ErrorCodeValues
   incompatJoinCols,
   incompatFilterCols,
   aggregateResourceErr,
+  dataConvertUnsupportedPrecisionValue,
   statisticsJobListEmpty = 301
 };
 
diff --git a/utils/rowgroup/rowgroup.cpp b/utils/rowgroup/rowgroup.cpp
index 95861a48b..c9983ce1b 100644
--- a/utils/rowgroup/rowgroup.cpp
+++ b/utils/rowgroup/rowgroup.cpp
@@ -26,7 +26,7 @@
 // Author: Patrick LeBlanc <pleblanc@calpont.com>, (C) 2008
 //
 
-//#define NDEBUG
+// #define NDEBUG
 #include <sstream>
 #include <iterator>
 using namespace std;
@@ -682,7 +682,7 @@ void Row::initToNull()
         break;
 
       case CalpontSystemCatalog::BIGINT:
-        if (precision[i] != 9999)
+        if (precision[i] != MagicPrecisionForCountAgg)
           *((uint64_t*)&data[offsets[i]]) = joblist::BIGINTNULL;
         else  // work around for count() in outer join result.
           *((uint64_t*)&data[offsets[i]]) = 0;
@@ -1751,4 +1751,3 @@ RowGroup RowGroup::truncate(uint32_t cols)
 }
 
 }  // namespace rowgroup
-
diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h
index 8ccf36745..6c724bc68 100644
--- a/utils/rowgroup/rowgroup.h
+++ b/utils/rowgroup/rowgroup.h
@@ -30,7 +30,7 @@
 #include <vector>
 #include <string>
 #include <stdexcept>
-//#define NDEBUG
+// #define NDEBUG
 #include <cassert>
 #include <boost/shared_ptr.hpp>
 #include <boost/shared_array.hpp>
@@ -524,7 +524,6 @@ class Row
   // that's not string-table safe, this one is
   inline void copyField(Row& dest, uint32_t destIndex, uint32_t srcIndex) const;
 
-
   inline void copyBinaryField(Row& dest, uint32_t destIndex, uint32_t srcIndex) const;
 
   std::string toString(uint32_t rownum = 0) const;
@@ -1070,7 +1069,7 @@ inline void Row::getInt128Field(uint32_t colIndex, int128_t& x) const
 
 inline datatypes::TSInt128 Row::getTSInt128Field(uint32_t colIndex) const
 {
-  const int128_t* ptr = reinterpret_cast<int128_t*>(&data[offsets[colIndex]]);;
+  const int128_t* ptr = reinterpret_cast<int128_t*>(&data[offsets[colIndex]]);
   return datatypes::TSInt128(ptr);
 }
 
@@ -1316,7 +1315,6 @@ inline void Row::copyField(Row& out, uint32_t destIndex, uint32_t srcIndex) cons
   }
 }
 
-
 inline void Row::copyBinaryField(Row& out, uint32_t destIndex, uint32_t srcIndex) const
 {
   out.setInt128Field(getTSInt128Field(srcIndex).getValue(), destIndex);
@@ -1481,8 +1479,8 @@ class RowGroup : public messageqcpp::Serializeable
   inline void setUseStringTable(bool);
 
   //	RGData *convertToInlineData(uint64_t *size = NULL) const;  // caller manages the memory returned by
-  //this 	void convertToInlineDataInPlace(); 	RGData *convertToStringTable(uint64_t *size = NULL) const; 	void
-  //convertToStringTableInPlace();
+  // this 	void convertToInlineDataInPlace(); 	RGData *convertToStringTable(uint64_t *size = NULL)
+  // const; void convertToStringTableInPlace();
   void serializeRGData(messageqcpp::ByteStream&) const;
   inline uint32_t getStringTableThreshold() const;
 
@@ -2139,4 +2137,3 @@ inline void RGData::getRow(uint32_t num, Row* row)
 }
 
 }  // namespace rowgroup
-