From 786b9da5b0f4e35c56b68821baa69fb93847b812 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Thu, 2 Mar 2023 18:37:09 +0000 Subject: [PATCH] MCOL-5438 COUNT() in math causes SEGV --- datatypes/mcs_decimal.h | 21 ++++++++++++ dbcon/execplan/aggregatecolumn.h | 1 - dbcon/joblist/tupleaggregatestep.cpp | 21 ++++++------ .../columnstore/bugfixes/mcol-5438.result | 9 +++++ .../columnstore/bugfixes/mcol-5438.test | 13 +++++++ utils/dataconvert/dataconvert.cpp | 34 ++++++------------- utils/dataconvert/dataconvert.h | 26 +++----------- utils/funcexp/func_cast.cpp | 32 +++++------------ utils/loggingcpp/errorcodes.cpp | 1 + utils/loggingcpp/errorcodes.h | 1 + utils/rowgroup/rowgroup.cpp | 5 ++- utils/rowgroup/rowgroup.h | 11 +++--- 12 files changed, 85 insertions(+), 90 deletions(-) create mode 100644 mysql-test/columnstore/bugfixes/mcol-5438.result create mode 100644 mysql-test/columnstore/bugfixes/mcol-5438.test diff --git a/datatypes/mcs_decimal.h b/datatypes/mcs_decimal.h index 91bdbe55f..6acd70605 100644 --- a/datatypes/mcs_decimal.h +++ b/datatypes/mcs_decimal.h @@ -164,6 +164,27 @@ const int128_t mcs_pow_10_128[20] = { 100000000000000000000000000000000000000_xxl, }; +const int128_t ConversionRangeMaxValue[20] = {9999999999999999999_xxl, + 99999999999999999999_xxl, + 999999999999999999999_xxl, + 9999999999999999999999_xxl, + 99999999999999999999999_xxl, + 999999999999999999999999_xxl, + 9999999999999999999999999_xxl, + 99999999999999999999999999_xxl, + 999999999999999999999999999_xxl, + 9999999999999999999999999999_xxl, + 99999999999999999999999999999_xxl, + 999999999999999999999999999999_xxl, + 9999999999999999999999999999999_xxl, + 99999999999999999999999999999999_xxl, + 999999999999999999999999999999999_xxl, + 9999999999999999999999999999999999_xxl, + 99999999999999999999999999999999999_xxl, + 999999999999999999999999999999999999_xxl, + 9999999999999999999999999999999999999_xxl, + 99999999999999999999999999999999999999_xxl}; + constexpr uint32_t maxPowOf10 = sizeof(mcs_pow_10) / sizeof(mcs_pow_10[0]) - 1; constexpr int128_t Decimal128Null = TSInt128::NullValue; constexpr int128_t Decimal128Empty = TSInt128::EmptyValue; diff --git a/dbcon/execplan/aggregatecolumn.h b/dbcon/execplan/aggregatecolumn.h index cb33b9468..8f6b8b6c5 100644 --- a/dbcon/execplan/aggregatecolumn.h +++ b/dbcon/execplan/aggregatecolumn.h @@ -78,7 +78,6 @@ class AggregateColumn : public ReturnedColumn UDAF, MULTI_PARM }; - /** * typedef */ diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 89286f3e0..6890bf9b6 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -18,8 +18,8 @@ // $Id: tupleaggregatestep.cpp 9732 2013-08-02 15:56:15Z pleblanc $ -//#define NDEBUG -// Cross engine needs to be at top due to MySQL includes +// #define NDEBUG +// Cross engine needs to be at top due to MySQL includes #define PREFER_MY_CONFIG_H #include "crossenginestep.h" @@ -69,7 +69,7 @@ using namespace querytele; #include "tuplehashjoin.h" #include "tupleaggregatestep.h" -//#include "stopwatch.cpp" +// #include "stopwatch.cpp" // Stopwatch timer; @@ -1290,7 +1290,7 @@ void TupleAggregateStep::prep1PhaseAggregate(JobInfo& jobInfo, vector& keysAgg.push_back(key); scaleAgg.push_back(0); // work around count() in select subquery - precisionAgg.push_back(9999); + precisionAgg.push_back(rowgroup::MagicPrecisionForCountAgg); typeAgg.push_back(CalpontSystemCatalog::UBIGINT); csNumAgg.push_back(csNumProj[colProj]); widthAgg.push_back(bigIntWidth); @@ -1858,7 +1858,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vectorfAggFunction == ROWAGG_MIN || f->fAggFunction == ROWAGG_MAX || f->fAggFunction == ROWAGG_STATS || f->fAggFunction == ROWAGG_BIT_AND || f->fAggFunction == ROWAGG_BIT_OR || f->fAggFunction == ROWAGG_BIT_XOR || - f->fAggFunction == ROWAGG_CONSTANT || f->fAggFunction == ROWAGG_GROUP_CONCAT || f->fAggFunction == ROWAGG_JSON_ARRAY)) + f->fAggFunction == ROWAGG_CONSTANT || f->fAggFunction == ROWAGG_GROUP_CONCAT || + f->fAggFunction == ROWAGG_JSON_ARRAY)) { funct.reset(new RowAggFunctionCol(f->fAggFunction, f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, f->fAuxColumnIndex - multiParms)); @@ -3189,7 +3190,7 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector keysAggPm.push_back(aggKey); scaleAggPm.push_back(0); // work around count() in select subquery - precisionAggPm.push_back(9999); + precisionAggPm.push_back(rowgroup::MagicPrecisionForCountAgg); typeAggPm.push_back(CalpontSystemCatalog::UBIGINT); csNumAggPm.push_back(8); widthAggPm.push_back(bigIntWidth); @@ -4080,7 +4081,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(JobInfo& jobInfo, vector #include #include +#include "mcs_decimal.h" using namespace std; #include #include @@ -43,7 +44,6 @@ using namespace boost::algorithm; #include "dataconvert.h" #undef DATACONVERT_DLLEXPORT - using namespace logging; namespace @@ -246,24 +246,6 @@ void number_int_value(const string& data, cscDataType typeCode, ++dpos; leftStr = valStr.substr(dpos); } - -// add above to keep the old behavior, to comply with tdriver -// uncomment code below to support negative scale -#if 0 - // check if enough digits in the integer part - size_t spos = intPart.find_first_of("0123456789"); - - if (string::npos == spos) - spos = intPart.length(); - - size_t len = intPart.substr(spos).length(); - - if (len < scale) - intPart.insert(spos, scale - len, '0'); // padding digit 0, not null. - - leftStr = intPart.substr(intPart.length() - scale) + leftStr; - intPart.erase(intPart.length() - scale, scale); -#endif } valStr = intPart; @@ -501,9 +483,14 @@ void number_int_value(const string& data, cscDataType typeCode, } else { - bool dummy = false; - char* ep = NULL; - rangeUp = (T)dataconvert::strtoll128(columnstore_big_precision[ct.precision - 19].c_str(), dummy, &ep); + auto precision = + ct.precision == rowgroup::MagicPrecisionForCountAgg ? datatypes::INT128MAXPRECISION : ct.precision; + if (precision > datatypes::INT128MAXPRECISION || precision < 0) + { + throw QueryDataExcept("Unsupported precision " + std::to_string(precision) + " converting DECIMAL ", + dataTypeErr); + } + rangeUp = datatypes::ConversionRangeMaxValue[ct.precision - 19]; } rangeLow = -rangeUp; @@ -2902,8 +2889,7 @@ int64_t DataConvert::stringToTime(const string& data) } void DataConvert::joinColTypeForUnion(datatypes::SystemCatalog::TypeHolderStd& unionedType, - const datatypes::SystemCatalog::TypeHolderStd& type, - unsigned int& rc) + const datatypes::SystemCatalog::TypeHolderStd& type, unsigned int& rc) { // limited support for VARBINARY, no implicit conversion. if (type.colDataType == datatypes::SystemCatalog::VARBINARY || diff --git a/utils/dataconvert/dataconvert.h b/utils/dataconvert/dataconvert.h index e3dfb9a9a..bddcb6d2a 100644 --- a/utils/dataconvert/dataconvert.h +++ b/utils/dataconvert/dataconvert.h @@ -77,6 +77,11 @@ using cscDataType = datatypes::SystemCatalog::ColDataType; #define EXPORT +namespace rowgroup +{ +const uint32_t MagicPrecisionForCountAgg = 9999; +} + const int64_t IDB_pow[19] = {1, 10, 100, @@ -97,27 +102,6 @@ const int64_t IDB_pow[19] = {1, 100000000000000000LL, 1000000000000000000LL}; -const std::string columnstore_big_precision[20] = {"9999999999999999999", - "99999999999999999999", - "999999999999999999999", - "9999999999999999999999", - "99999999999999999999999", - "999999999999999999999999", - "9999999999999999999999999", - "99999999999999999999999999", - "999999999999999999999999999", - "9999999999999999999999999999", - "99999999999999999999999999999", - "999999999999999999999999999999", - "9999999999999999999999999999999", - "99999999999999999999999999999999", - "999999999999999999999999999999999", - "9999999999999999999999999999999999", - "99999999999999999999999999999999999", - "999999999999999999999999999999999999", - "9999999999999999999999999999999999999", - "99999999999999999999999999999999999999"}; - const int32_t SECS_PER_MIN = 60; const int32_t MINS_PER_HOUR = 60; const int32_t HOURS_PER_DAY = 24; diff --git a/utils/funcexp/func_cast.cpp b/utils/funcexp/func_cast.cpp index ef0e2f515..98a2aec76 100644 --- a/utils/funcexp/func_cast.cpp +++ b/utils/funcexp/func_cast.cpp @@ -484,7 +484,6 @@ IDB_Decimal Func_cast_date::getDecimalVal(Row& row, FunctionParm& parm, bool& is CalpontSystemCatalog::ColType& operationColType) { IDB_Decimal decimal; - if (parm[0]->data()->resultType().isWideDecimalType()) decimal.s128Value = Func_cast_date::getDatetimeIntVal(row, parm, isNull, operationColType); else @@ -1007,6 +1006,9 @@ IDB_Decimal Func_cast_decimal::getDecimalVal(Row& row, FunctionParm& parm, bool& int32_t decimals = parm[1]->data()->getIntVal(row, isNull); int64_t max_length = parm[2]->data()->getIntVal(row, isNull); + assert(max_length == rowgroup::MagicPrecisionForCountAgg || + (max_length <= datatypes::INT128MAXPRECISION || max_length >= 0)); + if (max_length > datatypes::INT128MAXPRECISION || max_length <= 0) max_length = datatypes::INT128MAXPRECISION; @@ -1022,10 +1024,7 @@ IDB_Decimal Func_cast_decimal::getDecimalVal(Row& row, FunctionParm& parm, bool& { if (decimal.isTSInt128ByPrecision()) { - bool dummy = false; - char* ep = NULL; - int128_t max_number_decimal = - dataconvert::strtoll128(columnstore_big_precision[max_length - 19].c_str(), dummy, &ep); + int128_t max_number_decimal = datatypes::ConversionRangeMaxValue[max_length - 19]; decimal.s128Value = parm[0]->data()->getIntVal(row, isNull); decimal.scale = 0; int128_t scaleDivisor; @@ -1072,10 +1071,7 @@ IDB_Decimal Func_cast_decimal::getDecimalVal(Row& row, FunctionParm& parm, bool& { if (decimal.isTSInt128ByPrecision()) { - bool dummy = false; - char* ep = NULL; - int128_t max_number_decimal = - dataconvert::strtoll128(columnstore_big_precision[max_length - 19].c_str(), dummy, &ep); + int128_t max_number_decimal = datatypes::ConversionRangeMaxValue[max_length - 19]; uint128_t uval = parm[0]->data()->getUintVal(row, isNull); @@ -1127,11 +1123,7 @@ IDB_Decimal Func_cast_decimal::getDecimalVal(Row& row, FunctionParm& parm, bool& { if (decimal.isTSInt128ByPrecision()) { - bool dummy = false; - char* ep = NULL; - int128_t max_number_decimal = - dataconvert::strtoll128(columnstore_big_precision[max_length - 19].c_str(), dummy, &ep); - + int128_t max_number_decimal = datatypes::ConversionRangeMaxValue[max_length - 19]; float128_t value = parm[0]->data()->getDoubleVal(row, isNull); int128_t scaleDivisor; @@ -1178,11 +1170,7 @@ IDB_Decimal Func_cast_decimal::getDecimalVal(Row& row, FunctionParm& parm, bool& { if (decimal.isTSInt128ByPrecision()) { - bool dummy = false; - char* ep = NULL; - int128_t max_number_decimal = - dataconvert::strtoll128(columnstore_big_precision[max_length - 19].c_str(), dummy, &ep); - + int128_t max_number_decimal = datatypes::ConversionRangeMaxValue[max_length - 19]; float128_t value = parm[0]->data()->getLongDoubleVal(row, isNull); int128_t scaleDivisor; @@ -1230,11 +1218,7 @@ IDB_Decimal Func_cast_decimal::getDecimalVal(Row& row, FunctionParm& parm, bool& { if (decimal.isTSInt128ByPrecision()) { - bool dummy = false; - char* ep = NULL; - int128_t max_number_decimal = - dataconvert::strtoll128(columnstore_big_precision[max_length - 19].c_str(), dummy, &ep); - + int128_t max_number_decimal = datatypes::ConversionRangeMaxValue[max_length - 19]; decimal = parm[0]->data()->getDecimalVal(row, isNull); int128_t scaleDivisor; diff --git a/utils/loggingcpp/errorcodes.cpp b/utils/loggingcpp/errorcodes.cpp index c8e879c02..fbf9eb8d2 100644 --- a/utils/loggingcpp/errorcodes.cpp +++ b/utils/loggingcpp/errorcodes.cpp @@ -81,6 +81,7 @@ ErrorCodes::ErrorCodes() fErrorCodes[dataTypeErr] = "data type unknown."; fErrorCodes[incompatJoinCols] = "incompatible column types specified for join condition."; fErrorCodes[incompatFilterCols] = "incompatible column types specified for filter condition."; + fErrorCodes[dataConvertUnsupportedPrecisionValue] = "data conversion gets an unsupported precision value."; } string ErrorCodes::errorString(uint16_t code) const diff --git a/utils/loggingcpp/errorcodes.h b/utils/loggingcpp/errorcodes.h index 444bfc6b3..7b9263409 100644 --- a/utils/loggingcpp/errorcodes.h +++ b/utils/loggingcpp/errorcodes.h @@ -76,6 +76,7 @@ enum ErrorCodeValues incompatJoinCols, incompatFilterCols, aggregateResourceErr, + dataConvertUnsupportedPrecisionValue, statisticsJobListEmpty = 301 }; diff --git a/utils/rowgroup/rowgroup.cpp b/utils/rowgroup/rowgroup.cpp index 95861a48b..c9983ce1b 100644 --- a/utils/rowgroup/rowgroup.cpp +++ b/utils/rowgroup/rowgroup.cpp @@ -26,7 +26,7 @@ // Author: Patrick LeBlanc , (C) 2008 // -//#define NDEBUG +// #define NDEBUG #include #include using namespace std; @@ -682,7 +682,7 @@ void Row::initToNull() break; case CalpontSystemCatalog::BIGINT: - if (precision[i] != 9999) + if (precision[i] != MagicPrecisionForCountAgg) *((uint64_t*)&data[offsets[i]]) = joblist::BIGINTNULL; else // work around for count() in outer join result. *((uint64_t*)&data[offsets[i]]) = 0; @@ -1751,4 +1751,3 @@ RowGroup RowGroup::truncate(uint32_t cols) } } // namespace rowgroup - diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h index 8ccf36745..6c724bc68 100644 --- a/utils/rowgroup/rowgroup.h +++ b/utils/rowgroup/rowgroup.h @@ -30,7 +30,7 @@ #include #include #include -//#define NDEBUG +// #define NDEBUG #include #include #include @@ -524,7 +524,6 @@ class Row // that's not string-table safe, this one is inline void copyField(Row& dest, uint32_t destIndex, uint32_t srcIndex) const; - inline void copyBinaryField(Row& dest, uint32_t destIndex, uint32_t srcIndex) const; std::string toString(uint32_t rownum = 0) const; @@ -1070,7 +1069,7 @@ inline void Row::getInt128Field(uint32_t colIndex, int128_t& x) const inline datatypes::TSInt128 Row::getTSInt128Field(uint32_t colIndex) const { - const int128_t* ptr = reinterpret_cast(&data[offsets[colIndex]]);; + const int128_t* ptr = reinterpret_cast(&data[offsets[colIndex]]); return datatypes::TSInt128(ptr); } @@ -1316,7 +1315,6 @@ inline void Row::copyField(Row& out, uint32_t destIndex, uint32_t srcIndex) cons } } - inline void Row::copyBinaryField(Row& out, uint32_t destIndex, uint32_t srcIndex) const { out.setInt128Field(getTSInt128Field(srcIndex).getValue(), destIndex); @@ -1481,8 +1479,8 @@ class RowGroup : public messageqcpp::Serializeable inline void setUseStringTable(bool); // RGData *convertToInlineData(uint64_t *size = NULL) const; // caller manages the memory returned by - //this void convertToInlineDataInPlace(); RGData *convertToStringTable(uint64_t *size = NULL) const; void - //convertToStringTableInPlace(); + // this void convertToInlineDataInPlace(); RGData *convertToStringTable(uint64_t *size = NULL) + // const; void convertToStringTableInPlace(); void serializeRGData(messageqcpp::ByteStream&) const; inline uint32_t getStringTableThreshold() const; @@ -2139,4 +2137,3 @@ inline void RGData::getRow(uint32_t num, Row* row) } } // namespace rowgroup -