Replace underlying type for avg and sum for int types from long double to wide decimal

2025-07-29 08:21:15 +03:00 · 2021-04-13 21:15:33 +00:00
parent 123d602296
commit f81f743282
5 changed files with 159 additions and 142 deletions
--- a/datatypes/mcs_datatype.h
+++ b/datatypes/mcs_datatype.h
@ -361,7 +361,6 @@ static inline bool isWideDecimalType(const datatypes::SystemCatalog::ColDataType
          dt == SystemCatalog::UDECIMAL);
 }

-
 /** convenience function to determine if column type is a char
 *  type
 */
@ -480,27 +479,18 @@ inline bool isSignedInteger(const datatypes::SystemCatalog::ColDataType type)
  }
 }

-
 /**
-    @brief Returns true if all arguments have a DECIMAL/UDECIMAL type
+    @brief Detects whether the sum and avg aggregates of the given type will have
+    a wide decimal underlying type
 */
-static inline bool isDecimalOperands(const SystemCatalog::ColDataType resultDataType,
-    const SystemCatalog::ColDataType leftColDataType,
-    const SystemCatalog::ColDataType rightColDataType)
+inline bool hasUnderlyingWideDecimalForSumAndAvg(datatypes::SystemCatalog::ColDataType type)
 {
-  return ((resultDataType == SystemCatalog::DECIMAL ||
-           resultDataType == SystemCatalog::UDECIMAL) &&
-          (leftColDataType == SystemCatalog::DECIMAL ||
-           leftColDataType == SystemCatalog::UDECIMAL) &&
-          (rightColDataType == SystemCatalog::DECIMAL ||
-           rightColDataType == SystemCatalog::UDECIMAL));
+    return datatypes::isSignedInteger(type) || datatypes::isUnsigned(type);
 }

 } // end of namespace datatypes


-
-
 namespace datatypes
 {

--- a/dbcon/execplan/functioncolumn.h
+++ b/dbcon/execplan/functioncolumn.h
@ -285,7 +285,7 @@ public:
        }

        decimal.scale = fResultType.scale;
-        decimal.precision = fResultType.precision;
+        decimal.precision = std::max(fResultType.precision, static_cast<int32_t>(decimal.precision));
        return decimal;
    }
    virtual bool getBoolVal(rowgroup::Row& row, bool& isNull)
--- a/dbcon/joblist/tupleaggregatestep.cpp
+++ b/dbcon/joblist/tupleaggregatestep.cpp
@ -361,6 +361,13 @@ void wideDecimalOrLongDouble(const uint64_t colProj,
        precisionAgg.push_back(precisionProj[colProj]);
        widthAgg.push_back(width[colProj]);
    }
+    else if (datatypes::hasUnderlyingWideDecimalForSumAndAvg(type))
+    {
+        typeAgg.push_back(CalpontSystemCatalog::DECIMAL);
+        scaleAgg.push_back(0);
+        precisionAgg.push_back(datatypes::INT128MAXPRECISION);
+        widthAgg.push_back(datatypes::MAXDECIMALWIDTH);
+    }
    else
    {
        typeAgg.push_back(CalpontSystemCatalog::LONGDOUBLE);
@ -1683,6 +1690,14 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
    // for count column of average function
    map<uint32_t, SP_ROWAGG_FUNC_t> avgFuncMap, avgDistFuncMap;

+     // collect the projected column info, prepare for aggregation
+    vector<uint32_t> width;
+    for (uint64_t i = 0; i < keysProj.size(); i++)
+    {
+        width.push_back(projRG.getColumnWidth(i));
+    }
+
+
    // associate the columns between projected RG and aggregate RG on UM
    // populated the aggregate columns
    //     the groupby columns are put in front, even not a returned column
@ -1934,11 +1949,10 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(

                    oidsAgg.push_back(oidsProj[colProj]);
                    keysAgg.push_back(aggKey);
-                    typeAgg.push_back(CalpontSystemCatalog::LONGDOUBLE);
                    csNumAgg.push_back(8);
-                    precisionAgg.push_back(-1);
-                    widthAgg.push_back(sizeof(long double));
-                    scaleAgg.push_back(0);
+                    wideDecimalOrLongDouble(colProj, typeProj[colProj],
+                                       precisionProj, scaleProj, width,
+                                       typeAgg, scaleAgg, precisionAgg, widthAgg);
                    colAgg++;

                // has distinct step, put the count column for avg next to the sum
@ -2265,11 +2279,10 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(

                    oidsAggDist.push_back(oidsAgg[colAgg]);
                    keysAggDist.push_back(retKey);
-                    typeAggDist.push_back(CalpontSystemCatalog::LONGDOUBLE);
+                    wideDecimalOrLongDouble(colAgg, typeAgg[colAgg],
+                                            precisionAgg, scaleAgg, widthAgg,
+                                            typeAggDist, scaleAggDist, precisionAggDist, widthAggDist);
                    csNumAggDist.push_back(8);
-                    precisionAggDist.push_back(-1);
-                    widthAggDist.push_back(sizeof(long double));
-                    scaleAggDist.push_back(0);
                }
                break;

@ -2343,11 +2356,10 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
                                {
                                    oidsAggDist.push_back(oidsAgg[colAgg]);
                                    keysAggDist.push_back(retKey);
-                                    scaleAggDist.push_back(0);
-                                    typeAggDist.push_back(CalpontSystemCatalog::LONGDOUBLE);
                                    csNumAggDist.push_back(8);
-                                    precisionAggDist.push_back(-1);
-                                    widthAggDist.push_back(sizeof(long double));
+                                    wideDecimalOrLongDouble(colAgg, typeAgg[colAgg],
+                                                            precisionAgg, scaleAgg, widthAgg,
+                                                            typeAggDist, scaleAggDist, precisionAggDist, widthAggDist);
                                }
                                else
                                {
@ -4166,11 +4178,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(

                    oidsAggPm.push_back(oidsProj[colProj]);
                    keysAggPm.push_back(aggKey);
-                    typeAggPm.push_back(CalpontSystemCatalog::LONGDOUBLE);
                    csNumAggPm.push_back(8);
-                    precisionAggPm.push_back(-1);
-                    widthAggPm.push_back(sizeof(long double));
-                    scaleAggPm.push_back(0);
+                    wideDecimalOrLongDouble(colProj, typeProj[colProj],
+                                             precisionProj, scaleProj, width,
+                                             typeAggPm, scaleAggPm, precisionAggPm, widthAggPm);
                    colAggPm++;
                }

@ -4546,11 +4557,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(

                        oidsAggDist.push_back(oidsAggUm[colUm]);
                        keysAggDist.push_back(retKey);
-                        typeAggDist.push_back(CalpontSystemCatalog::LONGDOUBLE);
                        csNumAggDist.push_back(8);
-                        precisionAggDist.push_back(-1);
-                        widthAggDist.push_back(sizeof(long double));
-                        scaleAggDist.push_back(0);
+                        wideDecimalOrLongDouble(colUm, typeAggPm[colUm],
+                                                precisionAggPm, scaleAggPm, widthAggPm,
+                                                typeAggDist, scaleAggDist, precisionAggDist, widthAggDist);
                    }
                    // PM: put the count column for avg next to the sum
                    // let fall through to add a count column for average function
@ -4614,11 +4624,10 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
                            {
                                oidsAggDist.push_back(oidsAggUm[colUm]);
                                keysAggDist.push_back(retKey);
-                                scaleAggDist.push_back(0);
-                                typeAggDist.push_back(CalpontSystemCatalog::LONGDOUBLE);
                                csNumAggDist.push_back(8);
-                                precisionAggDist.push_back(-1);
-                                widthAggDist.push_back(sizeof(long double));
+                                wideDecimalOrLongDouble(colUm, typeAggUm[colUm],
+                                                        precisionAggUm, scaleAggUm, widthAggUm,
+                                                        typeAggDist, scaleAggDist, precisionAggDist, widthAggDist);
                            }
                            else
                            {
--- a/dbcon/mysql/ha_mcs_execplan.cpp
+++ b/dbcon/mysql/ha_mcs_execplan.cpp
@ -3725,9 +3725,7 @@ ArithmeticColumn* buildArithmeticColumn(
    const CalpontSystemCatalog::ColType& leftColType = pt->left()->data()->resultType();
    const CalpontSystemCatalog::ColType& rightColType = pt->right()->data()->resultType();

-    // Only tinker with the type if all columns involved are decimal
-    if (datatypes::isDecimalOperands(mysqlType.colDataType,
-            leftColType.colDataType, rightColType.colDataType))
+    if (datatypes::isDecimal(leftColType.colDataType) || datatypes::isDecimal(rightColType.colDataType))
    {
        int32_t leftColWidth = leftColType.colWidth;
        int32_t rightColWidth = rightColType.colWidth;
@ -3742,6 +3740,8 @@ ArithmeticColumn* buildArithmeticColumn(
            int32_t scale1 = leftColType.scale;
            int32_t scale2 = rightColType.scale;

+            mysqlType.precision = datatypes::INT128MAXPRECISION;
+
            if (funcName == "/" &&
                (mysqlType.scale - (scale1 - scale2)) > datatypes::INT128MAXPRECISION)
            {
@ -5058,6 +5058,19 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
                    ct.precision = precision;
                    ct.scale = scale;
                }
+                else if (datatypes::hasUnderlyingWideDecimalForSumAndAvg(ct.colDataType))
+                {
+                   uint32_t precision = datatypes::INT128MAXPRECISION;
+                   uint32_t scale = ct.scale;
+                   ct.colDataType = CalpontSystemCatalog::DECIMAL;
+                   ct.colWidth = datatypes::MAXDECIMALWIDTH;
+                   if (isAvg)
+                   {
+                       datatypes::Decimal::setScalePrecision4Avg(precision, scale);
+                   }
+                   ct.scale = scale;
+                   ct.precision = precision;
+                }
                else
                {
                    ct.colDataType = CalpontSystemCatalog::LONGDOUBLE;
--- a/utils/rowgroup/rowaggregation.cpp
+++ b/utils/rowgroup/rowaggregation.cpp
@ -1278,7 +1278,7 @@ void RowAggregation::doMinMax(const Row& rowIn, int64_t colIn, int64_t colOut, i
 //------------------------------------------------------------------------------
 void RowAggregation::doSum(const Row& rowIn, int64_t colIn, int64_t colOut, int funcType)
 {
-    int colDataType = rowIn.getColType(colIn);
+    datatypes::SystemCatalog::ColDataType colDataType = rowIn.getColType(colIn);
    long double valIn = 0;
    bool isWideDataType = false;
    void *wideValInPtr = nullptr;
@ -1379,9 +1379,26 @@ void RowAggregation::doSum(const Row& rowIn, int64_t colIn, int64_t colOut, int
            break;
        }
    }
-    if (LIKELY(!isWideDataType))
+
+    bool notFirstValue = !isNull(fRowGroupOut, fRow, colOut);
+
+    if (datatypes::hasUnderlyingWideDecimalForSumAndAvg(colDataType) || isWideDataType)
    {
-        if (LIKELY(!isNull(fRowGroupOut, fRow, colOut)))
+        if (LIKELY(notFirstValue))
+        {
+            int128_t *valOutPtr = fRow.getBinaryField<int128_t>(colOut);
+            int128_t sum = (isWideDataType) ? *valOutPtr + *reinterpret_cast<int128_t*>(wideValInPtr) : *valOutPtr + valIn;
+            fRow.setBinaryField(&sum, colOut);
+        }
+        else
+        {
+            int128_t sum = (isWideDataType) ? *reinterpret_cast<int128_t*>(wideValInPtr) : valIn;
+            fRow.setBinaryField(&sum, colOut);
+        }
+    }
+    else
+    {
+        if (notFirstValue)
        {
            long double valOut = fRow.getLongDoubleField(colOut);
            fRow.setLongDoubleField(valIn+valOut, colOut);
@ -1390,22 +1407,7 @@ void RowAggregation::doSum(const Row& rowIn, int64_t colIn, int64_t colOut, int
        {
            fRow.setLongDoubleField(valIn, colOut);
        }
-    }
-    else
-    {
-        uint32_t offset = fRow.getOffset(colOut);
-        int128_t* dec = reinterpret_cast<int128_t*>(wideValInPtr);
-        if (LIKELY(!isNull(fRowGroupOut, fRow, colOut)))
-        {
-            int128_t *valOutPtr = fRow.getBinaryField<int128_t>(colOut);
-            int128_t sum = *valOutPtr + *dec;
-            fRow.setBinaryField_offset(&sum, sizeof(sum), offset);
-        }
-        else
-        {
-            fRow.setBinaryField_offset(dec, sizeof(*dec), offset);
-        }
-    } // end-of isWideDataType block
+    } // end-of long double processing block
 }

 //------------------------------------------------------------------------------
@ -1823,9 +1825,8 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6
    if (rowIn.isNullValue(colIn))
        return;

-    int colDataType = rowIn.getColType(colIn);
+    datatypes::SystemCatalog::ColDataType colDataType = rowIn.getColType(colIn);
    long double valIn = 0;
-    long double valOut = fRow.getLongDoubleField(colOut);
    bool isWideDataType = false;
    void *wideValInPtr = nullptr;

@ -1907,8 +1908,6 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6

    // min(count) = 0
    uint64_t count = fRow.getUintField(colAux);
-    bool notFirstValue = count > 0;
-
    // Set count column
    if (merge)
    {
@ -1919,29 +1918,34 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6
        fRow.setUintField<8>(count + 1, colAux);
    }

-    // Set sum column
-    if (LIKELY(!isWideDataType))
+    bool notFirstValue = count > 0;
+
+    if (datatypes::hasUnderlyingWideDecimalForSumAndAvg(colDataType) || isWideDataType)
    {
-        if (LIKELY(notFirstValue))
-            fRow.setLongDoubleField(valIn + valOut, colOut);
-        else // This is the first value
-            fRow.setLongDoubleField(valIn, colOut);
-    }
-    else
-    {
-        uint32_t offset = fRow.getOffset(colOut);
-        int128_t* dec = reinterpret_cast<int128_t*>(wideValInPtr);
        if (LIKELY(notFirstValue))
        {
            int128_t *valOutPtr = fRow.getBinaryField<int128_t>(colOut);
-            int128_t sum = *valOutPtr + *dec;
-            fRow.setBinaryField_offset(&sum, sizeof(sum), offset);
+            int128_t sum = (isWideDataType) ? *valOutPtr + *reinterpret_cast<int128_t*>(wideValInPtr) : *valOutPtr + valIn;
+            fRow.setBinaryField(&sum, colOut);
        }
        else
        {
-            fRow.setBinaryField_offset(dec, sizeof(*dec), offset);
+            int128_t sum = (isWideDataType) ? *reinterpret_cast<int128_t*>(wideValInPtr) : valIn;
+            fRow.setBinaryField(&sum, colOut);
        }
    }
+    else
+    {
+        if (LIKELY(notFirstValue))
+        {
+            long double valOut = fRow.getLongDoubleField(colOut);
+            fRow.setLongDoubleField(valIn+valOut, colOut);
+        }
+        else
+        {
+            fRow.setLongDoubleField(valIn, colOut);
+        }
+    } // end-of long double processing block
 }


@ -4280,9 +4284,8 @@ void RowAggregationUMP2::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut,
    if (rowIn.isNullValue(colIn))
        return;

-    int colDataType = rowIn.getColType(colIn);
+    datatypes::SystemCatalog::ColDataType colDataType = rowIn.getColType(colIn);
    long double valIn = 0;
-    long double valOut = fRow.getLongDoubleField(colOut);
    bool isWideDataType = false;
    void *wideValInPtr = nullptr;

@ -4362,37 +4365,39 @@ void RowAggregationUMP2::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut,
        }
    }

-    uint64_t cnt = fRow.getUintField(colAux);
-    if (LIKELY(!isWideDataType))
+    uint64_t count = fRow.getUintField(colAux);
+    bool notFirstValue = count > 0;
+
+    if (datatypes::hasUnderlyingWideDecimalForSumAndAvg(colDataType) || isWideDataType)
    {
-        if (LIKELY(cnt > 0))
+        if (LIKELY(notFirstValue))
        {
-            fRow.setLongDoubleField(valIn + valOut, colOut);
-            fRow.setUintField(rowIn.getUintField(colIn + 1) + cnt, colAux);
+            int128_t *valOutPtr = fRow.getBinaryField<int128_t>(colOut);
+            int128_t sum = (isWideDataType) ? *valOutPtr + *reinterpret_cast<int128_t*>(wideValInPtr) : *valOutPtr + valIn;
+            fRow.setUintField(rowIn.getUintField(colIn + 1) + count, colAux);
+            fRow.setBinaryField(&sum, colOut);
        }
        else
        {
-            fRow.setLongDoubleField(valIn, colOut);
+            int128_t sum = (isWideDataType) ? *reinterpret_cast<int128_t*>(wideValInPtr) : valIn;
            fRow.setUintField(rowIn.getUintField(colIn + 1), colAux);
+            fRow.setBinaryField(&sum, colOut);
        }
    }
    else
    {
-        uint32_t offset = fRow.getOffset(colOut);
-        int128_t* dec = reinterpret_cast<int128_t*>(wideValInPtr);
-        if (LIKELY(cnt > 0))
+        if (LIKELY(notFirstValue))
        {
-            int128_t *valOutPtr = fRow.getBinaryField<int128_t>(colOut);
-            int128_t sum = *valOutPtr + *dec;
-            fRow.setBinaryField_offset(&sum, sizeof(sum), offset);
-            fRow.setUintField(rowIn.getUintField(colIn + 1) + cnt, colAux);
+            long double valOut = fRow.getLongDoubleField(colOut);
+            fRow.setUintField(rowIn.getUintField(colIn + 1) + count, colAux);
+            fRow.setLongDoubleField(valIn+valOut, colOut);
        }
        else
        {
-            fRow.setBinaryField_offset(dec, sizeof(*dec), offset);
            fRow.setUintField(rowIn.getUintField(colIn + 1), colAux);
+            fRow.setLongDoubleField(valIn, colOut);
        }
-    }
+    } // end-of long double processing block
 }

 //------------------------------------------------------------------------------