MCOL-641 Simple aggregates support: min, max, sum, avg for wide-DECIMALs.

2025-07-29 08:21:15 +03:00 · 2020-05-08 10:17:17 +00:00
parent 3d94ec1568
commit e88cbe9bc1
9 changed files with 343 additions and 212 deletions
--- a/datatypes/mcs_decimal.cpp
+++ b/datatypes/mcs_decimal.cpp
@ -203,6 +203,11 @@ namespace datatypes
        return std::string(buf);
    }

+    std::string Decimal::toString(const execplan::IDB_Decimal& value)
+    {
+        return toString(const_cast<execplan::IDB_Decimal&>(value));
+    }
+
    int Decimal::compare(const execplan::IDB_Decimal& l, const execplan::IDB_Decimal& r)
    {
        int128_t divisorL, divisorR;
--- a/datatypes/mcs_decimal.h
+++ b/datatypes/mcs_decimal.h
@ -24,6 +24,7 @@
 #include "calpontsystemcatalog.h"

 using int128_t = __int128;
+using ColTypeAlias = execplan::CalpontSystemCatalog::ColType;

 namespace execplan
 {
@ -37,6 +38,8 @@ constexpr uint32_t MAXDECIMALWIDTH = 16U;
 constexpr uint8_t INT64MAXPRECISION = 18U;
 constexpr uint8_t INT128MAXPRECISION = 38U;
 constexpr uint8_t MAXLEGACYWIDTH = 8U;
+constexpr uint8_t MAXSCALEINC4AVG = 4U;
+constexpr int8_t IGNOREPRECISION = -1;

 const uint64_t mcs_pow_10[20] =
 {
@ -159,12 +162,13 @@ class Decimal
            @brief Convenience method to put decimal into a std::string.
        */
        static std::string toString(execplan::IDB_Decimal& value);
+        static std::string toString(const execplan::IDB_Decimal& value);

        /**
            @brief The method detects whether decimal type is wide
            using csc data type.
        */
-        static constexpr inline bool isWideDecimalType(const execplan::CalpontSystemCatalog::ColType& ct)
+        static constexpr inline bool isWideDecimalType(const ColTypeAlias& ct)
        {
            return ((ct.colDataType == execplan::CalpontSystemCatalog::DECIMAL ||
                ct.colDataType == execplan::CalpontSystemCatalog::UDECIMAL) &&
@ -185,7 +189,7 @@ class Decimal
            @brief The method sets the legacy scale and precision of a wide decimal
            column which is the result of an arithmetic operation.
        */
-        static inline void setDecimalScalePrecisionLegacy(execplan::CalpontSystemCatalog::ColType& ct,
+        static inline void setDecimalScalePrecisionLegacy(ColTypeAlias& ct,
            unsigned int precision, unsigned int scale)
        {
            ct.scale = scale;
@ -200,7 +204,7 @@ class Decimal
            @brief The method sets the scale and precision of a wide decimal
            column which is the result of an arithmetic operation.
        */
-        static inline void setDecimalScalePrecision(execplan::CalpontSystemCatalog::ColType& ct,
+        static inline void setDecimalScalePrecision(ColTypeAlias& ct,
            unsigned int precision, unsigned int scale)
        {
            ct.colWidth = (precision > INT64MAXPRECISION)
@ -216,7 +220,7 @@ class Decimal
            @brief The method sets the scale and precision of a wide decimal
            column which is the result of an arithmetic operation, based on a heuristic.
        */
-        static inline void setDecimalScalePrecisionHeuristic(execplan::CalpontSystemCatalog::ColType& ct,
+        static inline void setDecimalScalePrecisionHeuristic(ColTypeAlias& ct,
            unsigned int precision, unsigned int scale)
        {
            unsigned int diff = 0;
@ -337,6 +341,19 @@ class Decimal

            return static_cast<int64_t>(value);
        }
+
+        /**
+            @brief MDB increases scale and precision by up to 4 digits when calculating avg()
+        */
+        static inline void setScalePrecision4Avg(
+            unsigned int& precision,
+            unsigned int& scale)
+        {
+            uint32_t scaleAvailable = INT128MAXPRECISION - scale;
+            uint32_t precisionAvailable = INT128MAXPRECISION - precision;
+            scale += (scaleAvailable >= MAXSCALEINC4AVG) ? MAXSCALEINC4AVG : scaleAvailable;
+            precision += (precisionAvailable >= MAXSCALEINC4AVG) ? MAXSCALEINC4AVG : precisionAvailable;
+        }
 };

 /**
--- a/dbcon/joblist/tupleaggregatestep.cpp
+++ b/dbcon/joblist/tupleaggregatestep.cpp
@ -343,6 +343,41 @@ string keyName(uint64_t i, uint32_t key, const joblist::JobInfo& jobInfo)
 namespace joblist
 {

+void wideDecimalOrLongDouble(const uint64_t colProj,
+    const CalpontSystemCatalog::ColDataType type,
+    const vector<uint32_t>& precisionProj,
+    const vector<uint32_t>& oidsProj,
+    const uint32_t aggKey,
+    const vector<uint32_t>& scaleProj,
+    const vector<uint32_t>& width,
+    vector<uint32_t>& oidsAgg,
+    vector<uint32_t>& keysAgg,
+    vector<CalpontSystemCatalog::ColDataType>& typeAgg,
+    vector<uint32_t>& scaleAgg,
+    vector<uint32_t>& precisionAgg,
+    vector<uint32_t>& widthAgg)
+{
+    if ((type == CalpontSystemCatalog::DECIMAL
+        || type == CalpontSystemCatalog::UDECIMAL)
+        && datatypes::Decimal::isWideDecimalType(precisionProj[colProj]))
+    {
+        oidsAgg.push_back(oidsProj[colProj]);
+        keysAgg.push_back(aggKey);
+        typeAgg.push_back(type);
+        scaleAgg.push_back(scaleProj[colProj]);
+        precisionAgg.push_back(precisionProj[colProj]);
+        widthAgg.push_back(width[colProj]);
+    }
+    else
+    {
+        oidsAgg.push_back(oidsProj[colProj]);
+        keysAgg.push_back(aggKey);
+        typeAgg.push_back(CalpontSystemCatalog::LONGDOUBLE);
+        scaleAgg.push_back(0);
+        precisionAgg.push_back(-1);
+        widthAgg.push_back(sizeof(long double));
+    }
+}

 TupleAggregateStep::TupleAggregateStep(
    const SP_ROWAGG_UM_t& agg,
@ -717,25 +752,47 @@ void TupleAggregateStep::configDeliveredRowGroup(const JobInfo& jobInfo)

    // correct the scale
    vector<uint32_t> scale = fRowGroupOut.getScale();
+    vector<uint32_t> precision = fRowGroupOut.getPrecision();

-//    for (uint64_t i = 0; i < scale.size(); i++)
-//    {
-        // to support CNX_DECIMAL_SCALE the avg column's scale is coded with two scales:
-        // fe's avg column scale << 8 + original column scale
-        //if ((scale[i] & 0x0000FF00) > 0)
-//        scale[i] = scale[i] &  0x000000FF;
-//    }
-
-    size_t retColCount = jobInfo.nonConstDelCols.size();
+    size_t retColCount = 0;
+    auto scaleIter = scale.begin();
+    auto precisionIter = precision.begin();

    if (jobInfo.havingStep)
+    {
        retColCount = jobInfo.returnedColVec.size();
+        idbassert(jobInfo.returnedColVec.size() == jobInfo.nonConstCols.size());
+        for (auto& rc : jobInfo.nonConstCols)
+        {
+            auto& colType = rc->resultType();
+            if (datatypes::Decimal::isWideDecimalType(colType))
+            {
+                *scaleIter = colType.scale;
+                *precisionIter = colType.precision;
+            }
+            scaleIter++; precisionIter++;
+        }
+    }
+    else
+    {
+        retColCount = jobInfo.nonConstDelCols.size();
+        for (auto& rc : jobInfo.nonConstDelCols)
+        {
+            auto& colType = rc->resultType();
+            if (datatypes::Decimal::isWideDecimalType(colType))
+            {
+                *scaleIter = colType.scale;
+                *precisionIter = colType.precision;
+            }
+            scaleIter++; precisionIter++;
+        }
+    }

    vector<uint32_t>::const_iterator offsets0 = fRowGroupOut.getOffsets().begin();
    vector<CalpontSystemCatalog::ColDataType>::const_iterator types0 =
        fRowGroupOut.getColTypes().begin();
    vector<uint32_t> csNums = fRowGroupOut.getCharsetNumbers();
-    vector<uint32_t>::const_iterator precision0 = fRowGroupOut.getPrecision().begin();
+    vector<uint32_t>::const_iterator precision0 = precision.begin();
    fRowGroupDelivered = RowGroup(retColCount,
                                  vector<uint32_t>(offsets0, offsets0 + retColCount + 1),
                                  vector<uint32_t>(oids.begin(), oids.begin() + retColCount),
@ -896,7 +953,6 @@ SJSTEP TupleAggregateStep::prepAggregate(SJSTEP& step, JobInfo& jobInfo)
    // preprocess the columns used by group_concat
    jobInfo.groupConcatInfo.prepGroupConcat(jobInfo);
    bool doUMOnly = jobInfo.groupConcatInfo.columns().size() > 0
-//                 || jobInfo.windowSet.size() > 0
                 || sas
                 || ces;

@ -1303,14 +1359,11 @@ void TupleAggregateStep::prep1PhaseAggregate(
                    cerr << "prep1PhaseAggregate: " << emsg << endl;
                    throw IDBExcept(emsg, ERR_AGGREGATE_TYPE_NOT_SUPPORT);
                }
-
-                oidsAgg.push_back(oidsProj[colProj]);
-                keysAgg.push_back(key);
-                typeAgg.push_back(CalpontSystemCatalog::LONGDOUBLE);
+                wideDecimalOrLongDouble(colProj, typeProj[colProj],
+                    precisionProj, oidsProj, key, scaleProj, width,
+                    oidsAgg, keysAgg, typeAgg, scaleAgg,
+                    precisionAgg, widthAgg);
                csNumAgg.push_back(csNumProj[colProj]);
-                precisionAgg.push_back(-1);
-                widthAgg.push_back(sizeof(long double));
-                scaleAgg.push_back(0);
            }
            break;

@ -1755,11 +1808,6 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
                throw logic_error(emsg.str());
            }

-            // skip sum / count(column) if avg is also selected
-//            if ((aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) &&
-//                    (avgSet.find(aggKey) != avgSet.end()))
-//                continue;
-
            if (aggOp == ROWAGG_DISTINCT_SUM ||
                    aggOp == ROWAGG_DISTINCT_AVG ||
                    aggOp == ROWAGG_COUNT_DISTINCT_COL_NAME)
@ -3128,31 +3176,11 @@ void TupleAggregateStep::prep2PhasesAggregate(
                        cerr << "prep2PhasesAggregate: " << emsg << endl;
                        throw IDBExcept(emsg, ERR_AGGREGATE_TYPE_NOT_SUPPORT);
                    }
-
-                    // WIP MCOL-641 Replace condition with a
-                    // dynamic one
-                    if (typeProj[colProj] == CalpontSystemCatalog::DECIMAL
-                        && width[colProj] == 16)
-                    { 
-                        oidsAggPm.push_back(oidsProj[colProj]);
-                        keysAggPm.push_back(aggKey);
-                        typeAggPm.push_back(CalpontSystemCatalog::DECIMAL);
+                    wideDecimalOrLongDouble(colProj, typeProj[colProj],
+                        precisionProj, oidsProj, aggKey, scaleProj, width,
+                        oidsAggPm, keysAggPm, typeAggPm, scaleAggPm,
+                        precisionAggPm, widthAggPm);
                    scaleAggPm.push_back(0);
-                        // WIP makes this dynamic
-                        precisionAggPm.push_back(38);
-                        widthAggPm.push_back(width[colProj]);
-                        csNumAggPm.push_back(8);
-                    }
-                    else
-                    {
-                        oidsAggPm.push_back(oidsProj[colProj]);
-                        keysAggPm.push_back(aggKey);
-                        typeAggPm.push_back(CalpontSystemCatalog::LONGDOUBLE);
-                        scaleAggPm.push_back(0);
-                        csNumAggPm.push_back(8);
-                        precisionAggPm.push_back(-1);
-                        widthAggPm.push_back(sizeof(long double));
-                    }
                    colAggPm++;
                }

@ -3435,13 +3463,11 @@ void TupleAggregateStep::prep2PhasesAggregate(

                        if (aggOp == ROWAGG_SUM)
                        {
-                            oidsAggUm.push_back(oidsAggPm[colPm]);
-                            keysAggUm.push_back(retKey);
-                            scaleAggUm.push_back(0);
-                            typeAggUm.push_back(CalpontSystemCatalog::LONGDOUBLE);
+                            wideDecimalOrLongDouble(colPm, typeProj[colPm],
+                                precisionProj, oidsProj, retKey, scaleProj, widthAggPm,
+                                oidsAggUm, keysAggUm, typeAggUm, scaleAggUm,
+                                precisionAggUm, widthAggUm);
                            csNumAggUm.push_back(8);
-                            precisionAggUm.push_back(-1);
-                            widthAggUm.push_back(sizeof(long double));
                        }
                        else
                        {
--- a/dbcon/mysql/ha_mcs_execplan.cpp
+++ b/dbcon/mysql/ha_mcs_execplan.cpp
@ -3621,7 +3621,8 @@ ArithmeticColumn* buildArithmeticColumn(
            unsigned int precision = idp->max_length;
            unsigned int scale = idp->decimals;

-            datatypes::Decimal::setDecimalScalePrecisionLegacy(mysql_type, precision, scale);
+            datatypes::Decimal::setDecimalScalePrecisionLegacy(mysql_type,
+                precision, scale);
        }
        else
        {
@ -3638,7 +3639,8 @@ ArithmeticColumn* buildArithmeticColumn(
                int32_t scale2 = pt->right()->data()->resultType().scale;

                if (funcName == "/" &&
-                    (mysql_type.scale - (scale1 - scale2)) > datatypes::INT128MAXPRECISION)
+                    (mysql_type.scale - (scale1 - scale2)) >
+                        datatypes::INT128MAXPRECISION)
                {
                    Item_decimal* idp = (Item_decimal*)item;

@ -4980,15 +4982,33 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
            // use the first parm for result type.
            parm = ac->aggParms()[0];

-            // WIP why do we use LONGDOUBLE for AVG?
-            if (isp->sum_func() == Item_sum::AVG_FUNC ||
-                    isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC)
+            bool isAvg = (isp->sum_func() == Item_sum::AVG_FUNC ||
+                    isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC);
+            if (isAvg || isp->sum_func() == Item_sum::SUM_FUNC ||
+                     isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC)
            {
                CalpontSystemCatalog::ColType ct = parm->resultType();
+                if (datatypes::Decimal::isWideDecimalType(ct))
+                {
+                    uint32_t precision = ct.precision;
+                    uint32_t scale = ct.scale;
+                    if (isAvg)
+                    {
+                        datatypes::Decimal::setScalePrecision4Avg(precision, scale);
+                    }
+                    ct.precision = precision;
+                    ct.scale = scale;
+                }
+                else
+                {
                    ct.colDataType = CalpontSystemCatalog::LONGDOUBLE;
                    ct.colWidth = sizeof(long double);
-                ct.scale += 4;
-                ct.precision = -1;
+                    if (isAvg)
+                    {
+                        ct.scale += datatypes::MAXSCALEINC4AVG;
+                    }
+                    ct.precision = datatypes::IGNOREPRECISION;
+                }
                ac->resultType(ct);
            }
            else if (isp->sum_func() == Item_sum::COUNT_FUNC ||
@ -5000,25 +5020,6 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
                ct.scale = parm->resultType().scale;
                ac->resultType(ct);
            }
-            else if (isp->sum_func() == Item_sum::SUM_FUNC ||
-                     isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC)
-            {
-                // WIP MCOL-641 This fast hack breaks aggregates for
-                // all float DT's
-                // UPD it doesn't break b/c actual DT for result type
-                // is set during JobList creation.
-                /*CalpontSystemCatalog::ColType ct = parm->resultType();
-                ct.colDataType = CalpontSystemCatalog::LONGDOUBLE;
-                ct.colWidth = sizeof(long double);
-                ct.precision = -1;*/
-                CalpontSystemCatalog::ColType ct = parm->resultType();
-                ct.colDataType = CalpontSystemCatalog::DECIMAL;
-                ct.colWidth = 16;
-                ct.precision = 38;
-                // WIP set the scale if argument is a float-based DT
-                ct.scale = 0;
-                ac->resultType(ct);
-            }
            else if (isp->sum_func() == Item_sum::STD_FUNC ||
                     isp->sum_func() == Item_sum::VARIANCE_FUNC)
            {
@ -5058,7 +5059,11 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
        }

        // adjust decimal result type according to internalDecimalScale
-        if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL)
+        bool isWideDecimal =
+            datatypes::Decimal::isWideDecimalType(ac->resultType());
+        // NOTE: this must also be valid for UDECIMAL
+        if (!isWideDecimal && gwi.internalDecimalScale >= 0
+            && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL)
        {
            CalpontSystemCatalog::ColType ct = ac->resultType();
            ct.scale = gwi.internalDecimalScale;
--- a/dbcon/mysql/ha_mcs_partition.cpp
+++ b/dbcon/mysql/ha_mcs_partition.cpp
@ -1100,7 +1100,7 @@ extern "C"

                    mapit = partMap.find(logicalPartNum);

-                    int state;
+                    int state = CP_INVALID;

                    if (ct.colWidth <= 8)
                        state = em.getExtentMaxMin(iter->range.start, partInfo.max, partInfo.min, seqNum);
--- a/utils/joiner/tuplejoiner.cpp
+++ b/utils/joiner/tuplejoiner.cpp
@ -1127,7 +1127,7 @@ void TupleJoiner::updateCPData(const Row& r)
        }
        else
        {
-            int64_t val;
+            int64_t val = 0;
            if (r.getColType(colIdx) == CalpontSystemCatalog::LONGDOUBLE)
            {
                double dval = (double)roundl(r.getLongDoubleField(colIdx));
--- a/utils/rowgroup/rowaggregation.cpp
+++ b/utils/rowgroup/rowaggregation.cpp
@ -1,6 +1,6 @@
 /*
   Copyright (C) 2014 InfiniDB, Inc.
-   Copyright (c) 2019 MariaDB Corporation
+   Copyright (c) 2019-2020 MariaDB Corporation

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
@ -57,6 +57,8 @@

 //..comment out NDEBUG to enable assertions, uncomment NDEBUG to disable
 //#define NDEBUG
+#include "funcexp/utils_utf8.h"
+#include "mcs_decimal.h"

 using namespace std;
 using namespace boost;
@ -70,12 +72,16 @@ namespace
 const int64_t AGG_ROWGROUP_SIZE = 256;

 template <typename T>
-bool minMax(T d1, T d2, int type)
+inline bool minMax(T d1, T d2, int type)
 {
    if (type == rowgroup::ROWAGG_MIN) return d1 < d2;
    else                              return d1 > d2;
 }

+inline bool minMax(int128_t* d1, int128_t* d2, int type)
+{
+    return (type == rowgroup::ROWAGG_MIN) ? *d1 < *d2 : *d1 > *d2;
+}

 inline int64_t getIntNullValue(int colType)
 {
@ -334,6 +340,16 @@ inline bool ExternalKeyEq::operator()(const RowPosition& pos1, const RowPosition

 static const string overflowMsg("Aggregation overflow.");

+inline void RowAggregation::updateIntMinMax(int128_t* val1, int128_t* val2, int64_t col, int func)
+{
+    int32_t colOutOffset = fRow.getOffset(col);
+    if (isNull(fRowGroupOut, fRow, col))
+        fRow.setBinaryField_offset(val1, sizeof(int128_t), colOutOffset);
+    else if (minMax(val1, val2, func))
+        fRow.setBinaryField_offset(val1, sizeof(int128_t), colOutOffset);
+}
+
+
 inline void RowAggregation::updateIntMinMax(int64_t val1, int64_t val2, int64_t col, int func)
 {
    if (isNull(fRowGroupOut, fRow, col))
@ -1010,13 +1026,31 @@ void RowAggregation::initMapData(const Row& rowIn)
            case execplan::CalpontSystemCatalog::MEDINT:
            case execplan::CalpontSystemCatalog::INT:
            case execplan::CalpontSystemCatalog::BIGINT:
-            case execplan::CalpontSystemCatalog::DECIMAL:
-            case execplan::CalpontSystemCatalog::UDECIMAL:
            {
                fRow.setIntField(rowIn.getIntField(colIn), colOut);
                break;
            }

+            case execplan::CalpontSystemCatalog::DECIMAL:
+            case execplan::CalpontSystemCatalog::UDECIMAL:
+            {
+                if (LIKELY(fRow.getColumnWidth(colIn) == datatypes::MAXDECIMALWIDTH))
+                {
+                    uint32_t colOutOffset = fRow.getOffset(colOut);
+                    fRow.setBinaryField_offset(
+                        rowIn.getBinaryField<int128_t>(colIn),
+                        sizeof(int128_t),
+                        colOutOffset);
+                }
+                else
+                {
+                    fRow.setIntField(rowIn.getIntField(colIn), colOut);
+                }
+
+                break;
+            }
+
+
            case execplan::CalpontSystemCatalog::UTINYINT:
            case execplan::CalpontSystemCatalog::USMALLINT:
            case execplan::CalpontSystemCatalog::UMEDINT:
@ -1113,8 +1147,6 @@ void RowAggregation::makeAggFieldsNull(Row& row)
                fFunctionCols[i]->fAggFunction == ROWAGG_GROUP_CONCAT ||
                fFunctionCols[i]->fAggFunction == ROWAGG_STATS)
        {
-//			done by memset
-//			row.setIntField(0, colOut);
            continue;
        }

@ -1160,18 +1192,18 @@ void RowAggregation::makeAggFieldsNull(Row& row)
            case execplan::CalpontSystemCatalog::UDECIMAL:
            {
                int colWidth = fRowGroupOut->getColumnWidth(colOut);
-                if (colWidth <= 8)
+                if (LIKELY(colWidth == datatypes::MAXDECIMALWIDTH))
+                {
+                    uint32_t offset = row.getOffset(colOut);
+                    row.setBinaryField_offset(
+                        const_cast<int128_t*>(&datatypes::Decimal128Null),
+                        colWidth,
+                        offset);
+                }
+                else if (colWidth == datatypes::MAXLEGACYWIDTH)
                {
                    row.setIntField(getUintNullValue(colDataType, colWidth), colOut);
                }
-                else
-                {
-                    int128_t nullValue = 0;
-                    utils::setWideDecimalNullValue(nullValue);
-                    uint32_t offset = row.getOffset(colOut);
-                    row.setBinaryField_offset(&nullValue, sizeof(nullValue),
-                        offset);
-                }
                break;
            }

@ -1183,7 +1215,7 @@ void RowAggregation::makeAggFieldsNull(Row& row)
            {
                int colWidth = fRowGroupOut->getColumnWidth(colOut);

-                if (colWidth <= 8)
+                if (colWidth <= datatypes::MAXLEGACYWIDTH)
                {
                    row.setUintField(getUintNullValue(colDataType, colWidth), colOut);
                }
@ -1256,8 +1288,6 @@ void RowAggregation::doMinMax(const Row& rowIn, int64_t colIn, int64_t colOut, i
        case execplan::CalpontSystemCatalog::MEDINT:
        case execplan::CalpontSystemCatalog::INT:
        case execplan::CalpontSystemCatalog::BIGINT:
-        case execplan::CalpontSystemCatalog::DECIMAL:
-        case execplan::CalpontSystemCatalog::UDECIMAL:
        {
            int64_t valIn = rowIn.getIntField(colIn);
            int64_t valOut = fRow.getIntField(colOut);
@ -1265,6 +1295,24 @@ void RowAggregation::doMinMax(const Row& rowIn, int64_t colIn, int64_t colOut, i
            break;
        }

+        case execplan::CalpontSystemCatalog::DECIMAL:
+        case execplan::CalpontSystemCatalog::UDECIMAL:
+        {
+            if (LIKELY(fRow.getColumnWidth(colIn) == datatypes::MAXDECIMALWIDTH))
+            {
+                updateIntMinMax(rowIn.getBinaryField<int128_t>(colIn),
+                    fRow.getBinaryField<int128_t>(colOut),
+                    colOut, funcType);
+            }
+            else
+            {
+                int64_t valIn = rowIn.getIntField(colIn);
+                int64_t valOut = fRow.getIntField(colOut);
+                updateIntMinMax(valIn, valOut, colOut, funcType);
+            }
+            break;
+        }
+
        case execplan::CalpontSystemCatalog::UTINYINT:
        case execplan::CalpontSystemCatalog::USMALLINT:
        case execplan::CalpontSystemCatalog::UMEDINT:
@ -1340,17 +1388,12 @@ void RowAggregation::doMinMax(const Row& rowIn, int64_t colIn, int64_t colOut, i
 // Note: NULL value check must be done on UM & PM
 //       UM may receive NULL values, too.
 //------------------------------------------------------------------------------
-// WIP MCOL-641. This and other methods must be type based to avoid needless mem
-// allocation for wide DTs
 void RowAggregation::doSum(const Row& rowIn, int64_t colIn, int64_t colOut, int funcType)
 {
    int colDataType = (fRowGroupIn.getColTypes())[colIn];
    long double valIn = 0;
    bool isWideDataType = false;
-    void *wideValInPtr = NULL;
-    // WIP MCOL-641 Probably the width must be taken
-    // from colOut
-    uint32_t width = fRowGroupOut->getColumnWidth(colOut);
+    void *wideValInPtr = nullptr;

    if (isNull(&fRowGroupIn, rowIn, colIn) == true)
        return;
@ -1380,28 +1423,20 @@ void RowAggregation::doSum(const Row& rowIn, int64_t colIn, int64_t colOut, int
        case execplan::CalpontSystemCatalog::DECIMAL:
        case execplan::CalpontSystemCatalog::UDECIMAL:
        {
-            // WIP MCOL-641 make the size dynamic and use branch prediction cond
-            isWideDataType = (width) > 8 ? true : false;
-            if (!isWideDataType)
-            {
-                valIn = rowIn.getIntField(colIn);
-                double scale = (double)(fRowGroupIn.getScale())[colIn];
-                if (valIn != 0 && scale > 0)
-                {
-                    valIn /= pow(10.0, scale);
-                }
-            }
-            else
-            {
-                if (colDataType == execplan::CalpontSystemCatalog::DECIMAL)
+            uint32_t width = fRowGroupOut->getColumnWidth(colOut);
+            isWideDataType = width == datatypes::MAXDECIMALWIDTH;
+            if(LIKELY(isWideDataType))
            {
                int128_t *dec = rowIn.getBinaryField<int128_t>(colIn);
                wideValInPtr = reinterpret_cast<void*>(dec);
            }
            else
            {
-                    uint128_t *dec = rowIn.getBinaryField<uint128_t>(colIn);
-                    wideValInPtr = reinterpret_cast<void*>(dec);
+                valIn = rowIn.getIntField(colIn);
+                double scale = (double)(fRowGroupIn.getScale())[colIn];
+                if (valIn != 0 && scale > 0)
+                {
+                    valIn /= pow(10.0, scale);
                }
            }
    
@ -1455,50 +1490,32 @@ void RowAggregation::doSum(const Row& rowIn, int64_t colIn, int64_t colOut, int
            break;
        }
    }
-    // WIP MCOL-641
-    if (!isWideDataType)
+    if (LIKELY(!isWideDataType))
    {
-        if (isNull(fRowGroupOut, fRow, colOut))
-        {
-            fRow.setLongDoubleField(valIn, colOut);
-        }
-        else
+        if (LIKELY(!isNull(fRowGroupOut, fRow, colOut)))
        {
            long double valOut = fRow.getLongDoubleField(colOut);
            fRow.setLongDoubleField(valIn+valOut, colOut);
        }
+        else
+        {
+            fRow.setLongDoubleField(valIn, colOut);
+        }
    }
    else
    {
        uint32_t offset = fRow.getOffset(colOut);
-        if (colDataType == execplan::CalpontSystemCatalog::DECIMAL)
-        {
-            int128_t *dec = reinterpret_cast<int128_t*>(wideValInPtr);
-            if (isNull(fRowGroupOut, fRow, colOut))
-            {
-                fRow.setBinaryField_offset(dec, sizeof(*dec), offset);
-            }
-            else
+        int128_t* dec = reinterpret_cast<int128_t*>(wideValInPtr);
+        if (LIKELY(!isNull(fRowGroupOut, fRow, colOut)))
        {
            int128_t *valOutPtr = fRow.getBinaryField(valOutPtr, colOut);
            int128_t sum = *valOutPtr + *dec;
            fRow.setBinaryField_offset(&sum, sizeof(sum), offset);
        }
-        }
        else
-        {
-            uint128_t *dec = reinterpret_cast<uint128_t*>(wideValInPtr);
-            if (isNull(fRowGroupOut, fRow, colOut))
        {
            fRow.setBinaryField_offset(dec, sizeof(*dec), offset);
        }
-            else
-            {
-                uint128_t *valOutPtr = fRow.getBinaryField(valOutPtr, colOut);
-                uint128_t sum = *valOutPtr + *dec; 
-                fRow.setBinaryField_offset(&sum, sizeof(sum), offset);
-            }
-        }
    } // end-of isWideDataType block
 }

@ -1791,7 +1808,7 @@ void RowAggregation::updateEntry(const Row& rowIn)

            case ROWAGG_AVG:
                // count(column) for average is inserted after the sum,
-                // colOut+1 is the position of the count column.
+                // colOut+1 is the position of the aux count column.
                doAvg(rowIn, colIn, colOut, colOut + 1);
                break;

@ -1851,6 +1868,8 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6
    int colDataType = (fRowGroupIn.getColTypes())[colIn];
    long double valIn = 0;
    long double valOut = fRow.getLongDoubleField(colOut);
+    bool isWideDataType = false;
+    void *wideValInPtr = nullptr;

    switch (colDataType)
    {
@ -1862,7 +1881,6 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6
        {
            valIn = rowIn.getIntField(colIn);
            break;
-            break;
        }

        case execplan::CalpontSystemCatalog::UTINYINT:
@ -1877,6 +1895,15 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6

        case execplan::CalpontSystemCatalog::DECIMAL:
        case execplan::CalpontSystemCatalog::UDECIMAL:
+        {
+            uint32_t width = fRowGroupOut->getColumnWidth(colOut);
+            isWideDataType = width == datatypes::MAXDECIMALWIDTH;
+            if(LIKELY(isWideDataType))
+            {
+                int128_t* dec = rowIn.getBinaryField<int128_t>(colIn);
+                wideValInPtr = reinterpret_cast<void*>(dec);
+            }
+            else
            {
                valIn = rowIn.getIntField(colIn);
                double scale = (double)(fRowGroupIn.getScale())[colIn];
@ -1884,6 +1911,7 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6
                {
                    valIn /= pow(10.0, scale);
                }
+            }
            break;
        }

@ -1917,16 +1945,32 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6
        }
    }

-    if (fRow.getUintField(colAux) == 0)
+    // The aux count starts at 0, so count == 1 after the increment marks the first value
+    uint64_t count = fRow.getUintField(colAux) + 1;
+    fRow.setUintField<8>(count, colAux);
+    bool notFirstValue = count > 1;
+
+    if (LIKELY(!isWideDataType))
    {
-        // This is the first value
+        if (LIKELY(notFirstValue))
+            fRow.setLongDoubleField(valIn + valOut, colOut);
+        else // This is the first value
            fRow.setLongDoubleField(valIn, colOut);
-        fRow.setUintField(1, colAux);
    }
    else
    {
-        fRow.setLongDoubleField(valIn + valOut, colOut);
-        fRow.setUintField(fRow.getUintField(colAux) + 1, colAux);
+        uint32_t offset = fRow.getOffset(colOut);
+        int128_t* dec = reinterpret_cast<int128_t*>(wideValInPtr);
+        if (LIKELY(notFirstValue))
+        {
+            int128_t *valOutPtr = fRow.getBinaryField(valOutPtr, colOut);
+            int128_t sum = *valOutPtr + *dec;
+            fRow.setBinaryField_offset(&sum, sizeof(sum), offset);
+        }
+        else
+        {
+            fRow.setBinaryField_offset(dec, sizeof(*dec), offset);
+        }
    }
 }

@ -2632,11 +2676,6 @@ void RowAggregationUM::calculateAvgColumns()
            int64_t colOut = fFunctionCols[i]->fOutputColumnIndex;
            int64_t colAux = fFunctionCols[i]->fAuxColumnIndex;

-//            int scale = fRowGroupOut->getScale()[colOut];
-//            int scale1 = scale >> 8;
-//            int scale2 = scale & 0x000000FF;
-//            long double factor = pow(10.0, scale2 - scale1);
-
            for (uint64_t j = 0; j < fRowGroupOut->getRowCount(); j++)
            {
                fRowGroupOut->getRow(j, &fRow);
@ -2645,15 +2684,39 @@ void RowAggregationUM::calculateAvgColumns()
                if (cnt == 0) // empty set, value is initialized to null.
                    continue;

+                uint32_t precision = fRow.getPrecision(colOut);
+                bool isWideDecimal =
+                    datatypes::Decimal::isWideDecimalType(precision);
+
+                if (LIKELY(!isWideDecimal))
+                {
                    long double sum = 0.0;
                    long double avg = 0.0;
-
-                // MCOL-1822 Always long double
                    sum = fRow.getLongDoubleField(colOut);
                    avg = sum / cnt;
-//                avg *= factor;
                    fRow.setLongDoubleField(avg, colOut);
                }
+                else
+                {
+                    uint32_t offset = fRow.getOffset(colOut);
+                    uint32_t scale = fRow.getScale(colOut);
+                    // Multiply the sum by a power of 10 so that AVG is delivered
+                    // with the scale closest to the expected original scale + 4.
+                    // There is a counterpart in buildAggregateColumn.
+                    datatypes::Decimal::setScalePrecision4Avg(precision, scale);
+                    int128_t* sumPnt = fRow.getBinaryField_offset<int128_t>(offset);
+                    uint32_t scaleDiff = scale - fRow.getScale(colOut);
+                    // multiplication overflow check
+                    datatypes::MultiplicationOverflowCheck multOp;
+                    int128_t sum = 0;
+                    if (scaleDiff > 0)
+                        multOp(*sumPnt, datatypes::mcs_pow_10[scaleDiff], sum);
+                    else
+                        sum = *sumPnt;
+                    int128_t avg = sum / cnt;
+                    fRow.setBinaryField_offset(&avg, sizeof(avg), offset);
+                }
+            }
        }
    }
 }
@ -4174,6 +4237,8 @@ void RowAggregationUMP2::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut,
    int colDataType = (fRowGroupIn.getColTypes())[colIn];
    long double valIn = 0;
    long double valOut = fRow.getLongDoubleField(colOut);
+    bool isWideDataType = false;
+    void *wideValInPtr = nullptr;

    switch (colDataType)
    {
@ -4199,14 +4264,23 @@ void RowAggregationUMP2::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut,

        case execplan::CalpontSystemCatalog::DECIMAL:
        case execplan::CalpontSystemCatalog::UDECIMAL:
+        {
+            uint32_t width = fRowGroupOut->getColumnWidth(colOut);
+            isWideDataType = width == datatypes::MAXDECIMALWIDTH;
+            if(LIKELY(isWideDataType))
+            {
+                int128_t* dec = rowIn.getBinaryField<int128_t>(colIn);
+                wideValInPtr = reinterpret_cast<void*>(dec);
+            }
+            else
            {
                valIn = rowIn.getIntField(colIn);
-            break;
                double scale = (double)(fRowGroupIn.getScale())[colIn];
                if (valIn != 0 && scale > 0)
                {
                    valIn /= pow(10.0, scale);
                }
+            }
            break;
        }

@ -4240,17 +4314,37 @@ void RowAggregationUMP2::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut,
        }
    }

-    int64_t cnt = fRow.getUintField(colAux);
-    if (cnt == 0)
+    uint64_t cnt = fRow.getUintField(colAux);
+    if (LIKELY(!isWideDataType))
+    {
+        if (LIKELY(cnt > 0))
+        {
+            fRow.setLongDoubleField(valIn + valOut, colOut);
+            fRow.setUintField(rowIn.getUintField(colIn + 1) + cnt, colAux);
+        }
+        else
        {
            fRow.setLongDoubleField(valIn, colOut);
            fRow.setUintField(rowIn.getUintField(colIn + 1), colAux);
        }
+    }
    else
    {
-        fRow.setLongDoubleField(valIn + valOut, colOut);
+        uint32_t offset = fRow.getOffset(colOut);
+        int128_t* dec = reinterpret_cast<int128_t*>(wideValInPtr);
+        if (LIKELY(cnt > 0))
+        {
+            int128_t *valOutPtr = fRow.getBinaryField(valOutPtr, colOut);
+            int128_t sum = *valOutPtr + *dec;
+            fRow.setBinaryField_offset(&sum, sizeof(sum), offset);
            fRow.setUintField(rowIn.getUintField(colIn + 1) + cnt, colAux);
        }
+        else
+        {
+            fRow.setBinaryField_offset(dec, sizeof(*dec), offset);
+            fRow.setUintField(rowIn.getUintField(colIn + 1), colAux);
+        }
+    }
 }

 //------------------------------------------------------------------------------
--- a/utils/rowgroup/rowaggregation.h
+++ b/utils/rowgroup/rowaggregation.h
@ -653,6 +653,7 @@ protected:
        copyRow(fNullRow, &row);
    }

+    inline void updateIntMinMax(int128_t* val1, int128_t* val2, int64_t col, int func);
    inline void updateIntMinMax(int64_t val1, int64_t val2, int64_t col, int func);
    inline void updateUintMinMax(uint64_t val1, uint64_t val2, int64_t col, int func);
    inline void updateCharMinMax(uint64_t val1, uint64_t val2, int64_t col, int func);
--- a/utils/rowgroup/rowgroup.h
+++ b/utils/rowgroup/rowgroup.h
@ -810,25 +810,16 @@ inline uint32_t Row::getStringLength(uint32_t colIndex) const
    return strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex));
 }

-// WIP Remove this
-// Check whether memcpy affects perf here
-/*inline void Row::setBinaryField(const uint8_t* strdata, uint32_t length, uint32_t offset)
-{
-    memcpy(&data[offset], strdata, length);
-}*/
-
-// MCOL-641. This method can be applied to uint8_t* buffers.
 template<typename T>
 inline void Row::setBinaryField(T* value, uint32_t width, uint32_t colIndex)
 {
    memcpy(&data[offsets[colIndex]], value, width);
 }

-// MCOL-641. This method !cannot! be applied to uint8_t* buffers.
+// This method must not be used with uint8_t* buffers: it assigns one T and ignores width.
 template<typename T>
 inline void Row::setBinaryField_offset(T* value, uint32_t width, uint32_t offset)
 {
-    // WIP Compare performance.
    *reinterpret_cast<T*>(&data[offset]) = *value;
 }

@ -871,23 +862,15 @@ inline std::string Row::getStringField(uint32_t colIndex) const
                       strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex)));
 }

-/*inline std::string Row::getBinaryField(uint32_t colIndex) const
-{
-    return std::string((char*) &data[offsets[colIndex]], getColumnWidth(colIndex));
-}*/
-
-// WIP MCOL-641
 template <typename T>
 inline T* Row::getBinaryField(uint32_t colIndex) const
 {
-    //return reinterpret_cast<T*>(&data[offsets[colIndex]]);
    return getBinaryField_offset<T>(offsets[colIndex]);
 }

 template <typename T>
 inline T* Row::getBinaryField(T* argtype, uint32_t colIndex) const
 {
-    //return reinterpret_cast<T*>(&data[offsets[colIndex]]);
    return getBinaryField_offset<T>(offsets[colIndex]);
 }