MCOL-641 Simple aggregates support: min, max, sum, avg for wide-DECIMALs.

2025-07-29 08:21:15 +03:00 · 2020-05-08 10:17:17 +00:00
parent 3d94ec1568
commit e88cbe9bc1
9 changed files with 343 additions and 212 deletions
--- a/datatypes/mcs_decimal.cpp
+++ b/datatypes/mcs_decimal.cpp
@ -203,6 +203,11 @@ namespace datatypes
        return std::string(buf);
    }
    // Const-reference convenience overload: strips constness from the
    // argument and hands it to the toString(execplan::IDB_Decimal&) overload.
    std::string Decimal::toString(const execplan::IDB_Decimal& value)
    {
        execplan::IDB_Decimal& mutableValue =
            const_cast<execplan::IDB_Decimal&>(value);
        return toString(mutableValue);
    }
    int Decimal::compare(const execplan::IDB_Decimal& l, const execplan::IDB_Decimal& r)
    {
        int128_t divisorL, divisorR;
--- a/datatypes/mcs_decimal.h
+++ b/datatypes/mcs_decimal.h
@ -24,6 +24,7 @@
 #include "calpontsystemcatalog.h"
 using int128_t = __int128;
 using ColTypeAlias = execplan::CalpontSystemCatalog::ColType;
 namespace execplan
 {
@ -37,6 +38,8 @@ constexpr uint32_t MAXDECIMALWIDTH = 16U;
 constexpr uint8_t INT64MAXPRECISION = 18U;
 constexpr uint8_t INT128MAXPRECISION = 38U;
 constexpr uint8_t MAXLEGACYWIDTH = 8U;
 constexpr uint8_t MAXSCALEINC4AVG = 4U;
 constexpr int8_t IGNOREPRECISION = -1;
 const uint64_t mcs_pow_10[20] =
 {
@ -159,12 +162,13 @@ class Decimal
            @brief Convenience method to put decimal into a std::string.
        */
        static std::string toString(execplan::IDB_Decimal& value);
        static std::string toString(const execplan::IDB_Decimal& value);
        /**
            @brief The method detects whether decimal type is wide
            using csc data type.
        */
-        static constexpr inline bool isWideDecimalType(const execplan::CalpontSystemCatalog::ColType& ct)
+        static constexpr inline bool isWideDecimalType(const ColTypeAlias& ct)
        {
            return ((ct.colDataType == execplan::CalpontSystemCatalog::DECIMAL ||
                ct.colDataType == execplan::CalpontSystemCatalog::UDECIMAL) &&
@ -185,7 +189,7 @@ class Decimal
            @brief The method sets the legacy scale and precision of a wide decimal
            column which is the result of an arithmetic operation.
        */
-        static inline void setDecimalScalePrecisionLegacy(execplan::CalpontSystemCatalog::ColType& ct,
+        static inline void setDecimalScalePrecisionLegacy(ColTypeAlias& ct,
            unsigned int precision, unsigned int scale)
        {
            ct.scale = scale;
@ -200,7 +204,7 @@ class Decimal
            @brief The method sets the scale and precision of a wide decimal
            column which is the result of an arithmetic operation.
        */
-        static inline void setDecimalScalePrecision(execplan::CalpontSystemCatalog::ColType& ct,
+        static inline void setDecimalScalePrecision(ColTypeAlias& ct,
            unsigned int precision, unsigned int scale)
        {
            ct.colWidth = (precision > INT64MAXPRECISION)
@ -216,7 +220,7 @@ class Decimal
            @brief The method sets the scale and precision of a wide decimal
            column which is the result of an arithmetic operation, based on a heuristic.
        */
-        static inline void setDecimalScalePrecisionHeuristic(execplan::CalpontSystemCatalog::ColType& ct,
+        static inline void setDecimalScalePrecisionHeuristic(ColTypeAlias& ct,
            unsigned int precision, unsigned int scale)
        {
            unsigned int diff = 0;
@ -337,6 +341,19 @@ class Decimal
            return static_cast<int64_t>(value);
        }
        /**
            @brief MDB increases scale by up to 4 digits when calculating avg().
            The method widens both scale and precision in place by
            MAXSCALEINC4AVG digits, or by whatever headroom is left below
            INT128MAXPRECISION if that is smaller.
            @param precision in/out: precision to widen.
            @param scale in/out: scale to widen.
            A value that is already at or above INT128MAXPRECISION is left
            unchanged.
        */
        static inline void setScalePrecision4Avg(
            unsigned int& precision,
            unsigned int& scale)
        {
            // The subtraction is done in unsigned arithmetic, so it must be
            // guarded: without the check a value above INT128MAXPRECISION
            // would wrap around to a huge "available" headroom and get
            // increased even further.
            const uint32_t scaleAvailable = (scale < INT128MAXPRECISION)
                ? INT128MAXPRECISION - scale
                : 0;
            const uint32_t precisionAvailable = (precision < INT128MAXPRECISION)
                ? INT128MAXPRECISION - precision
                : 0;
            scale += (scaleAvailable >= MAXSCALEINC4AVG) ? MAXSCALEINC4AVG : scaleAvailable;
            precision += (precisionAvailable >= MAXSCALEINC4AVG) ? MAXSCALEINC4AVG : precisionAvailable;
        }
 };
 /**
--- a/dbcon/joblist/tupleaggregatestep.cpp
+++ b/dbcon/joblist/tupleaggregatestep.cpp
@ -343,6 +343,41 @@ string keyName(uint64_t i, uint32_t key, const joblist::JobInfo& jobInfo)
 namespace joblist
 {
 // Appends the description of one aggregate output column to the *Agg
 // vectors. The column keeps the projected type, scale, precision and width
 // when it is a DECIMAL/UDECIMAL whose projected precision is reported as
 // wide by datatypes::Decimal::isWideDecimalType(); otherwise it is described
 // as a LONGDOUBLE with scale 0, precision -1 and width sizeof(long double).
 // The OID (taken from oidsProj[colProj]) and aggKey are appended either way.
 void wideDecimalOrLongDouble(const uint64_t colProj,
    const CalpontSystemCatalog::ColDataType type,
    const vector<uint32_t>& precisionProj,
    const vector<uint32_t>& oidsProj,
    const uint32_t aggKey,
    const vector<uint32_t>& scaleProj,
    const vector<uint32_t>& width,
    vector<uint32_t>& oidsAgg,
    vector<uint32_t>& keysAgg,
    vector<CalpontSystemCatalog::ColDataType>& typeAgg,
    vector<uint32_t>& scaleAgg,
    vector<uint32_t>& precisionAgg,
    vector<uint32_t>& widthAgg)
 {
    const bool isDecimalType = (type == CalpontSystemCatalog::DECIMAL
        || type == CalpontSystemCatalog::UDECIMAL);
    // The precision lookup only happens for decimal types (short-circuit).
    const bool keepWideDecimal = isDecimalType
        && datatypes::Decimal::isWideDecimalType(precisionProj[colProj]);

    // These two entries do not depend on the outcome of the check above.
    oidsAgg.push_back(oidsProj[colProj]);
    keysAgg.push_back(aggKey);

    if (!keepWideDecimal)
    {
        // Fallback: the aggregate is carried as a long double.
        typeAgg.push_back(CalpontSystemCatalog::LONGDOUBLE);
        scaleAgg.push_back(0);
        precisionAgg.push_back(-1);
        widthAgg.push_back(sizeof(long double));
        return;
    }

    // Wide decimal: propagate the projected column attributes unchanged.
    typeAgg.push_back(type);
    scaleAgg.push_back(scaleProj[colProj]);
    precisionAgg.push_back(precisionProj[colProj]);
    widthAgg.push_back(width[colProj]);
 }
 TupleAggregateStep::TupleAggregateStep(
    const SP_ROWAGG_UM_t& agg,
@ -717,25 +752,47 @@ void TupleAggregateStep::configDeliveredRowGroup(const JobInfo& jobInfo)
    // correct the scale
    vector<uint32_t> scale = fRowGroupOut.getScale();
    vector<uint32_t> precision = fRowGroupOut.getPrecision();
-//    for (uint64_t i = 0; i < scale.size(); i++)
+    size_t retColCount = 0;
-//    {
+    auto scaleIter = scale.begin();
-        // to support CNX_DECIMAL_SCALE the avg column's scale is coded with two scales:
+    auto precisionIter = precision.begin();
        // fe's avg column scale << 8 + original column scale
        //if ((scale[i] & 0x0000FF00) > 0)
 //        scale[i] = scale[i] &  0x000000FF;
 //    }
    size_t retColCount = jobInfo.nonConstDelCols.size();
    if (jobInfo.havingStep)
    {
        retColCount = jobInfo.returnedColVec.size();
        idbassert(jobInfo.returnedColVec.size() == jobInfo.nonConstCols.size());
        for (auto& rc : jobInfo.nonConstCols)
        {
            auto& colType = rc->resultType();
            if (datatypes::Decimal::isWideDecimalType(colType))
            {
                *scaleIter = colType.scale;
                *precisionIter = colType.precision;
            }
            scaleIter++; precisionIter++;
        }
    }
    else
    {
        retColCount = jobInfo.nonConstDelCols.size();
        for (auto& rc : jobInfo.nonConstDelCols)
        {
            auto& colType = rc->resultType();
            if (datatypes::Decimal::isWideDecimalType(colType))
            {
                *scaleIter = colType.scale;
                *precisionIter = colType.precision;
            }
            scaleIter++; precisionIter++;
        }
    }
    vector<uint32_t>::const_iterator offsets0 = fRowGroupOut.getOffsets().begin();
    vector<CalpontSystemCatalog::ColDataType>::const_iterator types0 =
        fRowGroupOut.getColTypes().begin();
    vector<uint32_t> csNums = fRowGroupOut.getCharsetNumbers();
-    vector<uint32_t>::const_iterator precision0 = fRowGroupOut.getPrecision().begin();
+    vector<uint32_t>::const_iterator precision0 = precision.begin();
    fRowGroupDelivered = RowGroup(retColCount,
                                  vector<uint32_t>(offsets0, offsets0 + retColCount + 1),
                                  vector<uint32_t>(oids.begin(), oids.begin() + retColCount),
@ -896,7 +953,6 @@ SJSTEP TupleAggregateStep::prepAggregate(SJSTEP& step, JobInfo& jobInfo)
    // preprocess the columns used by group_concat
    jobInfo.groupConcatInfo.prepGroupConcat(jobInfo);
    bool doUMOnly = jobInfo.groupConcatInfo.columns().size() > 0
 //                 || jobInfo.windowSet.size() > 0
                 || sas
                 || ces;
@ -1303,14 +1359,11 @@ void TupleAggregateStep::prep1PhaseAggregate(
                    cerr << "prep1PhaseAggregate: " << emsg << endl;
                    throw IDBExcept(emsg, ERR_AGGREGATE_TYPE_NOT_SUPPORT);
                }
-
+                wideDecimalOrLongDouble(colProj, typeProj[colProj],
-                oidsAgg.push_back(oidsProj[colProj]);
+                    precisionProj, oidsProj, key, scaleProj, width,
-                keysAgg.push_back(key);
+                    oidsAgg, keysAgg, typeAgg, scaleAgg,
-                typeAgg.push_back(CalpontSystemCatalog::LONGDOUBLE);
+                    precisionAgg, widthAgg);
                csNumAgg.push_back(csNumProj[colProj]);
                precisionAgg.push_back(-1);
                widthAgg.push_back(sizeof(long double));
                scaleAgg.push_back(0);
            }
            break;
@ -1755,11 +1808,6 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
                throw logic_error(emsg.str());
            }
            // skip sum / count(column) if avg is also selected
 //            if ((aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) &&
 //                    (avgSet.find(aggKey) != avgSet.end()))
 //                continue;
            if (aggOp == ROWAGG_DISTINCT_SUM ||
                    aggOp == ROWAGG_DISTINCT_AVG ||
                    aggOp == ROWAGG_COUNT_DISTINCT_COL_NAME)
@ -3128,31 +3176,11 @@ void TupleAggregateStep::prep2PhasesAggregate(
                        cerr << "prep2PhasesAggregate: " << emsg << endl;
                        throw IDBExcept(emsg, ERR_AGGREGATE_TYPE_NOT_SUPPORT);
                    }
-
+                    wideDecimalOrLongDouble(colProj, typeProj[colProj],
-                    // WIP MCOL-641 Replace condition with a
+                        precisionProj, oidsProj, aggKey, scaleProj, width,
-                    // dynamic one
+                        oidsAggPm, keysAggPm, typeAggPm, scaleAggPm,
-                    if (typeProj[colProj] == CalpontSystemCatalog::DECIMAL
+                        precisionAggPm, widthAggPm);
-                        && width[colProj] == 16)
+                    scaleAggPm.push_back(0);
                    { 
                        oidsAggPm.push_back(oidsProj[colProj]);
                        keysAggPm.push_back(aggKey);
                        typeAggPm.push_back(CalpontSystemCatalog::DECIMAL);
                        scaleAggPm.push_back(0);
                        // WIP makes this dynamic
                        precisionAggPm.push_back(38);
                        widthAggPm.push_back(width[colProj]);
                        csNumAggPm.push_back(8);
                    }
                    else
                    {
                        oidsAggPm.push_back(oidsProj[colProj]);
                        keysAggPm.push_back(aggKey);
                        typeAggPm.push_back(CalpontSystemCatalog::LONGDOUBLE);
                        scaleAggPm.push_back(0);
                        csNumAggPm.push_back(8);
                        precisionAggPm.push_back(-1);
                        widthAggPm.push_back(sizeof(long double));
                    }
                    colAggPm++;
                }
@ -3435,13 +3463,11 @@ void TupleAggregateStep::prep2PhasesAggregate(
                        if (aggOp == ROWAGG_SUM)
                        {
-                            oidsAggUm.push_back(oidsAggPm[colPm]);
+                            wideDecimalOrLongDouble(colPm, typeProj[colPm],
-                            keysAggUm.push_back(retKey);
+                                precisionProj, oidsProj, retKey, scaleProj, widthAggPm,
-                            scaleAggUm.push_back(0);
+                                oidsAggUm, keysAggUm, typeAggUm, scaleAggUm,
-                            typeAggUm.push_back(CalpontSystemCatalog::LONGDOUBLE);
+                                precisionAggUm, widthAggUm);
                            csNumAggUm.push_back(8);
                            precisionAggUm.push_back(-1);
                            widthAggUm.push_back(sizeof(long double));
                        }
                        else
                        {
--- a/dbcon/mysql/ha_mcs_execplan.cpp
+++ b/dbcon/mysql/ha_mcs_execplan.cpp
@ -3621,7 +3621,8 @@ ArithmeticColumn* buildArithmeticColumn(
            unsigned int precision = idp->max_length;
            unsigned int scale = idp->decimals;
-            datatypes::Decimal::setDecimalScalePrecisionLegacy(mysql_type, precision, scale);
+            datatypes::Decimal::setDecimalScalePrecisionLegacy(mysql_type,
                precision, scale);
        }
        else
        {
@ -3638,7 +3639,8 @@ ArithmeticColumn* buildArithmeticColumn(
                int32_t scale2 = pt->right()->data()->resultType().scale;
                if (funcName == "/" &&
-                    (mysql_type.scale - (scale1 - scale2)) > datatypes::INT128MAXPRECISION)
+                    (mysql_type.scale - (scale1 - scale2)) >
                        datatypes::INT128MAXPRECISION)
                {
                    Item_decimal* idp = (Item_decimal*)item;
@ -4980,15 +4982,33 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
            // use the first parm for result type.
            parm = ac->aggParms()[0];
-            // WIP why do we use LONGDOUBLE for AVG?
+            bool isAvg = (isp->sum_func() == Item_sum::AVG_FUNC ||
-            if (isp->sum_func() == Item_sum::AVG_FUNC ||
+                    isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC);
-                    isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC)
+            if (isAvg || isp->sum_func() == Item_sum::SUM_FUNC ||
                     isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC)
            {
                CalpontSystemCatalog::ColType ct = parm->resultType();
-                ct.colDataType = CalpontSystemCatalog::LONGDOUBLE;
+                if (datatypes::Decimal::isWideDecimalType(ct))
-                ct.colWidth = sizeof(long double);
+                {
-                ct.scale += 4;
+                    uint32_t precision = ct.precision;
-                ct.precision = -1;
+                    uint32_t scale = ct.scale;
                    if (isAvg)
                    {
                        datatypes::Decimal::setScalePrecision4Avg(precision, scale);
                    }
                    ct.precision = precision;
                    ct.scale = scale;
                }
                else
                {
                    ct.colDataType = CalpontSystemCatalog::LONGDOUBLE;
                    ct.colWidth = sizeof(long double);
                    if (isAvg)
                    {
                        ct.scale += datatypes::MAXSCALEINC4AVG;
                    }
                    ct.precision = datatypes::IGNOREPRECISION;
                }
                ac->resultType(ct);
            }
            else if (isp->sum_func() == Item_sum::COUNT_FUNC ||
@ -5000,25 +5020,6 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
                ct.scale = parm->resultType().scale;
                ac->resultType(ct);
            }
            else if (isp->sum_func() == Item_sum::SUM_FUNC ||
                     isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC)
            {
                // WIP MCOL-641 This fast hack breaks aggregates for
                // all float DT's
                // UPD it doesn't break b/c actual DT for result type
                // is set during JobList creation.
                /*CalpontSystemCatalog::ColType ct = parm->resultType();
                ct.colDataType = CalpontSystemCatalog::LONGDOUBLE;
                ct.colWidth = sizeof(long double);
                ct.precision = -1;*/
                CalpontSystemCatalog::ColType ct = parm->resultType();
                ct.colDataType = CalpontSystemCatalog::DECIMAL;
                ct.colWidth = 16;
                ct.precision = 38;
                // WIP set the scale if argument is a float-based DT
                ct.scale = 0;
                ac->resultType(ct);
            }
            else if (isp->sum_func() == Item_sum::STD_FUNC ||
                     isp->sum_func() == Item_sum::VARIANCE_FUNC)
            {
@ -5058,7 +5059,11 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
        }
        // adjust decimal result type according to internalDecimalScale
-        if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL)
+        bool isWideDecimal =
            datatypes::Decimal::isWideDecimalType(ac->resultType());
        // This must also be valid for UDECIMAL
        if (!isWideDecimal && gwi.internalDecimalScale >= 0
            && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL)
        {
            CalpontSystemCatalog::ColType ct = ac->resultType();
            ct.scale = gwi.internalDecimalScale;
--- a/dbcon/mysql/ha_mcs_partition.cpp
+++ b/dbcon/mysql/ha_mcs_partition.cpp
@ -1100,7 +1100,7 @@ extern "C"
                    mapit = partMap.find(logicalPartNum);
-                    int state;
+                    int state = CP_INVALID;
                    if (ct.colWidth <= 8)
                        state = em.getExtentMaxMin(iter->range.start, partInfo.max, partInfo.min, seqNum);
--- a/utils/joiner/tuplejoiner.cpp
+++ b/utils/joiner/tuplejoiner.cpp
@ -1127,7 +1127,7 @@ void TupleJoiner::updateCPData(const Row& r)
        }
        else
        {
-            int64_t val;
+            int64_t val = 0;
            if (r.getColType(colIdx) == CalpontSystemCatalog::LONGDOUBLE)
            {
                double dval = (double)roundl(r.getLongDoubleField(colIdx));
--- a/utils/rowgroup/rowaggregation.cpp
+++ b/utils/rowgroup/rowaggregation.cpp
@ -1,6 +1,6 @@
 /*
   Copyright (C) 2014 InfiniDB, Inc.
-   Copyright (c) 2019 MariaDB Corporation
+   Copyright (c) 2019-2020 MariaDB Corporation
   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
@ -57,6 +57,8 @@
 //..comment out NDEBUG to enable assertions, uncomment NDEBUG to disable
 //#define NDEBUG
 #include "funcexp/utils_utf8.h"
 #include "mcs_decimal.h"
 using namespace std;
 using namespace boost;
@ -70,12 +72,16 @@ namespace
 const int64_t AGG_ROWGROUP_SIZE = 256;
 template <typename T>
-bool minMax(T d1, T d2, int type)
+inline bool minMax(T d1, T d2, int type)
 {
    if (type == rowgroup::ROWAGG_MIN) return d1 < d2;
    else                              return d1 > d2;
 }
 // Pointer overload for 128-bit values: compares the pointed-to integers.
 // Returns *d1 < *d2 when type is rowgroup::ROWAGG_MIN and *d1 > *d2 for
 // any other type value.
 inline bool minMax(int128_t* d1, int128_t* d2, int type)
 {
    if (type == rowgroup::ROWAGG_MIN)
    {
        return *d1 < *d2;
    }

    return *d1 > *d2;
 }
 inline int64_t getIntNullValue(int colType)
 {
@ -334,6 +340,16 @@ inline bool ExternalKeyEq::operator()(const RowPosition& pos1, const RowPosition
 static const string overflowMsg("Aggregation overflow.");
 // 128-bit variant of the min/max update. Stores *val1 into output column
 // `col` of fRow when that column is currently NULL, or when
 // minMax(val1, val2, func) is true. val2 is only used for that comparison.
 inline void RowAggregation::updateIntMinMax(int128_t* val1, int128_t* val2, int64_t col, int func)
 {
    const int32_t outOffset = fRow.getOffset(col);

    // Short-circuit keeps the original evaluation order: the comparison is
    // only performed when the output column is not NULL.
    const bool storeInput = isNull(fRowGroupOut, fRow, col)
        || minMax(val1, val2, func);

    if (storeInput)
    {
        fRow.setBinaryField_offset(val1, sizeof(int128_t), outOffset);
    }
 }
 inline void RowAggregation::updateIntMinMax(int64_t val1, int64_t val2, int64_t col, int func)
 {
    if (isNull(fRowGroupOut, fRow, col))
@ -1010,13 +1026,31 @@ void RowAggregation::initMapData(const Row& rowIn)
            case execplan::CalpontSystemCatalog::MEDINT:
            case execplan::CalpontSystemCatalog::INT:
            case execplan::CalpontSystemCatalog::BIGINT:
            case execplan::CalpontSystemCatalog::DECIMAL:
            case execplan::CalpontSystemCatalog::UDECIMAL:
            {
                fRow.setIntField(rowIn.getIntField(colIn), colOut);
                break;
            }
            case execplan::CalpontSystemCatalog::DECIMAL:
            case execplan::CalpontSystemCatalog::UDECIMAL:
            {
                if (LIKELY(fRow.getColumnWidth(colIn) == datatypes::MAXDECIMALWIDTH))
                {
                    uint32_t colOutOffset = fRow.getOffset(colOut);
                    fRow.setBinaryField_offset(
                        rowIn.getBinaryField<int128_t>(colIn),
                        sizeof(int128_t),
                        colOutOffset);
                }
                else
                {
                    fRow.setIntField(rowIn.getIntField(colIn), colOut);
                }
                break;
            }
            case execplan::CalpontSystemCatalog::UTINYINT:
            case execplan::CalpontSystemCatalog::USMALLINT:
            case execplan::CalpontSystemCatalog::UMEDINT:
@ -1113,8 +1147,6 @@ void RowAggregation::makeAggFieldsNull(Row& row)
                fFunctionCols[i]->fAggFunction == ROWAGG_GROUP_CONCAT ||
                fFunctionCols[i]->fAggFunction == ROWAGG_STATS)
        {
 //			done by memset
 //			row.setIntField(0, colOut);
            continue;
        }
@ -1160,18 +1192,18 @@ void RowAggregation::makeAggFieldsNull(Row& row)
            case execplan::CalpontSystemCatalog::UDECIMAL:
            {
                int colWidth = fRowGroupOut->getColumnWidth(colOut);
-                if (colWidth <= 8)
+                if (LIKELY(colWidth == datatypes::MAXDECIMALWIDTH))
                {
                    uint32_t offset = row.getOffset(colOut);
                    row.setBinaryField_offset(
                        const_cast<int128_t*>(&datatypes::Decimal128Null),
                        colWidth,
                        offset);
                }
                else if (colWidth == datatypes::MAXLEGACYWIDTH)
                {
                    row.setIntField(getUintNullValue(colDataType, colWidth), colOut);
                }
                else
                {
                    int128_t nullValue = 0;
                    utils::setWideDecimalNullValue(nullValue);
                    uint32_t offset = row.getOffset(colOut);
                    row.setBinaryField_offset(&nullValue, sizeof(nullValue),
                        offset);
                }
                break;
            }
@ -1183,7 +1215,7 @@ void RowAggregation::makeAggFieldsNull(Row& row)
            {
                int colWidth = fRowGroupOut->getColumnWidth(colOut);
-                if (colWidth <= 8)
+                if (colWidth <= datatypes::MAXLEGACYWIDTH)
                {
                    row.setUintField(getUintNullValue(colDataType, colWidth), colOut);
                }
@ -1256,8 +1288,6 @@ void RowAggregation::doMinMax(const Row& rowIn, int64_t colIn, int64_t colOut, i
        case execplan::CalpontSystemCatalog::MEDINT:
        case execplan::CalpontSystemCatalog::INT:
        case execplan::CalpontSystemCatalog::BIGINT:
        case execplan::CalpontSystemCatalog::DECIMAL:
        case execplan::CalpontSystemCatalog::UDECIMAL:
        {
            int64_t valIn = rowIn.getIntField(colIn);
            int64_t valOut = fRow.getIntField(colOut);
@ -1265,6 +1295,24 @@ void RowAggregation::doMinMax(const Row& rowIn, int64_t colIn, int64_t colOut, i
            break;
        }
        case execplan::CalpontSystemCatalog::DECIMAL:
        case execplan::CalpontSystemCatalog::UDECIMAL:
        {
            if (LIKELY(fRow.getColumnWidth(colIn) == datatypes::MAXDECIMALWIDTH))
            {
                updateIntMinMax(rowIn.getBinaryField<int128_t>(colIn),
                    fRow.getBinaryField<int128_t>(colOut),
                    colOut, funcType);
            }
            else
            {
                int64_t valIn = rowIn.getIntField(colIn);
                int64_t valOut = fRow.getIntField(colOut);
                updateIntMinMax(valIn, valOut, colOut, funcType);
            }
            break;
        }
        case execplan::CalpontSystemCatalog::UTINYINT:
        case execplan::CalpontSystemCatalog::USMALLINT:
        case execplan::CalpontSystemCatalog::UMEDINT:
@ -1340,17 +1388,12 @@ void RowAggregation::doMinMax(const Row& rowIn, int64_t colIn, int64_t colOut, i
 // Note: NULL value check must be done on UM & PM
 //       UM may receive NULL values, too.
 //------------------------------------------------------------------------------
 // WIP MCOL-641. This and other methods must be type based to avoid needless mem
 // allocation for wide DTs
 void RowAggregation::doSum(const Row& rowIn, int64_t colIn, int64_t colOut, int funcType)
 {
    int colDataType = (fRowGroupIn.getColTypes())[colIn];
    long double valIn = 0;
    bool isWideDataType = false;
-    void *wideValInPtr = NULL;
+    void *wideValInPtr = nullptr;
    // WIP MCOL-641 Probably the width must be taken
    // from colOut
    uint32_t width = fRowGroupOut->getColumnWidth(colOut);
    if (isNull(&fRowGroupIn, rowIn, colIn) == true)
        return;
@ -1380,9 +1423,14 @@ void RowAggregation::doSum(const Row& rowIn, int64_t colIn, int64_t colOut, int
        case execplan::CalpontSystemCatalog::DECIMAL:
        case execplan::CalpontSystemCatalog::UDECIMAL:
        {
-            // WIP MCOL-641 make the size dynamic and use branch prediction cond
+            uint32_t width = fRowGroupOut->getColumnWidth(colOut);
-            isWideDataType = (width) > 8 ? true : false;
+            isWideDataType = width == datatypes::MAXDECIMALWIDTH;
-            if (!isWideDataType)
+            if(LIKELY(isWideDataType))
            {
                int128_t *dec = rowIn.getBinaryField<int128_t>(colIn);
                wideValInPtr = reinterpret_cast<void*>(dec);
            }
            else
            {
                valIn = rowIn.getIntField(colIn);
                double scale = (double)(fRowGroupIn.getScale())[colIn];
@ -1391,19 +1439,6 @@ void RowAggregation::doSum(const Row& rowIn, int64_t colIn, int64_t colOut, int
                    valIn /= pow(10.0, scale);
                }
            }
            else
            {
                if (colDataType == execplan::CalpontSystemCatalog::DECIMAL)
                {
                    int128_t *dec = rowIn.getBinaryField<int128_t>(colIn);
                    wideValInPtr = reinterpret_cast<void*>(dec);
                }
                else
                {
                    uint128_t *dec = rowIn.getBinaryField<uint128_t>(colIn);
                    wideValInPtr = reinterpret_cast<void*>(dec);
                }
            }
            break;
        }
@ -1455,49 +1490,31 @@ void RowAggregation::doSum(const Row& rowIn, int64_t colIn, int64_t colOut, int
            break;
        }
    }
-    // WIP MCOL-641
+    if (LIKELY(!isWideDataType))
    if (!isWideDataType)
    {
-        if (isNull(fRowGroupOut, fRow, colOut))
+        if (LIKELY(!isNull(fRowGroupOut, fRow, colOut)))
        {
            fRow.setLongDoubleField(valIn, colOut);
        }
        else
        {
            long double valOut = fRow.getLongDoubleField(colOut);
            fRow.setLongDoubleField(valIn+valOut, colOut);
        }
        else
        {
            fRow.setLongDoubleField(valIn, colOut);
        }
    }
    else
    {
        uint32_t offset = fRow.getOffset(colOut);
-        if (colDataType == execplan::CalpontSystemCatalog::DECIMAL)
+        int128_t* dec = reinterpret_cast<int128_t*>(wideValInPtr);
        if (LIKELY(!isNull(fRowGroupOut, fRow, colOut)))
        {
-            int128_t *dec = reinterpret_cast<int128_t*>(wideValInPtr);
+            int128_t *valOutPtr = fRow.getBinaryField(valOutPtr, colOut);
-            if (isNull(fRowGroupOut, fRow, colOut))
+            int128_t sum = *valOutPtr + *dec;
-            {
+            fRow.setBinaryField_offset(&sum, sizeof(sum), offset);
                fRow.setBinaryField_offset(dec, sizeof(*dec), offset);
            }
            else
            {
                int128_t *valOutPtr = fRow.getBinaryField(valOutPtr, colOut);
                int128_t sum = *valOutPtr + *dec; 
                fRow.setBinaryField_offset(&sum, sizeof(sum), offset);
            }
        }
        else
        {
-            uint128_t *dec = reinterpret_cast<uint128_t*>(wideValInPtr);
+            fRow.setBinaryField_offset(dec, sizeof(*dec), offset);
            if (isNull(fRowGroupOut, fRow, colOut))
            {
                fRow.setBinaryField_offset(dec, sizeof(*dec), offset);
            }
            else
            {
                uint128_t *valOutPtr = fRow.getBinaryField(valOutPtr, colOut);
                uint128_t sum = *valOutPtr + *dec; 
                fRow.setBinaryField_offset(&sum, sizeof(sum), offset);
            }
        }
    } // end-of isWideDataType block
 }
@ -1791,7 +1808,7 @@ void RowAggregation::updateEntry(const Row& rowIn)
            case ROWAGG_AVG:
                // count(column) for average is inserted after the sum,
-                // colOut+1 is the position of the count column.
+                // colOut+1 is the position of the aux count column.
                doAvg(rowIn, colIn, colOut, colOut + 1);
                break;
@ -1851,6 +1868,8 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6
    int colDataType = (fRowGroupIn.getColTypes())[colIn];
    long double valIn = 0;
    long double valOut = fRow.getLongDoubleField(colOut);
    bool isWideDataType = false;
    void *wideValInPtr = nullptr;
    switch (colDataType)
    {
@ -1862,7 +1881,6 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6
        {
            valIn = rowIn.getIntField(colIn);
            break;
            break;
        }
        case execplan::CalpontSystemCatalog::UTINYINT:
@ -1878,11 +1896,21 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6
        case execplan::CalpontSystemCatalog::DECIMAL:
        case execplan::CalpontSystemCatalog::UDECIMAL:
        {
-            valIn = rowIn.getIntField(colIn);
+            uint32_t width = fRowGroupOut->getColumnWidth(colOut);
-            double scale = (double)(fRowGroupIn.getScale())[colIn];
+            isWideDataType = width == datatypes::MAXDECIMALWIDTH;
-            if (valIn != 0 && scale > 0)
+            if(LIKELY(isWideDataType))
            {
-                valIn /= pow(10.0, scale);
+                int128_t* dec = rowIn.getBinaryField<int128_t>(colIn);
                wideValInPtr = reinterpret_cast<void*>(dec);
            }
            else
            {
                valIn = rowIn.getIntField(colIn);
                double scale = (double)(fRowGroupIn.getScale())[colIn];
                if (valIn != 0 && scale > 0)
                {
                    valIn /= pow(10.0, scale);
                }
            }
            break;
        }
@ -1917,16 +1945,32 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6
        }
    }
-    if (fRow.getUintField(colAux) == 0)
+    // min(count) = 0
    uint64_t count = fRow.getUintField(colAux) + 1;
    fRow.setUintField<8>(count, colAux);
    bool notFirstValue = count > 1;
    if (LIKELY(!isWideDataType))
    {
-        // This is the first value
+        if (LIKELY(notFirstValue))
-        fRow.setLongDoubleField(valIn, colOut);
+            fRow.setLongDoubleField(valIn + valOut, colOut);
-        fRow.setUintField(1, colAux);
+        else // This is the first value
            fRow.setLongDoubleField(valIn, colOut);
    }
    else
    {
-        fRow.setLongDoubleField(valIn + valOut, colOut);
+        uint32_t offset = fRow.getOffset(colOut);
-        fRow.setUintField(fRow.getUintField(colAux) + 1, colAux);
+        int128_t* dec = reinterpret_cast<int128_t*>(wideValInPtr);
        if (LIKELY(notFirstValue))
        {
            int128_t *valOutPtr = fRow.getBinaryField(valOutPtr, colOut);
            int128_t sum = *valOutPtr + *dec;
            fRow.setBinaryField_offset(&sum, sizeof(sum), offset);
        }
        else
        {
            fRow.setBinaryField_offset(dec, sizeof(*dec), offset);
        }
    }
 }
@ -2632,11 +2676,6 @@ void RowAggregationUM::calculateAvgColumns()
            int64_t colOut = fFunctionCols[i]->fOutputColumnIndex;
            int64_t colAux = fFunctionCols[i]->fAuxColumnIndex;
 //            int scale = fRowGroupOut->getScale()[colOut];
 //            int scale1 = scale >> 8;
 //            int scale2 = scale & 0x000000FF;
 //            long double factor = pow(10.0, scale2 - scale1);
            for (uint64_t j = 0; j < fRowGroupOut->getRowCount(); j++)
            {
                fRowGroupOut->getRow(j, &fRow);
@ -2645,14 +2684,38 @@ void RowAggregationUM::calculateAvgColumns()
                if (cnt == 0) // empty set, value is initialized to null.
                    continue;
-                long double sum = 0.0;
+                uint32_t precision = fRow.getPrecision(colOut);
-                long double avg = 0.0;
+                bool isWideDecimal =
                    datatypes::Decimal::isWideDecimalType(precision);
-                // MCOL-1822 Always long double
+                if (LIKELY(!isWideDecimal))
-                sum = fRow.getLongDoubleField(colOut);
+                {
-                avg = sum / cnt;
+                    long double sum = 0.0;
-//                avg *= factor;
+                    long double avg = 0.0;
-                fRow.setLongDoubleField(avg, colOut);
+                    sum = fRow.getLongDoubleField(colOut);
                    avg = sum / cnt;
                    fRow.setLongDoubleField(avg, colOut);
                }
                else
                {
                    uint32_t offset = fRow.getOffset(colOut);
                    uint32_t scale = fRow.getScale(colOut);
                    // Get multiplied to deliver AVG with the scale closest
                    // to the expected original scale + 4. 
                    // There is a counterpart in buildAggregateColumn.
                    datatypes::Decimal::setScalePrecision4Avg(precision, scale);
                    int128_t* sumPnt = fRow.getBinaryField_offset<int128_t>(offset);
                    uint32_t scaleDiff = scale - fRow.getScale(colOut);
                    // multiplication overflow check
                    datatypes::MultiplicationOverflowCheck multOp;
                    int128_t sum = 0;
                    if (scaleDiff > 0)
                        multOp(*sumPnt, datatypes::mcs_pow_10[scaleDiff], sum);
                    else
                        sum = *sumPnt;
                    int128_t avg = sum / cnt;
                    fRow.setBinaryField_offset(&avg, sizeof(avg), offset);
                }
            }
        }
    }
@ -4174,6 +4237,8 @@ void RowAggregationUMP2::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut,
    int colDataType = (fRowGroupIn.getColTypes())[colIn];
    long double valIn = 0;
    long double valOut = fRow.getLongDoubleField(colOut);
    bool isWideDataType = false;
    void *wideValInPtr = nullptr;
    switch (colDataType)
    {
@ -4200,12 +4265,21 @@ void RowAggregationUMP2::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut,
        case execplan::CalpontSystemCatalog::DECIMAL:
        case execplan::CalpontSystemCatalog::UDECIMAL:
        {
-            valIn = rowIn.getIntField(colIn);
+            uint32_t width = fRowGroupOut->getColumnWidth(colOut);
-            break;
+            isWideDataType = width == datatypes::MAXDECIMALWIDTH;
-            double scale = (double)(fRowGroupIn.getScale())[colIn];
+            if(LIKELY(isWideDataType))
            if (valIn != 0 && scale > 0)
            {
-                valIn /= pow(10.0, scale);
+                int128_t* dec = rowIn.getBinaryField<int128_t>(colIn);
                wideValInPtr = reinterpret_cast<void*>(dec);
            }
            else
            {
                valIn = rowIn.getIntField(colIn);
                double scale = (double)(fRowGroupIn.getScale())[colIn];
                if (valIn != 0 && scale > 0)
                {
                    valIn /= pow(10.0, scale);
                }
            }
            break;
        }
@ -4240,16 +4314,36 @@ void RowAggregationUMP2::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut,
        }
    }
-    int64_t cnt = fRow.getUintField(colAux);
+    uint64_t cnt = fRow.getUintField(colAux);
-    if (cnt == 0)
+    if (LIKELY(!isWideDataType))
    {
-        fRow.setLongDoubleField(valIn, colOut);
+        if (LIKELY(cnt > 0))
-        fRow.setUintField(rowIn.getUintField(colIn + 1), colAux);
+        {
            fRow.setLongDoubleField(valIn + valOut, colOut);
            fRow.setUintField(rowIn.getUintField(colIn + 1) + cnt, colAux);
        }
        else
        {
            fRow.setLongDoubleField(valIn, colOut);
            fRow.setUintField(rowIn.getUintField(colIn + 1), colAux);
        }
    }
    else
    {
-        fRow.setLongDoubleField(valIn + valOut, colOut);
+        uint32_t offset = fRow.getOffset(colOut);
-        fRow.setUintField(rowIn.getUintField(colIn + 1) + cnt, colAux);
+        int128_t* dec = reinterpret_cast<int128_t*>(wideValInPtr);
        if (LIKELY(cnt > 0))
        {
            int128_t *valOutPtr = fRow.getBinaryField(valOutPtr, colOut);
            int128_t sum = *valOutPtr + *dec;
            fRow.setBinaryField_offset(&sum, sizeof(sum), offset);
            fRow.setUintField(rowIn.getUintField(colIn + 1) + cnt, colAux);
        }
        else
        {
            fRow.setBinaryField_offset(dec, sizeof(*dec), offset);
            fRow.setUintField(rowIn.getUintField(colIn + 1), colAux);
        }
    }
 }
--- a/utils/rowgroup/rowaggregation.h
+++ b/utils/rowgroup/rowaggregation.h
@ -653,6 +653,7 @@ protected:
        copyRow(fNullRow, &row);
    }
    inline void updateIntMinMax(int128_t* val1, int128_t* val2, int64_t col, int func);
    inline void updateIntMinMax(int64_t val1, int64_t val2, int64_t col, int func);
    inline void updateUintMinMax(uint64_t val1, uint64_t val2, int64_t col, int func);
    inline void updateCharMinMax(uint64_t val1, uint64_t val2, int64_t col, int func);
--- a/utils/rowgroup/rowgroup.h
+++ b/utils/rowgroup/rowgroup.h
@ -810,25 +810,16 @@ inline uint32_t Row::getStringLength(uint32_t colIndex) const
    return strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex));
 }
 // WIP Remove this
 // Check whether memcpy affects perf here
 /*inline void Row::setBinaryField(const uint8_t* strdata, uint32_t length, uint32_t offset)
 {
    memcpy(&data[offset], strdata, length);
 }*/
 // MCOL-641. Copies `width` bytes from `value` into the storage of column
 // `colIndex`. The copy is byte-wise (memcpy), so this method can be applied
 // to uint8_t* buffers as well as to typed values.
 template<typename T>
 inline void Row::setBinaryField(T* value, uint32_t width, uint32_t colIndex)
 {
    // Resolve the column's starting position first, then copy into it.
    auto* const fieldStart = &data[offsets[colIndex]];
    memcpy(fieldStart, value, width);
 }
-// MCOL-641. This method !cannot! be applied to uint8_t* buffers.
+// This method !cannot! be applied to uint8_t* buffers.
 // Writes exactly one T (sizeof(T) bytes), read from `value`, at byte
 // position `offset` of the row buffer. `width` is accepted for signature
 // symmetry with setBinaryField but is not consulted: the number of bytes
 // written is always sizeof(T), which is why passing a uint8_t* buffer here
 // would store a single byte only.
 template<typename T>
 inline void Row::setBinaryField_offset(T* value, uint32_t width, uint32_t offset)
 {
    // Copy through memcpy rather than a typed store via reinterpret_cast:
    // nothing visible here guarantees that &data[offset] satisfies
    // alignof(T) (16 bytes for int128_t), and dereferencing a misaligned T*
    // is undefined behaviour that can fault when the compiler emits an
    // aligned 128-bit move. A fixed-size memcpy writes the same bytes and is
    // normally lowered to a single unaligned move.
    memcpy(&data[offset], value, sizeof(T));
 }
@ -871,23 +862,15 @@ inline std::string Row::getStringField(uint32_t colIndex) const
                       strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex)));
 }
 /*inline std::string Row::getBinaryField(uint32_t colIndex) const
 {
    return std::string((char*) &data[offsets[colIndex]], getColumnWidth(colIndex));
 }*/
 // WIP MCOL-641
 // Returns a T* for column `colIndex`: the column's byte offset is looked up
 // in `offsets` and handed to getBinaryField_offset<T>, which produces the
 // pointer.
 template <typename T>
 inline T* Row::getBinaryField(uint32_t colIndex) const
 {
    const auto fieldOffset = offsets[colIndex];
    return getBinaryField_offset<T>(fieldOffset);
 }
 // Overload of getBinaryField that lets T be deduced from a pointer argument
 // instead of being spelled explicitly. `argtype` is never read; only its
 // static type matters. The result is the same pointer that
 // getBinaryField_offset<T> returns for the column's byte offset.
 template <typename T>
 inline T* Row::getBinaryField(T* argtype, uint32_t colIndex) const
 {
    const auto fieldOffset = offsets[colIndex];
    return getBinaryField_offset<T>(fieldOffset);
 }