diff --git a/datatypes/mcs_decimal.cpp b/datatypes/mcs_decimal.cpp
index 325f67721..c5b61b5fa 100644
--- a/datatypes/mcs_decimal.cpp
+++ b/datatypes/mcs_decimal.cpp
@@ -203,6 +203,11 @@ namespace datatypes
         return std::string(buf);
     }
 
+    // Const-reference overload of the string conversion.
+    // Forwards to the toString(execplan::IDB_Decimal&) overload.
+    std::string Decimal::toString(const execplan::IDB_Decimal& value)
+    { return Decimal::toString(const_cast<execplan::IDB_Decimal&>(value)); }
+
     int Decimal::compare(const execplan::IDB_Decimal& l, const execplan::IDB_Decimal& r)
     {
         int128_t divisorL, divisorR;
diff --git a/datatypes/mcs_decimal.h b/datatypes/mcs_decimal.h
index cf70a357d..346dd2dda 100644
--- a/datatypes/mcs_decimal.h
+++ b/datatypes/mcs_decimal.h
@@ -24,6 +24,7 @@
 #include "calpontsystemcatalog.h"
 
 using int128_t = __int128;
+using ColTypeAlias = execplan::CalpontSystemCatalog::ColType;
 
 namespace execplan
 {
@@ -37,6 +38,8 @@ constexpr uint32_t MAXDECIMALWIDTH = 16U;
 constexpr uint8_t INT64MAXPRECISION = 18U;
 constexpr uint8_t INT128MAXPRECISION = 38U;
 constexpr uint8_t MAXLEGACYWIDTH = 8U;
+constexpr uint8_t MAXSCALEINC4AVG = 4U;
+constexpr int8_t IGNOREPRECISION = -1;
 
 const uint64_t mcs_pow_10[20] =
 {
@@ -159,12 +162,13 @@ class Decimal
             @brief Convenience method to put decimal into a std::string.
         */
         static std::string toString(execplan::IDB_Decimal& value);
+        static std::string toString(const execplan::IDB_Decimal& value);
 
         /**
             @brief The method detects whether decimal type is wide
             using csc data type.
         */
-        static constexpr inline bool isWideDecimalType(const execplan::CalpontSystemCatalog::ColType& ct)
+        static constexpr inline bool isWideDecimalType(const ColTypeAlias& ct)
         {
             return ((ct.colDataType == execplan::CalpontSystemCatalog::DECIMAL ||
                 ct.colDataType == execplan::CalpontSystemCatalog::UDECIMAL) &&
@@ -185,7 +189,7 @@ class Decimal
             @brief The method sets the legacy scale and precision of a wide decimal
             column which is the result of an arithmetic operation.
         */
-        static inline void setDecimalScalePrecisionLegacy(execplan::CalpontSystemCatalog::ColType& ct,
+        static inline void setDecimalScalePrecisionLegacy(ColTypeAlias& ct,
             unsigned int precision, unsigned int scale)
         {
             ct.scale = scale;
@@ -200,7 +204,7 @@ class Decimal
             @brief The method sets the scale and precision of a wide decimal
             column which is the result of an arithmetic operation.
         */
-        static inline void setDecimalScalePrecision(execplan::CalpontSystemCatalog::ColType& ct,
+        static inline void setDecimalScalePrecision(ColTypeAlias& ct,
             unsigned int precision, unsigned int scale)
         {
             ct.colWidth = (precision > INT64MAXPRECISION)
@@ -216,7 +220,7 @@ class Decimal
             @brief The method sets the scale and precision of a wide decimal
             column which is the result of an arithmetic operation, based on a heuristic.
         */
-        static inline void setDecimalScalePrecisionHeuristic(execplan::CalpontSystemCatalog::ColType& ct,
+        static inline void setDecimalScalePrecisionHeuristic(ColTypeAlias& ct,
             unsigned int precision, unsigned int scale)
         {
             unsigned int diff = 0;
@@ -337,6 +341,19 @@ class Decimal
 
             return static_cast<int64_t>(value);
         }
+
+        /**
+            @brief MDB increases scale by up to 4 digits calculating avg().
+            Scale and precision each grow by at most MAXSCALEINC4AVG, capped at
+            INT128MAXPRECISION; a value already past the cap is left unchanged.
+        */
+        static inline void setScalePrecision4Avg(unsigned int& precision, unsigned int& scale)
+        {
+            const uint32_t scaleRoom = (scale < INT128MAXPRECISION) ? INT128MAXPRECISION - scale : 0;
+            const uint32_t precisionRoom = (precision < INT128MAXPRECISION) ? INT128MAXPRECISION - precision : 0;
+            scale += (scaleRoom < MAXSCALEINC4AVG) ? scaleRoom : MAXSCALEINC4AVG;
+            precision += (precisionRoom < MAXSCALEINC4AVG) ? precisionRoom : MAXSCALEINC4AVG;
+        }
 };
 
 /**
diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp
index 789932ff7..6a0b0fe11 100644
--- a/dbcon/joblist/tupleaggregatestep.cpp
+++ b/dbcon/joblist/tupleaggregatestep.cpp
@@ -343,6 +343,41 @@ string keyName(uint64_t i, uint32_t key, const joblist::JobInfo& jobInfo)
 namespace joblist
 {
 
+// Appends the description of one SUM-style aggregate column to the
+// output vectors. A DECIMAL/UDECIMAL input that isWideDecimalType() accepts
+// keeps its own type, scale, precision and width; every other input is
+// described as a LONGDOUBLE column (scale 0, precision -1).
+void wideDecimalOrLongDouble(const uint64_t colProj,
+    const CalpontSystemCatalog::ColDataType type,
+    const vector<uint32_t>& precisionProj,
+    const vector<uint32_t>& oidsProj,
+    const uint32_t aggKey,
+    const vector<uint32_t>& scaleProj,
+    const vector<uint32_t>& width,
+    vector<uint32_t>& oidsAgg,
+    vector<uint32_t>& keysAgg,
+    vector<CalpontSystemCatalog::ColDataType>& typeAgg,
+    vector<uint32_t>& scaleAgg,
+    vector<uint32_t>& precisionAgg,
+    vector<uint32_t>& widthAgg)
+{
+    const bool isDecimal = (type == CalpontSystemCatalog::DECIMAL)
+        || (type == CalpontSystemCatalog::UDECIMAL);
+    // Precision is inspected only for decimal inputs (short-circuit).
+    const bool keepWide = isDecimal
+        && datatypes::Decimal::isWideDecimalType(precisionProj[colProj]);
+
+    // The oid and the key are recorded identically in both cases.
+    oidsAgg.push_back(oidsProj[colProj]);
+    keysAgg.push_back(aggKey);
+
+    // Per-case attributes; the projection vectors are read only when
+    // the wide decimal representation is kept.
+    typeAgg.push_back(keepWide ? type : CalpontSystemCatalog::LONGDOUBLE);
+    scaleAgg.push_back(keepWide ? scaleProj[colProj] : 0);
+    precisionAgg.push_back(keepWide ? precisionProj[colProj] : -1);
+    widthAgg.push_back(keepWide ? width[colProj] : sizeof(long double));
+}
 
 TupleAggregateStep::TupleAggregateStep(
     const SP_ROWAGG_UM_t& agg,
@@ -717,25 +752,47 @@ void TupleAggregateStep::configDeliveredRowGroup(const JobInfo& jobInfo)
 
     // correct the scale
     vector<uint32_t> scale = fRowGroupOut.getScale();
+    vector<uint32_t> precision = fRowGroupOut.getPrecision();
 
-//    for (uint64_t i = 0; i < scale.size(); i++)
-//    {
-        // to support CNX_DECIMAL_SCALE the avg column's scale is coded with two scales:
-        // fe's avg column scale << 8 + original column scale
-        //if ((scale[i] & 0x0000FF00) > 0)
-//        scale[i] = scale[i] &  0x000000FF;
-//    }
-
-    size_t retColCount = jobInfo.nonConstDelCols.size();
+    size_t retColCount = 0;
+    auto scaleIter = scale.begin();
+    auto precisionIter = precision.begin();
 
     if (jobInfo.havingStep)
+    {
         retColCount = jobInfo.returnedColVec.size();
+        idbassert(jobInfo.returnedColVec.size() == jobInfo.nonConstCols.size());
+        for (auto& rc : jobInfo.nonConstCols)
+        {
+            auto& colType = rc->resultType();
+            if (datatypes::Decimal::isWideDecimalType(colType))
+            {
+                *scaleIter = colType.scale;
+                *precisionIter = colType.precision;
+            }
+            scaleIter++; precisionIter++;
+        }
+    }
+    else
+    {
+        retColCount = jobInfo.nonConstDelCols.size();
+        for (auto& rc : jobInfo.nonConstDelCols)
+        {
+            auto& colType = rc->resultType();
+            if (datatypes::Decimal::isWideDecimalType(colType))
+            {
+                *scaleIter = colType.scale;
+                *precisionIter = colType.precision;
+            }
+            scaleIter++; precisionIter++;
+        }
+    }
 
     vector<uint32_t>::const_iterator offsets0 = fRowGroupOut.getOffsets().begin();
     vector<CalpontSystemCatalog::ColDataType>::const_iterator types0 =
         fRowGroupOut.getColTypes().begin();
     vector<uint32_t> csNums = fRowGroupOut.getCharsetNumbers();
-    vector<uint32_t>::const_iterator precision0 = fRowGroupOut.getPrecision().begin();
+    vector<uint32_t>::const_iterator precision0 = precision.begin();
     fRowGroupDelivered = RowGroup(retColCount,
                                   vector<uint32_t>(offsets0, offsets0 + retColCount + 1),
                                   vector<uint32_t>(oids.begin(), oids.begin() + retColCount),
@@ -896,7 +953,6 @@ SJSTEP TupleAggregateStep::prepAggregate(SJSTEP& step, JobInfo& jobInfo)
     // preprocess the columns used by group_concat
     jobInfo.groupConcatInfo.prepGroupConcat(jobInfo);
     bool doUMOnly = jobInfo.groupConcatInfo.columns().size() > 0
-//                 || jobInfo.windowSet.size() > 0
                  || sas
                  || ces;
 
@@ -1303,14 +1359,11 @@ void TupleAggregateStep::prep1PhaseAggregate(
                     cerr << "prep1PhaseAggregate: " << emsg << endl;
                     throw IDBExcept(emsg, ERR_AGGREGATE_TYPE_NOT_SUPPORT);
                 }
-
-                oidsAgg.push_back(oidsProj[colProj]);
-                keysAgg.push_back(key);
-                typeAgg.push_back(CalpontSystemCatalog::LONGDOUBLE);
+                wideDecimalOrLongDouble(colProj, typeProj[colProj],
+                    precisionProj, oidsProj, key, scaleProj, width,
+                    oidsAgg, keysAgg, typeAgg, scaleAgg,
+                    precisionAgg, widthAgg);
                 csNumAgg.push_back(csNumProj[colProj]);
-                precisionAgg.push_back(-1);
-                widthAgg.push_back(sizeof(long double));
-                scaleAgg.push_back(0);
             }
             break;
 
@@ -1755,11 +1808,6 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
                 throw logic_error(emsg.str());
             }
 
-            // skip sum / count(column) if avg is also selected
-//            if ((aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) &&
-//                    (avgSet.find(aggKey) != avgSet.end()))
-//                continue;
-
             if (aggOp == ROWAGG_DISTINCT_SUM ||
                     aggOp == ROWAGG_DISTINCT_AVG ||
                     aggOp == ROWAGG_COUNT_DISTINCT_COL_NAME)
@@ -3128,31 +3176,11 @@ void TupleAggregateStep::prep2PhasesAggregate(
                         cerr << "prep2PhasesAggregate: " << emsg << endl;
                         throw IDBExcept(emsg, ERR_AGGREGATE_TYPE_NOT_SUPPORT);
                     }
-
-                    // WIP MCOL-641 Replace condition with a
-                    // dynamic one
-                    if (typeProj[colProj] == CalpontSystemCatalog::DECIMAL
-                        && width[colProj] == 16)
-                    { 
-                        oidsAggPm.push_back(oidsProj[colProj]);
-                        keysAggPm.push_back(aggKey);
-                        typeAggPm.push_back(CalpontSystemCatalog::DECIMAL);
-                        scaleAggPm.push_back(0);
-                        // WIP makes this dynamic
-                        precisionAggPm.push_back(38);
-                        widthAggPm.push_back(width[colProj]);
-                        csNumAggPm.push_back(8);
-                    }
-                    else
-                    {
-                        oidsAggPm.push_back(oidsProj[colProj]);
-                        keysAggPm.push_back(aggKey);
-                        typeAggPm.push_back(CalpontSystemCatalog::LONGDOUBLE);
-                        scaleAggPm.push_back(0);
-                        csNumAggPm.push_back(8);
-                        precisionAggPm.push_back(-1);
-                        widthAggPm.push_back(sizeof(long double));
-                    }
+                    // Helper appends oid/key/type/scale/precision/width; charset is pushed here.
+                    wideDecimalOrLongDouble(colProj, typeProj[colProj], precisionProj, oidsProj,
+                        aggKey, scaleProj, width, oidsAggPm, keysAggPm, typeAggPm,
+                        scaleAggPm, precisionAggPm, widthAggPm);
+                    csNumAggPm.push_back(8);
                     colAggPm++;
                 }
 
@@ -3435,13 +3463,11 @@ void TupleAggregateStep::prep2PhasesAggregate(
 
                         if (aggOp == ROWAGG_SUM)
                         {
-                            oidsAggUm.push_back(oidsAggPm[colPm]);
-                            keysAggUm.push_back(retKey);
-                            scaleAggUm.push_back(0);
-                            typeAggUm.push_back(CalpontSystemCatalog::LONGDOUBLE);
+                            // colPm indexes the PM aggregate columns, so read the *AggPm vectors.
+                            wideDecimalOrLongDouble(colPm, typeAggPm[colPm], precisionAggPm,
+                                oidsAggPm, retKey, scaleAggPm, widthAggPm, oidsAggUm,
+                                keysAggUm, typeAggUm, scaleAggUm, precisionAggUm, widthAggUm);
                             csNumAggUm.push_back(8);
-                            precisionAggUm.push_back(-1);
-                            widthAggUm.push_back(sizeof(long double));
                         }
                         else
                         {
diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp
index a3a72428f..6bc80e42c 100755
--- a/dbcon/mysql/ha_mcs_execplan.cpp
+++ b/dbcon/mysql/ha_mcs_execplan.cpp
@@ -3621,7 +3621,8 @@ ArithmeticColumn* buildArithmeticColumn(
             unsigned int precision = idp->max_length;
             unsigned int scale = idp->decimals;
 
-            datatypes::Decimal::setDecimalScalePrecisionLegacy(mysql_type, precision, scale);
+            datatypes::Decimal::setDecimalScalePrecisionLegacy(mysql_type,
+                precision, scale);
         }
         else
         {
@@ -3638,7 +3639,8 @@ ArithmeticColumn* buildArithmeticColumn(
                 int32_t scale2 = pt->right()->data()->resultType().scale;
 
                 if (funcName == "/" &&
-                    (mysql_type.scale - (scale1 - scale2)) > datatypes::INT128MAXPRECISION)
+                    (mysql_type.scale - (scale1 - scale2)) >
+                        datatypes::INT128MAXPRECISION)
                 {
                     Item_decimal* idp = (Item_decimal*)item;
 
@@ -4980,15 +4982,33 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
             // use the first parm for result type.
             parm = ac->aggParms()[0];
 
-            // WIP why do we use LONGDOUBLE for AVG?
-            if (isp->sum_func() == Item_sum::AVG_FUNC ||
-                    isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC)
+            bool isAvg = (isp->sum_func() == Item_sum::AVG_FUNC ||
+                    isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC);
+            if (isAvg || isp->sum_func() == Item_sum::SUM_FUNC ||
+                     isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC)
             {
                 CalpontSystemCatalog::ColType ct = parm->resultType();
-                ct.colDataType = CalpontSystemCatalog::LONGDOUBLE;
-                ct.colWidth = sizeof(long double);
-                ct.scale += 4;
-                ct.precision = -1;
+                if (datatypes::Decimal::isWideDecimalType(ct))
+                {
+                    uint32_t precision = ct.precision;
+                    uint32_t scale = ct.scale;
+                    if (isAvg)
+                    {
+                        datatypes::Decimal::setScalePrecision4Avg(precision, scale);
+                    }
+                    ct.precision = precision;
+                    ct.scale = scale;
+                }
+                else
+                {
+                    ct.colDataType = CalpontSystemCatalog::LONGDOUBLE;
+                    ct.colWidth = sizeof(long double);
+                    if (isAvg)
+                    {
+                        ct.scale += datatypes::MAXSCALEINC4AVG;
+                    }
+                    ct.precision = datatypes::IGNOREPRECISION;
+                }
                 ac->resultType(ct);
             }
             else if (isp->sum_func() == Item_sum::COUNT_FUNC ||
@@ -5000,25 +5020,6 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
                 ct.scale = parm->resultType().scale;
                 ac->resultType(ct);
             }
-            else if (isp->sum_func() == Item_sum::SUM_FUNC ||
-                     isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC)
-            {
-                // WIP MCOL-641 This fast hack breaks aggregates for
-                // all float DT's
-                // UPD it doesn't break b/c actual DT for result type
-                // is set during JobList creation.
-                /*CalpontSystemCatalog::ColType ct = parm->resultType();
-                ct.colDataType = CalpontSystemCatalog::LONGDOUBLE;
-                ct.colWidth = sizeof(long double);
-                ct.precision = -1;*/
-                CalpontSystemCatalog::ColType ct = parm->resultType();
-                ct.colDataType = CalpontSystemCatalog::DECIMAL;
-                ct.colWidth = 16;
-                ct.precision = 38;
-                // WIP set the scale if argument is a float-based DT
-                ct.scale = 0;
-                ac->resultType(ct);
-            }
             else if (isp->sum_func() == Item_sum::STD_FUNC ||
                      isp->sum_func() == Item_sum::VARIANCE_FUNC)
             {
@@ -5058,7 +5059,11 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
         }
 
         // adjust decimal result type according to internalDecimalScale
-        if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL)
+        bool isWideDecimal =
+            datatypes::Decimal::isWideDecimalType(ac->resultType());
+        // This must also be valid for UDECIMAL.
+        if (!isWideDecimal && gwi.internalDecimalScale >= 0
+            && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL)
         {
             CalpontSystemCatalog::ColType ct = ac->resultType();
             ct.scale = gwi.internalDecimalScale;
diff --git a/dbcon/mysql/ha_mcs_partition.cpp b/dbcon/mysql/ha_mcs_partition.cpp
index edf357df2..7271fba51 100644
--- a/dbcon/mysql/ha_mcs_partition.cpp
+++ b/dbcon/mysql/ha_mcs_partition.cpp
@@ -1100,7 +1100,7 @@ extern "C"
 
                     mapit = partMap.find(logicalPartNum);
 
-                    int state;
+                    int state = CP_INVALID;
 
                     if (ct.colWidth <= 8)
                         state = em.getExtentMaxMin(iter->range.start, partInfo.max, partInfo.min, seqNum);
diff --git a/utils/joiner/tuplejoiner.cpp b/utils/joiner/tuplejoiner.cpp
index 9282e7253..a8e853553 100644
--- a/utils/joiner/tuplejoiner.cpp
+++ b/utils/joiner/tuplejoiner.cpp
@@ -1127,7 +1127,7 @@ void TupleJoiner::updateCPData(const Row& r)
         }
         else
         {
-            int64_t val;
+            int64_t val = 0;
             if (r.getColType(colIdx) == CalpontSystemCatalog::LONGDOUBLE)
             {
                 double dval = (double)roundl(r.getLongDoubleField(colIdx));
diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp
index ce52b8d2e..ad5d02233 100755
--- a/utils/rowgroup/rowaggregation.cpp
+++ b/utils/rowgroup/rowaggregation.cpp
@@ -1,6 +1,6 @@
 /*
    Copyright (C) 2014 InfiniDB, Inc.
-   Copyright (c) 2019 MariaDB Corporation
+   Copyright (c) 2019-2020 MariaDB Corporation
 
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
@@ -57,6 +57,8 @@
 
 //..comment out NDEBUG to enable assertions, uncomment NDEBUG to disable
 //#define NDEBUG
+#include "funcexp/utils_utf8.h"
+#include "mcs_decimal.h"
 
 using namespace std;
 using namespace boost;
@@ -70,12 +72,16 @@ namespace
 const int64_t AGG_ROWGROUP_SIZE = 256;
 
 template <typename T>
-bool minMax(T d1, T d2, int type)
+inline bool minMax(T d1, T d2, int type)
 {
     if (type == rowgroup::ROWAGG_MIN) return d1 < d2;
     else                              return d1 > d2;
 }
 
+// Pointer overload for 128-bit values: for ROWAGG_MIN returns *d1 < *d2,
+// for any other type returns *d1 > *d2.
+inline bool minMax(int128_t* d1, int128_t* d2, int type)
+{ return (type != rowgroup::ROWAGG_MIN) ? (*d1 > *d2) : (*d1 < *d2); }
 
 inline int64_t getIntNullValue(int colType)
 {
@@ -334,6 +340,16 @@ inline bool ExternalKeyEq::operator()(const RowPosition& pos1, const RowPosition
 
 static const string overflowMsg("Aggregation overflow.");
 
+// 128-bit variant: stores *val1 into output column col when isNull() reports
+// that column as NULL, or when minMax(val1, val2, func) is true.
+inline void RowAggregation::updateIntMinMax(int128_t* val1, int128_t* val2, int64_t col, int func)
+{
+    const bool replace = isNull(fRowGroupOut, fRow, col) || minMax(val1, val2, func);
+    if (replace)
+        fRow.setBinaryField_offset(val1, sizeof(int128_t), fRow.getOffset(col));
+}
+
+
 inline void RowAggregation::updateIntMinMax(int64_t val1, int64_t val2, int64_t col, int func)
 {
     if (isNull(fRowGroupOut, fRow, col))
@@ -1010,13 +1026,31 @@ void RowAggregation::initMapData(const Row& rowIn)
             case execplan::CalpontSystemCatalog::MEDINT:
             case execplan::CalpontSystemCatalog::INT:
             case execplan::CalpontSystemCatalog::BIGINT:
-            case execplan::CalpontSystemCatalog::DECIMAL:
-            case execplan::CalpontSystemCatalog::UDECIMAL:
             {
                 fRow.setIntField(rowIn.getIntField(colIn), colOut);
                 break;
             }
 
+            case execplan::CalpontSystemCatalog::DECIMAL:
+            case execplan::CalpontSystemCatalog::UDECIMAL:
+            {
+                // colIn indexes rowIn, so the width that decides between the
+                // 128-bit and the integer copy has to be read from rowIn.
+                if (LIKELY(rowIn.getColumnWidth(colIn) == datatypes::MAXDECIMALWIDTH))
+                {
+                    uint32_t colOutOffset = fRow.getOffset(colOut);
+                    fRow.setBinaryField_offset(rowIn.getBinaryField<int128_t>(colIn),
+                        sizeof(int128_t), colOutOffset);
+                }
+                else
+                {
+                    fRow.setIntField(rowIn.getIntField(colIn), colOut);
+                }
+
+                break;
+            }
+
+
             case execplan::CalpontSystemCatalog::UTINYINT:
             case execplan::CalpontSystemCatalog::USMALLINT:
             case execplan::CalpontSystemCatalog::UMEDINT:
@@ -1113,8 +1147,6 @@ void RowAggregation::makeAggFieldsNull(Row& row)
                 fFunctionCols[i]->fAggFunction == ROWAGG_GROUP_CONCAT ||
                 fFunctionCols[i]->fAggFunction == ROWAGG_STATS)
         {
-//			done by memset
-//			row.setIntField(0, colOut);
             continue;
         }
 
@@ -1160,18 +1192,18 @@ void RowAggregation::makeAggFieldsNull(Row& row)
             case execplan::CalpontSystemCatalog::UDECIMAL:
             {
                 int colWidth = fRowGroupOut->getColumnWidth(colOut);
-                if (colWidth <= 8)
+                if (LIKELY(colWidth == datatypes::MAXDECIMALWIDTH))
+                {
+                    // Wide decimal: copy datatypes::Decimal128Null into the field.
+                    row.setBinaryField_offset(
+                        const_cast<int128_t*>(&datatypes::Decimal128Null),
+                        colWidth,
+                        row.getOffset(colOut));
+                }
+                else if (colWidth <= datatypes::MAXLEGACYWIDTH) // every narrow width, not only 8
                 {
                     row.setIntField(getUintNullValue(colDataType, colWidth), colOut);
                 }
-                else
-                {
-                    int128_t nullValue = 0;
-                    utils::setWideDecimalNullValue(nullValue);
-                    uint32_t offset = row.getOffset(colOut);
-                    row.setBinaryField_offset(&nullValue, sizeof(nullValue),
-                        offset);
-                }
                 break;
             }
 
@@ -1183,7 +1215,7 @@ void RowAggregation::makeAggFieldsNull(Row& row)
             {
                 int colWidth = fRowGroupOut->getColumnWidth(colOut);
 
-                if (colWidth <= 8)
+                if (colWidth <= datatypes::MAXLEGACYWIDTH)
                 {
                     row.setUintField(getUintNullValue(colDataType, colWidth), colOut);
                 }
@@ -1256,8 +1288,6 @@ void RowAggregation::doMinMax(const Row& rowIn, int64_t colIn, int64_t colOut, i
         case execplan::CalpontSystemCatalog::MEDINT:
         case execplan::CalpontSystemCatalog::INT:
         case execplan::CalpontSystemCatalog::BIGINT:
-        case execplan::CalpontSystemCatalog::DECIMAL:
-        case execplan::CalpontSystemCatalog::UDECIMAL:
         {
             int64_t valIn = rowIn.getIntField(colIn);
             int64_t valOut = fRow.getIntField(colOut);
@@ -1265,6 +1295,24 @@ void RowAggregation::doMinMax(const Row& rowIn, int64_t colIn, int64_t colOut, i
             break;
         }
 
+        case execplan::CalpontSystemCatalog::DECIMAL:
+        case execplan::CalpontSystemCatalog::UDECIMAL:
+        {
+            // colIn is an input-row index: take the width from rowIn, not fRow.
+            if (LIKELY(rowIn.getColumnWidth(colIn) == datatypes::MAXDECIMALWIDTH))
+            {
+                updateIntMinMax(rowIn.getBinaryField<int128_t>(colIn),
+                    fRow.getBinaryField<int128_t>(colOut), colOut, funcType);
+            }
+            else
+            {
+                int64_t valIn = rowIn.getIntField(colIn);
+                int64_t valOut = fRow.getIntField(colOut);
+                updateIntMinMax(valIn, valOut, colOut, funcType);
+            }
+            break;
+        }
+
         case execplan::CalpontSystemCatalog::UTINYINT:
         case execplan::CalpontSystemCatalog::USMALLINT:
         case execplan::CalpontSystemCatalog::UMEDINT:
@@ -1340,17 +1388,12 @@ void RowAggregation::doMinMax(const Row& rowIn, int64_t colIn, int64_t colOut, i
 // Note: NULL value check must be done on UM & PM
 //       UM may receive NULL values, too.
 //------------------------------------------------------------------------------
-// WIP MCOL-641. This and other methods must be type based to avoid needless mem
-// allocation for wide DTs
 void RowAggregation::doSum(const Row& rowIn, int64_t colIn, int64_t colOut, int funcType)
 {
     int colDataType = (fRowGroupIn.getColTypes())[colIn];
     long double valIn = 0;
     bool isWideDataType = false;
-    void *wideValInPtr = NULL;
-    // WIP MCOL-641 Probably the width must be taken
-    // from colOut
-    uint32_t width = fRowGroupOut->getColumnWidth(colOut);
+    void *wideValInPtr = nullptr;
 
     if (isNull(&fRowGroupIn, rowIn, colIn) == true)
         return;
@@ -1380,9 +1423,14 @@ void RowAggregation::doSum(const Row& rowIn, int64_t colIn, int64_t colOut, int
         case execplan::CalpontSystemCatalog::DECIMAL:
         case execplan::CalpontSystemCatalog::UDECIMAL:
         {
-            // WIP MCOL-641 make the size dynamic and use branch prediction cond
-            isWideDataType = (width) > 8 ? true : false;
-            if (!isWideDataType)
+            uint32_t width = fRowGroupOut->getColumnWidth(colOut);
+            isWideDataType = width == datatypes::MAXDECIMALWIDTH;
+            if(LIKELY(isWideDataType))
+            {
+                int128_t *dec = rowIn.getBinaryField<int128_t>(colIn);
+                wideValInPtr = reinterpret_cast<void*>(dec);
+            }
+            else
             {
                 valIn = rowIn.getIntField(colIn);
                 double scale = (double)(fRowGroupIn.getScale())[colIn];
@@ -1391,19 +1439,6 @@ void RowAggregation::doSum(const Row& rowIn, int64_t colIn, int64_t colOut, int
                     valIn /= pow(10.0, scale);
                 }
             }
-            else
-            {
-                if (colDataType == execplan::CalpontSystemCatalog::DECIMAL)
-                {
-                    int128_t *dec = rowIn.getBinaryField<int128_t>(colIn);
-                    wideValInPtr = reinterpret_cast<void*>(dec);
-                }
-                else
-                {
-                    uint128_t *dec = rowIn.getBinaryField<uint128_t>(colIn);
-                    wideValInPtr = reinterpret_cast<void*>(dec);
-                }
-            }
     
             break;
         }
@@ -1455,49 +1490,31 @@ void RowAggregation::doSum(const Row& rowIn, int64_t colIn, int64_t colOut, int
             break;
         }
     }
-    // WIP MCOL-641
-    if (!isWideDataType)
+    if (LIKELY(!isWideDataType))
     {
-        if (isNull(fRowGroupOut, fRow, colOut))
-        {
-            fRow.setLongDoubleField(valIn, colOut);
-        }
-        else
+        if (LIKELY(!isNull(fRowGroupOut, fRow, colOut)))
         {
             long double valOut = fRow.getLongDoubleField(colOut);
             fRow.setLongDoubleField(valIn+valOut, colOut);
         }
+        else
+        {
+            fRow.setLongDoubleField(valIn, colOut);
+        }
     }
     else
     {
         uint32_t offset = fRow.getOffset(colOut);
-        if (colDataType == execplan::CalpontSystemCatalog::DECIMAL)
+        int128_t* dec = reinterpret_cast<int128_t*>(wideValInPtr);
+        if (LIKELY(!isNull(fRowGroupOut, fRow, colOut)))
         {
-            int128_t *dec = reinterpret_cast<int128_t*>(wideValInPtr);
-            if (isNull(fRowGroupOut, fRow, colOut))
-            {
-                fRow.setBinaryField_offset(dec, sizeof(*dec), offset);
-            }
-            else
-            {
-                int128_t *valOutPtr = fRow.getBinaryField(valOutPtr, colOut);
-                int128_t sum = *valOutPtr + *dec; 
-                fRow.setBinaryField_offset(&sum, sizeof(sum), offset);
-            }
+            int128_t *valOutPtr = fRow.getBinaryField(valOutPtr, colOut);
+            int128_t sum = *valOutPtr + *dec;
+            fRow.setBinaryField_offset(&sum, sizeof(sum), offset);
         }
         else
         {
-            uint128_t *dec = reinterpret_cast<uint128_t*>(wideValInPtr);
-            if (isNull(fRowGroupOut, fRow, colOut))
-            {
-                fRow.setBinaryField_offset(dec, sizeof(*dec), offset);
-            }
-            else
-            {
-                uint128_t *valOutPtr = fRow.getBinaryField(valOutPtr, colOut);
-                uint128_t sum = *valOutPtr + *dec; 
-                fRow.setBinaryField_offset(&sum, sizeof(sum), offset);
-            }
+            fRow.setBinaryField_offset(dec, sizeof(*dec), offset);
         }
     } // end-of isWideDataType block
 }
@@ -1791,7 +1808,7 @@ void RowAggregation::updateEntry(const Row& rowIn)
 
             case ROWAGG_AVG:
                 // count(column) for average is inserted after the sum,
-                // colOut+1 is the position of the count column.
+                // colOut+1 is the position of the aux count column.
                 doAvg(rowIn, colIn, colOut, colOut + 1);
                 break;
 
@@ -1851,6 +1868,8 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6
     int colDataType = (fRowGroupIn.getColTypes())[colIn];
     long double valIn = 0;
     long double valOut = fRow.getLongDoubleField(colOut);
+    bool isWideDataType = false;
+    void *wideValInPtr = nullptr;
 
     switch (colDataType)
     {
@@ -1862,7 +1881,6 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6
         {
             valIn = rowIn.getIntField(colIn);
             break;
-            break;
         }
 
         case execplan::CalpontSystemCatalog::UTINYINT:
@@ -1878,11 +1896,21 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6
         case execplan::CalpontSystemCatalog::DECIMAL:
         case execplan::CalpontSystemCatalog::UDECIMAL:
         {
-            valIn = rowIn.getIntField(colIn);
-            double scale = (double)(fRowGroupIn.getScale())[colIn];
-            if (valIn != 0 && scale > 0)
+            uint32_t width = fRowGroupOut->getColumnWidth(colOut);
+            isWideDataType = width == datatypes::MAXDECIMALWIDTH;
+            if(LIKELY(isWideDataType))
             {
-                valIn /= pow(10.0, scale);
+                int128_t* dec = rowIn.getBinaryField<int128_t>(colIn);
+                wideValInPtr = reinterpret_cast<void*>(dec);
+            }
+            else
+            {
+                valIn = rowIn.getIntField(colIn);
+                double scale = (double)(fRowGroupIn.getScale())[colIn];
+                if (valIn != 0 && scale > 0)
+                {
+                    valIn /= pow(10.0, scale);
+                }
             }
             break;
         }
@@ -1917,16 +1945,32 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6
         }
     }
 
-    if (fRow.getUintField(colAux) == 0)
+    // The aux count starts at 0 for an empty group; count this row first.
+    uint64_t count = fRow.getUintField(colAux) + 1;
+    fRow.setUintField<8>(count, colAux);
+    bool notFirstValue = count > 1;
+
+    if (LIKELY(!isWideDataType))
     {
-        // This is the first value
-        fRow.setLongDoubleField(valIn, colOut);
-        fRow.setUintField(1, colAux);
+        if (LIKELY(notFirstValue))
+            fRow.setLongDoubleField(valIn + valOut, colOut);
+        else // This is the first value
+            fRow.setLongDoubleField(valIn, colOut);
     }
     else
     {
-        fRow.setLongDoubleField(valIn + valOut, colOut);
-        fRow.setUintField(fRow.getUintField(colAux) + 1, colAux);
+        uint32_t offset = fRow.getOffset(colOut);
+        int128_t* dec = reinterpret_cast<int128_t*>(wideValInPtr);
+        if (LIKELY(notFirstValue))
+        {
+            int128_t *valOutPtr = fRow.getBinaryField(valOutPtr, colOut);
+            int128_t sum = *valOutPtr + *dec;
+            fRow.setBinaryField_offset(&sum, sizeof(sum), offset);
+        }
+        else
+        {
+            fRow.setBinaryField_offset(dec, sizeof(*dec), offset);
+        }
     }
 }
 
@@ -2632,11 +2676,6 @@ void RowAggregationUM::calculateAvgColumns()
             int64_t colOut = fFunctionCols[i]->fOutputColumnIndex;
             int64_t colAux = fFunctionCols[i]->fAuxColumnIndex;
 
-//            int scale = fRowGroupOut->getScale()[colOut];
-//            int scale1 = scale >> 8;
-//            int scale2 = scale & 0x000000FF;
-//            long double factor = pow(10.0, scale2 - scale1);
-
             for (uint64_t j = 0; j < fRowGroupOut->getRowCount(); j++)
             {
                 fRowGroupOut->getRow(j, &fRow);
@@ -2645,14 +2684,38 @@ void RowAggregationUM::calculateAvgColumns()
                 if (cnt == 0) // empty set, value is initialized to null.
                     continue;
 
-                long double sum = 0.0;
-                long double avg = 0.0;
+                uint32_t precision = fRow.getPrecision(colOut);
+                bool isWideDecimal =
+                    datatypes::Decimal::isWideDecimalType(precision);
 
-                // MCOL-1822 Always long double
-                sum = fRow.getLongDoubleField(colOut);
-                avg = sum / cnt;
-//                avg *= factor;
-                fRow.setLongDoubleField(avg, colOut);
+                if (LIKELY(!isWideDecimal))
+                {
+                    long double sum = 0.0;
+                    long double avg = 0.0;
+                    sum = fRow.getLongDoubleField(colOut);
+                    avg = sum / cnt;
+                    fRow.setLongDoubleField(avg, colOut);
+                }
+                else
+                {
+                    uint32_t offset = fRow.getOffset(colOut);
+                    uint32_t scale = fRow.getScale(colOut);
+                    // Scale the sum up by a power of ten so that AVG is delivered
+                    // with a scale as close as possible to the original scale + 4.
+                    // There is a counterpart in buildAggregateColumn.
+                    datatypes::Decimal::setScalePrecision4Avg(precision, scale);
+                    int128_t* sumPnt = fRow.getBinaryField_offset<int128_t>(offset);
+                    uint32_t scaleDiff = scale - fRow.getScale(colOut);
+                    // multiplication overflow check
+                    datatypes::MultiplicationOverflowCheck multOp;
+                    int128_t sum = 0;
+                    if (scaleDiff > 0)
+                        multOp(*sumPnt, datatypes::mcs_pow_10[scaleDiff], sum);
+                    else
+                        sum = *sumPnt;
+                    int128_t avg = sum / cnt;
+                    fRow.setBinaryField_offset(&avg, sizeof(avg), offset);
+                }
             }
         }
     }
@@ -4174,6 +4237,8 @@ void RowAggregationUMP2::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut,
     int colDataType = (fRowGroupIn.getColTypes())[colIn];
     long double valIn = 0;
     long double valOut = fRow.getLongDoubleField(colOut);
+    bool isWideDataType = false;
+    void *wideValInPtr = nullptr;
 
     switch (colDataType)
     {
@@ -4200,12 +4265,21 @@ void RowAggregationUMP2::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut,
         case execplan::CalpontSystemCatalog::DECIMAL:
         case execplan::CalpontSystemCatalog::UDECIMAL:
         {
-            valIn = rowIn.getIntField(colIn);
-            break;
-            double scale = (double)(fRowGroupIn.getScale())[colIn];
-            if (valIn != 0 && scale > 0)
+            uint32_t width = fRowGroupOut->getColumnWidth(colOut);
+            isWideDataType = width == datatypes::MAXDECIMALWIDTH;
+            if(LIKELY(isWideDataType))
             {
-                valIn /= pow(10.0, scale);
+                int128_t* dec = rowIn.getBinaryField<int128_t>(colIn);
+                wideValInPtr = reinterpret_cast<void*>(dec);
+            }
+            else
+            {
+                valIn = rowIn.getIntField(colIn);
+                double scale = (double)(fRowGroupIn.getScale())[colIn];
+                if (valIn != 0 && scale > 0)
+                {
+                    valIn /= pow(10.0, scale);
+                }
             }
             break;
         }
@@ -4240,16 +4314,36 @@ void RowAggregationUMP2::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut,
         }
     }
 
-    int64_t cnt = fRow.getUintField(colAux);
-    if (cnt == 0)
+    uint64_t cnt = fRow.getUintField(colAux);
+    if (LIKELY(!isWideDataType))
     {
-        fRow.setLongDoubleField(valIn, colOut);
-        fRow.setUintField(rowIn.getUintField(colIn + 1), colAux);
+        if (LIKELY(cnt > 0))
+        {
+            fRow.setLongDoubleField(valIn + valOut, colOut);
+            fRow.setUintField(rowIn.getUintField(colIn + 1) + cnt, colAux);
+        }
+        else
+        {
+            fRow.setLongDoubleField(valIn, colOut);
+            fRow.setUintField(rowIn.getUintField(colIn + 1), colAux);
+        }
     }
     else
     {
-        fRow.setLongDoubleField(valIn + valOut, colOut);
-        fRow.setUintField(rowIn.getUintField(colIn + 1) + cnt, colAux);
+        uint32_t offset = fRow.getOffset(colOut);
+        int128_t* dec = reinterpret_cast<int128_t*>(wideValInPtr);
+        if (LIKELY(cnt > 0))
+        {
+            int128_t *valOutPtr = fRow.getBinaryField(valOutPtr, colOut);
+            int128_t sum = *valOutPtr + *dec;
+            fRow.setBinaryField_offset(&sum, sizeof(sum), offset);
+            fRow.setUintField(rowIn.getUintField(colIn + 1) + cnt, colAux);
+        }
+        else
+        {
+            fRow.setBinaryField_offset(dec, sizeof(*dec), offset);
+            fRow.setUintField(rowIn.getUintField(colIn + 1), colAux);
+        }
     }
 }
 
diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h
index f0d50381f..3c138b1cf 100644
--- a/utils/rowgroup/rowaggregation.h
+++ b/utils/rowgroup/rowaggregation.h
@@ -653,6 +653,7 @@ protected:
         copyRow(fNullRow, &row);
     }
 
+    inline void updateIntMinMax(int128_t* val1, int128_t* val2, int64_t col, int func);
     inline void updateIntMinMax(int64_t val1, int64_t val2, int64_t col, int func);
     inline void updateUintMinMax(uint64_t val1, uint64_t val2, int64_t col, int func);
     inline void updateCharMinMax(uint64_t val1, uint64_t val2, int64_t col, int func);
diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h
index 17d955a1f..7cda3a60a 100644
--- a/utils/rowgroup/rowgroup.h
+++ b/utils/rowgroup/rowgroup.h
@@ -810,25 +810,16 @@ inline uint32_t Row::getStringLength(uint32_t colIndex) const
     return strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex));
 }
 
-// WIP Remove this
-// Check whether memcpy affects perf here
-/*inline void Row::setBinaryField(const uint8_t* strdata, uint32_t length, uint32_t offset)
-{
-    memcpy(&data[offset], strdata, length);
-}*/
-
-// MCOL-641. This method can be applied to uint8_t* buffers.
 template<typename T>
 inline void Row::setBinaryField(T* value, uint32_t width, uint32_t colIndex)
 {
     memcpy(&data[offsets[colIndex]], value, width);
 }
 
-// MCOL-641. This method !cannot! be applied to uint8_t* buffers.
+// This method must NOT be applied to uint8_t* buffers: it stores *value as a whole T and ignores width.
 template<typename T>
 inline void Row::setBinaryField_offset(T* value, uint32_t width, uint32_t offset)
 {
-    // WIP Compare performance.
     *reinterpret_cast<T*>(&data[offset]) = *value;
 }
 
@@ -871,23 +862,15 @@ inline std::string Row::getStringField(uint32_t colIndex) const
                        strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex)));
 }
 
-/*inline std::string Row::getBinaryField(uint32_t colIndex) const
-{
-    return std::string((char*) &data[offsets[colIndex]], getColumnWidth(colIndex));
-}*/
-
-// WIP MCOL-641
 template <typename T>
 inline T* Row::getBinaryField(uint32_t colIndex) const
 {
-    //return reinterpret_cast<T*>(&data[offsets[colIndex]]);
     return getBinaryField_offset<T>(offsets[colIndex]);
 }
 
 template <typename T>
 inline T* Row::getBinaryField(T* argtype, uint32_t colIndex) const
 {
-    //return reinterpret_cast<T*>(&data[offsets[colIndex]]);
     return getBinaryField_offset<T>(offsets[colIndex]);
 }