diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp
index fd256f36b..789932ff7 100644
--- a/dbcon/joblist/tupleaggregatestep.cpp
+++ b/dbcon/joblist/tupleaggregatestep.cpp
@@ -3129,13 +3129,30 @@ void TupleAggregateStep::prep2PhasesAggregate(
                         throw IDBExcept(emsg, ERR_AGGREGATE_TYPE_NOT_SUPPORT);
                     }
 
-                    oidsAggPm.push_back(oidsProj[colProj]);
-                    keysAggPm.push_back(aggKey);
-                    typeAggPm.push_back(CalpontSystemCatalog::LONGDOUBLE);
-                    csNumAggPm.push_back(8);
-                    scaleAggPm.push_back(0);
-                    precisionAggPm.push_back(-1);
-                    widthAggPm.push_back(sizeof(long double));
+                    // WIP MCOL-641 Replace condition with a
+                    // dynamic one
+                    if (typeProj[colProj] == CalpontSystemCatalog::DECIMAL
+                        && width[colProj] == 16)
+                    { 
+                        oidsAggPm.push_back(oidsProj[colProj]);
+                        keysAggPm.push_back(aggKey);
+                        typeAggPm.push_back(CalpontSystemCatalog::DECIMAL);
+                        scaleAggPm.push_back(0);
+                        // WIP makes this dynamic
+                        precisionAggPm.push_back(38);
+                        widthAggPm.push_back(width[colProj]);
+                        csNumAggPm.push_back(8);
+                    }
+                    else
+                    {
+                        oidsAggPm.push_back(oidsProj[colProj]);
+                        keysAggPm.push_back(aggKey);
+                        typeAggPm.push_back(CalpontSystemCatalog::LONGDOUBLE);
+                        scaleAggPm.push_back(0);
+                        csNumAggPm.push_back(8);
+                        precisionAggPm.push_back(-1);
+                        widthAggPm.push_back(sizeof(long double));
+                    }
                     colAggPm++;
                 }
 
diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp
index 1e4557a71..bba566d7b 100755
--- a/dbcon/mysql/ha_mcs_execplan.cpp
+++ b/dbcon/mysql/ha_mcs_execplan.cpp
@@ -4589,7 +4589,6 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
 
     Item_sum* isp = reinterpret_cast<Item_sum*>(item);
     Item** sfitempp = isp->get_orig_args();
-//	Item** sfitempp = isp->arguments();
     SRCP parm;
 
     // @bug4756
@@ -4809,7 +4808,6 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
 
                             if ((fc && fc->functionParms().empty()) || !fc)
                             {
-                                //ac->aggOp(AggregateColumn::COUNT_ASTERISK);
                                 ReturnedColumn* rc = buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError);
 
                                 if (dynamic_cast<ConstantColumn*>(rc))
@@ -4896,6 +4894,7 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
             // use the first parm for result type.
             parm = ac->aggParms()[0];
 
+            // WIP why do we use LONGDOUBLE for AVG?
             if (isp->sum_func() == Item_sum::AVG_FUNC ||
                     isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC)
             {
@@ -4918,10 +4917,20 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
             else if (isp->sum_func() == Item_sum::SUM_FUNC ||
                      isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC)
             {
-                CalpontSystemCatalog::ColType ct = parm->resultType();
+                // WIP MCOL-641 This fast hack breaks aggregates for
+                // all float DT's
+                // UPD it doesn't break b/c actual DT for result type
+                // is set during JobList creation.
+                /*CalpontSystemCatalog::ColType ct = parm->resultType();
                 ct.colDataType = CalpontSystemCatalog::LONGDOUBLE;
                 ct.colWidth = sizeof(long double);
-                ct.precision = -1;
+                ct.precision = -1;*/
+                CalpontSystemCatalog::ColType ct = parm->resultType();
+                ct.colDataType = CalpontSystemCatalog::DECIMAL;
+                ct.colWidth = 16;
+                ct.precision = 38;
+                // WIP set the scale if argument is a float-based DT
+                ct.scale = 0;
                 ac->resultType(ct);
             }
             else if (isp->sum_func() == Item_sum::STD_FUNC ||
diff --git a/primitives/primproc/batchprimitiveprocessor.cpp b/primitives/primproc/batchprimitiveprocessor.cpp
index 54fdcf482..3750c0cde 100644
--- a/primitives/primproc/batchprimitiveprocessor.cpp
+++ b/primitives/primproc/batchprimitiveprocessor.cpp
@@ -1726,7 +1726,7 @@ void BatchPrimitiveProcessor::execute()
                     if (fe2->evaluate(&fe2In))
                     {
                         applyMapping(fe2Mapping, fe2In, &fe2Out);
-                        cerr << "   passed. output row: " << fe2Out.toString() << endl;
+                        //cerr << "   passed. output row: " << fe2Out.toString() << endl;
                         fe2Out.setRid (fe2In.getRelRid());
                         fe2Output.incRowCount();
                         fe2Out.nextRow();
diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp
index c73e751af..7d62c4a78 100755
--- a/utils/rowgroup/rowaggregation.cpp
+++ b/utils/rowgroup/rowaggregation.cpp
@@ -222,6 +222,11 @@ inline string getStringNullValue()
     return joblist::CPNULLSTRMARK;
 }
 
+inline uint64_t getBinaryNullValue()  // 64-bit NULL marker used for DECIMAL fields wider than 8 bytes
+{
+    return joblist::BINARYNULL;  // one 8-byte word; callers nulling a wide field store it in each 8-byte half
+} 
+
 }
 
 
@@ -413,6 +418,7 @@ void RowAggregation::updateStringMinMax(string val1, string val2, int64_t col, i
 inline bool RowAggregation::isNull(const RowGroup* pRowGroup, const Row& row, int64_t col)
 {
     /* TODO: Can we replace all of this with a call to row.isNullValue(col)? */
+    // WIP MCOL-641 Yes. We can
     bool ret = false;
 
     int colDataType = (pRowGroup->getColTypes())[col];
@@ -536,18 +542,7 @@ inline bool RowAggregation::isNull(const RowGroup* pRowGroup, const Row& row, in
         case execplan::CalpontSystemCatalog::DECIMAL:
         case execplan::CalpontSystemCatalog::UDECIMAL:
         {
-            int colWidth = pRowGroup->getColumnWidth(col);
-            int64_t val = row.getIntField(col);
-
-            if (colWidth == 1)
-                ret = ((uint8_t)val == joblist::TINYINTNULL);
-            else if (colWidth == 2)
-                ret = ((uint16_t)val == joblist::SMALLINTNULL);
-            else if (colWidth == 4)
-                ret = ((uint32_t)val == joblist::INTNULL);
-            else
-                ret = ((uint64_t)val == joblist::BIGINTNULL);
-
+            ret = row.isNullValue(col);  // Row decides by column width; the result must land in ret or DECIMAL NULLs go undetected
             break;
         }
 
@@ -1170,7 +1165,20 @@ void RowAggregation::makeAggFieldsNull(Row& row)
             case execplan::CalpontSystemCatalog::UDECIMAL:
             {
                 int colWidth = fRowGroupOut->getColumnWidth(colOut);
-                row.setIntField(getUintNullValue(colDataType, colWidth), colOut);
+                if (colWidth <= 8)
+                {
+                    row.setIntField(getUintNullValue(colDataType, colWidth), colOut);
+                }
+                else
+                {
+                    // WIP This is only 1st part of the value
+                    uint64_t nullValue = getBinaryNullValue();
+                    uint32_t offset = row.getOffset(colOut);
+                    row.setBinaryField_offset(&nullValue, sizeof(nullValue),
+                        offset);
+                    row.setBinaryField_offset(&nullValue, sizeof(nullValue),
+                        offset+sizeof(nullValue));
+                }
                 break;
             }
 
@@ -1339,11 +1347,18 @@ void RowAggregation::doMinMax(const Row& rowIn, int64_t colIn, int64_t colOut, i
 // Note: NULL value check must be done on UM & PM
 //       UM may receive NULL values, too.
 //------------------------------------------------------------------------------
+// WIP MCOL-641. This and other methods must be type based to avoid needless mem
+// allocation for wide DTs
 void RowAggregation::doSum(const Row& rowIn, int64_t colIn, int64_t colOut, int funcType)
 {
     int colDataType = (fRowGroupIn.getColTypes())[colIn];
     long double valIn = 0;
-    long double valOut = fRow.getLongDoubleField(colOut);
+    bool isWideDataType = false;
+    void *wideValInPtr = NULL;
+    // WIP MCOL-641 Probably the width must be taken
+    // from colOut
+    uint32_t width = fRowGroupOut->getColumnWidth(colOut);
+
     if (isNull(&fRowGroupIn, rowIn, colIn) == true)
         return;
 
@@ -1372,12 +1387,31 @@ void RowAggregation::doSum(const Row& rowIn, int64_t colIn, int64_t colOut, int
         case execplan::CalpontSystemCatalog::DECIMAL:
         case execplan::CalpontSystemCatalog::UDECIMAL:
         {
-            valIn = rowIn.getIntField(colIn);
-            double scale = (double)(fRowGroupIn.getScale())[colIn];
-            if (valIn != 0 && scale > 0)
+            // WIP MCOL-641 make the size dynamic and use branch prediction cond
+            isWideDataType = (width) > 8 ? true : false;
+            if (!isWideDataType)
             {
-                valIn /= pow(10.0, scale);
+                valIn = rowIn.getIntField(colIn);
+                double scale = (double)(fRowGroupIn.getScale())[colIn];
+                if (valIn != 0 && scale > 0)
+                {
+                    valIn /= pow(10.0, scale);
+                }
             }
+            else
+            {
+                if (colDataType == execplan::CalpontSystemCatalog::DECIMAL)
+                {
+                    int128_t *dec = rowIn.getBinaryField<int128_t>(colIn);
+                    wideValInPtr = reinterpret_cast<void*>(dec);
+                }
+                else
+                {
+                    uint128_t *dec = rowIn.getBinaryField<uint128_t>(colIn);
+                    wideValInPtr = reinterpret_cast<void*>(dec);
+                }
+            }
+    
             break;
         }
 
@@ -1428,14 +1462,51 @@ void RowAggregation::doSum(const Row& rowIn, int64_t colIn, int64_t colOut, int
             break;
         }
     }
-    if (isNull(fRowGroupOut, fRow, colOut))
+    // WIP MCOL-641
+    if (!isWideDataType)
     {
-        fRow.setLongDoubleField(valIn, colOut);
+        if (isNull(fRowGroupOut, fRow, colOut))
+        {
+            fRow.setLongDoubleField(valIn, colOut);
+        }
+        else
+        {
+            long double valOut = fRow.getLongDoubleField(colOut);
+            fRow.setLongDoubleField(valIn+valOut, colOut);
+        }
     }
     else
     {
-        fRow.setLongDoubleField(valIn+valOut, colOut);
-    }
+        if (colDataType == execplan::CalpontSystemCatalog::DECIMAL)
+        {
+            int128_t *dec = reinterpret_cast<int128_t*>(wideValInPtr);
+            // WIP MCOL-641 Replace Row::setBinaryField1
+            if (isNull(fRowGroupOut, fRow, colOut))
+            {
+                fRow.setBinaryField1<int128_t>(dec, width, colOut);
+            }
+            else
+            {
+                int128_t *valOutPtr = fRow.getBinaryField<int128_t>(colOut);
+                int128_t sum = *valOutPtr + *dec; 
+                fRow.setBinaryField1<int128_t>(&sum, width, colOut);
+            }
+        }
+        else
+        {
+            uint128_t *dec = reinterpret_cast<uint128_t*>(wideValInPtr);
+            if (isNull(fRowGroupOut, fRow, colOut))
+            {
+                fRow.setBinaryField1<uint128_t>(dec, width, colOut);
+            }
+            else
+            {
+                uint128_t *valOutPtr = fRow.getBinaryField<uint128_t>(colOut);
+                uint128_t sum = *valOutPtr + *dec; 
+                fRow.setBinaryField1<uint128_t>(&sum, width, colOut);
+            }
+        }
+    } // end-of isWideDataType block
 }
 
 //------------------------------------------------------------------------------
diff --git a/utils/rowgroup/rowgroup.cpp b/utils/rowgroup/rowgroup.cpp
index e756e1c5e..ec229d8b5 100644
--- a/utils/rowgroup/rowgroup.cpp
+++ b/utils/rowgroup/rowgroup.cpp
@@ -815,7 +815,6 @@ void Row::initToNull()
                     default:
                         *((uint64_t*) &data[offsets[i]]) = *((uint64_t*) joblist::CPNULLSTRMARK.c_str());
                         memset(&data[offsets[i] + 8], 0, len - 8);
-                        //strcpy((char *) &data[offsets[i]], joblist::CPNULLSTRMARK.c_str());
                         break;
                 }
 
@@ -846,6 +845,13 @@ void Row::initToNull()
                         *((int32_t*) &data[offsets[i]]) = static_cast<int32_t>(joblist::INTNULL);
                         break;
 
+                    case 16 :   // WIP MCOL-641: a 16-byte NULL is BINARYNULL in both 64-bit halves
+                    {           // braces scope `dec`, so `default:` below does not jump past its initialisation
+                        uint64_t* dec = reinterpret_cast<uint64_t*>(&data[offsets[i]]);
+                        dec[0] = dec[1] = joblist::BINARYNULL;
+                        break;
+                    }
+
                     default:
                         *((int64_t*) &data[offsets[i]]) = static_cast<int64_t>(joblist::BIGINTNULL);
                         break;
@@ -1039,13 +1045,15 @@ bool Row::isNullValue(uint32_t colIndex) const
         case CalpontSystemCatalog::UDECIMAL:
         {
             uint32_t len = getColumnWidth(colIndex);
+            const uint64_t *dec;
 
             switch (len)
             {
-                // MCOL-641 WIP
+                // MCOL-641
                 case 16:
-                    return (*((int64_t*) &data[offsets[colIndex]]) == static_cast<int64_t>(joblist::BIGINTNULL));
-                    break;
+                    dec = reinterpret_cast<const uint64_t*>(&data[offsets[colIndex]]);
+                    return ((dec[0] == joblist::BINARYNULL)
+                        && (dec[1] == joblist::BINARYNULL));
 
                 case 1 :
                     return (data[offsets[colIndex]] == joblist::TINYINTNULL);
diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h
index ed02c6ee7..37f1ad3e4 100644
--- a/utils/rowgroup/rowgroup.h
+++ b/utils/rowgroup/rowgroup.h
@@ -66,6 +66,9 @@ typedef const struct charset_info_st CHARSET_INFO;
 
 // Workaround for my_global.h #define of isnan(X) causing a std::std namespace
 
+using int128_t = __int128;
+using uint128_t = unsigned __int128;
+
 namespace rowgroup
 {
 
@@ -424,6 +427,10 @@ public:
     void setStringField(const std::string& val, uint32_t colIndex);
     inline void setStringField(const uint8_t*, uint32_t len, uint32_t colIndex);
     inline void setBinaryField(const uint8_t* strdata, uint32_t length, uint32_t offset);
+    template<typename T>
+    inline void setBinaryField1(T* strdata, uint32_t width, uint32_t colIndex);
+    template<typename T>
+    inline void setBinaryField_offset(T* strdata, uint32_t width, uint32_t offset); // last argument is a byte offset into the row data, not a column index
     // support VARBINARY
     // Add 2-byte length at the CHARSET_INFO*beginning of the field.  NULL and zero length field are
     // treated the same, could use one of the length bit to distinguish these two cases.
@@ -804,6 +811,20 @@ inline void Row::setBinaryField(const uint8_t* strdata, uint32_t length, uint32_
     memcpy(&data[offset], strdata, length);
 }
 
+template<typename T>
+inline void Row::setBinaryField1(T* value, uint32_t width, uint32_t colIndex)
+{  // Overwrites the field of column colIndex with raw bytes read from value.
+   memcpy(&data[offsets[colIndex]], value, width);  // byte count is `width`, not sizeof(T)
+}
+
+template<typename T>
+inline void Row::setBinaryField_offset(T* value, uint32_t width, uint32_t offset)
+{
+    // Store *value at byte position `offset` of the row buffer. memcpy replaces the typed
+    // store because nothing aligns &data[offset] for T; `width` stays unused (sizeof(T) bytes).
+    memcpy(&data[offset], value, sizeof(T));
+}
+
 inline void Row::setStringField(const uint8_t* strdata, uint32_t length, uint32_t colIndex)
 {
     uint64_t offset;