diff --git a/datatypes/mcs_decimal.cpp b/datatypes/mcs_decimal.cpp
index c5b61b5fa..4b7c4e7c6 100644
--- a/datatypes/mcs_decimal.cpp
+++ b/datatypes/mcs_decimal.cpp
@@ -208,6 +208,21 @@ namespace datatypes
         return toString(const_cast<execplan::IDB_Decimal&>(value));
     }
 
+    // Compare perf with f(string&, ColTypeAlias&, int128_t&)
+    int128_t Decimal::int128FromString(const std::string& value,
+                                       ColTypeAlias& colType)
+    {
+        int128_t result = 0;
+        bool pushWarning = false;
+        bool noRoundup = false;
+        dataconvert::number_int_value<int128_t>(value,
+                                      colType,
+                                      pushWarning,
+                                      noRoundup,
+                                      result);
+        return result;
+    }
+
     int Decimal::compare(const execplan::IDB_Decimal& l, const execplan::IDB_Decimal& r)
     {
         int128_t divisorL, divisorR;
diff --git a/datatypes/mcs_decimal.h b/datatypes/mcs_decimal.h
index 5e30cfbaa..329487743 100644
--- a/datatypes/mcs_decimal.h
+++ b/datatypes/mcs_decimal.h
@@ -81,6 +81,10 @@ constexpr uint8_t INT128MAXPRECISION = 38U;
 constexpr uint8_t MAXLEGACYWIDTH = 8U;
 constexpr uint8_t MAXSCALEINC4AVG = 4U;
 constexpr int8_t IGNOREPRECISION = -1;
+constexpr uint8_t MAXLENGTH16BYTES = 42;
+constexpr uint8_t MAXLENGTH8BYTES = 23;
+
+
 
 const uint64_t mcs_pow_10[20] =
 {
@@ -220,6 +224,11 @@ class Decimal
         static std::string toString(execplan::IDB_Decimal& value);
         static std::string toString(const execplan::IDB_Decimal& value);
 
+        /**
+            @brief Convenience method to get int128 from a std::string.
+        */
+        static int128_t int128FromString(const std::string& value, ColTypeAlias& colType);
+
         /**
             @brief The method detects whether decimal type is wide
             using csc colType.
diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp
index 7ace89be5..50767ab36 100644
--- a/dbcon/joblist/tupleaggregatestep.cpp
+++ b/dbcon/joblist/tupleaggregatestep.cpp
@@ -106,17 +106,14 @@ struct cmpTuple
                     return true;
                 if (pUDAFa == pUDAFb)
                 {
-                    if (pUDAFa == NULL)
-                        return false;
                     std::vector<uint32_t>* paramKeysa = boost::get<3>(a);
                     std::vector<uint32_t>* paramKeysb = boost::get<3>(b);
-
+                    if (paramKeysa == NULL || paramKeysb == NULL)
+                        return false;
                     if (paramKeysa->size() < paramKeysb->size())
                         return true;
                     if (paramKeysa->size() == paramKeysb->size())
                     {
-                        if (paramKeysa == NULL)
-                            return false;
                         for (uint64_t i = 0; i < paramKeysa->size(); ++i)
                         {
                             if ((*paramKeysa)[i] < (*paramKeysb)[i])
@@ -1471,7 +1468,7 @@ void TupleAggregateStep::prep1PhaseAggregate(
         // find if this func is a duplicate
         AGG_MAP::iterator iter = aggFuncMap.find(boost::make_tuple(key, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL));
 
-        if (iter != aggFuncMap.end())
+        if (aggOp != ROWAGG_UDAF && aggOp != ROWAGG_MULTI_PARM && iter != aggFuncMap.end())
         {
             if (funct->fAggFunction == ROWAGG_AVG)
                 funct->fAggFunction = ROWAGG_DUP_AVG;
@@ -1513,7 +1510,7 @@ void TupleAggregateStep::prep1PhaseAggregate(
     }
 
     // there is avg(k), but no count(k) in the select list
-    uint64_t lastCol = returnedColVec.size();
+    uint64_t lastCol = outIdx;
 
     for (map<uint32_t, SP_ROWAGG_FUNC_t>::iterator k = avgFuncMap.begin(); k != avgFuncMap.end(); k++)
     {
@@ -1855,8 +1852,12 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
             }
 
             // skip if this is a duplicate
-            if (aggFuncMap.find(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)) != aggFuncMap.end())
+            if (aggOp != ROWAGG_UDAF && aggOp != ROWAGG_MULTI_PARM
+             && aggFuncMap.find(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)) != aggFuncMap.end())
+            {
+                // skip if this is a duplicate
                 continue;
+            }
 
             functionVec1.push_back(funct);
             aggFuncMap.insert(make_pair(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), colAgg));
@@ -3134,9 +3135,13 @@ void TupleAggregateStep::prep2PhasesAggregate(
             }
 
             // skip if this is a duplicate
-            if (aggFuncMap.find(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)) != aggFuncMap.end())
+            if (aggOp != ROWAGG_UDAF && aggOp != ROWAGG_MULTI_PARM
+             && aggFuncMap.find(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)) != aggFuncMap.end())
+            {
+                // skip if this is a duplicate
                 continue;
-
+            }
+            
             functionVecPm.push_back(funct);
             aggFuncMap.insert(make_pair(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), colAggPm));
 
@@ -4010,8 +4015,12 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(
             }
 
             // skip if this is a duplicate
-            if (aggFuncMap.find(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)) != aggFuncMap.end())
+            if (aggOp != ROWAGG_UDAF && aggOp != ROWAGG_MULTI_PARM
+             && aggFuncMap.find(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL)) != aggFuncMap.end())
+            {
+                // skip if this is a duplicate
                 continue;
+            }
 
             functionVecPm.push_back(funct);
             aggFuncMap.insert(make_pair(boost::make_tuple(aggKey, aggOp, pUDAFFunc, udafc ? udafc->getContext().getParamKeys() : NULL), colAggPm-multiParm));
diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp
index 2c4c2d775..aa5dd5184 100755
--- a/dbcon/mysql/ha_mcs_execplan.cpp
+++ b/dbcon/mysql/ha_mcs_execplan.cpp
@@ -4671,6 +4671,9 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
     vector<SRCP> selCols;
     vector<SRCP> orderCols;
     bool bIsConst = false;
+    unsigned int constValPrecision = 0;
+    unsigned int constValScale = 0;
+    bool hasDecimalConst = false;
     if (get_fe_conn_info_ptr() == NULL)
         set_fe_conn_info_ptr((void*)new cal_connection_info());
 
@@ -4854,6 +4857,13 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
                                 parm.reset(buildReturnedColumn(sfitemp, gwi, gwi.fatalParseError));
                                 ac->constCol(parm);
                                 bIsConst = true;
+                                if (sfitemp->cmp_type() == DECIMAL_RESULT)
+                                {
+                                    hasDecimalConst = true;
+                                    Item_decimal* idp = (Item_decimal*)sfitemp;
+                                    constValPrecision = idp->decimal_precision();
+                                    constValScale = idp->decimal_scale();
+                                }
                                 break;
                             }
                             default:
@@ -4975,6 +4985,9 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
             }
         }
 
+        bool isAvg = (isp->sum_func() == Item_sum::AVG_FUNC ||
+                    isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC);
+
         // Get result type
         // Modified for MCOL-1201 multi-argument aggregate
         if (!bIsConst && ac->aggParms().size() > 0)
@@ -4983,10 +4996,8 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
             // use the first parm for result type.
             parm = ac->aggParms()[0];
 
-            bool isAvg = (isp->sum_func() == Item_sum::AVG_FUNC ||
-                    isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC);
             if (isAvg || isp->sum_func() == Item_sum::SUM_FUNC ||
-                     isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC)
+                isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC)
             {
                 CalpontSystemCatalog::ColType ct = parm->resultType();
                 if (datatypes::Decimal::isWideDecimalType(ct))
@@ -5054,6 +5065,17 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
                 ac->resultType(parm->resultType());
             }
         }
+        else if (bIsConst && hasDecimalConst && isAvg)
+        {
+            CalpontSystemCatalog::ColType ct = parm->resultType();
+            if (datatypes::Decimal::isWideDecimalType(constValPrecision))
+            {
+                ct.precision = constValPrecision;
+                ct.scale = constValScale;
+                ct.colWidth = datatypes::MAXDECIMALWIDTH;
+            }
+            ac->resultType(ct);
+        }
         else
         {
             ac->resultType(colType_MysqlToIDB(isp));
diff --git a/dbcon/mysql/ha_mcs_partition.cpp b/dbcon/mysql/ha_mcs_partition.cpp
index 1b1d1352d..17ba58992 100644
--- a/dbcon/mysql/ha_mcs_partition.cpp
+++ b/dbcon/mysql/ha_mcs_partition.cpp
@@ -48,7 +48,6 @@ using namespace BRM;
 
 #include "dataconvert.h"
 using namespace dataconvert;
-#include "widedecimalutils.h"
 #include "ddlpkg.h"
 #include "sqlparser.h"
 using namespace ddlpackage;
@@ -66,6 +65,7 @@ using namespace logging;
 
 #include <boost/algorithm/string/case_conv.hpp>
 using namespace boost;
+#include "mcs_decimal.h"
 
 namespace
 {
@@ -238,12 +238,12 @@ struct PartitionInfo
     int64_t max;
     union
     {
-        __int128 bigMin;
+        int128_t int128Min;
         int64_t min_;
     };
     union
     {
-        __int128 bigMax;
+        int128_t int128Max;
         int64_t max_;
     };
     uint64_t status;
@@ -251,8 +251,8 @@ struct PartitionInfo
         max((uint64_t) - 0x8000000000000001LL),
         status(0)
     {
-        utils::int128Min(bigMin);
-        utils::int128Max(bigMax);
+        int128Min = datatypes::Decimal::minInt128;
+        int128Max = datatypes::Decimal::maxInt128;
     };
 };
 
@@ -312,8 +312,8 @@ const string format(T v, CalpontSystemCatalog::ColType& ct)
             }
             else
             {
-                char buf[utils::MAXLENGTH16BYTES];
-                DataConvert::decimalToString((__int128*)&v, (unsigned)ct.scale, buf, sizeof(buf), ct.colDataType);
+                char buf[datatypes::MAXLENGTH16BYTES];
+                DataConvert::decimalToString((int128_t*)&v, (unsigned)ct.scale, buf, sizeof(buf), ct.colDataType);
                 oss << buf;
             }
 
@@ -659,8 +659,8 @@ void partitionByValue_common(UDF_ARGS* args,								// input
     int32_t seqNum;
     string schema, table, column;
     CalpontSystemCatalog::ColType ct;
-    int64_t startVal, endVal;
-    int128_t bigStartVal, bigEndVal;
+    int64_t startVal = 0, endVal = 0;
+    int128_t int128StartVal = 0, int128EndVal = 0;
     uint8_t rfMin = 0, rfMax = 0;
 
     if (args->arg_count == 5)
@@ -748,11 +748,11 @@ void partitionByValue_common(UDF_ARGS* args,								// input
                 {
                     if (isUnsigned(ct.colDataType))
                     {
-                        bigStartVal = 0;
+                        int128StartVal = 0;
                     }
                     else
                     {
-                        utils::int128Min(bigStartVal);
+                        int128StartVal = datatypes::Decimal::minInt128;
                     }
                 }
             }
@@ -761,7 +761,7 @@ void partitionByValue_common(UDF_ARGS* args,								// input
                 if (!datatypes::Decimal::isWideDecimalType(ct))
                     startVal = IDB_format<int64_t>((char*) args->args[2], ct, rfMin);
                 else
-                    bigStartVal = IDB_format<int128_t>((char*) args->args[2], ct, rfMin);
+                    int128StartVal = IDB_format<int128_t>((char*) args->args[2], ct, rfMin);
             }
 
             if (!args->args[3])
@@ -781,11 +781,11 @@ void partitionByValue_common(UDF_ARGS* args,								// input
                 {
                     if (isUnsigned(ct.colDataType))
                     {
-                        bigEndVal = -1;
+                        int128EndVal = -1;
                     }
                     else
                     {
-                        utils::int128Max(bigEndVal);
+                        int128EndVal = datatypes::Decimal::maxInt128;
                     }
                 }
             }
@@ -794,7 +794,7 @@ void partitionByValue_common(UDF_ARGS* args,								// input
                 if (!datatypes::Decimal::isWideDecimalType(ct))
                     endVal = IDB_format<int64_t>((char*) args->args[3], ct, rfMax);
                 else
-                    bigEndVal = IDB_format<int128_t>((char*) args->args[3], ct, rfMax);
+                    int128EndVal = IDB_format<int128_t>((char*) args->args[3], ct, rfMax);
             }
         }
         else
@@ -816,11 +816,11 @@ void partitionByValue_common(UDF_ARGS* args,								// input
                 {
                     if (isUnsigned(ct.colDataType))
                     {
-                        bigStartVal = 0;
+                        int128StartVal = 0;
                     }
                     else
                     {
-                        utils::int128Min(bigStartVal);
+                        int128StartVal = datatypes::Decimal::minInt128;
                     }
                 }
             }
@@ -829,7 +829,7 @@ void partitionByValue_common(UDF_ARGS* args,								// input
                 if (!datatypes::Decimal::isWideDecimalType(ct))
                     startVal = IDB_format<int64_t>((char*) args->args[3], ct, rfMin);
                 else
-                    bigStartVal = IDB_format<int128_t>((char*) args->args[3], ct, rfMin);
+                    int128StartVal = IDB_format<int128_t>((char*) args->args[3], ct, rfMin);
             }
 
             if (!args->args[4])
@@ -849,11 +849,11 @@ void partitionByValue_common(UDF_ARGS* args,								// input
                 {
                     if (isUnsigned(ct.colDataType))
                     {
-                        bigEndVal = -1;
+                        int128EndVal = -1;
                     }
                     else
                     {
-                        utils::int128Max(bigEndVal);
+                        int128EndVal = datatypes::Decimal::maxInt128;
                     }
                 }
             }
@@ -862,7 +862,7 @@ void partitionByValue_common(UDF_ARGS* args,								// input
                 if (!datatypes::Decimal::isWideDecimalType(ct))
                     endVal = IDB_format<int64_t>((char*) args->args[4], ct, rfMax);
                 else
-                    bigEndVal = IDB_format<int128_t>((char*) args->args[4], ct, rfMax);
+                    int128EndVal = IDB_format<int128_t>((char*) args->args[4], ct, rfMax);
             }
         }
 
@@ -889,7 +889,7 @@ void partitionByValue_common(UDF_ARGS* args,								// input
                 if (!datatypes::Decimal::isWideDecimalType(ct))
                     state = em.getExtentMaxMin(iter->range.start, partInfo.max, partInfo.min, seqNum);
                 else
-                    state = em.getExtentMaxMin(iter->range.start, partInfo.bigMax, partInfo.bigMin, seqNum);
+                    state = em.getExtentMaxMin(iter->range.start, partInfo.int128Max, partInfo.int128Min, seqNum);
 
                 // char column order swap
                 if ((ct.colDataType == CalpontSystemCatalog::CHAR && ct.colWidth <= 8) ||
@@ -928,18 +928,8 @@ void partitionByValue_common(UDF_ARGS* args,								// input
                     }
                     else
                     {
-                        if (isUnsigned(ct.colDataType))
-                        {
-                            mapit->second.bigMin =
-                                (static_cast<uint128_t>(partInfo.bigMin) < static_cast<uint128_t>(mapit->second.bigMin) ? partInfo.bigMin : mapit->second.bigMin);
-                            mapit->second.bigMax =
-                                (static_cast<uint128_t>(partInfo.bigMax) > static_cast<uint128_t>(mapit->second.bigMax) ? partInfo.bigMax : mapit->second.bigMax);
-                        }
-                        else
-                        {
-                            mapit->second.bigMin = (partInfo.bigMin < mapit->second.bigMin ? partInfo.bigMin : mapit->second.bigMin);
-                            mapit->second.bigMax = (partInfo.bigMax > mapit->second.bigMax ? partInfo.bigMax : mapit->second.bigMax);
-                        }
+                        mapit->second.int128Min = (partInfo.int128Min < mapit->second.int128Min ? partInfo.int128Min : mapit->second.int128Min);
+                        mapit->second.int128Max = (partInfo.int128Max > mapit->second.int128Max ? partInfo.int128Max : mapit->second.int128Max);
                     }
                 }
             }
@@ -984,36 +974,16 @@ void partitionByValue_common(UDF_ARGS* args,								// input
                 }
                 else
                 {
-                    if (isUnsigned(ct.colDataType))
+                    if (!(mapit->second.status & CPINVALID) && mapit->second.int128Min >= int128StartVal && mapit->second.int128Max <= int128EndVal &&
+                            !(mapit->second.int128Min == datatypes::Decimal::maxInt128 && mapit->second.int128Max == datatypes::Decimal::minInt128))
                     {
-                        if (!(mapit->second.status & CPINVALID) &&
-                                static_cast<uint128_t>(mapit->second.bigMin) >= static_cast<uint128_t>(bigStartVal) &&
-                                static_cast<uint128_t>(mapit->second.bigMax) <= static_cast<uint128_t>(bigEndVal) &&
-                                !(static_cast<uint128_t>(mapit->second.bigMin) == static_cast<uint128_t>(-1) &&
-                                  static_cast<uint128_t>(mapit->second.bigMax == 0)))
-                        {
-                            if (rfMin == ROUND_POS && mapit->second.bigMin == bigStartVal)
-                                continue;
+                        if (rfMin == ROUND_POS && mapit->second.int128Min == int128StartVal)
+                            continue;
 
-                            if (rfMax == ROUND_NEG && mapit->second.bigMax == bigEndVal)
-                                continue;
+                        if (rfMax == ROUND_NEG && mapit->second.int128Max == int128EndVal)
+                            continue;
 
-                            partSet.insert(mapit->first);
-                        }
-                    }
-                    else
-                    {
-                        if (!(mapit->second.status & CPINVALID) && mapit->second.bigMin >= bigStartVal && mapit->second.bigMax <= bigEndVal &&
-                                !(mapit->second.bigMin == utils::maxInt128 && mapit->second.bigMax == utils::minInt128))
-                        {
-                            if (rfMin == ROUND_POS && mapit->second.bigMin == bigStartVal)
-                                continue;
-
-                            if (rfMax == ROUND_NEG && mapit->second.bigMax == bigEndVal)
-                                continue;
-
-                            partSet.insert(mapit->first);
-                        }
+                        partSet.insert(mapit->first);
                     }
                 }
             }
@@ -1239,7 +1209,7 @@ extern "C"
                     if (!datatypes::Decimal::isWideDecimalType(ct))
                         state = em.getExtentMaxMin(iter->range.start, partInfo.max, partInfo.min, seqNum);
                     else
-                        state = em.getExtentMaxMin(iter->range.start, partInfo.bigMax, partInfo.bigMin, seqNum);
+                        state = em.getExtentMaxMin(iter->range.start, partInfo.int128Max, partInfo.int128Min, seqNum);
 
                     // char column order swap for compare
                     if ((ct.colDataType == CalpontSystemCatalog::CHAR && ct.colWidth <= 8) ||
@@ -1268,8 +1238,8 @@ extern "C"
                         }
                         else
                         {
-                            mapit->second.bigMin = (partInfo.bigMin < mapit->second.bigMin ? partInfo.bigMin : mapit->second.bigMin);
-                            mapit->second.bigMax = (partInfo.bigMax > mapit->second.bigMax ? partInfo.bigMax : mapit->second.bigMax);
+                            mapit->second.int128Min = (partInfo.int128Min < mapit->second.int128Min ? partInfo.int128Min : mapit->second.int128Min);
+                            mapit->second.int128Max = (partInfo.int128Max > mapit->second.int128Max ? partInfo.int128Max : mapit->second.int128Max);
                         }
                     }
                 }
@@ -1299,19 +1269,16 @@ extern "C"
         else
         {
             output << setw(10) << "Part#"
-                   << setw(utils::MAXLENGTH16BYTES) << "Min"
-                   << setw(utils::MAXLENGTH16BYTES) << "Max" << "Status";
+                   << setw(datatypes::MAXLENGTH16BYTES) << "Min"
+                   << setw(datatypes::MAXLENGTH16BYTES) << "Max" << "Status";
         }
 
         int64_t maxLimit = numeric_limits<int64_t>::max();
         int64_t minLimit = numeric_limits<int64_t>::min();
 
-        __int128 bigMaxLimit, bigMinLimit;
-        utils::int128Max(bigMaxLimit);
-        utils::int128Min(bigMinLimit);
-        unsigned __int128 ubigMaxLimit, ubigMinLimit;
-        utils::uint128Max(ubigMaxLimit);
-        ubigMinLimit = 0;
+        int128_t int128MaxLimit, int128MinLimit;
+        int128MaxLimit = datatypes::Decimal::maxInt128;
+        int128MinLimit = datatypes::Decimal::minInt128;
 
         // char column order swap for compare in subsequent loop
         if ((ct.colDataType == CalpontSystemCatalog::CHAR && ct.colWidth <= 8) ||
@@ -1334,7 +1301,7 @@ extern "C"
                 if (!datatypes::Decimal::isWideDecimalType(ct))
                     output << setw(30) << "N/A" << setw(30) << "N/A";
                 else
-                    output << setw(utils::MAXLENGTH16BYTES) << "N/A" << setw(utils::MAXLENGTH16BYTES) << "N/A";
+                    output << setw(datatypes::MAXLENGTH16BYTES) << "N/A" << setw(datatypes::MAXLENGTH16BYTES) << "N/A";
             }
             else
             {
@@ -1350,11 +1317,11 @@ extern "C"
                     }
                     else
                     {
-                        if (static_cast<unsigned __int128>(partIt->second.bigMin) == ubigMaxLimit
-                                &&  static_cast<uint64_t>(partIt->second.bigMax) == ubigMinLimit)
-                            output << setw(utils::MAXLENGTH16BYTES) << "Empty/Null" << setw(utils::MAXLENGTH16BYTES) << "Empty/Null";
+                        if (partIt->second.int128Min == int128MaxLimit
+                                &&  partIt->second.int128Max == int128MinLimit)
+                            output << setw(datatypes::MAXLENGTH16BYTES) << "Empty/Null" << setw(datatypes::MAXLENGTH16BYTES) << "Empty/Null";
                         else
-                            output << setw(utils::MAXLENGTH16BYTES) << format(partIt->second.bigMin, ct) << setw(utils::MAXLENGTH16BYTES) << format(partIt->second.bigMax, ct);
+                            output << setw(datatypes::MAXLENGTH16BYTES) << format(partIt->second.int128Min, ct) << setw(datatypes::MAXLENGTH16BYTES) << format(partIt->second.int128Max, ct);
                     }
                 }
                 else
@@ -1368,10 +1335,10 @@ extern "C"
                     }
                     else
                     {
-                        if (partIt->second.bigMin == bigMaxLimit && partIt->second.bigMax == bigMinLimit)
-                            output << setw(utils::MAXLENGTH16BYTES) << "Empty/Null" << setw(utils::MAXLENGTH16BYTES) << "Empty/Null";
+                        if (partIt->second.int128Min == int128MaxLimit && partIt->second.int128Max == int128MinLimit)
+                            output << setw(datatypes::MAXLENGTH16BYTES) << "Empty/Null" << setw(datatypes::MAXLENGTH16BYTES) << "Empty/Null";
                         else
-                            output << setw(utils::MAXLENGTH16BYTES) << format(partIt->second.bigMin, ct) << setw(utils::MAXLENGTH16BYTES) << format(partIt->second.bigMax, ct);
+                            output << setw(datatypes::MAXLENGTH16BYTES) << format(partIt->second.int128Min, ct) << setw(datatypes::MAXLENGTH16BYTES) << format(partIt->second.int128Max, ct);
                     }
                 }
             }
@@ -1934,8 +1901,8 @@ extern "C"
         string schema, table, column;
         CalpontSystemCatalog::ColType ct;
         string errMsg;
-        int64_t startVal, endVal;
-        int128_t bigStartVal, bigEndVal;
+        int64_t startVal = 0, endVal = 0;
+        int128_t int128StartVal = 0, int128EndVal = 0;
         uint8_t rfMin = 0, rfMax = 0;
 
         try
@@ -2009,11 +1976,11 @@ extern "C"
                     {
                         if (isUnsigned(ct.colDataType))
                         {
-                            bigStartVal = 0;
+                            int128StartVal = 0;
                         }
                         else
                         {
-                            utils::int128Min(bigStartVal);
+                            int128StartVal = datatypes::Decimal::minInt128;
                         }
                     }
                 }
@@ -2022,7 +1989,7 @@ extern "C"
                     if (!datatypes::Decimal::isWideDecimalType(ct))
                         startVal = IDB_format<int64_t>((char*) args->args[2], ct, rfMin);
                     else
-                        bigStartVal = IDB_format<int128_t>((char*) args->args[2], ct, rfMin);
+                        int128StartVal = IDB_format<int128_t>((char*) args->args[2], ct, rfMin);
                 }
 
                 if (!args->args[3])
@@ -2042,11 +2009,11 @@ extern "C"
                     {
                         if (isUnsigned(ct.colDataType))
                         {
-                            bigEndVal = -1;
+                            int128EndVal = -1;
                         }
                         else
                         {
-                            utils::int128Max(bigEndVal);
+                            int128EndVal = datatypes::Decimal::maxInt128;
                         }
                     }
                 }
@@ -2055,7 +2022,7 @@ extern "C"
                     if (!datatypes::Decimal::isWideDecimalType(ct))
                         endVal = IDB_format<int64_t>((char*) args->args[3], ct, rfMax);
                     else
-                        bigEndVal = IDB_format<int128_t>((char*) args->args[3], ct, rfMax);
+                        int128EndVal = IDB_format<int128_t>((char*) args->args[3], ct, rfMax);
                 }
             }
             else
@@ -2077,11 +2044,11 @@ extern "C"
                     {
                         if (isUnsigned(ct.colDataType))
                         {
-                            bigStartVal = 0;
+                            int128StartVal = 0;
                         }
                         else
                         {
-                            utils::int128Min(bigStartVal);
+                            int128StartVal = datatypes::Decimal::minInt128;
                         }
                     }
                 }
@@ -2090,7 +2057,7 @@ extern "C"
                     if (!datatypes::Decimal::isWideDecimalType(ct))
                         startVal = IDB_format<int64_t>((char*) args->args[3], ct, rfMin);
                     else
-                        bigStartVal = IDB_format<int128_t>((char*) args->args[3], ct, rfMin);
+                        int128StartVal = IDB_format<int128_t>((char*) args->args[3], ct, rfMin);
                 }
 
                 if (!args->args[4])
@@ -2110,11 +2077,11 @@ extern "C"
                     {
                         if (isUnsigned(ct.colDataType))
                         {
-                            bigEndVal = -1;
+                            int128EndVal = -1;
                         }
                         else
                         {
-                            utils::int128Max(bigEndVal);
+                            int128EndVal = datatypes::Decimal::maxInt128;
                         }
                     }
                 }
@@ -2123,7 +2090,7 @@ extern "C"
                     if (!datatypes::Decimal::isWideDecimalType(ct))
                         endVal = IDB_format<int64_t>((char*) args->args[4], ct, rfMax);
                     else
-                        bigEndVal = IDB_format<int128_t>((char*) args->args[4], ct, rfMax);
+                        int128EndVal = IDB_format<int128_t>((char*) args->args[4], ct, rfMax);
                 }
             }
 
@@ -2152,7 +2119,7 @@ extern "C"
                     if (!datatypes::Decimal::isWideDecimalType(ct))
                         state = em.getExtentMaxMin(iter->range.start, partInfo.max, partInfo.min, seqNum);
                     else
-                        state = em.getExtentMaxMin(iter->range.start, partInfo.bigMax, partInfo.bigMin, seqNum);
+                        state = em.getExtentMaxMin(iter->range.start, partInfo.int128Max, partInfo.int128Min, seqNum);
 
                     // char column order swap
                     if ((ct.colDataType == CalpontSystemCatalog::CHAR && ct.colWidth <= 8) ||
@@ -2191,18 +2158,8 @@ extern "C"
                         }
                         else
                         {
-                            if (isUnsigned(ct.colDataType))
-                            {
-                                mapit->second.bigMin =
-                                    (static_cast<uint128_t>(partInfo.bigMin) < static_cast<uint128_t>(mapit->second.bigMin) ? partInfo.bigMin : mapit->second.bigMin);
-                                mapit->second.bigMax =
-                                    (static_cast<uint128_t>(partInfo.bigMax) > static_cast<uint128_t>(mapit->second.bigMax) ? partInfo.bigMax : mapit->second.bigMax);
-                            }
-                            else
-                            {
-                                mapit->second.bigMin = (partInfo.bigMin < mapit->second.bigMin ? partInfo.bigMin : mapit->second.bigMin);
-                                mapit->second.bigMax = (partInfo.bigMax > mapit->second.bigMax ? partInfo.bigMax : mapit->second.bigMax);
-                            }
+                            mapit->second.int128Min = (partInfo.int128Min < mapit->second.int128Min ? partInfo.int128Min : mapit->second.int128Min);
+                            mapit->second.int128Max = (partInfo.int128Max > mapit->second.int128Max ? partInfo.int128Max : mapit->second.int128Max);
                         }
                     }
                 }
@@ -2294,13 +2251,13 @@ extern "C"
             }
             else
             {
-                if (!(mapit->second.status & CPINVALID) && mapit->second.bigMin >= bigStartVal && mapit->second.bigMax <= bigEndVal &&
-                        !(mapit->second.bigMin == utils::maxInt128 && mapit->second.bigMax == utils::minInt128))
+                if (!(mapit->second.status & CPINVALID) && mapit->second.int128Min >= int128StartVal && mapit->second.int128Max <= int128EndVal &&
+                        !(mapit->second.int128Min == datatypes::Decimal::maxInt128 && mapit->second.int128Max == datatypes::Decimal::minInt128))
                 {
-                    if (rfMin == ROUND_POS && mapit->second.bigMin == bigStartVal)
+                    if (rfMin == ROUND_POS && mapit->second.int128Min == int128StartVal)
                         continue;
 
-                    if (rfMax == ROUND_NEG && mapit->second.bigMax == bigEndVal)
+                    if (rfMax == ROUND_NEG && mapit->second.int128Max == int128EndVal)
                         continue;
 
                     // print header
@@ -2308,8 +2265,8 @@ extern "C"
                     {
                         output.setf(ios::left, ios::adjustfield);
                         output << setw(10) << "Part#"
-                               << setw(utils::MAXLENGTH16BYTES) << "Min"
-                               << setw(utils::MAXLENGTH16BYTES) << "Max" << "Status";
+                               << setw(datatypes::MAXLENGTH16BYTES) << "Min"
+                               << setw(datatypes::MAXLENGTH16BYTES) << "Max" << "Status";
                     }
 
                     noPartFound = false;
@@ -2321,24 +2278,14 @@ extern "C"
 
                     if (mapit->second.status & CPINVALID)
                     {
-                        output << setw(utils::MAXLENGTH16BYTES) << "N/A" << setw(utils::MAXLENGTH16BYTES) << "N/A";
+                        output << setw(datatypes::MAXLENGTH16BYTES) << "N/A" << setw(datatypes::MAXLENGTH16BYTES) << "N/A";
                     }
                     else
                     {
-                        if ((isUnsigned(ct.colDataType)))
-                        {
-                            if (static_cast<uint128_t>(mapit->second.bigMin) > static_cast<uint128_t>(mapit->second.bigMax))
-                                output << setw(utils::MAXLENGTH16BYTES) << "Empty/Null" << setw(utils::MAXLENGTH16BYTES) << "Empty/Null";
-                            else
-                                output << setw(utils::MAXLENGTH16BYTES) << format(mapit->second.bigMin, ct) << setw(utils::MAXLENGTH16BYTES) << format(mapit->second.bigMax, ct);
-                        }
+                        if (mapit->second.int128Min > mapit->second.int128Max)
+                            output << setw(datatypes::MAXLENGTH16BYTES) << "Empty/Null" << setw(datatypes::MAXLENGTH16BYTES) << "Empty/Null";
                         else
-                        {
-                            if (mapit->second.bigMin > mapit->second.bigMax)
-                                output << setw(utils::MAXLENGTH16BYTES) << "Empty/Null" << setw(utils::MAXLENGTH16BYTES) << "Empty/Null";
-                            else
-                                output << setw(utils::MAXLENGTH16BYTES) << format(mapit->second.bigMin, ct) << setw(utils::MAXLENGTH16BYTES) << format(mapit->second.bigMax, ct);
-                        }
+                            output << setw(datatypes::MAXLENGTH16BYTES) << format(mapit->second.int128Min, ct) << setw(datatypes::MAXLENGTH16BYTES) << format(mapit->second.int128Max, ct);
                     }
 
                     if (mapit->second.status & ET_DISABLED)
diff --git a/utils/common/any.hpp b/utils/common/any.hpp
index 78c65ffb2..7ae5d553e 100755
--- a/utils/common/any.hpp
+++ b/utils/common/any.hpp
@@ -9,7 +9,7 @@
  * http://www.boost.org/LICENSE_1_0.txt
  */
 
-#include  <stdint.h>
+#include <stdint.h>
 #include <stdexcept>
 #include <cstring>
 
@@ -123,7 +123,14 @@ namespace anyimpl
         typedef void type;
     };
 
-    /// Specializations for small types.
+    /// Specializations for big types.
+#define BIG_POLICY(TYPE) template<> struct \
+   choose_policy<TYPE> { typedef big_any_policy<TYPE> type; };
+
+   BIG_POLICY(__int128);
+   BIG_POLICY(unsigned __int128);
+   
+   /// Specializations for small types.
 #define SMALL_POLICY(TYPE) template<> struct \
    choose_policy<TYPE> { typedef small_any_policy<TYPE> type; };
 
diff --git a/utils/funcexp/func_char.cpp b/utils/funcexp/func_char.cpp
index f303bb8d3..70320c2b1 100644
--- a/utils/funcexp/func_char.cpp
+++ b/utils/funcexp/func_char.cpp
@@ -95,7 +95,7 @@ string Func_char::getStrVal(Row& row,
     buf[0]= 0;
     char* pBuf = buf;
     CHARSET_INFO* cs = ct.getCharset();
-    int32_t value;
+    int32_t value = 0;
     int32_t numBytes = 0;
     for (uint32_t i = 0; i < parm.size(); ++i)
     {
diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp
index 491b4ef27..04fe593ef 100755
--- a/utils/rowgroup/rowaggregation.cpp
+++ b/utils/rowgroup/rowaggregation.cpp
@@ -239,16 +239,20 @@ const static_any::any& RowAggregation::shortTypeId((short)1);
 const static_any::any& RowAggregation::intTypeId((int)1);
 const static_any::any& RowAggregation::longTypeId((long)1);
 const static_any::any& RowAggregation::llTypeId((long long)1);
+const static_any::any& RowAggregation::int128TypeId((__int128)1);
 const static_any::any& RowAggregation::ucharTypeId((unsigned char)1);
 const static_any::any& RowAggregation::ushortTypeId((unsigned short)1);
 const static_any::any& RowAggregation::uintTypeId((unsigned int)1);
 const static_any::any& RowAggregation::ulongTypeId((unsigned long)1);
 const static_any::any& RowAggregation::ullTypeId((unsigned long long)1);
+const static_any::any& RowAggregation::uint128TypeId((unsigned __int128)1);
 const static_any::any& RowAggregation::floatTypeId((float)1);
 const static_any::any& RowAggregation::doubleTypeId((double)1);
 const static_any::any& RowAggregation::longdoubleTypeId((long double)1);
 const static_any::any& RowAggregation::strTypeId(typeStr);
 
+using Dec = datatypes::Decimal;
+
 KeyStorage::KeyStorage(const RowGroup& keys, Row** tRow) : tmpRow(tRow), rg(keys)
 {
     RGData data(rg);
@@ -619,9 +623,6 @@ RowAggregation::RowAggregation(const RowAggregation& rhs):
     fSmallSideRGs(NULL), fLargeSideRG(NULL), fSmallSideCount(0),
     fRGContext(rhs.fRGContext), fOrigFunctionCols(NULL)
 {
-    //fGroupByCols.clear();
-    //fFunctionCols.clear();
-
     fGroupByCols.assign(rhs.fGroupByCols.begin(), rhs.fGroupByCols.end());
     fFunctionCols.assign(rhs.fFunctionCols.begin(), rhs.fFunctionCols.end());
 }
@@ -721,31 +722,32 @@ void RowAggregation::setJoinRowGroups(vector<RowGroup>* pSmallSideRG, RowGroup*
 // threads on the PM and by multple threads on the UM. It must remain
 // thread safe.
 //------------------------------------------------------------------------------
-void RowAggregation::resetUDAF(RowUDAFFunctionCol* rowUDAF)
+void RowAggregation::resetUDAF(RowUDAFFunctionCol* rowUDAF, uint64_t funcColsIdx)
 {
     // RowAggregation and it's functions need to be re-entrant which means
     // each instance (thread) needs its own copy of the context object.
     // Note: operator=() doesn't copy userData.
-    fRGContext = rowUDAF->fUDAFContext;
+    fRGContextColl[funcColsIdx] = rowUDAF->fUDAFContext;
 
     // Call the user reset for the group userData. Since, at this point,
     // context's userData will be NULL, reset will generate a new one.
     mcsv1sdk::mcsv1_UDAF::ReturnCode rc;
-    rc = fRGContext.getFunction()->reset(&fRGContext);
+    rc = fRGContextColl[funcColsIdx].getFunction()->reset(&fRGContextColl[funcColsIdx]);
 
     if (rc == mcsv1sdk::mcsv1_UDAF::ERROR)
     {
         rowUDAF->bInterrupted = true;
-        throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr);
+        throw logging::QueryDataExcept(fRGContextColl[funcColsIdx].getErrorMessage(),
+                                       logging::aggregateFuncErr);
     }
 
     fRow.setUserDataStore(fRowGroupOut->getRGData()->getUserDataStore());
-    fRow.setUserData(fRGContext,
-                     fRGContext.getUserDataSP(),
-                     fRGContext.getUserDataSize(),
+    fRow.setUserData(fRGContextColl[funcColsIdx],
+                     fRGContextColl[funcColsIdx].getUserDataSP(),
+                     fRGContextColl[funcColsIdx].getUserDataSize(),
                      rowUDAF->fAuxColumnIndex);
-
-    fRGContext.setUserData(NULL); // Prevents calling deleteUserData on the fRGContext.
+    // Prevents calling deleteUserData on the mcsv1Context.
+    fRGContextColl[funcColsIdx].setUserData(NULL);
 }
 
 //------------------------------------------------------------------------------
@@ -784,13 +786,15 @@ void RowAggregation::initialize()
     {
         fRowGroupOut->setRowCount(1);
         attachGroupConcatAg();
-
+        // Lazy approach w/o a mapping b/w fFunctionCols idx and fRGContextColl idx
+        fRGContextColl.resize(fFunctionCols.size());
         // For UDAF, reset the data
         for (uint64_t i = 0; i < fFunctionCols.size(); i++)
         {
             if (fFunctionCols[i]->fAggFunction == ROWAGG_UDAF)
             {
-                resetUDAF(dynamic_cast<RowUDAFFunctionCol*>(fFunctionCols[i].get()));
+                auto rowUDAFColumnPtr = dynamic_cast<RowUDAFFunctionCol*>(fFunctionCols[i].get());
+                resetUDAF(rowUDAFColumnPtr, i);
             }
         }
     }
@@ -842,7 +846,8 @@ void RowAggregation::aggReset()
     {
         if (fFunctionCols[i]->fAggFunction == ROWAGG_UDAF)
         {
-            resetUDAF(dynamic_cast<RowUDAFFunctionCol*>(fFunctionCols[i].get()));
+            auto rowUDAFColumnPtr = dynamic_cast<RowUDAFFunctionCol*>(fFunctionCols[i].get());
+            resetUDAF(rowUDAFColumnPtr, i);
         }
     }
 }
@@ -897,7 +902,8 @@ void RowAggregationUM::aggregateRowWithRemap(Row& row)
             {
                 if ((*fOrigFunctionCols)[i]->fAggFunction == ROWAGG_UDAF)
                 {
-                    resetUDAF(dynamic_cast<RowUDAFFunctionCol*>((*fOrigFunctionCols)[i].get()));
+                    auto rowUDAFColumnPtr = dynamic_cast<RowUDAFFunctionCol*>((*fOrigFunctionCols)[i].get());
+                    resetUDAF(rowUDAFColumnPtr, i);
                 }
             }
         }
@@ -907,7 +913,8 @@ void RowAggregationUM::aggregateRowWithRemap(Row& row)
             {
                 if (fFunctionCols[i]->fAggFunction == ROWAGG_UDAF)
                 {
-                    resetUDAF(dynamic_cast<RowUDAFFunctionCol*>(fFunctionCols[i].get()));
+                    auto rowUDAFColumnPtr = dynamic_cast<RowUDAFFunctionCol*>(fFunctionCols[i].get());
+                    resetUDAF(rowUDAFColumnPtr, i);
                 }
             }
         }
@@ -972,7 +979,8 @@ void RowAggregation::aggregateRow(Row& row)
                 {
                     if ((*fOrigFunctionCols)[i]->fAggFunction == ROWAGG_UDAF)
                     {
-                        resetUDAF(dynamic_cast<RowUDAFFunctionCol*>((*fOrigFunctionCols)[i].get()));
+                        auto rowUDAFColumnPtr = dynamic_cast<RowUDAFFunctionCol*>((*fOrigFunctionCols)[i].get());
+                        resetUDAF(rowUDAFColumnPtr, i);
                     }
                 }
             }
@@ -982,7 +990,8 @@ void RowAggregation::aggregateRow(Row& row)
                 {
                     if (fFunctionCols[i]->fAggFunction == ROWAGG_UDAF)
                     {
-                        resetUDAF(dynamic_cast<RowUDAFFunctionCol*>(fFunctionCols[i].get()));
+                        auto rowUDAFColumnPtr = dynamic_cast<RowUDAFFunctionCol*>(fFunctionCols[i].get());
+                        resetUDAF(rowUDAFColumnPtr, i);
                     }
                 }
             }
@@ -1033,7 +1042,7 @@ void RowAggregation::initMapData(const Row& rowIn)
             case execplan::CalpontSystemCatalog::DECIMAL:
             case execplan::CalpontSystemCatalog::UDECIMAL:
             {
-                if (LIKELY(fRow.getColumnWidth(colIn) == datatypes::MAXDECIMALWIDTH))
+                if (LIKELY(rowIn.getColumnWidth(colIn) == datatypes::MAXDECIMALWIDTH))
                 {
                     uint32_t colOutOffset = fRow.getOffset(colOut);
                     fRow.setBinaryField_offset(
@@ -1041,7 +1050,7 @@ void RowAggregation::initMapData(const Row& rowIn)
                         sizeof(int128_t),
                         colOutOffset);
                 }
-                else if (fRow.getColumnWidth(colIn) <= datatypes::MAXLEGACYWIDTH)
+                else if (rowIn.getColumnWidth(colIn) <= datatypes::MAXLEGACYWIDTH)
                 {
                     fRow.setIntField(rowIn.getIntField(colIn), colOut);
                 }
@@ -2025,9 +2034,24 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu
         case execplan::CalpontSystemCatalog::MEDINT:
         case execplan::CalpontSystemCatalog::INT:
         case execplan::CalpontSystemCatalog::BIGINT:
+            valIn = (long double) rowIn.getIntField(colIn);
+            break;
+
         case execplan::CalpontSystemCatalog::DECIMAL:   // handle scale later
         case execplan::CalpontSystemCatalog::UDECIMAL:  // handle scale later
-            valIn = (long double) rowIn.getIntField(colIn);
+            if (LIKELY(fRowGroupIn.getColumnWidth(colIn) == datatypes::MAXDECIMALWIDTH))
+            {
+                int128_t* val128InPtr = rowIn.getBinaryField<int128_t>(colIn);
+                valIn = Dec::getLongDoubleFromWideDecimal(*val128InPtr);
+            }
+            else if (fRowGroupIn.getColumnWidth(colIn) <= datatypes::MAXLEGACYWIDTH)
+            {
+                valIn = (long double) rowIn.getIntField(colIn);
+            }
+            else
+            {
+                idbassert(false);
+            }
             break;
 
         case execplan::CalpontSystemCatalog::UTINYINT:
@@ -2068,7 +2092,10 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu
 void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut,
                             int64_t colAux, uint64_t& funcColsIdx)
 {
-    uint32_t paramCount = fRGContext.getParameterCount();
+    uint32_t paramCount = fRGContextColl[funcColsIdx].getParameterCount();
+    // doUDAF changes funcColsIdx to skip UDAF arguments so the real UDAF
+    // column idx is the initial value of the funcColsIdx
+    uint64_t origFuncColsIdx = funcColsIdx;
     // The vector of parameters to be sent to the UDAF
     utils::VLArray<mcsv1sdk::ColumnDatum> valsIn(paramCount);
     utils::VLArray<uint32_t> dataFlags(paramCount);
@@ -2094,7 +2121,7 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut,
         if ((cc && cc->type() == execplan::ConstantColumn::NULLDATA)
                 ||  (!cc && isNull(&fRowGroupIn, rowIn, colIn) == true))
         {
-            if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS))
+            if (fRGContextColl[origFuncColsIdx].getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS))
             {
                 // When Ignore nulls, if there are multiple parameters and any 
                 // one of them is NULL, we ignore the entry. We need to increment
@@ -2139,7 +2166,6 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut,
                         datum.scale = fRowGroupIn.getScale()[colIn];
                         datum.precision = fRowGroupIn.getPrecision()[colIn];
                     }
-
                     break;
                 }
 
@@ -2156,7 +2182,19 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut,
                     }
                     else
                     {
-                        datum.columnData = rowIn.getIntField(colIn);
+                        if (LIKELY(fRowGroupIn.getColumnWidth(colIn)
+                            == datatypes::MAXDECIMALWIDTH))
+                        {
+                            datum.columnData = rowIn.getInt128Field(colIn);
+                        }
+                        else if (fRowGroupIn.getColumnWidth(colIn) <= datatypes::MAXLEGACYWIDTH)
+                        {
+                            datum.columnData = rowIn.getIntField(colIn);
+                        }
+                        else
+                        {
+                            idbassert(false);
+                        }
                         datum.scale = fRowGroupIn.getScale()[colIn];
                         datum.precision = fRowGroupIn.getPrecision()[colIn];
                     }
@@ -2322,7 +2360,7 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut,
                 default:
                 {
                     std::ostringstream errmsg;
-                    errmsg << "RowAggregation " << fRGContext.getName() <<
+                    errmsg << "RowAggregation " << fRGContextColl[origFuncColsIdx].getName() <<
                            ": No logic for data type: " << colDataType;
                     throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr);
                     break;
@@ -2348,18 +2386,20 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut,
     }
 
     // The intermediate values are stored in userData referenced by colAux.
-    fRGContext.setDataFlags(dataFlags);
-    fRGContext.setUserData(fRow.getUserData(colAux));
+    fRGContextColl[origFuncColsIdx].setDataFlags(dataFlags);
+    fRGContextColl[origFuncColsIdx].setUserData(fRow.getUserData(colAux));
 
     mcsv1sdk::mcsv1_UDAF::ReturnCode rc;
-    rc = fRGContext.getFunction()->nextValue(&fRGContext, valsIn);
-    fRGContext.setUserData(NULL);
+    rc = fRGContextColl[origFuncColsIdx].getFunction()->nextValue(&fRGContextColl[origFuncColsIdx],
+                                                              valsIn);
+    fRGContextColl[origFuncColsIdx].setUserData(NULL);
 
     if (rc == mcsv1sdk::mcsv1_UDAF::ERROR)
     {
-        RowUDAFFunctionCol* rowUDAF = dynamic_cast<RowUDAFFunctionCol*>(fFunctionCols[funcColsIdx].get());
+        RowUDAFFunctionCol* rowUDAF = dynamic_cast<RowUDAFFunctionCol*>(fFunctionCols[origFuncColsIdx].get());
         rowUDAF->bInterrupted = true;
-        throw logging::QueryDataExcept(fRGContext.getErrorMessage(), logging::aggregateFuncErr);
+        throw logging::QueryDataExcept(fRGContextColl[origFuncColsIdx].getErrorMessage(),
+                                       logging::aggregateFuncErr);
     }
 }
 
@@ -2758,11 +2798,11 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut)
         return;
     }
 
-    int64_t intOut = 0;
-    uint64_t uintOut = 0;
-    float floatOut = 0.0;
-    double doubleOut = 0.0;
-    long double longdoubleOut = 0.0;
+    int64_t intOut;
+    uint64_t uintOut;
+    float floatOut;
+    double doubleOut;
+    long double longdoubleOut;
     ostringstream oss;
     std::string strOut;
 
@@ -2829,6 +2869,12 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut)
                 fRow.setIntField<8>(intOut, colOut);
                 bSetSuccess = true;
             }
+            else if (valOut.compatible(int128TypeId))
+            {
+                int128_t int128Out = valOut.cast<int128_t>();
+                fRow.setInt128Field(int128Out, colOut);
+                bSetSuccess = true;
+            }
 
             break;
 
@@ -2952,6 +2998,8 @@ void RowAggregationUM::SetUDAFValue(static_any::any& valOut, int64_t colOut)
 
     if (!bSetSuccess)
     {
+        // This means the return from the UDAF doesn't match the field
+        // This handles the mismatch
         SetUDAFAnyValue(valOut, colOut);
     }
 }
@@ -2964,104 +3012,102 @@ void RowAggregationUM::SetUDAFAnyValue(static_any::any& valOut, int64_t colOut)
     // that they didn't set in mcsv1_UDAF::init(), but this
     // handles whatever return type is given and casts
     // it to whatever they said to return.
-    // TODO: Save cpu cycles here.
+    // TODO: Save cpu cycles here. For one, we don't need to initialize these
+
     int64_t intOut = 0;
     uint64_t uintOut = 0;
-    float floatOut = 0.0;
     double doubleOut = 0.0;
     long double longdoubleOut = 0.0;
+    int128_t int128Out = 0;
     ostringstream oss;
     std::string strOut;
 
     if (valOut.compatible(charTypeId))
     {
-        uintOut = intOut  = valOut.cast<char>();
-        floatOut = intOut;
+        int128Out = uintOut = intOut  = valOut.cast<char>();
+        doubleOut = intOut;
         oss << intOut;
     }
     else if (valOut.compatible(scharTypeId))
     {
-        uintOut = intOut = valOut.cast<signed char>();
-        floatOut = intOut;
+        int128Out = uintOut = intOut = valOut.cast<signed char>();
+        doubleOut = intOut;
         oss << intOut;
     }
     else if (valOut.compatible(shortTypeId))
     {
-        uintOut = intOut = valOut.cast<short>();
-        floatOut = intOut;
+        int128Out = uintOut = intOut = valOut.cast<short>();
+        doubleOut = intOut;
         oss << intOut;
     }
     else if (valOut.compatible(intTypeId))
     {
-        uintOut = intOut = valOut.cast<int>();
-        floatOut = intOut;
+        int128Out = uintOut = intOut = valOut.cast<int>();
+        doubleOut = intOut;
         oss << intOut;
     }
     else if (valOut.compatible(longTypeId))
     {
-        uintOut = intOut = valOut.cast<long>();
-        floatOut = intOut;
+        int128Out = uintOut = intOut = valOut.cast<long>();
+        doubleOut = intOut;
         oss << intOut;
     }
     else if (valOut.compatible(llTypeId))
     {
-        uintOut = intOut = valOut.cast<long long>();
-        floatOut = intOut;
+        int128Out = uintOut = intOut = valOut.cast<long long>();
+        doubleOut = intOut;
         oss << intOut;
     }
     else if (valOut.compatible(ucharTypeId))
     {
-        intOut = uintOut = valOut.cast<unsigned char>();
-        floatOut = uintOut;
+        int128Out = intOut = uintOut = valOut.cast<unsigned char>();
+        doubleOut = uintOut;
         oss << uintOut;
     }
     else if (valOut.compatible(ushortTypeId))
     {
-        intOut = uintOut = valOut.cast<unsigned short>();
-        floatOut = uintOut;
+        int128Out = intOut = uintOut = valOut.cast<unsigned short>();
+        doubleOut = uintOut;
         oss << uintOut;
     }
     else if (valOut.compatible(uintTypeId))
     {
-        intOut = uintOut = valOut.cast<unsigned int>();
-        floatOut = uintOut;
+        int128Out = intOut = uintOut = valOut.cast<unsigned int>();
+        doubleOut = uintOut;
         oss << uintOut;
     }
     else if (valOut.compatible(ulongTypeId))
     {
-        intOut = uintOut = valOut.cast<unsigned long>();
-        floatOut = uintOut;
+        int128Out = intOut = uintOut = valOut.cast<unsigned long>();
+        doubleOut = uintOut;
         oss << uintOut;
     }
     else if (valOut.compatible(ullTypeId))
     {
-        intOut = uintOut = valOut.cast<unsigned long long>();
-        floatOut = uintOut;
+        int128Out = intOut = uintOut = valOut.cast<unsigned long long>();
+        doubleOut = uintOut;
         oss << uintOut;
     }
-    else if (valOut.compatible(floatTypeId))
+    else if (valOut.compatible(int128TypeId))
     {
-        floatOut = valOut.cast<float>();
-        doubleOut = floatOut;
-        longdoubleOut = doubleOut;
-        intOut = uintOut = floatOut;
-        oss << floatOut;
+        intOut = uintOut = int128Out = valOut.cast<int128_t>();
+        doubleOut = uintOut;
+        oss << uintOut;
     }
-    else if (valOut.compatible(doubleTypeId))
+    else if (valOut.compatible(floatTypeId) || valOut.compatible(doubleTypeId))
     {
-        doubleOut = valOut.cast<double>();
-        longdoubleOut = doubleOut;
-        floatOut = (float)doubleOut;
-        uintOut = (uint64_t)doubleOut;
-        intOut = (int64_t)doubleOut;
+        // Should look at scale for decimal and adjust
+        doubleOut = valOut.cast<float>();
+        int128Out = doubleOut;
+        intOut = uintOut = doubleOut;
         oss << doubleOut;
     }
-
     else if (valOut.compatible(longdoubleTypeId))
     {
+        // Should look at scale for decimal and adjust
         longdoubleOut = valOut.cast<long double>();
+        int128Out = longdoubleOut;
         doubleOut = (double)longdoubleOut;
-        floatOut = (float)doubleOut;
         uintOut = (uint64_t)doubleOut;
         intOut = (int64_t)doubleOut;
         oss << doubleOut;
@@ -3075,7 +3121,7 @@ void RowAggregationUM::SetUDAFAnyValue(static_any::any& valOut, int64_t colOut)
         uintOut = strtoul(strOut.c_str(), NULL, 10);
         doubleOut = strtod(strOut.c_str(), NULL);
         longdoubleOut = strtold(strOut.c_str(), NULL);
-        floatOut = (float)doubleOut;
+        int128Out = longdoubleOut;
     }
     else
     {
@@ -3099,11 +3145,20 @@ void RowAggregationUM::SetUDAFAnyValue(static_any::any& valOut, int64_t colOut)
             break;
 
         case execplan::CalpontSystemCatalog::BIGINT:
-        case execplan::CalpontSystemCatalog::DECIMAL:
-        case execplan::CalpontSystemCatalog::UDECIMAL:
             fRow.setIntField<8>(intOut, colOut);
             break;
 
+        case execplan::CalpontSystemCatalog::DECIMAL:
+        case execplan::CalpontSystemCatalog::UDECIMAL:
+        {
+            uint32_t width = fRowGroupOut->getColumnWidth(colOut);
+            if (width == datatypes::MAXDECIMALWIDTH)
+                fRow.setInt128Field(int128Out, colOut);
+            else
+                fRow.setIntField<8>(intOut, colOut);
+            break;
+        }
+
         case execplan::CalpontSystemCatalog::UTINYINT:
             fRow.setUintField<1>(uintOut, colOut);
             break;
@@ -3135,8 +3190,11 @@ void RowAggregationUM::SetUDAFAnyValue(static_any::any& valOut, int64_t colOut)
 
         case execplan::CalpontSystemCatalog::FLOAT:
         case execplan::CalpontSystemCatalog::UFLOAT:
+        {
+            float floatOut = (float)doubleOut;
             fRow.setFloatField(floatOut, colOut);
             break;
+        }
 
         case execplan::CalpontSystemCatalog::DOUBLE:
         case execplan::CalpontSystemCatalog::UDOUBLE:
@@ -3407,10 +3465,28 @@ void RowAggregationUM::doNullConstantAggregate(const ConstantAggData& aggData, u
                 case execplan::CalpontSystemCatalog::MEDINT:
                 case execplan::CalpontSystemCatalog::INT:
                 case execplan::CalpontSystemCatalog::BIGINT:
+                {
+                    fRow.setIntField(getIntNullValue(colDataType), colOut);
+                }
+                break;
+
                 case execplan::CalpontSystemCatalog::DECIMAL:
                 case execplan::CalpontSystemCatalog::UDECIMAL:
                 {
-                    fRow.setIntField(getIntNullValue(colDataType), colOut);
+                    auto width = fRow.getColumnWidth(colOut);
+                    if (fRow.getColumnWidth(colOut) == datatypes::MAXDECIMALWIDTH)
+                    {
+                        fRow.setInt128Field(datatypes::Decimal128Null, colOut);
+                    }
+                    else if (width <= datatypes::MAXLEGACYWIDTH)
+                    {
+                        fRow.setIntField(getIntNullValue(colDataType), colOut);
+                    }
+                    else
+                    {
+                        idbassert(0);
+                        throw std::logic_error("RowAggregationUM::doNullConstantAggregate(): DECIMAL bad length.");
+                    }
                 }
                 break;
 
@@ -3570,7 +3646,7 @@ void RowAggregationUM::doNullConstantAggregate(const ConstantAggData& aggData, u
 void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData, uint64_t i)
 {
     int64_t colOut = fFunctionCols[i]->fOutputColumnIndex;
-    int colDataType = (fRowGroupOut->getColTypes())[colOut];
+    auto colDataType = (fRowGroupOut->getColTypes())[colOut];
     int64_t rowCnt = fRow.getIntField(fFunctionCols[i]->fAuxColumnIndex);
 
     switch (aggData.fOp)
@@ -3592,7 +3668,7 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData
                 {
                     fRow.setIntField(strtol(aggData.fConstValue.c_str(), 0, 10), colOut);
                 }
-                break;
+                break;                    
 
                 // AVG should not be uint32_t result type.
                 case execplan::CalpontSystemCatalog::UTINYINT:
@@ -3608,9 +3684,27 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData
                 case execplan::CalpontSystemCatalog::DECIMAL:
                 case execplan::CalpontSystemCatalog::UDECIMAL:
                 {
-                    double dbl = strtod(aggData.fConstValue.c_str(), 0);
-                    double scale = pow(10.0, (double) fRowGroupOut->getScale()[i]);
-                    fRow.setIntField((int64_t)(scale * dbl), colOut);
+                    auto width = fRow.getColumnWidth(colOut);
+                    if (width == datatypes::MAXDECIMALWIDTH)
+                    {
+                        ColTypeAlias colType;
+                        colType.colWidth = width;
+                        colType.precision = fRow.getPrecision(i);
+                        colType.scale = fRow.getScale(i);
+                        colType.colDataType =  colDataType;
+                        fRow.setInt128Field(Dec::int128FromString(aggData.fConstValue, colType), colOut);
+                    }
+                    else if (width <= datatypes::MAXLEGACYWIDTH)
+                    {
+                        double dbl = strtod(aggData.fConstValue.c_str(), 0);
+                        double scale = pow(10.0, (double) fRowGroupOut->getScale()[i]);
+                        fRow.setIntField((int64_t)(scale * dbl), colOut);
+                    }
+                    else
+                    {
+                        idbassert(0);
+                        throw std::logic_error("RowAggregationUM::doNotNullConstantAggregate(): DECIMAL bad length.");
+                    }
                 }
                 break;
 
@@ -3715,15 +3809,39 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData
                 case execplan::CalpontSystemCatalog::DECIMAL:
                 case execplan::CalpontSystemCatalog::UDECIMAL:
                 {
-                    double dbl = strtod(aggData.fConstValue.c_str(), 0);
-                    dbl *= pow(10.0, (double) fRowGroupOut->getScale()[i]);
-                    dbl *= rowCnt;
+                    auto width = fRow.getColumnWidth(colOut);
+                    if (width == datatypes::MAXDECIMALWIDTH)
+                    {
+                        ColTypeAlias colType;
+                        colType.colWidth = width;
+                        colType.precision = fRow.getPrecision(i);
+                        colType.scale = fRow.getScale(i);
+                        colType.colDataType =  colDataType;
+                        int128_t constValue = Dec::int128FromString(aggData.fConstValue,
+                                                             colType);
+                        int128_t sum;
 
-                    if ((dbl > 0 && dbl > (double) numeric_limits<int64_t>::max()) ||
-                            (dbl < 0 && dbl < (double) numeric_limits<int64_t>::min()))
-                        throw logging::QueryDataExcept(overflowMsg, logging::aggregateDataErr);
+                        datatypes::MultiplicationOverflowCheck multOp;
+                        multOp(constValue, rowCnt, sum);
+                        fRow.setInt128Field(sum, colOut);
+                    }
+                    else if (width == datatypes::MAXLEGACYWIDTH)
+                    {
+                        double dbl = strtod(aggData.fConstValue.c_str(), 0);
+                        dbl *= pow(10.0, (double) fRowGroupOut->getScale()[i]);
+                        dbl *= rowCnt;
+ 
+                        if ((dbl > 0 && dbl > (double) numeric_limits<int64_t>::max()) ||
+                                (dbl < 0 && dbl < (double) numeric_limits<int64_t>::min()))
+                            throw logging::QueryDataExcept(overflowMsg, logging::aggregateDataErr);
+                        fRow.setIntField((int64_t) dbl, colOut);
+                    }
+                    else
+                    {
+                        idbassert(0);
+                        throw std::logic_error("RowAggregationUM::doNotNullConstantAggregate(): sum() DECIMAL bad length.");
+                    }
 
-                    fRow.setIntField((int64_t) dbl, colOut);
                 }
                 break;
 
@@ -4459,7 +4577,7 @@ void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut,
 
     if (!userDataIn)
     {
-        if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS))
+        if (fRGContextColl[funcColsIdx].getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS))
         {
             return;
         }
@@ -4468,14 +4586,14 @@ void RowAggregationUMP2::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut,
         flags[0] |= mcsv1sdk::PARAM_IS_NULL;
     }
 
-    fRGContext.setDataFlags(flags);
+    fRGContextColl[funcColsIdx].setDataFlags(flags);
 
     // The intermediate values are stored in colAux.
-    fRGContext.setUserData(fRow.getUserData(colAux));
+    fRGContextColl[funcColsIdx].setUserData(fRow.getUserData(colAux));
 
     // Call the UDAF subEvaluate method
     mcsv1sdk::mcsv1_UDAF::ReturnCode rc;
-    rc = fRGContext.getFunction()->subEvaluate(&fRGContext, userDataIn.get());
+    rc = fRGContextColl[funcColsIdx].getFunction()->subEvaluate(&fRGContextColl[funcColsIdx], userDataIn.get());
     fRGContext.setUserData(NULL);
 
     if (rc == mcsv1sdk::mcsv1_UDAF::ERROR)
diff --git a/utils/rowgroup/rowaggregation.h b/utils/rowgroup/rowaggregation.h
index 3c138b1cf..7fb4a10dc 100644
--- a/utils/rowgroup/rowaggregation.h
+++ b/utils/rowgroup/rowaggregation.h
@@ -645,6 +645,7 @@ protected:
     }
 
     void resetUDAF(RowUDAFFunctionCol* rowUDAF);
+    void resetUDAF(RowUDAFFunctionCol* rowUDAF, uint64_t funcColIdx);
 
     inline bool isNull(const RowGroup* pRowGroup, const Row& row, int64_t col);
     inline void makeAggFieldsNull(Row& row);
@@ -710,6 +711,7 @@ protected:
 
     // We need a separate copy for each thread.
     mcsv1sdk::mcsv1Context fRGContext;
+    std::vector<mcsv1sdk::mcsv1Context> fRGContextColl;
     
     // These are handy for testing the actual type of static_any for UDAF
     static const static_any::any& charTypeId;
@@ -718,10 +720,12 @@ protected:
     static const static_any::any& intTypeId;
     static const static_any::any& longTypeId;
     static const static_any::any& llTypeId;
+    static const static_any::any& int128TypeId;
     static const static_any::any& ucharTypeId;
     static const static_any::any& ushortTypeId;
     static const static_any::any& uintTypeId;
     static const static_any::any& ulongTypeId;
+    static const static_any::any& uint128TypeId;
     static const static_any::any& ullTypeId;
     static const static_any::any& floatTypeId;
     static const static_any::any& doubleTypeId;
diff --git a/utils/udfsdk/mcsv1_udaf.cpp b/utils/udfsdk/mcsv1_udaf.cpp
index f23d53c52..2ad8905a8 100755
--- a/utils/udfsdk/mcsv1_udaf.cpp
+++ b/utils/udfsdk/mcsv1_udaf.cpp
@@ -278,13 +278,13 @@ const static_any::any& mcsv1_UDAF::shortTypeId((short)1);
 const static_any::any& mcsv1_UDAF::intTypeId((int)1);
 const static_any::any& mcsv1_UDAF::longTypeId((long)1);
 const static_any::any& mcsv1_UDAF::llTypeId((long long)1);
-const static_any::any& mcsv1_UDAF::int128TypeId((int128_t)1);
+const static_any::any& mcsv1_UDAF::int128TypeId((__int128)1);
 const static_any::any& mcsv1_UDAF::ucharTypeId((unsigned char)1);
 const static_any::any& mcsv1_UDAF::ushortTypeId((unsigned short)1);
 const static_any::any& mcsv1_UDAF::uintTypeId((unsigned int)1);
 const static_any::any& mcsv1_UDAF::ulongTypeId((unsigned long)1);
 const static_any::any& mcsv1_UDAF::ullTypeId((unsigned long long)1);
-const static_any::any& mcsv1_UDAF::uint128TypeId((uint128_t)1);
+const static_any::any& mcsv1_UDAF::uint128TypeId((unsigned __int128)1);
 const static_any::any& mcsv1_UDAF::floatTypeId((float)1);
 const static_any::any& mcsv1_UDAF::doubleTypeId((double)1);
 const static_any::any& mcsv1_UDAF::strTypeId(typeStr);
diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp
index 62d4a2714..75075b778 100644
--- a/utils/windowfunction/wf_udaf.cpp
+++ b/utils/windowfunction/wf_udaf.cpp
@@ -544,13 +544,13 @@ void WF_udaf::SetUDAFValue(static_any::any& valOut, int64_t colOut,
     static const static_any::any& intTypeId = (int)1;
     static const static_any::any& longTypeId = (long)1;
     static const static_any::any& llTypeId = (long long)1;
-    static const static_any::any& int128TypeId = (int128_t)1;
+    static const static_any::any& int128TypeId = (__int128)1;
     static const static_any::any& ucharTypeId = (unsigned char)1;
     static const static_any::any& ushortTypeId = (unsigned short)1;
     static const static_any::any& uintTypeId = (unsigned int)1;
     static const static_any::any& ulongTypeId = (unsigned long)1;
     static const static_any::any& ullTypeId = (unsigned long long)1;
-    static const static_any::any& uint128TypeId = (uint128_t)1;
+    static const static_any::any& uint128TypeId = (unsigned __int128)1;
     static const static_any::any& floatTypeId = (float)1;
     static const static_any::any& doubleTypeId = (double)1;
     static const std::string typeStr("");