MCOL-4361 Replace pow(10.0, (double)scale) expressions with a static dictionary lookup.

2025-07-30 19:23:07 +03:00 · 2021-04-09 12:14:41 +04:00
parent fd720bfd7d
commit 362bfcd15e
13 changed files with 153 additions and 132 deletions
--- a/datatypes/mcs_decimal.h
+++ b/datatypes/mcs_decimal.h
@ -195,6 +195,14 @@ public:
 };


+template<typename T>
+T applySignedScale(const T & val, int32_t scale)
+{
+    return scale < 0 ?
+           val / datatypes::scaleDivisor<T>((uint32_t) -scale) :
+           val * datatypes::scaleDivisor<T>((uint32_t) scale);
+}
+
 /**
    @brief The function to produce scale multiplier/divisor for
    wide decimals.
@ -267,6 +275,14 @@ public:
    {
        return DecomposedDecimal<int64_t>(value, scale).toSIntCeil();
    }
+
+    // Convert to an arbitrary floating point data type,
+    // e.g. float, double, long double
+    template<typename T>
+    T toXFloat(uint32_t scale) const
+    {
+        return (T) value / scaleDivisor<T>(scale);
+    }
 };


@ -590,6 +606,11 @@ class Decimal: public TDecimal128, public TDecimal64
            return TDecimal64::toUInt64Round((uint32_t) scale);
        }

+        template<typename T> T decimal64ToXFloat() const
+        {
+             return TDecimal64::toXFloat<T>((uint32_t) scale);
+        }
+
        int64_t toSInt64Round() const
        {
            return isWideDecimalTypeByPrecision(precision) ?
--- a/dbcon/execplan/simplecolumn_decimal.h
+++ b/dbcon/execplan/simplecolumn_decimal.h
@ -166,7 +166,9 @@ inline int64_t SimpleColumn_Decimal<len>:: getIntVal(rowgroup::Row& row, bool& i
    if (row.equals<len>(fNullVal, fInputIndex))
        isNull = true;

-    return (int64_t)(row.getIntField<len>(fInputIndex) / pow((double)10, fResultType.scale));
+    // TODO: fix double division to integer division
+    return (int64_t)(row.getIntField<len>(fInputIndex) /
+                     datatypes::scaleDivisor<double>(fResultType.scale));
 }

 template<int len>
@ -175,7 +177,8 @@ inline float SimpleColumn_Decimal<len>::getFloatVal(rowgroup::Row& row, bool& is
    if (row.equals<len>(fNullVal, fInputIndex))
        isNull = true;

-    return (row.getIntField<len>(fInputIndex) / pow((double)10, fResultType.scale));
+    return (row.getIntField<len>(fInputIndex) /
+            datatypes::scaleDivisor<double>(fResultType.scale));
 }

 template<int len>
@ -184,7 +187,8 @@ inline double SimpleColumn_Decimal<len>::getDoubleVal(rowgroup::Row& row, bool&
    if (row.equals<len>(fNullVal, fInputIndex))
        isNull = true;

-    return (row.getIntField<len>(fInputIndex) / pow((double)10, fResultType.scale));
+    return (row.getIntField<len>(fInputIndex) /
+            datatypes::scaleDivisor<double>(fResultType.scale));
 }

 template<int len>
@ -193,7 +197,8 @@ inline long double SimpleColumn_Decimal<len>::getLongDoubleVal(rowgroup::Row& ro
    if (row.equals<len>(fNullVal, fInputIndex))
        isNull = true;

-    return (row.getIntField<len>(fInputIndex) / pow((double)10, fResultType.scale));
+    return (row.getIntField<len>(fInputIndex) /
+            datatypes::scaleDivisor<long double>(fResultType.scale));
 }

 template<int len>
--- a/dbcon/execplan/treenode.h
+++ b/dbcon/execplan/treenode.h
@ -828,7 +828,7 @@ inline float TreeNode::getFloatVal()
            }
            else
            {
-                return (fResult.decimalVal.value / pow((double)10, fResult.decimalVal.scale));
+                return (float) fResult.decimalVal.decimal64ToXFloat<double>();
            }
        }

@ -903,8 +903,7 @@ inline double TreeNode::getDoubleVal()
            }
            else
            {
-                // this may not be accurate. if this is problematic, change to pre-calculated power array.
-                return (double)(fResult.decimalVal.value / pow((double)10, fResult.decimalVal.scale));
+                return fResult.decimalVal.decimal64ToXFloat<double>();
            }
        }

@ -979,8 +978,7 @@ inline long double TreeNode::getLongDoubleVal()
            }
            else
            {
-                // this may not be accurate. if this is problematic, change to pre-calculated power array.
-                return (long double)(fResult.decimalVal.value / pow((long double)10, fResult.decimalVal.scale));
+                return fResult.decimalVal.decimal64ToXFloat<long double>();
            }
        }

--- a/dbcon/joblist/tupleunion.cpp
+++ b/dbcon/joblist/tupleunion.cpp
@ -479,45 +479,22 @@ void TupleUnion::normalize(const Row& in, Row* out)
                    case CalpontSystemCatalog::FLOAT:
                    case CalpontSystemCatalog::UFLOAT:
                    {
-                        int scale = in.getScale(i);
-
-                        if (scale != 0)
-                        {
-                            float f = in.getIntField(i);
-                            f /= (uint64_t) pow(10.0, scale);
-                            out->setFloatField(f, i);
-                        }
-                        else
-                            out->setFloatField(in.getIntField(i), i);
-
+                        auto d = in.getScaledSInt64FieldAsXFloat<double>(i);
+                        out->setFloatField((float) d, i);
                        break;
                    }

                    case CalpontSystemCatalog::DOUBLE:
                    case CalpontSystemCatalog::UDOUBLE:
                    {
-                        int scale = in.getScale(i);
-
-                        if (scale != 0)
-                        {
-                            double d = in.getIntField(i);
-                            d /= (uint64_t) pow(10.0, scale);
-                            out->setDoubleField(d, i);
-                        }
-                        else
-                            out->setDoubleField(in.getIntField(i), i);
-
+                        auto d = in.getScaledSInt64FieldAsXFloat<double>(i);
+                        out->setDoubleField(d, i);
                        break;
                    }

                    case CalpontSystemCatalog::LONGDOUBLE:
                    {
-                        int scale = in.getScale(i);
-                        long double d = in.getIntField(i);
-                        if (scale != 0)
-                        {
-                            d /= (uint64_t) pow(10.0, scale);
-                        }
+                        auto d = in.getScaledSInt64FieldAsXFloat<long double>(i);
                        out->setLongDoubleField(d, i);
                        break;
                    }
@ -526,13 +503,30 @@ void TupleUnion::normalize(const Row& in, Row* out)
                    case CalpontSystemCatalog::UDECIMAL:
                    {
 dec1:
-                        uint64_t val = in.getIntField(i);
                        int diff = out->getScale(i) - in.getScale(i);
-
-                        if (diff < 0)
-                            val /= (uint64_t) pow((double) 10, (double) - diff);
-                        else
-                            val *= (uint64_t) pow((double) 10, (double) diff);
+                        /*
+                           Signed INT to XDecimal
+                           TODO: there are a few problems here:
+                           - This code is not wide decimal friendly.
+                             `uint64_t val` can overflow when applying a positive scale
+                             It's not possible to make a reproducible bug report
+                             at the moment: MCOL-4612 and MCOL-4613 should be fixed first.
+                           - Using uint64_t is wrong here. The data type of "in" field is
+                             signed. In case of a negative diff, the result will be wrong,
+                             because division (unlike multiplication) is sensitive to
+                             the signess of the operands.
+                             Perhaps diff cannot be negative and we can put an assert for it.
+                             Anyway, it's safer to change `uint64_t val` to `int64_t val`.
+                           - This code does not handle overflow that may happen on
+                             scale multiplication (MCOL-4613). Instead of returning a garbage
+                             we should probably apply saturation here. In long terms we
+                             should implement DECIMAL(65,x) to avoid overflow completely
+                             (so the UNION between DECIMAL and integer can choose a proper
+                              DECIMAL(M,N) result data type to guarantee that any incoming
+                              integer value can fit into it).
+                        */
+                        // TODO: isn't overflow possible below?
+                        uint64_t val = datatypes::applySignedScale<uint64_t>(in.getIntField(i), diff);

                        if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
                            out->setInt128Field(val, i);
@ -606,50 +600,23 @@ dec1:
                    case CalpontSystemCatalog::FLOAT:
                    case CalpontSystemCatalog::UFLOAT:
                    {
-                        int scale = in.getScale(i);
-
-                        if (scale != 0)
-                        {
-                            float f = in.getUintField(i);
-                            f /= (uint64_t) pow(10.0, scale);
-                            out->setFloatField(f, i);
-                        }
-                        else
-                            out->setFloatField(in.getUintField(i), i);
-
+                        auto d = in.getScaledUInt64FieldAsXFloat<double>(i);
+                        out->setFloatField((float) d, i);
                        break;
                    }

                    case CalpontSystemCatalog::DOUBLE:
                    case CalpontSystemCatalog::UDOUBLE:
                    {
-                        int scale = in.getScale(i);
-
-                        if (scale != 0)
-                        {
-                            double d = in.getUintField(i);
-                            d /= (uint64_t) pow(10.0, scale);
-                            out->setDoubleField(d, i);
-                        }
-                        else
-                            out->setDoubleField(in.getUintField(i), i);
-
+                        auto d = in.getScaledUInt64FieldAsXFloat<double>(i);
+                        out->setDoubleField(d, i);
                        break;
                    }

                    case CalpontSystemCatalog::LONGDOUBLE:
                    {
-                        int scale = in.getScale(i);
-
-                        if (scale != 0)
-                        {
-                            long double d = in.getUintField(i);
-                            d /= (uint64_t) pow(10.0, scale);
-                            out->setLongDoubleField(d, i);
-                        }
-                        else
-                            out->setLongDoubleField(in.getUintField(i), i);
-
+                        auto d = in.getScaledUInt64FieldAsXFloat<long double>(i);
+                        out->setLongDoubleField(d, i);
                        break;
                    }

@ -657,13 +624,16 @@ dec1:
                    case CalpontSystemCatalog::UDECIMAL:
                    {
 dec2:
-                        uint64_t val = in.getIntField(i);
                        int diff = out->getScale(i) - in.getScale(i);
-
-                        if (diff < 0)
-                            val /= (uint64_t) pow((double) 10, (double) - diff);
-                        else
-                            val *= (uint64_t) pow((double) 10, (double) diff);
+                        /*
+                          Unsigned INT to XDecimal
+                          TODO: There are a few problems here:
+                          - It should use in.getUintField() instead of in.getIntField()
+                          - All problems mentioned in the code under label "dec1:" are
+                            also applicable here.
+                        */
+                        // TODO: isn't overflow possible below?
+                        uint64_t val = datatypes::applySignedScale<uint64_t>(in.getIntField(i), diff);

                        if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
                            out->setInt128Field(val, i);
@ -989,13 +959,15 @@ dec2:
                    {
 dec3:                   /* have to pick a scale to use for the double. using 5... */
                        uint32_t scale = 5;
-                        uint64_t ival = (uint64_t) (double) (val * pow((double) 10, (double) scale));
+                        uint64_t ival = (uint64_t) (double) (val * datatypes::scaleDivisor<double>(scale));
                        int diff = out->getScale(i) - scale;
-
-                        if (diff < 0)
-                            ival /= (uint64_t) pow((double) 10, (double) - diff);
-                        else
-                            ival *= (uint64_t) pow((double) 10, (double) diff);
+                        // xFLOAT or xDOUBLE to xDECIMAL conversion. Is it really possible?
+                        // TODO:
+                        // Perhaps we should add an assert here that this combination is not possible
+                        // In the current reduction all problems mentioned in the code under
+                        //  label "dec1:" are also applicable here.
+                        // TODO: isn't overflow possible below?
+                        ival = datatypes::applySignedScale<uint64_t>(ival, diff);

                        if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
                            out->setInt128Field(ival, i);
@ -1070,13 +1042,15 @@ dec3:                   /* have to pick a scale to use for the double. using 5..
                    {
 dec4:                   /* have to pick a scale to use for the double. using 5... */
                        uint32_t scale = 5;
-                        uint64_t ival = (uint64_t) (double) (val * pow((double) 10, (double) scale));
+                        uint64_t ival = (uint64_t) (double) (val * datatypes::scaleDivisor<double>(scale));
                        int diff = out->getScale(i) - scale;

-                        if (diff < 0)
-                            ival /= (uint64_t) pow((double) 10, (double) - diff);
-                        else
-                            ival *= (uint64_t) pow((double) 10, (double) diff);
+                        // LONGDOUBLE to xDECIMAL conversions: is it really possible?
+                        // TODO:
+                        // Perhaps we should add an assert here that this combination is not possible
+                        // In the current reduction all problems mentioned in the code under
+                        //  label "dec1:" are also applicable here.
+                        ival = datatypes::applySignedScale<uint64_t>(ival, diff);

                        if (out->getColumnWidth(i) == datatypes::MAXDECIMALWIDTH)
                            out->setInt128Field(ival, i);
--- a/dbcon/mysql/ha_mcs_execplan.cpp
+++ b/dbcon/mysql/ha_mcs_execplan.cpp
@ -4463,8 +4463,8 @@ ConstantColumn* buildDecimalColumn(Item* item, gp_walk_info& gwi)
    if (gwi.internalDecimalScale >= 0 && idp->decimals > (uint)gwi.internalDecimalScale)
    {
        columnstore_decimal.scale = gwi.internalDecimalScale;
-        double val = (double)(columnstore_decimal.value / pow((double)10, idp->decimals - gwi.internalDecimalScale));
-        columnstore_decimal.value = (int64_t)(val > 0 ? val + 0.5 : val - 0.5);
+        uint32_t diff = (uint32_t) (idp->decimals - gwi.internalDecimalScale);
+        columnstore_decimal.value= columnstore_decimal.TDecimal64::toSInt64Round(diff);
    }
    else
        columnstore_decimal.scale = idp->decimal_scale();
--- a/primitives/primproc/filtercommand.cpp
+++ b/primitives/primproc/filtercommand.cpp
@ -97,8 +97,8 @@ Command* FilterCommand::makeFilterCommand(ByteStream& bs, vector<SCommand>& cmds

        ColumnCommand* cmd0 = dynamic_cast<ColumnCommand*>(cmds[nc - 2].get());
        ColumnCommand* cmd1 = dynamic_cast<ColumnCommand*>(cmds[nc - 1].get());
-        int scale0 = cmd0->getScale();
-        int scale1 = cmd1->getScale();
+        uint32_t scale0 = cmd0->getScale();
+        uint32_t scale1 = cmd1->getScale();

        // char[] is stored as int, but cannot directly compare if length is different
        // due to endian issue
@ -117,7 +117,8 @@ Command* FilterCommand::makeFilterCommand(ByteStream& bs, vector<SCommand>& cmds
        else
        {
            ScaledFilterCmd* sc = new ScaledFilterCmd();
-            sc->setFactor(pow(10.0, scale1) / pow(10.0, scale0));
+            sc->setFactor(datatypes::scaleDivisor<double>(scale1) /
+                          datatypes::scaleDivisor<double>(scale0));
            fc = sc;
        }

--- a/utils/funcexp/func_cast.cpp
+++ b/utils/funcexp/func_cast.cpp
@ -1519,7 +1519,7 @@ double Func_cast_decimal::getDoubleVal(Row& row,
        return static_cast<double>(decimal);
    }

-    return (double) decimal.value / helpers::powerOf10_c[decimal.scale];
+    return decimal.decimal64ToXFloat<double>();
 }


@ -1631,7 +1631,7 @@ double Func_cast_double::getDoubleVal(Row& row,
            }
            else
            {
-                dblval = (double)(decimal.value / pow((double)10, decimal.scale));
+                dblval = decimal.decimal64ToXFloat<double>();
            }
        }
        break;
--- a/utils/regr/moda.cpp
+++ b/utils/regr/moda.cpp
@ -248,7 +248,7 @@ mcsv1_UDAF::ReturnCode Moda_impl_T<T>::nextValue(mcsv1Context* context, ColumnDa

        if (val != 0 && scale > 0)
        {
-            val /= pow(10.0, (double)scale);
+            val /= datatypes::scaleDivisor<double>(scale);
        }
    }
    
--- a/utils/rowgroup/rowaggregation.cpp
+++ b/utils/rowgroup/rowaggregation.cpp
@ -1458,12 +1458,8 @@ void RowAggregation::doSum(const Row& rowIn, int64_t colIn, int64_t colOut, int
            }
            else if (width <= datatypes::MAXLEGACYWIDTH)
            {
-                valIn = rowIn.getIntField(colIn);
-                double scale = (double)(fRowGroupIn.getScale())[colIn];
-                if (valIn != 0 && scale > 0)
-                {
-                    valIn /= pow(10.0, scale);
-                }
+                uint32_t scale = fRowGroupIn.getScale()[colIn];
+                valIn = rowIn.getScaledSInt64FieldAsXFloat<long double>(colIn, scale);
            }
            else
            {
@ -1938,12 +1934,8 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6
            }
            else if (width <= datatypes::MAXLEGACYWIDTH)
            {
-                valIn = rowIn.getIntField(colIn);
-                double scale = (double)(fRowGroupIn.getScale())[colIn];
-                if (valIn != 0 && scale > 0)
-                {
-                    valIn /= pow(10.0, scale);
-                }
+                uint32_t scale = fRowGroupIn.getScale()[colIn];
+                valIn = rowIn.getScaledSInt64FieldAsXFloat<long double>(colIn, scale);
            }
            else
            {
@ -3366,8 +3358,8 @@ void RowAggregationUM::calculateStatisticsFunctions()
                    long double sum1 = fRow.getLongDoubleField(colAux);
                    long double sum2 = fRow.getLongDoubleField(colAux + 1);

-                    int scale = fRow.getScale(colOut);
-                    long double factor = pow(10.0, scale);
+                    uint32_t scale = fRow.getScale(colOut);
+                    auto factor = datatypes::scaleDivisor<long double>(scale);

                    if (scale != 0) // adjust the scale if necessary
                    {
@ -3732,7 +3724,8 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData
                    else if (width <= datatypes::MAXLEGACYWIDTH)
                    {
                        double dbl = strtod(aggData.fConstValue.c_str(), 0);
-                        double scale = pow(10.0, (double) fRowGroupOut->getScale()[i]);
+                        auto scale = datatypes::scaleDivisor<double>(fRowGroupOut->getScale()[i]);
+                        // TODO: isn't overflow possible below:
                        fRow.setIntField((int64_t)(scale * dbl), colOut);
                    }
                    else
@ -3862,7 +3855,8 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData
                    else if (width == datatypes::MAXLEGACYWIDTH)
                    {
                        double dbl = strtod(aggData.fConstValue.c_str(), 0);
-                        dbl *= pow(10.0, (double) fRowGroupOut->getScale()[i]);
+                        // TODO: isn't precision loss possible below?
+                        dbl *= datatypes::scaleDivisor<double>(fRowGroupOut->getScale()[i]);
                        dbl *= rowCnt;
 
                        if ((dbl > 0 && dbl > (double) numeric_limits<int64_t>::max()) ||
@ -4077,9 +4071,9 @@ void RowAggregationUM::doNotNullConstantAggregate(const ConstantAggData& aggData
                case execplan::CalpontSystemCatalog::UDECIMAL:
                {
                    double dbl = strtod(aggData.fConstValue.c_str(), 0);
-                    double scale = pow(10.0, (double) fRowGroupOut->getScale()[i]);
-                    datum.columnData = (int64_t)(scale * dbl);
-                    datum.scale = scale;
+                    // TODO: isn't overflow possible below?
+                    datum.columnData = (int64_t) (dbl * datatypes::scaleDivisor<double>(fRowGroupOut->getScale()[i]));
+                    datum.scale = fRowGroupOut->getScale()[i];
                    datum.precision = fRowGroupOut->getPrecision()[i];
                }
                break;
@ -4454,12 +4448,8 @@ void RowAggregationUMP2::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut,
            }
            else if (width <= datatypes::MAXLEGACYWIDTH)
            {
-                valIn = rowIn.getIntField(colIn);
-                double scale = (double)(fRowGroupIn.getScale())[colIn];
-                if (valIn != 0 && scale > 0)
-                {
-                    valIn /= pow(10.0, scale);
-                }
+                uint32_t scale = fRowGroupIn.getScale()[colIn];
+                valIn = rowIn.getScaledSInt64FieldAsXFloat<long double>(colIn, scale);
            }
            else
            {
--- a/utils/rowgroup/rowgroup.h
+++ b/utils/rowgroup/rowgroup.h
@ -376,6 +376,38 @@ public:
    inline uint64_t getUintField(uint32_t colIndex) const;
    template<int len> inline int64_t getIntField(uint32_t colIndex) const;
    inline int64_t getIntField(uint32_t colIndex) const;
+    // Get a signed 64-bit integer column value, convert to the given
+    // floating point data type T (e.g. float, double, long double)
+    // and divide it according to the scale.
+    template<typename T>
+    inline T getScaledSInt64FieldAsXFloat(uint32_t colIndex, uint32_t scale) const
+    {
+        const T d = getIntField(colIndex);
+        if (!scale)
+            return d;
+        return d / datatypes::scaleDivisor<T>(scale);
+    }
+    template<typename T>
+    inline T getScaledSInt64FieldAsXFloat(uint32_t colIndex) const
+    {
+        return getScaledSInt64FieldAsXFloat<T>(colIndex, getScale(colIndex));
+    }
+    // Get an unsigned 64-bit integer column value, convert to the given
+    // floating point data type T (e.g. float, double, long double)
+    // and divide it according to the scale.
+    template<typename T>
+    inline T getScaledUInt64FieldAsXFloat(uint32_t colIndex, uint32_t scale) const
+    {
+        const T d = getUintField(colIndex);
+        if (!scale)
+            return d;
+        return d / datatypes::scaleDivisor<T>(scale);
+    }
+    template<typename T>
+    inline T getScaledUInt64FieldAsXFloat(uint32_t colIndex) const
+    {
+        return getScaledUInt64FieldAsXFloat<T>(colIndex, getScale(colIndex));
+    }
    template<typename T>
    inline bool equals(T* value, uint32_t colIndex) const;
    template<int len> inline bool equals(uint64_t val, uint32_t colIndex) const;
--- a/utils/udfsdk/mcsv1_udaf.h
+++ b/utils/udfsdk/mcsv1_udaf.h
@ -633,7 +633,7 @@ protected:
    {
        double val = convertAnyTo<double>(datum.columnData);
        if (val != 0 && datum.scale > 0)
-            val /= pow(10.0, (double) datum.scale);
+            val /= datatypes::scaleDivisor<double>(datum.scale);
        return val;
    }

--- a/utils/udfsdk/udfsdk.cpp
+++ b/utils/udfsdk/udfsdk.cpp
@ -181,16 +181,16 @@ double MCS_add::getDoubleVal(Row& row,
            else if (op1.scale >= op2.scale)
            {
                dec.scale = op2.scale;
-                op1.value *= (int64_t)pow((double)10, op1.scale - op2.scale);
+                op1.value *= datatypes::scaleDivisor<int64_t>((uint32_t) (op1.scale - op2.scale));
            }
            else
            {
                dec.scale = op1.scale;
-                op2.value *= (int64_t)pow((double)10, op2.scale - op1.scale);
+                op2.value *= datatypes::scaleDivisor<int64_t>((uint32_t) (op2.scale - op1.scale));
            }

            dec.value = op1.value + op2.value;
-            return (double)(dec.value / pow((double)10, dec.scale));
+            return dec.decimal64ToXFloat<double>();
        }

        default:
--- a/utils/windowfunction/wf_stats.cpp
+++ b/utils/windowfunction/wf_stats.cpp
@ -191,8 +191,8 @@ void WF_stats<T>::operator()(int64_t b, int64_t e, int64_t c)

        if (fCount > 1)
        {
-            int scale = fRow.getScale(colIn);
-            long double factor = pow(10.0, scale);
+            uint32_t scale = fRow.getScale(colIn);
+            auto factor = datatypes::scaleDivisor<long double>(scale);
            long double ldSum1 = fSum1;
            long double ldSum2 = fSum2;