MCOL-4531 New string-to-decimal conversion implementation

This change fixes: MCOL-4462 CAST(varchar_expr AS DECIMAL(M,N)) returns a wrong result; MCOL-4500 Bit functions processing throws internally trying to cast char into decimal representation; MCOL-4532 CAST(AS DECIMAL) returns a garbage for large values. Also, this change makes string-to-decimal conversion 5-10 times faster, depending on the exact data. The performance improvement is achieved because, unlike the old implementation, the new version does not do any "string" object copying.
2025-08-08 14:22:09 +03:00 · 2021-01-28 21:44:41 +04:00
parent de581897c9
commit 69da915160
11 changed files with 1150 additions and 239 deletions
--- a/utils/funcexp/func_cast.cpp
+++ b/utils/funcexp/func_cast.cpp
@@ -40,6 +40,7 @@ using namespace rowgroup;
 using namespace logging;

 #include "dataconvert.h"
+#include "numericliteral.h"
 using namespace dataconvert;
 #include "collation.h"

@@ -1512,241 +1513,15 @@ IDB_Decimal Func_cast_decimal::getDecimalVal(Row& row,
        case execplan::CalpontSystemCatalog::TEXT:
        {
            const string& strValue = parm[0]->data()->getStrVal(row, isNull);
-            const char* str = strValue.c_str();
-            const char* s;
-            const char* firstInt = NULL;
-            char*       endptr = NULL;
-            char        fracBuf[20];
-            int         fracChars;
-            int         negate = 1;
-            bool        bFoundSign = false;
-            bool        bRound = false;
-
            if (strValue.empty())
            {
                isNull = true;
                return IDB_Decimal();  // need a null value for IDB_Decimal??
            }
-
-            decimal.scale = decimals;
-            decimal.value = 0;
-
-            // Look for scientific notation. The existence of an 'e' indicates it probably is.
-            for (s = str; *s; ++s)  // This is faster than two finds.
-            {
-                if (*s == 'e' || *s == 'E')
-                {
-                    if (decimal.isTSInt128ByPrecision())
-                    {
-                        // it is worth to parse the exponent first to detect an overflow
-                        bool dummy = false;
-                        char *ep = NULL;
-                        int128_t max_number_decimal = dataconvert::strtoll128(columnstore_big_precision[max_length - 19].c_str(), dummy, &ep);
-
-                        int128_t scaleDivisor;
-                        datatypes::getScaleDivisor(scaleDivisor, decimals);
-                        float128_t floatValue = datatypes::TFloat128::fromString(strValue);
-
-                        // If the float value is too large, the saturated result may end up with
-                        // the wrong sign, so we just check first.
-                        if ((int128_t)floatValue > max_number_decimal)
-                            decimal.s128Value = max_number_decimal;
-                        else if ((int128_t)floatValue < -max_number_decimal)
-                            decimal.s128Value = -max_number_decimal;
-                        else if (floatValue > 0)
-                            decimal.s128Value = (int128_t) (floatValue * scaleDivisor + 0.5);
-                        else if (floatValue < 0)
-                            decimal.s128Value = (int128_t) (floatValue * scaleDivisor - 0.5);
-                        else
-                            decimal.s128Value = 0;
-
-                        if (decimal.s128Value > max_number_decimal)
-                            decimal.s128Value = max_number_decimal;
-                        else if (decimal.s128Value < -max_number_decimal)
-                            decimal.s128Value = -max_number_decimal;
-
-                        return decimal;
-                    }
-                    else
-                    {
-                        int64_t max_number_decimal = helpers::maxNumber_c[max_length];
-
-                        double floatValue = strtod(str, 0);
-
-                        // If the float value is too large, the saturated result may end up with
-                        // the wrong sign, so we just check first.
-                        if ((int64_t)floatValue > max_number_decimal)
-                            decimal.value = max_number_decimal;
-                        else if ((int64_t)floatValue < -max_number_decimal)
-                            decimal.value = -max_number_decimal;
-                        else if (floatValue > 0)
-                            decimal.value = (int64_t) (floatValue * helpers::powerOf10_c[decimals] + 0.5);
-                        else if (floatValue < 0)
-                            decimal.value = (int64_t) (floatValue * helpers::powerOf10_c[decimals] - 0.5);
-                        else
-                            decimal.value = 0;
-
-                        if (decimal.value > max_number_decimal)
-                            decimal.value = max_number_decimal;
-                        else if (decimal.value < -max_number_decimal)
-                            decimal.value = -max_number_decimal;
-
-                        return decimal;
-                    }
-                }
-            }
-
-            // There are cases (such as "-.95" that should return that may not result in the desired rounding.
-            // By stripping the sign and adding it back in later, we can get a more accurate answer.
-            for (s = str; *s; ++s)
-            {
-                if (*s == '-')
-                {
-                    if (bFoundSign) // If we find a duplicate sign char, it's an error.
-                    {
-                        return decimal;
-                    }
-
-                    bFoundSign = true;
-                    negate = -1;
-                }
-                else if (*s == '+')
-                {
-                    if (bFoundSign)
-                    {
-                        return decimal;
-                    }
-
-                    bFoundSign = true;
-                }
-                else if (*s == *convData->decimal_point || *s == '.')
-                {
-                    // If we find a decimal point, that means there's no leading integer. (like ".99")
-                    // In this case we need to mark where we are.
-                    endptr = const_cast<char*>(s);
-                    break;
-                }
-                else if (isdigit(*s))
-                {
-                    firstInt = s;
-                    break;
-                }
-            }
-
-            if (decimal.isTSInt128ByPrecision())
-            {
-                bool dummy = false;
-                char *ep = NULL;
-                int128_t max_number_decimal = dataconvert::strtoll128(columnstore_big_precision[max_length - 19].c_str(), dummy, &ep);
-
-                int128_t value = 0, frac = 0;
-
-                if (firstInt)   // Checking to see if we have a decimal point, but no previous digits.
-                {
-                    value = dataconvert::strtoll128(firstInt, dummy, &endptr);
-                }
-
-                int128_t scaleDivisor;
-                datatypes::getScaleDivisor(scaleDivisor, decimals);
-
-                if (!dummy && endptr)
-                {
-                    // Scale the integer portion according to the DECIMAL description
-                    value *= scaleDivisor;
-
-                    // Get the fractional part.
-                    if (endptr && (*endptr == *convData->decimal_point || *endptr == '.'))
-                    {
-                        s = endptr + 1;
-
-                        // Get the digits to the right of the decimal
-                        // Only retrieve those that matter based on scale.
-                        for (fracChars = 0;
-                                *s && isdigit(*s) && fracChars < decimals;
-                                ++fracChars, ++s)
-                        {
-                            // Save the frac characters to a side buffer. This way we can limit
-                            // ourselves to the scale without modifying the original string.
-                            fracBuf[fracChars] = *s;
-                        }
-
-                        fracBuf[fracChars] = 0;
-
-                        // Check to see if we need to round
-                        if (isdigit(*s) && *s > '4')
-                        {
-                            bRound = true;
-                        }
-                    }
-
-                    frac = dataconvert::strtoll128(fracBuf, dummy, &ep);
-                    value += frac + (bRound ? 1 : 0);
-                    value *= negate;
-                }
-
-                decimal.s128Value = value;
-
-                if (decimal.s128Value > max_number_decimal)
-                    decimal.s128Value = max_number_decimal;
-                else if (decimal.s128Value < -max_number_decimal)
-                    decimal.s128Value = -max_number_decimal;
-            }
-            else
-            {
-                int64_t max_number_decimal = helpers::maxNumber_c[max_length];
-
-                int64_t value = 0, frac = 0;
-
-                errno = 0;
-
-                if (firstInt)   // Checking to see if we have a decimal point, but no previous digits.
-                {
-                    value = strtoll(firstInt, &endptr, 10);
-                }
-
-                if (!errno && endptr)
-                {
-                    // Scale the integer portion according to the DECIMAL description
-                    value *= helpers::powerOf10_c[decimals];
-
-                    // Get the fractional part.
-                    if (endptr && (*endptr == *convData->decimal_point || *endptr == '.'))
-                    {
-                        s = endptr + 1;
-
-                        // Get the digits to the right of the decimal
-                        // Only retrieve those that matter based on scale.
-                        for (fracChars = 0;
-                                *s && isdigit(*s) && fracChars < decimals;
-                                ++fracChars, ++s)
-                        {
-                            // Save the frac characters to a side buffer. This way we can limit
-                            // ourselves to the scale without modifying the original string.
-                            fracBuf[fracChars] = *s;
-                        }
-
-                        fracBuf[fracChars] = 0;
-
-                        // Check to see if we need to round
-                        if (isdigit(*s) && *s > '4')
-                        {
-                            bRound = true;
-                        }
-                    }
-
-                    frac = strtoll(fracBuf, &endptr, 10);
-                    value += frac + (bRound ? 1 : 0);
-                    value *= negate;
-                }
-
-                decimal.value = value;
-
-                if (decimal.value > max_number_decimal)
-                    decimal.value = max_number_decimal;
-                else if (decimal.value < -max_number_decimal)
-                    decimal.value = -max_number_decimal;
-            }
+            datatypes::DataCondition convError;
+            return IDB_Decimal(strValue.data(), strValue.length(), convError, decimals, max_length);
        }
+
        break;

        case execplan::CalpontSystemCatalog::DATE: