MCOL-4531 New string-to-decimal conversion implementation

This change fixes: MCOL-4462 CAST(varchar_expr AS DECIMAL(M,N)) returns a wrong result; MCOL-4500 Bit functions processing throws internally trying to cast char into decimal representation; MCOL-4532 CAST(AS DECIMAL) returns garbage for large values. Also, this change makes string-to-decimal conversion 5-10 times faster, depending on the exact data. The performance improvement is achieved by the fact that (unlike the old implementation) the new version does not do any "string" object copying.
2025-07-30 19:23:07 +03:00 · 2021-01-28 21:44:41 +04:00
parent de581897c9
commit 69da915160
11 changed files with 1150 additions and 239 deletions
--- a/datatypes/mcs_data_condition.h
+++ b/datatypes/mcs_data_condition.h
@ -0,0 +1,83 @@
+/* Copyright (C) 2021 MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. */
+
+
+#ifndef MCS_DATA_CONDITION_H
+#define MCS_DATA_CONDITION_H
+
+namespace datatypes
+{
+
+/*
+  A subset of SQL Conditions related to data processing.
+  SQLSTATE terminology is used for categories:
+  - S stands for "success"
+  - W stands for "warning"
+  - X stands for "exceptions"
+*/
+class DataCondition
+{
+public:
+  enum Code
+  {
+    // Code                                Value       SQLSTATE
+    S_SUCCESS                              = 0,        // 00000
+    W_STRING_DATA_RIGHT_TRUNCATION         = 1 << 1,   // 01004
+    X_STRING_DATA_RIGHT_TRUNCATION         = 1 << 16,  // 22001
+    X_NUMERIC_VALUE_OUT_OF_RANGE           = 1 << 17,  // 22003
+    X_INVALID_CHARACTER_VALUE_FOR_CAST     = 1 << 18,  // 22018
+  };
+  DataCondition()
+   :mError(S_SUCCESS)
+  { }
+  DataCondition(Code code)
+   :mError(code)
+  { }
+  DataCondition & operator|=(Code code)
+  {
+    mError= (Code) (mError | code);
+    return *this;
+  }
+  DataCondition operator&(Code rhs) const
+  {
+    return DataCondition((Code) (mError & rhs));
+  }
+  operator Code () const { return mError; }
+
+  // Clamp a signed integer of any size to the range [-absMaxVal, +absMaxVal]; X_NUMERIC_VALUE_OUT_OF_RANGE is set when the value had to be clamped
+  template<typename T>
+  void adjustSIntXRange(T & val, T absMaxVal)
+  {
+    if (val > absMaxVal)
+    {
+      val = absMaxVal;
+      *this |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
+    }
+    else if (val < -absMaxVal)
+    {
+      val = -absMaxVal;
+      *this |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
+    }
+  }
+
+private:
+  Code mError;
+};
+
+} // namespace datatypes
+
+#endif // MCS_DATA_CONDITION_H
--- a/datatypes/mcs_datatype.h
+++ b/datatypes/mcs_datatype.h
@ -18,10 +18,11 @@
 #ifndef MCS_DATATYPE_H_INCLUDED
 #define MCS_DATATYPE_H_INCLUDED

-#include <limits>
 #include <sstream>
 #include <boost/any.hpp>
 #include "exceptclasses.h"
+#include "mcs_numeric_limits.h"
+#include "mcs_data_condition.h"
 #include "mcs_decimal.h"


@ -105,6 +106,24 @@ namespace execplan



+namespace datatypes
+{
+
+template <typename T>
+struct make_unsigned
+{
+  typedef struct { } type;
+};
+
+template<> struct make_unsigned<int8_t>   { typedef uint8_t type;  };
+template<> struct make_unsigned<int16_t>  { typedef uint16_t type;  };
+template<> struct make_unsigned<int32_t>  { typedef uint32_t type;  };
+template<> struct make_unsigned<int64_t>  { typedef uint64_t type;  };
+template<> struct make_unsigned<int128_t> { typedef uint128_t type; };
+
+} // namespace datatypes
+
+
 namespace datatypes
 {

--- a/datatypes/mcs_decimal.cpp
+++ b/datatypes/mcs_decimal.cpp
@ -19,6 +19,7 @@

 #include "utils/common/branchpred.h"
 #include "mcs_decimal.h"
+#include "numericliteral.h"

 namespace datatypes
 {
@ -170,6 +171,33 @@ namespace datatypes
        }
    }

+    Decimal::Decimal(const char *str, size_t length, DataCondition & convError,
+                     int8_t s, uint8_t p)
+       :TSInt128(),
+        value(0),
+        scale(s),
+        precision(p)
+    {
+        literal::Converter<literal::SignedNumericLiteral> conv(str, length, convError);
+        // "convError" is not checked here: syntax and range conditions are only accumulated in it, and the caller decides how to react.
+        // Whatever the parser managed to read is converted below.
+
+        // Strip leading zeros of the integral part and of the exponent, and trailing zeros of the fraction
+        conv.normalize();
+        if (isTSInt128ByPrecision())
+        {
+            s128Value = conv.toPackedSDecimal<int128_t>((literal::scale_t) scale, convError);
+            int128_t max_number_decimal = mcs_pow_10_128[precision - 19]  - 1;
+            convError.adjustSIntXRange(s128Value, max_number_decimal);
+        }
+        else
+        {
+            value = conv.toPackedSDecimal<int64_t>((literal::scale_t) scale, convError);
+            int64_t max_number_decimal = (int64_t) mcs_pow_10[precision] - 1;
+            convError.adjustSIntXRange(value, max_number_decimal);
+        }
+    }
+
    int Decimal::compare(const Decimal& l, const Decimal& r)
    {
        int128_t divisorL, divisorR;
--- a/datatypes/mcs_decimal.h
+++ b/datatypes/mcs_decimal.h
@ -29,6 +29,7 @@
 #include "mcs_float128.h"
 #include "checks.h"
 #include "branchpred.h"
+#include "mcs_data_condition.h"


 namespace datatypes
@ -302,6 +303,9 @@ class Decimal: public TSInt128
        { }


+        Decimal(const char *str, size_t length, DataCondition & error,
+                int8_t s, uint8_t p);
+
        int decimalComp(const Decimal& d) const
        {
            lldiv_t d1 = lldiv(value, static_cast<int64_t>(mcs_pow_10[scale]));
--- a/datatypes/mcs_float128.h
+++ b/datatypes/mcs_float128.h
@ -22,6 +22,7 @@
 #include <cfloat>
 #include <cstdint>
 #include <cstring>
+#include "mcs_numeric_limits.h"

 #ifdef __aarch64__
 using float128_t = long double;
@ -98,8 +99,6 @@ using int128_t = __int128;

 static const float128_t mcs_fl_one = 1.0, mcs_fl_Zero[] = {0.0, -0.0,};

-template<typename T>
-class numeric_limits { };
 // Copy from boost::multiprecision::float128
 template<> class numeric_limits<float128_t> {
  public:
--- a/datatypes/mcs_numeric_limits.h
+++ b/datatypes/mcs_numeric_limits.h
@ -0,0 +1,62 @@
+/*
+   Copyright (C) 2021 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. */
+#ifndef MCS_NUMERIC_LIMITS_H_INCLUDED
+#define MCS_NUMERIC_LIMITS_H_INCLUDED
+
+#include <limits>
+
+namespace datatypes
+{
+
+template <typename T>
+struct numeric_limits
+{
+  static constexpr T min() { return std::numeric_limits<T>::min(); }
+  static constexpr T max() { return std::numeric_limits<T>::max(); }
+};
+
+using int128_t = __int128;
+using uint128_t = unsigned __int128;
+
+template<> struct numeric_limits<int128_t>
+{
+  static constexpr int128_t min()
+  {
+    return int128_t(0x8000000000000000LL) << 64;
+  }
+  static constexpr int128_t max()
+  {
+    return (int128_t(0x7FFFFFFFFFFFFFFFLL) << 64) + 0xFFFFFFFFFFFFFFFFLL;
+  }
+};
+
+template<> struct numeric_limits<uint128_t>
+{
+  static constexpr uint128_t min()
+  {
+    return uint128_t(0);
+  }
+  static constexpr uint128_t max()
+  {
+    return (uint128_t(0xFFFFFFFFFFFFFFFFULL) << 64) + 0xFFFFFFFFFFFFFFFFULL;
+  }
+};
+
+} // namespace datatypes
+
+#endif // MCS_NUMERIC_LIMITS_H_INCLUDED
--- a/datatypes/numericliteral.h
+++ b/datatypes/numericliteral.h
@ -0,0 +1,620 @@
+/* Copyright (C) 2021 MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. */
+
+
+#ifndef NUMERICLITERAL_H
+#define NUMERICLITERAL_H
+
+#include "genericparser.h"
+#include "mcs_datatype.h"
+
+
+namespace literal
+{
+
+using utils::ConstString;
+using genericparser::Parser;
+using datatypes::DataCondition;
+
+typedef uint32_t scale_t;
+
+
+
+template<class A>
+class Converter: public Parser,
+                 public A
+{
+public:
+  Converter(const char *str, size_t length, DataCondition & error)
+   :Parser(str, length),
+    A(&Parser::skipLeadingSpaces())
+  {
+    if (Parser::syntaxError())
+    {
+      /*
+        Non-recoverable syntax error happened. The parser parsed the first part
+        of a combined rule (and therefore shifted the tokenizer position)
+        but then failed to parse the rule till the end.
+
+        For example in the <signed numeric literal>:
+        ''    -  empty string
+        '+'   -  sign was not followed by a digit or period,        expect '+1'
+        '.'   -  period was not followed by a digit,                expect '.1'
+        '1e'  -  exponent marker was not followed by <exponent>,    expect '1e1'
+        '1e+' -  in <exponent>, <sign> was not followed by a digit, expect '1e+1'
+      */
+      error|=(DataCondition::X_INVALID_CHARACTER_VALUE_FOR_CAST);
+    }
+  }
+  Converter(const std::string & str, DataCondition &error)
+   :Converter(str.data(), str.length(), error)
+  { }
+};
+
+
+/*
+
+SQL Standard definition for <cast specification>
+related to character string to exact number conversion
+======================================================
+Abbreviations:
+- TD - the target data type
+- SD - the datatype of the source value
+- SV - the source value
+
+8) If TD is exact numeric, then
+a) If SD is exact numeric or approximate numeric, then
+Case:
+
+  i) If there is a representation of SV in the data type TD that does not lose
+     any leading significant digits after rounding or truncating if necessary,
+     then TV is that representation. The choice of whether to round or truncate
+     is implementation-defined. (NoteAI)
+  ii) Otherwise, an exception condition is raised:
+       data exception -- numeric value out of range. (NoteAII)
+
+b) If SD is character string, then SV is replaced by SV with any leading
+   or trailing <space>s removed. (NoteB)
+Case:
+
+  i) If SV does not comprise a <signed numeric literal> as defined by the rules
+     for <literal> in Subclause "<literal>", then an exception condition is raised:
+       data exception - invalid character value for cast. (NoteBI)
+  ii) Otherwise, let LT be that <signed numeric literal>.
+      The <cast specification> is equivalent to CAST ( LT AS TD )
+
+
+Implementation details
+======================
+NoteAI
+----
+The implementation defined choice whether to round or truncate is
+"round away from zero".
+
+NoteAII
+-----
+When the "numeric value out of range" state is found, it is signalled
+to the caller, and the returned value is adjusted according to the TD range.
+The caller later decides whether to raise an error or to use the adjusted value.
+
+NoteB
+-----
+The implementation removes only leading spaces. The caller can
+check if any trailing spaces are left by the parser.
+
+NoteBI
+------
+The implementation stops on the first character that does not
+conform to the <signed numeric literal> syntax. The caller can
+check if any trailing garbage characters are left by the parser.
+
+
+Grammar
+=======
+
+<signed numeric literal>    ::=   [ <sign> ] <unsigned numeric literal>
+
+<unsigned numeric literal>  ::=   <exact numeric literal> [ E <exponent> ]
+
+<exact numeric literal>    ::=
+                   <unsigned integer> [ <period> [ <unsigned integer> ] ]
+               |   <period> <unsigned integer>
+
+<sign>    ::=   <plus sign> | <minus sign>
+
+<exponent>    ::=   <signed integer>
+
+<signed integer>    ::=   [ <sign> ] <unsigned integer>
+
+<unsigned integer>    ::=   <digit> ...
+
+*/
+
+
+//
+//  Terminal symbols
+//
+
+class Period: public ConstString
+{
+public:
+  explicit Period(Parser *p)
+   :ConstString(p->tokenChar('.'))
+  { }
+  bool isNull() const { return mStr == nullptr; }
+};
+
+
+class ExponentMarker: public ConstString
+{
+public:
+  explicit ExponentMarker(Parser *p)
+   :ConstString(p->tokenAnyCharOf('e', 'E'))
+  { }
+  bool isNull() const { return mStr == nullptr; }
+};
+
+
+class Sign: public ConstString
+{
+public:
+  explicit Sign(): ConstString(NULL, 0) { }
+  explicit Sign(const ConstString &str)
+   :ConstString(str)
+  { }
+  explicit Sign(Parser *p)
+   :ConstString(p->tokenAnyCharOf('+', '-'))
+  { }
+  static Sign empty(Parser *p)
+  {
+    return Sign(p->tokStartConstString());
+  }
+  bool isNull() const { return mStr == nullptr; }
+  bool negative() const { return eq('-'); }
+};
+
+
+class Digits: public ConstString
+{
+public:
+  explicit Digits()
+   :ConstString(NULL, 0)
+  { }
+  explicit Digits(const char *str, size_t length)
+   :ConstString(str, length)
+  { }
+  explicit Digits(const ConstString &str)
+   :ConstString(str)
+  { }
+  explicit Digits(Parser *p)
+   :ConstString(p->tokenDigits())
+  { }
+  bool isNull() const { return mStr == nullptr; }
+
+  void skipLeadingZeroDigits()
+  {
+    for ( ; mLength > 0 && mStr[0] == '0'; )
+    {
+      mStr++;
+      mLength--;
+    }
+  }
+  void skipTrailingZeroDigits()
+  {
+    for ( ; mLength > 0 && mStr[mLength - 1] == '0' ; )
+      mLength--;
+  }
+};
+
+
+//
+// Non-terminal symbols
+//
+
+// <unsigned integer>    ::=   <digit> ...
+class UnsignedInteger: public Digits
+{
+public:
+  explicit UnsignedInteger()
+   :Digits()
+  { }
+  explicit UnsignedInteger(const char *str, size_t length)
+   :Digits(str, length)
+  { }
+  explicit UnsignedInteger(const ConstString &str)
+   :Digits(str)
+  { }
+  explicit UnsignedInteger(Parser *p)
+   :Digits(p)
+  { }
+  static UnsignedInteger empty(const Parser *p)
+  {
+    return UnsignedInteger(p->tokStartConstString());
+  }
+  UnsignedInteger left(size_t len) const
+  {
+    return UnsignedInteger(str(), length() > len ? len : length());
+  }
+
+  // Append the decimal digits of this string to the non-negative value "start"
+  // (i.e. val = val * 10 + digit for every digit) and return the result.
+  // If the result does not fit into T, X_NUMERIC_VALUE_OUT_OF_RANGE is added
+  // to "error" and numeric_limits<T>::max() is returned.
+  template<typename T>
+  T toXIntPositiveContinue(T start, DataCondition & error) const
+  {
+    const char *e = end();
+    T val = start;
+    for (const char *s= mStr; s < e; s++)
+    {
+      constexpr T cutoff = datatypes::numeric_limits<T>::max() / 10;
+      if (val > cutoff)
+      {
+        error|= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
+        return datatypes::numeric_limits<T>::max();
+      }
+      val*= 10;
+      // Check the remaining room BEFORE adding the digit: for a signed T,
+      // performing the addition first and testing for a wrap-around afterwards
+      // would rely on signed overflow, which is undefined behaviour.
+      const T digit = (T) (s[0] - '0');
+      if (digit > (T) (datatypes::numeric_limits<T>::max() - val))
+      {
+        error|= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
+        return datatypes::numeric_limits<T>::max();
+      }
+      val = (T) (val + digit);
+    }
+    return val;
+  }
+  template<typename T>
+  T toXIntPositive(DataCondition & error) const
+  {
+    return toXIntPositiveContinue<T>(0, error);
+  }
+
+  // Append the decimal digits of this string to the non-positive value "start"
+  // on the negative side (i.e. val = val * 10 - digit for every digit)
+  // and return the result.
+  // If the result does not fit into T, X_NUMERIC_VALUE_OUT_OF_RANGE is added
+  // to "error" and numeric_limits<T>::min() is returned.
+  template<typename T>
+  T toSIntNegativeContinue(T start, DataCondition & error) const
+  {
+    const char *e = end();
+    T val = start;
+    for (const char *s= mStr; s < e; s++)
+    {
+      constexpr T cutoff = datatypes::numeric_limits<T>::min() / 10;
+      if (val < cutoff)
+      {
+        error|= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
+        return datatypes::numeric_limits<T>::min();
+      }
+      val*= 10;
+      // Check the remaining room BEFORE subtracting the digit: subtracting
+      // first and testing for a wrap-around afterwards would rely on
+      // signed overflow, which is undefined behaviour.
+      const T digit = (T) (s[0] - '0');
+      if (val < (T) (datatypes::numeric_limits<T>::min() + digit))
+      {
+        error|= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
+        return datatypes::numeric_limits<T>::min();
+      }
+      val = (T) (val - digit);
+    }
+    return val;
+  }
+  template<typename T>
+  T toSIntNegative(DataCondition & error) const
+  {
+    return toSIntNegativeContinue<T>(0, error);
+  }
+
+  template<typename T>
+  T toXIntPositiveRoundAwayFromZeroContinue(T start, bool round, DataCondition & error) const
+  {
+    T val = toXIntPositiveContinue<T>(start, error);
+    if (val == datatypes::numeric_limits<T>::max() && round)
+    {
+      error|= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
+      return val;
+    }
+    return val + round;
+  }
+  template<typename T>
+  T toXIntPositiveRoundAwayFromZero(bool round, DataCondition & error) const
+  {
+    return toXIntPositiveRoundAwayFromZeroContinue<T>(0, round, error);
+  }
+};
+
+
+// <signed integer> := [<sign>] <unsigned integer>
+class SignedInteger: public Parser::DD2OM<Sign,UnsignedInteger>
+{
+public:
+  using DD2OM::DD2OM;
+  bool isNull() const { return UnsignedInteger::isNull(); }
+
+  template<typename T> T abs(DataCondition & error) const
+  {
+    return toXIntPositive<T>(error);
+  }
+
+  template<typename T> T toSInt(DataCondition & error) const
+  {
+    return negative() ?
+           toSIntNegative<T>(error) :
+           toXIntPositive<T>(error);
+  }
+};
+
+
+// E <signed integer>
+class EExponent: public Parser::UD2MM<ExponentMarker, SignedInteger>
+{
+public:
+  using UD2MM::UD2MM;
+};
+
+
+// <period> <unsigned integer>
+class ExactUnsignedNumericLiteralFractionAlone: public Parser::UD2MM<Period, UnsignedInteger>
+{
+public:
+  using UD2MM::UD2MM;
+};
+
+
+// <period> [ <unsigned integer> ]
+class PeriodOptUnsignedInteger: public Parser::UD2MO<Period, UnsignedInteger>
+{
+public:
+  using UD2MO::UD2MO;
+  static PeriodOptUnsignedInteger empty(Parser *p)
+  {
+    return PeriodOptUnsignedInteger(UnsignedInteger(p->tokStartConstString()));
+  }
+  const PeriodOptUnsignedInteger & fraction() const
+  {
+    return *this;
+  }
+};
+
+
+// <integral unsigned integer> := <unsigned integer>
+class IntegralUnsignedInteger: public UnsignedInteger
+{
+public:
+  explicit IntegralUnsignedInteger(Parser *p)
+   :UnsignedInteger(p)
+  { }
+  const UnsignedInteger & integral() const
+  {
+    return *this;
+  }
+};
+
+
+// <integral unsigned integer> [ <period> [ <unsigned integer> ] ]
+
+class ExactUnsignedNumericLiteralIntegralOptFraction:
+                             public Parser::DD2MO<IntegralUnsignedInteger,
+                                                  PeriodOptUnsignedInteger>
+{
+public:
+  using DD2MO::DD2MO;
+};
+
+
+// A container for integral and fractional parts
+class UnsignedIntegerDecimal
+{
+protected:
+  UnsignedInteger mIntegral;
+  UnsignedInteger mFraction;
+public:
+  explicit UnsignedIntegerDecimal(const UnsignedInteger &intg,
+                                  const UnsignedInteger &frac)
+   :mIntegral(intg),
+    mFraction(frac)
+  { }
+  explicit UnsignedIntegerDecimal(const ExactUnsignedNumericLiteralFractionAlone &rhs)
+   :mFraction(rhs)
+  { }
+  explicit UnsignedIntegerDecimal(const ExactUnsignedNumericLiteralIntegralOptFraction &rhs)
+   :mIntegral(rhs.integral()),
+    mFraction(rhs.fraction())
+  { }
+
+  size_t IntFracDigits() const
+  {
+    return mIntegral.length() + mFraction.length();
+  }
+
+  bool isNull() const
+  {
+    return mIntegral.isNull() && mFraction.isNull();
+  }
+
+  void normalize()
+  {
+    mIntegral.skipLeadingZeroDigits();
+    mFraction.skipTrailingZeroDigits();
+  }
+
+  template<typename T> T toXIntPositive(DataCondition & error) const
+  {
+    T val = mIntegral.toXIntPositive<T>(error);
+    return mFraction.toXIntPositiveContinue<T>(val, error);
+  }
+
+  template<typename T> T toXIntPositiveRoundAwayFromZero(bool roundUp, DataCondition & error) const
+  {
+    T val = mIntegral.toXIntPositive<T>(error);
+    return mFraction.toXIntPositiveRoundAwayFromZeroContinue<T>(val, roundUp, error);
+  }
+
+  template<typename T> T toXIntPositiveScaleUp(size_t scale, DataCondition & error) const
+  {
+    T val = toXIntPositive<T>(error);
+    if (val == datatypes::numeric_limits<T>::max())
+      return val;
+    for ( ; scale ; scale--)
+    {
+      constexpr T cutoff = datatypes::numeric_limits<T>::max() / 10;
+      if (val > cutoff)
+      {
+        error|= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
+        return datatypes::numeric_limits<T>::max();
+      }
+      val*= 10;
+    }
+    return val;
+  }
+
+  template<typename T> T toXIntPositiveRound(DataCondition & error) const
+  {
+    bool roundUp = mFraction.length() && mFraction.str()[0] >= '5';
+    return mIntegral.toXIntPositiveRoundAwayFromZero<T>(roundUp, error);
+  }
+
+  template<typename T> T toXIntPositiveRoundExp(uint64_t absExp, bool negExp,
+                                                DataCondition & error) const
+  {
+    if (absExp == 0)
+      return toXIntPositiveRound<T>(error);
+
+    if (negExp)
+    {
+      if (mIntegral.length() == absExp) // 567.8e-3 -> 0.5678 -> 1
+        return mIntegral.str()[0] >= '5' ? 1 : 0;
+      if (mIntegral.length() < absExp)  // 123e-4 -> 0.0123
+        return 0;
+      // mIntegral.length() > absExp: 5678.8e-3 -> 5.6788 -> 6
+      size_t diff = mIntegral.length() - absExp;
+      const UnsignedInteger tmp(mIntegral.str(), diff);
+      bool roundUp = mIntegral.str()[diff] >= '5';
+      return tmp.toXIntPositiveRoundAwayFromZero<T>(roundUp, error);
+    }
+
+    // Positive exponent: 123.456e2
+    if (mFraction.length() >= absExp) // 123.456e2 -> 12345.6 -> 12346
+    {
+      bool roundUp = mFraction.length() > absExp && mFraction.str()[absExp] >= '5';
+      UnsignedIntegerDecimal tmp(mIntegral, mFraction.left(absExp));
+      return tmp.toXIntPositiveRoundAwayFromZero<T>(roundUp, error);
+    }
+
+    // Pad int+frac with right zeros 123.4e3 -> 123400
+    size_t diff = absExp - mFraction.length();
+    return toXIntPositiveScaleUp<T>(diff, error);
+  }
+
+};
+
+
+// <exact unsigned numeric literal> :=
+//           <period> <unsigned integer>
+//         | <unsigned integer> [ <period> [ <unsigned integer> ] ]
+
+class ExactUnsignedNumericLiteral:
+          public Parser::Choice2<UnsignedIntegerDecimal,
+                                 ExactUnsignedNumericLiteralFractionAlone,
+                                 ExactUnsignedNumericLiteralIntegralOptFraction>
+{
+public:
+  using Choice2::Choice2;
+};
+
+
+// <unsigned numeric literal>  ::=  <exact numeric literal> [ E <exponent> ]
+
+class UnsignedNumericLiteral: public Parser::DM2MO<ExactUnsignedNumericLiteral,EExponent>
+{
+public:
+  using DM2MO::DM2MO;
+  void normalize()
+  {
+    ExactUnsignedNumericLiteral::normalize();
+    mB.skipLeadingZeroDigits();
+  }
+  const SignedInteger & exponent() const
+  {
+    return mB;
+  }
+
+  template<typename T>
+  T toXIntPositiveRound(DataCondition & error) const
+  {
+    size_t availableDigits = IntFracDigits();
+    if (!availableDigits)
+      return 0;
+    T absexp = exponent().abs<T>(error);
+    return ExactUnsignedNumericLiteral::toXIntPositiveRoundExp<T>(absexp, exponent().negative(), error);
+  }
+
+  template<typename T>
+  T toPackedDecimalPositive(scale_t scale, DataCondition & error) const
+  {
+    size_t availableDigits = IntFracDigits();
+    if (!availableDigits)
+      return 0;
+    int64_t exp = exponent().toSInt<int64_t>(error);
+    if (exp <= datatypes::numeric_limits<int64_t>::max() - scale)
+      exp+= scale;
+    if (exp < 0)
+    {
+      if (exp == datatypes::numeric_limits<int64_t>::min())
+        exp++; // Avoid undefined behaviour in the unary minus below:
+      return ExactUnsignedNumericLiteral::toXIntPositiveRoundExp<T>((uint64_t) -exp, true, error);
+    }
+    return ExactUnsignedNumericLiteral::toXIntPositiveRoundExp<T>((uint64_t) exp, false, error);
+  }
+
+};
+
+
+// <signed numeric literal>    ::=   [ <sign> ] <unsigned numeric literal>
+class SignedNumericLiteral: public Parser::DD2OM<Sign,UnsignedNumericLiteral>
+{
+public:
+  using DD2OM::DD2OM;
+  bool isNull() const { return UnsignedNumericLiteral::isNull(); }
+
+  // Convert the literal to an unsigned integer of type T with rounding.
+  // A negative literal is converted to 0.
+  // Conversion conditions (e.g. numeric value out of range) are added to "error".
+  template<typename T>
+  T toUIntXRound(DataCondition & error) const
+  {
+    if (negative())
+      return 0;
+    return UnsignedNumericLiteral::toXIntPositiveRound<T>(error);
+  }
+
+  // Same as above, for callers that are not interested in conversion conditions.
+  // UnsignedNumericLiteral::toXIntPositiveRound() requires a DataCondition,
+  // so a local one is passed and then discarded.
+  template<typename T>
+  T toUIntXRound() const
+  {
+    DataCondition ignoredError;
+    return toUIntXRound<T>(ignoredError);
+  }
+
+  template<typename T>
+  T toPackedUDecimal(scale_t scale, DataCondition & error) const
+  {
+    if (negative())
+      return 0;
+    return UnsignedNumericLiteral::toPackedDecimalPositive<T>(scale, error);
+  }
+
+  template<typename T>
+  T toPackedSDecimal(scale_t scale, DataCondition & error) const
+  {
+    if (!negative())
+      return UnsignedNumericLiteral::toPackedDecimalPositive<T>(scale, error);
+    typedef typename datatypes::make_unsigned<T>::type UT;
+    UT absval = UnsignedNumericLiteral::toPackedDecimalPositive<UT>(scale, error);
+    if (absval >= (UT) datatypes::numeric_limits<T>::min())
+    {
+      error|= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
+      return datatypes::numeric_limits<T>::min();
+    }
+    return - (T) absval;
+  }
+};
+
+
+} // namespace literal
+
+#endif // NUMERICLITERAL_H
--- a/utils/common/conststring.h
+++ b/utils/common/conststring.h
@ -25,6 +25,7 @@ namespace utils

 class ConstString
 {
+protected:
  const char *mStr;
  size_t mLength;
 public:
@ -35,7 +36,12 @@ public:
      :mStr(str.data()), mLength(str.length())
  { }
  const char *str() const { return mStr; }
+  const char *end() const { return mStr + mLength; }
  size_t length() const { return mLength; }
+  bool eq(char ch) const
+  {
+    return mLength == 1 && mStr[0] == ch;
+  }
  ConstString & rtrimZero()
  {
    for ( ; mLength && mStr[mLength - 1] == '\0'; mLength--)
--- a/utils/common/genericparser.h
+++ b/utils/common/genericparser.h
@ -0,0 +1,313 @@
+/* Copyright (C) 2021 MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. */
+
+
+#ifndef GENERICPARSER_H
+#define GENERICPARSER_H
+
+#include "conststring.h"
+
+
+namespace genericparser
+{
+
+using utils::ConstString;
+
+
+class Tokenizer
+{
+protected:
+  const char *mStr;
+  const char *mEnd;
+public:
+  explicit Tokenizer(const char *str, size_t length)
+   :mStr(str), mEnd(str + length)
+  { }
+  size_t length() const
+  {
+    return mEnd - mStr;
+  }
+  const char *ptr() const
+  {
+    return mStr;
+  }
+  bool isSpace() const
+  {
+    return mStr < mEnd && mStr[0] == ' ';
+  }
+  bool isDigit() const
+  {
+    return mStr < mEnd && mStr[0] >= '0' && mStr[0] <= '9';
+  }
+  bool isChar(char chr) const
+  {
+    return mStr < mEnd && mStr[0] == chr;
+  }
+  // True if the tokenizer is not at the end and the current character
+  // is either chr0 or chr1. Does not move the current position,
+  // hence const, like isSpace(), isDigit() and isChar().
+  bool isAnyCharOf(char chr0, char chr1) const
+  {
+    return mStr < mEnd && (mStr[0] == chr0 || mStr[0] == chr1);
+  }
+
+  ConstString tokenSpaces()
+  {
+    if (!isSpace())
+      return ConstString(nullptr, 0);
+    const char *start = mStr;
+    for ( ; isSpace() ; mStr++)
+    { }
+    return ConstString(start, mStr - start);
+  }
+  ConstString tokenDigits()
+  {
+    if (!isDigit())
+      return ConstString(nullptr, 0);
+    const char *start = mStr;
+    for ( ; isDigit() ; mStr++)
+    { }
+    return ConstString(start, mStr - start);
+  }
+  ConstString tokenChar(char chr)
+  {
+    if (!isChar(chr))
+      return ConstString(nullptr, 0);
+    return ConstString(mStr++, 1);
+  }
+  ConstString tokenAnyCharOf(char chr0, char chr1)
+  {
+    if (!isAnyCharOf(chr0, chr1))
+      return ConstString(nullptr, 0);
+    return ConstString(mStr++, 1);
+  }
+};
+
+
+class Parser: public Tokenizer
+{
+protected:
+  bool mSyntaxError;
+public:
+  explicit Parser(const char *str, size_t length)
+   :Tokenizer(str, length), mSyntaxError(false)
+  { }
+  explicit Parser(const std::string & str)
+   :Parser(str.data(), str.length())
+  { }
+  Parser & skipLeadingSpaces()
+  {
+    tokenSpaces();
+    return *this;
+  }
+  bool syntaxError() const
+  {
+    return mSyntaxError;
+  }
+  bool setSyntaxError()
+  {
+    mSyntaxError = true;
+    return false;
+  }
+  const char *tokStart() const
+  {
+    return mStr;
+  }
+  const ConstString tokStartConstString() const
+  {
+    return ConstString(mStr, 0);
+  }
+
+
+  // A helper class template to set the parser syntax error
+  // if A returned isNull() after parsing.
+
+  template<class A>
+  class SetSyntaxErrorOnNull :public A
+  {
+  public:
+    SetSyntaxErrorOnNull(Parser *p)
+      :A(p)
+    {
+      if (A::isNull())
+        p->setSyntaxError();
+    }
+  };
+
+  // A helper class template for a rule in the form:        <res> := [ <a> ]
+
+  template<class A>
+  class Opt: public A
+  {
+  public:
+    explicit Opt(const A &rhs)
+     :A(rhs)
+    { }
+    explicit Opt(Parser *p)
+     :A(p)
+    {
+      if (A::isNull() && !p->syntaxError())
+        A::operator=(A::empty(p));
+    }
+  };
+
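+  // Class template names below have the form XY2PQ, one letter per source
+  // class on each side of the digit '2' (e.g. UD2MM, DD2OM, DM2MO).
+  //
+  // Letters before '2' tell how each source class is stored in the result:
+  // U - unused - the result class does not have the source class inside
+  // D - derive - the result class derives from the source class
+  // M - member - the result class adds the source class as a member
+  //
+  // Letters after '2' tell how each source class is treated at parse time:
+  // M - mandatory - this part is required during parse time
+  // O - optional  - this part is optional during parse time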
+
+
+  // A helper class template for a rule in the form:        <res> := <a> <b>
+  // i.e. both parts are mandatory at parse time
+  // The value of <a> is not important, and is created
+  // only temporary on the stack.
+  // Only the value of <b> is important.
+  // Example:
+  //    <period> <unsigned integer>
+
+  template<class A, class B>
+  class UD2MM: public B
+  {
+  public:
+    explicit UD2MM(Parser *p)
+     :B(A(p).isNull() ? B() :SetSyntaxErrorOnNull<B>(p))
+    { }
+    explicit UD2MM(const B & b)
+     :B(b)
+    { }
+    explicit UD2MM()
+     :B()
+    { }
+    bool isNull() const { return B::isNull(); }
+  };
+
+  // A helper class template for a rule in the form:       <res> := <a> <b>
+  // i.e. both parts are mandatory at parse time.
+  template<class A, class B>
+  class DD2MM: public A,
+               public B
+  {
+  public:
+    // Parse <a>, then <b>. If <a> is null, <b> is not parsed at all and
+    // is default-constructed.
+    // Sets syntax error if <a> was not followed by <b>
+    explicit DD2MM(Parser *p)
+     :A(p),
+      B(A::isNull() ? B() : SetSyntaxErrorOnNull<B>(p))
+    { }
+    // Build the result from two already parsed parts.
+    // The A base is initialized from "a" (not from "b").
+    explicit DD2MM(const A & a, const B &b)
+     :A(a), B(b)
+    { }
+  };
+
+  // A helper class template for a rule in the form:       <res> := <a> [ <b> ]
+  // i.e. <a> is mandatory, <b> is optional at parse time.
+  template<class A, class B>
+  class DD2MO: public A,
+               public B
+  {
+  public:
+    explicit DD2MO(Parser *p)
+     :A(p),
+      B(A::isNull() ? B() : B(p))
+    { }
+    explicit DD2MO(const A &a, const B &b)
+     :A(a), B(b)
+    { }
+  };
+
+  // A helper class template for a rule in the form:       <res> := <a> [ <b> ]
+  // i.e. <a> is mandatory, <b> is optional at parse time.
+  // The value of <a> is not important and is not included
+  // into the target class, e.g.
+  // <period> [ <unsigned integer> ]
+  template<class A, class B>
+  class UD2MO: public B
+  {
+  public:
+    explicit UD2MO(Parser *p)
+     :B(A(p).isNull() ? B() : B(p))
+    { }
+    explicit UD2MO(const B &b)
+     :B(b)
+    { }
+    explicit UD2MO()
+     :B()
+    { }
+  };
+
+
+  // A helper class template for a rule in the form:       <res> := <a> [ <b> ]
+  // i.e. <a> is mandatory, <b> is optional at parse time.
+  // The result class derives from "A".
+  // The result class puts "B" as a member.
+  template<class A, class B>
+  class DM2MO: public A
+  {
+  protected:
+    B mB;
+  public:
+    explicit DM2MO(Parser *p)
+     :A(p),
+      mB(A::isNull() ? B() : B(p))
+    { }
+  };
+
+
+  // A helper class template for a rule in the form:       <res> := [ <a> ] <b>
+  // i.e. <a> is optional, <b> is mandatory at parse time.
+  template<class A,class B>
+  class DD2OM: public Opt<A>,
+               public B
+  {
+  public:
+    explicit DD2OM(Parser *p)
+     :Opt<A>(p), B(p)
+    {
+      if (B::isNull())
+        p->setSyntaxError();
+    }
+    explicit DD2OM()
+     :Opt<A>(A())
+    { }
+    explicit DD2OM(const A & a, const B & b)
+     :Opt<A>(a), B(b)
+    { }
+  };
+
+  // A helper class template for a rule in the form:       <res> := a | b
+  template<class Container, class A, class B>
+  class Choice2: public Container
+  {
+  public:
+    explicit Choice2(const A & a)
+     :Container(a)
+    { }
+    explicit Choice2(const B & b)
+     :Container(b)
+    { }
+    explicit Choice2(Parser *p)
+     :Container(A(p))
+    {
+      if (Container::isNull() && !p->syntaxError())
+        *this = Choice2(B(p));
+    }
+  };
+};
+
+
+} // namespace genericparser
+
+#endif // GENERICPARSER_H
--- a/utils/funcexp/func_bitwise.cpp
+++ b/utils/funcexp/func_bitwise.cpp
@ -41,6 +41,8 @@ using namespace logging;
 #include "mcs_int64.h"
 #include "mcs_decimal.h"
 #include "dataconvert.h"
+#include "numericliteral.h"
+
 using namespace dataconvert;

 namespace
@ -163,14 +165,14 @@ datatypes::TUInt64Null GenericToBitOperand(
            const string& str = parm->data()->getStrVal(row, tmpIsNull);
            if (tmpIsNull)
                return datatypes::TUInt64Null();
-            static const datatypes::SystemCatalog::TypeAttributesStd
-              attr(datatypes::MAXDECIMALWIDTH, 6, datatypes::INT128MAXPRECISION);
-            int128_t val = attr.decimal128FromString(str);
-            datatypes::Decimal d(0, attr.scale, attr.precision, &val);
-            val = d.getPosNegRoundedIntegralPart(0).getValue();
-            return ConvertToBitOperand<int128_t>(val);
-        }

+            datatypes::DataCondition cnverr;
+            literal::Converter<literal::SignedNumericLiteral> cnv(str, cnverr);
+            cnv.normalize();
+            return cnv.negative() ?
+              datatypes::TUInt64Null((uint64_t)cnv.toPackedSDecimal<int64_t>(0, cnverr)) :
+              datatypes::TUInt64Null(cnv.toPackedUDecimal<uint64_t>(0, cnverr));
+        }
        case execplan::CalpontSystemCatalog::DECIMAL:
        case execplan::CalpontSystemCatalog::UDECIMAL:
            return DecimalToBitOperand(row, parm, thisFunc);
--- a/utils/funcexp/func_cast.cpp
+++ b/utils/funcexp/func_cast.cpp
@ -40,6 +40,7 @@ using namespace rowgroup;
 using namespace logging;

 #include "dataconvert.h"
+#include "numericliteral.h"
 using namespace dataconvert;
 #include "collation.h"

@ -1512,241 +1513,15 @@ IDB_Decimal Func_cast_decimal::getDecimalVal(Row& row,
        case execplan::CalpontSystemCatalog::TEXT:
        {
            const string& strValue = parm[0]->data()->getStrVal(row, isNull);
-            const char* str = strValue.c_str();
-            const char* s;
-            const char* firstInt = NULL;
-            char*       endptr = NULL;
-            char        fracBuf[20];
-            int         fracChars;
-            int         negate = 1;
-            bool        bFoundSign = false;
-            bool        bRound = false;
-
            if (strValue.empty())
            {
                isNull = true;
                return IDB_Decimal();  // need a null value for IDB_Decimal??
            }
-
-            decimal.scale = decimals;
-            decimal.value = 0;
-
-            // Look for scientific notation. The existence of an 'e' indicates it probably is.
-            for (s = str; *s; ++s)  // This is faster than two finds.
-            {
-                if (*s == 'e' || *s == 'E')
-                {
-                    if (decimal.isTSInt128ByPrecision())
-                    {
-                        // it is worth to parse the exponent first to detect an overflow
-                        bool dummy = false;
-                        char *ep = NULL;
-                        int128_t max_number_decimal = dataconvert::strtoll128(columnstore_big_precision[max_length - 19].c_str(), dummy, &ep);
-
-                        int128_t scaleDivisor;
-                        datatypes::getScaleDivisor(scaleDivisor, decimals);
-                        float128_t floatValue = datatypes::TFloat128::fromString(strValue);
-
-                        // If the float value is too large, the saturated result may end up with
-                        // the wrong sign, so we just check first.
-                        if ((int128_t)floatValue > max_number_decimal)
-                            decimal.s128Value = max_number_decimal;
-                        else if ((int128_t)floatValue < -max_number_decimal)
-                            decimal.s128Value = -max_number_decimal;
-                        else if (floatValue > 0)
-                            decimal.s128Value = (int128_t) (floatValue * scaleDivisor + 0.5);
-                        else if (floatValue < 0)
-                            decimal.s128Value = (int128_t) (floatValue * scaleDivisor - 0.5);
-                        else
-                            decimal.s128Value = 0;
-
-                        if (decimal.s128Value > max_number_decimal)
-                            decimal.s128Value = max_number_decimal;
-                        else if (decimal.s128Value < -max_number_decimal)
-                            decimal.s128Value = -max_number_decimal;
-
-                        return decimal;
-                    }
-                    else
-                    {
-                        int64_t max_number_decimal = helpers::maxNumber_c[max_length];
-
-                        double floatValue = strtod(str, 0);
-
-                        // If the float value is too large, the saturated result may end up with
-                        // the wrong sign, so we just check first.
-                        if ((int64_t)floatValue > max_number_decimal)
-                            decimal.value = max_number_decimal;
-                        else if ((int64_t)floatValue < -max_number_decimal)
-                            decimal.value = -max_number_decimal;
-                        else if (floatValue > 0)
-                            decimal.value = (int64_t) (floatValue * helpers::powerOf10_c[decimals] + 0.5);
-                        else if (floatValue < 0)
-                            decimal.value = (int64_t) (floatValue * helpers::powerOf10_c[decimals] - 0.5);
-                        else
-                            decimal.value = 0;
-
-                        if (decimal.value > max_number_decimal)
-                            decimal.value = max_number_decimal;
-                        else if (decimal.value < -max_number_decimal)
-                            decimal.value = -max_number_decimal;
-
-                        return decimal;
-                    }
-                }
+            datatypes::DataCondition convError;
+            return IDB_Decimal(strValue.data(), strValue.length(), convError, decimals, max_length);
        }

-            // There are cases (such as "-.95" that should return that may not result in the desired rounding.
-            // By stripping the sign and adding it back in later, we can get a more accurate answer.
-            for (s = str; *s; ++s)
-            {
-                if (*s == '-')
-                {
-                    if (bFoundSign) // If we find a duplicate sign char, it's an error.
-                    {
-                        return decimal;
-                    }
-
-                    bFoundSign = true;
-                    negate = -1;
-                }
-                else if (*s == '+')
-                {
-                    if (bFoundSign)
-                    {
-                        return decimal;
-                    }
-
-                    bFoundSign = true;
-                }
-                else if (*s == *convData->decimal_point || *s == '.')
-                {
-                    // If we find a decimal point, that means there's no leading integer. (like ".99")
-                    // In this case we need to mark where we are.
-                    endptr = const_cast<char*>(s);
-                    break;
-                }
-                else if (isdigit(*s))
-                {
-                    firstInt = s;
-                    break;
-                }
-            }
-
-            if (decimal.isTSInt128ByPrecision())
-            {
-                bool dummy = false;
-                char *ep = NULL;
-                int128_t max_number_decimal = dataconvert::strtoll128(columnstore_big_precision[max_length - 19].c_str(), dummy, &ep);
-
-                int128_t value = 0, frac = 0;
-
-                if (firstInt)   // Checking to see if we have a decimal point, but no previous digits.
-                {
-                    value = dataconvert::strtoll128(firstInt, dummy, &endptr);
-                }
-
-                int128_t scaleDivisor;
-                datatypes::getScaleDivisor(scaleDivisor, decimals);
-
-                if (!dummy && endptr)
-                {
-                    // Scale the integer portion according to the DECIMAL description
-                    value *= scaleDivisor;
-
-                    // Get the fractional part.
-                    if (endptr && (*endptr == *convData->decimal_point || *endptr == '.'))
-                    {
-                        s = endptr + 1;
-
-                        // Get the digits to the right of the decimal
-                        // Only retrieve those that matter based on scale.
-                        for (fracChars = 0;
-                                *s && isdigit(*s) && fracChars < decimals;
-                                ++fracChars, ++s)
-                        {
-                            // Save the frac characters to a side buffer. This way we can limit
-                            // ourselves to the scale without modifying the original string.
-                            fracBuf[fracChars] = *s;
-                        }
-
-                        fracBuf[fracChars] = 0;
-
-                        // Check to see if we need to round
-                        if (isdigit(*s) && *s > '4')
-                        {
-                            bRound = true;
-                        }
-                    }
-
-                    frac = dataconvert::strtoll128(fracBuf, dummy, &ep);
-                    value += frac + (bRound ? 1 : 0);
-                    value *= negate;
-                }
-
-                decimal.s128Value = value;
-
-                if (decimal.s128Value > max_number_decimal)
-                    decimal.s128Value = max_number_decimal;
-                else if (decimal.s128Value < -max_number_decimal)
-                    decimal.s128Value = -max_number_decimal;
-            }
-            else
-            {
-                int64_t max_number_decimal = helpers::maxNumber_c[max_length];
-
-                int64_t value = 0, frac = 0;
-
-                errno = 0;
-
-                if (firstInt)   // Checking to see if we have a decimal point, but no previous digits.
-                {
-                    value = strtoll(firstInt, &endptr, 10);
-                }
-
-                if (!errno && endptr)
-                {
-                    // Scale the integer portion according to the DECIMAL description
-                    value *= helpers::powerOf10_c[decimals];
-
-                    // Get the fractional part.
-                    if (endptr && (*endptr == *convData->decimal_point || *endptr == '.'))
-                    {
-                        s = endptr + 1;
-
-                        // Get the digits to the right of the decimal
-                        // Only retrieve those that matter based on scale.
-                        for (fracChars = 0;
-                                *s && isdigit(*s) && fracChars < decimals;
-                                ++fracChars, ++s)
-                        {
-                            // Save the frac characters to a side buffer. This way we can limit
-                            // ourselves to the scale without modifying the original string.
-                            fracBuf[fracChars] = *s;
-                        }
-
-                        fracBuf[fracChars] = 0;
-
-                        // Check to see if we need to round
-                        if (isdigit(*s) && *s > '4')
-                        {
-                            bRound = true;
-                        }
-                    }
-
-                    frac = strtoll(fracBuf, &endptr, 10);
-                    value += frac + (bRound ? 1 : 0);
-                    value *= negate;
-                }
-
-                decimal.value = value;
-
-                if (decimal.value > max_number_decimal)
-                    decimal.value = max_number_decimal;
-                else if (decimal.value < -max_number_decimal)
-                    decimal.value = -max_number_decimal;
-            }
-        }
        break;

        case execplan::CalpontSystemCatalog::DATE: