diff --git a/datatypes/mcs_data_condition.h b/datatypes/mcs_data_condition.h new file mode 100644 index 000000000..597daf911 --- /dev/null +++ b/datatypes/mcs_data_condition.h @@ -0,0 +1,83 @@ +/* Copyright (C) 2021 MariaDB Corporation. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + + +#ifndef MCS_DATA_CONDITION_H +#define MCS_DATA_CONDITION_H + +namespace datatypes +{ + +/* + A subset of SQL Conditions related to data processing. 
+ SQLSTATE terminology is used for categories: + - S stands for "success" + - W stands for "warning" + - X stands for "exceptions" +*/ +class DataCondition +{ +public: + enum Code + { + // Code Value SQLSTATE + S_SUCCESS = 0, // 00000 + W_STRING_DATA_RIGHT_TRUNCATION = 1 << 1, // 01004 + X_STRING_DATA_RIGHT_TRUNCATION = 1 << 16, // 22001 + X_NUMERIC_VALUE_OUT_OF_RANGE = 1 << 17, // 22003 + X_INVALID_CHARACTER_VALUE_FOR_CAST = 1 << 18, // 22018 + }; + DataCondition() + :mError(S_SUCCESS) + { } + DataCondition(Code code) + :mError(code) + { } + DataCondition & operator|=(Code code) + { + mError= (Code) (mError | code); + return *this; + } + DataCondition operator&(Code rhs) const + { + return DataCondition((Code) (mError & rhs)); + } + operator Code () const { return mError; } + + // Adjust a sigened integer of any size to the range [-absMaxVal , +absMaxVal] + template + void adjustSIntXRange(T & val, T absMaxVal) + { + if (val > absMaxVal) + { + val = absMaxVal; + *this |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE; + } + else if (val < -absMaxVal) + { + val = -absMaxVal; + *this |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE; + } + } + +private: + Code mError; +}; + +} // namespace datatypes + +#endif // MCS_DATA_CONDITION_H diff --git a/datatypes/mcs_datatype.h b/datatypes/mcs_datatype.h index 69fc4a5db..0929362fa 100644 --- a/datatypes/mcs_datatype.h +++ b/datatypes/mcs_datatype.h @@ -18,10 +18,11 @@ #ifndef MCS_DATATYPE_H_INCLUDED #define MCS_DATATYPE_H_INCLUDED -#include #include #include #include "exceptclasses.h" +#include "mcs_numeric_limits.h" +#include "mcs_data_condition.h" #include "mcs_decimal.h" @@ -105,6 +106,24 @@ namespace execplan +namespace datatypes +{ + +template +struct make_unsigned +{ + typedef struct { } type; +}; + +template<> struct make_unsigned { typedef uint8_t type; }; +template<> struct make_unsigned { typedef uint16_t type; }; +template<> struct make_unsigned { typedef uint32_t type; }; +template<> struct make_unsigned { typedef 
uint64_t type; }; +template<> struct make_unsigned { typedef uint128_t type; }; + +} // namespace datatypes + + namespace datatypes { diff --git a/datatypes/mcs_decimal.cpp b/datatypes/mcs_decimal.cpp index a7915f965..49cdd5bf5 100644 --- a/datatypes/mcs_decimal.cpp +++ b/datatypes/mcs_decimal.cpp @@ -19,6 +19,7 @@ #include "utils/common/branchpred.h" #include "mcs_decimal.h" +#include "numericliteral.h" namespace datatypes { @@ -170,6 +171,33 @@ namespace datatypes } } + Decimal::Decimal(const char *str, size_t length, DataCondition & convError, + int8_t s, uint8_t p) + :TSInt128(), + value(0), + scale(s), + precision(p) + { + literal::Converter conv(str, length, convError); + // We don't check "convErr" here. Let the caller do it. + // Let's just convert what has been parsed. + + // Remove redundant leading integral and trailing fractional digits + conv.normalize(); + if (isTSInt128ByPrecision()) + { + s128Value = conv.toPackedSDecimal((literal::scale_t) scale, convError); + int128_t max_number_decimal = mcs_pow_10_128[precision - 19] - 1; + convError.adjustSIntXRange(s128Value, max_number_decimal); + } + else + { + value = conv.toPackedSDecimal((literal::scale_t) scale, convError); + int64_t max_number_decimal = (int64_t) mcs_pow_10[precision] - 1; + convError.adjustSIntXRange(value, max_number_decimal); + } + } + int Decimal::compare(const Decimal& l, const Decimal& r) { int128_t divisorL, divisorR; diff --git a/datatypes/mcs_decimal.h b/datatypes/mcs_decimal.h index e818d6060..40a5b47e2 100644 --- a/datatypes/mcs_decimal.h +++ b/datatypes/mcs_decimal.h @@ -29,6 +29,7 @@ #include "mcs_float128.h" #include "checks.h" #include "branchpred.h" +#include "mcs_data_condition.h" namespace datatypes @@ -302,6 +303,9 @@ class Decimal: public TSInt128 { } + Decimal(const char *str, size_t length, DataCondition & error, + int8_t s, uint8_t p); + int decimalComp(const Decimal& d) const { lldiv_t d1 = lldiv(value, static_cast(mcs_pow_10[scale])); diff --git 
a/datatypes/mcs_float128.h b/datatypes/mcs_float128.h index 503b2a758..f768474b1 100644 --- a/datatypes/mcs_float128.h +++ b/datatypes/mcs_float128.h @@ -22,6 +22,7 @@ #include #include #include +#include "mcs_numeric_limits.h" #ifdef __aarch64__ using float128_t = long double; @@ -98,8 +99,6 @@ using int128_t = __int128; static const float128_t mcs_fl_one = 1.0, mcs_fl_Zero[] = {0.0, -0.0,}; -template -class numeric_limits { }; // Copy from boost::multiprecision::float128 template<> class numeric_limits { public: diff --git a/datatypes/mcs_numeric_limits.h b/datatypes/mcs_numeric_limits.h new file mode 100644 index 000000000..48fe545db --- /dev/null +++ b/datatypes/mcs_numeric_limits.h @@ -0,0 +1,62 @@ +/* + Copyright (C) 2021 MariaDB Corporation + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ +#ifndef MCS_NUMERIC_LIMITS_H_INCLUDED +#define MCS_NUMERIC_LIMITS_H_INCLUDED + +#include + +namespace datatypes +{ + +template +struct numeric_limits +{ + static constexpr T min() { return std::numeric_limits::min(); } + static constexpr T max() { return std::numeric_limits::max(); } +}; + +using int128_t = __int128; +using uint128_t = unsigned __int128; + +template<> struct numeric_limits +{ + static constexpr int128_t min() + { + return int128_t(0x8000000000000000LL) << 64; + } + static constexpr int128_t max() + { + return (int128_t(0x7FFFFFFFFFFFFFFFLL) << 64) + 0xFFFFFFFFFFFFFFFFLL; + } +}; + +template<> struct numeric_limits +{ + static constexpr uint128_t min() + { + return uint128_t(0); + } + static constexpr uint128_t max() + { + return (uint128_t(0xFFFFFFFFFFFFFFFFULL) << 64) + 0xFFFFFFFFFFFFFFFFULL; + } +}; + +} // namespace datatypes + +#endif // MCS_NUMERIC_LIMITS_H_INCLUDED diff --git a/datatypes/numericliteral.h b/datatypes/numericliteral.h new file mode 100644 index 000000000..2aeb58ab3 --- /dev/null +++ b/datatypes/numericliteral.h @@ -0,0 +1,620 @@ +/* Copyright (C) 2021 MariaDB Corporation. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + + +#ifndef NUMERICLITERAL_H +#define NUMERICLITERAL_H + +#include "genericparser.h" +#include "mcs_datatype.h" + + +namespace literal +{ + +using utils::ConstString; +using genericparser::Parser; +using datatypes::DataCondition; + +typedef uint32_t scale_t; + + + +template +class Converter: public Parser, + public A +{ +public: + Converter(const char *str, size_t length, DataCondition & error) + :Parser(str, length), + A(&Parser::skipLeadingSpaces()) + { + if (Parser::syntaxError()) + { + /* + Non-recoverable syntax error happened. The parser parsed the first part + of a combined rule (and therefore shifted the tokenizer position) + but then failed to parse the rule till the end. + + For example in the : + '' - empty string + '+' - sign was not followed by a digit or period, expect '+1' + '.' - period was not followed by a digit, expect '.1' + '1e' - exponent marker was not followed by , expect '1e1' + '1e+' - in , was not followed by a digit, expect '1e+1' + */ + error|=(DataCondition::X_INVALID_CHARACTER_VALUE_FOR_CAST); + } + } + Converter(const std::string & str, DataCondition &error) + :Converter(str.data(), str.length(), error) + { } +}; + + +/* + +SQL Standard definition for +related to character string to exact number conversion +====================================================== +Abbreviations: +- TD - the target data type +- SD - the datatype of the source value +- SV - the source value + +8) If TD is exact numeric, then +a) If SD is exact numeric or approximate numeric, then +Case: + + i) If there is a representation of SV in the data type TD that does not lose + any leading significant digits after rounding or truncating if necessary, + then TV is that representation. The choice of whether to round or truncate + is implementation-defined. (NoteAI) + ii) Otherwise, an exception condition is raised: + data exception -- numeric value out of range. 
(NoteAII) + +b) If SD is character string, then SV is replaced by SV with any leading + or trailing <space>s removed. (NoteB) +Case: + + i) If SV does not comprise a <signed numeric literal> as defined by the rules + for <literal> in Subclause 5.3 "<literal>", then an exception condition is raised: + data exception - invalid character value for cast. (NoteBI) + ii) Otherwise, let LT be that <signed numeric literal>. + The <cast specification> is equivalent to CAST ( LT AS TD ) + + +Implementation details +====================== +NoteAI +---- +The implementation-defined choice whether to round or truncate is +"round away from zero". + +NoteAII +----- +When the "numeric value out of range" state is found, it is signalled +to the caller, and the returned value is adjusted according to the TD range. +The caller later decides whether to raise an error or to use the adjusted value. + +NoteB +----- +The implementation removes only leading spaces. The caller can +check if any trailing spaces are left by the parser. + +NoteBI +------ +The implementation stops on the first character that does not +conform to the syntax. The caller can +check if any trailing garbage characters are left by the parser. + + +Grammar +======= + +<signed numeric literal> ::= [ <sign> ] <unsigned numeric literal> + +<unsigned numeric literal> ::= <exact numeric literal> [ E <exponent> ] + +<exact numeric literal> ::= + <unsigned integer> [ <period> [ <unsigned integer> ] ] + | <period> <unsigned integer> + +<sign> ::= <plus sign> | <minus sign> + +<exponent> ::= <signed integer> + +<signed integer> ::= [ <sign> ] <unsigned integer> + +<unsigned integer> ::= <digit> ... 
+ +*/ + + +// +// Terminal symbols +// + +class Period: public ConstString +{ +public: + explicit Period(Parser *p) + :ConstString(p->tokenChar('.')) + { } + bool isNull() const { return mStr == nullptr; } +}; + + +class ExponentMarker: public ConstString +{ +public: + explicit ExponentMarker(Parser *p) + :ConstString(p->tokenAnyCharOf('e', 'E')) + { } + bool isNull() const { return mStr == nullptr; } +}; + + +class Sign: public ConstString +{ +public: + explicit Sign(): ConstString(NULL, 0) { } + explicit Sign(const ConstString &str) + :ConstString(str) + { } + explicit Sign(Parser *p) + :ConstString(p->tokenAnyCharOf('+', '-')) + { } + static Sign empty(Parser *p) + { + return Sign(p->tokStartConstString()); + } + bool isNull() const { return mStr == nullptr; } + bool negative() const { return eq('-'); } +}; + + +class Digits: public ConstString +{ +public: + explicit Digits() + :ConstString(NULL, 0) + { } + explicit Digits(const char *str, size_t length) + :ConstString(str, length) + { } + explicit Digits(const ConstString &str) + :ConstString(str) + { } + explicit Digits(Parser *p) + :ConstString(p->tokenDigits()) + { } + bool isNull() const { return mStr == nullptr; } + + void skipLeadingZeroDigits() + { + for ( ; mLength > 0 && mStr[0] == '0'; ) + { + mStr++; + mLength--; + } + } + void skipTrailingZeroDigits() + { + for ( ; mLength > 0 && mStr[mLength - 1] == '0' ; ) + mLength--; + } +}; + + +// +// Non-terminal symbols +// + +// ::= ... +class UnsignedInteger: public Digits +{ +public: + explicit UnsignedInteger() + :Digits() + { } + explicit UnsignedInteger(const char *str, size_t length) + :Digits(str, length) + { } + explicit UnsignedInteger(const ConstString &str) + :Digits(str) + { } + explicit UnsignedInteger(Parser *p) + :Digits(p) + { } + static UnsignedInteger empty(const Parser *p) + { + return UnsignedInteger(p->tokStartConstString()); + } + UnsignedInteger left(size_t len) const + { + return UnsignedInteger(str(), length() > len ? 
len : length()); + } + + template + T toXIntPositiveContinue(T start, DataCondition & error) const + { + const char *e = end(); + T val = start; + for (const char *s= mStr; s < e; s++) + { + constexpr T cutoff = datatypes::numeric_limits::max() / 10; + if (val > cutoff) + { + error|= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE; + return datatypes::numeric_limits::max(); + } + val*= 10; + T newval = val + (s[0] - '0'); + if (newval < val) + { + error|= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE; + return datatypes::numeric_limits::max(); + } + val = newval; + } + return val; + } + template + T toXIntPositive(DataCondition & error) const + { + return toXIntPositiveContinue(0, error); + } + + template + T toSIntNegativeContinue(T start, DataCondition & error) const + { + const char *e = end(); + T val = start; + for (const char *s= mStr; s < e; s++) + { + constexpr T cutoff = datatypes::numeric_limits::min() / 10; + if (val < cutoff) + { + error|= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE; + return datatypes::numeric_limits::min(); + } + val*= 10; + T newval = val - (s[0] - '0'); + if (newval > val) + { + error|= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE; + return datatypes::numeric_limits::min(); + } + val = newval; + } + return val; + } + template + T toSIntNegative(DataCondition & error) const + { + return toSIntNegativeContinue(0, error); + } + + template + T toXIntPositiveRoundAwayFromZeroContinue(T start, bool round, DataCondition & error) const + { + T val = toXIntPositiveContinue(start, error); + if (val == datatypes::numeric_limits::max() && round) + { + error|= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE; + return val; + } + return val + round; + } + template + T toXIntPositiveRoundAwayFromZero(bool round, DataCondition & error) const + { + return toXIntPositiveRoundAwayFromZeroContinue(0, round, error); + } +}; + + +// := [] +class SignedInteger: public Parser::DD2OM +{ +public: + using DD2OM::DD2OM; + bool isNull() const { return 
UnsignedInteger::isNull(); } + + template T abs(DataCondition & error) const + { + return toXIntPositive(error); + } + + template T toSInt(DataCondition & error) const + { + return negative() ? + toSIntNegative(error) : + toXIntPositive(error); + } +}; + + +// E +class EExponent: public Parser::UD2MM +{ +public: + using UD2MM::UD2MM; +}; + + +// +class ExactUnsignedNumericLiteralFractionAlone: public Parser::UD2MM +{ +public: + using UD2MM::UD2MM; +}; + + +// [ ] +class PeriodOptUnsignedInteger: public Parser::UD2MO +{ +public: + using UD2MO::UD2MO; + static PeriodOptUnsignedInteger empty(Parser *p) + { + return PeriodOptUnsignedInteger(UnsignedInteger(p->tokStartConstString())); + } + const PeriodOptUnsignedInteger & fraction() const + { + return *this; + } +}; + + +// := +class IntegralUnsignedInteger: public UnsignedInteger +{ +public: + explicit IntegralUnsignedInteger(Parser *p) + :UnsignedInteger(p) + { } + const UnsignedInteger & integral() const + { + return *this; + } +}; + + +// [ [ ] ] + +class ExactUnsignedNumericLiteralIntegralOptFraction: + public Parser::DD2MO +{ +public: + using DD2MO::DD2MO; +}; + + +// A container for integral and fractional parts +class UnsignedIntegerDecimal +{ +protected: + UnsignedInteger mIntegral; + UnsignedInteger mFraction; +public: + explicit UnsignedIntegerDecimal(const UnsignedInteger &intg, + const UnsignedInteger &frac) + :mIntegral(intg), + mFraction(frac) + { } + explicit UnsignedIntegerDecimal(const ExactUnsignedNumericLiteralFractionAlone &rhs) + :mFraction(rhs) + { } + explicit UnsignedIntegerDecimal(const ExactUnsignedNumericLiteralIntegralOptFraction &rhs) + :mIntegral(rhs.integral()), + mFraction(rhs.fraction()) + { } + + size_t IntFracDigits() const + { + return mIntegral.length() + mFraction.length(); + } + + bool isNull() const + { + return mIntegral.isNull() && mFraction.isNull(); + } + + void normalize() + { + mIntegral.skipLeadingZeroDigits(); + mFraction.skipTrailingZeroDigits(); + } + + template T 
toXIntPositive(DataCondition & error) const + { + T val = mIntegral.toXIntPositive(error); + return mFraction.toXIntPositiveContinue(val, error); + } + + template T toXIntPositiveRoundAwayFromZero(bool roundUp, DataCondition & error) const + { + T val = mIntegral.toXIntPositive(error); + return mFraction.toXIntPositiveRoundAwayFromZeroContinue(val, roundUp, error); + } + + template T toXIntPositiveScaleUp(size_t scale, DataCondition & error) const + { + T val = toXIntPositive(error); + if (val == datatypes::numeric_limits::max()) + return val; + for ( ; scale ; scale--) + { + constexpr T cutoff = datatypes::numeric_limits::max() / 10; + if (val > cutoff) + { + error|= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE; + return datatypes::numeric_limits::max(); + } + val*= 10; + } + return val; + } + + template T toXIntPositiveRound(DataCondition & error) const + { + bool roundUp = mFraction.length() && mFraction.str()[0] >= '5'; + return mIntegral.toXIntPositiveRoundAwayFromZero(roundUp, error); + } + + template T toXIntPositiveRoundExp(uint64_t absExp, bool negExp, + DataCondition & error) const + { + if (absExp == 0) + return toXIntPositiveRound(error); + + if (negExp) + { + if (mIntegral.length() == absExp) // 567.8e-3 -> 0.5678 -> 1 + return mIntegral.str()[0] >= '5' ? 
1 : 0; + if (mIntegral.length() < absExp) // 123e-4 -> 0.0123 + return 0; + // mIntegral.length() > absExp: 5678.8e-3 -> 5.6788 -> 6 + size_t diff = mIntegral.length() - absExp; + const UnsignedInteger tmp(mIntegral.str(), diff); + bool roundUp = mIntegral.str()[diff] >= '5'; + return tmp.toXIntPositiveRoundAwayFromZero(roundUp, error); + } + + // Positive exponent: 123.456e2 + if (mFraction.length() >= absExp) // 123.456e2 -> 12345.6 -> 12346 + { + bool roundUp = mFraction.length() > absExp && mFraction.str()[absExp] >= '5'; + UnsignedIntegerDecimal tmp(mIntegral, mFraction.left(absExp)); + return tmp.toXIntPositiveRoundAwayFromZero(roundUp, error); + } + + // Pad int+frac with right zeros 123.4e3 -> 123400 + size_t diff = absExp - mFraction.length(); + return toXIntPositiveScaleUp(diff, error); + } + +}; + + +// := +// [ ] +// | [ [ ] ] + +class ExactUnsignedNumericLiteral: + public Parser::Choice2 +{ +public: + using Choice2::Choice2; +}; + + +// ::= [ E ] + +class UnsignedNumericLiteral: public Parser::DM2MO +{ +public: + using DM2MO::DM2MO; + void normalize() + { + ExactUnsignedNumericLiteral::normalize(); + mB.skipLeadingZeroDigits(); + } + const SignedInteger & exponent() const + { + return mB; + } + + template + T toXIntPositiveRound(DataCondition & error) const + { + size_t availableDigits = IntFracDigits(); + if (!availableDigits) + return 0; + T absexp = exponent().abs(error); + return ExactUnsignedNumericLiteral::toXIntPositiveRoundExp(absexp, exponent().negative(), error); + } + + template + T toPackedDecimalPositive(scale_t scale, DataCondition & error) const + { + size_t availableDigits = IntFracDigits(); + if (!availableDigits) + return 0; + int64_t exp = exponent().toSInt(error); + if (exp <= datatypes::numeric_limits::max() - scale) + exp+= scale; + if (exp < 0) + { + if (exp == datatypes::numeric_limits::min()) + exp++; // Avoid undefined behaviour in the unary minus below: + return ExactUnsignedNumericLiteral::toXIntPositiveRoundExp((uint64_t) 
-exp, true, error); + } + return ExactUnsignedNumericLiteral::toXIntPositiveRoundExp((uint64_t) exp, false, error); + } + +}; + + +// ::= [ ] +class SignedNumericLiteral: public Parser::DD2OM +{ +public: + using DD2OM::DD2OM; + bool isNull() const { return UnsignedNumericLiteral::isNull(); } + + template + T toUIntXRound() const + { + if (negative()) + return 0; + return UnsignedNumericLiteral::toXIntPositiveRound(); + } + + template + T toPackedUDecimal(scale_t scale, DataCondition & error) const + { + if (negative()) + return 0; + return UnsignedNumericLiteral::toPackedDecimalPositive(scale, error); + } + + template + T toPackedSDecimal(scale_t scale, DataCondition & error) const + { + if (!negative()) + return UnsignedNumericLiteral::toPackedDecimalPositive(scale, error); + typedef typename datatypes::make_unsigned::type UT; + UT absval = UnsignedNumericLiteral::toPackedDecimalPositive(scale, error); + if (absval >= (UT) datatypes::numeric_limits::min()) + { + error|= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE; + return datatypes::numeric_limits::min(); + } + return - (T) absval; + } +}; + + +} // namespace literal + +#endif // NUMERICLITERAL_H diff --git a/utils/common/conststring.h b/utils/common/conststring.h index f686690af..f7fd890e9 100644 --- a/utils/common/conststring.h +++ b/utils/common/conststring.h @@ -25,6 +25,7 @@ namespace utils class ConstString { +protected: const char *mStr; size_t mLength; public: @@ -35,7 +36,12 @@ public: :mStr(str.data()), mLength(str.length()) { } const char *str() const { return mStr; } + const char *end() const { return mStr + mLength; } size_t length() const { return mLength; } + bool eq(char ch) const + { + return mLength == 1 && mStr[0] == ch; + } ConstString & rtrimZero() { for ( ; mLength && mStr[mLength - 1] == '\0'; mLength--) diff --git a/utils/common/genericparser.h b/utils/common/genericparser.h new file mode 100644 index 000000000..2e23d9568 --- /dev/null +++ b/utils/common/genericparser.h @@ -0,0 +1,313 @@ 
+/* Copyright (C) 2021 MariaDB Corporation. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + + +#ifndef GENERICPARSER_H +#define GENERICPARSER_H + +#include "conststring.h" + + +namespace genericparser +{ + +using utils::ConstString; + + +class Tokenizer +{ +protected: + const char *mStr; + const char *mEnd; +public: + explicit Tokenizer(const char *str, size_t length) + :mStr(str), mEnd(str + length) + { } + size_t length() const + { + return mEnd - mStr; + } + const char *ptr() const + { + return mStr; + } + bool isSpace() const + { + return mStr < mEnd && mStr[0] == ' '; + } + bool isDigit() const + { + return mStr < mEnd && mStr[0] >= '0' && mStr[0] <= '9'; + } + bool isChar(char chr) const + { + return mStr < mEnd && mStr[0] == chr; + } + bool isAnyCharOf(char chr0, char chr1) + { + return mStr < mEnd && (mStr[0] == chr0 || mStr[0] == chr1); + } + + ConstString tokenSpaces() + { + if (!isSpace()) + return ConstString(nullptr, 0); + const char *start = mStr; + for ( ; isSpace() ; mStr++) + { } + return ConstString(start, mStr - start); + } + ConstString tokenDigits() + { + if (!isDigit()) + return ConstString(nullptr, 0); + const char *start = mStr; + for ( ; isDigit() ; mStr++) + { } + return ConstString(start, mStr - start); + } + ConstString tokenChar(char chr) + { + if (!isChar(chr)) + return ConstString(nullptr, 0); + return ConstString(mStr++, 1); + 
} + ConstString tokenAnyCharOf(char chr0, char chr1) + { + if (!isAnyCharOf(chr0, chr1)) + return ConstString(nullptr, 0); + return ConstString(mStr++, 1); + } +}; + + +class Parser: public Tokenizer +{ +protected: + bool mSyntaxError; +public: + explicit Parser(const char *str, size_t length) + :Tokenizer(str, length), mSyntaxError(false) + { } + explicit Parser(const std::string & str) + :Parser(str.data(), str.length()) + { } + Parser & skipLeadingSpaces() + { + tokenSpaces(); + return *this; + } + bool syntaxError() const + { + return mSyntaxError; + } + bool setSyntaxError() + { + mSyntaxError = true; + return false; + } + const char *tokStart() const + { + return mStr; + } + const ConstString tokStartConstString() const + { + return ConstString(mStr, 0); + } + + + // A helper class template to set the parser syntax error + // if A returned isNull() after parsing. + + template + class SetSyntaxErrorOnNull :public A + { + public: + SetSyntaxErrorOnNull(Parser *p) + :A(p) + { + if (A::isNull()) + p->setSyntaxError(); + } + }; + + // A helper class template for a rule in the form: := [ ] + + template + class Opt: public A + { + public: + explicit Opt(const A &rhs) + :A(rhs) + { } + explicit Opt(Parser *p) + :A(p) + { + if (A::isNull() && !p->syntaxError()) + A::operator=(A::empty(p)); + } + }; + + // Letters in the class template names below mean: + // U - unused - the result class does not have the source class inside + // D - derive - the result class derives from the source class + // M - member - the result class adds the source class as a member + + // M - mandatory - this part is required during parse time + // O - optional - this part is optional during parse time + + + // A helper class template for a rule in the form: := + // i.e. both parts are mandatory at parse time + // The value of is not important, and is created + // only temporary on the stack. + // Only the value of is important. 
+ // Example: + // + + template + class UD2MM: public B + { + public: + explicit UD2MM(Parser *p) + :B(A(p).isNull() ? B() :SetSyntaxErrorOnNull(p)) + { } + explicit UD2MM(const B & b) + :B(b) + { } + explicit UD2MM() + :B() + { } + bool isNull() const { return B::isNull(); } + }; + + // A helper class template for a rule in the form: := + // i.e. both parts are mandatory at parse time. + template + class DD2MM: public A, + public B + { + public: + // Sets syntax error if was not followed by + explicit DD2MM(Parser *p) + :A(p), + B(A::isNull() ? B() : SetSyntaxErrorOnNull(p)) + { } + explicit DD2MM(const A & a, const B &b) + :A(b), B(b) + { } + }; + + // A helper class template for a rule in the form: := [ ] + // i.e. is mandatory, is optional at parse time. + template + class DD2MO: public A, + public B + { + public: + explicit DD2MO(Parser *p) + :A(p), + B(A::isNull() ? B() : B(p)) + { } + explicit DD2MO(const A &a, const B &b) + :A(a), B(b) + { } + }; + + // A helper class template for a rule in the form: := [ ] + // i.e. is mandatory, is optional at parse time. + // The value of is not important and is not included + // into the target class, e.g. + // [ ] + template + class UD2MO: public B + { + public: + explicit UD2MO(Parser *p) + :B(A(p).isNull() ? B() : B(p)) + { } + explicit UD2MO(const B &b) + :B(b) + { } + explicit UD2MO() + :B() + { } + }; + + + // A helper class template for a rule in the form: := [ ] + // i.e. is mandatory, is optional at parse time. + // The result class derives from "A". + // The result class puts "B" as a member. + template + class DM2MO: public A + { + protected: + B mB; + public: + explicit DM2MO(Parser *p) + :A(p), + mB(A::isNull() ? B() : B(p)) + { } + }; + + + // A helper class template for a rule in the form: := [ ] + // i.e. is optional, is mandatory at parse time. 
+ template + class DD2OM: public Opt, + public B + { + public: + explicit DD2OM(Parser *p) + :Opt(p), B(p) + { + if (B::isNull()) + p->setSyntaxError(); + } + explicit DD2OM() + :Opt(A()) + { } + explicit DD2OM(const A & a, const B & b) + :Opt(a), B(b) + { } + }; + + // A helper class template for a rule in the form: := a | b + template + class Choice2: public Container + { + public: + explicit Choice2(const A & a) + :Container(a) + { } + explicit Choice2(const B & b) + :Container(b) + { } + explicit Choice2(Parser *p) + :Container(A(p)) + { + if (Container::isNull() && !p->syntaxError()) + *this = Choice2(B(p)); + } + }; +}; + + +} // namespace genericparser + +#endif // GENERICPARSER_H diff --git a/utils/funcexp/func_bitwise.cpp b/utils/funcexp/func_bitwise.cpp index f6723e5dc..b8d2f7a93 100644 --- a/utils/funcexp/func_bitwise.cpp +++ b/utils/funcexp/func_bitwise.cpp @@ -41,6 +41,8 @@ using namespace logging; #include "mcs_int64.h" #include "mcs_decimal.h" #include "dataconvert.h" +#include "numericliteral.h" + using namespace dataconvert; namespace @@ -163,14 +165,14 @@ datatypes::TUInt64Null GenericToBitOperand( const string& str = parm->data()->getStrVal(row, tmpIsNull); if (tmpIsNull) return datatypes::TUInt64Null(); - static const datatypes::SystemCatalog::TypeAttributesStd - attr(datatypes::MAXDECIMALWIDTH, 6, datatypes::INT128MAXPRECISION); - int128_t val = attr.decimal128FromString(str); - datatypes::Decimal d(0, attr.scale, attr.precision, &val); - val = d.getPosNegRoundedIntegralPart(0).getValue(); - return ConvertToBitOperand(val); - } + datatypes::DataCondition cnverr; + literal::Converter cnv(str, cnverr); + cnv.normalize(); + return cnv.negative() ? 
+ datatypes::TUInt64Null((uint64_t)cnv.toPackedSDecimal(0, cnverr)) : + datatypes::TUInt64Null(cnv.toPackedUDecimal(0, cnverr)); + } case execplan::CalpontSystemCatalog::DECIMAL: case execplan::CalpontSystemCatalog::UDECIMAL: return DecimalToBitOperand(row, parm, thisFunc); diff --git a/utils/funcexp/func_cast.cpp b/utils/funcexp/func_cast.cpp index 20f9e212d..3224126cf 100644 --- a/utils/funcexp/func_cast.cpp +++ b/utils/funcexp/func_cast.cpp @@ -40,6 +40,7 @@ using namespace rowgroup; using namespace logging; #include "dataconvert.h" +#include "numericliteral.h" using namespace dataconvert; #include "collation.h" @@ -1512,241 +1513,15 @@ IDB_Decimal Func_cast_decimal::getDecimalVal(Row& row, case execplan::CalpontSystemCatalog::TEXT: { const string& strValue = parm[0]->data()->getStrVal(row, isNull); - const char* str = strValue.c_str(); - const char* s; - const char* firstInt = NULL; - char* endptr = NULL; - char fracBuf[20]; - int fracChars; - int negate = 1; - bool bFoundSign = false; - bool bRound = false; - if (strValue.empty()) { isNull = true; return IDB_Decimal(); // need a null value for IDB_Decimal?? } - - decimal.scale = decimals; - decimal.value = 0; - - // Look for scientific notation. The existence of an 'e' indicates it probably is. - for (s = str; *s; ++s) // This is faster than two finds. - { - if (*s == 'e' || *s == 'E') - { - if (decimal.isTSInt128ByPrecision()) - { - // it is worth to parse the exponent first to detect an overflow - bool dummy = false; - char *ep = NULL; - int128_t max_number_decimal = dataconvert::strtoll128(columnstore_big_precision[max_length - 19].c_str(), dummy, &ep); - - int128_t scaleDivisor; - datatypes::getScaleDivisor(scaleDivisor, decimals); - float128_t floatValue = datatypes::TFloat128::fromString(strValue); - - // If the float value is too large, the saturated result may end up with - // the wrong sign, so we just check first. 
- if ((int128_t)floatValue > max_number_decimal) - decimal.s128Value = max_number_decimal; - else if ((int128_t)floatValue < -max_number_decimal) - decimal.s128Value = -max_number_decimal; - else if (floatValue > 0) - decimal.s128Value = (int128_t) (floatValue * scaleDivisor + 0.5); - else if (floatValue < 0) - decimal.s128Value = (int128_t) (floatValue * scaleDivisor - 0.5); - else - decimal.s128Value = 0; - - if (decimal.s128Value > max_number_decimal) - decimal.s128Value = max_number_decimal; - else if (decimal.s128Value < -max_number_decimal) - decimal.s128Value = -max_number_decimal; - - return decimal; - } - else - { - int64_t max_number_decimal = helpers::maxNumber_c[max_length]; - - double floatValue = strtod(str, 0); - - // If the float value is too large, the saturated result may end up with - // the wrong sign, so we just check first. - if ((int64_t)floatValue > max_number_decimal) - decimal.value = max_number_decimal; - else if ((int64_t)floatValue < -max_number_decimal) - decimal.value = -max_number_decimal; - else if (floatValue > 0) - decimal.value = (int64_t) (floatValue * helpers::powerOf10_c[decimals] + 0.5); - else if (floatValue < 0) - decimal.value = (int64_t) (floatValue * helpers::powerOf10_c[decimals] - 0.5); - else - decimal.value = 0; - - if (decimal.value > max_number_decimal) - decimal.value = max_number_decimal; - else if (decimal.value < -max_number_decimal) - decimal.value = -max_number_decimal; - - return decimal; - } - } - } - - // There are cases (such as "-.95" that should return that may not result in the desired rounding. - // By stripping the sign and adding it back in later, we can get a more accurate answer. - for (s = str; *s; ++s) - { - if (*s == '-') - { - if (bFoundSign) // If we find a duplicate sign char, it's an error. 
- { - return decimal; - } - - bFoundSign = true; - negate = -1; - } - else if (*s == '+') - { - if (bFoundSign) - { - return decimal; - } - - bFoundSign = true; - } - else if (*s == *convData->decimal_point || *s == '.') - { - // If we find a decimal point, that means there's no leading integer. (like ".99") - // In this case we need to mark where we are. - endptr = const_cast(s); - break; - } - else if (isdigit(*s)) - { - firstInt = s; - break; - } - } - - if (decimal.isTSInt128ByPrecision()) - { - bool dummy = false; - char *ep = NULL; - int128_t max_number_decimal = dataconvert::strtoll128(columnstore_big_precision[max_length - 19].c_str(), dummy, &ep); - - int128_t value = 0, frac = 0; - - if (firstInt) // Checking to see if we have a decimal point, but no previous digits. - { - value = dataconvert::strtoll128(firstInt, dummy, &endptr); - } - - int128_t scaleDivisor; - datatypes::getScaleDivisor(scaleDivisor, decimals); - - if (!dummy && endptr) - { - // Scale the integer portion according to the DECIMAL description - value *= scaleDivisor; - - // Get the fractional part. - if (endptr && (*endptr == *convData->decimal_point || *endptr == '.')) - { - s = endptr + 1; - - // Get the digits to the right of the decimal - // Only retrieve those that matter based on scale. - for (fracChars = 0; - *s && isdigit(*s) && fracChars < decimals; - ++fracChars, ++s) - { - // Save the frac characters to a side buffer. This way we can limit - // ourselves to the scale without modifying the original string. - fracBuf[fracChars] = *s; - } - - fracBuf[fracChars] = 0; - - // Check to see if we need to round - if (isdigit(*s) && *s > '4') - { - bRound = true; - } - } - - frac = dataconvert::strtoll128(fracBuf, dummy, &ep); - value += frac + (bRound ? 
1 : 0); - value *= negate; - } - - decimal.s128Value = value; - - if (decimal.s128Value > max_number_decimal) - decimal.s128Value = max_number_decimal; - else if (decimal.s128Value < -max_number_decimal) - decimal.s128Value = -max_number_decimal; - } - else - { - int64_t max_number_decimal = helpers::maxNumber_c[max_length]; - - int64_t value = 0, frac = 0; - - errno = 0; - - if (firstInt) // Checking to see if we have a decimal point, but no previous digits. - { - value = strtoll(firstInt, &endptr, 10); - } - - if (!errno && endptr) - { - // Scale the integer portion according to the DECIMAL description - value *= helpers::powerOf10_c[decimals]; - - // Get the fractional part. - if (endptr && (*endptr == *convData->decimal_point || *endptr == '.')) - { - s = endptr + 1; - - // Get the digits to the right of the decimal - // Only retrieve those that matter based on scale. - for (fracChars = 0; - *s && isdigit(*s) && fracChars < decimals; - ++fracChars, ++s) - { - // Save the frac characters to a side buffer. This way we can limit - // ourselves to the scale without modifying the original string. - fracBuf[fracChars] = *s; - } - - fracBuf[fracChars] = 0; - - // Check to see if we need to round - if (isdigit(*s) && *s > '4') - { - bRound = true; - } - } - - frac = strtoll(fracBuf, &endptr, 10); - value += frac + (bRound ? 1 : 0); - value *= negate; - } - - decimal.value = value; - - if (decimal.value > max_number_decimal) - decimal.value = max_number_decimal; - else if (decimal.value < -max_number_decimal) - decimal.value = -max_number_decimal; - } + datatypes::DataCondition convError; + return IDB_Decimal(strValue.data(), strValue.length(), convError, decimals, max_length); } + break; case execplan::CalpontSystemCatalog::DATE: