/* Copyright (C) 2021 MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #pragma once #include "genericparser.h" #include "mcs_datatype.h" namespace literal { using datatypes::DataCondition; using genericparser::Parser; using utils::ConstString; typedef uint32_t scale_t; template class Converter : public Parser, public A { public: Converter(const char* str, size_t length, DataCondition& error) : Parser(str, length), A(&Parser::skipLeadingSpaces()) { if (Parser::syntaxError()) { /* Non-recoverable syntax error happened. The parser parsed the first part of a combined rule (and therefore shifted the tokenizer position) but then failed to parse the rule till the end. For example in the : '' - empty string '+' - sign was not followed by a digit or period, expect '+1' '.' - period was not followed by a digit, expect '.1' '1e' - exponent marker was not followed by , expect '1e1' '1e+' - in , was not followed by a digit, expect '1e+1' */ error |= (DataCondition::X_INVALID_CHARACTER_VALUE_FOR_CAST); } } Converter(const std::string& str, DataCondition& error) : Converter(str.data(), str.length(), error) { } }; /* SQL Standard definition for related to character string to exact number conversion ====================================================== Abbreviations: - TD - the target data type - SD - the datatype of the source value - SV - the source value 8) If TD is exact numeric, then a) If SD is exact numeric or approximate numeric, then Case: i) If there is a representation of SV in the data type TD that does not lose any leading significant digits after rounding or truncating if necessary, then TV is that representation. The choice of whether to round or truncate is implementation-defined. (NoteAI) ii) Otherwise, an exception condition is raised: data exception -- numeric value out of range. (NoteAII) b) If SD is character string, then SV is replaced by SV with any leading or trailing s removed. (NoteB) Case: i) If SV does not comprise a as defined by the rules for in Subclause "", then an exception condition is raised: data exception - invalid character value for cast. (NoteBI) ii) Otherwise, let LT be that . The is equivalent to CAST ( LT AS TD ) Implementation details ====================== NoteAI ---- The implementation defined choice whether to round or truncate is "round away from zero". NoteAII ----- When the "numeric value out of range" state is found, it is signalled to the caller, and the returned value is adjusted according to the TD range. The caller later decides whether to raise an error or to use the adjusted value. NoteB ----- The implementation removes only leading spaces. The caller can check if any trailing spaces are left by the parser. NoteBI ------ The implementation stops on the first character that does not conform to the syntax. The caller can check if any trailing garbage characters are left by the parser. Grammar ======= ::= [ ] ::= [ E ] ::= [ [ ] ] | ::= | ::= ::= [ ] ::= ... */ // // Terminal symbols // class Period : public ConstString { public: explicit Period(Parser* p) : ConstString(p->tokenChar('.')) { } bool isNull() const { return mStr == nullptr; } }; class ExponentMarker : public ConstString { public: explicit ExponentMarker(Parser* p) : ConstString(p->tokenAnyCharOf('e', 'E')) { } bool isNull() const { return mStr == nullptr; } }; class Sign : public ConstString { public: explicit Sign() : ConstString(NULL, 0) { } explicit Sign(const ConstString& str) : ConstString(str) { } explicit Sign(Parser* p) : ConstString(p->tokenAnyCharOf('+', '-')) { } static Sign empty(Parser* p) { return Sign(p->tokStartConstString()); } bool isNull() const { return mStr == nullptr; } bool negative() const { return eq('-'); } }; class Digits : public ConstString { public: explicit Digits() : ConstString(NULL, 0) { } explicit Digits(const char* str, size_t length) : ConstString(str, length) { } explicit Digits(const ConstString& str) : ConstString(str) { } explicit Digits(Parser* p) : ConstString(p->tokenDigits()) { } bool isNull() const { return mStr == nullptr; } void skipLeadingZeroDigits() { for (; mLength > 0 && mStr[0] == '0';) { mStr++; mLength--; } } void skipTrailingZeroDigits() { for (; mLength > 0 && mStr[mLength - 1] == '0';) mLength--; } }; // // Non-terminal symbols // // ::= ... class UnsignedInteger : public Digits { public: explicit UnsignedInteger() : Digits() { } explicit UnsignedInteger(const char* str, size_t length) : Digits(str, length) { } explicit UnsignedInteger(const ConstString& str) : Digits(str) { } explicit UnsignedInteger(Parser* p) : Digits(p) { } static UnsignedInteger empty(const Parser* p) { return UnsignedInteger(p->tokStartConstString()); } UnsignedInteger left(size_t len) const { return UnsignedInteger(str(), length() > len ? len : length()); } template T toXIntPositiveContinue(T start, DataCondition& error) const { const char* e = end(); T val = start; for (const char* s = mStr; s < e; s++) { constexpr T cutoff = datatypes::numeric_limits::max() / 10; if (val > cutoff) { error |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE; return datatypes::numeric_limits::max(); } val *= 10; T newval = val + (s[0] - '0'); if (newval < val) { error |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE; return datatypes::numeric_limits::max(); } val = newval; } return val; } template T toXIntPositive(DataCondition& error) const { return toXIntPositiveContinue(0, error); } template T toSIntNegativeContinue(T start, DataCondition& error) const { const char* e = end(); T val = start; for (const char* s = mStr; s < e; s++) { constexpr T cutoff = datatypes::numeric_limits::min() / 10; if (val < cutoff) { error |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE; return datatypes::numeric_limits::min(); } val *= 10; T newval = val - (s[0] - '0'); if (newval > val) { error |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE; return datatypes::numeric_limits::min(); } val = newval; } return val; } template T toSIntNegative(DataCondition& error) const { return toSIntNegativeContinue(0, error); } template T toXIntPositiveRoundAwayFromZeroContinue(T start, bool round, DataCondition& error) const { T val = toXIntPositiveContinue(start, error); if (val == datatypes::numeric_limits::max() && round) { error |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE; return val; } return val + round; } template T toXIntPositiveRoundAwayFromZero(bool round, DataCondition& error) const { return toXIntPositiveRoundAwayFromZeroContinue(0, round, error); } }; // := [] class SignedInteger : public Parser::DD2OM { public: using DD2OM::DD2OM; bool isNull() const { return UnsignedInteger::isNull(); } template T abs(DataCondition& error) const { return toXIntPositive(error); } template T toSInt(DataCondition& error) const { return negative() ? toSIntNegative(error) : toXIntPositive(error); } }; // E class EExponent : public Parser::UD2MM { public: using UD2MM::UD2MM; }; // class ExactUnsignedNumericLiteralFractionAlone : public Parser::UD2MM { public: using UD2MM::UD2MM; }; // [ ] class PeriodOptUnsignedInteger : public Parser::UD2MO { public: using UD2MO::UD2MO; static PeriodOptUnsignedInteger empty(Parser* p) { return PeriodOptUnsignedInteger(UnsignedInteger(p->tokStartConstString())); } const PeriodOptUnsignedInteger& fraction() const { return *this; } }; // := class IntegralUnsignedInteger : public UnsignedInteger { public: explicit IntegralUnsignedInteger(Parser* p) : UnsignedInteger(p) { } const UnsignedInteger& integral() const { return *this; } }; // [ [ ] ] class ExactUnsignedNumericLiteralIntegralOptFraction : public Parser::DD2MO { public: using DD2MO::DD2MO; }; // A container for integral and fractional parts class UnsignedIntegerDecimal { protected: UnsignedInteger mIntegral; UnsignedInteger mFraction; public: explicit UnsignedIntegerDecimal(const UnsignedInteger& intg, const UnsignedInteger& frac) : mIntegral(intg), mFraction(frac) { } explicit UnsignedIntegerDecimal(const ExactUnsignedNumericLiteralFractionAlone& rhs) : mFraction(rhs) { } explicit UnsignedIntegerDecimal(const ExactUnsignedNumericLiteralIntegralOptFraction& rhs) : mIntegral(rhs.integral()), mFraction(rhs.fraction()) { } size_t IntFracDigits() const { return mIntegral.length() + mFraction.length(); } bool isNull() const { return mIntegral.isNull() && mFraction.isNull(); } void normalize() { mIntegral.skipLeadingZeroDigits(); mFraction.skipTrailingZeroDigits(); } template T toXIntPositive(DataCondition& error) const { T val = mIntegral.toXIntPositive(error); return mFraction.toXIntPositiveContinue(val, error); } template T toXIntPositiveRoundAwayFromZero(bool roundUp, DataCondition& error) const { T val = mIntegral.toXIntPositive(error); return mFraction.toXIntPositiveRoundAwayFromZeroContinue(val, roundUp, error); } template T toXIntPositiveScaleUp(size_t scale, DataCondition& error) const { T val = toXIntPositive(error); if (val == datatypes::numeric_limits::max()) return val; for (; scale; scale--) { constexpr T cutoff = datatypes::numeric_limits::max() / 10; if (val > cutoff) { error |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE; return datatypes::numeric_limits::max(); } val *= 10; } return val; } template T toXIntPositiveRound(DataCondition& error) const { bool roundUp = mFraction.length() && mFraction.str()[0] >= '5'; return mIntegral.toXIntPositiveRoundAwayFromZero(roundUp, error); } template T toXIntPositiveRoundExp(uint64_t absExp, bool negExp, DataCondition& error) const { if (absExp == 0) return toXIntPositiveRound(error); if (negExp) { if (mIntegral.length() == absExp) // 567.8e-3 -> 0.5678 -> 1 return mIntegral.str()[0] >= '5' ? 1 : 0; if (mIntegral.length() < absExp) // 123e-4 -> 0.0123 return 0; // mIntegral.length() > absExp: 5678.8e-3 -> 5.6788 -> 6 size_t diff = mIntegral.length() - absExp; const UnsignedInteger tmp(mIntegral.str(), diff); bool roundUp = mIntegral.str()[diff] >= '5'; return tmp.toXIntPositiveRoundAwayFromZero(roundUp, error); } // Positive exponent: 123.456e2 if (mFraction.length() >= absExp) // 123.456e2 -> 12345.6 -> 12346 { bool roundUp = mFraction.length() > absExp && mFraction.str()[absExp] >= '5'; UnsignedIntegerDecimal tmp(mIntegral, mFraction.left(absExp)); return tmp.toXIntPositiveRoundAwayFromZero(roundUp, error); } // Pad int+frac with right zeros 123.4e3 -> 123400 size_t diff = absExp - mFraction.length(); return toXIntPositiveScaleUp(diff, error); } }; // := // [ ] // | [ [ ] ] class ExactUnsignedNumericLiteral : public Parser::Choice2 { public: using Choice2::Choice2; }; // ::= [ E ] class UnsignedNumericLiteral : public Parser::DM2MO { public: using DM2MO::DM2MO; void normalize() { ExactUnsignedNumericLiteral::normalize(); mB.skipLeadingZeroDigits(); } const SignedInteger& exponent() const { return mB; } template T toXIntPositiveRound(DataCondition& error) const { size_t availableDigits = IntFracDigits(); if (!availableDigits) return 0; T absexp = exponent().abs(error); return ExactUnsignedNumericLiteral::toXIntPositiveRoundExp(absexp, exponent().negative(), error); } template T toPackedDecimalPositive(scale_t scale, DataCondition& error) const { size_t availableDigits = IntFracDigits(); if (!availableDigits) return 0; int64_t exp = exponent().toSInt(error); if (exp <= datatypes::numeric_limits::max() - scale) exp += scale; if (exp < 0) { if (exp == datatypes::numeric_limits::min()) exp++; // Avoid undefined behaviour in the unary minus below: return ExactUnsignedNumericLiteral::toXIntPositiveRoundExp((uint64_t)-exp, true, error); } return ExactUnsignedNumericLiteral::toXIntPositiveRoundExp((uint64_t)exp, false, error); } }; // ::= [ ] class SignedNumericLiteral : public Parser::DD2OM { public: using DD2OM::DD2OM; bool isNull() const { return UnsignedNumericLiteral::isNull(); } template T toUIntXRound() const { if (negative()) return 0; return UnsignedNumericLiteral::toXIntPositiveRound(); } template T toPackedUDecimal(scale_t scale, DataCondition& error) const { if (negative()) return 0; return UnsignedNumericLiteral::toPackedDecimalPositive(scale, error); } template T toPackedSDecimal(scale_t scale, DataCondition& error) const { if (!negative()) return UnsignedNumericLiteral::toPackedDecimalPositive(scale, error); typedef typename datatypes::make_unsigned::type UT; UT absval = UnsignedNumericLiteral::toPackedDecimalPositive(scale, error); if (absval >= (UT)datatypes::numeric_limits::min()) { error |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE; return datatypes::numeric_limits::min(); } return -(T)absval; } }; } // namespace literal