// Mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
// (synced 2025-04-18 21:44:02 +03:00; 617 lines, 16 KiB, C++)
/* Copyright (C) 2021 MariaDB Corporation.
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; version 2 of
|
|
the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
MA 02110-1301, USA. */
|
|
|
|
#pragma once
|
|
|
|
#include "genericparser.h"
|
|
#include "mcs_datatype.h"
|
|
|
|
namespace literal
|
|
{
|
|
using datatypes::DataCondition;
|
|
using genericparser::Parser;
|
|
using utils::ConstString;
|
|
|
|
// Number of fractional decimal digits of a packed decimal value.
using scale_t = uint32_t;
|
|
|
|
template <class A>
|
|
class Converter : public Parser, public A
|
|
{
|
|
public:
|
|
Converter(const char* str, size_t length, DataCondition& error)
|
|
: Parser(str, length), A(&Parser::skipLeadingSpaces())
|
|
{
|
|
if (Parser::syntaxError())
|
|
{
|
|
/*
|
|
Non-recoverable syntax error happened. The parser parsed the first part
|
|
of a combined rule (and therefore shifted the tokenizer position)
|
|
but then failed to parse the rule till the end.
|
|
|
|
For example in the <signed numeric literal>:
|
|
'' - empty string
|
|
'+' - sign was not followed by a digit or period, expect '+1'
|
|
'.' - period was not followed by a digit, expect '.1'
|
|
'1e' - exponent marker was not followed by <exponent>, expect '1e1'
|
|
'1e+' - in <exponent>, <sign> was not followed by a digit, expect '1e+1'
|
|
*/
|
|
error |= (DataCondition::X_INVALID_CHARACTER_VALUE_FOR_CAST);
|
|
}
|
|
}
|
|
Converter(const std::string& str, DataCondition& error) : Converter(str.data(), str.length(), error)
|
|
{
|
|
}
|
|
};
|
|
|
|
/*
|
|
|
|
SQL Standard definition for <cast specification>
|
|
related to character string to exact number conversion
|
|
======================================================
|
|
Abbreviations:
|
|
- TD - the target data type
|
|
- SD - the datatype of the source value
|
|
- SV - the source value
|
|
|
|
8) If TD is exact numeric, then
|
|
a) If SD is exact numeric or approximate numeric, then
|
|
Case:
|
|
|
|
i) If there is a representation of SV in the data type TD that does not lose
|
|
any leading significant digits after rounding or truncating if necessary,
|
|
then TV is that representation. The choice of whether to round or truncate
|
|
is implementation-defined. (NoteAI)
|
|
ii) Otherwise, an exception condition is raised:
|
|
data exception -- numeric value out of range. (NoteAII)
|
|
|
|
b) If SD is character string, then SV is replaced by SV with any leading
|
|
or trailing <space>s removed. (NoteB)
|
|
Case:
|
|
|
|
i) If SV does not comprise a <signed numeric literal> as defined by the rules
|
|
for <literal> in Subclause "<literal>", then an exception condition is raised:
|
|
data exception - invalid character value for cast. (NoteBI)
|
|
ii) Otherwise, let LT be that <signed numeric literal>.
|
|
The <cast specification> is equivalent to CAST ( LT AS TD )
|
|
|
|
|
|
Implementation details
|
|
======================
|
|
NoteAI
|
|
----
|
|
The implementation defined choice whether to round or truncate is
|
|
"round away from zero".
|
|
|
|
NoteAII
|
|
-----
|
|
When the "numeric value out of range" state is found, it is signalled
|
|
to the caller, and the returned value is adjusted according to the TD range.
|
|
The caller later decides whether to raise an error or to use the adjusted value.
|
|
|
|
NoteB
|
|
-----
|
|
The implementation removes only leading spaces. The caller can
|
|
check if any trailing spaces are left by the parser.
|
|
|
|
NoteBI
|
|
------
|
|
The implementation stops on the first character that does not
|
|
conform to the <signed numeric literal> syntax. The caller can
|
|
check if any trailing garbage characters are left by the parser.
|
|
|
|
|
|
Grammar
|
|
=======
|
|
|
|
<signed numeric literal> ::= [ <sign> ] <unsigned numeric literal>
|
|
|
|
<unsigned numeric literal> ::= <exact numeric literal> [ E <exponent> ]
|
|
|
|
<exact numeric literal> ::=
|
|
<unsigned integer> [ <period> [ <unsigned integer> ] ]
|
|
| <period> <unsigned integer>
|
|
|
|
<sign> ::= <plus sign> | <minus sign>
|
|
|
|
<exponent> ::= <signed integer>
|
|
|
|
<signed integer> ::= [ <sign> ] <unsigned integer>
|
|
|
|
<unsigned integer> ::= <digit> ...
|
|
|
|
*/
|
|
|
|
//
|
|
// Terminal symbols
|
|
//
|
|
|
|
class Period : public ConstString
|
|
{
|
|
public:
|
|
explicit Period(Parser* p) : ConstString(p->tokenChar('.'))
|
|
{
|
|
}
|
|
bool isNull() const
|
|
{
|
|
return mStr == nullptr;
|
|
}
|
|
};
|
|
|
|
class ExponentMarker : public ConstString
|
|
{
|
|
public:
|
|
explicit ExponentMarker(Parser* p) : ConstString(p->tokenAnyCharOf('e', 'E'))
|
|
{
|
|
}
|
|
bool isNull() const
|
|
{
|
|
return mStr == nullptr;
|
|
}
|
|
};
|
|
|
|
class Sign : public ConstString
|
|
{
|
|
public:
|
|
explicit Sign() : ConstString(NULL, 0)
|
|
{
|
|
}
|
|
explicit Sign(const ConstString& str) : ConstString(str)
|
|
{
|
|
}
|
|
explicit Sign(Parser* p) : ConstString(p->tokenAnyCharOf('+', '-'))
|
|
{
|
|
}
|
|
static Sign empty(Parser* p)
|
|
{
|
|
return Sign(p->tokStartConstString());
|
|
}
|
|
bool isNull() const
|
|
{
|
|
return mStr == nullptr;
|
|
}
|
|
bool negative() const
|
|
{
|
|
return eq('-');
|
|
}
|
|
};
|
|
|
|
class Digits : public ConstString
|
|
{
|
|
public:
|
|
explicit Digits() : ConstString(NULL, 0)
|
|
{
|
|
}
|
|
explicit Digits(const char* str, size_t length) : ConstString(str, length)
|
|
{
|
|
}
|
|
explicit Digits(const ConstString& str) : ConstString(str)
|
|
{
|
|
}
|
|
explicit Digits(Parser* p) : ConstString(p->tokenDigits())
|
|
{
|
|
}
|
|
bool isNull() const
|
|
{
|
|
return mStr == nullptr;
|
|
}
|
|
|
|
void skipLeadingZeroDigits()
|
|
{
|
|
for (; mLength > 0 && mStr[0] == '0';)
|
|
{
|
|
mStr++;
|
|
mLength--;
|
|
}
|
|
}
|
|
void skipTrailingZeroDigits()
|
|
{
|
|
for (; mLength > 0 && mStr[mLength - 1] == '0';)
|
|
mLength--;
|
|
}
|
|
};
|
|
|
|
//
|
|
// Non-terminal symbols
|
|
//
|
|
|
|
// <unsigned integer> ::= <digit> ...
|
|
class UnsignedInteger : public Digits
|
|
{
|
|
public:
|
|
explicit UnsignedInteger() : Digits()
|
|
{
|
|
}
|
|
explicit UnsignedInteger(const char* str, size_t length) : Digits(str, length)
|
|
{
|
|
}
|
|
explicit UnsignedInteger(const ConstString& str) : Digits(str)
|
|
{
|
|
}
|
|
explicit UnsignedInteger(Parser* p) : Digits(p)
|
|
{
|
|
}
|
|
static UnsignedInteger empty(const Parser* p)
|
|
{
|
|
return UnsignedInteger(p->tokStartConstString());
|
|
}
|
|
UnsignedInteger left(size_t len) const
|
|
{
|
|
return UnsignedInteger(str(), length() > len ? len : length());
|
|
}
|
|
|
|
template <typename T>
|
|
T toXIntPositiveContinue(T start, DataCondition& error) const
|
|
{
|
|
const char* e = end();
|
|
T val = start;
|
|
for (const char* s = mStr; s < e; s++)
|
|
{
|
|
constexpr T cutoff = datatypes::numeric_limits<T>::max() / 10;
|
|
if (val > cutoff)
|
|
{
|
|
error |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
|
|
return datatypes::numeric_limits<T>::max();
|
|
}
|
|
val *= 10;
|
|
T newval = val + (s[0] - '0');
|
|
if (newval < val)
|
|
{
|
|
error |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
|
|
return datatypes::numeric_limits<T>::max();
|
|
}
|
|
val = newval;
|
|
}
|
|
return val;
|
|
}
|
|
template <typename T>
|
|
T toXIntPositive(DataCondition& error) const
|
|
{
|
|
return toXIntPositiveContinue<T>(0, error);
|
|
}
|
|
|
|
template <typename T>
|
|
T toSIntNegativeContinue(T start, DataCondition& error) const
|
|
{
|
|
const char* e = end();
|
|
T val = start;
|
|
for (const char* s = mStr; s < e; s++)
|
|
{
|
|
constexpr T cutoff = datatypes::numeric_limits<T>::min() / 10;
|
|
if (val < cutoff)
|
|
{
|
|
error |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
|
|
return datatypes::numeric_limits<T>::min();
|
|
}
|
|
val *= 10;
|
|
T newval = val - (s[0] - '0');
|
|
if (newval > val)
|
|
{
|
|
error |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
|
|
return datatypes::numeric_limits<T>::min();
|
|
}
|
|
val = newval;
|
|
}
|
|
return val;
|
|
}
|
|
template <typename T>
|
|
T toSIntNegative(DataCondition& error) const
|
|
{
|
|
return toSIntNegativeContinue<T>(0, error);
|
|
}
|
|
|
|
template <typename T>
|
|
T toXIntPositiveRoundAwayFromZeroContinue(T start, bool round, DataCondition& error) const
|
|
{
|
|
T val = toXIntPositiveContinue<T>(start, error);
|
|
if (val == datatypes::numeric_limits<T>::max() && round)
|
|
{
|
|
error |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
|
|
return val;
|
|
}
|
|
return val + round;
|
|
}
|
|
template <typename T>
|
|
T toXIntPositiveRoundAwayFromZero(bool round, DataCondition& error) const
|
|
{
|
|
return toXIntPositiveRoundAwayFromZeroContinue<T>(0, round, error);
|
|
}
|
|
};
|
|
|
|
// <signed integer> := [<sign>] <unsigned integer>
|
|
class SignedInteger : public Parser::DD2OM<Sign, UnsignedInteger>
|
|
{
|
|
public:
|
|
using DD2OM::DD2OM;
|
|
bool isNull() const
|
|
{
|
|
return UnsignedInteger::isNull();
|
|
}
|
|
|
|
template <typename T>
|
|
T abs(DataCondition& error) const
|
|
{
|
|
return toXIntPositive<T>(error);
|
|
}
|
|
|
|
template <typename T>
|
|
T toSInt(DataCondition& error) const
|
|
{
|
|
return negative() ? toSIntNegative<T>(error) : toXIntPositive<T>(error);
|
|
}
|
|
};
|
|
|
|
// E <signed integer>
|
|
class EExponent : public Parser::UD2MM<ExponentMarker, SignedInteger>
|
|
{
|
|
public:
|
|
using UD2MM::UD2MM;
|
|
};
|
|
|
|
// <period> <unsigned integer>
|
|
class ExactUnsignedNumericLiteralFractionAlone : public Parser::UD2MM<Period, UnsignedInteger>
|
|
{
|
|
public:
|
|
using UD2MM::UD2MM;
|
|
};
|
|
|
|
// <period> [ <unsigned integer> ]
|
|
class PeriodOptUnsignedInteger : public Parser::UD2MO<Period, UnsignedInteger>
|
|
{
|
|
public:
|
|
using UD2MO::UD2MO;
|
|
static PeriodOptUnsignedInteger empty(Parser* p)
|
|
{
|
|
return PeriodOptUnsignedInteger(UnsignedInteger(p->tokStartConstString()));
|
|
}
|
|
const PeriodOptUnsignedInteger& fraction() const
|
|
{
|
|
return *this;
|
|
}
|
|
};
|
|
|
|
// <integral unsigned integer> := <unsigned integer>
|
|
class IntegralUnsignedInteger : public UnsignedInteger
|
|
{
|
|
public:
|
|
explicit IntegralUnsignedInteger(Parser* p) : UnsignedInteger(p)
|
|
{
|
|
}
|
|
const UnsignedInteger& integral() const
|
|
{
|
|
return *this;
|
|
}
|
|
};
|
|
|
|
// <integral unsigned integer> [ <period> [ <unsigned integer> ] ]
|
|
|
|
class ExactUnsignedNumericLiteralIntegralOptFraction
|
|
: public Parser::DD2MO<IntegralUnsignedInteger, PeriodOptUnsignedInteger>
|
|
{
|
|
public:
|
|
using DD2MO::DD2MO;
|
|
};
|
|
|
|
// A container for integral and fractional parts
|
|
class UnsignedIntegerDecimal
|
|
{
|
|
protected:
|
|
UnsignedInteger mIntegral;
|
|
UnsignedInteger mFraction;
|
|
|
|
public:
|
|
explicit UnsignedIntegerDecimal(const UnsignedInteger& intg, const UnsignedInteger& frac)
|
|
: mIntegral(intg), mFraction(frac)
|
|
{
|
|
}
|
|
explicit UnsignedIntegerDecimal(const ExactUnsignedNumericLiteralFractionAlone& rhs) : mFraction(rhs)
|
|
{
|
|
}
|
|
explicit UnsignedIntegerDecimal(const ExactUnsignedNumericLiteralIntegralOptFraction& rhs)
|
|
: mIntegral(rhs.integral()), mFraction(rhs.fraction())
|
|
{
|
|
}
|
|
|
|
size_t IntFracDigits() const
|
|
{
|
|
return mIntegral.length() + mFraction.length();
|
|
}
|
|
|
|
bool isNull() const
|
|
{
|
|
return mIntegral.isNull() && mFraction.isNull();
|
|
}
|
|
|
|
void normalize()
|
|
{
|
|
mIntegral.skipLeadingZeroDigits();
|
|
mFraction.skipTrailingZeroDigits();
|
|
}
|
|
|
|
template <typename T>
|
|
T toXIntPositive(DataCondition& error) const
|
|
{
|
|
T val = mIntegral.toXIntPositive<T>(error);
|
|
return mFraction.toXIntPositiveContinue<T>(val, error);
|
|
}
|
|
|
|
template <typename T>
|
|
T toXIntPositiveRoundAwayFromZero(bool roundUp, DataCondition& error) const
|
|
{
|
|
T val = mIntegral.toXIntPositive<T>(error);
|
|
return mFraction.toXIntPositiveRoundAwayFromZeroContinue<T>(val, roundUp, error);
|
|
}
|
|
|
|
template <typename T>
|
|
T toXIntPositiveScaleUp(size_t scale, DataCondition& error) const
|
|
{
|
|
T val = toXIntPositive<T>(error);
|
|
if (val == datatypes::numeric_limits<T>::max())
|
|
return val;
|
|
for (; scale; scale--)
|
|
{
|
|
constexpr T cutoff = datatypes::numeric_limits<T>::max() / 10;
|
|
if (val > cutoff)
|
|
{
|
|
error |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
|
|
return datatypes::numeric_limits<T>::max();
|
|
}
|
|
val *= 10;
|
|
}
|
|
return val;
|
|
}
|
|
|
|
template <typename T>
|
|
T toXIntPositiveRound(DataCondition& error) const
|
|
{
|
|
bool roundUp = mFraction.length() && mFraction.str()[0] >= '5';
|
|
return mIntegral.toXIntPositiveRoundAwayFromZero<T>(roundUp, error);
|
|
}
|
|
|
|
template <typename T>
|
|
T toXIntPositiveRoundExp(uint64_t absExp, bool negExp, DataCondition& error) const
|
|
{
|
|
if (absExp == 0)
|
|
return toXIntPositiveRound<T>(error);
|
|
|
|
if (negExp)
|
|
{
|
|
if (mIntegral.length() == absExp) // 567.8e-3 -> 0.5678 -> 1
|
|
return mIntegral.str()[0] >= '5' ? 1 : 0;
|
|
if (mIntegral.length() < absExp) // 123e-4 -> 0.0123
|
|
return 0;
|
|
// mIntegral.length() > absExp: 5678.8e-3 -> 5.6788 -> 6
|
|
size_t diff = mIntegral.length() - absExp;
|
|
const UnsignedInteger tmp(mIntegral.str(), diff);
|
|
bool roundUp = mIntegral.str()[diff] >= '5';
|
|
return tmp.toXIntPositiveRoundAwayFromZero<T>(roundUp, error);
|
|
}
|
|
|
|
// Positive exponent: 123.456e2
|
|
if (mFraction.length() >= absExp) // 123.456e2 -> 12345.6 -> 12346
|
|
{
|
|
bool roundUp = mFraction.length() > absExp && mFraction.str()[absExp] >= '5';
|
|
UnsignedIntegerDecimal tmp(mIntegral, mFraction.left(absExp));
|
|
return tmp.toXIntPositiveRoundAwayFromZero<T>(roundUp, error);
|
|
}
|
|
|
|
// Pad int+frac with right zeros 123.4e3 -> 123400
|
|
size_t diff = absExp - mFraction.length();
|
|
return toXIntPositiveScaleUp<T>(diff, error);
|
|
}
|
|
};
|
|
|
|
// <exact unsigned numeric literal> :=
|
|
// <period> [ <unsigned integer> ]
|
|
// | <unsigned integer> [ <period> [ <unsigned integer> ] ]
|
|
|
|
class ExactUnsignedNumericLiteral
|
|
: public Parser::Choice2<UnsignedIntegerDecimal, ExactUnsignedNumericLiteralFractionAlone,
|
|
ExactUnsignedNumericLiteralIntegralOptFraction>
|
|
{
|
|
public:
|
|
using Choice2::Choice2;
|
|
};
|
|
|
|
// <unsigned numeric literal> ::= <exact numeric literal> [ E <exponent> ]
|
|
|
|
class UnsignedNumericLiteral : public Parser::DM2MO<ExactUnsignedNumericLiteral, EExponent>
|
|
{
|
|
public:
|
|
using DM2MO::DM2MO;
|
|
void normalize()
|
|
{
|
|
ExactUnsignedNumericLiteral::normalize();
|
|
mB.skipLeadingZeroDigits();
|
|
}
|
|
const SignedInteger& exponent() const
|
|
{
|
|
return mB;
|
|
}
|
|
|
|
template <typename T>
|
|
T toXIntPositiveRound(DataCondition& error) const
|
|
{
|
|
size_t availableDigits = IntFracDigits();
|
|
if (!availableDigits)
|
|
return 0;
|
|
T absexp = exponent().abs<T>(error);
|
|
return ExactUnsignedNumericLiteral::toXIntPositiveRoundExp<T>(absexp, exponent().negative(), error);
|
|
}
|
|
|
|
template <typename T>
|
|
T toPackedDecimalPositive(scale_t scale, DataCondition& error) const
|
|
{
|
|
size_t availableDigits = IntFracDigits();
|
|
if (!availableDigits)
|
|
return 0;
|
|
int64_t exp = exponent().toSInt<int64_t>(error);
|
|
if (exp <= datatypes::numeric_limits<int64_t>::max() - scale)
|
|
exp += scale;
|
|
if (exp < 0)
|
|
{
|
|
if (exp == datatypes::numeric_limits<int64_t>::min())
|
|
exp++; // Avoid undefined behaviour in the unary minus below:
|
|
return ExactUnsignedNumericLiteral::toXIntPositiveRoundExp<T>((uint64_t)-exp, true, error);
|
|
}
|
|
return ExactUnsignedNumericLiteral::toXIntPositiveRoundExp<T>((uint64_t)exp, false, error);
|
|
}
|
|
};
|
|
|
|
// <signed numeric literal> ::= [ <sign> ] <unsigned numeric literal>
|
|
class SignedNumericLiteral : public Parser::DD2OM<Sign, UnsignedNumericLiteral>
|
|
{
|
|
public:
|
|
using DD2OM::DD2OM;
|
|
bool isNull() const
|
|
{
|
|
return UnsignedNumericLiteral::isNull();
|
|
}
|
|
|
|
template <typename T>
|
|
T toUIntXRound() const
|
|
{
|
|
if (negative())
|
|
return 0;
|
|
return UnsignedNumericLiteral::toXIntPositiveRound<T>();
|
|
}
|
|
|
|
template <typename T>
|
|
T toPackedUDecimal(scale_t scale, DataCondition& error) const
|
|
{
|
|
if (negative())
|
|
return 0;
|
|
return UnsignedNumericLiteral::toPackedDecimalPositive<T>(scale, error);
|
|
}
|
|
|
|
template <typename T>
|
|
T toPackedSDecimal(scale_t scale, DataCondition& error) const
|
|
{
|
|
if (!negative())
|
|
return UnsignedNumericLiteral::toPackedDecimalPositive<T>(scale, error);
|
|
typedef typename datatypes::make_unsigned<T>::type UT;
|
|
UT absval = UnsignedNumericLiteral::toPackedDecimalPositive<UT>(scale, error);
|
|
if (absval >= (UT)datatypes::numeric_limits<T>::min())
|
|
{
|
|
error |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
|
|
return datatypes::numeric_limits<T>::min();
|
|
}
|
|
return -(T)absval;
|
|
}
|
|
};
|
|
|
|
} // namespace literal
|