mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-04-18 21:44:02 +03:00
The idea is relatively simple: encode prefixes of collated strings as integers and use them to compute extents' ranges. Then we can eliminate extents using strings. The patch contains all the code for this but misses one important step: we do not keep the collation index, we keep the charset index. Because of this, some of the tests in the bugfix suite fail, and thus the main functionality is turned off. The reason this patch is put into a PR at all is that it contains changes that make CHAR/VARCHAR columns unsigned. This change is needed for the vectorization work.
618 lines
16 KiB
C++
618 lines
16 KiB
C++
/* Copyright (C) 2021 MariaDB Corporation.
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; version 2 of
|
|
the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
MA 02110-1301, USA. */
|
|
|
|
#pragma once
|
|
|
|
#include "genericparser.h"
|
|
#include "mcs_datatype.h"
|
|
|
|
namespace literal
|
|
{
|
|
using datatypes::DataCondition;
|
|
using genericparser::Parser;
|
|
using utils::ConstString;
|
|
|
|
typedef uint32_t scale_t;
|
|
|
|
template <class A>
|
|
class Converter : public Parser, public A
|
|
{
|
|
public:
|
|
Converter(const char* str, size_t length, DataCondition& error)
|
|
: Parser(str, length), A(&Parser::skipLeadingSpaces())
|
|
{
|
|
if (Parser::syntaxError())
|
|
{
|
|
/*
|
|
Non-recoverable syntax error happened. The parser parsed the first part
|
|
of a combined rule (and therefore shifted the tokenizer position)
|
|
but then failed to parse the rule till the end.
|
|
|
|
For example in the <signed numeric literal>:
|
|
'' - empty string
|
|
'+' - sign was not followed by a digit or period, expect '+1'
|
|
'.' - period was not followed by a digit, expect '.1'
|
|
'1e' - exponent marker was not followed by <exponent>, expect '1e1'
|
|
'1e+' - in <exponent>, <sign> was not followed by a digit, expect '1e+1'
|
|
*/
|
|
error |= (DataCondition::X_INVALID_CHARACTER_VALUE_FOR_CAST);
|
|
}
|
|
}
|
|
Converter(const std::string& str, DataCondition& error) : Converter(str.data(), str.length(), error)
|
|
{
|
|
}
|
|
};
|
|
|
|
/*
|
|
|
|
SQL Standard definition for <cast specification>
|
|
related to character string to exact number conversion
|
|
======================================================
|
|
Abbreviations:
|
|
- TD - the target data type
|
|
- SD - the datatype of the source value
|
|
- SV - the source value
|
|
|
|
8) If TD is exact numeric, then
|
|
a) If SD is exact numeric or approximate numeric, then
|
|
Case:
|
|
|
|
i) If there is a representation of SV in the data type TD that does not lose
|
|
any leading significant digits after rounding or truncating if necessary,
|
|
then TV is that representation. The choice of whether to round or truncate
|
|
is implementation-defined. (NoteAI)
|
|
ii) Otherwise, an exception condition is raised:
|
|
data exception -- numeric value out of range. (NoteAII)
|
|
|
|
b) If SD is character string, then SV is replaced by SV with any leading
|
|
or trailing <space>s removed. (NoteB)
|
|
Case:
|
|
|
|
i) If SV does not comprise a <signed numeric literal> as defined by the rules
|
|
for <literal> in Subclause "<literal>", then an exception condition is raised:
|
|
data exception - invalid character value for cast. (NoteBI)
|
|
ii) Otherwise, let LT be that <signed numeric literal>.
|
|
The <cast specification> is equivalent to CAST ( LT AS TD )
|
|
|
|
|
|
Implementation details
|
|
======================
|
|
NoteAI
|
|
----
|
|
The implementation defined choice whether to round or truncate is
|
|
"round away from zero".
|
|
|
|
NoteAII
|
|
-----
|
|
When the "numeric value out of range" state is found, it is signalled
|
|
to the caller, and the returned value is adjusted according to the TD range.
|
|
The caller later decides whether to raise an error or to use the adjusted value.
|
|
|
|
NoteB
|
|
-----
|
|
The implementation removes only leading spaces. The caller can
|
|
check if any trailing spaces are left by the parser.
|
|
|
|
NoteBI
|
|
------
|
|
The implementation stops on the first character that does not
|
|
conform to the <signed numeric literal> syntax. The caller can
|
|
check if any trailing garbage characters are left by the parser.
|
|
|
|
|
|
Grammar
|
|
=======
|
|
|
|
<signed numeric literal> ::= [ <sign> ] <unsigned numeric literal>
|
|
|
|
<unsigned numeric literal> ::= <exact numeric literal> [ E <exponent> ]
|
|
|
|
<exact numeric literal> ::=
|
|
<unsigned integer> [ <period> [ <unsigned integer> ] ]
|
|
| <period> <unsigned integer>
|
|
|
|
<sign> ::= <plus sign> | <minus sign>
|
|
|
|
<exponent> ::= <signed integer>
|
|
|
|
<signed integer> ::= [ <sign> ] <unsigned integer>
|
|
|
|
<unsigned integer> ::= <digit> ...
|
|
|
|
*/
|
|
|
|
//
|
|
// Terminal symbols
|
|
//
|
|
|
|
class Period : public ConstString
|
|
{
|
|
public:
|
|
explicit Period(Parser* p) : ConstString(p->tokenChar('.'))
|
|
{
|
|
}
|
|
bool isNull() const
|
|
{
|
|
return mStr == nullptr;
|
|
}
|
|
};
|
|
|
|
class ExponentMarker : public ConstString
|
|
{
|
|
public:
|
|
explicit ExponentMarker(Parser* p) : ConstString(p->tokenAnyCharOf('e', 'E'))
|
|
{
|
|
}
|
|
bool isNull() const
|
|
{
|
|
return mStr == nullptr;
|
|
}
|
|
};
|
|
|
|
class Sign : public ConstString
|
|
{
|
|
public:
|
|
explicit Sign() : ConstString(NULL, 0)
|
|
{
|
|
}
|
|
explicit Sign(const ConstString& str) : ConstString(str)
|
|
{
|
|
}
|
|
explicit Sign(Parser* p) : ConstString(p->tokenAnyCharOf('+', '-'))
|
|
{
|
|
}
|
|
static Sign empty(Parser* p)
|
|
{
|
|
return Sign(p->tokStartConstString());
|
|
}
|
|
bool isNull() const
|
|
{
|
|
return mStr == nullptr;
|
|
}
|
|
bool negative() const
|
|
{
|
|
return eq('-');
|
|
}
|
|
};
|
|
|
|
class Digits : public ConstString
|
|
{
|
|
public:
|
|
explicit Digits() : ConstString(NULL, 0)
|
|
{
|
|
}
|
|
explicit Digits(const char* str, size_t length) : ConstString(str, length)
|
|
{
|
|
}
|
|
explicit Digits(const ConstString& str) : ConstString(str)
|
|
{
|
|
}
|
|
explicit Digits(Parser* p) : ConstString(p->tokenDigits())
|
|
{
|
|
}
|
|
bool isNull() const
|
|
{
|
|
return mStr == nullptr;
|
|
}
|
|
|
|
void skipLeadingZeroDigits()
|
|
{
|
|
for (; mLength > 0 && mStr[0] == '0';)
|
|
{
|
|
mStr++;
|
|
mLength--;
|
|
}
|
|
}
|
|
void skipTrailingZeroDigits()
|
|
{
|
|
for (; mLength > 0 && mStr[mLength - 1] == '0';)
|
|
mLength--;
|
|
}
|
|
};
|
|
|
|
//
|
|
// Non-terminal symbols
|
|
//
|
|
|
|
// <unsigned integer> ::= <digit> ...
|
|
class UnsignedInteger : public Digits
|
|
{
|
|
public:
|
|
explicit UnsignedInteger() : Digits()
|
|
{
|
|
}
|
|
explicit UnsignedInteger(const char* str, size_t length) : Digits(str, length)
|
|
{
|
|
}
|
|
explicit UnsignedInteger(const ConstString& str) : Digits(str)
|
|
{
|
|
}
|
|
explicit UnsignedInteger(Parser* p) : Digits(p)
|
|
{
|
|
}
|
|
static UnsignedInteger empty(const Parser* p)
|
|
{
|
|
return UnsignedInteger(p->tokStartConstString());
|
|
}
|
|
UnsignedInteger left(size_t len) const
|
|
{
|
|
return UnsignedInteger(str(), length() > len ? len : length());
|
|
}
|
|
|
|
template <typename T>
|
|
T toXIntPositiveContinue(T start, DataCondition& error) const
|
|
{
|
|
const char* e = end();
|
|
T val = start;
|
|
for (const char* s = mStr; s < e; s++)
|
|
{
|
|
constexpr T cutoff = datatypes::numeric_limits<T>::max() / 10;
|
|
if (val > cutoff)
|
|
{
|
|
error |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
|
|
return datatypes::numeric_limits<T>::max();
|
|
}
|
|
val *= 10;
|
|
T newval = val + (s[0] - '0');
|
|
if (newval < val)
|
|
{
|
|
error |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
|
|
return datatypes::numeric_limits<T>::max();
|
|
}
|
|
val = newval;
|
|
}
|
|
return val;
|
|
}
|
|
template <typename T>
|
|
T toXIntPositive(DataCondition& error) const
|
|
{
|
|
return toXIntPositiveContinue<T>(0, error);
|
|
}
|
|
|
|
template <typename T>
|
|
T toSIntNegativeContinue(T start, DataCondition& error) const
|
|
{
|
|
const char* e = end();
|
|
T val = start;
|
|
for (const char* s = mStr; s < e; s++)
|
|
{
|
|
constexpr T cutoff = datatypes::numeric_limits<T>::min() / 10;
|
|
if (val < cutoff)
|
|
{
|
|
error |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
|
|
return datatypes::numeric_limits<T>::min();
|
|
}
|
|
val *= 10;
|
|
T newval = val - (s[0] - '0');
|
|
if (newval > val)
|
|
{
|
|
error |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
|
|
return datatypes::numeric_limits<T>::min();
|
|
}
|
|
val = newval;
|
|
}
|
|
return val;
|
|
}
|
|
template <typename T>
|
|
T toSIntNegative(DataCondition& error) const
|
|
{
|
|
return toSIntNegativeContinue<T>(0, error);
|
|
}
|
|
|
|
template <typename T>
|
|
T toXIntPositiveRoundAwayFromZeroContinue(T start, bool round, DataCondition& error) const
|
|
{
|
|
T val = toXIntPositiveContinue<T>(start, error);
|
|
if (val == datatypes::numeric_limits<T>::max() && round)
|
|
{
|
|
error |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
|
|
return val;
|
|
}
|
|
return val + round;
|
|
}
|
|
template <typename T>
|
|
T toXIntPositiveRoundAwayFromZero(bool round, DataCondition& error) const
|
|
{
|
|
return toXIntPositiveRoundAwayFromZeroContinue<T>(0, round, error);
|
|
}
|
|
};
|
|
|
|
// <signed integer> := [<sign>] <unsigned integer>
|
|
// An optional Sign followed by an UnsignedInteger, combined through
// Parser::DD2OM. All constructors come from DD2OM.
class SignedInteger : public Parser::DD2OM<Sign, UnsignedInteger>
{
 public:
  using DD2OM::DD2OM;

  // Null-ness is decided by the digits part alone.
  bool isNull() const
  {
    return UnsignedInteger::isNull();
  }

  // Magnitude of the number: the digits converted without the sign.
  template <typename T>
  T abs(DataCondition& error) const
  {
    return UnsignedInteger::toXIntPositive<T>(error);
  }

  // Signed value of the number; Sign::negative() selects the
  // negative conversion.
  template <typename T>
  T toSInt(DataCondition& error) const
  {
    if (Sign::negative())
      return UnsignedInteger::toSIntNegative<T>(error);
    return UnsignedInteger::toXIntPositive<T>(error);
  }
};
|
|
|
|
// E <signed integer>
|
|
// An ExponentMarker followed by a SignedInteger, combined through
// Parser::UD2MM. The class adds nothing except the inherited constructors.
class EExponent : public Parser::UD2MM<ExponentMarker, SignedInteger>
{
 public:
  using UD2MM::UD2MM;
};
|
|
|
|
// <period> <unsigned integer>
|
|
// A Period followed by an UnsignedInteger, combined through Parser::UD2MM:
// a literal that has a fractional part only. The class adds nothing
// except the inherited constructors.
class ExactUnsignedNumericLiteralFractionAlone : public Parser::UD2MM<Period, UnsignedInteger>
{
 public:
  using UD2MM::UD2MM;
};
|
|
|
|
// <period> [ <unsigned integer> ]
|
|
// A Period with the UnsignedInteger that may follow it, combined
// through Parser::UD2MO.
class PeriodOptUnsignedInteger : public Parser::UD2MO<Period, UnsignedInteger>
{
 public:
  using UD2MO::UD2MO;

  // Builds an instance from an UnsignedInteger made of
  // Parser::tokStartConstString(), i.e. without consuming any token.
  static PeriodOptUnsignedInteger empty(Parser* p)
  {
    return PeriodOptUnsignedInteger(UnsignedInteger(p->tokStartConstString()));
  }

  // Accessor returning this object under the name "fraction".
  const PeriodOptUnsignedInteger& fraction() const
  {
    return *this;
  }
};
|
|
|
|
// <integral unsigned integer> := <unsigned integer>
|
|
class IntegralUnsignedInteger : public UnsignedInteger
|
|
{
|
|
public:
|
|
explicit IntegralUnsignedInteger(Parser* p) : UnsignedInteger(p)
|
|
{
|
|
}
|
|
const UnsignedInteger& integral() const
|
|
{
|
|
return *this;
|
|
}
|
|
};
|
|
|
|
// <integral unsigned integer> [ <period> [ <unsigned integer> ] ]
|
|
|
|
class ExactUnsignedNumericLiteralIntegralOptFraction
|
|
: public Parser::DD2MO<IntegralUnsignedInteger, PeriodOptUnsignedInteger>
|
|
{
|
|
public:
|
|
using DD2MO::DD2MO;
|
|
};
|
|
|
|
// A container for integral and fractional parts
|
|
class UnsignedIntegerDecimal
|
|
{
|
|
protected:
|
|
UnsignedInteger mIntegral;
|
|
UnsignedInteger mFraction;
|
|
|
|
public:
|
|
explicit UnsignedIntegerDecimal(const UnsignedInteger& intg, const UnsignedInteger& frac)
|
|
: mIntegral(intg), mFraction(frac)
|
|
{
|
|
}
|
|
explicit UnsignedIntegerDecimal(const ExactUnsignedNumericLiteralFractionAlone& rhs) : mFraction(rhs)
|
|
{
|
|
}
|
|
explicit UnsignedIntegerDecimal(const ExactUnsignedNumericLiteralIntegralOptFraction& rhs)
|
|
: mIntegral(rhs.integral()), mFraction(rhs.fraction())
|
|
{
|
|
}
|
|
|
|
size_t IntFracDigits() const
|
|
{
|
|
return mIntegral.length() + mFraction.length();
|
|
}
|
|
|
|
bool isNull() const
|
|
{
|
|
return mIntegral.isNull() && mFraction.isNull();
|
|
}
|
|
|
|
void normalize()
|
|
{
|
|
mIntegral.skipLeadingZeroDigits();
|
|
mFraction.skipTrailingZeroDigits();
|
|
}
|
|
|
|
template <typename T>
|
|
T toXIntPositive(DataCondition& error) const
|
|
{
|
|
T val = mIntegral.toXIntPositive<T>(error);
|
|
return mFraction.toXIntPositiveContinue<T>(val, error);
|
|
}
|
|
|
|
template <typename T>
|
|
T toXIntPositiveRoundAwayFromZero(bool roundUp, DataCondition& error) const
|
|
{
|
|
T val = mIntegral.toXIntPositive<T>(error);
|
|
return mFraction.toXIntPositiveRoundAwayFromZeroContinue<T>(val, roundUp, error);
|
|
}
|
|
|
|
template <typename T>
|
|
T toXIntPositiveScaleUp(size_t scale, DataCondition& error) const
|
|
{
|
|
T val = toXIntPositive<T>(error);
|
|
if (val == datatypes::numeric_limits<T>::max())
|
|
return val;
|
|
for (; scale; scale--)
|
|
{
|
|
constexpr T cutoff = datatypes::numeric_limits<T>::max() / 10;
|
|
if (val > cutoff)
|
|
{
|
|
error |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
|
|
return datatypes::numeric_limits<T>::max();
|
|
}
|
|
val *= 10;
|
|
}
|
|
return val;
|
|
}
|
|
|
|
template <typename T>
|
|
T toXIntPositiveRound(DataCondition& error) const
|
|
{
|
|
bool roundUp = mFraction.length() && mFraction.str()[0] >= '5';
|
|
return mIntegral.toXIntPositiveRoundAwayFromZero<T>(roundUp, error);
|
|
}
|
|
|
|
template <typename T>
|
|
T toXIntPositiveRoundExp(uint64_t absExp, bool negExp, DataCondition& error) const
|
|
{
|
|
if (absExp == 0)
|
|
return toXIntPositiveRound<T>(error);
|
|
|
|
if (negExp)
|
|
{
|
|
if (mIntegral.length() == absExp) // 567.8e-3 -> 0.5678 -> 1
|
|
return mIntegral.str()[0] >= '5' ? 1 : 0;
|
|
if (mIntegral.length() < absExp) // 123e-4 -> 0.0123
|
|
return 0;
|
|
// mIntegral.length() > absExp: 5678.8e-3 -> 5.6788 -> 6
|
|
size_t diff = mIntegral.length() - absExp;
|
|
const UnsignedInteger tmp(mIntegral.str(), diff);
|
|
bool roundUp = mIntegral.str()[diff] >= '5';
|
|
return tmp.toXIntPositiveRoundAwayFromZero<T>(roundUp, error);
|
|
}
|
|
|
|
// Positive exponent: 123.456e2
|
|
if (mFraction.length() >= absExp) // 123.456e2 -> 12345.6 -> 12346
|
|
{
|
|
bool roundUp = mFraction.length() > absExp && mFraction.str()[absExp] >= '5';
|
|
UnsignedIntegerDecimal tmp(mIntegral, mFraction.left(absExp));
|
|
return tmp.toXIntPositiveRoundAwayFromZero<T>(roundUp, error);
|
|
}
|
|
|
|
// Pad int+frac with right zeros 123.4e3 -> 123400
|
|
size_t diff = absExp - mFraction.length();
|
|
return toXIntPositiveScaleUp<T>(diff, error);
|
|
}
|
|
};
|
|
|
|
// <exact unsigned numeric literal> :=
|
|
// <period> <unsigned integer>
|
|
// | <unsigned integer> [ <period> [ <unsigned integer> ] ]
|
|
|
|
class ExactUnsignedNumericLiteral
|
|
: public Parser::Choice2<UnsignedIntegerDecimal, ExactUnsignedNumericLiteralFractionAlone,
|
|
ExactUnsignedNumericLiteralIntegralOptFraction>
|
|
{
|
|
public:
|
|
using Choice2::Choice2;
|
|
};
|
|
|
|
// <unsigned numeric literal> ::= <exact numeric literal> [ E <exponent> ]
|
|
|
|
/*
  An ExactUnsignedNumericLiteral with the EExponent that may follow it,
  combined through Parser::DM2MO. The exponent part is reached through
  the member mB.
*/
class UnsignedNumericLiteral : public Parser::DM2MO<ExactUnsignedNumericLiteral, EExponent>
{
 public:
  using DM2MO::DM2MO;

  // Normalizes the mantissa and removes leading zeros of the exponent.
  void normalize()
  {
    ExactUnsignedNumericLiteral::normalize();
    mB.skipLeadingZeroDigits();
  }

  // The exponent part, viewed as a signed integer.
  const SignedInteger& exponent() const
  {
    return mB;
  }

  /*
    Converts the literal to an integer of type T, rounding away from zero.

    @param error  gets X_NUMERIC_VALUE_OUT_OF_RANGE on overflow
    @return       0 when the mantissa has no digits at all
  */
  template <typename T>
  T toXIntPositiveRound(DataCondition& error) const
  {
    size_t availableDigits = IntFracDigits();
    if (!availableDigits)
      return 0;
    // The exponent magnitude is parsed as uint64_t, which is the type
    // toXIntPositiveRoundExp() takes. Parsing it as T would make a
    // narrow result type clamp the exponent and falsely report
    // out-of-range for a literal such as 1e-300.
    const uint64_t absexp = exponent().abs<uint64_t>(error);
    return ExactUnsignedNumericLiteral::toXIntPositiveRoundExp<T>(absexp, exponent().negative(), error);
  }

  /*
    Converts the literal to a packed decimal with the given scale,
    i.e. to round(value * 10^scale).

    @param scale  decimal scale, added to the literal's own exponent
    @param error  gets X_NUMERIC_VALUE_OUT_OF_RANGE on overflow
    @return       0 when the mantissa has no digits at all
  */
  template <typename T>
  T toPackedDecimalPositive(scale_t scale, DataCondition& error) const
  {
    size_t availableDigits = IntFracDigits();
    if (!availableDigits)
      return 0;
    int64_t exp = exponent().toSInt<int64_t>(error);
    // Add the scale only when the sum still fits into int64_t.
    if (exp <= datatypes::numeric_limits<int64_t>::max() - scale)
      exp += scale;
    if (exp < 0)
    {
      if (exp == datatypes::numeric_limits<int64_t>::min())
        exp++;  // Avoid undefined behaviour in the unary minus below:
      return ExactUnsignedNumericLiteral::toXIntPositiveRoundExp<T>(static_cast<uint64_t>(-exp), true, error);
    }
    return ExactUnsignedNumericLiteral::toXIntPositiveRoundExp<T>(static_cast<uint64_t>(exp), false, error);
  }
};
|
|
|
|
// <signed numeric literal> ::= [ <sign> ] <unsigned numeric literal>
|
|
/*
  An optional Sign followed by an UnsignedNumericLiteral, combined
  through Parser::DD2OM. Provides the conversions that take the sign
  into account.
*/
class SignedNumericLiteral : public Parser::DD2OM<Sign, UnsignedNumericLiteral>
{
 public:
  using DD2OM::DD2OM;

  // Null-ness is decided by the unsigned literal part alone.
  bool isNull() const
  {
    return UnsignedNumericLiteral::isNull();
  }

  /*
    Converts the literal to an unsigned integer, rounding away from zero.
    A negative literal yields 0.

    @param error  gets X_NUMERIC_VALUE_OUT_OF_RANGE on overflow
  */
  template <typename T>
  T toUIntXRound(DataCondition& error) const
  {
    if (negative())
      return 0;
    return UnsignedNumericLiteral::toXIntPositiveRound<T>(error);
  }

  /*
    Same conversion with the error state discarded. This zero-argument
    form used to call toXIntPositiveRound<T>() without the error
    argument that function requires, so it could not be instantiated.
    NOTE(review): assumes DataCondition is default-constructible -
    confirm in mcs_datatype.h.
  */
  template <typename T>
  T toUIntXRound() const
  {
    DataCondition ignored;
    return toUIntXRound<T>(ignored);
  }

  // Unsigned packed decimal with the given scale; a negative literal
  // yields 0.
  template <typename T>
  T toPackedUDecimal(scale_t scale, DataCondition& error) const
  {
    if (negative())
      return 0;
    return UnsignedNumericLiteral::toPackedDecimalPositive<T>(scale, error);
  }

  /*
    Signed packed decimal with the given scale.

    For a negative literal the magnitude is computed in the unsigned
    counterpart of T. A magnitude at or above the magnitude of
    numeric_limits<T>::min() sets X_NUMERIC_VALUE_OUT_OF_RANGE and
    returns numeric_limits<T>::min().
  */
  template <typename T>
  T toPackedSDecimal(scale_t scale, DataCondition& error) const
  {
    if (!negative())
      return UnsignedNumericLiteral::toPackedDecimalPositive<T>(scale, error);
    using UT = typename datatypes::make_unsigned<T>::type;
    const UT absval = UnsignedNumericLiteral::toPackedDecimalPositive<UT>(scale, error);
    if (absval >= static_cast<UT>(datatypes::numeric_limits<T>::min()))
    {
      error |= DataCondition::X_NUMERIC_VALUE_OUT_OF_RANGE;
      return datatypes::numeric_limits<T>::min();
    }
    return -static_cast<T>(absval);
  }
};
|
|
|
|
} // namespace literal
|