1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

MCOL-641 Refactored MultiplicationOverflowCheck but it still has flaws.

Introduced fDecimalOverflowCheck to enable/disable overflow check.

Added wide-decimal support to FunctionColumn.

Low-level scanning crashes on medium-sized data sets.
This commit is contained in:
Roman Nozdrin
2020-03-24 13:41:28 +00:00
parent 74b64eb4f1
commit b5534eb847
13 changed files with 528 additions and 36 deletions

View File

@ -62,7 +62,7 @@ namespace execplan
*/
/**
 * Default constructor: default-constructs the ReturnedColumn base and
 * starts with no expression attached (fExpression is initialized with 0).
 */
ArithmeticColumn::ArithmeticColumn()
    : ReturnedColumn()
    , fExpression(0)
{
}
ArithmeticColumn::ArithmeticColumn(const string& sql, const uint32_t sessionID):
@ -314,8 +314,7 @@ const string ArithmeticColumn::toString() const
oss << "expressionId=" << fExpressionId << endl;
oss << "joinInfo=" << fJoinInfo << " returnAll=" << fReturnAll << " sequence#=" << fSequence << endl;
oss << "resultType=" << colDataTypeToString(fResultType.colDataType) << "|" << fResultType.colWidth <<
endl;
oss << "resultType=" << colDataTypeToString(fResultType.colDataType) << "|" << fResultType.colWidth << endl;
return oss.str();
}

View File

@ -34,17 +34,20 @@ namespace execplan
/**
* Constructors/Destructors
*/
/**
 * Default constructor: default-constructs the Operator base and turns the
 * decimal overflow check on.
 */
ArithmeticOperator::ArithmeticOperator()
    : Operator()
    , fDecimalOverflowCheck(true)
{}
/**
 * Construct from an operator name.
 *
 * @param operatorName forwarded unchanged to the Operator base constructor
 *
 * The decimal overflow check is turned on.
 */
ArithmeticOperator::ArithmeticOperator(const string& operatorName)
    : Operator(operatorName)
    , fDecimalOverflowCheck(true)
{}
/**
 * Copy constructor.
 *
 * @param rhs operator to copy from
 *
 * Copies the Operator base, the time zone and the decimal overflow-check
 * flag. The flag is taken from rhs rather than reset to true, so a copy of an
 * operator whose check was switched off with setOverflowCheck(false) does not
 * silently switch it back on.
 */
ArithmeticOperator::ArithmeticOperator(const ArithmeticOperator& rhs):
    Operator(rhs),
    fTimeZone(rhs.timeZone()),
    fDecimalOverflowCheck(rhs.fDecimalOverflowCheck)
{
}
@ -63,6 +66,7 @@ ostream& operator<<(ostream& output, const ArithmeticOperator& rhs)
{
output << rhs.toString();
output << "opType=" << rhs.operationType().colDataType << endl;
output << "decimalOverflowCheck=" << rhs.getOverflowCheck() << endl;
return output;
}
@ -73,6 +77,8 @@ void ArithmeticOperator::serialize(messageqcpp::ByteStream& b) const
{
b << (ObjectReader::id_t) ObjectReader::ARITHMETICOPERATOR;
b << fTimeZone;
const messageqcpp::ByteStream::byte tmp = fDecimalOverflowCheck;
b << tmp;
Operator::serialize(b);
}
@ -80,6 +86,9 @@ void ArithmeticOperator::unserialize(messageqcpp::ByteStream& b)
{
ObjectReader::checkType(b, ObjectReader::ARITHMETICOPERATOR);
b >> fTimeZone;
messageqcpp::ByteStream::byte tmp;
b >> tmp;
fDecimalOverflowCheck = tmp;
Operator::unserialize(b);
}

View File

@ -196,6 +196,14 @@ public:
return TreeNode::getBoolVal();
}
void adjustResultType(const CalpontSystemCatalog::ColType& m);
constexpr inline bool getOverflowCheck()
{
return fDecimalOverflowCheck;
}
inline void setOverflowCheck(bool check)
{
fDecimalOverflowCheck = check;
}
private:
template <typename result_t>
@ -203,6 +211,7 @@ private:
inline void execute(IDB_Decimal& result, IDB_Decimal op1, IDB_Decimal op2, bool& isNull);
inline void execute(IDB_Decimal& result, IDB_Decimal op1, IDB_Decimal op2, bool& isNull, cscType& resultCscType);
std::string fTimeZone;
bool fDecimalOverflowCheck;
};
#include "parsetree.h"
@ -288,12 +297,12 @@ inline void ArithmeticOperator::execute(IDB_Decimal& result, IDB_Decimal op1, ID
switch (fOp)
{
case OP_ADD:
if (resultCscType.colWidth == 16)
if (resultCscType.colWidth == datatypes::MAXDECIMALWIDTH)
{
datatypes::Decimal::addition<decltype(result.s128Value),false>(
datatypes::Decimal::addition<decltype(result.s128Value),true>(
op1, op2, result);
}
else if (resultCscType.colWidth == 8)
else if (resultCscType.colWidth == utils::MAXLEGACYWIDTH)
{
datatypes::Decimal::addition<decltype(result.value),false>(
op1, op2, result);

View File

@ -246,15 +246,34 @@ public:
{
IDB_Decimal decimal = fFunctor->getDecimalVal(row, fFunctionParms, isNull, fOperationType);
if (fResultType.scale == decimal.scale)
if (UNLIKELY(fResultType.colWidth == utils::MAXLEGACYWIDTH
&& fResultType.scale == decimal.scale))
return decimal;
if (fResultType.scale > decimal.scale)
decimal.value *= IDB_pow[fResultType.scale - decimal.scale];
else
decimal.value = (int64_t)(decimal.value > 0 ?
(double)decimal.value / IDB_pow[decimal.scale - fResultType.scale] + 0.5 :
(double)decimal.value / IDB_pow[decimal.scale - fResultType.scale] - 0.5);
if (LIKELY(fResultType.colWidth == datatypes::MAXDECIMALWIDTH))
{
decimal.s128Value =
(datatypes::Decimal::isWideDecimalType(decimal.precision)) ?
decimal.s128Value : decimal.value;
int128_t scaleMultiplier, result;
int32_t scaleDiff = fResultType.scale - decimal.scale;
datatypes::getScaleDivisor(scaleMultiplier, abs(scaleDiff));
// WIP MCOL-641 Unconditional overflow check
datatypes::MultiplicationOverflowCheck mul;
decimal.s128Value = (scaleDiff > 0
&& mul(decimal.s128Value, scaleMultiplier, result))
? result : decimal.s128Value / scaleMultiplier;
}
else if (fResultType.colWidth == utils::MAXLEGACYWIDTH)
{
if (fResultType.scale > decimal.scale)
decimal.value *= IDB_pow[fResultType.scale - decimal.scale];
else
decimal.value = (int64_t)(decimal.value > 0 ?
(double)decimal.value / IDB_pow[decimal.scale - fResultType.scale] + 0.5 :
(double)decimal.value / IDB_pow[decimal.scale - fResultType.scale] - 0.5);
}
decimal.scale = fResultType.scale;
decimal.precision = fResultType.precision;

View File

@ -30,6 +30,7 @@
#include <fstream>
#include "treenode.h"
#include "operator.h"
#include "mcs_decimal.h"
namespace rowgroup
{

View File

@ -120,6 +120,7 @@ public:
void setOpType(Type& l, Type& r);
private:
inline bool numericCompare(IDB_Decimal& op1, IDB_Decimal& op2);
template <typename result_t>
inline bool numericCompare(result_t op1, result_t op2);
inline bool strTrimCompare(const std::string& op1, const std::string& op2);
@ -127,6 +128,435 @@ private:
const CHARSET_INFO* cs;
};
/**
 * Evaluate this predicate (fOp) for one row.
 *
 * @param row     row passed to the operands' value getters
 * @param isNull  in/out NULL flag. The operand getters write to it. For
 *                IS NULL / IS NOT NULL it is reset to false before returning.
 *                For comparisons, a flag that is set on entry or set by either
 *                operand makes the predicate evaluate to false.
 * @param lop     left operand
 * @param rop     right operand; for LIKE / NOT LIKE it supplies the pattern
 * @return        the outcome of fOp applied to the operands
 * @throws logging::InvalidOperationExcept if fOperationType.colDataType is
 *         not one of the types handled below
 */
inline bool PredicateOperator::getBoolVal(rowgroup::Row& row, bool& isNull, ReturnedColumn* lop, ReturnedColumn* rop)
{
    // LIKE / NOT LIKE: both sides are treated as strings.
    if (fOp == OP_LIKE || fOp == OP_NOTLIKE)
    {
        SP_CNX_Regex compiled = rop->regex();
        // The strings returned by getStrVal have null padding out to the col width.
        // boost::regex considers these nulls significant, but they're not in the
        // pattern, so the match is done on the C string to strip them off.
        const std::string& subject = lop->getStrVal(row, isNull);
        bool matched;

        if (compiled)
        {
            // The right operand already carries a compiled pattern.
#ifdef POSIX_REGEX
            matched = (regexec(compiled.get(), subject.c_str(), 0, NULL, 0) == 0);
#else
            matched = boost::regex_match(subject.c_str(), *compiled);
#endif
        }
        else
        {
            // No compiled pattern available: build one from the right operand's string value.
#ifdef POSIX_REGEX
            regex_t adhoc;
            std::string pattern = dataconvert::DataConvert::constructRegexp(rop->getStrVal(row, isNull));
            regcomp(&adhoc, pattern.c_str(), REG_NOSUB | REG_EXTENDED);
            matched = (regexec(&adhoc, subject.c_str(), 0, NULL, 0) == 0);
            regfree(&adhoc);
#else
            boost::regex adhoc(dataconvert::DataConvert::constructRegexp(rop->getStrVal(row, isNull)));
            matched = boost::regex_match(subject.c_str(), adhoc);
#endif
        }

        // NOT LIKE inverts the match; a NULL operand always yields false.
        return (matched == (fOp == OP_LIKE)) && !isNull;
    }

    // fOpType should have already been set on the connector during parsing
    switch (fOperationType.colDataType)
    {
        case execplan::CalpontSystemCatalog::BIGINT:
        case execplan::CalpontSystemCatalog::INT:
        case execplan::CalpontSystemCatalog::MEDINT:
        case execplan::CalpontSystemCatalog::TINYINT:
        case execplan::CalpontSystemCatalog::SMALLINT:
        {
            if (fOp == OP_ISNULL || fOp == OP_ISNOTNULL)
            {
                // Only the left operand's NULL-ness matters; the flag is cleared for the caller.
                const bool wantNull = (fOp == OP_ISNULL);
                lop->getIntVal(row, isNull);
                const bool wasNull = isNull;
                isNull = false;
                return wasNull == wantNull;
            }

            // The flag as passed in is honoured before the left operand is read.
            if (isNull)
                return false;

            int64_t lhs = lop->getIntVal(row, isNull);

            if (isNull)
                return false;

            return numericCompare(lhs, rop->getIntVal(row, isNull)) && !isNull;
        }

        case execplan::CalpontSystemCatalog::UBIGINT:
        case execplan::CalpontSystemCatalog::UINT:
        case execplan::CalpontSystemCatalog::UMEDINT:
        case execplan::CalpontSystemCatalog::UTINYINT:
        case execplan::CalpontSystemCatalog::USMALLINT:
        {
            if (fOp == OP_ISNULL || fOp == OP_ISNOTNULL)
            {
                const bool wantNull = (fOp == OP_ISNULL);
                lop->getUintVal(row, isNull);
                const bool wasNull = isNull;
                isNull = false;
                return wasNull == wantNull;
            }

            if (isNull)
                return false;

            uint64_t lhs = lop->getUintVal(row, isNull);

            if (isNull)
                return false;

            return numericCompare(lhs, rop->getUintVal(row, isNull)) && !isNull;
        }

        case execplan::CalpontSystemCatalog::FLOAT:
        case execplan::CalpontSystemCatalog::UFLOAT:
        case execplan::CalpontSystemCatalog::DOUBLE:
        case execplan::CalpontSystemCatalog::UDOUBLE:
        {
            if (fOp == OP_ISNULL || fOp == OP_ISNOTNULL)
            {
                const bool wantNull = (fOp == OP_ISNULL);
                lop->getDoubleVal(row, isNull);
                const bool wasNull = isNull;
                isNull = false;
                return wasNull == wantNull;
            }

            if (isNull)
                return false;

            double lhs = lop->getDoubleVal(row, isNull);

            if (isNull)
                return false;

            return numericCompare(lhs, rop->getDoubleVal(row, isNull)) && !isNull;
        }

        case execplan::CalpontSystemCatalog::LONGDOUBLE:
        {
            if (fOp == OP_ISNULL || fOp == OP_ISNOTNULL)
            {
                const bool wantNull = (fOp == OP_ISNULL);
                lop->getLongDoubleVal(row, isNull);
                const bool wasNull = isNull;
                isNull = false;
                return wasNull == wantNull;
            }

            if (isNull)
                return false;

            long double lhs = lop->getLongDoubleVal(row, isNull);

            if (isNull)
                return false;

            long double rhs = rop->getLongDoubleVal(row, isNull);

            if (isNull)
                return false;

            // In many cases rounding error prevents a plain equality compare from
            // working, so equality uses the larger scale of the two operands.
            if (fOp == execplan::OP_EQ)
            {
                int64_t scale = std::max(lop->resultType().scale, rop->resultType().scale);

                if (scale)
                {
                    // A value may represent a very large integer; multiplying it by
                    // the scale could push significant digits out of range, so the
                    // integral and fractional parts are compared separately.
                    long double lhsWhole;
                    long double lhsFrac = modfl(lhs, &lhsWhole);
                    long double rhsWhole;
                    long double rhsFrac = modfl(rhs, &rhsWhole);

                    if (!numericCompare(lhsWhole, rhsWhole))
                        return false;

                    double factor = pow(10.0, (double)scale);
                    lhsFrac = roundl(lhsFrac * factor);
                    rhsFrac = roundl(rhsFrac * factor);
                    return numericCompare(lhsFrac, rhsFrac);
                }
            }

            return numericCompare(lhs, rhs);
        }

        case execplan::CalpontSystemCatalog::DECIMAL:
        case execplan::CalpontSystemCatalog::UDECIMAL:
        {
            if (fOp == OP_ISNULL || fOp == OP_ISNOTNULL)
            {
                const bool wantNull = (fOp == OP_ISNULL);
                lop->getDecimalVal(row, isNull);
                const bool wasNull = isNull;
                isNull = false;
                return wasNull == wantNull;
            }

            if (isNull)
                return false;

            IDB_Decimal lhs = lop->getDecimalVal(row, isNull);

            if (isNull)
                return false;

            return numericCompare(lhs, rop->getDecimalVal(row, isNull)) && !isNull;
        }

        case execplan::CalpontSystemCatalog::DATE:
        {
            if (fOp == OP_ISNULL || fOp == OP_ISNOTNULL)
            {
                const bool wantNull = (fOp == OP_ISNULL);
                lop->getDateIntVal(row, isNull);
                const bool wasNull = isNull;
                isNull = false;
                return wasNull == wantNull;
            }

            if (isNull)
                return false;

            int64_t lhs = lop->getDateIntVal(row, isNull);

            if (isNull)
                return false;

            return numericCompare(lhs, (int64_t)rop->getDateIntVal(row, isNull)) && !isNull;
        }

        case execplan::CalpontSystemCatalog::DATETIME:
        {
            if (fOp == OP_ISNULL || fOp == OP_ISNOTNULL)
            {
                const bool wantNull = (fOp == OP_ISNULL);
                lop->getDatetimeIntVal(row, isNull);
                const bool wasNull = isNull;
                isNull = false;
                return wasNull == wantNull;
            }

            if (isNull)
                return false;

            int64_t lhs = lop->getDatetimeIntVal(row, isNull);

            if (isNull)
                return false;

            return numericCompare(lhs, rop->getDatetimeIntVal(row, isNull)) && !isNull;
        }

        case execplan::CalpontSystemCatalog::TIMESTAMP:
        {
            if (fOp == OP_ISNULL || fOp == OP_ISNOTNULL)
            {
                const bool wantNull = (fOp == OP_ISNULL);
                lop->getTimestampIntVal(row, isNull);
                const bool wasNull = isNull;
                isNull = false;
                return wasNull == wantNull;
            }

            if (isNull)
                return false;

            int64_t lhs = lop->getTimestampIntVal(row, isNull);

            if (isNull)
                return false;

            return numericCompare(lhs, rop->getTimestampIntVal(row, isNull)) && !isNull;
        }

        case execplan::CalpontSystemCatalog::TIME:
        {
            if (fOp == OP_ISNULL || fOp == OP_ISNOTNULL)
            {
                const bool wantNull = (fOp == OP_ISNULL);
                lop->getTimeIntVal(row, isNull);
                const bool wasNull = isNull;
                isNull = false;
                return wasNull == wantNull;
            }

            if (isNull)
                return false;

            int64_t lhs = lop->getTimeIntVal(row, isNull);

            if (isNull)
                return false;

            return numericCompare(lhs, rop->getTimeIntVal(row, isNull)) && !isNull;
        }

        case execplan::CalpontSystemCatalog::VARCHAR:
        case execplan::CalpontSystemCatalog::CHAR:
        case execplan::CalpontSystemCatalog::TEXT:
        {
            if (fOp == OP_ISNULL || fOp == OP_ISNOTNULL)
            {
                const bool wantNull = (fOp == OP_ISNULL);
                lop->getStrVal(row, isNull);
                const bool wasNull = isNull;
                isNull = false;
                return wasNull == wantNull;
            }

            if (isNull)
                return false;

            const std::string& lhs = lop->getStrVal(row, isNull);

            if (isNull)
                return false;

            return strTrimCompare(lhs, rop->getStrVal(row, isNull)) && !isNull;
        }

        // MCOL-641 WIP This is an incorrect assumption.
        case execplan::CalpontSystemCatalog::VARBINARY:
        case execplan::CalpontSystemCatalog::BLOB:
            return false;

        default:
        {
            std::ostringstream oss;
            oss << "invalid predicate operation type: " << fOperationType.colDataType;
            throw logging::InvalidOperationExcept(oss.str());
        }
    }

    return false;
}
/**
 * Compare two decimals with the relational operator held in fOp.
 *
 * @param op1 left-hand decimal
 * @param op2 right-hand decimal
 * @return    result of applying IDB_Decimal's ==, !=, >, >=, < or <= as selected by fOp
 * @throws logging::InvalidOperationExcept if fOp is not one of the six
 *         comparison operators
 */
inline bool PredicateOperator::numericCompare(IDB_Decimal& op1, IDB_Decimal& op2)
{
    if (fOp == OP_EQ)
        return op1 == op2;

    if (fOp == OP_NE)
        return op1 != op2;

    if (fOp == OP_GT)
        return op1 > op2;

    if (fOp == OP_GE)
        return op1 >= op2;

    if (fOp == OP_LT)
        return op1 < op2;

    if (fOp == OP_LE)
        return op1 <= op2;

    // Anything else is not a comparison this function can evaluate.
    std::ostringstream oss;
    oss << "invalid predicate operation: " << fOp;
    throw logging::InvalidOperationExcept(oss.str());
}
template <typename result_t>
inline bool PredicateOperator::numericCompare(result_t op1, result_t op2)

View File

@ -32,6 +32,7 @@
#include "objectreader.h"
#include "joblisttypes.h"
#include "rowgroup.h"
#include "mcs_decimal.h"
/**
* Namespace
@ -217,7 +218,7 @@ inline IDB_Decimal SimpleColumn_INT<len>::getDecimalVal(rowgroup::Row& row, bool
isNull = true;
fResult.decimalVal.value = (int64_t)row.getIntField<len>(fInputIndex);
fResult.decimalVal.precision = 65;
fResult.decimalVal.precision = datatypes::INT64MAXPRECISION;
fResult.decimalVal.scale = 0;
return fResult.decimalVal;
}
@ -242,8 +243,6 @@ void SimpleColumn_INT<len>::serialize(messageqcpp::ByteStream& b) const
case 8:
b << (ObjectReader::id_t) ObjectReader::SIMPLECOLUMN_INT8;
break;
case 16:
std::cout << __FILE__<< ":" << __LINE__ << " Fix for 16 Bytes ?" << std::endl;
}
SimpleColumn::serialize(b);
@ -269,8 +268,6 @@ void SimpleColumn_INT<len>::unserialize(messageqcpp::ByteStream& b)
case 8:
ObjectReader::checkType(b, ObjectReader::SIMPLECOLUMN_INT8);
break;
case 16:
std::cout << __FILE__<< ":" << __LINE__ << " Fix for 16 Bytes ?" << std::endl;
}
SimpleColumn::unserialize(b);

View File

@ -32,6 +32,7 @@
#include "objectreader.h"
#include "joblisttypes.h"
#include "rowgroup.h"
#include "mcs_decimal.h"
/**
* Namespace
@ -218,7 +219,8 @@ inline IDB_Decimal SimpleColumn_UINT<len>::getDecimalVal(rowgroup::Row& row, boo
isNull = true;
fResult.decimalVal.value = (uint64_t)row.getUintField<len>(fInputIndex);
fResult.decimalVal.precision = 65;
// WIP MCOL-641
fResult.decimalVal.precision = datatypes::INT64MAXPRECISION+1;
fResult.decimalVal.scale = 0;
return fResult.decimalVal;
}