MCOL-641 Refactored MultiplicationOverflowCheck but it still has flaws.

Introduced fDecimalOverflowCheck to enable/disable the decimal overflow check. Added wide-decimal handling to FunctionColumn. Known issue: low-level scanning crashes on medium-sized data sets.
2025-07-29 08:21:15 +03:00 · 2020-03-24 13:41:28 +00:00
parent 74b64eb4f1
commit b5534eb847
13 changed files with 528 additions and 36 deletions
--- a/dbcon/execplan/arithmeticcolumn.cpp
+++ b/dbcon/execplan/arithmeticcolumn.cpp
@ -62,7 +62,7 @@ namespace execplan
 */
 ArithmeticColumn::ArithmeticColumn():
    ReturnedColumn(),
-    fExpression (0)
+    fExpression(0)  // default-constructed column starts with fExpression set to 0
 {}

 ArithmeticColumn::ArithmeticColumn(const string& sql, const uint32_t sessionID):
@ -314,8 +314,7 @@ const string ArithmeticColumn::toString() const

    oss << "expressionId=" << fExpressionId << endl;
    oss << "joinInfo=" << fJoinInfo << " returnAll=" << fReturnAll << " sequence#=" << fSequence << endl;
-    oss << "resultType=" << colDataTypeToString(fResultType.colDataType) << "|" << fResultType.colWidth <<
-        endl;
+    oss << "resultType=" << colDataTypeToString(fResultType.colDataType) << "|" << fResultType.colWidth << endl;
    return oss.str();
 }

--- a/dbcon/execplan/arithmeticoperator.cpp
+++ b/dbcon/execplan/arithmeticoperator.cpp
@ -34,17 +34,20 @@ namespace execplan
 /**
 * Constructors/Destructors
 */
-ArithmeticOperator::ArithmeticOperator() : Operator()
+ArithmeticOperator::ArithmeticOperator() : Operator(), 
+    fDecimalOverflowCheck(true)  // the decimal overflow check flag starts out enabled
 {
 }

-ArithmeticOperator::ArithmeticOperator(const string& operatorName): Operator(operatorName)
+ArithmeticOperator::ArithmeticOperator(const string& operatorName): Operator(operatorName),
+    fDecimalOverflowCheck(true)  // the decimal overflow check flag starts out enabled
 {
 }

 ArithmeticOperator::ArithmeticOperator(const ArithmeticOperator& rhs):
    Operator(rhs),
-    fTimeZone(rhs.timeZone())
+    fTimeZone(rhs.timeZone()),
+    fDecimalOverflowCheck(rhs.fDecimalOverflowCheck)  // copy the flag from rhs; hard-coding true would re-enable a check disabled via setOverflowCheck(false)
 {
 }

@ -63,6 +66,7 @@ ostream& operator<<(ostream& output, const ArithmeticOperator& rhs)
 {
    output << rhs.toString();
    output << "opType=" << rhs.operationType().colDataType << endl;
+    output << "decimalOverflowCheck=" << rhs.getOverflowCheck() << endl;
    return output;
 }

@ -73,6 +77,8 @@ void ArithmeticOperator::serialize(messageqcpp::ByteStream& b) const
 {
    b << (ObjectReader::id_t) ObjectReader::ARITHMETICOPERATOR;
    b << fTimeZone;
+    const messageqcpp::ByteStream::byte tmp = fDecimalOverflowCheck;
+    b << tmp;
    Operator::serialize(b);
 }

@ -80,6 +86,9 @@ void ArithmeticOperator::unserialize(messageqcpp::ByteStream& b)
 {
    ObjectReader::checkType(b, ObjectReader::ARITHMETICOPERATOR);
    b >> fTimeZone;
+    messageqcpp::ByteStream::byte tmp;
+    b >> tmp;
+    fDecimalOverflowCheck = tmp;
    Operator::unserialize(b);
 }

--- a/dbcon/execplan/arithmeticoperator.h
+++ b/dbcon/execplan/arithmeticoperator.h
@ -196,6 +196,14 @@ public:
        return TreeNode::getBoolVal();
    }
    void adjustResultType(const CalpontSystemCatalog::ColType& m);
+    inline bool getOverflowCheck() const  // const accessor (not constexpr): it is called on a const ArithmeticOperator& in operator<<
+    {
+        return fDecimalOverflowCheck;  // current state of the decimal overflow check flag
+    }
+    inline void setOverflowCheck(bool check)  // stores check in fDecimalOverflowCheck, the value reported by getOverflowCheck()
+    {
+        fDecimalOverflowCheck = check;
+    }

 private:
    template <typename result_t>
@ -203,6 +211,7 @@ private:
    inline void execute(IDB_Decimal& result, IDB_Decimal op1, IDB_Decimal op2, bool& isNull);
    inline void execute(IDB_Decimal& result, IDB_Decimal op1, IDB_Decimal op2, bool& isNull, cscType& resultCscType);
    std::string fTimeZone;
+    bool fDecimalOverflowCheck;
 };

 #include "parsetree.h"
@ -288,12 +297,12 @@ inline void ArithmeticOperator::execute(IDB_Decimal& result, IDB_Decimal op1, ID
    switch (fOp)
    {
        case OP_ADD:
-            if (resultCscType.colWidth == 16)
+            if (resultCscType.colWidth == datatypes::MAXDECIMALWIDTH)
            {
-                datatypes::Decimal::addition<decltype(result.s128Value),false>(
+                datatypes::Decimal::addition<decltype(result.s128Value),true>(
                    op1, op2, result);
            }
-            else if (resultCscType.colWidth == 8)
+            else if (resultCscType.colWidth == utils::MAXLEGACYWIDTH)
            {
                datatypes::Decimal::addition<decltype(result.value),false>(
                    op1, op2, result);
--- a/dbcon/execplan/functioncolumn.h
+++ b/dbcon/execplan/functioncolumn.h
@ -246,15 +246,34 @@ public:
    {
        IDB_Decimal decimal = fFunctor->getDecimalVal(row, fFunctionParms, isNull, fOperationType);

-        if (fResultType.scale == decimal.scale)
+        if (UNLIKELY(fResultType.colWidth == utils::MAXLEGACYWIDTH
+                && fResultType.scale == decimal.scale))
            return decimal;

-        if (fResultType.scale > decimal.scale)
-            decimal.value *= IDB_pow[fResultType.scale - decimal.scale];
-        else
-            decimal.value = (int64_t)(decimal.value > 0 ?
-                                      (double)decimal.value / IDB_pow[decimal.scale - fResultType.scale] + 0.5 :
-                                      (double)decimal.value / IDB_pow[decimal.scale - fResultType.scale] - 0.5);
+        if (LIKELY(fResultType.colWidth == datatypes::MAXDECIMALWIDTH))
+        {
+            decimal.s128Value =
+                (datatypes::Decimal::isWideDecimalType(decimal.precision)) ?
+                    decimal.s128Value : decimal.value;
+
+            int128_t scaleMultiplier, result;
+            int32_t scaleDiff = fResultType.scale - decimal.scale;
+            datatypes::getScaleDivisor(scaleMultiplier, abs(scaleDiff));
+            // WIP MCOL-641 Unconditional overflow check
+            datatypes::MultiplicationOverflowCheck mul;
+            decimal.s128Value = (scaleDiff > 0
+                && mul(decimal.s128Value, scaleMultiplier, result))
+                ? result : decimal.s128Value / scaleMultiplier;
+        }
+        else if (fResultType.colWidth == utils::MAXLEGACYWIDTH)
+        {
+            if (fResultType.scale > decimal.scale)
+                decimal.value *= IDB_pow[fResultType.scale - decimal.scale];
+            else
+                decimal.value = (int64_t)(decimal.value > 0 ?
+                                          (double)decimal.value / IDB_pow[decimal.scale - fResultType.scale] + 0.5 :
+                                          (double)decimal.value / IDB_pow[decimal.scale - fResultType.scale] - 0.5);
+        }

        decimal.scale = fResultType.scale;
        decimal.precision = fResultType.precision;
--- a/dbcon/execplan/parsetree.h
+++ b/dbcon/execplan/parsetree.h
@ -30,6 +30,7 @@
 #include <fstream>
 #include "treenode.h"
 #include "operator.h"
+#include "mcs_decimal.h"

 namespace rowgroup
 {
--- a/dbcon/execplan/predicateoperator.h
+++ b/dbcon/execplan/predicateoperator.h
@ -120,6 +120,7 @@ public:
    void setOpType(Type& l, Type& r);

 private:
+    inline bool numericCompare(IDB_Decimal& op1, IDB_Decimal& op2);
    template <typename result_t>
    inline bool numericCompare(result_t op1, result_t op2);
    inline bool strTrimCompare(const std::string& op1, const std::string& op2);
@ -127,6 +128,435 @@ private:
    const CHARSET_INFO* cs;
 };

+inline bool PredicateOperator::getBoolVal(rowgroup::Row& row, bool& isNull, ReturnedColumn* lop, ReturnedColumn* rop)
+{
+    // Evaluates this predicate (fOp) on lop/rop for one row. LIKE / NOT LIKE are handled first: both sides are strings.
+    if (fOp == OP_LIKE || fOp == OP_NOTLIKE)
+    {
+        SP_CNX_Regex regex = rop->regex();
+
+        // The strings returned by getStrVal have null padding out to the col width. boost::regex
+        //  considers these nulls significant, but they're not in the pattern, so they must not be
+        //   matched; v.c_str() is passed below, which presumably ends the subject at the first null.
+        const std::string& v = lop->getStrVal(row, isNull);
+//        char* c = (char*)alloca(v.length() + 1);
+//        memcpy(c, v.c_str(), v.length());
+//        c[v.length()] = 0;
+//        std::string vv(c);
+        // Prefer the regex object held by rop; otherwise build one from rop's string value.
+        if (regex)
+        {
+#ifdef POSIX_REGEX
+            bool ret = regexec(regex.get(), v.c_str(), 0, NULL, 0) == 0;
+#else
+            bool ret = boost::regex_match(v.c_str(), *regex);
+#endif
+            return (((fOp == OP_LIKE) ? ret : !ret) && !isNull);
+        }
+        else
+        {
+#ifdef POSIX_REGEX
+            regex_t regex;
+            std::string str = dataconvert::DataConvert::constructRegexp(rop->getStrVal(row, isNull));
+            regcomp(&regex, str.c_str(), REG_NOSUB | REG_EXTENDED);  // NOTE(review): regcomp's return value is not checked
+            bool ret = regexec(&regex, v.c_str(), 0, NULL, 0) == 0;
+            regfree(&regex);
+#else
+            boost::regex regex(dataconvert::DataConvert::constructRegexp(rop->getStrVal(row, isNull)));
+            bool ret = boost::regex_match(v.c_str(), regex);
+#endif
+            return (((fOp == OP_LIKE) ? ret : !ret) && !isNull);
+        }
+    }
+    // Each typed case below (except VARBINARY/BLOB): IS NULL / IS NOT NULL read only lop and clear isNull; other operators yield false once either side is NULL.
+    // fOpType should have already been set on the connector during parsing
+    switch (fOperationType.colDataType)
+    {
+        case execplan::CalpontSystemCatalog::BIGINT:
+        case execplan::CalpontSystemCatalog::INT:
+        case execplan::CalpontSystemCatalog::MEDINT:
+        case execplan::CalpontSystemCatalog::TINYINT:
+        case execplan::CalpontSystemCatalog::SMALLINT:
+        {
+            if (fOp == OP_ISNULL)
+            {
+                lop->getIntVal(row, isNull);
+                bool ret = isNull;
+                isNull = false;
+                return ret;
+            }
+
+            if (fOp == OP_ISNOTNULL)
+            {
+                lop->getIntVal(row, isNull);
+                bool ret = isNull;
+                isNull = false;
+                return !ret;
+            }
+            // isNull is an in/out flag: if it is already set on entry, no operand is read and the result is false.
+            if (isNull)
+                return false;
+
+            int64_t val1 = lop->getIntVal(row, isNull);
+
+            if (isNull)
+                return false;
+
+            return numericCompare(val1,  rop->getIntVal(row, isNull)) && !isNull;
+        }
+
+        case execplan::CalpontSystemCatalog::UBIGINT:
+        case execplan::CalpontSystemCatalog::UINT:
+        case execplan::CalpontSystemCatalog::UMEDINT:
+        case execplan::CalpontSystemCatalog::UTINYINT:
+        case execplan::CalpontSystemCatalog::USMALLINT:
+        {
+            if (fOp == OP_ISNULL)
+            {
+                lop->getUintVal(row, isNull);
+                bool ret = isNull;
+                isNull = false;
+                return ret;
+            }
+
+            if (fOp == OP_ISNOTNULL)
+            {
+                lop->getUintVal(row, isNull);
+                bool ret = isNull;
+                isNull = false;
+                return !ret;
+            }
+
+            if (isNull)
+                return false;
+
+            uint64_t val1 = lop->getUintVal(row, isNull);
+
+            if (isNull)
+                return false;
+
+            return numericCompare(val1,  rop->getUintVal(row, isNull)) && !isNull;
+        }
+
+        case execplan::CalpontSystemCatalog::FLOAT:
+        case execplan::CalpontSystemCatalog::UFLOAT:
+        case execplan::CalpontSystemCatalog::DOUBLE:
+        case execplan::CalpontSystemCatalog::UDOUBLE:
+        {
+            if (fOp == OP_ISNULL)
+            {
+                lop->getDoubleVal(row, isNull);
+                bool ret = isNull;
+                isNull = false;
+                return ret;
+            }
+
+            if (fOp == OP_ISNOTNULL)
+            {
+                lop->getDoubleVal(row, isNull);
+                bool ret = isNull;
+                isNull = false;
+                return !ret;
+            }
+
+            if (isNull)
+                return false;
+
+            double val1 = lop->getDoubleVal(row, isNull);
+
+            if (isNull)
+                return false;
+
+            return numericCompare(val1, rop->getDoubleVal(row, isNull)) && !isNull;
+        }
+
+        case execplan::CalpontSystemCatalog::LONGDOUBLE:
+        {
+            if (fOp == OP_ISNULL)
+            {
+                lop->getLongDoubleVal(row, isNull);
+                bool ret = isNull;
+                isNull = false;
+                return ret;
+            }
+
+            if (fOp == OP_ISNOTNULL)
+            {
+                lop->getLongDoubleVal(row, isNull);
+                bool ret = isNull;
+                isNull = false;
+                return !ret;
+            }
+
+            if (isNull)
+                return false;
+
+            long double val1 = lop->getLongDoubleVal(row, isNull);
+            if (isNull)
+                return false;
+
+            long double val2 = rop->getLongDoubleVal(row, isNull);
+            if (isNull)
+                return false;
+
+            // In many cases rounding error prevents an exact equality compare from working.
+            // For OP_EQ with a non-zero scale, compare at the larger scale of the two operands.
+            if (fOp == execplan::OP_EQ)
+            {
+                // In case a val is a representation of a very large integer,
+                // we won't want to just multiply by scale, as it may move
+                // significant digits out of scope. So we break them apart
+                // (integer part and fractional part) and compare each separately.
+                int64_t scale = std::max(lop->resultType().scale, rop->resultType().scale);
+                if (scale)
+                {
+                    long double intpart1;
+                    long double fract1 = modfl(val1, &intpart1);
+                    long double intpart2;
+                    long double fract2 = modfl(val2, &intpart2);
+                    if (numericCompare(intpart1, intpart2))
+                    {
+                        double factor = pow(10.0, (double)scale);
+                        fract1 = roundl(fract1 * factor);
+                        fract2 = roundl(fract2 * factor);
+                        return numericCompare(fract1, fract2);
+                    }
+                    else
+                    {
+                        return false;
+                    }
+                }
+            }
+            return numericCompare(val1, val2);
+        }
+
+        case execplan::CalpontSystemCatalog::DECIMAL:
+        case execplan::CalpontSystemCatalog::UDECIMAL:
+        {
+            if (fOp == OP_ISNULL)
+            {
+                lop->getDecimalVal(row, isNull);
+                bool ret = isNull;
+                isNull = false;
+                return ret;
+            }
+
+            if (fOp == OP_ISNOTNULL)
+            {
+                lop->getDecimalVal(row, isNull);
+                bool ret = isNull;
+                isNull = false;
+                return !ret;
+            }
+
+            if (isNull)
+                return false;
+
+            IDB_Decimal val1 = lop->getDecimalVal(row, isNull);
+
+            if (isNull)
+                return false;
+            // NOTE(review): if getDecimalVal returns by value, the call below cannot bind to numericCompare(IDB_Decimal&, IDB_Decimal&) and resolves to the template overload - confirm.
+            return numericCompare(val1, rop->getDecimalVal(row, isNull)) && !isNull;
+        }
+
+        case execplan::CalpontSystemCatalog::DATE:
+        {
+            if (fOp == OP_ISNULL)
+            {
+                lop->getDateIntVal(row, isNull);
+                bool ret = isNull;
+                isNull = false;
+                return ret;
+            }
+
+            if (fOp == OP_ISNOTNULL)
+            {
+                lop->getDateIntVal(row, isNull);
+                bool ret = isNull;
+                isNull = false;
+                return !ret;
+            }
+
+            if (isNull)
+                return false;
+
+            int64_t val1 = lop->getDateIntVal(row, isNull);
+
+            if (isNull)
+                return false;
+
+            return numericCompare(val1, (int64_t)rop->getDateIntVal(row, isNull)) && !isNull;
+        }
+
+        case execplan::CalpontSystemCatalog::DATETIME:
+        {
+            if (fOp == OP_ISNULL)
+            {
+                lop->getDatetimeIntVal(row, isNull);
+                bool ret = isNull;
+                isNull = false;
+                return ret;
+            }
+
+            if (fOp == OP_ISNOTNULL)
+            {
+                lop->getDatetimeIntVal(row, isNull);
+                bool ret = isNull;
+                isNull = false;
+                return !ret;
+            }
+
+            if (isNull)
+                return false;
+
+            int64_t val1 = lop->getDatetimeIntVal(row, isNull);
+
+            if (isNull)
+                return false;
+
+            return numericCompare(val1, rop->getDatetimeIntVal(row, isNull)) && !isNull;
+        }
+
+        case execplan::CalpontSystemCatalog::TIMESTAMP:
+        {
+            if (fOp == OP_ISNULL)
+            {
+                lop->getTimestampIntVal(row, isNull);
+                bool ret = isNull;
+                isNull = false;
+                return ret;
+            }
+
+            if (fOp == OP_ISNOTNULL)
+            {
+                lop->getTimestampIntVal(row, isNull);
+                bool ret = isNull;
+                isNull = false;
+                return !ret;
+            }
+
+            if (isNull)
+                return false;
+
+            int64_t val1 = lop->getTimestampIntVal(row, isNull);
+
+            if (isNull)
+                return false;
+
+            return numericCompare(val1, rop->getTimestampIntVal(row, isNull)) && !isNull;
+        }
+
+        case execplan::CalpontSystemCatalog::TIME:
+        {
+            if (fOp == OP_ISNULL)
+            {
+                lop->getTimeIntVal(row, isNull);
+                bool ret = isNull;
+                isNull = false;
+                return ret;
+            }
+
+            if (fOp == OP_ISNOTNULL)
+            {
+                lop->getTimeIntVal(row, isNull);
+                bool ret = isNull;
+                isNull = false;
+                return !ret;
+            }
+
+            if (isNull)
+                return false;
+
+            int64_t val1 = lop->getTimeIntVal(row, isNull);
+
+            if (isNull)
+                return false;
+
+            return numericCompare(val1, rop->getTimeIntVal(row, isNull)) && !isNull;
+        }
+
+
+
+        case execplan::CalpontSystemCatalog::VARCHAR:
+        case execplan::CalpontSystemCatalog::CHAR:
+        case execplan::CalpontSystemCatalog::TEXT:
+        {
+            if (fOp == OP_ISNULL)
+            {
+                lop->getStrVal(row, isNull);
+                bool ret = isNull;
+                isNull = false;
+                return ret;
+            }
+
+            if (fOp == OP_ISNOTNULL)
+            {
+                lop->getStrVal(row, isNull);
+                bool ret = isNull;
+                isNull = false;
+                return !ret;
+            }
+
+            if (isNull)
+                return false;
+
+            const std::string& val1 = lop->getStrVal(row, isNull);
+            if (isNull)
+                return false;
+
+            return strTrimCompare(val1, rop->getStrVal(row, isNull)) && !isNull;
+//            return strCompare(val1, rop->getStrVal(row, isNull)) && !isNull;
+
+        }
+
+        // MCOL-641 WIP This is an incorrect assumption (presumably: that VARBINARY/BLOB predicates are always false).
+        case execplan::CalpontSystemCatalog::VARBINARY:
+        case execplan::CalpontSystemCatalog::BLOB:
+            return false;
+            break;
+
+        default:
+        {
+            std::ostringstream oss;
+            oss << "invalid predicate operation type: " << fOperationType.colDataType;
+            throw logging::InvalidOperationExcept(oss.str());
+        }
+    }
+
+    return false;
+}
+// Decimal overload: applies the comparison selected by fOp to op1 and op2 using the ==, !=, >, >=, <, <= operators on IDB_Decimal.
+inline bool PredicateOperator::numericCompare(IDB_Decimal& op1, IDB_Decimal& op2)
+{
+    switch (fOp)
+    {
+        case OP_EQ:
+            return op1 == op2;
+
+        case OP_NE:
+            return op1 != op2;
+
+        case OP_GT:
+            return op1 > op2;
+
+        case OP_GE:
+            return op1 >= op2;
+
+        case OP_LT:
+            return op1 < op2;
+
+        case OP_LE:
+            return op1 <= op2;
+        // Any fOp other than the six comparisons above is rejected with logging::InvalidOperationExcept.
+        default:
+        {
+            std::ostringstream oss;
+            oss << "invalid predicate operation: " << fOp;
+            throw logging::InvalidOperationExcept(oss.str());
+        }
+    }
+}

 template <typename result_t>
 inline bool PredicateOperator::numericCompare(result_t op1, result_t op2)
--- a/dbcon/execplan/simplecolumn_int.h
+++ b/dbcon/execplan/simplecolumn_int.h
@ -32,6 +32,7 @@
 #include "objectreader.h"
 #include "joblisttypes.h"
 #include "rowgroup.h"
+#include "mcs_decimal.h"

 /**
 * Namespace
@ -217,7 +218,7 @@ inline IDB_Decimal SimpleColumn_INT<len>::getDecimalVal(rowgroup::Row& row, bool
        isNull = true;

    fResult.decimalVal.value = (int64_t)row.getIntField<len>(fInputIndex);
-    fResult.decimalVal.precision = 65;
+    fResult.decimalVal.precision = datatypes::INT64MAXPRECISION;
    fResult.decimalVal.scale = 0;
    return fResult.decimalVal;
 }
@ -242,8 +243,6 @@ void SimpleColumn_INT<len>::serialize(messageqcpp::ByteStream& b) const
        case 8:
            b << (ObjectReader::id_t) ObjectReader::SIMPLECOLUMN_INT8;
            break;
-        case 16:
-            std::cout << __FILE__<< ":" << __LINE__ << " Fix for 16 Bytes ?" << std::endl;
    }

    SimpleColumn::serialize(b);
@ -269,8 +268,6 @@ void SimpleColumn_INT<len>::unserialize(messageqcpp::ByteStream& b)
        case 8:
            ObjectReader::checkType(b, ObjectReader::SIMPLECOLUMN_INT8);
            break;
-        case 16:
-            std::cout << __FILE__<< ":" << __LINE__ << " Fix for 16 Bytes ?" << std::endl;
    }

    SimpleColumn::unserialize(b);
--- a/dbcon/execplan/simplecolumn_uint.h
+++ b/dbcon/execplan/simplecolumn_uint.h
@ -32,6 +32,7 @@
 #include "objectreader.h"
 #include "joblisttypes.h"
 #include "rowgroup.h"
+#include "mcs_decimal.h"

 /**
 * Namespace
@ -218,7 +219,8 @@ inline IDB_Decimal SimpleColumn_UINT<len>::getDecimalVal(rowgroup::Row& row, boo
        isNull = true;

    fResult.decimalVal.value = (uint64_t)row.getUintField<len>(fInputIndex);
-    fResult.decimalVal.precision = 65;
+    // WIP MCOL-641
+    fResult.decimalVal.precision = datatypes::INT64MAXPRECISION+1;
    fResult.decimalVal.scale = 0;
    return fResult.decimalVal;
 }