From f6b55c1e18406a371d668e2414d5328641579789 Mon Sep 17 00:00:00 2001 From: Gagan Goel Date: Fri, 11 Dec 2020 14:44:28 -0500 Subject: [PATCH] MCOL-4177 Add support for bulk insertion for wide decimals. 1. This patch adds support for wide decimals with/without scale to cpimport. In addition, INSERT ... SELECT and LDI are also now supported. 2. Logic to compute the number of bytes to convert a binary representation in the buffer to a narrow decimal is also simplified. --- datatypes/mcs_datatype.cpp | 7 +- datatypes/mcs_datatype.h | 7 +- dbcon/mysql/ha_mcs_datatype.h | 282 +++---------------------- utils/dataconvert/dataconvert.cpp | 63 +++++- utils/dataconvert/dataconvert.h | 2 +- writeengine/bulk/we_bulkloadbuffer.cpp | 118 ++++++++--- writeengine/bulk/we_columninfo.cpp | 1 + writeengine/shared/we_type.h | 4 +- writeengine/xml/we_xmljob.cpp | 20 +- 9 files changed, 205 insertions(+), 299 deletions(-) diff --git a/datatypes/mcs_datatype.cpp b/datatypes/mcs_datatype.cpp index 5e3fa225a..e58653c3b 100644 --- a/datatypes/mcs_datatype.cpp +++ b/datatypes/mcs_datatype.cpp @@ -57,7 +57,9 @@ using namespace dataconvert; namespace datatypes { -int128_t SystemCatalog::TypeAttributesStd::decimal128FromString(const std::string& value) const +int128_t +SystemCatalog::TypeAttributesStd::decimal128FromString( + const std::string& value, bool *saturate) const { int128_t result = 0; bool pushWarning = false; @@ -67,7 +69,8 @@ int128_t SystemCatalog::TypeAttributesStd::decimal128FromString(const std::strin *this, pushWarning, noRoundup, - result); + result, + saturate); return result; } diff --git a/datatypes/mcs_datatype.h b/datatypes/mcs_datatype.h index 1d25400ef..4b67429a9 100644 --- a/datatypes/mcs_datatype.h +++ b/datatypes/mcs_datatype.h @@ -167,10 +167,15 @@ public: scale(0), precision(-1) {} + TypeAttributesStd(int32_t w, int32_t s, int32_t p) + :colWidth(w), + scale(s), + precision(p) + {} /** @brief Convenience method to get int128 from a std::string. 
*/ - int128_t decimal128FromString(const std::string& value) const; + int128_t decimal128FromString(const std::string& value, bool *saturate = 0) const; /** @brief The method sets the legacy scale and precision of a wide decimal diff --git a/dbcon/mysql/ha_mcs_datatype.h b/dbcon/mysql/ha_mcs_datatype.h index 8d4d739e1..cb753b454 100644 --- a/dbcon/mysql/ha_mcs_datatype.h +++ b/dbcon/mysql/ha_mcs_datatype.h @@ -188,6 +188,20 @@ public: class WriteBatchFieldMariaDB: public WriteBatchField { + // Maximum number of decimal digits that can be represented in 4 bytes + static const int DIG_PER_DEC = 9; + // See strings/decimal.c + const int dig2bytes[DIG_PER_DEC+1]={0, 1, 1, 2, 2, 3, 3, 4, 4, 4}; + + + // Returns the number of bytes required to store a given number + // of decimal digits + int numDecimalBytes(int digits) + { + return (((digits/DIG_PER_DEC) * 4) + dig2bytes[digits % DIG_PER_DEC]); + } + + public: Field *m_field; const CalpontSystemCatalog::ColType &m_type; @@ -539,264 +553,28 @@ public: size_t ColWriteBatchXDecimal(const uchar *buf, bool nullVal, ColBatchWriter &ci) override { - uint bytesBefore = 1; - uint totalBytes = 9; - - switch (m_type.precision) - { - case 18: - case 17: - case 16: - { - totalBytes = 8; - break; - } - - case 15: - case 14: - { - totalBytes = 7; - break; - } - - case 13: - case 12: - { - totalBytes = 6; - break; - } - - case 11: - { - totalBytes = 5; - break; - } - - case 10: - { - totalBytes = 5; - break; - } - - case 9: - case 8: - case 7: - { - totalBytes = 4; - break; - } - - case 6: - case 5: - { - totalBytes = 3; - break; - } - - case 4: - case 3: - { - totalBytes = 2; - break; - } - - case 2: - case 1: - { - totalBytes = 1; - break; - } - - default: - break; - } - - switch (m_type.scale) - { - case 0: - { - bytesBefore = totalBytes; - break; - } - - case 1: //1 byte for digits after decimal point - { - if ((m_type.precision != 16) && (m_type.precision != 14) - && (m_type.precision != 12) && (m_type.precision != 10) - && 
(m_type.precision != 7) && (m_type.precision != 5) - && (m_type.precision != 3) && (m_type.precision != 1)) - totalBytes++; - - bytesBefore = totalBytes - 1; - break; - } - - case 2: //1 byte for digits after decimal point - { - if ((m_type.precision == 18) || (m_type.precision == 9)) - totalBytes++; - - bytesBefore = totalBytes - 1; - break; - } - - case 3: //2 bytes for digits after decimal point - { - if ((m_type.precision != 16) && (m_type.precision != 14) - && (m_type.precision != 12) && (m_type.precision != 7) - && (m_type.precision != 5) && (m_type.precision != 3)) - totalBytes++; - - bytesBefore = totalBytes - 2; - break; - } - - case 4: - { - if ((m_type.precision == 18) || (m_type.precision == 11) - || (m_type.precision == 9)) - totalBytes++; - - bytesBefore = totalBytes - 2; - break; - - } - - case 5: - { - if ((m_type.precision != 16) && (m_type.precision != 14) - && (m_type.precision != 7) && (m_type.precision != 5)) - totalBytes++; - - bytesBefore = totalBytes - 3; - break; - } - - case 6: - { - if ((m_type.precision == 18) || (m_type.precision == 13) - || (m_type.precision == 11) || (m_type.precision == 9)) - totalBytes++; - - bytesBefore = totalBytes - 3; - break; - } - - case 7: - { - if ((m_type.precision != 16) && (m_type.precision != 7)) - totalBytes++; - - bytesBefore = totalBytes - 4; - break; - } - - case 8: - { - if ((m_type.precision == 18) || (m_type.precision == 15) - || (m_type.precision == 13) || (m_type.precision == 11) - || (m_type.precision == 9)) - totalBytes++; - - bytesBefore = totalBytes - 4;; - break; - } - - case 9: - { - bytesBefore = totalBytes - 4;; - break; - } - - case 10: - { - if ((m_type.precision != 16) && (m_type.precision != 14) - && (m_type.precision != 12) && (m_type.precision != 10)) - totalBytes++; - - bytesBefore = totalBytes - 5;; - break; - } - - case 11: - { - if (m_type.precision == 18) - totalBytes++; - - bytesBefore = totalBytes - 5; - break; - } - - case 12: - { - if ((m_type.precision != 16) && 
(m_type.precision != 14) - && (m_type.precision != 12)) - totalBytes++; - - bytesBefore = totalBytes - 6; - break; - } - - case 13: - { - if (m_type.precision == 18) - totalBytes++; - - bytesBefore = totalBytes - 6; - break; - } - - case 14: - { - if ((m_type.precision != 16) && (m_type.precision != 14)) - totalBytes++; - - bytesBefore = totalBytes - 7; - break; - } - - case 15: - { - if (m_type.precision == 18) - totalBytes++; - - bytesBefore = totalBytes - 7; - break; - } - - case 16: - { - if (m_type.precision != 16) - totalBytes++; - - bytesBefore = totalBytes - 8; - break; - } - - case 17: - { - if (m_type.precision == 18) - totalBytes++; - - bytesBefore = totalBytes - 8; - break; - } - - case 18: - { - bytesBefore = totalBytes - 8; - break; - } - - default: - break; - } + uint bytesBefore = numDecimalBytes(m_type.precision - m_type.scale); + uint totalBytes = bytesBefore + numDecimalBytes(m_type.scale); if (nullVal && (m_type.constraintType != CalpontSystemCatalog::NOTNULL_CONSTRAINT)) { fprintf(ci.filePtr(), "%c", ci.delimiter()); //printf("|"); } + else if (m_type.precision > datatypes::INT64MAXPRECISION) + { + // TODO MCOL-641 The below else block for narrow decimal + // i.e. (m_type.precision <= datatypes::INT64MAXPRECISION) + // converts the decimal binary representation in buf directly + // to a string, while here, the my_decimal ctor first calls + // bin2decimal() on buf, and then we construct the string from + // the my_decimal. This approach might be a bit slower than the + // narrow decimal approach. 
+ my_decimal dec(buf, m_type.precision, m_type.scale); + String str; + dec.to_string(&str); + fprintf(ci.filePtr(), "%s%c", str.c_ptr(), ci.delimiter()); + } else { uint32_t mask [5] = {0, 0xFF, 0xFFFF, 0xFFFFFF, 0xFFFFFFFF}; diff --git a/utils/dataconvert/dataconvert.cpp b/utils/dataconvert/dataconvert.cpp index c0bcc8e90..e2027ef60 100644 --- a/utils/dataconvert/dataconvert.cpp +++ b/utils/dataconvert/dataconvert.cpp @@ -111,7 +111,7 @@ void number_int_value(const string& data, const datatypes::SystemCatalog::TypeAttributesStd& ct, bool& pushwarning, bool noRoundup, - T& intVal) + T& intVal, bool* saturate) { // copy of the original input string valStr(data); @@ -304,11 +304,17 @@ void number_int_value(const string& data, { intVal = MIN_TINYINT; pushwarning = true; + + if (saturate) + *saturate = true; } else if (intVal > MAX_TINYINT) { intVal = MAX_TINYINT; pushwarning = true; + + if (saturate) + *saturate = true; } break; @@ -318,11 +324,17 @@ void number_int_value(const string& data, { intVal = MIN_SMALLINT; pushwarning = true; + + if (saturate) + *saturate = true; } else if (intVal > MAX_SMALLINT) { intVal = MAX_SMALLINT; pushwarning = true; + + if (saturate) + *saturate = true; } break; @@ -332,11 +344,17 @@ void number_int_value(const string& data, { intVal = MIN_MEDINT; pushwarning = true; + + if (saturate) + *saturate = true; } else if (intVal > MAX_MEDINT) { intVal = MAX_MEDINT; pushwarning = true; + + if (saturate) + *saturate = true; } break; @@ -346,11 +364,17 @@ void number_int_value(const string& data, { intVal = MIN_INT; pushwarning = true; + + if (saturate) + *saturate = true; } else if (intVal > MAX_INT) { intVal = MAX_INT; pushwarning = true; + + if (saturate) + *saturate = true; } break; @@ -360,6 +384,9 @@ void number_int_value(const string& data, { intVal = MIN_BIGINT; pushwarning = true; + + if (saturate) + *saturate = true; } break; @@ -374,6 +401,9 @@ void number_int_value(const string& data, { intVal = tmp + 2; pushwarning = true; + + if 
(saturate) + *saturate = true; } } else if (ct.colWidth == 8) @@ -382,6 +412,9 @@ void number_int_value(const string& data, { intVal = MIN_BIGINT; pushwarning = true; + + if (saturate) + *saturate = true; } } else if (ct.colWidth == 4) @@ -390,11 +423,17 @@ void number_int_value(const string& data, { intVal = MIN_INT; pushwarning = true; + + if (saturate) + *saturate = true; } else if (intVal > MAX_INT) { intVal = MAX_INT; pushwarning = true; + + if (saturate) + *saturate = true; } } else if (ct.colWidth == 2) @@ -403,11 +442,17 @@ void number_int_value(const string& data, { intVal = MIN_SMALLINT; pushwarning = true; + + if (saturate) + *saturate = true; } else if (intVal > MAX_SMALLINT) { intVal = MAX_SMALLINT; pushwarning = true; + + if (saturate) + *saturate = true; } } else if (ct.colWidth == 1) @@ -416,11 +461,17 @@ void number_int_value(const string& data, { intVal = MIN_TINYINT; pushwarning = true; + + if (saturate) + *saturate = true; } else if (intVal > MAX_TINYINT) { intVal = MAX_TINYINT; pushwarning = true; + + if (saturate) + *saturate = true; } } @@ -454,11 +505,17 @@ void number_int_value(const string& data, { intVal = rangeUp; pushwarning = true; + + if (saturate) + *saturate = true; } else if (intVal < rangeLow) { intVal = rangeLow; pushwarning = true; + + if (saturate) + *saturate = true; } } } @@ -470,7 +527,7 @@ void number_int_value(const std::string& data, const datatypes::SystemCatalog::TypeAttributesStd& ct, bool& pushwarning, bool noRoundup, - int64_t& intVal); + int64_t& intVal, bool* saturate); template void number_int_value(const std::string& data, @@ -478,7 +535,7 @@ void number_int_value(const std::string& data, const datatypes::SystemCatalog::TypeAttributesStd& ct, bool& pushwarning, bool noRoundup, - int128_t& intVal); + int128_t& intVal, bool* saturate); uint64_t number_uint_value(const string& data, cscDataType typeCode, diff --git a/utils/dataconvert/dataconvert.h b/utils/dataconvert/dataconvert.h index 5cbd28929..cd06707c8 100644 
--- a/utils/dataconvert/dataconvert.h +++ b/utils/dataconvert/dataconvert.h @@ -882,7 +882,7 @@ void number_int_value(const std::string& data, const datatypes::SystemCatalog::TypeAttributesStd &ct, bool& pushwarning, bool noRoundup, - T& intVal); + T& intVal, bool* saturate = 0); uint64_t number_uint_value(const string& data, cscDataType typeCode, diff --git a/writeengine/bulk/we_bulkloadbuffer.cpp b/writeengine/bulk/we_bulkloadbuffer.cpp index e0fadf1f8..26d7c91ab 100644 --- a/writeengine/bulk/we_bulkloadbuffer.cpp +++ b/writeengine/bulk/we_bulkloadbuffer.cpp @@ -940,9 +940,7 @@ void BulkLoadBuffer::convert(char* field, int fieldLength, // BIG INT //---------------------------------------------------------------------- case WriteEngine::WR_LONGLONG: - case WriteEngine::WR_BINARY: { - // TODO MCOL-641 Add full support here. bool bSatVal = false; if ( column.dataType != CalpontSystemCatalog::DATETIME && @@ -987,18 +985,9 @@ void BulkLoadBuffer::convert(char* field, int fieldLength, if ( (column.dataType == CalpontSystemCatalog::DECIMAL) || (column.dataType == CalpontSystemCatalog::UDECIMAL)) { - if (LIKELY(width == datatypes::MAXDECIMALWIDTH)) - { - bool saturate = false; - bigllVal = dataconvert::string_to_ll(string(field), saturate); - // TODO MCOL-641 check saturate - } - else if (width <= 8) - { - // errno is initialized and set in convertDecimalString - llVal = Convertor::convertDecimalString( - field, fieldLength, column.scale ); - } + // errno is initialized and set in convertDecimalString + llVal = Convertor::convertDecimalString( + field, fieldLength, column.scale ); } else { @@ -1024,31 +1013,17 @@ void BulkLoadBuffer::convert(char* field, int fieldLength, bSatVal = true; } - if (bSatVal) bufStats.satCount++; // Update min/max range - if (width <= 8) - { - if (llVal < bufStats.minBufferVal) - bufStats.minBufferVal = llVal; + if (llVal < bufStats.minBufferVal) + bufStats.minBufferVal = llVal; - if (llVal > bufStats.maxBufferVal) - bufStats.maxBufferVal = 
llVal; + if (llVal > bufStats.maxBufferVal) + bufStats.maxBufferVal = llVal; - pVal = &llVal; - } - else - { - if (bigllVal < bufStats.bigMinBufferVal) - bufStats.bigMinBufferVal = bigllVal; - - if (bigllVal > bufStats.bigMaxBufferVal) - bufStats.bigMaxBufferVal = bigllVal; - - pVal = &bigllVal; - } + pVal = &llVal; } else if (column.dataType == CalpontSystemCatalog::TIME) { @@ -1212,6 +1187,75 @@ void BulkLoadBuffer::convert(char* field, int fieldLength, break; } + //---------------------------------------------------------------------- + // WIDE DECIMAL + //---------------------------------------------------------------------- + case WriteEngine::WR_BINARY: + { + bool bSatVal = false; + + if (nullFlag) + { + if (!column.autoIncFlag) + { + if (column.fWithDefault) + { + bigllVal = column.fDefaultWideDecimal; + // fall through to update saturation and min/max + } + else + { + bigllVal = datatypes::Decimal128Null; + pVal = &bigllVal; + break; + } + } + else + { + // TODO MCOL-641 Add support for int128_t version of + // fAutoIncNextValue + bigllVal = fAutoIncNextValue++; + } + } + else + { + if (fImportDataMode != IMPORT_DATA_TEXT) + { + memcpy(&bigllVal, field, sizeof(bigllVal)); + } + else + { + if (isTrueWord(const_cast<const char*>(field), fieldLength)) + { + strcpy(field, "1"); + fieldLength = 1; + } + + bool dummy = false; + // Value saturation to 9999... or -9999... 
is handled by + // number_int_value(), and the bSatVal flag is set to true + dataconvert::number_int_value(string(field), column.dataType, + datatypes::SystemCatalog::TypeAttributesStd( + column.width, column.scale, column.precision), + dummy, false, bigllVal, &bSatVal); + } + } + + if (bSatVal) + bufStats.satCount++; + + // Update min/max range + if (bigllVal < bufStats.bigMinBufferVal) + bufStats.bigMinBufferVal = bigllVal; + + if (bigllVal > bufStats.bigMaxBufferVal) + bufStats.bigMaxBufferVal = bigllVal; + + pVal = &bigllVal; + + break; + } + //---------------------------------------------------------------------- // UNSIGNED BIG INT //---------------------------------------------------------------------- @@ -3395,6 +3439,14 @@ bool BulkLoadBuffer::isBinaryFieldNull(void* val, break; } + case WriteEngine::WR_BINARY: + { + if ((*((int128_t*)val)) == datatypes::Decimal128Null) + isNullFlag = true; + + break; + } + default: { break; diff --git a/writeengine/bulk/we_columninfo.cpp b/writeengine/bulk/we_columninfo.cpp index f0e40fb9d..853ef8b65 100644 --- a/writeengine/bulk/we_columninfo.cpp +++ b/writeengine/bulk/we_columninfo.cpp @@ -204,6 +204,7 @@ ColumnInfo::ColumnInfo(Log* logger, case WriteEngine::WR_ULONGLONG: case WriteEngine::WR_UMEDINT: case WriteEngine::WR_UINT: + case WriteEngine::WR_BINARY: default: { fColExtInf = new ColExtInf(column.mapOid, logger); diff --git a/writeengine/shared/we_type.h b/writeengine/shared/we_type.h index 824058d7f..dcda94bb4 100644 --- a/writeengine/shared/we_type.h +++ b/writeengine/shared/we_type.h @@ -367,6 +367,7 @@ struct JobColumn /** @brief Job Column Structure */ long long fDefaultInt; /** @brief Integer column default */ unsigned long long fDefaultUInt; /** @brief UnsignedInt col default*/ double fDefaultDbl; /** @brief Dbl/Flt column default */ + int128_t fDefaultWideDecimal; /** @brief Wide decimal column default */ std::string fDefaultChr; /** @brief Char column default */ JobColumn() : mapOid(0), 
dataType(execplan::CalpontSystemCatalog::INT), weType(WR_INT), typeName("integer"), emptyVal(0), @@ -376,7 +377,8 @@ struct JobColumn /** @brief Job Column Structure */ compressionType(0), autoIncFlag(false), fMinIntSat(0), fMaxIntSat(0), fMinDblSat(0), fMaxDblSat(0), fWithDefault(false), - fDefaultInt(0), fDefaultUInt(0), fDefaultDbl(0.0) + fDefaultInt(0), fDefaultUInt(0), fDefaultDbl(0.0), + fDefaultWideDecimal(0) { } }; diff --git a/writeengine/xml/we_xmljob.cpp b/writeengine/xml/we_xmljob.cpp index f93859b61..257b50cf7 100644 --- a/writeengine/xml/we_xmljob.cpp +++ b/writeengine/xml/we_xmljob.cpp @@ -1087,13 +1087,21 @@ void XMLJob::fillInXMLDataNotNullDefault( case execplan::CalpontSystemCatalog::DECIMAL: case execplan::CalpontSystemCatalog::UDECIMAL: { - col.fDefaultInt = Convertor::convertDecimalString( - col_defaultValue.c_str(), - col_defaultValue.length(), - colType.scale); + if (LIKELY(colType.colWidth == datatypes::MAXDECIMALWIDTH)) + { + col.fDefaultWideDecimal = colType.decimal128FromString( + col_defaultValue, &bDefaultConvertError); + } + else + { + col.fDefaultInt = Convertor::convertDecimalString( + col_defaultValue.c_str(), + col_defaultValue.length(), + colType.scale); - if (errno == ERANGE) - bDefaultConvertError = true; + if (errno == ERANGE) + bDefaultConvertError = true; + } break; }