Merge pull request #1677 from tntnatbry/MCOL-4177-2

MCOL-4177 Add support for bulk insertion for wide decimals.
2025-04-18 21:44:02 +03:00 · 2020-12-18 12:37:23 +03:00 · 2020-12-18 12:37:23 +03:00 · bfe90be3db
commit bfe90be3db
parent 45197c49d1 f6b55c1e18
9 changed files with 205 additions and 299 deletions
--- a/datatypes/mcs_datatype.cpp
+++ b/datatypes/mcs_datatype.cpp
@ -57,7 +57,9 @@ using namespace dataconvert;
 namespace datatypes
 {

-int128_t SystemCatalog::TypeAttributesStd::decimal128FromString(const std::string& value) const
+int128_t
+SystemCatalog::TypeAttributesStd::decimal128FromString(
+    const std::string& value, bool *saturate) const
 {
  int128_t result = 0;
  bool pushWarning = false;
@ -67,7 +69,8 @@ int128_t SystemCatalog::TypeAttributesStd::decimal128FromString(const std::strin
                                          *this,
                                          pushWarning,
                                          noRoundup,
-                                          result);
+                                          result,
+                                          saturate);
  return result;
 }

--- a/datatypes/mcs_datatype.h
+++ b/datatypes/mcs_datatype.h
@ -167,10 +167,15 @@ public:
          scale(0),
          precision(-1)
      {}
+      TypeAttributesStd(int32_t w, int32_t s, int32_t p)
+         :colWidth(w),
+          scale(s),
+          precision(p)
+      {}
      /**
          @brief Convenience method to get int128 from a std::string.
      */
-      int128_t decimal128FromString(const std::string& value) const;
+      int128_t decimal128FromString(const std::string& value, bool *saturate = 0) const;

      /**
          @brief The method sets the legacy scale and precision of a wide decimal
--- a/dbcon/mysql/ha_mcs_datatype.h
+++ b/dbcon/mysql/ha_mcs_datatype.h
@ -188,6 +188,20 @@ public:

 class WriteBatchFieldMariaDB: public WriteBatchField
 {
+  // Maximum number of decimal digits that can be represented in 4 bytes
+  static const int DIG_PER_DEC = 9;
+  // See strings/decimal.c
+  const int dig2bytes[DIG_PER_DEC+1]={0, 1, 1, 2, 2, 3, 3, 4, 4, 4};
+
+
+  // Returns the number of bytes required to store a given number
+  // of decimal digits
+  int numDecimalBytes(int digits)
+  {
+    return (((digits/DIG_PER_DEC) * 4) + dig2bytes[digits % DIG_PER_DEC]);
+  }
+
+
 public:
  Field *m_field;
  const CalpontSystemCatalog::ColType &m_type;
@ -539,264 +553,28 @@ public:

  size_t ColWriteBatchXDecimal(const uchar *buf, bool nullVal, ColBatchWriter &ci) override 
  {
-    uint bytesBefore = 1;
-    uint totalBytes = 9;
-
-    switch (m_type.precision)
-    {
-      case 18:
-      case 17:
-      case 16:
-      {
-        totalBytes = 8;
-        break;
-      }
-
-      case 15:
-      case 14:
-      {
-        totalBytes = 7;
-        break;
-      }
-
-      case 13:
-      case 12:
-      {
-        totalBytes =  6;
-        break;
-      }
-
-      case 11:
-      {
-        totalBytes =  5;
-        break;
-      }
-
-      case 10:
-      {
-        totalBytes =  5;
-        break;
-      }
-
-      case 9:
-      case 8:
-      case 7:
-      {
-        totalBytes =  4;
-        break;
-      }
-
-      case 6:
-      case 5:
-      {
-        totalBytes = 3;
-        break;
-      }
-
-      case 4:
-      case 3:
-      {
-        totalBytes =  2;
-        break;
-      }
-
-      case 2:
-      case 1:
-      {
-        totalBytes = 1;
-        break;
-      }
-
-      default:
-        break;
-    }
-
-    switch (m_type.scale)
-    {
-      case 0:
-      {
-        bytesBefore = totalBytes;
-        break;
-      }
-
-      case 1: //1 byte for digits after decimal point
-      {
-        if ((m_type.precision != 16) && (m_type.precision != 14)
-            && (m_type.precision != 12) && (m_type.precision != 10)
-            && (m_type.precision != 7) && (m_type.precision != 5)
-            && (m_type.precision != 3) && (m_type.precision != 1))
-          totalBytes++;
-
-        bytesBefore = totalBytes - 1;
-        break;
-      }
-
-      case 2: //1 byte for digits after decimal point
-      {
-        if ((m_type.precision == 18) || (m_type.precision == 9))
-          totalBytes++;
-
-        bytesBefore = totalBytes - 1;
-        break;
-      }
-
-      case 3: //2 bytes for digits after decimal point
-      {
-        if ((m_type.precision != 16) && (m_type.precision != 14)
-            && (m_type.precision != 12) && (m_type.precision != 7)
-            && (m_type.precision != 5) && (m_type.precision != 3))
-          totalBytes++;
-
-        bytesBefore = totalBytes - 2;
-        break;
-      }
-
-      case 4:
-      {
-        if ((m_type.precision == 18) || (m_type.precision == 11)
-            || (m_type.precision == 9))
-          totalBytes++;
-
-        bytesBefore = totalBytes - 2;
-        break;
-
-      }
-
-      case 5:
-      {
-        if ((m_type.precision != 16) && (m_type.precision != 14)
-            && (m_type.precision != 7) && (m_type.precision != 5))
-          totalBytes++;
-
-        bytesBefore = totalBytes - 3;
-        break;
-      }
-
-      case 6:
-      {
-        if ((m_type.precision == 18) || (m_type.precision == 13)
-            || (m_type.precision == 11) || (m_type.precision == 9))
-          totalBytes++;
-
-        bytesBefore = totalBytes - 3;
-        break;
-      }
-
-      case 7:
-      {
-        if ((m_type.precision != 16) && (m_type.precision != 7))
-          totalBytes++;
-
-        bytesBefore = totalBytes - 4;
-        break;
-      }
-
-      case 8:
-      {
-        if ((m_type.precision == 18) || (m_type.precision == 15)
-            || (m_type.precision == 13) || (m_type.precision == 11)
-            || (m_type.precision == 9))
-          totalBytes++;
-
-        bytesBefore = totalBytes - 4;;
-        break;
-      }
-
-      case 9:
-      {
-        bytesBefore = totalBytes - 4;;
-        break;
-      }
-
-      case 10:
-      {
-        if ((m_type.precision != 16) && (m_type.precision != 14)
-            && (m_type.precision != 12) && (m_type.precision != 10))
-          totalBytes++;
-
-        bytesBefore = totalBytes - 5;;
-        break;
-      }
-
-      case 11:
-      {
-        if (m_type.precision == 18)
-          totalBytes++;
-
-        bytesBefore = totalBytes - 5;
-        break;
-      }
-
-      case 12:
-      {
-        if ((m_type.precision != 16) && (m_type.precision != 14)
-            && (m_type.precision != 12))
-          totalBytes++;
-
-        bytesBefore = totalBytes - 6;
-        break;
-      }
-
-      case 13:
-      {
-        if (m_type.precision == 18)
-          totalBytes++;
-
-        bytesBefore = totalBytes - 6;
-        break;
-      }
-
-      case 14:
-      {
-        if ((m_type.precision != 16) && (m_type.precision != 14))
-          totalBytes++;
-
-        bytesBefore = totalBytes - 7;
-        break;
-      }
-
-      case 15:
-      {
-        if (m_type.precision == 18)
-          totalBytes++;
-
-        bytesBefore = totalBytes - 7;
-        break;
-      }
-
-      case 16:
-      {
-        if (m_type.precision != 16)
-          totalBytes++;
-
-        bytesBefore = totalBytes - 8;
-        break;
-      }
-
-      case 17:
-      {
-        if (m_type.precision == 18)
-          totalBytes++;
-
-        bytesBefore = totalBytes - 8;
-        break;
-      }
-
-      case 18:
-      {
-        bytesBefore = totalBytes - 8;
-        break;
-      }
-
-      default:
-        break;
-    }
+    uint bytesBefore = numDecimalBytes(m_type.precision - m_type.scale);
+    uint totalBytes = bytesBefore + numDecimalBytes(m_type.scale);

    if (nullVal && (m_type.constraintType != CalpontSystemCatalog::NOTNULL_CONSTRAINT))
    {
      fprintf(ci.filePtr(), "%c", ci.delimiter());
      //printf("|");
    }
+    else if (m_type.precision > datatypes::INT64MAXPRECISION)
+    {
+      // TODO MCOL-641 The else block below handles narrow decimals,
+      // i.e. (m_type.precision <= datatypes::INT64MAXPRECISION), and
+      // converts the decimal binary representation in buf directly
+      // to a string. Here, for wide decimals, the my_decimal ctor
+      // first calls bin2decimal() on buf, and the string is then built
+      // from the my_decimal. This may be somewhat slower than the
+      // narrow decimal approach.
+      my_decimal dec(buf, m_type.precision, m_type.scale);
+      String str;
+      dec.to_string(&str);
+      fprintf(ci.filePtr(), "%s%c", str.c_ptr(), ci.delimiter());
+    }
    else
    {
      uint32_t mask [5] = {0, 0xFF, 0xFFFF, 0xFFFFFF, 0xFFFFFFFF};
--- a/utils/dataconvert/dataconvert.cpp
+++ b/utils/dataconvert/dataconvert.cpp
@ -111,7 +111,7 @@ void number_int_value(const string& data,
                      const datatypes::SystemCatalog::TypeAttributesStd& ct,
                      bool& pushwarning,
                      bool noRoundup,
-                      T& intVal)
+                      T& intVal, bool* saturate)
 {
    // copy of the original input
    string valStr(data);
@ -304,11 +304,17 @@ void number_int_value(const string& data,
            {
                intVal = MIN_TINYINT;
                pushwarning = true;
+
+                if (saturate)
+                    *saturate = true;
            }
            else if (intVal > MAX_TINYINT)
            {
                intVal = MAX_TINYINT;
                pushwarning = true;
+
+                if (saturate)
+                    *saturate = true;
            }

            break;
@ -318,11 +324,17 @@ void number_int_value(const string& data,
            {
                intVal = MIN_SMALLINT;
                pushwarning = true;
+
+                if (saturate)
+                    *saturate = true;
            }
            else if (intVal > MAX_SMALLINT)
            {
                intVal = MAX_SMALLINT;
                pushwarning = true;
+
+                if (saturate)
+                    *saturate = true;
            }

            break;
@ -332,11 +344,17 @@ void number_int_value(const string& data,
            {
                intVal = MIN_MEDINT;
                pushwarning = true;
+
+                if (saturate)
+                    *saturate = true;
            }
            else if (intVal > MAX_MEDINT)
            {
                intVal = MAX_MEDINT;
                pushwarning = true;
+
+                if (saturate)
+                    *saturate = true;
            }

            break;
@ -346,11 +364,17 @@ void number_int_value(const string& data,
            {
                intVal = MIN_INT;
                pushwarning = true;
+
+                if (saturate)
+                    *saturate = true;
            }
            else if (intVal > MAX_INT)
            {
                intVal = MAX_INT;
                pushwarning = true;
+
+                if (saturate)
+                    *saturate = true;
            }

            break;
@ -360,6 +384,9 @@ void number_int_value(const string& data,
            {
                intVal = MIN_BIGINT;
                pushwarning = true;
+
+                if (saturate)
+                    *saturate = true;
            }

            break;
@ -374,6 +401,9 @@ void number_int_value(const string& data,
                {
                    intVal = tmp + 2;
                    pushwarning = true;
+
+                    if (saturate)
+                        *saturate = true;
                }
            }
            else if (ct.colWidth == 8)
@ -382,6 +412,9 @@ void number_int_value(const string& data,
                {
                    intVal = MIN_BIGINT;
                    pushwarning = true;
+
+                    if (saturate)
+                        *saturate = true;
                }
            }
            else if (ct.colWidth == 4)
@ -390,11 +423,17 @@ void number_int_value(const string& data,
                {
                    intVal = MIN_INT;
                    pushwarning = true;
+
+                    if (saturate)
+                        *saturate = true;
                }
                else if (intVal > MAX_INT)
                {
                    intVal = MAX_INT;
                    pushwarning = true;
+
+                    if (saturate)
+                        *saturate = true;
                }
            }
            else if (ct.colWidth == 2)
@ -403,11 +442,17 @@ void number_int_value(const string& data,
                {
                    intVal = MIN_SMALLINT;
                    pushwarning = true;
+
+                    if (saturate)
+                        *saturate = true;
                }
                else if (intVal > MAX_SMALLINT)
                {
                    intVal = MAX_SMALLINT;
                    pushwarning = true;
+
+                    if (saturate)
+                        *saturate = true;
                }
            }
            else if (ct.colWidth == 1)
@ -416,11 +461,17 @@ void number_int_value(const string& data,
                {
                    intVal = MIN_TINYINT;
                    pushwarning = true;
+
+                    if (saturate)
+                        *saturate = true;
                }
                else if (intVal > MAX_TINYINT)
                {
                    intVal = MAX_TINYINT;
                    pushwarning = true;
+
+                    if (saturate)
+                        *saturate = true;
                }
            }

@ -454,11 +505,17 @@ void number_int_value(const string& data,
        {
            intVal = rangeUp;
            pushwarning = true;
+
+            if (saturate)
+                *saturate = true;
        }
        else if (intVal < rangeLow)
        {
            intVal = rangeLow;
            pushwarning = true;
+
+            if (saturate)
+                *saturate = true;
        }
    }
 }
@ -470,7 +527,7 @@ void number_int_value<int64_t>(const std::string& data,
                               const datatypes::SystemCatalog::TypeAttributesStd& ct,
                               bool& pushwarning,
                               bool noRoundup,
-                               int64_t& intVal);
+                               int64_t& intVal, bool* saturate);

 template
 void number_int_value<int128_t>(const std::string& data,
@ -478,7 +535,7 @@ void number_int_value<int128_t>(const std::string& data,
                                const datatypes::SystemCatalog::TypeAttributesStd& ct,
                                bool& pushwarning,
                                bool noRoundup,
-                                int128_t& intVal);
+                                int128_t& intVal, bool* saturate);

 uint64_t number_uint_value(const string& data,
                           cscDataType typeCode,
--- a/utils/dataconvert/dataconvert.h
+++ b/utils/dataconvert/dataconvert.h
@ -882,7 +882,7 @@ void number_int_value(const std::string& data,
                      const datatypes::SystemCatalog::TypeAttributesStd &ct,
                      bool& pushwarning,
                      bool noRoundup,
-                      T& intVal);
+                      T& intVal, bool* saturate = 0);

 uint64_t number_uint_value(const string& data,
                           cscDataType typeCode,
--- a/writeengine/bulk/we_bulkloadbuffer.cpp
+++ b/writeengine/bulk/we_bulkloadbuffer.cpp
@ -940,9 +940,7 @@ void BulkLoadBuffer::convert(char* field, int fieldLength,
        // BIG INT
        //----------------------------------------------------------------------
        case WriteEngine::WR_LONGLONG:
-        case WriteEngine::WR_BINARY:
        {
-            // TODO MCOL-641 Add full support here.
            bool bSatVal = false;

            if ( column.dataType != CalpontSystemCatalog::DATETIME &&
@ -987,18 +985,9 @@ void BulkLoadBuffer::convert(char* field, int fieldLength,
                        if ( (column.dataType == CalpontSystemCatalog::DECIMAL) ||
                                (column.dataType == CalpontSystemCatalog::UDECIMAL))
                        {
-                            if (LIKELY(width == datatypes::MAXDECIMALWIDTH))
-                            {
-                                bool saturate = false;
-                                bigllVal = dataconvert::string_to_ll<int128_t>(string(field), saturate);
-                                // TODO MCOL-641 check saturate
-                            }
-                            else if (width <= 8)
-                            {
-                                // errno is initialized and set in convertDecimalString
-                                llVal = Convertor::convertDecimalString(
-                                            field, fieldLength, column.scale );
-                            }
+                            // errno is initialized and set in convertDecimalString
+                            llVal = Convertor::convertDecimalString(
+                                        field, fieldLength, column.scale );
                        }
                        else
                        {
@ -1024,31 +1013,17 @@ void BulkLoadBuffer::convert(char* field, int fieldLength,
                    bSatVal = true;
                }

-
                if (bSatVal)
                    bufStats.satCount++;

                // Update min/max range
-                if (width <= 8)
-                {
-                    if (llVal < bufStats.minBufferVal)
-                        bufStats.minBufferVal = llVal;
+                if (llVal < bufStats.minBufferVal)
+                    bufStats.minBufferVal = llVal;

-                    if (llVal > bufStats.maxBufferVal)
-                        bufStats.maxBufferVal = llVal;
+                if (llVal > bufStats.maxBufferVal)
+                    bufStats.maxBufferVal = llVal;

-                    pVal = &llVal;
-                }
-                else
-                {
-                    if (bigllVal < bufStats.bigMinBufferVal)
-                        bufStats.bigMinBufferVal = bigllVal;
-
-                    if (bigllVal > bufStats.bigMaxBufferVal)
-                        bufStats.bigMaxBufferVal = bigllVal;
-
-                    pVal = &bigllVal;
-                }
+                pVal = &llVal;
            }
            else if (column.dataType == CalpontSystemCatalog::TIME)
            {
@ -1212,6 +1187,75 @@ void BulkLoadBuffer::convert(char* field, int fieldLength,
            break;
        }

+        //----------------------------------------------------------------------
+        // WIDE DECIMAL
+        //----------------------------------------------------------------------
+        case WriteEngine::WR_BINARY:
+        {
+            bool bSatVal = false;
+
+            if (nullFlag)
+            {
+                if (!column.autoIncFlag)
+                {
+                    if (column.fWithDefault)
+                    {
+                        bigllVal = column.fDefaultWideDecimal;
+                        // fall through to update saturation and min/max
+                    }
+                    else
+                    {
+                        bigllVal = datatypes::Decimal128Null;
+                        pVal = &bigllVal;
+                        break;
+                    }
+                }
+                else
+                {
+                    // TODO MCOL-641 Add support for int128_t version of
+                    // fAutoIncNextValue
+                    bigllVal = fAutoIncNextValue++;
+                }
+            }
+            else
+            {
+                if (fImportDataMode != IMPORT_DATA_TEXT)
+                {
+                    memcpy(&bigllVal, field, sizeof(bigllVal));
+                }
+                else
+                {
+                    if (isTrueWord(const_cast<const char*>(field), fieldLength))
+                    {
+                        strcpy(field, "1");
+                        fieldLength = 1;
+                    }
+
+                    bool dummy = false;
+                    // Saturation to 9999... or -9999... is handled inside
+                    // number_int_value(), which sets bSatVal to true when it occurs
+                    dataconvert::number_int_value(string(field), column.dataType,
+                        datatypes::SystemCatalog::TypeAttributesStd(
+                            column.width, column.scale, column.precision),
+                        dummy, false, bigllVal, &bSatVal);
+                }
+            }
+
+            if (bSatVal)
+                bufStats.satCount++;
+
+            // Update min/max range
+            if (bigllVal < bufStats.bigMinBufferVal)
+                bufStats.bigMinBufferVal = bigllVal;
+
+            if (bigllVal > bufStats.bigMaxBufferVal)
+                bufStats.bigMaxBufferVal = bigllVal;
+
+            pVal = &bigllVal;
+
+            break;
+        }
+
        //----------------------------------------------------------------------
        // UNSIGNED BIG INT
        //----------------------------------------------------------------------
@ -3395,6 +3439,14 @@ bool BulkLoadBuffer::isBinaryFieldNull(void* val,
            break;
        }

+        case WriteEngine::WR_BINARY:
+        {
+            if ((*((int128_t*)val)) == datatypes::Decimal128Null)
+                isNullFlag = true;
+
+            break;
+        }
+
        default:
        {
            break;
--- a/writeengine/bulk/we_columninfo.cpp
+++ b/writeengine/bulk/we_columninfo.cpp
@ -204,6 +204,7 @@ ColumnInfo::ColumnInfo(Log*             logger,
        case WriteEngine::WR_ULONGLONG:
        case WriteEngine::WR_UMEDINT:
        case WriteEngine::WR_UINT:
+        case WriteEngine::WR_BINARY:
        default:
        {
            fColExtInf = new ColExtInf(column.mapOid, logger);
--- a/writeengine/shared/we_type.h
+++ b/writeengine/shared/we_type.h
@ -367,6 +367,7 @@ struct JobColumn                        /** @brief Job Column Structure */
    long long      fDefaultInt;         /** @brief Integer column default */
    unsigned long long fDefaultUInt;    /** @brief UnsignedInt col default*/
    double         fDefaultDbl;         /** @brief Dbl/Flt column default */
+    int128_t       fDefaultWideDecimal; /** @brief Wide decimal column default */
    std::string    fDefaultChr;         /** @brief Char column default */
    JobColumn() : mapOid(0), dataType(execplan::CalpontSystemCatalog::INT), weType(WR_INT),
        typeName("integer"), emptyVal(0),
@ -376,7 +377,8 @@ struct JobColumn                        /** @brief Job Column Structure */
        compressionType(0), autoIncFlag(false),
        fMinIntSat(0), fMaxIntSat(0),
        fMinDblSat(0), fMaxDblSat(0), fWithDefault(false),
-        fDefaultInt(0), fDefaultUInt(0), fDefaultDbl(0.0)
+        fDefaultInt(0), fDefaultUInt(0), fDefaultDbl(0.0),
+        fDefaultWideDecimal(0)
    { }
 };

--- a/writeengine/xml/we_xmljob.cpp
+++ b/writeengine/xml/we_xmljob.cpp
@ -1087,13 +1087,21 @@ void XMLJob::fillInXMLDataNotNullDefault(
            case execplan::CalpontSystemCatalog::DECIMAL:
            case execplan::CalpontSystemCatalog::UDECIMAL:
            {
-                col.fDefaultInt = Convertor::convertDecimalString(
-                                      col_defaultValue.c_str(),
-                                      col_defaultValue.length(),
-                                      colType.scale);
+                if (LIKELY(colType.colWidth == datatypes::MAXDECIMALWIDTH))
+                {
+                    col.fDefaultWideDecimal = colType.decimal128FromString(
+                                                  col_defaultValue, &bDefaultConvertError);
+                }
+                else
+                {
+                    col.fDefaultInt = Convertor::convertDecimalString(
+                                          col_defaultValue.c_str(),
+                                          col_defaultValue.length(),
+                                          colType.scale);

-                if (errno == ERANGE)
-                    bDefaultConvertError = true;
+                    if (errno == ERANGE)
+                        bDefaultConvertError = true;
+                }

                break;
            }