MCOL-641 This commit introduces templates for DataConvert and RowGroup methods.

2025-08-17 09:41:06 +03:00 · 2020-01-21 12:57:31 +03:00
parent 0c67b6ab50
commit 54c152d6c8
9 changed files with 111 additions and 37 deletions
--- a/dbcon/execplan/simplecolumn.cpp
+++ b/dbcon/execplan/simplecolumn.cpp
@@ -625,6 +625,12 @@ void SimpleColumn::evaluate(Row& row, bool& isNull)
        {
            switch (fResultType.colWidth)
            {
                case 16:
                {
                    fResult.decimalVal.value = row.getIntField<16>(fInputIndex);
                    fResult.decimalVal.scale = (unsigned)fResultType.scale;
                    break;
                }
                case 1:
                {
                    fResult.decimalVal.value = row.getIntField<1>(fInputIndex);
--- a/dbcon/execplan/treenode.h
+++ b/dbcon/execplan/treenode.h
@@ -36,6 +36,8 @@
 #include "exceptclasses.h"
 #include "dataconvert.h"
 using uint128_t = unsigned __int128;
 namespace messageqcpp
 {
 class ByteStream;
@@ -61,7 +63,7 @@ typedef execplan::CalpontSystemCatalog::ColType Type;
 */
 struct IDB_Decimal
 {
-    IDB_Decimal(): value(0), scale(0), precision(0) {}
+    IDB_Decimal(): val(0), value(0), scale(0), precision(0) {}
    IDB_Decimal(int64_t val, int8_t s, uint8_t p) :
        value (val),
        scale(s),
@@ -149,9 +151,10 @@ struct IDB_Decimal
            return (decimalComp(rhs) != 0);
    }
    uint128_t val;
    int64_t value;
-    int8_t  scale;	  // 0~18
+    int8_t  scale;	  // 0~38
-    uint8_t precision;  // 1~18
+    uint8_t precision;  // 1~38
 };
 typedef IDB_Decimal CNX_Decimal;
--- a/dbcon/mysql/ha_mcs_execplan.cpp
+++ b/dbcon/mysql/ha_mcs_execplan.cpp
@@ -3106,13 +3106,14 @@ CalpontSystemCatalog::ColType colType_MysqlToIDB (const Item* item)
        {
            Item_decimal* idp = (Item_decimal*)item;
            ct.colDataType = CalpontSystemCatalog::DECIMAL;
-            ct.colWidth = 8;
+            // MCOL-641 WIP Make this dynamic
            ct.colWidth = (idp->max_length >= 18) ? 16 : 8;
            ct.scale = idp->decimals;
            if (ct.scale == 0)
-                ct.precision = idp->max_length - 1;
+                ct.precision = (idp->max_length > 38) ? 38 : idp->max_length - 1;
            else
-                ct.precision = idp->max_length - idp->decimals;
+                ct.precision = (idp->max_length > 38) ? 38 : idp->max_length - idp->decimals;
            break;
        }
@@ -3599,10 +3600,8 @@ ArithmeticColumn* buildArithmeticColumn(
        pt->right(rhs);
    }
    //aop->resultType(colType_MysqlToIDB(item));
    // @bug5715. Use InfiniDB adjusted coltype for result type.
    // decimal arithmetic operation gives double result when the session variable is set.
    //idbassert(pt->left() && pt->right() && pt->left()->data() && pt->right()->data());
    CalpontSystemCatalog::ColType mysql_type = colType_MysqlToIDB(item);
    if (get_double_for_decimal_math(current_thd) == true)
--- a/dbcon/mysql/ha_mcs_impl.cpp
+++ b/dbcon/mysql/ha_mcs_impl.cpp
@@ -145,6 +145,8 @@ extern bool nonConstFunc(Item_func* ifp);
 namespace
 {
    using int128_t = __int128;
    using uint128_t = unsigned __int128;
 // Calpont vtable non-support error message
 const string infinidb_autoswitch_warning = "The query includes syntax that is not supported by MariaDB Columnstore distributed mode. The execution was switched to standard mode with downgraded performance.";
@@ -247,9 +249,6 @@ void force_close_fep_conn(THD *thd, cal_connection_info* ci, bool check_prev_rc
    ci->cal_conn_hndl = 0;
 }
 // WIP MCOL-641
 using uint128_t = unsigned __int128;
 void storeNumericField(Field** f, int64_t value, CalpontSystemCatalog::ColType& ct)
 {
    // unset null bit first
@@ -268,6 +267,8 @@ void storeNumericField(Field** f, int64_t value, CalpontSystemCatalog::ColType&
            //if (f2->dec < ct.scale)
            //    f2->dec = ct.scale;
            // WIP MCOL-641
            // This is too much
            char buf[256];
            dataconvert::DataConvert::decimalToString(value, (unsigned)ct.scale, buf, 256, ct.colDataType);
            (*f)->store(buf, strlen(buf), (*f)->charset());
@@ -808,12 +809,33 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, bool h
                    if (row.getPrecision(s) > 18)
                    {
                        // unset null bit first
                        // Might be redundant
                        if ((*f)->null_ptr)
                            *(*f)->null_ptr &= ~(*f)->null_bit;
-                        const uint128_t val = *reinterpret_cast<const uint128_t*>(row.getBinaryField2(s));
+                        uint128_t* udec;
-                        char buf[256];
+                        int128_t* dec;
-                        dataconvert::DataConvert::decimalToString(val, (unsigned)colType.scale, buf, 256, colType.colDataType);
+                        // We won't have more than 38 digits + sign + dp
                        // Make this precision based
                        char buf[41];
                        // This C-style cast doesn't look appropriate.
                        // Is there a way to use decltype instead of if?
                        if (colType.colDataType == CalpontSystemCatalog::DECIMAL)
                        {
                            dec = row.getBinaryField<int128_t>(s);
                            dataconvert::DataConvert::decimalToString<int128_t>(dec,
                                (unsigned)colType.scale, buf,
                                sizeof(buf), colType.colDataType);
                        }
                        else
                        {
                            udec = row.getBinaryField<uint128_t>(s);
                            dataconvert::DataConvert::decimalToString<uint128_t>(udec,
                                (unsigned)colType.scale, buf,
                                sizeof(buf), colType.colDataType);
                        }
                        Field_new_decimal* f2 = (Field_new_decimal*)*f;
                        f2->store(buf, strlen(buf), f2->charset());
                    }
--- a/primitives/primproc/columncommand.cpp
+++ b/primitives/primproc/columncommand.cpp
@@ -281,6 +281,7 @@ void ColumnCommand::process_OT_BOTH()
                bpp->relRids[i] = *((uint16_t*) &bpp->outputMsg[pos]);
                pos += 2;
                // WIP
                // values[i] is 8 bytes wide, so copying the pointer to bpp->outputMsg[pos] and crossing fingers.
                // I don't know the lifetime of bpp->outputMsg, and I also don't know if there is another memory area I can use.
                values[i] = (int64_t) &bpp->outputMsg[pos];
@@ -591,6 +592,7 @@ void ColumnCommand::prep(int8_t outputType, bool absRids)
            mask = 0x01;
            break;
        case 16:
            // WIP MCOL-641
            cout << __FILE__<< ":" <<__LINE__ << " Fix shift and mask for 16 Bytes ?"<< endl;
            shift = 1;
            mask = 0x01;
@@ -778,10 +780,8 @@ void ColumnCommand::projectResultRG(RowGroup& rg, uint32_t pos)
                r.setUintField_offset<4>(*((uint32_t*) msg8), offset);
                r.nextRow(rowSize);
            }
            break;
        }
        case 8:
        {
            for (i = 0; i < outMsg->NVALS; ++i, msg8 += gapSize)
@@ -789,12 +789,19 @@ void ColumnCommand::projectResultRG(RowGroup& rg, uint32_t pos)
                r.setUintField_offset<8>(*((uint64_t*) msg8), offset);
                r.nextRow(rowSize);
            }
-
+            break;
        }
        case 16:
        {
            cout << __FILE__<< ":" <<__LINE__ << " ColumnCommand::projectResultRG " << endl;
            for (i = 0; i < outMsg->NVALS; ++i, msg8 += gapSize)
            {
                r.setBinaryField(msg8, colType.colWidth, offset);
                r.nextRow(rowSize);
            }
            break;
        }
        case 16:
        cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << endl;
    }
 }
--- a/utils/dataconvert/dataconvert.cpp
+++ b/utils/dataconvert/dataconvert.cpp
@@ -1174,11 +1174,14 @@ struct uint128_pod
 };
 // WIP MCOL-641
-void DataConvert::toString(unsigned __int128 i, char *p)
+// Check for overflows with buflen
 template<typename T>
 void DataConvert::toString(T* dec, char *p, size_t buflen)
 { 
  uint64_t div = 10000000000000000000ULL;
  size_t div_log = 19;
-  uint128_t high = i;
+  // template this
  uint128_t high = *dec;
  uint128_t low;
  low = high % div;
  high /= div;
@@ -1186,9 +1189,13 @@ void DataConvert::toString(unsigned __int128 i, char *p)
  mid = high % div;
  high /= div;
  // WIP How to treat PODs here ?
  // use typeof
  // Or a templated structure
  uint128_pod *high_pod = reinterpret_cast<uint128_pod*>(&high);
  uint128_pod *mid_pod = reinterpret_cast<uint128_pod*>(&mid);
  uint128_pod *low_pod = reinterpret_cast<uint128_pod*>(&low);
  char* original_p = p;
  int printed_chars = 0;
  // WIP replace snprintf with streams 
@@ -1202,9 +1209,9 @@ void DataConvert::toString(unsigned __int128 i, char *p)
    p += printed_chars;
  }
  snprintf(p, div_log+1, "%019lu", low_pod->lo);
  if (buflen <= p-original_p)
    std::cout << "DataConvert::toString char buffer overflow" << std::endl;
 }
 // WIP MCOL-641
 // Template this
 // result must be calloc-ed
@@ -1213,7 +1220,7 @@ void DataConvert::toString(unsigned __int128 i, char *p)
 void atoi128(const string& arg, int128_t& res) 
 {
    // WIP
-    //char buf[40];
+    //char buf[41];
    //int128_t *res_ptr = reinterpret_cast<int128_t*>(result);
    res = 0;
    for (size_t j = 0; j < arg.size(); j++) 
@@ -1228,10 +1235,14 @@ void atoi128(const string& arg, int128_t& res)
 }
 // WIP MCOL-641
-void DataConvert::decimalToString(unsigned __int128 int_val, uint8_t scale, char* buf, unsigned int buflen,
+template <typename T>
 void DataConvert::decimalToString(T* valuePtr,
    uint8_t scale,
    char* buf,
    unsigned int buflen,
    execplan::CalpontSystemCatalog::ColDataType colDataType)
 {
-    toString(int_val, buf);
+    toString<T>(valuePtr, buf, buflen);
    // Biggest ColumnStore supports is DECIMAL(38,x), or 38 total digits+dp+sign for column
@@ -1243,7 +1254,7 @@ void DataConvert::decimalToString(unsigned __int128 int_val, uint8_t scale, char
    size_t l1 = strlen(buf);
    char* ptr = &buf[0];
-    if (int_val < 0)
+    if (*valuePtr < 0)
    {
        ptr++;
        idbassert(l1 >= 2);
@@ -1259,7 +1270,7 @@ void DataConvert::decimalToString(unsigned __int128 int_val, uint8_t scale, char
        const char* zeros = "00000000000000000000000000000000000000"; //38 0's
        size_t diff = 0;
-        if (int_val != 0)
+        if (*valuePtr != 0)
            diff = scale - l1; //this will always be > 0
        else
            diff = scale;
@@ -1267,7 +1278,7 @@ void DataConvert::decimalToString(unsigned __int128 int_val, uint8_t scale, char
        memmove((ptr + diff), ptr, l1 + 1); //also move null
        memcpy(ptr, zeros, diff);
-        if (int_val != 0)
+        if (*valuePtr != 0)
            l1 = 0;
        else
            l1 = 1;
@@ -1289,6 +1300,11 @@ void DataConvert::decimalToString(unsigned __int128 int_val, uint8_t scale, char
    *(ptr + l1) = '.';
 }
 // Explicit instantiations of the decimalToString template for the signed
 // (int128_t) and unsigned (uint128_t) 128-bit integer types. The template
 // body is defined in this file, so these instantiations make both
 // specializations available to code that only sees the declaration.
 template
 void DataConvert::decimalToString<int128_t>(int128_t* value, uint8_t scale, char* buf, unsigned int buflen, execplan::CalpontSystemCatalog::ColDataType colDataType);
 template
 void DataConvert::decimalToString<uint128_t>(uint128_t* value, uint8_t scale, char* buf, unsigned int buflen, execplan::CalpontSystemCatalog::ColDataType colDataType);
 boost::any
 DataConvert::convertColumnData(const CalpontSystemCatalog::ColType& colType,
--- a/utils/dataconvert/dataconvert.h
+++ b/utils/dataconvert/dataconvert.h
@@ -1011,8 +1011,11 @@ public:
    EXPORT static bool isNullData(execplan::ColumnResult* cr, int rownum, execplan::CalpontSystemCatalog::ColType colType);
    static inline std::string decimalToString(int64_t value, uint8_t scale, execplan::CalpontSystemCatalog::ColDataType colDataType);
    static inline void decimalToString(int64_t value, uint8_t scale, char* buf, unsigned int buflen, execplan::CalpontSystemCatalog::ColDataType colDataType);
-    EXPORT static void decimalToString(unsigned __int128 value, uint8_t scale, char* buf, unsigned int buflen, execplan::CalpontSystemCatalog::ColDataType colDataType);
+    template <typename T>
-    EXPORT static void toString(unsigned __int128 i, char *p);
+    EXPORT static void decimalToString(T* value, uint8_t scale, char* buf, unsigned int buflen, execplan::CalpontSystemCatalog::ColDataType colDataType);
    template <typename T>
    EXPORT static void toString(T* dec, char *p, size_t buflen);
    static inline std::string constructRegexp(const std::string& str);
    static inline void trimWhitespace(int64_t& charData);
    static inline bool isEscapedChar(char c)
--- a/utils/rowgroup/rowgroup.cpp
+++ b/utils/rowgroup/rowgroup.cpp
@@ -953,6 +953,11 @@ bool Row::isNullValue(uint32_t colIndex) const
            switch (len)
            {
                // MCOL-641 WIP
                case 16:
                    return (*((int64_t*) &data[offsets[colIndex]]) == static_cast<int64_t>(joblist::BIGINTNULL));
                    break;
                case 1 :
                    return (data[offsets[colIndex]] == joblist::TINYINTNULL);
--- a/utils/rowgroup/rowgroup.h
+++ b/utils/rowgroup/rowgroup.h
@@ -435,7 +435,10 @@ public:
    inline void setVarBinaryField(const uint8_t* val, uint32_t len, uint32_t colIndex);
    inline std::string getBinaryField(uint32_t colIndex) const;
-    inline const uint8_t* getBinaryField2(uint32_t colIndex) const;
+    template <typename T>
    inline T* getBinaryField(uint32_t colIndex) const;
    template <typename T>
    inline T* getBinaryField_offset(uint32_t offset) const;
    inline boost::shared_ptr<mcsv1sdk::UserData> getUserData(uint32_t colIndex) const;
    inline void setUserData(mcsv1sdk::mcsv1Context& context,
@@ -792,7 +795,7 @@ inline uint32_t Row::getStringLength(uint32_t colIndex) const
    return strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex));
 }
-
+// Check whether memcpy affects perf here
 inline void Row::setBinaryField(const uint8_t* strdata, uint32_t length, uint32_t offset)
 {
    memcpy(&data[offset], strdata, length);
@@ -837,11 +840,19 @@ inline std::string Row::getBinaryField(uint32_t colIndex) const
 }
 // WIP MCOL-641
-inline const uint8_t* Row::getBinaryField2(uint32_t colIndex) const
+template <typename T>
 inline T* Row::getBinaryField(uint32_t colIndex) const
 {
-    return &data[offsets[colIndex]];
+    return reinterpret_cast<T*>(&data[offsets[colIndex]]);
 }
 // Returns a T* that points at the bytes starting at `offset` within this
 // row's `data`. `offset` is used directly as an index into `data` (it is not
 // a column index); no bounds or alignment check is performed here.
 template <typename T>
 inline T* Row::getBinaryField_offset(uint32_t offset) const
 {
     // Reinterpret the raw bytes in place; nothing is copied.
     return reinterpret_cast<T*>(&data[offset]);
 }
 inline std::string Row::getVarBinaryStringField(uint32_t colIndex) const
 {
    if (inStringTable(colIndex))
@@ -961,10 +972,12 @@ inline void Row::setUintField_offset(uint64_t val, uint32_t offset)
        case 8:
            *((uint64_t*) &data[offset]) = val;
            break;
        /* This doesn't look like the appropriate place
        case 16:
            std::cout << __FILE__<< ":" <<__LINE__ << " Fix for 16 Bytes ?" << std::endl;
            *((uint64_t*) &data[offset]) = val;
            break;
        */
        default:
            idbassert(0);
            throw std::logic_error("Row::setUintField called on a non-uint32_t field");