A cleanup for MCOL-4064 Make JOIN collation aware

After creating and populating tables with case-insensitive CHAR(5) columns, in a series of consecutive joins such as: select * from t1, t2 where t1.c1=t2.c1; select * from t1, t2 where t1.c1=t2.c2; select * from t1, t2 where t1.c2=t2.c1; select * from t1, t2 where t1.c2=t2.c2; only the first join reliably compared values case-insensitively. This change removes the remaining pieces of code that used order_swap() to compare short CHAR columns and uses Charset::strnncollsp() instead, which fixes the issue.
2025-11-25 20:23:16 +03:00 · 2020-12-10 18:58:49 +04:00
parent 4da3d8b376
commit a433c65575
6 changed files with 49 additions and 63 deletions
--- a/datatypes/mcs_string.h
+++ b/datatypes/mcs_string.h
@@ -20,6 +20,7 @@
 #define MCS_DATATYPES_STRING_H
 #include "conststring.h"
 #include "collation.h"
 namespace datatypes
 {
@@ -36,6 +37,13 @@ public:
      utils::ConstString res = utils::ConstString((const char *) &mValue, 8);
      return res.rtrimZero();
    }
    // Collation-aware comparison of two short CHAR values held in 64-bit
    // integers. Each value is wrapped in a TCharShort, viewed as a
    // utils::ConstString, and the pair is handed to cs.strnncollsp().
    // Returns whatever cs.strnncollsp() returns, converted to int.
    static int strnncollsp(const datatypes::Charset &cs, int64_t a, int64_t b)
    {
      // Keep the wrappers as named locals so they stay alive for the whole
      // comparison call.
      datatypes::TCharShort lhs(a);
      datatypes::TCharShort rhs(b);
      const int cmp = cs.strnncollsp(static_cast<utils::ConstString>(lhs),
                                     static_cast<utils::ConstString>(rhs));
      return cmp;
    }
 };
--- a/dbcon/joblist/lbidlist.cpp
+++ b/dbcon/joblist/lbidlist.cpp
@@ -24,6 +24,7 @@
 #include "primitivemsg.h"
 #include "blocksize.h"
 #include "lbidlist.h"
 #include "mcs_string.h"
 #include "calpontsystemcatalog.h"
 #include "brm.h"
 #include "brmtypes.h"
@@ -41,18 +42,6 @@ using namespace BRM;
 namespace joblist
 {
 // Returns x with the order of its eight bytes reversed
 // (least significant byte becomes most significant, and so on).
 inline uint64_t order_swap(uint64_t x)
 {
    uint64_t reversed = 0;

    // Peel bytes off the low end of x and push them onto the low end of
    // the result, so the first byte taken ends up in the top position.
    for (int i = 0; i < 8; ++i)
    {
        reversed = (reversed << 8) | (x & 0xFFULL);
        x >>= 8;
    }

    return reversed;
 }
 LBIDList::LBIDList()
 {
    throw logic_error("Don't use LBIDList()");
@@ -361,7 +350,8 @@ int LBIDList::getMinMaxFromEntries(T& min, T& max, int32_t& seq,
 }
 template <typename T>
-void LBIDList::UpdateMinMax(T min, T max, int64_t lbid, CalpontSystemCatalog::ColDataType type,
+void LBIDList::UpdateMinMax(T min, T max, int64_t lbid,
                            const CalpontSystemCatalog::ColType & type,
                            bool validData)
 {
    MinMaxPartition* mmp = NULL;
@@ -400,17 +390,18 @@ void LBIDList::UpdateMinMax(T min, T max, int64_t lbid, CalpontSystemCatalog::Co
            if (mmp->isValid == BRM::CP_INVALID)
            {
-                if (datatypes::isCharType(type))
+                if (datatypes::isCharType(type.colDataType))
                {
-                    if (order_swap(min) < order_swap(mmp->min) ||
+                    datatypes::Charset cs(const_cast<CalpontSystemCatalog::ColType &>(type).getCharset());
                    if (datatypes::TCharShort::strnncollsp(cs, min, mmp->min) < 0 ||
                            mmp->min == numeric_limits<int64_t>::max())
                        mmp->min = min;
-                    if (order_swap(max) > order_swap(mmp->max) ||
+                    if (datatypes::TCharShort::strnncollsp(cs, max, mmp->max) > 0 ||
                            mmp->max == numeric_limits<int64_t>::min())
                        mmp->max = max;
                }
-                else if (datatypes::isUnsigned(type))
+                else if (datatypes::isUnsigned(type.colDataType))
                {
                    if (static_cast<uint64_t>(min) < static_cast<uint64_t>(mmp->min))
                        mmp->min = min;
@@ -702,18 +693,17 @@ static inline bool compareStr(const datatypes::Charset &cs,
 template<typename T>
 bool LBIDList::checkSingleValue(T min, T max, T value,
-                                execplan::CalpontSystemCatalog::ColDataType type)
+                                const execplan::CalpontSystemCatalog::ColType & type)
 {
-    if (isCharType(type))
+    if (isCharType(type.colDataType))
    {
        // MCOL-641 LBIDList::CasualPartitionDataType() returns false if
        // width > 8 for a character type, so T cannot be int128_t here
-        uint64_t mmin = order_swap(min);
+        datatypes::Charset cs(const_cast<execplan::CalpontSystemCatalog::ColType&>(type).getCharset());
-        uint64_t mmax = order_swap(max);
+        return datatypes::TCharShort::strnncollsp(cs, value, min) >= 0 &&
-        uint64_t vvalue = order_swap(value);
+               datatypes::TCharShort::strnncollsp(cs, value, max) <= 0;
        return (vvalue >= mmin && vvalue <= mmax);
    }
-    else if (isUnsigned(type))
+    else if (isUnsigned(type.colDataType))
    {
        return (static_cast<uint64_t>(value) >= static_cast<uint64_t>(min) &&
                static_cast<uint64_t>(value) <= static_cast<uint64_t>(max));
@@ -726,19 +716,17 @@ bool LBIDList::checkSingleValue(T min, T max, T value,
 template<typename T>
 bool LBIDList::checkRangeOverlap(T min, T max, T tmin, T tmax,
-                                 execplan::CalpontSystemCatalog::ColDataType type)
+                                 const execplan::CalpontSystemCatalog::ColType & type)
 {
-    if (isCharType(type))
+    if (isCharType(type.colDataType))
    {
        // MCOL-641 LBIDList::CasualPartitionDataType() returns false if
        // width > 8 for a character type, so T cannot be int128_t here
-        uint64_t min2 = order_swap(min);
+        datatypes::Charset cs(const_cast<execplan::CalpontSystemCatalog::ColType&>(type).getCharset());
-        uint64_t max2 = order_swap(max);
+        return datatypes::TCharShort::strnncollsp(cs, tmin, max) <= 0 &&
-        uint64_t tmin2 = order_swap(tmin);
+               datatypes::TCharShort::strnncollsp(cs, tmax, min) >= 0;
        uint64_t tmax2 = order_swap(tmax);
        return (tmin2 <= max2 && tmax2 >= min2);
    }
-    else if (isUnsigned(type))
+    else if (isUnsigned(type.colDataType))
    {
        return (static_cast<uint64_t>(tmin) <= static_cast<uint64_t>(max) &&
                static_cast<uint64_t>(tmax) >= static_cast<uint64_t>(min));
@@ -964,27 +952,27 @@ bool LBIDList::GetMinMax<int64_t>(int64_t* min, int64_t* max, int64_t* seq,
 template
 void LBIDList::UpdateMinMax<int128_t>(int128_t min, int128_t max, int64_t lbid,
-                                      execplan::CalpontSystemCatalog::ColDataType type, bool validData = true);
+                                      const execplan::CalpontSystemCatalog::ColType & type, bool validData = true);
 template
 void LBIDList::UpdateMinMax<int64_t>(int64_t min, int64_t max, int64_t lbid,
-                                     execplan::CalpontSystemCatalog::ColDataType type, bool validData = true);
+                                     const execplan::CalpontSystemCatalog::ColType & type, bool validData = true);
 template
 bool LBIDList::checkSingleValue<int128_t>(int128_t min, int128_t max, int128_t value,
-                                execplan::CalpontSystemCatalog::ColDataType type);
+                                const execplan::CalpontSystemCatalog::ColType & type);
 template
 bool LBIDList::checkSingleValue<int64_t>(int64_t min, int64_t max, int64_t value,
-                               execplan::CalpontSystemCatalog::ColDataType type);
+                               const execplan::CalpontSystemCatalog::ColType & type);
 template
 bool LBIDList::checkRangeOverlap<int128_t>(int128_t min, int128_t max, int128_t tmin, int128_t tmax,
-                                 execplan::CalpontSystemCatalog::ColDataType type);
+                                 const execplan::CalpontSystemCatalog::ColType & type);
 template
 bool LBIDList::checkRangeOverlap<int64_t>(int64_t min, int64_t max, int64_t tmin, int64_t tmax,
-                                execplan::CalpontSystemCatalog::ColDataType type);
+                                const execplan::CalpontSystemCatalog::ColType & type);
 } //namespace joblist
--- a/dbcon/joblist/lbidlist.h
+++ b/dbcon/joblist/lbidlist.h
@@ -104,7 +104,7 @@ public:
    template <typename T>
    void UpdateMinMax(T min, T max, int64_t lbid,
-                      execplan::CalpontSystemCatalog::ColDataType type, bool validData = true);
+                      const execplan::CalpontSystemCatalog::ColType & type, bool validData = true);
    void UpdateAllPartitionInfo(const execplan::CalpontSystemCatalog::ColType& colType);
@@ -118,11 +118,11 @@ public:
    template<typename T>
    bool checkSingleValue(T min, T max, T value,
-                          execplan::CalpontSystemCatalog::ColDataType type);
+                          const execplan::CalpontSystemCatalog::ColType & type);
    template<typename T>
    bool checkRangeOverlap(T min, T max, T tmin, T tmax,
-                           execplan::CalpontSystemCatalog::ColDataType type);
+                           const execplan::CalpontSystemCatalog::ColType & type);
    // check the column data type and the column size to determine if it
    // is a data type  to apply casual paritioning.
--- a/dbcon/joblist/tuple-bps.cpp
+++ b/dbcon/joblist/tuple-bps.cpp
@@ -2396,12 +2396,12 @@ void TupleBPS::receiveMultiPrimitiveMessages(uint32_t threadID)
                {
                    if (fColType.colWidth > 8)
                    {
-                        lbidList->UpdateMinMax(cpv[i].bigMin, cpv[i].bigMax, cpv[i].LBID, fColType.colDataType,
+                        lbidList->UpdateMinMax(cpv[i].bigMin, cpv[i].bigMax, cpv[i].LBID, fColType,
                                               cpv[i].valid);
                    }
                    else
                    {
-                        lbidList->UpdateMinMax(cpv[i].min, cpv[i].max, cpv[i].LBID, fColType.colDataType,
+                        lbidList->UpdateMinMax(cpv[i].min, cpv[i].max, cpv[i].LBID, fColType,
                                               cpv[i].valid);
                    }
                }
@@ -3289,12 +3289,12 @@ void TupleBPS::addCPPredicates(uint32_t OID, const vector<int128_t>& vals, bool
                            if (!isSmallSideWideDecimal)
                            {
                                runtimeCPFlags[j] = ll.checkRangeOverlap(min, max, (int64_t) vals[0], (int64_t) vals[1],
-                                                    colType.colDataType) && runtimeCPFlags[j];
+                                                    colType) && runtimeCPFlags[j];
                            }
                            else
                            {
                                runtimeCPFlags[j] = ll.checkRangeOverlap((int128_t) min, (int128_t) max, vals[0], vals[1],
-                                                    colType.colDataType) && runtimeCPFlags[j];
+                                                    colType) && runtimeCPFlags[j];
                            }
                        }
                        else
@@ -3306,12 +3306,12 @@ void TupleBPS::addCPPredicates(uint32_t OID, const vector<int128_t>& vals, bool
                                if (!isSmallSideWideDecimal)
                                {
                                    intersection = intersection ||
-                                                   ll.checkSingleValue(min, max, (int64_t) vals[k], colType.colDataType);
+                                                   ll.checkSingleValue(min, max, (int64_t) vals[k], colType);
                                }
                                else
                                {
                                    intersection = intersection ||
-                                                   ll.checkSingleValue((int128_t) min, (int128_t) max, vals[k], colType.colDataType);
+                                                   ll.checkSingleValue((int128_t) min, (int128_t) max, vals[k], colType);
                                }
                            }
@@ -3331,14 +3331,14 @@ void TupleBPS::addCPPredicates(uint32_t OID, const vector<int128_t>& vals, bool
                    {
                        if (isRange)
                            runtimeCPFlags[j] = ll.checkRangeOverlap(bigMin, bigMax, vals[0], vals[1],
-                                                colType.colDataType) && runtimeCPFlags[j];
+                                                colType) && runtimeCPFlags[j];
                        else
                        {
                            intersection = false;
                            for (k = 0; k < vals.size(); k++)
                                intersection = intersection ||
-                                               ll.checkSingleValue(bigMin, bigMax, vals[k], colType.colDataType);
+                                               ll.checkSingleValue(bigMin, bigMax, vals[k], colType);
                            runtimeCPFlags[j] = intersection && runtimeCPFlags[j];
                        }
--- a/utils/joiner/tuplejoiner.cpp
+++ b/utils/joiner/tuplejoiner.cpp
@@ -30,6 +30,7 @@
 #include "lbidlist.h"
 #include "spinlock.h"
 #include "vlarray.h"
 #include "mcs_string.h"
 using namespace std;
@@ -1127,15 +1128,16 @@ void TupleJoiner::updateCPData(const Row& r)
        if (r.isCharType(colIdx))
        {
            datatypes::Charset cs(r.getCharset(col));
            int64_t val = r.getIntField(colIdx);
-            if (order_swap(val) < order_swap((int64_t) min) ||
+            if (datatypes::TCharShort::strnncollsp(cs, val, min) < 0 ||
                    ((int64_t) min) == numeric_limits<int64_t>::max())
            {
                min = val;
            }
-            if (order_swap(val) > order_swap((int64_t) max) ||
+            if (datatypes::TCharShort::strnncollsp(cs, val, max) > 0 ||
                    ((int64_t) max) == numeric_limits<int64_t>::min())
            {
                max = val;
--- a/utils/joiner/tuplejoiner.h
+++ b/utils/joiner/tuplejoiner.h
@@ -44,18 +44,6 @@
 namespace joiner
 {
 // Byte-reverses a 64-bit value: byte 0 of x becomes byte 7 of the
 // result, byte 1 becomes byte 6, and so on.
 inline uint64_t order_swap(uint64_t x)
 {
    uint64_t swapped = 0;

    // Walk the input from its lowest byte upwards, appending each byte
    // to the low end of the accumulator.
    for (int byteIdx = 0; byteIdx < 8; ++byteIdx)
    {
        const uint64_t lowByte = x & 0xFFULL;
        swapped = (swapped << 8) | lowByte;
        x >>= 8;
    }

    return swapped;
 }
 class TypelessData
 {
 public: