From 17e954db7de3a8cc2c68241a2a653af48ca9e516 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Wed, 7 Mar 2018 16:56:42 +0000 Subject: [PATCH] MCOL-1246 Fix string matching for whitespace For equality string matches other engines ignore trailing whitespace (this does not apply to LIKE matches). So we should do the same. This patch trims whitespace for MIN/MAX extent elimination checks, fixed width columns and dictionary columns during equality matches against constants (SELECT * FROM t1 WHERE b = 'ABC'). --- dbcon/joblist/lbidlist.cpp | 10 +++++++++- primitives/linux-port/column.cpp | 7 +++++++ primitives/linux-port/dictionary.cpp | 6 +++++- utils/dataconvert/dataconvert.h | 13 +++++++++++++ 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/dbcon/joblist/lbidlist.cpp b/dbcon/joblist/lbidlist.cpp index 69fbbb7c2..b7cbe9e57 100644 --- a/dbcon/joblist/lbidlist.cpp +++ b/dbcon/joblist/lbidlist.cpp @@ -27,6 +27,7 @@ #include "calpontsystemcatalog.h" #include "brm.h" #include "brmtypes.h" +#include "dataconvert.h" #define IS_VERBOSE (fDebug >= 4) #define IS_DETAIL (fDebug >= 3) @@ -653,7 +654,14 @@ bool LBIDList::CasualPartitionPredicate(const int64_t Min, if (bIsChar && 1 < ct.colWidth) { - scan = compareVal(order_swap(Min), order_swap(Max), order_swap(value), + // MCOL-1246 Trim trailing whitespace for matching so that we have + // the same as InnoDB behaviour + int64_t tMin = Min; + int64_t tMax = Max; + dataconvert::DataConvert::trimWhitespace(tMin); + dataconvert::DataConvert::trimWhitespace(tMax); + + scan = compareVal(order_swap(tMin), order_swap(tMax), order_swap(value), op, lcf); // cout << "scan=" << (uint32_t) scan << endl; } diff --git a/primitives/linux-port/column.cpp b/primitives/linux-port/column.cpp index 6804c8b21..cdb70ef67 100644 --- a/primitives/linux-port/column.cpp +++ b/primitives/linux-port/column.cpp @@ -39,6 +39,7 @@ using namespace boost; #include "we_type.h" #include "stats.h" #include "primproc.h" +#include "dataconvert.h" 
using namespace logging; using namespace dbbc; using namespace primitives; @@ -527,7 +528,13 @@ inline bool colCompare(int64_t val1, int64_t val2, uint8_t COP, uint8_t rf, int type == CalpontSystemCatalog::TEXT) && !isNull ) { if (!regex.used && !rf) + { + // MCOL-1246 Trim trailing whitespace for matching, but not for + // regex + dataconvert::DataConvert::trimWhitespace(val1); + dataconvert::DataConvert::trimWhitespace(val2); return colCompare_(order_swap(val1), order_swap(val2), COP); + } else return colStrCompare_(order_swap(val1), order_swap(val2), COP, rf, &regex); } diff --git a/primitives/linux-port/dictionary.cpp b/primitives/linux-port/dictionary.cpp index 0cc56df34..88d71dc3d 100644 --- a/primitives/linux-port/dictionary.cpp +++ b/primitives/linux-port/dictionary.cpp @@ -21,6 +21,7 @@ #include #include +#include #include using namespace std; @@ -164,7 +165,10 @@ void PrimitiveProcessor::p_TokenByScan(const TokenByScanRequestHeader *h, string arg_utf8; if (eqFilter) { - bool gotIt = eqFilter->find(string(sig, siglen)) != eqFilter->end(); + // MCOL-1246 Trim whitespace before match + string strData(sig, siglen); + boost::trim_right(strData); + bool gotIt = eqFilter->find(strData) != eqFilter->end(); if ((h->COP1 == COMPARE_EQ && gotIt) || (h->COP1 == COMPARE_NE && !gotIt)) goto store; diff --git a/utils/dataconvert/dataconvert.h b/utils/dataconvert/dataconvert.h index 019ebcd77..aba5a8bd8 100644 --- a/utils/dataconvert/dataconvert.h +++ b/utils/dataconvert/dataconvert.h @@ -408,6 +408,7 @@ public: static inline std::string decimalToString(int64_t value, uint8_t scale, execplan::CalpontSystemCatalog::ColDataType colDataType); static inline void decimalToString(int64_t value, uint8_t scale, char* buf, unsigned int buflen, execplan::CalpontSystemCatalog::ColDataType colDataType); static inline std::string constructRegexp(const std::string& str); + static inline void trimWhitespace(int64_t &charData); static inline bool isEscapedChar(char c) { return ('%' == c || 
'_' == c); } // convert string to date @@ -552,6 +553,18 @@ inline void DataConvert::decimalToString(int64_t int_val, uint8_t scale, char* b *(ptr + l1) = '.'; } +inline void DataConvert::trimWhitespace(int64_t &charData) +{ + // Trims trailing whitespace and NUL bytes (bytes 7..1; byte 0 is never altered) off non-dict character data packed in an int64_t; the unsigned char cast keeps isspace() well-defined for bytes >= 0x80 + char *ch_data = (char*) &charData; + for (int8_t i = 7; i > 0; i--) + { + if (isspace(static_cast<unsigned char>(ch_data[i])) || ch_data[i] == '\0') + ch_data[i] = '\0'; + else + break; + } +} //FIXME: copy/pasted from dictionary.cpp: refactor inline std::string DataConvert::constructRegexp(const std::string& str)