From 474039b6cf07fc193678ee9bc48c6136bff9085a Mon Sep 17 00:00:00 2001 From: David Hall Date: Thu, 27 Jun 2019 10:18:10 -0500 Subject: [PATCH] MCOL-1559 trim strings before compare on equal operator --- dbcon/execplan/predicateoperator.h | 1 + dbcon/joblist/jlf_execplantojoblist.cpp | 9 ++- primitives/linux-port/dictionary.cpp | 3 +- utils/funcexp/utils_utf8.h | 94 ++++++++----------------- 4 files changed, 38 insertions(+), 69 deletions(-) diff --git a/dbcon/execplan/predicateoperator.h b/dbcon/execplan/predicateoperator.h index e597f57ec..8b3a51fec 100644 --- a/dbcon/execplan/predicateoperator.h +++ b/dbcon/execplan/predicateoperator.h @@ -466,6 +466,7 @@ inline bool PredicateOperator::getBoolVal(rowgroup::Row& row, bool& isNull, Retu return false; return strTrimCompare(val1, rop->getStrVal(row, isNull)) && !isNull; +// return strCompare(val1, rop->getStrVal(row, isNull)) && !isNull; } diff --git a/dbcon/joblist/jlf_execplantojoblist.cpp b/dbcon/joblist/jlf_execplantojoblist.cpp index f3782c9d5..a95dd38cc 100644 --- a/dbcon/joblist/jlf_execplantojoblist.cpp +++ b/dbcon/joblist/jlf_execplantojoblist.cpp @@ -1635,6 +1635,8 @@ const JobStepVector doSimpleFilter(SimpleFilter* sf, JobInfo& jobInfo) } string constval(cc->constval()); + // Because, on a filter, we want to compare ignoring trailing spaces +// boost::algorithm::trim_right(constval); CalpontSystemCatalog::OID dictOid = 0; @@ -2770,7 +2772,8 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo) cop = COMPARE_NIL; string value = cc->constval(); - + // Because, on a filter, we want to compare ignoring trailing spaces +// boost::algorithm::trim_right(value); pds->addFilter(cop, value); } @@ -2853,7 +2856,8 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo) cop = COMPARE_NIL; string value = cc->constval(); - + // Because, on a filter, we want to compare ignoring trailing spaces +// boost::algorithm::trim_right(value); pds->addFilter(cop, value); } @@ -2960,7 +2964,6 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo) int64_t value = 0; string constval = cc->constval(); - // @bug 1151 string longer than colwidth of char/varchar. uint8_t rf = 0; bool isNull = ConstantColumn::NULLDATA == cc->type(); diff --git a/primitives/linux-port/dictionary.cpp b/primitives/linux-port/dictionary.cpp index e5a334436..49ca3cff2 100644 --- a/primitives/linux-port/dictionary.cpp +++ b/primitives/linux-port/dictionary.cpp @@ -188,7 +188,8 @@ void PrimitiveProcessor::p_TokenByScan(const TokenByScanRequestHeader* h, { // MCOL-1246 Trim whitespace before match string strData(sig, siglen); - boost::trim_right_if(strData, boost::is_any_of(" ")); +// boost::trim_right_if(strData, boost::is_any_of(" ")); + boost::algorithm::trim_right(strData); bool gotIt = eqFilter->find(strData) != eqFilter->end(); if ((h->COP1 == COMPARE_EQ && gotIt) || (h->COP1 == COMPARE_NE && diff --git a/utils/funcexp/utils_utf8.h b/utils/funcexp/utils_utf8.h index 878f7aee7..03035957e 100644 --- a/utils/funcexp/utils_utf8.h +++ b/utils/funcexp/utils_utf8.h @@ -75,6 +75,8 @@ std::string idb_setlocale() } char* pLoc = setlocale(LC_ALL, systemLang.c_str()); + // MCOL-1559 also set the C++ locale + std::locale::global(std::locale(pLoc)); if (pLoc == NULL) { @@ -131,78 +133,40 @@ int idb_strcoll(const char* str1, const char* str2) } // MCOL-1559 Add a trimmed version of strcoll -// We want to compare str1 and str2 ignoring any trailing whitespace -// without making a copy of the strings (performance hit). -// I can't find any library that does this while paying attention to -// Locale. string::compare can be used to compare substrings, but -// it's a byte by byte compare. -// We find the last real character, and if a space is following, we -// temporarily replace it with a NULL, do the compare, and restore the -// original value to that spot. -// WARNING: This is not thread safe. It temporarily modifies the -// strings and assumes it is free to do so. +// The intent here is to make no copy of the original strings and +// not modify them, so we can't use trim to deal with the spaces. inline int idb_strtrimcoll(const std::string& str1, const std::string& str2) { - const std::string whitespaces (" \t\f\v\n\r"); - int rtn = 0; - char orig1; - char orig2; - char* s1 = NULL; - char* s2 = NULL; - - // Set found1 to the first whitespace char in str1 + const std::string whitespaces (" "); + const char* s1 = str1.c_str(); + const char* s2 = str2.c_str(); + // Set found1 to the last non-whitespace char in str1 std::size_t found1 = str1.find_last_not_of(whitespaces); - if (found1 == std::string::npos) // Either the string is empty or all whitespace. - { - if (strlen(str1) > 0) // Is all whitespace - found1 = 0; // First whitespace position - } - else - { - if (strlen(str1) > found1+1) - ++found1; // move to the first whitespace position - else - found1 = std::string::npos; // No trailing whitespace - } - // Save the value at found1 and set to NULL - if (found1 != std::string::npos) - { - s1 = &const_cast(str1)[found1]; - orig1 = *s1; - *s1 = 0; - } - // Set found2 to the first whitespace char in str2 std::size_t found2 = str2.find_last_not_of(whitespaces); - if (found2 == std::string::npos) // Either the string is empty or all whitespace. - { - if (strlen(str2) > 0) // Is all whitespace - found2 = 0; // First whitespace position - } - else - { - if (strlen(str2) > found2+1) - ++found2; // move to the first whitespace position - else - found2 = std::string::npos; // No trailing whitespace - } - // Save the value at found2 and set to NULL - if (found2 != std::string::npos) - { - s2 = &const_cast(str2)[found2]; - orig2 = *s2; - *s2 = 0; - } - - // Compare the trimmed strings - rtn = idb_strcoll(str1.c_str(), str2.c_str()); - // Restore the whitespace - if (s1) - *s1 = orig1; - if (s2) - *s2 = orig2; + // Are both strings empty or all whitespace? + if (found1 == std::string::npos && found2 == std::string::npos) + { + return 0; // they match + } + // If str1 is empty or all spaces + if (found1 == std::string::npos && found2 != std::string::npos) + { + return -1; + } + // If str2 is empty or all spaces + if (found1 != std::string::npos && found2 == std::string::npos) + { + return 1; + } + + // Compare the (trimmed) strings + std::locale loc; + const std::collate& coll = std::use_facet >(loc); + int rtn = coll.compare(s1, s1+found1+1, s2, s2+found2+1); +// return coll.compare(s1, s1+found1, s2, s2+found2); return rtn; }