MCOL-1559 trim strings before compare on equal operator

2025-07-30 19:23:07 +03:00 · 2019-06-27 10:18:10 -05:00
parent 567de10204
commit 474039b6cf
4 changed files with 38 additions and 69 deletions
--- a/dbcon/execplan/predicateoperator.h
+++ b/dbcon/execplan/predicateoperator.h
@ -466,6 +466,7 @@ inline bool PredicateOperator::getBoolVal(rowgroup::Row& row, bool& isNull, Retu
                return false;
            return strTrimCompare(val1, rop->getStrVal(row, isNull)) && !isNull;
 //            return strCompare(val1, rop->getStrVal(row, isNull)) && !isNull;
        }
--- a/dbcon/joblist/jlf_execplantojoblist.cpp
+++ b/dbcon/joblist/jlf_execplantojoblist.cpp
@ -1635,6 +1635,8 @@ const JobStepVector doSimpleFilter(SimpleFilter* sf, JobInfo& jobInfo)
        }
        string constval(cc->constval());
        // Because, on a filter, we want to compare ignoring trailing spaces
 //        boost::algorithm::trim_right(constval);
        CalpontSystemCatalog::OID dictOid = 0;
@ -2770,7 +2772,8 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo)
                        cop = COMPARE_NIL;
                    string value = cc->constval();
-
+                    // Because, on a filter, we want to compare ignoring trailing spaces
 //                    boost::algorithm::trim_right(value);
                    pds->addFilter(cop, value);
                }
@ -2853,7 +2856,8 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo)
                        cop = COMPARE_NIL;
                    string value = cc->constval();
-
+                    // Because, on a filter, we want to compare ignoring trailing spaces
 //                    boost::algorithm::trim_right(value);
                    pds->addFilter(cop, value);
                }
@ -2960,7 +2964,6 @@ const JobStepVector doConstantFilter(const ConstantFilter* cf, JobInfo& jobInfo)
                    int64_t value = 0;
                    string constval = cc->constval();
                    // @bug 1151 string longer than colwidth of char/varchar.
                    uint8_t rf = 0;
                    bool isNull = ConstantColumn::NULLDATA == cc->type();
--- a/primitives/linux-port/dictionary.cpp
+++ b/primitives/linux-port/dictionary.cpp
@ -188,7 +188,8 @@ void PrimitiveProcessor::p_TokenByScan(const TokenByScanRequestHeader* h,
        {
            // MCOL-1246 Trim whitespace before match
            string strData(sig, siglen);
-            boost::trim_right_if(strData, boost::is_any_of(" "));
+//            boost::trim_right_if(strData, boost::is_any_of(" "));
            boost::algorithm::trim_right(strData);
            bool gotIt = eqFilter->find(strData) != eqFilter->end();
            if ((h->COP1 == COMPARE_EQ && gotIt) || (h->COP1 == COMPARE_NE &&
--- a/utils/funcexp/utils_utf8.h
+++ b/utils/funcexp/utils_utf8.h
@ -75,6 +75,8 @@ std::string idb_setlocale()
    }
    char* pLoc = setlocale(LC_ALL, systemLang.c_str());
    // MCOL-1559 also set the C++ locale
    std::locale::global(std::locale(pLoc));
    if (pLoc == NULL)
    {
@ -131,78 +133,40 @@ int idb_strcoll(const char* str1, const char* str2)
 }
 // MCOL-1559 Add a trimmed version of strcoll
-// We want to compare str1 and str2 ignoring any trailing whitespace
+// The intent here is to make no copy of the original strings and
-// without making a copy of the strings (performance hit).
+// not modify them, so we can't use trim to deal with the spaces.
 // I can't find any library that does this while paying attention to
 // Locale. string::compare can be used to compare substrings, but
 // it's a byte by byte compare.
 // We find the last real character, and if a space is following, we
 // temporarily replace it with a NUL ('\0'), do the compare, and restore
 // the original value to that spot.
 // WARNING: This is not thread safe. It temporarily modifies the
 // strings and assumes it is free to do so.
 inline
 int idb_strtrimcoll(const std::string& str1, const std::string& str2)
 {
-    const std::string whitespaces (" \t\f\v\n\r");
+    const std::string whitespaces (" ");
-    int rtn = 0;
+    const char* s1 = str1.c_str();
-    char orig1;
+    const char* s2 = str2.c_str();
-    char orig2;
+    // Set found1 to the last non-whitespace char in str1
    char* s1 = NULL;
    char* s2 = NULL;
    // Set found1 to the index of the first trailing whitespace char in str1 (npos if there is none)
    std::size_t found1 = str1.find_last_not_of(whitespaces);
    if (found1 == std::string::npos) // Either the string is empty or all whitespace.
    {
        if (strlen(str1) > 0) // Is all whitespace
            found1 = 0;       // First whitespace position
    }
    else
    {
        if (strlen(str1) > found1+1)
            ++found1; // move to the first whitespace position
        else
            found1 = std::string::npos; // No trailing whitespace
    }
    // Save the value at found1 and set to NULL
    if (found1 != std::string::npos)
    {
        s1 = &const_cast<std::string&>(str1)[found1];
        orig1 = *s1;
        *s1 = 0;
    }
    // Set found2 to the index of the first trailing whitespace char in str2 (npos if there is none)
    std::size_t found2 = str2.find_last_not_of(whitespaces);
    if (found2 == std::string::npos) // Either the string is empty or all whitespace.
    {
        if (strlen(str2) > 0) // Is all whitespace
            found2 = 0;       // First whitespace position
    }
    else
    {
        if (strlen(str2) > found2+1)
            ++found2; // move to the first whitespace position
        else
            found2 = std::string::npos; // No trailing whitespace
    }
    // Save the value at found2 and set to NULL
    if (found2 != std::string::npos)
    {
        s2 = &const_cast<std::string&>(str2)[found2];
        orig2 = *s2;
        *s2 = 0;
    }
    // Compare the trimmed strings
    rtn = idb_strcoll(str1.c_str(), str2.c_str());
-    // Restore the whitespace
+     // Are both strings empty or all whitespace?
-    if (s1)
+    if (found1 == std::string::npos && found2 == std::string::npos)
-        *s1 = orig1;
+    {
-    if (s2)
+        return 0; // they match
-        *s2 = orig2;
+    }
    // If str1 is empty or all spaces
    if (found1 == std::string::npos && found2 != std::string::npos)
    {
        return -1;
    }
    // If str2 is empty or all spaces
    if (found1 != std::string::npos && found2 == std::string::npos)
    {
        return 1;
    }
    // Compare the (trimmed) strings
    std::locale loc;
    const std::collate<char>& coll = std::use_facet<std::collate<char> >(loc);
    int rtn = coll.compare(s1, s1+found1+1, s2, s2+found2+1);
 //    return coll.compare(s1, s1+found1, s2, s2+found2);
    return rtn;
 }