From d0818f2b4efb0621d633c778307505e4040c7a2e Mon Sep 17 00:00:00 2001 From: David Hall Date: Mon, 15 Jun 2020 11:08:59 -0500 Subject: [PATCH] MCOL-3536 Collation phase 2 --- utils/funcexp/func_replace.cpp | 37 ++++++++++++++------------ utils/funcexp/func_rtrim.cpp | 6 +++-- utils/funcexp/func_substr.cpp | 2 +- utils/funcexp/func_substring_index.cpp | 5 ++-- utils/funcexp/func_trim.cpp | 6 +++-- utils/windowfunction/idborderby.cpp | 9 ++++++- 6 files changed, 40 insertions(+), 25 deletions(-) diff --git a/utils/funcexp/func_replace.cpp b/utils/funcexp/func_replace.cpp index f147f3a03..4f72ac195 100644 --- a/utils/funcexp/func_replace.cpp +++ b/utils/funcexp/func_replace.cpp @@ -75,12 +75,7 @@ std::string Func_replace::getStrVal(rowgroup::Row& row, size_t pos = 0; if (binaryCmp) { - uint32_t i = 0; - pos = str.find(fromstr); - if (pos == string::npos) - return str; - - // Count the number of fromstr in strend + // Count the number of fromstr in strend so we can reserve buffer space. int count = 0; do { @@ -91,7 +86,11 @@ std::string Func_replace::getStrVal(rowgroup::Row& row, newstr.reserve(strLen + (count * ((int)toLen - (int)fromLen)) + 1); - // Now move the stuff into newstr + uint32_t i = 0; + pos = str.find(fromstr); + if (pos == string::npos) + return str; + // Move the stuff into newstr do { if (pos > i) @@ -114,12 +113,14 @@ std::string Func_replace::getStrVal(rowgroup::Row& row, const char* from = fromstr.c_str(); const char* fromEnd = from + fromLen; const char* to = tostr.c_str(); - char* ptr = const_cast(src); + const char* ptr = src; char *i,*j; size_t count = 10; // Some arbitray number to reserve some space to start. - size_t growlen = count * ((int)toLen - (int)fromLen); + int growlen = (int)toLen - (int)fromLen; + growlen = growlen < 1 ? 
1 : growlen; + growlen *= count; newstr.reserve(strLen + (count * growlen) + 1); - size_t maxsize = newstr.max_size(); + size_t maxsize = newstr.capacity(); uint32_t l; // We don't know where byte patterns might match so @@ -132,7 +133,7 @@ std::string Func_replace::getStrVal(rowgroup::Row& row, if (*ptr == *from) // If the first byte matches, maybe we have a match { // Do a byte by byte compare of src at that spot against from - i = ptr + 1; + i = const_cast(ptr) + 1; j = const_cast(from) + 1; found = true; while (j != fromEnd) @@ -148,18 +149,19 @@ std::string Func_replace::getStrVal(rowgroup::Row& row, { if (ptr < i) { - int mvsize = i - ptr; - if (newstr.length() + mvsize + toLen < maxsize) + int mvsize = ptr - src; + if (newstr.length() + mvsize + toLen > maxsize) { // We need a re-alloc newstr.reserve(maxsize + growlen); + maxsize = newstr.capacity(); growlen *= 2; } - newstr.append(ptr, mvsize); - ptr += mvsize; + newstr.append(src, ptr - src); + src += mvsize + fromLen; + ptr = src; } newstr.append(to, toLen); - ptr += toLen; } else { @@ -170,8 +172,9 @@ std::string Func_replace::getStrVal(rowgroup::Row& row, ++ptr; } } + // Copy in the trailing src chars. 
+ newstr.append(src, ptr - src); } - return newstr; } diff --git a/utils/funcexp/func_rtrim.cpp b/utils/funcexp/func_rtrim.cpp index 77dc22424..bddafefbf 100644 --- a/utils/funcexp/func_rtrim.cpp +++ b/utils/funcexp/func_rtrim.cpp @@ -76,10 +76,12 @@ std::string Func_rtrim::getStrVal(rowgroup::Row& row, if (strTLen == 0 || strTLen > strLen) return src; + bool binaryCmp = (cs->state & MY_CS_BINSORT) || !cs->use_mb(); + if (binTLen == 1) { const char* ptr = pos; - if (cs->use_mb()) // This is a multi-byte charset + if (!binaryCmp) // This is a multi-byte charset { const char* p = pos; uint32 l; @@ -110,7 +112,7 @@ std::string Func_rtrim::getStrVal(rowgroup::Row& row, else { // An uncommon case where the space character is > 1 byte - if (cs->use_mb()) // This is a multi-byte charset + if (binaryCmp) // This is a multi-byte charset { // The problem is that the byte pattern at the end could // match memcmp, but not be correct since the first byte compared diff --git a/utils/funcexp/func_substr.cpp b/utils/funcexp/func_substr.cpp index 6fdb5c16a..ddc0644b7 100644 --- a/utils/funcexp/func_substr.cpp +++ b/utils/funcexp/func_substr.cpp @@ -75,7 +75,7 @@ std::string Func_substr::getStrVal(rowgroup::Row& row, int64_t length; if (fp.size() == 3) { - int64_t length = fp[2]->data()->getIntVal(row, isNull); + length = fp[2]->data()->getIntVal(row, isNull); if (isNull) return ""; if (length < 1) diff --git a/utils/funcexp/func_substring_index.cpp b/utils/funcexp/func_substring_index.cpp index 0fb9efd38..da6e5d12d 100644 --- a/utils/funcexp/func_substring_index.cpp +++ b/utils/funcexp/func_substring_index.cpp @@ -66,7 +66,7 @@ std::string Func_substring_index::getStrVal(rowgroup::Row& row, if (isNull) return ""; - if (strLen == 0 || delimLen == 0 || !count == 0) + if (strLen == 0 || delimLen == 0 || count == 0) return ""; if (count > strLen) @@ -75,9 +75,10 @@ std::string Func_substring_index::getStrVal(rowgroup::Row& row, if ((count < 0) && ((count * -1) > strLen)) return str; + 
bool binaryCmp = (cs->state & MY_CS_BINSORT) || !cs->use_mb(); std::string value; // Only used if !use_mb() - if (cs->use_mb()) // Charset supports multibyte characters + if (!binaryCmp) // Charset supports multibyte characters { const char* src = str.c_str(); const char* srcEnd = src + strLen; diff --git a/utils/funcexp/func_trim.cpp b/utils/funcexp/func_trim.cpp index 005183fcd..13482de45 100644 --- a/utils/funcexp/func_trim.cpp +++ b/utils/funcexp/func_trim.cpp @@ -75,6 +75,8 @@ std::string Func_trim::getStrVal(rowgroup::Row& row, if (strTLen == 0 || strTLen > strLen) return src; + bool binaryCmp = (cs->state & MY_CS_BINSORT) || !cs->use_mb(); + if (binTLen == 1) { // If the trim string is 1 byte, don't waste cpu for memcmp @@ -86,7 +88,7 @@ std::string Func_trim::getStrVal(rowgroup::Row& row, } // Trim trailing const char* ptr = pos; - if (cs->use_mb()) // This is a multi-byte charset + if (!binaryCmp) // This is a multi-byte charset { const char* p = pos; uint32 l; @@ -124,7 +126,7 @@ std::string Func_trim::getStrVal(rowgroup::Row& row, } // Trim trailing - if (cs->use_mb()) // This is a multi-byte charset + if (!binaryCmp) // This is a multi-byte charset { // The problem is that the byte pattern at the end could // match memcmp, but not be correct since the first byte compared diff --git a/utils/windowfunction/idborderby.cpp b/utils/windowfunction/idborderby.cpp index cb37796a6..f92e1a5c0 100644 --- a/utils/windowfunction/idborderby.cpp +++ b/utils/windowfunction/idborderby.cpp @@ -304,7 +304,14 @@ int StringCompare::operator()(IdbCompare* l, Row::Pointer r1, Row::Pointer r2) if (!cs) cs = l->rowGroup()->getCharset(fSpec.fIndex); - ret = fSpec.fAsc * cs->strnncollsp(s1, len1, s2, len2); + if (cs->state & MY_CS_BINSORT) + { + ret = fSpec.fAsc * strncmp(s1, s2, max(len1,len2)); + } + else + { + ret = fSpec.fAsc * cs->strnncoll(s1, len1, s2, len2); + } } return ret;