1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-3536 Collation phase 2

This commit is contained in:
David Hall
2020-06-15 11:08:59 -05:00
parent 165ae4a6f3
commit d0818f2b4e
6 changed files with 40 additions and 25 deletions

View File

@ -75,12 +75,7 @@ std::string Func_replace::getStrVal(rowgroup::Row& row,
size_t pos = 0; size_t pos = 0;
if (binaryCmp) if (binaryCmp)
{ {
uint32_t i = 0; // Count the number of fromstr in strend so we can reserve buffer space.
pos = str.find(fromstr);
if (pos == string::npos)
return str;
// Count the number of fromstr in strend
int count = 0; int count = 0;
do do
{ {
@ -91,7 +86,11 @@ std::string Func_replace::getStrVal(rowgroup::Row& row,
newstr.reserve(strLen + (count * ((int)toLen - (int)fromLen)) + 1); newstr.reserve(strLen + (count * ((int)toLen - (int)fromLen)) + 1);
// Now move the stuff into newstr uint32_t i = 0;
pos = str.find(fromstr);
if (pos == string::npos)
return str;
// Move the stuff into newstr
do do
{ {
if (pos > i) if (pos > i)
@ -114,12 +113,14 @@ std::string Func_replace::getStrVal(rowgroup::Row& row,
const char* from = fromstr.c_str(); const char* from = fromstr.c_str();
const char* fromEnd = from + fromLen; const char* fromEnd = from + fromLen;
const char* to = tostr.c_str(); const char* to = tostr.c_str();
char* ptr = const_cast<char*>(src); const char* ptr = src;
char *i,*j; char *i,*j;
size_t count = 10; // Some arbitray number to reserve some space to start. size_t count = 10; // Some arbitray number to reserve some space to start.
size_t growlen = count * ((int)toLen - (int)fromLen); int growlen = (int)toLen - (int)fromLen;
growlen = growlen < 1 ? 1 : growlen;
growlen *= count;
newstr.reserve(strLen + (count * growlen) + 1); newstr.reserve(strLen + (count * growlen) + 1);
size_t maxsize = newstr.max_size(); size_t maxsize = newstr.capacity();
uint32_t l; uint32_t l;
// We don't know where byte patterns might match so // We don't know where byte patterns might match so
@ -132,7 +133,7 @@ std::string Func_replace::getStrVal(rowgroup::Row& row,
if (*ptr == *from) // If the first byte matches, maybe we have a match if (*ptr == *from) // If the first byte matches, maybe we have a match
{ {
// Do a byte by byte compare of src at that spot against from // Do a byte by byte compare of src at that spot against from
i = ptr + 1; i = const_cast<char*>(ptr) + 1;
j = const_cast<char*>(from) + 1; j = const_cast<char*>(from) + 1;
found = true; found = true;
while (j != fromEnd) while (j != fromEnd)
@ -148,18 +149,19 @@ std::string Func_replace::getStrVal(rowgroup::Row& row,
{ {
if (ptr < i) if (ptr < i)
{ {
int mvsize = i - ptr; int mvsize = ptr - src;
if (newstr.length() + mvsize + toLen < maxsize) if (newstr.length() + mvsize + toLen > maxsize)
{ {
// We need a re-alloc // We need a re-alloc
newstr.reserve(maxsize + growlen); newstr.reserve(maxsize + growlen);
maxsize = newstr.capacity();
growlen *= 2; growlen *= 2;
} }
newstr.append(ptr, mvsize); newstr.append(src, ptr - src);
ptr += mvsize; src += mvsize + fromLen;
ptr = src;
} }
newstr.append(to, toLen); newstr.append(to, toLen);
ptr += toLen;
} }
else else
{ {
@ -170,8 +172,9 @@ std::string Func_replace::getStrVal(rowgroup::Row& row,
++ptr; ++ptr;
} }
} }
// Copy in the trailing src chars.
newstr.append(src, ptr - src);
} }
return newstr; return newstr;
} }

View File

@ -76,10 +76,12 @@ std::string Func_rtrim::getStrVal(rowgroup::Row& row,
if (strTLen == 0 || strTLen > strLen) if (strTLen == 0 || strTLen > strLen)
return src; return src;
bool binaryCmp = (cs->state & MY_CS_BINSORT) || !cs->use_mb();
if (binTLen == 1) if (binTLen == 1)
{ {
const char* ptr = pos; const char* ptr = pos;
if (cs->use_mb()) // This is a multi-byte charset if (!binaryCmp) // This is a multi-byte charset
{ {
const char* p = pos; const char* p = pos;
uint32 l; uint32 l;
@ -110,7 +112,7 @@ std::string Func_rtrim::getStrVal(rowgroup::Row& row,
else else
{ {
// An uncommon case where the space character is > 1 byte // An uncommon case where the space character is > 1 byte
if (cs->use_mb()) // This is a multi-byte charset if (binaryCmp) // This is a multi-byte charset
{ {
// The problem is that the byte pattern at the end could // The problem is that the byte pattern at the end could
// match memcmp, but not be correct since the first byte compared // match memcmp, but not be correct since the first byte compared

View File

@ -75,7 +75,7 @@ std::string Func_substr::getStrVal(rowgroup::Row& row,
int64_t length; int64_t length;
if (fp.size() == 3) if (fp.size() == 3)
{ {
int64_t length = fp[2]->data()->getIntVal(row, isNull); length = fp[2]->data()->getIntVal(row, isNull);
if (isNull) if (isNull)
return ""; return "";
if (length < 1) if (length < 1)

View File

@ -66,7 +66,7 @@ std::string Func_substring_index::getStrVal(rowgroup::Row& row,
if (isNull) if (isNull)
return ""; return "";
if (strLen == 0 || delimLen == 0 || !count == 0) if (strLen == 0 || delimLen == 0 || count == 0)
return ""; return "";
if (count > strLen) if (count > strLen)
@ -75,9 +75,10 @@ std::string Func_substring_index::getStrVal(rowgroup::Row& row,
if ((count < 0) && ((count * -1) > strLen)) if ((count < 0) && ((count * -1) > strLen))
return str; return str;
bool binaryCmp = (cs->state & MY_CS_BINSORT) || !cs->use_mb();
std::string value; // Only used if !use_mb() std::string value; // Only used if !use_mb()
if (cs->use_mb()) // Charset supports multibyte characters if (!binaryCmp) // Charset supports multibyte characters
{ {
const char* src = str.c_str(); const char* src = str.c_str();
const char* srcEnd = src + strLen; const char* srcEnd = src + strLen;

View File

@ -75,6 +75,8 @@ std::string Func_trim::getStrVal(rowgroup::Row& row,
if (strTLen == 0 || strTLen > strLen) if (strTLen == 0 || strTLen > strLen)
return src; return src;
bool binaryCmp = (cs->state & MY_CS_BINSORT) || !cs->use_mb();
if (binTLen == 1) if (binTLen == 1)
{ {
// If the trim string is 1 byte, don't waste cpu for memcmp // If the trim string is 1 byte, don't waste cpu for memcmp
@ -86,7 +88,7 @@ std::string Func_trim::getStrVal(rowgroup::Row& row,
} }
// Trim trailing // Trim trailing
const char* ptr = pos; const char* ptr = pos;
if (cs->use_mb()) // This is a multi-byte charset if (!binaryCmp) // This is a multi-byte charset
{ {
const char* p = pos; const char* p = pos;
uint32 l; uint32 l;
@ -124,7 +126,7 @@ std::string Func_trim::getStrVal(rowgroup::Row& row,
} }
// Trim trailing // Trim trailing
if (cs->use_mb()) // This is a multi-byte charset if (!binaryCmp) // This is a multi-byte charset
{ {
// The problem is that the byte pattern at the end could // The problem is that the byte pattern at the end could
// match memcmp, but not be correct since the first byte compared // match memcmp, but not be correct since the first byte compared

View File

@ -304,7 +304,14 @@ int StringCompare::operator()(IdbCompare* l, Row::Pointer r1, Row::Pointer r2)
if (!cs) if (!cs)
cs = l->rowGroup()->getCharset(fSpec.fIndex); cs = l->rowGroup()->getCharset(fSpec.fIndex);
ret = fSpec.fAsc * cs->strnncollsp(s1, len1, s2, len2); if (cs->state & MY_CS_BINSORT)
{
ret = fSpec.fAsc * strncmp(s1, s2, max(len1,len2));
}
else
{
ret = fSpec.fAsc * cs->strnncoll(s1, len1, s2, len2);
}
} }
return ret; return ret;