1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-3536 Collation phase 2

This commit is contained in:
David Hall
2020-06-15 11:08:59 -05:00
parent 165ae4a6f3
commit d0818f2b4e
6 changed files with 40 additions and 25 deletions

View File

@ -75,12 +75,7 @@ std::string Func_replace::getStrVal(rowgroup::Row& row,
size_t pos = 0;
if (binaryCmp)
{
uint32_t i = 0;
pos = str.find(fromstr);
if (pos == string::npos)
return str;
// Count the number of fromstr in strend
// Count the occurrences of fromstr in str so we can reserve buffer space.
int count = 0;
do
{
@ -91,7 +86,11 @@ std::string Func_replace::getStrVal(rowgroup::Row& row,
newstr.reserve(strLen + (count * ((int)toLen - (int)fromLen)) + 1);
// Now move the stuff into newstr
uint32_t i = 0;
pos = str.find(fromstr);
if (pos == string::npos)
return str;
// Move the stuff into newstr
do
{
if (pos > i)
@ -114,12 +113,14 @@ std::string Func_replace::getStrVal(rowgroup::Row& row,
const char* from = fromstr.c_str();
const char* fromEnd = from + fromLen;
const char* to = tostr.c_str();
char* ptr = const_cast<char*>(src);
const char* ptr = src;
char *i,*j;
size_t count = 10; // Some arbitrary number to reserve some space to start.
size_t growlen = count * ((int)toLen - (int)fromLen);
int growlen = (int)toLen - (int)fromLen;
growlen = growlen < 1 ? 1 : growlen;
growlen *= count;
newstr.reserve(strLen + (count * growlen) + 1);
size_t maxsize = newstr.max_size();
size_t maxsize = newstr.capacity();
uint32_t l;
// We don't know where byte patterns might match so
@ -132,7 +133,7 @@ std::string Func_replace::getStrVal(rowgroup::Row& row,
if (*ptr == *from) // If the first byte matches, maybe we have a match
{
// Do a byte by byte compare of src at that spot against from
i = ptr + 1;
i = const_cast<char*>(ptr) + 1;
j = const_cast<char*>(from) + 1;
found = true;
while (j != fromEnd)
@ -148,18 +149,19 @@ std::string Func_replace::getStrVal(rowgroup::Row& row,
{
if (ptr < i)
{
int mvsize = i - ptr;
if (newstr.length() + mvsize + toLen < maxsize)
int mvsize = ptr - src;
if (newstr.length() + mvsize + toLen > maxsize)
{
// We need a re-alloc
newstr.reserve(maxsize + growlen);
maxsize = newstr.capacity();
growlen *= 2;
}
newstr.append(ptr, mvsize);
ptr += mvsize;
newstr.append(src, ptr - src);
src += mvsize + fromLen;
ptr = src;
}
newstr.append(to, toLen);
ptr += toLen;
}
else
{
@ -170,8 +172,9 @@ std::string Func_replace::getStrVal(rowgroup::Row& row,
++ptr;
}
}
// Copy in the trailing src chars.
newstr.append(src, ptr - src);
}
return newstr;
}

View File

@ -76,10 +76,12 @@ std::string Func_rtrim::getStrVal(rowgroup::Row& row,
if (strTLen == 0 || strTLen > strLen)
return src;
bool binaryCmp = (cs->state & MY_CS_BINSORT) || !cs->use_mb();
if (binTLen == 1)
{
const char* ptr = pos;
if (cs->use_mb()) // This is a multi-byte charset
if (!binaryCmp) // This is a multi-byte charset
{
const char* p = pos;
uint32 l;
@ -110,7 +112,7 @@ std::string Func_rtrim::getStrVal(rowgroup::Row& row,
else
{
// An uncommon case where the space character is > 1 byte
if (cs->use_mb()) // This is a multi-byte charset
if (binaryCmp) // This is a multi-byte charset
{
// The problem is that the byte pattern at the end could
// match memcmp, but not be correct since the first byte compared

View File

@ -75,7 +75,7 @@ std::string Func_substr::getStrVal(rowgroup::Row& row,
int64_t length;
if (fp.size() == 3)
{
int64_t length = fp[2]->data()->getIntVal(row, isNull);
length = fp[2]->data()->getIntVal(row, isNull);
if (isNull)
return "";
if (length < 1)

View File

@ -66,7 +66,7 @@ std::string Func_substring_index::getStrVal(rowgroup::Row& row,
if (isNull)
return "";
if (strLen == 0 || delimLen == 0 || !count == 0)
if (strLen == 0 || delimLen == 0 || count == 0)
return "";
if (count > strLen)
@ -75,9 +75,10 @@ std::string Func_substring_index::getStrVal(rowgroup::Row& row,
if ((count < 0) && ((count * -1) > strLen))
return str;
bool binaryCmp = (cs->state & MY_CS_BINSORT) || !cs->use_mb();
std::string value; // Only used if !use_mb()
if (cs->use_mb()) // Charset supports multibyte characters
if (!binaryCmp) // Charset supports multibyte characters
{
const char* src = str.c_str();
const char* srcEnd = src + strLen;

View File

@ -75,6 +75,8 @@ std::string Func_trim::getStrVal(rowgroup::Row& row,
if (strTLen == 0 || strTLen > strLen)
return src;
bool binaryCmp = (cs->state & MY_CS_BINSORT) || !cs->use_mb();
if (binTLen == 1)
{
// If the trim string is 1 byte, don't waste cpu for memcmp
@ -86,7 +88,7 @@ std::string Func_trim::getStrVal(rowgroup::Row& row,
}
// Trim trailing
const char* ptr = pos;
if (cs->use_mb()) // This is a multi-byte charset
if (!binaryCmp) // This is a multi-byte charset
{
const char* p = pos;
uint32 l;
@ -124,7 +126,7 @@ std::string Func_trim::getStrVal(rowgroup::Row& row,
}
// Trim trailing
if (cs->use_mb()) // This is a multi-byte charset
if (!binaryCmp) // This is a multi-byte charset
{
// The problem is that the byte pattern at the end could
// match memcmp, but not be correct since the first byte compared

View File

@ -304,7 +304,14 @@ int StringCompare::operator()(IdbCompare* l, Row::Pointer r1, Row::Pointer r2)
if (!cs)
cs = l->rowGroup()->getCharset(fSpec.fIndex);
ret = fSpec.fAsc * cs->strnncollsp(s1, len1, s2, len2);
if (cs->state & MY_CS_BINSORT)
{
ret = fSpec.fAsc * strncmp(s1, s2, max(len1,len2));
}
else
{
ret = fSpec.fAsc * cs->strnncoll(s1, len1, s2, len2);
}
}
return ret;