You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
MCOL-3536 Collation phase 2
This commit is contained in:
@ -75,12 +75,7 @@ std::string Func_replace::getStrVal(rowgroup::Row& row,
|
|||||||
size_t pos = 0;
|
size_t pos = 0;
|
||||||
if (binaryCmp)
|
if (binaryCmp)
|
||||||
{
|
{
|
||||||
uint32_t i = 0;
|
// Count the number of fromstr in strend so we can reserve buffer space.
|
||||||
pos = str.find(fromstr);
|
|
||||||
if (pos == string::npos)
|
|
||||||
return str;
|
|
||||||
|
|
||||||
// Count the number of fromstr in strend
|
|
||||||
int count = 0;
|
int count = 0;
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
@ -91,7 +86,11 @@ std::string Func_replace::getStrVal(rowgroup::Row& row,
|
|||||||
|
|
||||||
newstr.reserve(strLen + (count * ((int)toLen - (int)fromLen)) + 1);
|
newstr.reserve(strLen + (count * ((int)toLen - (int)fromLen)) + 1);
|
||||||
|
|
||||||
// Now move the stuff into newstr
|
uint32_t i = 0;
|
||||||
|
pos = str.find(fromstr);
|
||||||
|
if (pos == string::npos)
|
||||||
|
return str;
|
||||||
|
// Move the stuff into newstr
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
if (pos > i)
|
if (pos > i)
|
||||||
@ -114,12 +113,14 @@ std::string Func_replace::getStrVal(rowgroup::Row& row,
|
|||||||
const char* from = fromstr.c_str();
|
const char* from = fromstr.c_str();
|
||||||
const char* fromEnd = from + fromLen;
|
const char* fromEnd = from + fromLen;
|
||||||
const char* to = tostr.c_str();
|
const char* to = tostr.c_str();
|
||||||
char* ptr = const_cast<char*>(src);
|
const char* ptr = src;
|
||||||
char *i,*j;
|
char *i,*j;
|
||||||
size_t count = 10; // Some arbitrary number to reserve some space to start.
|
size_t count = 10; // Some arbitrary number to reserve some space to start.
|
||||||
size_t growlen = count * ((int)toLen - (int)fromLen);
|
int growlen = (int)toLen - (int)fromLen;
|
||||||
|
growlen = growlen < 1 ? 1 : growlen;
|
||||||
|
growlen *= count;
|
||||||
newstr.reserve(strLen + (count * growlen) + 1);
|
newstr.reserve(strLen + (count * growlen) + 1);
|
||||||
size_t maxsize = newstr.max_size();
|
size_t maxsize = newstr.capacity();
|
||||||
uint32_t l;
|
uint32_t l;
|
||||||
|
|
||||||
// We don't know where byte patterns might match so
|
// We don't know where byte patterns might match so
|
||||||
@ -132,7 +133,7 @@ std::string Func_replace::getStrVal(rowgroup::Row& row,
|
|||||||
if (*ptr == *from) // If the first byte matches, maybe we have a match
|
if (*ptr == *from) // If the first byte matches, maybe we have a match
|
||||||
{
|
{
|
||||||
// Do a byte by byte compare of src at that spot against from
|
// Do a byte by byte compare of src at that spot against from
|
||||||
i = ptr + 1;
|
i = const_cast<char*>(ptr) + 1;
|
||||||
j = const_cast<char*>(from) + 1;
|
j = const_cast<char*>(from) + 1;
|
||||||
found = true;
|
found = true;
|
||||||
while (j != fromEnd)
|
while (j != fromEnd)
|
||||||
@ -148,18 +149,19 @@ std::string Func_replace::getStrVal(rowgroup::Row& row,
|
|||||||
{
|
{
|
||||||
if (ptr < i)
|
if (ptr < i)
|
||||||
{
|
{
|
||||||
int mvsize = i - ptr;
|
int mvsize = ptr - src;
|
||||||
if (newstr.length() + mvsize + toLen < maxsize)
|
if (newstr.length() + mvsize + toLen > maxsize)
|
||||||
{
|
{
|
||||||
// We need a re-alloc
|
// We need a re-alloc
|
||||||
newstr.reserve(maxsize + growlen);
|
newstr.reserve(maxsize + growlen);
|
||||||
|
maxsize = newstr.capacity();
|
||||||
growlen *= 2;
|
growlen *= 2;
|
||||||
}
|
}
|
||||||
newstr.append(ptr, mvsize);
|
newstr.append(src, ptr - src);
|
||||||
ptr += mvsize;
|
src += mvsize + fromLen;
|
||||||
|
ptr = src;
|
||||||
}
|
}
|
||||||
newstr.append(to, toLen);
|
newstr.append(to, toLen);
|
||||||
ptr += toLen;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -170,8 +172,9 @@ std::string Func_replace::getStrVal(rowgroup::Row& row,
|
|||||||
++ptr;
|
++ptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Copy in the trailing src chars.
|
||||||
|
newstr.append(src, ptr - src);
|
||||||
}
|
}
|
||||||
|
|
||||||
return newstr;
|
return newstr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -76,10 +76,12 @@ std::string Func_rtrim::getStrVal(rowgroup::Row& row,
|
|||||||
if (strTLen == 0 || strTLen > strLen)
|
if (strTLen == 0 || strTLen > strLen)
|
||||||
return src;
|
return src;
|
||||||
|
|
||||||
|
bool binaryCmp = (cs->state & MY_CS_BINSORT) || !cs->use_mb();
|
||||||
|
|
||||||
if (binTLen == 1)
|
if (binTLen == 1)
|
||||||
{
|
{
|
||||||
const char* ptr = pos;
|
const char* ptr = pos;
|
||||||
if (cs->use_mb()) // This is a multi-byte charset
|
if (!binaryCmp) // This is a multi-byte charset
|
||||||
{
|
{
|
||||||
const char* p = pos;
|
const char* p = pos;
|
||||||
uint32 l;
|
uint32 l;
|
||||||
@ -110,7 +112,7 @@ std::string Func_rtrim::getStrVal(rowgroup::Row& row,
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
// An uncommon case where the space character is > 1 byte
|
// An uncommon case where the space character is > 1 byte
|
||||||
if (cs->use_mb()) // This is a multi-byte charset
|
if (binaryCmp) // This is a multi-byte charset
|
||||||
{
|
{
|
||||||
// The problem is that the byte pattern at the end could
|
// The problem is that the byte pattern at the end could
|
||||||
// match memcmp, but not be correct since the first byte compared
|
// match memcmp, but not be correct since the first byte compared
|
||||||
|
@ -75,7 +75,7 @@ std::string Func_substr::getStrVal(rowgroup::Row& row,
|
|||||||
int64_t length;
|
int64_t length;
|
||||||
if (fp.size() == 3)
|
if (fp.size() == 3)
|
||||||
{
|
{
|
||||||
int64_t length = fp[2]->data()->getIntVal(row, isNull);
|
length = fp[2]->data()->getIntVal(row, isNull);
|
||||||
if (isNull)
|
if (isNull)
|
||||||
return "";
|
return "";
|
||||||
if (length < 1)
|
if (length < 1)
|
||||||
|
@ -66,7 +66,7 @@ std::string Func_substring_index::getStrVal(rowgroup::Row& row,
|
|||||||
if (isNull)
|
if (isNull)
|
||||||
return "";
|
return "";
|
||||||
|
|
||||||
if (strLen == 0 || delimLen == 0 || !count == 0)
|
if (strLen == 0 || delimLen == 0 || count == 0)
|
||||||
return "";
|
return "";
|
||||||
|
|
||||||
if (count > strLen)
|
if (count > strLen)
|
||||||
@ -75,9 +75,10 @@ std::string Func_substring_index::getStrVal(rowgroup::Row& row,
|
|||||||
if ((count < 0) && ((count * -1) > strLen))
|
if ((count < 0) && ((count * -1) > strLen))
|
||||||
return str;
|
return str;
|
||||||
|
|
||||||
|
bool binaryCmp = (cs->state & MY_CS_BINSORT) || !cs->use_mb();
|
||||||
std::string value; // Only used if !use_mb()
|
std::string value; // Only used if !use_mb()
|
||||||
|
|
||||||
if (cs->use_mb()) // Charset supports multibyte characters
|
if (!binaryCmp) // Charset supports multibyte characters
|
||||||
{
|
{
|
||||||
const char* src = str.c_str();
|
const char* src = str.c_str();
|
||||||
const char* srcEnd = src + strLen;
|
const char* srcEnd = src + strLen;
|
||||||
|
@ -75,6 +75,8 @@ std::string Func_trim::getStrVal(rowgroup::Row& row,
|
|||||||
if (strTLen == 0 || strTLen > strLen)
|
if (strTLen == 0 || strTLen > strLen)
|
||||||
return src;
|
return src;
|
||||||
|
|
||||||
|
bool binaryCmp = (cs->state & MY_CS_BINSORT) || !cs->use_mb();
|
||||||
|
|
||||||
if (binTLen == 1)
|
if (binTLen == 1)
|
||||||
{
|
{
|
||||||
// If the trim string is 1 byte, don't waste cpu for memcmp
|
// If the trim string is 1 byte, don't waste cpu for memcmp
|
||||||
@ -86,7 +88,7 @@ std::string Func_trim::getStrVal(rowgroup::Row& row,
|
|||||||
}
|
}
|
||||||
// Trim trailing
|
// Trim trailing
|
||||||
const char* ptr = pos;
|
const char* ptr = pos;
|
||||||
if (cs->use_mb()) // This is a multi-byte charset
|
if (!binaryCmp) // This is a multi-byte charset
|
||||||
{
|
{
|
||||||
const char* p = pos;
|
const char* p = pos;
|
||||||
uint32 l;
|
uint32 l;
|
||||||
@ -124,7 +126,7 @@ std::string Func_trim::getStrVal(rowgroup::Row& row,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Trim trailing
|
// Trim trailing
|
||||||
if (cs->use_mb()) // This is a multi-byte charset
|
if (!binaryCmp) // This is a multi-byte charset
|
||||||
{
|
{
|
||||||
// The problem is that the byte pattern at the end could
|
// The problem is that the byte pattern at the end could
|
||||||
// match memcmp, but not be correct since the first byte compared
|
// match memcmp, but not be correct since the first byte compared
|
||||||
|
@ -304,7 +304,14 @@ int StringCompare::operator()(IdbCompare* l, Row::Pointer r1, Row::Pointer r2)
|
|||||||
if (!cs)
|
if (!cs)
|
||||||
cs = l->rowGroup()->getCharset(fSpec.fIndex);
|
cs = l->rowGroup()->getCharset(fSpec.fIndex);
|
||||||
|
|
||||||
ret = fSpec.fAsc * cs->strnncollsp(s1, len1, s2, len2);
|
if (cs->state & MY_CS_BINSORT)
|
||||||
|
{
|
||||||
|
ret = fSpec.fAsc * strncmp(s1, s2, max(len1,len2));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ret = fSpec.fAsc * cs->strnncoll(s1, len1, s2, len2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
Reference in New Issue
Block a user