1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

MCOL-3536 Collation

This commit is contained in:
David Hall
2020-06-12 10:19:17 -05:00
parent de125bac2b
commit 165ae4a6f3
4 changed files with 267 additions and 139 deletions

View File

@ -49,34 +49,126 @@ CalpontSystemCatalog::ColType Func_replace::operationType(FunctionParm& fp, Calp
std::string Func_replace::getStrVal(rowgroup::Row& row,
FunctionParm& fp,
bool& isNull,
execplan::CalpontSystemCatalog::ColType&)
execplan::CalpontSystemCatalog::ColType& ct)
{
CHARSET_INFO* cs = ct.getCharset();
const string& str = fp[0]->data()->getStrVal(row, isNull);
if (isNull)
return "";
size_t strLen = str.length();
const string& fromstr = fp[1]->data()->getStrVal(row, isNull);
if (isNull)
return "";
if (fromstr.length() == 0)
return str;
size_t fromLen = fromstr.length();
const string& tostr = fp[2]->data()->getStrVal(row, isNull);
if (isNull)
return "";
size_t toLen = tostr.length();
bool binaryCmp = (cs->state & MY_CS_BINSORT) || !cs->use_mb();
string newstr;
unsigned int i = 0;
for (;;)
size_t pos = 0;
if (binaryCmp)
{
size_t pos = str.find(fromstr, i);
if ( pos != string::npos )
uint32_t i = 0;
pos = str.find(fromstr);
if (pos == string::npos)
return str;
// Count the number of occurrences of fromstr in str
int count = 0;
do
{
//match
if ( pos > i )
++count;
pos = str.find(fromstr, pos + fromLen);
}
while (pos != string::npos);
newstr.reserve(strLen + (count * ((int)toLen - (int)fromLen)) + 1);
// Now move the stuff into newstr
do
{
if (pos > i)
newstr = newstr + str.substr(i, pos - i);
newstr = newstr + tostr;
i = pos + fromstr.size();
i = pos + fromLen;
pos = str.find(fromstr, i);
}
else
while (pos != string::npos);
newstr = newstr + str.substr(i, string::npos);
}
else
{
// UTF
const char* src = str.c_str();
const char* srcEnd = src + strLen;
const char* srchEnd = srcEnd - fromLen + 1;
const char* from = fromstr.c_str();
const char* fromEnd = from + fromLen;
const char* to = tostr.c_str();
char* ptr = const_cast<char*>(src);
char *i,*j;
size_t count = 10; // Some arbitrary number to reserve some space to start.
size_t growlen = count * ((int)toLen - (int)fromLen);
newstr.reserve(strLen + (count * growlen) + 1);
size_t maxsize = newstr.max_size();
uint32_t l;
// We don't know where byte patterns might match so
// we start at the beginning of the string and move forward
// one character at a time until we find a match. Then we can
// move the src bytes and add in the to bytes, then try again.
while (ptr < srchEnd)
{
newstr = newstr + str.substr(i, 1000);
break;
bool found = false;
if (*ptr == *from) // If the first byte matches, maybe we have a match
{
// Do a byte by byte compare of src at that spot against from
i = ptr + 1;
j = const_cast<char*>(from) + 1;
found = true;
while (j != fromEnd)
{
if (*i++ != *j++)
{
found = false;
break;
}
}
}
if (found)
{
if (ptr < i)
{
int mvsize = i - ptr;
if (newstr.length() + mvsize + toLen < maxsize)
{
// We need a re-alloc
newstr.reserve(maxsize + growlen);
growlen *= 2;
}
newstr.append(ptr, mvsize);
ptr += mvsize;
}
newstr.append(to, toLen);
ptr += toLen;
}
else
{
// move to the next character
if ((l = my_ismbchar(cs, ptr, srcEnd))) // returns the number of bytes in the leading char or zero if one byte
ptr += l;
else
++ptr;
}
}
}