You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-29 08:21:15 +03:00
MCOL-3536 Collation
This commit is contained in:
@ -49,34 +49,126 @@ CalpontSystemCatalog::ColType Func_replace::operationType(FunctionParm& fp, Calp
|
||||
std::string Func_replace::getStrVal(rowgroup::Row& row,
|
||||
FunctionParm& fp,
|
||||
bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType&)
|
||||
execplan::CalpontSystemCatalog::ColType& ct)
|
||||
{
|
||||
CHARSET_INFO* cs = ct.getCharset();
|
||||
|
||||
const string& str = fp[0]->data()->getStrVal(row, isNull);
|
||||
|
||||
if (isNull)
|
||||
return "";
|
||||
size_t strLen = str.length();
|
||||
|
||||
const string& fromstr = fp[1]->data()->getStrVal(row, isNull);
|
||||
|
||||
if (isNull)
|
||||
return "";
|
||||
if (fromstr.length() == 0)
|
||||
return str;
|
||||
size_t fromLen = fromstr.length();
|
||||
|
||||
const string& tostr = fp[2]->data()->getStrVal(row, isNull);
|
||||
if (isNull)
|
||||
return "";
|
||||
size_t toLen = tostr.length();
|
||||
|
||||
bool binaryCmp = (cs->state & MY_CS_BINSORT) || !cs->use_mb();
|
||||
string newstr;
|
||||
unsigned int i = 0;
|
||||
|
||||
for (;;)
|
||||
size_t pos = 0;
|
||||
if (binaryCmp)
|
||||
{
|
||||
size_t pos = str.find(fromstr, i);
|
||||
|
||||
if ( pos != string::npos )
|
||||
uint32_t i = 0;
|
||||
pos = str.find(fromstr);
|
||||
if (pos == string::npos)
|
||||
return str;
|
||||
|
||||
// Count the number of occurrences of fromstr in str
|
||||
int count = 0;
|
||||
do
|
||||
{
|
||||
//match
|
||||
if ( pos > i )
|
||||
++count;
|
||||
pos = str.find(fromstr, pos + fromLen);
|
||||
}
|
||||
while (pos != string::npos);
|
||||
|
||||
newstr.reserve(strLen + (count * ((int)toLen - (int)fromLen)) + 1);
|
||||
|
||||
// Now move the stuff into newstr
|
||||
do
|
||||
{
|
||||
if (pos > i)
|
||||
newstr = newstr + str.substr(i, pos - i);
|
||||
|
||||
newstr = newstr + tostr;
|
||||
i = pos + fromstr.size();
|
||||
i = pos + fromLen;
|
||||
pos = str.find(fromstr, i);
|
||||
}
|
||||
else
|
||||
while (pos != string::npos);
|
||||
|
||||
newstr = newstr + str.substr(i, string::npos);
|
||||
}
|
||||
else
|
||||
{
|
||||
// UTF
|
||||
const char* src = str.c_str();
|
||||
const char* srcEnd = src + strLen;
|
||||
const char* srchEnd = srcEnd - fromLen + 1;
|
||||
const char* from = fromstr.c_str();
|
||||
const char* fromEnd = from + fromLen;
|
||||
const char* to = tostr.c_str();
|
||||
char* ptr = const_cast<char*>(src);
|
||||
char *i,*j;
|
||||
size_t count = 10; // Some arbitrary number to reserve some space to start.
|
||||
size_t growlen = count * ((int)toLen - (int)fromLen);
|
||||
newstr.reserve(strLen + (count * growlen) + 1);
|
||||
size_t maxsize = newstr.max_size();
|
||||
uint32_t l;
|
||||
|
||||
// We don't know where byte patterns might match so
|
||||
// we start at the beginning of the string and move forward
|
||||
// one character at a time until we find a match. Then we can
|
||||
// move the src bytes and add in the to bytes,then try again.
|
||||
while (ptr < srchEnd)
|
||||
{
|
||||
newstr = newstr + str.substr(i, 1000);
|
||||
break;
|
||||
bool found = false;
|
||||
if (*ptr == *from) // If the first byte matches, maybe we have a match
|
||||
{
|
||||
// Do a byte by byte compare of src at that spot against from
|
||||
i = ptr + 1;
|
||||
j = const_cast<char*>(from) + 1;
|
||||
found = true;
|
||||
while (j != fromEnd)
|
||||
{
|
||||
if (*i++ != *j++)
|
||||
{
|
||||
found = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (found)
|
||||
{
|
||||
if (ptr < i)
|
||||
{
|
||||
int mvsize = i - ptr;
|
||||
if (newstr.length() + mvsize + toLen < maxsize)
|
||||
{
|
||||
// We need a re-alloc
|
||||
newstr.reserve(maxsize + growlen);
|
||||
growlen *= 2;
|
||||
}
|
||||
newstr.append(ptr, mvsize);
|
||||
ptr += mvsize;
|
||||
}
|
||||
newstr.append(to, toLen);
|
||||
ptr += toLen;
|
||||
}
|
||||
else
|
||||
{
|
||||
// move to the next character
|
||||
if ((l = my_ismbchar(cs, ptr, srcEnd))) // returns the number of bytes in the leading char or zero if one byte
|
||||
ptr += l;
|
||||
else
|
||||
++ptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user