You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
MCOL-3536 collation
This commit is contained in:
@ -58,7 +58,10 @@ string Func_concat::getStrVal(Row& row,
|
||||
string ret;
|
||||
string tmp;
|
||||
stringValue(parm[0], row, isNull, ret);
|
||||
|
||||
|
||||
// TODO: do a better job of cutting down the number of re-allocations.
|
||||
// look at Item_func_concat::realloc_result for ideas and use
|
||||
// std::string::resize() appropriately.
|
||||
for ( unsigned int id = 1 ; id < parm.size() ; id++)
|
||||
{
|
||||
stringValue(parm[id], row, isNull, tmp);
|
||||
|
@ -20,6 +20,9 @@
|
||||
*
|
||||
*
|
||||
****************************************************************************/
|
||||
#include <mariadb.h>
|
||||
#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost
|
||||
#include <my_sys.h>
|
||||
|
||||
#include <string>
|
||||
using namespace std;
|
||||
@ -47,13 +50,16 @@ CalpontSystemCatalog::ColType Func_concat_ws::operationType(FunctionParm& fp, Ca
|
||||
string Func_concat_ws::getStrVal(Row& row,
|
||||
FunctionParm& parm,
|
||||
bool& isNull,
|
||||
CalpontSystemCatalog::ColType&)
|
||||
CalpontSystemCatalog::ColType& type)
|
||||
{
|
||||
string delim;
|
||||
stringValue(parm[0], row, isNull, delim);
|
||||
if (isNull)
|
||||
return "";
|
||||
|
||||
// TODO: I don't think we need wide chars here.
|
||||
// Concatenation works without them; see the Server implementation.
|
||||
#if 0
|
||||
wstring wstr;
|
||||
size_t strwclen = utf8::idb_mbstowcs(0, delim.c_str(), 0) + 1;
|
||||
wchar_t* wcbuf = new wchar_t[strwclen];
|
||||
@ -94,10 +100,11 @@ string Func_concat_ws::getStrVal(Row& row,
|
||||
delete [] outbuf;
|
||||
delete [] wcbuf;
|
||||
return ret;
|
||||
|
||||
#if 0
|
||||
#endif
|
||||
string str;
|
||||
string tmp;
|
||||
// Work on reallocation. use std::string::resize() to
|
||||
// grab larger chunks in some intelligent manner.
|
||||
for ( uint32_t i = 1 ; i < parm.size() ; i++)
|
||||
{
|
||||
stringValue(parm[i], row, isNull, tmp);
|
||||
@ -119,7 +126,6 @@ string Func_concat_ws::getStrVal(Row& row,
|
||||
isNull = false;
|
||||
|
||||
return str;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
@ -51,36 +51,34 @@ CalpontSystemCatalog::ColType Func_left::operationType(FunctionParm& fp, Calpont
|
||||
std::string Func_left::getStrVal(rowgroup::Row& row,
|
||||
FunctionParm& fp,
|
||||
bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType&)
|
||||
execplan::CalpontSystemCatalog::ColType& type)
|
||||
{
|
||||
const string& tstr = fp[0]->data()->getStrVal(row, isNull);
|
||||
|
||||
CHARSET_INFO* cs = type.getCharset();
|
||||
// The original string
|
||||
const string& src = fp[0]->data()->getStrVal(row, isNull);
|
||||
if (isNull)
|
||||
return "";
|
||||
if (src.empty() || src.length() == 0)
|
||||
return src;
|
||||
// binLen represents the number of bytes in src
|
||||
size_t binLen = src.length();
|
||||
const char* pos = src.c_str();
|
||||
const char* end = pos + binLen;
|
||||
|
||||
size_t strwclen = utf8::idb_mbstowcs(0, tstr.c_str(), 0) + 1;
|
||||
wchar_t* wcbuf = new wchar_t[strwclen];
|
||||
strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen);
|
||||
wstring str(wcbuf, strwclen);
|
||||
|
||||
int64_t pos = fp[1]->data()->getIntVal(row, isNull) - 1;
|
||||
|
||||
if (isNull)
|
||||
size_t trimLength = fp[1]->data()->getUintVal(row, isNull);
|
||||
if (isNull || trimLength <= 0)
|
||||
return "";
|
||||
|
||||
if (pos == -1) // pos == 0
|
||||
return "";
|
||||
size_t charPos;
|
||||
|
||||
wstring out = str.substr(0, pos + 1);
|
||||
size_t strmblen = utf8::idb_wcstombs(0, out.c_str(), 0) + 1;
|
||||
char* outbuf = new char[strmblen];
|
||||
strmblen = utf8::idb_wcstombs(outbuf, out.c_str(), strmblen);
|
||||
std::string ret(outbuf, strmblen);
|
||||
delete [] outbuf;
|
||||
delete [] wcbuf;
|
||||
if ((binLen <= trimLength) ||
|
||||
(binLen <= (charPos= cs->charpos(pos, end, trimLength))))
|
||||
{
|
||||
return src;
|
||||
}
|
||||
|
||||
std::string ret(pos, charPos);
|
||||
return ret;
|
||||
|
||||
// return str.substr(0, pos+1);
|
||||
}
|
||||
|
||||
|
||||
|
@ -56,191 +56,80 @@ CalpontSystemCatalog::ColType Func_lpad::operationType(FunctionParm& fp, Calpont
|
||||
std::string Func_lpad::getStrVal(rowgroup::Row& row,
|
||||
FunctionParm& fp,
|
||||
bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType&)
|
||||
execplan::CalpontSystemCatalog::ColType& type)
|
||||
{
|
||||
unsigned i;
|
||||
// The number of characters (not bytes) in our input str.
|
||||
// Not all of these are necessarily significant. We need to search for the
|
||||
// NULL terminator to be sure.
|
||||
size_t strwclen;
|
||||
// this holds the number of characters (not bytes) in our pad str.
|
||||
size_t padwclen;
|
||||
|
||||
CHARSET_INFO* cs = type.getCharset();
|
||||
// The original string
|
||||
const string& tstr = fp[0]->data()->getStrVal(row, isNull);
|
||||
const string& src = fp[0]->data()->getStrVal(row, isNull);
|
||||
if (isNull)
|
||||
return "";
|
||||
if (src.empty() || src.length() == 0)
|
||||
return src;
|
||||
// binLen represents the number of bytes in src
|
||||
size_t binLen = src.length();
|
||||
const char* pos = src.c_str();
|
||||
const char* end = pos + binLen;
|
||||
// strLen = the number of characters in src
|
||||
size_t strLen = cs->numchars(pos, end);
|
||||
|
||||
// The result length in number of characters
|
||||
size_t len = 0;
|
||||
|
||||
switch (fp[1]->data()->resultType().colDataType)
|
||||
// In the case where someone entered pad length as a quoted string,
|
||||
// it may be interpreted by columnstore to be an actual string
|
||||
// and stored in fResult.int as a htonl of that string,
|
||||
// However fResult.double is always correct, so we'll use that.
|
||||
size_t padLength = (size_t)fp[1]->data()->getDoubleVal(row, isNull);
|
||||
if (isNull || padLength <= 0)
|
||||
return "";
|
||||
if (padLength > (size_t)INT_MAX32)
|
||||
padLength = (size_t)INT_MAX32;
|
||||
|
||||
if (padLength < strLen)
|
||||
{
|
||||
case execplan::CalpontSystemCatalog::BIGINT:
|
||||
case execplan::CalpontSystemCatalog::INT:
|
||||
case execplan::CalpontSystemCatalog::MEDINT:
|
||||
case execplan::CalpontSystemCatalog::TINYINT:
|
||||
case execplan::CalpontSystemCatalog::SMALLINT:
|
||||
{
|
||||
len = fp[1]->data()->getIntVal(row, isNull);
|
||||
}
|
||||
break;
|
||||
|
||||
case execplan::CalpontSystemCatalog::UBIGINT:
|
||||
case execplan::CalpontSystemCatalog::UINT:
|
||||
case execplan::CalpontSystemCatalog::UMEDINT:
|
||||
case execplan::CalpontSystemCatalog::UTINYINT:
|
||||
case execplan::CalpontSystemCatalog::USMALLINT:
|
||||
{
|
||||
len = fp[1]->data()->getUintVal(row, isNull);
|
||||
}
|
||||
break;
|
||||
|
||||
case execplan::CalpontSystemCatalog::FLOAT:
|
||||
case execplan::CalpontSystemCatalog::UFLOAT:
|
||||
case execplan::CalpontSystemCatalog::DOUBLE:
|
||||
case execplan::CalpontSystemCatalog::UDOUBLE:
|
||||
case execplan::CalpontSystemCatalog::DECIMAL:
|
||||
case execplan::CalpontSystemCatalog::UDECIMAL:
|
||||
{
|
||||
double value = fp[1]->data()->getDoubleVal(row, isNull);
|
||||
|
||||
if (value > 0)
|
||||
value += 0.5;
|
||||
else if (value < 0)
|
||||
value -= 0.5;
|
||||
|
||||
int64_t ret = (int64_t) value;
|
||||
|
||||
if (value > (double) numeric_limits<int64_t>::max())
|
||||
ret = numeric_limits<int64_t>::max();
|
||||
else if (value < (double) (numeric_limits<int64_t>::min() + 2))
|
||||
ret = numeric_limits<int64_t>::min() + 2; // IDB min for bigint
|
||||
|
||||
len = ret;
|
||||
}
|
||||
break;
|
||||
|
||||
case execplan::CalpontSystemCatalog::CHAR:
|
||||
case execplan::CalpontSystemCatalog::VARCHAR:
|
||||
{
|
||||
const string& strval = fp[1]->data()->getStrVal(row, isNull);
|
||||
len = strtol(strval.c_str(), NULL, 10);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "lpad parameter 2 must be numeric, not " << execplan::colDataTypeToString(fp[1]->data()->resultType().colDataType);
|
||||
throw logging::IDBExcept(oss.str(), logging::ERR_DATATYPE_NOT_SUPPORT);
|
||||
|
||||
}
|
||||
binLen = cs->charpos(pos, end, padLength);
|
||||
std::string ret(pos, binLen);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (len < 1)
|
||||
return "";
|
||||
|
||||
// MCOL-2182 As of MariaDB 10.3 the third parameter - pad characters - is optional
|
||||
// The pad characters.
|
||||
const string* pad = &fPad;
|
||||
if (fp.size() > 2)
|
||||
{
|
||||
pad = &fp[2]->data()->getStrVal(row, isNull);
|
||||
}
|
||||
// binPLen represents the number of bytes in pad
|
||||
size_t binPLen = pad->length();
|
||||
const char* posP = pad->c_str();
|
||||
// plen = the number of characters in pad
|
||||
size_t plen = cs->numchars(posP, posP+binPLen);
|
||||
if (plen == 0 || plen > strLen)
|
||||
return src;
|
||||
|
||||
if (isNull)
|
||||
return "";
|
||||
size_t byteCount = (padLength+1) * cs->mbmaxlen; // absolute maximun number of bytes
|
||||
char* buf = new char[byteCount];
|
||||
char* pBuf = buf;
|
||||
|
||||
// Rather than calling the wideconvert functions with a null buffer to
|
||||
// determine the size of buffer to allocate, we can be sure the wide
|
||||
// char string won't be longer than
|
||||
strwclen = tstr.length(); // a guess to start with. This will be >= to the real count.
|
||||
size_t alen = len;
|
||||
|
||||
if (strwclen > len)
|
||||
alen = strwclen;
|
||||
|
||||
size_t bufsize = alen + 1;
|
||||
|
||||
// Convert to wide characters. Do all further work in wide characters
|
||||
wchar_t* wcbuf = new wchar_t[bufsize];
|
||||
strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen + 1);
|
||||
|
||||
size_t strSize = strwclen; // The number of significant characters
|
||||
const wchar_t* pWChar = wcbuf;
|
||||
|
||||
for (i = 0; *pWChar != '\0' && i < strwclen; ++pWChar, ++i)
|
||||
padLength -= strLen;
|
||||
byteCount = 0;
|
||||
|
||||
while (padLength >= plen)
|
||||
{
|
||||
memcpy(pBuf, posP, plen);
|
||||
padLength -= plen;
|
||||
byteCount += plen;
|
||||
pBuf += plen;
|
||||
}
|
||||
|
||||
strSize = i;
|
||||
|
||||
// If the incoming str is exactly the len of the result str,
|
||||
// return the original
|
||||
if (strSize == len)
|
||||
// Sometimes, in a case with multi-char pad, we need to add a partial pad
|
||||
if (padLength > 0)
|
||||
{
|
||||
return tstr;
|
||||
size_t partialSize = cs->charpos(posP, posP+plen, padLength);
|
||||
memcpy(pBuf, posP, partialSize);
|
||||
byteCount += partialSize;
|
||||
pBuf += partialSize;
|
||||
}
|
||||
|
||||
// If the incoming str is too big for the result str
|
||||
// truncate the widechar buffer and return as a string
|
||||
if (strSize > len)
|
||||
{
|
||||
// Trim the excess length of the buffer
|
||||
wstring trimmed = wstring(wcbuf, len);
|
||||
return utf8::wstring_to_utf8(trimmed.c_str());
|
||||
}
|
||||
|
||||
// This is the case where there's room to pad.
|
||||
|
||||
// Convert the pad string to wide
|
||||
padwclen = pad->length(); // A guess to start.
|
||||
size_t padbufsize = padwclen + 1;
|
||||
wchar_t* wcpad = new wchar_t[padbufsize];
|
||||
// padwclen+1 is for giving count for the terminating null
|
||||
size_t padlen = utf8::idb_mbstowcs(wcpad, pad->c_str(), padwclen + 1);
|
||||
|
||||
// How many chars do we need?
|
||||
size_t padspace = len - strSize;
|
||||
|
||||
// Shift the contents of wcbuf to the right.
|
||||
wchar_t* startofstr = wcbuf + padspace;
|
||||
|
||||
// Move the original string to the right to make room for the pad chars
|
||||
// Testing has shown that this loop is faster than memmove
|
||||
wchar_t* newchar = wcbuf + len; // Last spot to put a char in buf
|
||||
wchar_t* pChar = wcbuf + strSize; // terminal NULL of our str
|
||||
|
||||
while (pChar >= wcbuf)
|
||||
{
|
||||
*newchar-- = *pChar--;
|
||||
}
|
||||
|
||||
// Fill in the front of the buffer with the pad chars
|
||||
wchar_t* firstpadchar = wcbuf;
|
||||
|
||||
for (wchar_t* pch = wcbuf; pch < startofstr && padlen > 0;)
|
||||
{
|
||||
// Truncate the number of fill chars if running out of space
|
||||
if (padlen > padspace)
|
||||
{
|
||||
padlen = padspace;
|
||||
}
|
||||
|
||||
// Move the fill chars to buffer
|
||||
for (wchar_t* padchar = wcpad; padchar < wcpad + padlen; ++padchar)
|
||||
{
|
||||
*firstpadchar++ = *padchar;
|
||||
}
|
||||
|
||||
padspace -= padlen;
|
||||
pch += padlen;
|
||||
}
|
||||
|
||||
wstring padded = wstring(wcbuf, len);
|
||||
// Turn back to a string
|
||||
std::string ret(utf8::wstring_to_utf8(padded.c_str()));
|
||||
delete [] wcpad;
|
||||
delete [] wcbuf;
|
||||
memcpy(pBuf, pos, binLen);
|
||||
byteCount += binLen;
|
||||
|
||||
std::string ret(buf, byteCount);
|
||||
delete [] buf;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -50,89 +50,56 @@ CalpontSystemCatalog::ColType Func_ltrim::operationType(FunctionParm& fp, Calpon
|
||||
|
||||
|
||||
std::string Func_ltrim::getStrVal(rowgroup::Row& row,
|
||||
FunctionParm& fp,
|
||||
bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType&)
|
||||
FunctionParm& fp,
|
||||
bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType& type)
|
||||
{
|
||||
// The number of characters (not bytes) in our input tstr.
|
||||
// Not all of these are necessarily significant. We need to search for the
|
||||
// NULL terminator to be sure.
|
||||
size_t strwclen;
|
||||
// This holds the number of characters (not bytes) in our trim string.
|
||||
size_t trimwclen;
|
||||
|
||||
CHARSET_INFO* cs = type.getCharset();
|
||||
// The original string
|
||||
const string& tstr = fp[0]->data()->getStrVal(row, isNull);
|
||||
const string& src = fp[0]->data()->getStrVal(row, isNull);
|
||||
if (isNull)
|
||||
return "";
|
||||
if (src.empty() || src.length() == 0)
|
||||
return src;
|
||||
// binLen represents the number of bytes in src
|
||||
size_t binLen = src.length();
|
||||
const char* pos = src.c_str();
|
||||
const char* end = pos + binLen;
|
||||
// strLen = the number of characters in src
|
||||
size_t strLen = cs->numchars(pos, end);
|
||||
|
||||
// The trim characters.
|
||||
const string& trim = (fp.size() > 1 ? fp[1]->data()->getStrVal(row, isNull) : " ");
|
||||
// binTLen represents the number of bytes in trim
|
||||
size_t binTLen = trim.length();
|
||||
const char* posT = trim.c_str();
|
||||
// strTLen = the number of characters in trim
|
||||
size_t strTLen = cs->numchars(posT, posT+binTLen);
|
||||
if (strTLen == 0 || strTLen > strLen)
|
||||
return src;
|
||||
|
||||
if (isNull)
|
||||
return "";
|
||||
|
||||
if (tstr.empty() || tstr.length() == 0)
|
||||
return tstr;
|
||||
|
||||
// Rather than calling the wideconvert functions with a null buffer to
|
||||
// determine the size of buffer to allocate, we can be sure the wide
|
||||
// char string won't be longer than:
|
||||
strwclen = tstr.length(); // a guess to start with. This will be >= to the real count.
|
||||
int bufsize = strwclen + 1;
|
||||
|
||||
// Convert the string to wide characters. Do all further work in wide characters
|
||||
wchar_t* wcbuf = new wchar_t[bufsize];
|
||||
strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen + 1);
|
||||
|
||||
// idb_mbstowcs can return -1 if there is bad mbs char in tstr
|
||||
if (strwclen == static_cast<size_t>(-1))
|
||||
strwclen = 0;
|
||||
|
||||
// Convert the trim string to wide
|
||||
trimwclen = trim.length(); // A guess to start.
|
||||
int trimbufsize = trimwclen + 1;
|
||||
wchar_t* wctrim = new wchar_t[trimbufsize];
|
||||
size_t trimlen = utf8::idb_mbstowcs(wctrim, trim.c_str(), trimwclen + 1);
|
||||
|
||||
// idb_mbstowcs can return -1 if there is bad mbs char in tstr
|
||||
if (trimlen == static_cast<size_t>(-1))
|
||||
trimlen = 0;
|
||||
|
||||
size_t trimCmpLen = trimlen * sizeof(wchar_t);
|
||||
|
||||
const wchar_t* oPtr = wcbuf; // To remember the start of the string
|
||||
const wchar_t* aPtr = oPtr;
|
||||
const wchar_t* aEnd = wcbuf + strwclen - 1;
|
||||
|
||||
if (trimlen > 0)
|
||||
if (binTLen == 1)
|
||||
{
|
||||
if (trimlen == 1)
|
||||
// If the trim string is 1 byte, don't waste cpu for memcmp
|
||||
while (pos < end && *pos == *posT)
|
||||
{
|
||||
// If trim is a single char, then don't spend the overhead for memcmp.
|
||||
wchar_t chr = wctrim[0];
|
||||
|
||||
while (aPtr <= aEnd && *aPtr == chr)
|
||||
aPtr++;
|
||||
}
|
||||
else
|
||||
{
|
||||
aEnd -= (trimlen - 1); // So we don't compare past the end of the string.
|
||||
|
||||
while (aPtr <= aEnd && !memcmp(aPtr, wctrim, trimCmpLen))
|
||||
aPtr += trimlen;
|
||||
++pos;
|
||||
--binLen;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (pos+binTLen <= end && memcmp(pos,posT,binTLen) == 0)
|
||||
{
|
||||
pos += binTLen;
|
||||
binLen -= binTLen;
|
||||
}
|
||||
}
|
||||
|
||||
// Bug 5110 - error in allocating enough memory for utf8 chars
|
||||
size_t aLen = strwclen - (aPtr - oPtr);
|
||||
wstring trimmed = wstring(aPtr, aLen);
|
||||
// Turn back to a string
|
||||
std::string ret(utf8::wstring_to_utf8(trimmed.c_str()));
|
||||
delete [] wctrim;
|
||||
delete [] wcbuf;
|
||||
std::string ret(pos, binLen);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
} // namespace funcexp
|
||||
// vim:ts=4 sw=4:
|
||||
|
||||
|
@ -59,7 +59,7 @@ CalpontSystemCatalog::ColType Func_repeat::operationType(FunctionParm& fp, Calpo
|
||||
std::string Func_repeat::getStrVal(rowgroup::Row& row,
|
||||
FunctionParm& fp,
|
||||
bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType& op_ct)
|
||||
execplan::CalpontSystemCatalog::ColType& type)
|
||||
{
|
||||
string str;
|
||||
|
||||
|
@ -51,42 +51,33 @@ CalpontSystemCatalog::ColType Func_right::operationType(FunctionParm& fp, Calpon
|
||||
std::string Func_right::getStrVal(rowgroup::Row& row,
|
||||
FunctionParm& fp,
|
||||
bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType&)
|
||||
execplan::CalpontSystemCatalog::ColType& type)
|
||||
{
|
||||
const string& tstr = fp[0]->data()->getStrVal(row, isNull);
|
||||
|
||||
CHARSET_INFO* cs = type.getCharset();
|
||||
// The original string
|
||||
const string& src = fp[0]->data()->getStrVal(row, isNull);
|
||||
if (isNull)
|
||||
return "";
|
||||
if (src.empty() || src.length() == 0)
|
||||
return src;
|
||||
// binLen represents the number of bytes in src
|
||||
size_t binLen = src.length();
|
||||
const char* pos = src.c_str();
|
||||
const char* end = pos + binLen;
|
||||
|
||||
int64_t pos = fp[1]->data()->getIntVal(row, isNull);
|
||||
|
||||
if (isNull)
|
||||
size_t trimLength = fp[1]->data()->getUintVal(row, isNull);
|
||||
if (isNull || trimLength <= 0)
|
||||
return "";
|
||||
|
||||
if (pos == -1) // pos == 0
|
||||
return "";
|
||||
size_t start = cs->numchars(pos, end); // Here, start is number of characters in src
|
||||
if (start <= trimLength)
|
||||
return src;
|
||||
start = cs->charpos(pos, end, start - trimLength); // Here, start becomes number of bytes into src to start copying
|
||||
|
||||
size_t strwclen = utf8::idb_mbstowcs(0, tstr.c_str(), 0) + 1;
|
||||
//wchar_t wcbuf[strwclen];
|
||||
wchar_t* wcbuf = new wchar_t[strwclen];
|
||||
strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen);
|
||||
wstring str(wcbuf, strwclen);
|
||||
|
||||
if ( (unsigned) pos >= strwclen )
|
||||
pos = strwclen;
|
||||
|
||||
wstring out = str.substr(strwclen - pos, strwclen);
|
||||
size_t strmblen = utf8::idb_wcstombs(0, out.c_str(), 0) + 1;
|
||||
//char outbuf[strmblen];
|
||||
char* outbuf = new char[strmblen];
|
||||
strmblen = utf8::idb_wcstombs(outbuf, out.c_str(), strmblen);
|
||||
std::string ret(outbuf, strmblen);
|
||||
delete [] outbuf;
|
||||
delete [] wcbuf;
|
||||
std::string ret(pos+start, binLen-start);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
} // namespace funcexp
|
||||
// vim:ts=4 sw=4:
|
||||
|
||||
|
@ -55,179 +55,81 @@ CalpontSystemCatalog::ColType Func_rpad::operationType(FunctionParm& fp, Calpont
|
||||
std::string Func_rpad::getStrVal(rowgroup::Row& row,
|
||||
FunctionParm& fp,
|
||||
bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType&)
|
||||
execplan::CalpontSystemCatalog::ColType& type)
|
||||
{
|
||||
unsigned i;
|
||||
// The number of characters (not bytes) in our input str.
|
||||
// Not all of these are necessarily significant. We need to search for the
|
||||
// NULL terminator to be sure.
|
||||
size_t strwclen;
|
||||
// this holds the number of characters (not bytes) in our pad str.
|
||||
size_t padwclen;
|
||||
|
||||
CHARSET_INFO* cs = type.getCharset();
|
||||
// The original string
|
||||
const string& tstr = fp[0]->data()->getStrVal(row, isNull);
|
||||
const string& src = fp[0]->data()->getStrVal(row, isNull);
|
||||
if (isNull)
|
||||
return "";
|
||||
if (src.empty() || src.length() == 0)
|
||||
return src;
|
||||
// binLen represents the number of bytes in src
|
||||
size_t binLen = src.length();
|
||||
const char* pos = src.c_str();
|
||||
const char* end = pos + binLen;
|
||||
// strLen = the number of characters in src
|
||||
size_t strLen = cs->numchars(pos, end);
|
||||
|
||||
// The result length in number of characters
|
||||
size_t len = 0;
|
||||
|
||||
switch (fp[1]->data()->resultType().colDataType)
|
||||
// In the case where someone entered pad length as a quoted string,
|
||||
// it may be interpreted by columnstore to be an actual string
|
||||
// and stored in fResult.int as a htonl of that string,
|
||||
// However fResult.double is always correct, so we'll use that.
|
||||
size_t padLength = (size_t)fp[1]->data()->getDoubleVal(row, isNull);
|
||||
if (isNull || padLength <= 0)
|
||||
return "";
|
||||
if (padLength > (size_t)INT_MAX32)
|
||||
padLength = (size_t)INT_MAX32;
|
||||
|
||||
if (padLength < strLen)
|
||||
{
|
||||
case execplan::CalpontSystemCatalog::BIGINT:
|
||||
case execplan::CalpontSystemCatalog::INT:
|
||||
case execplan::CalpontSystemCatalog::MEDINT:
|
||||
case execplan::CalpontSystemCatalog::TINYINT:
|
||||
case execplan::CalpontSystemCatalog::SMALLINT:
|
||||
{
|
||||
len = fp[1]->data()->getIntVal(row, isNull);
|
||||
}
|
||||
break;
|
||||
|
||||
case execplan::CalpontSystemCatalog::UBIGINT:
|
||||
case execplan::CalpontSystemCatalog::UINT:
|
||||
case execplan::CalpontSystemCatalog::UMEDINT:
|
||||
case execplan::CalpontSystemCatalog::UTINYINT:
|
||||
case execplan::CalpontSystemCatalog::USMALLINT:
|
||||
{
|
||||
len = fp[1]->data()->getUintVal(row, isNull);
|
||||
}
|
||||
break;
|
||||
|
||||
case execplan::CalpontSystemCatalog::FLOAT:
|
||||
case execplan::CalpontSystemCatalog::UFLOAT:
|
||||
case execplan::CalpontSystemCatalog::DOUBLE:
|
||||
case execplan::CalpontSystemCatalog::UDOUBLE:
|
||||
case execplan::CalpontSystemCatalog::DECIMAL:
|
||||
case execplan::CalpontSystemCatalog::UDECIMAL:
|
||||
{
|
||||
double value = fp[1]->data()->getDoubleVal(row, isNull);
|
||||
|
||||
if (value > 0)
|
||||
value += 0.5;
|
||||
else if (value < 0)
|
||||
value -= 0.5;
|
||||
else if (value < 0)
|
||||
value -= 0.5;
|
||||
|
||||
int64_t ret = (int64_t) value;
|
||||
|
||||
if (value > (double) numeric_limits<int64_t>::max())
|
||||
ret = numeric_limits<int64_t>::max();
|
||||
else if (value < (double) (numeric_limits<int64_t>::min() + 2))
|
||||
ret = numeric_limits<int64_t>::min() + 2; // IDB min for bigint
|
||||
|
||||
len = ret;
|
||||
}
|
||||
break;
|
||||
|
||||
case execplan::CalpontSystemCatalog::CHAR:
|
||||
case execplan::CalpontSystemCatalog::VARCHAR:
|
||||
{
|
||||
const string& strval = fp[1]->data()->getStrVal(row, isNull);
|
||||
len = strtol(strval.c_str(), NULL, 10);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "lpad parameter 2 must be numeric, not " << execplan::colDataTypeToString(fp[1]->data()->resultType().colDataType);
|
||||
throw logging::IDBExcept(oss.str(), logging::ERR_DATATYPE_NOT_SUPPORT);
|
||||
}
|
||||
binLen = cs->charpos(pos, end, padLength);
|
||||
std::string ret(pos, binLen);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (len < 1)
|
||||
return "";
|
||||
|
||||
// The pad characters.
|
||||
// MCOL-2182 As of MariaDB 10.3 the third parameter - pad characters - is optional
|
||||
const string* pad = &fPad;
|
||||
if (fp.size() > 2)
|
||||
{
|
||||
pad = &fp[2]->data()->getStrVal(row, isNull);
|
||||
}
|
||||
// binPLen represents the number of bytes in pad
|
||||
size_t binPLen = pad->length();
|
||||
const char* posP = pad->c_str();
|
||||
// plen = the number of characters in pad
|
||||
size_t plen = cs->numchars(posP, posP+binPLen);
|
||||
if (plen == 0 || plen > strLen)
|
||||
return src;
|
||||
|
||||
if (isNull)
|
||||
return "";
|
||||
size_t byteCount = (padLength+1) * cs->mbmaxlen; // absolute maximun number of bytes
|
||||
char* buf = new char[byteCount];
|
||||
char* pBuf = buf;
|
||||
|
||||
// Rather than calling the wideconvert functions with a null buffer to
|
||||
// determine the size of buffer to allocate, we can be sure the wide
|
||||
// char string won't be longer than:
|
||||
strwclen = tstr.length(); // a guess to start with. This will be >= to the real count.
|
||||
int alen = len;
|
||||
byteCount = 0;
|
||||
|
||||
memcpy(pBuf, pos, binLen);
|
||||
byteCount += binLen;
|
||||
padLength -= strLen;
|
||||
pBuf += binLen;
|
||||
|
||||
if (strwclen > len)
|
||||
alen = strwclen;
|
||||
|
||||
int bufsize = alen + 1;
|
||||
|
||||
// Convert to wide characters. Do all further work in wide characters
|
||||
wchar_t* wcbuf = new wchar_t[bufsize];
|
||||
strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen + 1);
|
||||
|
||||
unsigned int strSize = strwclen; // The number of significant characters
|
||||
const wchar_t* pWChar = wcbuf;
|
||||
|
||||
for (i = 0; *pWChar != '\0' && i < strwclen; ++pWChar, ++i)
|
||||
while (padLength >= plen)
|
||||
{
|
||||
memcpy(pBuf, posP, plen);
|
||||
padLength -= plen;
|
||||
byteCount += plen;
|
||||
pBuf += plen;
|
||||
}
|
||||
|
||||
strSize = i;
|
||||
|
||||
// If the incoming str is exactly the len of the result str,
|
||||
// return the original
|
||||
if (strSize == len)
|
||||
// Sometimes, in a case with multi-char pad, we need to add a partial pad
|
||||
if (padLength > 0)
|
||||
{
|
||||
return tstr;
|
||||
size_t partialSize = cs->charpos(posP, posP+plen, padLength);
|
||||
memcpy(pBuf, posP, partialSize);
|
||||
byteCount += partialSize;
|
||||
}
|
||||
|
||||
// If the incoming str is too big for the result str
|
||||
// truncate the widechar buffer and return as a string
|
||||
if (strSize > len)
|
||||
{
|
||||
// Trim the excess length of the buffer
|
||||
wstring trimmed = wstring(wcbuf, len);
|
||||
return utf8::wstring_to_utf8(trimmed.c_str());
|
||||
}
|
||||
|
||||
// This is the case where there's room to pad.
|
||||
|
||||
// Convert the pad string to wide
|
||||
padwclen = pad->length(); // A guess to start.
|
||||
int padbufsize = padwclen + 1;
|
||||
wchar_t* wcpad = new wchar_t[padbufsize];
|
||||
size_t padlen = utf8::idb_mbstowcs(wcpad, pad->c_str(), padwclen + 1);
|
||||
|
||||
// How many chars do we need?
|
||||
unsigned int padspace = len - strSize;
|
||||
|
||||
// Fill in the back of the buffer
|
||||
wchar_t* firstpadchar = wcbuf + strSize;
|
||||
|
||||
for (wchar_t* pch = wcbuf; pch < wcbuf + len && padlen > 0;)
|
||||
{
|
||||
// Truncate the number of fill chars if running out of space
|
||||
if (padlen > padspace)
|
||||
{
|
||||
padlen = padspace;
|
||||
}
|
||||
|
||||
// Move the fill chars to buffer
|
||||
for (wchar_t* padchar = wcpad; padchar < wcpad + padlen; ++padchar)
|
||||
{
|
||||
*firstpadchar++ = *padchar;
|
||||
}
|
||||
|
||||
padspace -= padlen;
|
||||
pch += padlen;
|
||||
}
|
||||
|
||||
wstring padded = wstring(wcbuf, len);
|
||||
|
||||
// Bug 5110 : strings were getting truncated since enough bytes not allocated.
|
||||
std::string ret(utf8::wstring_to_utf8(padded.c_str()));
|
||||
delete [] wcpad;
|
||||
delete [] wcbuf;
|
||||
|
||||
std::string ret(buf, byteCount);
|
||||
delete [] buf;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -49,95 +49,118 @@ CalpontSystemCatalog::ColType Func_rtrim::operationType(FunctionParm& fp, Calpon
|
||||
|
||||
|
||||
std::string Func_rtrim::getStrVal(rowgroup::Row& row,
|
||||
FunctionParm& fp,
|
||||
bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType&)
|
||||
FunctionParm& fp,
|
||||
bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType& type)
|
||||
{
|
||||
// The number of characters (not bytes) in our input tstr.
|
||||
// Not all of these are necessarily significant. We need to search for the
|
||||
// NULL terminator to be sure.
|
||||
size_t strwclen;
|
||||
// This holds the number of characters (not bytes) in our trim string.
|
||||
size_t trimwclen;
|
||||
|
||||
CHARSET_INFO* cs = type.getCharset();
|
||||
// The original string
|
||||
const string& tstr = fp[0]->data()->getStrVal(row, isNull);
|
||||
const string& src = fp[0]->data()->getStrVal(row, isNull);
|
||||
if (isNull)
|
||||
return "";
|
||||
if (src.empty() || src.length() == 0)
|
||||
return src;
|
||||
// binLen represents the number of bytes in src
|
||||
size_t binLen = src.length();
|
||||
const char* pos = src.c_str();
|
||||
const char* end = pos + binLen;
|
||||
// strLen = the number of characters in src
|
||||
size_t strLen = cs->numchars(pos, end);
|
||||
|
||||
// The trim characters.
|
||||
const string& trim = (fp.size() > 1 ? fp[1]->data()->getStrVal(row, isNull) : " ");
|
||||
// binTLen represents the number of bytes in trim
|
||||
size_t binTLen = trim.length();
|
||||
const char* posT = trim.c_str();
|
||||
// strTLen = the number of characters in trim
|
||||
size_t strTLen = cs->numchars(posT, posT+binTLen);
|
||||
if (strTLen == 0 || strTLen > strLen)
|
||||
return src;
|
||||
|
||||
if (isNull)
|
||||
return "";
|
||||
|
||||
if (tstr.empty() || tstr.length() == 0)
|
||||
return tstr;
|
||||
|
||||
// Rather than calling the wideconvert functions with a null buffer to
|
||||
// determine the size of buffer to allocate, we can be sure the wide
|
||||
// char string won't be longer than:
|
||||
strwclen = tstr.length(); // a guess to start with. This will be >= to the real count.
|
||||
int bufsize = strwclen + 1;
|
||||
|
||||
// Convert the string to wide characters. Do all further work in wide characters
|
||||
wchar_t* wcbuf = new wchar_t[bufsize];
|
||||
strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen + 1);
|
||||
|
||||
// utf8::idb_mbstowcs could return -1 if there are bad chars
|
||||
if (strwclen == static_cast<size_t>(-1))
|
||||
strwclen = 0;
|
||||
|
||||
// Convert the trim string to wide
|
||||
trimwclen = trim.length(); // A guess to start.
|
||||
int trimbufsize = trimwclen + 1;
|
||||
wchar_t* wctrim = new wchar_t[trimbufsize];
|
||||
size_t trimlen = utf8::idb_mbstowcs(wctrim, trim.c_str(), trimwclen + 1);
|
||||
|
||||
// idb_mbstowcs could return -1 if there are bad chars
|
||||
if (trimlen == static_cast<size_t>(-1))
|
||||
trimlen = 0;
|
||||
|
||||
size_t trimCmpLen = trimlen * sizeof(wchar_t);
|
||||
|
||||
const wchar_t* oPtr = wcbuf; // To remember the start of the string
|
||||
const wchar_t* aPtr = oPtr;
|
||||
const wchar_t* aEnd = wcbuf + strwclen - 1;
|
||||
size_t trimCnt = 0;
|
||||
|
||||
if (trimlen > 0)
|
||||
if (binTLen == 1)
|
||||
{
|
||||
if (trimlen == 1)
|
||||
const char* ptr = pos;
|
||||
if (cs->use_mb()) // This is a multi-byte charset
|
||||
{
|
||||
// If trim is a single char, then don't spend the overhead for memcmp.
|
||||
wchar_t chr = wctrim[0];
|
||||
|
||||
while (aEnd >= aPtr && *aEnd == chr)
|
||||
const char* p = pos;
|
||||
uint32 l;
|
||||
// Multibyte characters in the string give us alignment problems
|
||||
// What we do here is skip past any multibyte characters. When
|
||||
// done with this loop, ptr is pointing to a singlebyte char that
|
||||
// is after all multibyte chars in the string, or to end.
|
||||
while (ptr < end)
|
||||
{
|
||||
--aEnd;
|
||||
++trimCnt;
|
||||
if ((l = my_ismbchar(cs, ptr, end))) // returns the number of bytes in the leading char or zero if one byte
|
||||
{
|
||||
ptr += l;
|
||||
p = ptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
++ptr;
|
||||
}
|
||||
}
|
||||
ptr = p;
|
||||
}
|
||||
while (ptr < end && end[-1] == *posT)
|
||||
{
|
||||
--end;
|
||||
--binLen;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// An uncommon case where the space character is > 1 byte
|
||||
if (cs->use_mb()) // This is a multi-byte charset
|
||||
{
|
||||
// The problem is that the byte pattern at the end could
|
||||
// match memcmp, but not be correct since the first byte compared
|
||||
// may actually be a second or later byte from a previous char.
|
||||
|
||||
// We start at the beginning of the string and move forward
|
||||
// one character at a time until we reach the end. Then we can
|
||||
// safely compare and remove one character. Then back to the beginning
|
||||
// and try again.
|
||||
while (end - binTLen >= pos)
|
||||
{
|
||||
const char* p = pos;
|
||||
uint32 l;
|
||||
while (p + binTLen < end)
|
||||
{
|
||||
if ((l = my_ismbchar(cs, p, end))) // returns the number of bytes in the leading char or zero if one byte
|
||||
p += l;
|
||||
else
|
||||
++p;
|
||||
}
|
||||
if (p + binTLen == end && memcmp(p,posT,binTLen) == 0)
|
||||
{
|
||||
end -= binTLen;
|
||||
binLen -= binTLen;
|
||||
}
|
||||
else
|
||||
{
|
||||
break; // We've run out of places to look
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
aEnd -= (trimlen - 1); // So we don't compare past the end of the string.
|
||||
|
||||
while (aPtr <= aEnd && !memcmp(aEnd, wctrim, trimCmpLen))
|
||||
// This implies we have a single byte charset and a multibyte
|
||||
// space character.
|
||||
// Should never get here, since rtrim only trims space characters
|
||||
// Included for completeness.
|
||||
while (end-binTLen >= pos && memcmp(end-binTLen,posT,binTLen) == 0)
|
||||
{
|
||||
aEnd -= trimCmpLen;
|
||||
trimCnt += trimlen;
|
||||
end -= binTLen;
|
||||
binLen -= binTLen;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t aLen = strwclen - trimCnt;
|
||||
wstring trimmed = wstring(aPtr, aLen);
|
||||
// Turn back to a string
|
||||
std::string ret(utf8::wstring_to_utf8(trimmed.c_str()));
|
||||
delete [] wctrim;
|
||||
delete [] wcbuf;
|
||||
std::string ret(pos, binLen);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
} // namespace funcexp
|
||||
// vim:ts=4 sw=4:
|
||||
|
||||
|
@ -73,7 +73,7 @@ CalpontSystemCatalog::ColType Func_strcmp::operationType(FunctionParm& fp, Calpo
|
||||
int64_t Func_strcmp::getIntVal(rowgroup::Row& row,
|
||||
FunctionParm& fp,
|
||||
bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType& op_ct)
|
||||
execplan::CalpontSystemCatalog::ColType& type)
|
||||
{
|
||||
CHARSET_INFO* cs = fp[0]->data()->resultType().getCharset();
|
||||
const string& str = fp[0]->data()->getStrVal(row, isNull);
|
||||
@ -88,9 +88,9 @@ int64_t Func_strcmp::getIntVal(rowgroup::Row& row,
|
||||
std::string Func_strcmp::getStrVal(rowgroup::Row& row,
|
||||
FunctionParm& fp,
|
||||
bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType& op_ct)
|
||||
execplan::CalpontSystemCatalog::ColType& type)
|
||||
{
|
||||
uint64_t val = getIntVal(row, fp, isNull, op_ct);
|
||||
uint64_t val = getIntVal(row, fp, isNull, type);
|
||||
|
||||
if (val > 0)
|
||||
return string("1");
|
||||
|
@ -86,31 +86,37 @@ std::string Func_trim::getStrVal(rowgroup::Row& row,
|
||||
--binLen;
|
||||
}
|
||||
// Trim trailing
|
||||
while (end > pos && *end == *posT)
|
||||
const char* ptr = pos;
|
||||
if (cs->use_mb()) // This is a multi-byte charset
|
||||
{
|
||||
const char* p = pos;
|
||||
uint32 l;
|
||||
// Multibyte characters in the string give us alignment problems
|
||||
// What we do here is skip past any multibyte characters. When
|
||||
// done with this loop, ptr is pointing to a singlebyte char that
|
||||
// is after all multibyte chars in the string, or to end.
|
||||
while (ptr < end)
|
||||
{
|
||||
if ((l = my_ismbchar(cs, ptr, end))) // returns the number of bytes in the leading char or zero if one byte
|
||||
{
|
||||
ptr += l;
|
||||
p = ptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
++ptr;
|
||||
}
|
||||
}
|
||||
ptr = p;
|
||||
}
|
||||
while (ptr < end && end[-1] == *posT)
|
||||
{
|
||||
--end;
|
||||
--binLen;
|
||||
}
|
||||
}
|
||||
else if (!cs->use_mb())
|
||||
{
|
||||
// This is a one byte per char charset with multiple char trim.
|
||||
// Trim leading
|
||||
while (pos+binTLen <= end && memcmp(pos,posT,binTLen) == 0)
|
||||
{
|
||||
pos += binTLen;
|
||||
binLen -= binTLen;
|
||||
}
|
||||
// Trim trailing
|
||||
while (end-binTLen >= pos && memcmp(end-binTLen,posT,binTLen) == 0)
|
||||
{
|
||||
end -= binTLen;
|
||||
binLen -= binTLen;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// We're using a multi-byte charset
|
||||
// Trim leading is easy
|
||||
while (pos+binTLen <= end && memcmp(pos,posT,binTLen) == 0)
|
||||
{
|
||||
@ -119,33 +125,45 @@ std::string Func_trim::getStrVal(rowgroup::Row& row,
|
||||
}
|
||||
|
||||
// Trim trailing
|
||||
// The problem is that the byte pattern at the end could
|
||||
// match memcmp, but not be correct since the first byte compared
|
||||
// may actually be a second or later byte from a previous char.
|
||||
|
||||
// We start at the beginning of the string and move forward
|
||||
// one character at a time until we reach the end. Then we can
|
||||
// safely compare.
|
||||
while (end - binTLen >= pos)
|
||||
if (cs->use_mb()) // This is a multi-byte charset
|
||||
{
|
||||
const char* p = pos;
|
||||
uint32 l;
|
||||
while (p + binTLen < end)
|
||||
// The problem is that the byte pattern at the end could
|
||||
// match memcmp, but not be correct since the first byte compared
|
||||
// may actually be a second or later byte from a previous char.
|
||||
|
||||
// We start at the beginning of the string and move forward
|
||||
// one character at a time until we reach the end. Then we can
|
||||
// safely compare and remove one character. Then back to the beginning
|
||||
// and try again.
|
||||
while (end - binTLen >= pos)
|
||||
{
|
||||
if ((l = my_ismbchar(cs, p, end))) // returns the number of bytes in the leading char or zero if one byte
|
||||
p += l;
|
||||
const char* p = pos;
|
||||
uint32 l;
|
||||
while (p + binTLen < end)
|
||||
{
|
||||
if ((l = my_ismbchar(cs, p, end))) // returns the number of bytes in the leading char or zero if one byte
|
||||
p += l;
|
||||
else
|
||||
++p;
|
||||
}
|
||||
if (p + binTLen == end && memcmp(p,posT,binTLen) == 0)
|
||||
{
|
||||
end -= binTLen;
|
||||
binLen -= binTLen;
|
||||
}
|
||||
else
|
||||
++p;
|
||||
{
|
||||
break; // We've run out of places to look
|
||||
}
|
||||
}
|
||||
if (p + binTLen == end && memcmp(p,posT,binTLen) == 0)
|
||||
}
|
||||
else
|
||||
{
|
||||
while (end-binTLen >= pos && memcmp(end-binTLen,posT,binTLen) == 0)
|
||||
{
|
||||
end -= binTLen;
|
||||
binLen -= binTLen;
|
||||
}
|
||||
else
|
||||
{
|
||||
break; // We've run out of places to look
|
||||
}
|
||||
}
|
||||
}
|
||||
// Turn back to a string
|
||||
|
Reference in New Issue
Block a user