MCOL-3536 collation

2025-07-30 19:23:07 +03:00 · 2020-06-04 16:15:06 -05:00
parent 889094a23d
commit bacd81d32a
11 changed files with 349 additions and 552 deletions
--- a/utils/funcexp/func_concat.cpp
+++ b/utils/funcexp/func_concat.cpp
@ -58,7 +58,10 @@ string Func_concat::getStrVal(Row& row,
 	string ret;
    string tmp;
    stringValue(parm[0], row, isNull, ret);
-
+    
+    // TODO: do a better job of cutting down the number of re-allocations.
+    // Look at Item_func_concat::realloc_result for ideas and use
+    // std::string::resize() appropriately.
    for ( unsigned int id = 1 ; id < parm.size() ; id++)
    {
 		stringValue(parm[id], row, isNull, tmp);
--- a/utils/funcexp/func_concat_ws.cpp
+++ b/utils/funcexp/func_concat_ws.cpp
@ -20,6 +20,9 @@
 *
 *
 ****************************************************************************/
+#include <mariadb.h>
+#undef set_bits  // mariadb.h defines set_bits, which is incompatible with boost
+#include <my_sys.h>

 #include <string>
 using namespace std;
@ -47,13 +50,16 @@ CalpontSystemCatalog::ColType Func_concat_ws::operationType(FunctionParm& fp, Ca
 string Func_concat_ws::getStrVal(Row& row,
                                 FunctionParm& parm,
                                 bool& isNull,
-                                 CalpontSystemCatalog::ColType&)
+                                 CalpontSystemCatalog::ColType& type)
 {
 	string delim;
    stringValue(parm[0], row, isNull, delim);
    if (isNull)
        return "";

+    // TODO: I don't think we need wide chars here.
+    // Concatenation works without them; see the Server implementation.
+#if 0    
    wstring wstr;
    size_t strwclen = utf8::idb_mbstowcs(0, delim.c_str(), 0) + 1;
    wchar_t* wcbuf = new wchar_t[strwclen];
@ -94,10 +100,11 @@ string Func_concat_ws::getStrVal(Row& row,
    delete [] outbuf;
    delete [] wcbuf;
    return ret;
-
-#if 0
+#endif
    string str;
    string tmp;
+    // TODO: work on reallocation. Use std::string::resize() to
+    // grab larger chunks in some intelligent manner.
    for ( uint32_t i = 1 ; i < parm.size() ; i++)
    {
 		stringValue(parm[i], row, isNull, tmp);
@ -119,7 +126,6 @@ string Func_concat_ws::getStrVal(Row& row,
        isNull = false;

    return str;
-#endif
 }


--- a/utils/funcexp/func_left.cpp
+++ b/utils/funcexp/func_left.cpp
@ -51,36 +51,34 @@ CalpontSystemCatalog::ColType Func_left::operationType(FunctionParm& fp, Calpont
 std::string Func_left::getStrVal(rowgroup::Row& row,
                                 FunctionParm& fp,
                                 bool& isNull,
-                                 execplan::CalpontSystemCatalog::ColType&)
+                                 execplan::CalpontSystemCatalog::ColType& type)
 {
-    const string& tstr = fp[0]->data()->getStrVal(row, isNull);
-
+    CHARSET_INFO* cs = type.getCharset();  // character counting below goes through this charset
+    // The source string (first argument); its leading part is what gets returned
+    const string& src = fp[0]->data()->getStrVal(row, isNull);
     if (isNull)
         return "";
+    if (src.empty() || src.length() == 0)  // the two tests are equivalent; empty input is returned as-is
+        return src;
+    // binLen is the number of bytes (not characters) in src
+    size_t binLen = src.length();
+    const char* pos = src.c_str();
+    const char* end = pos + binLen;

-    size_t strwclen = utf8::idb_mbstowcs(0, tstr.c_str(), 0) + 1;
-    wchar_t* wcbuf = new wchar_t[strwclen];
-    strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen);
-    wstring str(wcbuf, strwclen);
-
-    int64_t pos = fp[1]->data()->getIntVal(row, isNull) - 1;
-
-    if (isNull)
+    size_t trimLength = fp[1]->data()->getUintVal(row, isNull);  // second argument, read as unsigned
+    if (isNull || trimLength <= 0)  // trimLength is size_t, so "<= 0" can only mean "== 0"
         return "";

-    if (pos == -1)  // pos == 0
-        return "";
+    size_t charPos;  // assigned from cs->charpos() below and then used as the result's byte count

-    wstring out = str.substr(0, pos + 1);
-    size_t strmblen = utf8::idb_wcstombs(0, out.c_str(), 0) + 1;
-    char* outbuf = new char[strmblen];
-    strmblen = utf8::idb_wcstombs(outbuf, out.c_str(), strmblen);
-    std::string ret(outbuf, strmblen);
-    delete [] outbuf;
-    delete [] wcbuf;
+    if ((binLen <= trimLength) ||  // byte count already within the limit: charpos() is never called
+        (binLen <= (charPos= cs->charpos(pos, end, trimLength))))  // charpos result covers all of src
+    {
+        return src;
+    }
+
+    std::string ret(pos, charPos);  // first charPos bytes of src
     return ret;
-
-//	return str.substr(0, pos+1);
 }


--- a/utils/funcexp/func_lpad.cpp
+++ b/utils/funcexp/func_lpad.cpp
@ -56,191 +56,80 @@ CalpontSystemCatalog::ColType Func_lpad::operationType(FunctionParm& fp, Calpont
 std::string Func_lpad::getStrVal(rowgroup::Row& row,
                                 FunctionParm& fp,
                                 bool& isNull,
-                                 execplan::CalpontSystemCatalog::ColType&)
+                                 execplan::CalpontSystemCatalog::ColType& type)
 {
-    unsigned i;
-    // The number of characters (not bytes) in our input str.
-    // Not all of these are necessarily significant. We need to search for the
-    // NULL terminator to be sure.
-    size_t strwclen;
-    // this holds the number of characters (not bytes) in our pad str.
-    size_t padwclen;
-
+    CHARSET_INFO* cs = type.getCharset();  // all character counting below goes through this charset
    // The original string
-    const string& tstr = fp[0]->data()->getStrVal(row, isNull);
+    const string& src = fp[0]->data()->getStrVal(row, isNull);
+    if (isNull)
+        return "";
+    if (src.empty() || src.length() == 0)
+        return src;
+    // binLen represents the number of bytes in src
+    size_t binLen = src.length();
+    const char* pos = src.c_str();
+    const char* end = pos + binLen;
+    // strLen = the number of characters in src
+    size_t strLen = cs->numchars(pos, end);

-    // The result length in number of characters
-    size_t len = 0;
-
-    switch (fp[1]->data()->resultType().colDataType)
+    // In the case where someone entered pad length as a quoted string,
+    // it may be interpreted by columnstore to be an actual string
+    // and stored in fResult.int as a htonl of that string.
+    // However fResult.double is always correct, so we'll use that.
+    double padValue = fp[1]->data()->getDoubleVal(row, isNull);
+    if (isNull || !(padValue >= 1.0))  // validate as double: casting a negative/NaN to size_t is undefined
+        return "";
+    size_t padLength = (padValue > (double)INT_MAX32) ? (size_t)INT_MAX32  // clamp oversized requests
+                                                       : (size_t)padValue; // truncates the fraction
+    
+    if (padLength < strLen)
    {
-        case execplan::CalpontSystemCatalog::BIGINT:
-        case execplan::CalpontSystemCatalog::INT:
-        case execplan::CalpontSystemCatalog::MEDINT:
-        case execplan::CalpontSystemCatalog::TINYINT:
-        case execplan::CalpontSystemCatalog::SMALLINT:
-        {
-            len = fp[1]->data()->getIntVal(row, isNull);
-        }
-        break;
-
-        case execplan::CalpontSystemCatalog::UBIGINT:
-        case execplan::CalpontSystemCatalog::UINT:
-        case execplan::CalpontSystemCatalog::UMEDINT:
-        case execplan::CalpontSystemCatalog::UTINYINT:
-        case execplan::CalpontSystemCatalog::USMALLINT:
-        {
-            len = fp[1]->data()->getUintVal(row, isNull);
-        }
-        break;
-
-        case execplan::CalpontSystemCatalog::FLOAT:
-        case execplan::CalpontSystemCatalog::UFLOAT:
-        case execplan::CalpontSystemCatalog::DOUBLE:
-        case execplan::CalpontSystemCatalog::UDOUBLE:
-        case execplan::CalpontSystemCatalog::DECIMAL:
-        case execplan::CalpontSystemCatalog::UDECIMAL:
-        {
-            double value = fp[1]->data()->getDoubleVal(row, isNull);
-
-            if (value > 0)
-                value += 0.5;
-            else if (value < 0)
-                value -= 0.5;
-
-            int64_t ret = (int64_t) value;
-
-            if (value > (double) numeric_limits<int64_t>::max())
-                ret = numeric_limits<int64_t>::max();
-            else if (value < (double) (numeric_limits<int64_t>::min() + 2))
-                ret = numeric_limits<int64_t>::min() + 2; // IDB min for bigint
-
-            len = ret;
-        }
-        break;
-
-        case execplan::CalpontSystemCatalog::CHAR:
-        case execplan::CalpontSystemCatalog::VARCHAR:
-        {
-            const string& strval = fp[1]->data()->getStrVal(row, isNull);
-            len = strtol(strval.c_str(), NULL, 10);
-            break;
-        }
-
-        default:
-        {
-            std::ostringstream oss;
-            oss << "lpad parameter 2 must be numeric, not  " << execplan::colDataTypeToString(fp[1]->data()->resultType().colDataType);
-            throw logging::IDBExcept(oss.str(), logging::ERR_DATATYPE_NOT_SUPPORT);
-
-        }
+        binLen = cs->charpos(pos, end, padLength);  // requested length is shorter than src: truncate
+        std::string ret(pos, binLen);
+        return ret;
    }

-    if (len < 1)
-        return "";
-
-    // MCOL-2182 As of MariaDB 10.3 the third parameter - pad characters - is optional
    // The pad characters.
    const string* pad = &fPad;
    if (fp.size() > 2)
    {
        pad = &fp[2]->data()->getStrVal(row, isNull);
    }
+    // binPLen represents the number of bytes in pad
+    size_t binPLen = pad->length();
+    const char* posP = pad->c_str();
+    // plen = the number of characters in pad
+    size_t plen = cs->numchars(posP, posP+binPLen);
+    if (plen == 0)  // nothing to pad with; a pad longer than src is fine and is handled below
+        return src;

-    if (isNull)
-        return "";
+    size_t byteCount = (padLength+1) * cs->mbmaxlen; // absolute maximum number of bytes
+    char* buf = new char[byteCount];
+    char* pBuf = buf;

-    // Rather than calling the wideconvert functions with a null buffer to
-    // determine the size of buffer to allocate, we can be sure the wide
-    // char string won't be longer than
-    strwclen = tstr.length(); // a guess to start with. This will be >= to the real count.
-    size_t alen = len;
-
-    if (strwclen > len)
-        alen = strwclen;
-
-    size_t bufsize = alen + 1;
-
-    // Convert to wide characters. Do all further work in wide characters
-    wchar_t* wcbuf = new wchar_t[bufsize];
-    strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen + 1);
-
-    size_t strSize = strwclen;    // The number of significant characters
-    const wchar_t* pWChar = wcbuf;
-
-    for (i = 0; *pWChar != '\0' && i < strwclen; ++pWChar, ++i)
+    padLength -= strLen;  // from here on padLength is the number of pad characters still needed
+    byteCount = 0;
+    
+    while (padLength >= plen)
    {
+        memcpy(pBuf, posP, binPLen);  // copy the whole pad string: binPLen bytes, not plen characters
+        padLength -= plen;
+        byteCount += binPLen;
+        pBuf += binPLen;
    }
-
-    strSize = i;
-
-    // If the incoming str is exactly the len of the result str,
-    // return the original
-    if (strSize == len)
+    // Sometimes, in a case with multi-char pad, we need to add a partial pad
+    if (padLength > 0)
    {
-        return tstr;
+        size_t partialSize = cs->charpos(posP, posP+binPLen, padLength);  // end pointer is in bytes
+        memcpy(pBuf, posP, partialSize);
+        byteCount += partialSize;
+        pBuf += partialSize;
    }
-
-    // If the incoming str is too big for the result str
-    // truncate the widechar buffer and return as a string
-    if (strSize > len)
-    {
-        // Trim the excess length of the buffer
-        wstring trimmed = wstring(wcbuf, len);
-        return utf8::wstring_to_utf8(trimmed.c_str());
-    }
-
-    // This is the case where there's room to pad.
-
-    // Convert the pad string to wide
-    padwclen = pad->length();  // A guess to start.
-    size_t padbufsize = padwclen + 1;
-    wchar_t* wcpad = new wchar_t[padbufsize];
-    // padwclen+1 is for giving count for the terminating null
-    size_t padlen = utf8::idb_mbstowcs(wcpad, pad->c_str(), padwclen + 1);
-
-    // How many chars do we need?
-    size_t padspace = len - strSize;
-
-    // Shift the contents of wcbuf to the right.
-    wchar_t* startofstr = wcbuf + padspace;
-
-    // Move the original string to the right to make room for the pad chars
-    // Testing has shown that this loop is faster than memmove
-    wchar_t* newchar = wcbuf + len;     // Last spot to put a char in buf
-    wchar_t* pChar = wcbuf + strSize;   // terminal NULL of our str
-
-    while (pChar >= wcbuf)
-    {
-        *newchar-- = *pChar--;
-    }
-
-    // Fill in the front of the buffer with the pad chars
-    wchar_t* firstpadchar = wcbuf;
-
-    for (wchar_t* pch = wcbuf; pch < startofstr && padlen > 0;)
-    {
-        // Truncate the number of fill chars if running out of space
-        if (padlen > padspace)
-        {
-            padlen = padspace;
-        }
-
-        // Move the fill chars to buffer
-        for (wchar_t* padchar = wcpad; padchar < wcpad + padlen; ++padchar)
-        {
-            *firstpadchar++ = *padchar;
-        }
-
-        padspace -= padlen;
-        pch += padlen;
-    }
-
-    wstring padded = wstring(wcbuf, len);
-    // Turn back to a string
-    std::string ret(utf8::wstring_to_utf8(padded.c_str()));
-    delete [] wcpad;
-    delete [] wcbuf;
+    memcpy(pBuf, pos, binLen);  // the source string goes after the padding
+    byteCount += binLen;
+    
+    std::string ret(buf, byteCount);
+    delete [] buf;
    return ret;
 }

--- a/utils/funcexp/func_ltrim.cpp
+++ b/utils/funcexp/func_ltrim.cpp
@ -50,89 +50,56 @@ CalpontSystemCatalog::ColType Func_ltrim::operationType(FunctionParm& fp, Calpon


 std::string Func_ltrim::getStrVal(rowgroup::Row& row,
-                                  FunctionParm& fp,
-                                  bool& isNull,
-                                  execplan::CalpontSystemCatalog::ColType&)
+                                 FunctionParm& fp,
+                                 bool& isNull,
+                                 execplan::CalpontSystemCatalog::ColType& type)
 {
-    // The number of characters (not bytes) in our input tstr.
-    // Not all of these are necessarily significant. We need to search for the
-    // NULL terminator to be sure.
-    size_t strwclen;
-    // this holds the number of characters (not bytes) in ourtrim tstr.
-    size_t trimwclen;
-
+    CHARSET_INFO* cs = type.getCharset();  // used only for the character counts below
    // The original string
-    const string& tstr = fp[0]->data()->getStrVal(row, isNull);
+    const string& src = fp[0]->data()->getStrVal(row, isNull);
+    if (isNull)
+        return "";
+    if (src.empty() || src.length() == 0)  // the two tests are equivalent; empty input is returned as-is
+        return src;
+    // binLen is the number of bytes in src; it shrinks as leading bytes are trimmed
+    size_t binLen = src.length();
+    const char* pos = src.c_str();
+    const char* end = pos + binLen;
+    // strLen = the number of characters in src
+    size_t strLen = cs->numchars(pos, end);

    // The trim characters.
    const string& trim = (fp.size() > 1 ? fp[1]->data()->getStrVal(row, isNull) : " ");
+    // binTLen represents the number of bytes in trim
+    size_t binTLen = trim.length();
+    const char* posT = trim.c_str();
+    // strTLen = the number of characters in trim
+    size_t strTLen = cs->numchars(posT, posT+binTLen);
+    if (strTLen == 0 || strTLen > strLen)  // empty trim string, or one longer than src: nothing to trim
+        return src;

-    if (isNull)
-        return "";
-
-    if (tstr.empty() || tstr.length() == 0)
-        return tstr;
-
-    // Rather than calling the wideconvert functions with a null buffer to
-    // determine the size of buffer to allocate, we can be sure the wide
-    // char string won't be longer than:
-    strwclen = tstr.length(); // a guess to start with. This will be >= to the real count.
-    int bufsize = strwclen + 1;
-
-    // Convert the string to wide characters. Do all further work in wide characters
-    wchar_t* wcbuf = new wchar_t[bufsize];
-    strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen + 1);
-
-    // idb_mbstowcs can return -1 if there is bad mbs char in tstr
-    if (strwclen == static_cast<size_t>(-1))
-        strwclen = 0;
-
-    // Convert the trim string to wide
-    trimwclen = trim.length();  // A guess to start.
-    int trimbufsize = trimwclen + 1;
-    wchar_t* wctrim = new wchar_t[trimbufsize];
-    size_t trimlen = utf8::idb_mbstowcs(wctrim, trim.c_str(), trimwclen + 1);
-
-    // idb_mbstowcs can return -1 if there is bad mbs char in tstr
-    if (trimlen == static_cast<size_t>(-1))
-        trimlen = 0;
-
-    size_t trimCmpLen = trimlen * sizeof(wchar_t);
-
-    const wchar_t* oPtr = wcbuf;      // To remember the start of the string
-    const wchar_t* aPtr = oPtr;
-    const wchar_t* aEnd = wcbuf + strwclen - 1;
-
-    if (trimlen > 0)
+    if (binTLen == 1)
    {
-        if (trimlen == 1)
+        // If the trim string is 1 byte, compare bytes directly instead of calling memcmp
+        while (pos < end && *pos == *posT)
        {
-            // If trim is a single char, then don't spend the overhead for memcmp.
-            wchar_t chr = wctrim[0];
-
-            while (aPtr <= aEnd && *aPtr == chr)
-                aPtr++;
-        }
-        else
-        {
-            aEnd -= (trimlen - 1); // So we don't compare past the end of the string.
-
-            while (aPtr <= aEnd && !memcmp(aPtr, wctrim, trimCmpLen))
-                aPtr += trimlen;
+            ++pos;
+            --binLen;
+        }
+    }
+    else
+    {
+        while (pos+binTLen <= end && memcmp(pos,posT,binTLen) == 0)  // bounds test first, so memcmp stays inside src
+        {
+            pos += binTLen;  // skip one whole occurrence of the trim string
+            binLen -= binTLen;
        }
    }
-
-    // Bug 5110 - error in allocating enough memory for utf8 chars
-    size_t aLen = strwclen - (aPtr - oPtr);
-    wstring trimmed = wstring(aPtr, aLen);
    // Turn back to a string
-    std::string ret(utf8::wstring_to_utf8(trimmed.c_str()));
-    delete [] wctrim;
-    delete [] wcbuf;
+    std::string ret(pos, binLen);  // whatever remains after the trimmed prefix
    return ret;
 }

-
 } // namespace funcexp
 // vim:ts=4 sw=4:

--- a/utils/funcexp/func_repeat.cpp
+++ b/utils/funcexp/func_repeat.cpp
@ -59,7 +59,7 @@ CalpontSystemCatalog::ColType Func_repeat::operationType(FunctionParm& fp, Calpo
 std::string Func_repeat::getStrVal(rowgroup::Row& row,
                                   FunctionParm& fp,
                                   bool& isNull,
-                                   execplan::CalpontSystemCatalog::ColType& op_ct)
+                                   execplan::CalpontSystemCatalog::ColType& type)
 {
 	string str;

--- a/utils/funcexp/func_right.cpp
+++ b/utils/funcexp/func_right.cpp
@ -51,42 +51,33 @@ CalpontSystemCatalog::ColType Func_right::operationType(FunctionParm& fp, Calpon
 std::string Func_right::getStrVal(rowgroup::Row& row,
                                  FunctionParm& fp,
                                  bool& isNull,
-                                  execplan::CalpontSystemCatalog::ColType&)
+                                  execplan::CalpontSystemCatalog::ColType& type)
 {
-    const string& tstr = fp[0]->data()->getStrVal(row, isNull);
-
+    CHARSET_INFO* cs = type.getCharset();  // character counting below goes through this charset
+    // The source string (first argument); its trailing part is what gets returned
+    const string& src = fp[0]->data()->getStrVal(row, isNull);
    if (isNull)
        return "";
+    if (src.empty() || src.length() == 0)  // the two tests are equivalent; empty input is returned as-is
+        return src;
+    // binLen is the number of bytes (not characters) in src
+    size_t binLen = src.length();
+    const char* pos = src.c_str();
+    const char* end = pos + binLen;

-    int64_t pos = fp[1]->data()->getIntVal(row, isNull);
-
-    if (isNull)
+    size_t trimLength = fp[1]->data()->getUintVal(row, isNull);  // second argument, read as unsigned
+    if (isNull || trimLength <= 0)  // trimLength is size_t, so "<= 0" can only mean "== 0"
        return "";

-    if (pos == -1)  // pos == 0
-        return "";
+    size_t start = cs->numchars(pos, end); // Here, start is the number of characters in src
+    if (start <= trimLength)  // at least as many characters requested as src holds: return it whole
+        return src;
+    start = cs->charpos(pos, end, start - trimLength); // Here, start becomes the number of bytes into src at which copying starts

-    size_t strwclen = utf8::idb_mbstowcs(0, tstr.c_str(), 0) + 1;
-    //wchar_t wcbuf[strwclen];
-    wchar_t* wcbuf = new wchar_t[strwclen];
-    strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen);
-    wstring str(wcbuf, strwclen);
-
-    if ( (unsigned) pos >= strwclen )
-        pos = strwclen;
-
-    wstring out = str.substr(strwclen - pos, strwclen);
-    size_t strmblen = utf8::idb_wcstombs(0, out.c_str(), 0) + 1;
-    //char outbuf[strmblen];
-    char* outbuf = new char[strmblen];
-    strmblen = utf8::idb_wcstombs(outbuf, out.c_str(), strmblen);
-    std::string ret(outbuf, strmblen);
-    delete [] outbuf;
-    delete [] wcbuf;
+    std::string ret(pos+start, binLen-start);  // bytes from offset start through the end of src
    return ret;
 }

-
 } // namespace funcexp
 // vim:ts=4 sw=4:

--- a/utils/funcexp/func_rpad.cpp
+++ b/utils/funcexp/func_rpad.cpp
@ -55,179 +55,81 @@ CalpontSystemCatalog::ColType Func_rpad::operationType(FunctionParm& fp, Calpont
 std::string Func_rpad::getStrVal(rowgroup::Row& row,
                                 FunctionParm& fp,
                                 bool& isNull,
-                                 execplan::CalpontSystemCatalog::ColType&)
+                                 execplan::CalpontSystemCatalog::ColType& type)
 {
-    unsigned i;
-    // The number of characters (not bytes) in our input str.
-    // Not all of these are necessarily significant. We need to search for the
-    // NULL terminator to be sure.
-    size_t strwclen;
-    // this holds the number of characters (not bytes) in our pad str.
-    size_t padwclen;
-
+    CHARSET_INFO* cs = type.getCharset();  // all character counting below goes through this charset
    // The original string
-    const string& tstr = fp[0]->data()->getStrVal(row, isNull);
+    const string& src = fp[0]->data()->getStrVal(row, isNull);
+    if (isNull)
+        return "";
+    if (src.empty() || src.length() == 0)
+        return src;
+    // binLen represents the number of bytes in src
+    size_t binLen = src.length();
+    const char* pos = src.c_str();
+    const char* end = pos + binLen;
+    // strLen = the number of characters in src
+    size_t strLen = cs->numchars(pos, end);

-    // The result length in number of characters
-    size_t len = 0;
-
-    switch (fp[1]->data()->resultType().colDataType)
+    // In the case where someone entered pad length as a quoted string,
+    // it may be interpreted by columnstore to be an actual string
+    // and stored in fResult.int as a htonl of that string.
+    // However fResult.double is always correct, so we'll use that.
+    double padValue = fp[1]->data()->getDoubleVal(row, isNull);
+    if (isNull || !(padValue >= 1.0))  // validate as double: casting a negative/NaN to size_t is undefined
+        return "";
+    size_t padLength = (padValue > (double)INT_MAX32) ? (size_t)INT_MAX32  // clamp oversized requests
+                                                       : (size_t)padValue; // truncates the fraction
+    
+    if (padLength < strLen)
    {
-        case execplan::CalpontSystemCatalog::BIGINT:
-        case execplan::CalpontSystemCatalog::INT:
-        case execplan::CalpontSystemCatalog::MEDINT:
-        case execplan::CalpontSystemCatalog::TINYINT:
-        case execplan::CalpontSystemCatalog::SMALLINT:
-        {
-            len = fp[1]->data()->getIntVal(row, isNull);
-        }
-        break;
-
-        case execplan::CalpontSystemCatalog::UBIGINT:
-        case execplan::CalpontSystemCatalog::UINT:
-        case execplan::CalpontSystemCatalog::UMEDINT:
-        case execplan::CalpontSystemCatalog::UTINYINT:
-        case execplan::CalpontSystemCatalog::USMALLINT:
-        {
-            len = fp[1]->data()->getUintVal(row, isNull);
-        }
-        break;
-
-        case execplan::CalpontSystemCatalog::FLOAT:
-        case execplan::CalpontSystemCatalog::UFLOAT:
-        case execplan::CalpontSystemCatalog::DOUBLE:
-        case execplan::CalpontSystemCatalog::UDOUBLE:
-        case execplan::CalpontSystemCatalog::DECIMAL:
-        case execplan::CalpontSystemCatalog::UDECIMAL:
-        {
-            double value = fp[1]->data()->getDoubleVal(row, isNull);
-
-            if (value > 0)
-                value += 0.5;
-            else if (value < 0)
-                value -= 0.5;
-            else if (value < 0)
-                value -= 0.5;
-
-            int64_t ret = (int64_t) value;
-
-            if (value > (double) numeric_limits<int64_t>::max())
-                ret = numeric_limits<int64_t>::max();
-            else if (value < (double) (numeric_limits<int64_t>::min() + 2))
-                ret = numeric_limits<int64_t>::min() + 2; // IDB min for bigint
-
-            len = ret;
-        }
-        break;
-
-        case execplan::CalpontSystemCatalog::CHAR:
-        case execplan::CalpontSystemCatalog::VARCHAR:
-        {
-            const string& strval = fp[1]->data()->getStrVal(row, isNull);
-            len = strtol(strval.c_str(), NULL, 10);
-            break;
-        }
-
-        default:
-        {
-            std::ostringstream oss;
-            oss << "lpad parameter 2 must be numeric, not  " << execplan::colDataTypeToString(fp[1]->data()->resultType().colDataType);
-            throw logging::IDBExcept(oss.str(), logging::ERR_DATATYPE_NOT_SUPPORT);
-        }
+        binLen = cs->charpos(pos, end, padLength);  // requested length is shorter than src: truncate
+        std::string ret(pos, binLen);
+        return ret;
    }

-    if (len < 1)
-        return "";
-
    // The pad characters.
-    // MCOL-2182 As of MariaDB 10.3 the third parameter - pad characters - is optional
    const string* pad = &fPad;
    if (fp.size() > 2)
    {
        pad = &fp[2]->data()->getStrVal(row, isNull);
    }
+    // binPLen represents the number of bytes in pad
+    size_t binPLen = pad->length();
+    const char* posP = pad->c_str();
+    // plen = the number of characters in pad
+    size_t plen = cs->numchars(posP, posP+binPLen);
+    if (plen == 0)  // nothing to pad with; a pad longer than src is fine and is handled below
+        return src;

-    if (isNull)
-        return "";
+    size_t byteCount = (padLength+1) * cs->mbmaxlen; // absolute maximum number of bytes
+    char* buf = new char[byteCount];
+    char* pBuf = buf;

-    // Rather than calling the wideconvert functions with a null buffer to
-    // determine the size of buffer to allocate, we can be sure the wide
-    // char string won't be longer than:
-    strwclen = tstr.length(); // a guess to start with. This will be >= to the real count.
-    int alen = len;
+    byteCount = 0;
+    
+    memcpy(pBuf, pos, binLen);  // the source string goes first, padding follows
+    byteCount += binLen;
+    padLength -= strLen;  // from here on padLength is the number of pad characters still needed
+    pBuf += binLen;

-    if (strwclen > len)
-        alen = strwclen;
-
-    int bufsize = alen + 1;
-
-    // Convert to wide characters. Do all further work in wide characters
-    wchar_t* wcbuf = new wchar_t[bufsize];
-    strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen + 1);
-
-    unsigned int strSize = strwclen;    // The number of significant characters
-    const wchar_t* pWChar = wcbuf;
-
-    for (i = 0; *pWChar != '\0' && i < strwclen; ++pWChar, ++i)
+    while (padLength >= plen)
    {
+        memcpy(pBuf, posP, binPLen);  // copy the whole pad string: binPLen bytes, not plen characters
+        padLength -= plen;
+        byteCount += binPLen;
+        pBuf += binPLen;
    }
-
-    strSize = i;
-
-    // If the incoming str is exactly the len of the result str,
-    // return the original
-    if (strSize == len)
+    // Sometimes, in a case with multi-char pad, we need to add a partial pad
+    if (padLength > 0)
    {
-        return tstr;
+        size_t partialSize = cs->charpos(posP, posP+binPLen, padLength);  // end pointer is in bytes
+        memcpy(pBuf, posP, partialSize);
+        byteCount += partialSize;
    }
-
-    // If the incoming str is too big for the result str
-    // truncate the widechar buffer and return as a string
-    if (strSize > len)
-    {
-        // Trim the excess length of the buffer
-        wstring trimmed = wstring(wcbuf, len);
-        return utf8::wstring_to_utf8(trimmed.c_str());
-    }
-
-    // This is the case where there's room to pad.
-
-    // Convert the pad string to wide
-    padwclen = pad->length();  // A guess to start.
-    int padbufsize = padwclen + 1;
-    wchar_t* wcpad = new wchar_t[padbufsize];
-    size_t padlen = utf8::idb_mbstowcs(wcpad, pad->c_str(), padwclen + 1);
-
-    // How many chars do we need?
-    unsigned int padspace = len - strSize;
-
-    // Fill in the back of the buffer
-    wchar_t* firstpadchar = wcbuf + strSize;
-
-    for (wchar_t* pch = wcbuf; pch < wcbuf + len && padlen > 0;)
-    {
-        // Truncate the number of fill chars if running out of space
-        if (padlen > padspace)
-        {
-            padlen = padspace;
-        }
-
-        // Move the fill chars to buffer
-        for (wchar_t* padchar = wcpad; padchar < wcpad + padlen; ++padchar)
-        {
-            *firstpadchar++ = *padchar;
-        }
-
-        padspace -= padlen;
-        pch += padlen;
-    }
-
-    wstring padded = wstring(wcbuf, len);
-
-    // Bug 5110 : strings were getting truncated since enough bytes not allocated.
-    std::string ret(utf8::wstring_to_utf8(padded.c_str()));
-    delete [] wcpad;
-    delete [] wcbuf;
+    
+    std::string ret(buf, byteCount);
+    delete [] buf;
    return ret;
 }

--- a/utils/funcexp/func_rtrim.cpp
+++ b/utils/funcexp/func_rtrim.cpp
@ -49,95 +49,118 @@ CalpontSystemCatalog::ColType Func_rtrim::operationType(FunctionParm& fp, Calpon


 std::string Func_rtrim::getStrVal(rowgroup::Row& row,
-                                  FunctionParm& fp,
-                                  bool& isNull,
-                                  execplan::CalpontSystemCatalog::ColType&)
+                                 FunctionParm& fp,
+                                 bool& isNull,
+                                 execplan::CalpontSystemCatalog::ColType& type)
 {
-    // The number of characters (not bytes) in our input tstr.
-    // Not all of these are necessarily significant. We need to search for the
-    // NULL terminator to be sure.
-    size_t strwclen;
-    // this holds the number of characters (not bytes) in ourtrim tstr.
-    size_t trimwclen;
-
+    CHARSET_INFO* cs = type.getCharset();  // charset used for all character counting and multi-byte checks below
     // The original string
-    const string& tstr = fp[0]->data()->getStrVal(row, isNull);
+    const string& src = fp[0]->data()->getStrVal(row, isNull);
+    if (isNull)
+        return "";
+    if (src.empty() || src.length() == 0)
+        return src;
+    // binLen = the number of bytes in src; it shrinks as trailing bytes are trimmed
+    size_t binLen = src.length();
+    const char* pos = src.c_str();
+    const char* end = pos + binLen;
+    // strLen = the number of characters in src
+    size_t strLen = cs->numchars(pos, end);

     // The trim characters.
     const string& trim = (fp.size() > 1 ? fp[1]->data()->getStrVal(row, isNull) : " ");
+    // binTLen represents the number of bytes in trim
+    size_t binTLen = trim.length();  // NOTE(review): isNull is not re-checked after fetching trim (the removed code did) - confirm a NULL trim argument is handled
+    const char* posT = trim.c_str();
+    // strTLen = the number of characters in trim
+    size_t strTLen = cs->numchars(posT, posT+binTLen);
+    if (strTLen == 0 || strTLen > strLen)  // nothing to trim with, or trim is longer than src: return src unchanged
+        return src;

-    if (isNull)
-        return "";
-
-    if (tstr.empty() || tstr.length() == 0)
-        return tstr;
-
-    // Rather than calling the wideconvert functions with a null buffer to
-    // determine the size of buffer to allocate, we can be sure the wide
-    // char string won't be longer than:
-    strwclen = tstr.length(); // a guess to start with. This will be >= to the real count.
-    int bufsize = strwclen + 1;
-
-    // Convert the string to wide characters. Do all further work in wide characters
-    wchar_t* wcbuf = new wchar_t[bufsize];
-    strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen + 1);
-
-    // utf8::idb_mbstowcs could return -1 if there is bad chars
-    if (strwclen == static_cast<size_t>(-1))
-        strwclen = 0;
-
-    // Convert the trim string to wide
-    trimwclen = trim.length();  // A guess to start.
-    int trimbufsize = trimwclen + 1;
-    wchar_t* wctrim = new wchar_t[trimbufsize];
-    size_t trimlen = utf8::idb_mbstowcs(wctrim, trim.c_str(), trimwclen + 1);
-
-    // idb_mbstowcs could return -1 if there is bad chars
-    if (trimlen == static_cast<size_t>(-1))
-        trimlen = 0;
-
-    size_t trimCmpLen = trimlen * sizeof(wchar_t);
-
-    const wchar_t* oPtr = wcbuf;      // To remember the start of the string
-    const wchar_t* aPtr = oPtr;
-    const wchar_t* aEnd = wcbuf + strwclen - 1;
-    size_t trimCnt = 0;
-
-    if (trimlen > 0)
+    if (binTLen == 1)  // the trim string is a single byte
     {
-        if (trimlen == 1)
+        const char* ptr = pos;  // lower bound for trimming: no byte before ptr is removed
+        if (cs->use_mb())   // This is a multi-byte charset
         {
-            // If trim is a single char, then don't spend the overhead for memcmp.
-            wchar_t chr = wctrim[0];
-
-            while (aEnd >= aPtr && *aEnd == chr)
+            const char* p = pos;
+            uint32 l;
+            // Multibyte characters in the string give us alignment problems.
+            // What we do here is skip past any multibyte characters. When
+            // done with this loop, ptr is pointing to a singlebyte char that
+            // is after all multibyte chars in the string, or to end.
+            while (ptr < end)
             {
-                --aEnd;
-                ++trimCnt;
+                if ((l = my_ismbchar(cs, ptr, end))) // returns the number of bytes in the leading char or zero if one byte
+                {
+                    ptr += l;
+                    p = ptr;
+                }
+                else
+                {
+                    ++ptr;
+                }
+            }
+            ptr = p;
+        }
+        while (ptr < end && end[-1] == *posT)
+        {
+            --end;
+            --binLen;
+        }
+    }
+    else
+    {
+        // An uncommon case where the trim string is > 1 byte
+        if (cs->use_mb())   // This is a multi-byte charset
+        {
+            // The problem is that the byte pattern at the end could
+            // match memcmp, but not be correct since the first byte compared
+            // may actually be a second or later byte from a previous char.
+            
+            // We start at the beginning of the string and move forward
+            // one character at a time until we reach the end. Then we can
+            // safely compare and remove one character. Then back to the beginning
+            // and try again.
+            while (end - binTLen >= pos)
+            {
+                const char* p = pos;
+                uint32 l;
+                while (p + binTLen < end)
+                {
+                    if ((l = my_ismbchar(cs, p, end))) // returns the number of bytes in the leading char or zero if one byte
+                        p += l;
+                    else
+                        ++p;
+                }
+                if (p + binTLen == end && memcmp(p,posT,binTLen) == 0)
+                {
+                    end -= binTLen;
+                    binLen -= binTLen;
+                }
+                else
+                {
+                    break;  // We've run out of places to look
+                }
             }
         }
         else
         {
-            aEnd -= (trimlen - 1); // So we don't compare past the end of the string.
-
-            while (aPtr <= aEnd && !memcmp(aEnd, wctrim, trimCmpLen))
+            // This implies we have a single byte charset and a multibyte
+            // space character.
+            // Should never get here, since rtrim only trims space characters
+            // Included for completeness.
+            while (end-binTLen >= pos && memcmp(end-binTLen,posT,binTLen) == 0)
             {
-                aEnd -= trimCmpLen;
-                trimCnt += trimlen;
+                end -= binTLen;
+                binLen -= binTLen;
             }
         }
     }
-
-    size_t aLen = strwclen - trimCnt;
-    wstring trimmed = wstring(aPtr, aLen);
     // Turn back to a string
-    std::string ret(utf8::wstring_to_utf8(trimmed.c_str()));
-    delete [] wctrim;
-    delete [] wcbuf;
+    std::string ret(pos, binLen);
     return ret;
 }

-
 } // namespace funcexp
 // vim:ts=4 sw=4:

--- a/utils/funcexp/func_strcmp.cpp
+++ b/utils/funcexp/func_strcmp.cpp
@ -73,7 +73,7 @@ CalpontSystemCatalog::ColType Func_strcmp::operationType(FunctionParm& fp, Calpo
 int64_t Func_strcmp::getIntVal(rowgroup::Row& row,
                               FunctionParm& fp,
                               bool& isNull,
-                               execplan::CalpontSystemCatalog::ColType& op_ct)
+                               execplan::CalpontSystemCatalog::ColType& type)
 {
    CHARSET_INFO* cs = fp[0]->data()->resultType().getCharset();
    const string& str = fp[0]->data()->getStrVal(row, isNull);
@ -88,9 +88,9 @@ int64_t Func_strcmp::getIntVal(rowgroup::Row& row,
 std::string Func_strcmp::getStrVal(rowgroup::Row& row,
                                   FunctionParm& fp,
                                   bool& isNull,
-                                   execplan::CalpontSystemCatalog::ColType& op_ct)
+                                   execplan::CalpontSystemCatalog::ColType& type)
 {
-    uint64_t val = getIntVal(row, fp, isNull, op_ct);
+    uint64_t val = getIntVal(row, fp, isNull, type);

    if (val > 0)
        return string("1");
--- a/utils/funcexp/func_trim.cpp
+++ b/utils/funcexp/func_trim.cpp
@ -86,31 +86,37 @@ std::string Func_trim::getStrVal(rowgroup::Row& row,
            --binLen;
        }
        // Trim trailing
-        while (end > pos && *end == *posT)
+        const char* ptr = pos;
+        if (cs->use_mb())   // This is a multi-byte charset
+        {
+            const char* p = pos;
+            uint32 l;
+            // Multibyte characters in the string give us alignment problems
+            // What we do here is skip past any multibyte characters. When
+            // done with this loop, ptr is pointing to a singlebyte char that
+            // is after all multibyte chars in the string, or to end.
+            while (ptr < end)
+            {
+                if ((l = my_ismbchar(cs, ptr, end))) // returns the number of bytes in the leading char or zero if one byte
+                {
+                    ptr += l;
+                    p = ptr;
+                }
+                else
+                {
+                    ++ptr;
+                }
+            }
+            ptr = p;
+        }
+        while (ptr < end && end[-1] == *posT)
        {
            --end;
            --binLen;
        }
    }
-    else if (!cs->use_mb())
-    {
-        // This is a one byte per char charset with multiple char trim.
-        // Trim leading
-        while (pos+binTLen <= end && memcmp(pos,posT,binTLen) == 0)
-        {
-            pos += binTLen;
-            binLen -= binTLen;
-        }
-        // Trim trailing
-        while (end-binTLen >= pos && memcmp(end-binTLen,posT,binTLen) == 0)
-        {
-            end -= binTLen;
-            binLen -= binTLen;
-        }
-    }    
    else
    {
-        // We're using a multi-byte charset
        // Trim leading is easy
        while (pos+binTLen <= end && memcmp(pos,posT,binTLen) == 0)
        {
@ -119,33 +125,45 @@ std::string Func_trim::getStrVal(rowgroup::Row& row,
        }
        
        // Trim trailing
-        // The problem is that the byte pattern at the end could
-        // match memcmp, but not be correct since the first byte compared
-        // may actually be a second or later byte from a previous char.
-        
-        // We start at the beginning of the string and move forward
-        // one character at a time until we reach the end. Then we can
-        // safely compare.
-        while (end - binTLen >= pos)
+        if (cs->use_mb())   // This is a multi-byte charset
        {
-            const char* p = pos;
-            uint32 l;
-            while (p + binTLen < end)
+            // The problem is that the byte pattern at the end could
+            // match memcmp, but not be correct since the first byte compared
+            // may actually be a second or later byte from a previous char.
+            
+            // We start at the beginning of the string and move forward
+            // one character at a time until we reach the end. Then we can
+            // safely compare and remove one character. Then back to the beginning
+            // and try again.
+            while (end - binTLen >= pos)
            {
-                if ((l = my_ismbchar(cs, p, end))) // returns the number of bytes in the leading char or zero if one byte
-                    p += l;
+                const char* p = pos;
+                uint32 l;
+                while (p + binTLen < end)
+                {
+                    if ((l = my_ismbchar(cs, p, end))) // returns the number of bytes in the leading char or zero if one byte
+                        p += l;
+                    else
+                        ++p;
+                }
+                if (p + binTLen == end && memcmp(p,posT,binTLen) == 0)
+                {
+                    end -= binTLen;
+                    binLen -= binTLen;
+                }
                else
-                    ++p;
+                {
+                    break;  // We've run out of places to look
+                }
            }
-            if (p + binTLen == end && memcmp(p,posT,binTLen) == 0)
+        }
+        else
+        {
+            while (end-binTLen >= pos && memcmp(end-binTLen,posT,binTLen) == 0)
            {
                end -= binTLen;
                binLen -= binTLen;
            }
-            else
-            {
-                break;  // We've run out of places to look
-            }
        }
    }
    // Turn back to a string