diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index ccb0049ff..ae6de6fb9 100755 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -6672,7 +6672,8 @@ int processLimitAndOffset( } // We don't currently support limit with correlated subquery - if (gwi.subQuery && !gwi.correlatedTbNameVec.empty() && csep->hasOrderBy()) + if (csep->limitNum() != (uint64_t) - 1 && + gwi.subQuery && !gwi.correlatedTbNameVec.empty()) { gwi.fatalParseError = true; gwi.parseErrorText = IDBErrorInfo::instance()->errorMsg(ERR_NON_SUPPORT_LIMIT_SUB); diff --git a/utils/funcexp/func_find_in_set.cpp b/utils/funcexp/func_find_in_set.cpp index 9cc8a2485..81513cb31 100644 --- a/utils/funcexp/func_find_in_set.cpp +++ b/utils/funcexp/func_find_in_set.cpp @@ -20,6 +20,9 @@ * * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include #include #include @@ -56,37 +59,58 @@ int64_t Func_find_in_set::getIntVal(rowgroup::Row& row, CalpontSystemCatalog::ColType& op_ct) { const string& searchStr = parm[0]->data()->getStrVal(row, isNull); - if (isNull) return 0; const string& setString = parm[1]->data()->getStrVal(row, isNull); - if (isNull) return 0; if (searchStr.find(",") != string::npos) return 0; - string newSearchStr(searchStr.substr(0, strlen(searchStr.c_str()))); - string newSetString(setString.substr(0, strlen(setString.c_str()))); - //tokenize the setStr with comma as seprator. 
- typedef boost::tokenizer > tokenizer; - boost::char_separator sep( ","); - tokenizer tokens(newSetString, sep); + if (searchStr.length() > setString.length()) // a needle longer than the whole set can never match + return 0; + + CHARSET_INFO *cs= op_ct.getCharset(); - unsigned i = 0; - size_t pos = 0; - - for (tokenizer::iterator tok_iter = tokens.begin(); tok_iter != tokens.end(); ++tok_iter) + my_wc_t wc= 0; + const char *str_begin= setString.c_str(); + const char *str_end= setString.c_str(); + const char *real_end= str_end + setString.length(); + const char *find_str= searchStr.c_str(); + uint find_str_len= searchStr.length(); + int position= 0; + static const char separator=','; + while (1) { - pos = (*tok_iter).find(newSearchStr); - i++; - - if (( pos != string::npos) && (newSearchStr.length() == (*tok_iter).length())) - return i; + int symbol_len; + if ((symbol_len= cs->mb_wc(&wc, (uchar*) str_end, + (uchar*) real_end)) > 0) + { + const char *substr_end= str_end + symbol_len; + bool is_last_item= (substr_end == real_end); + bool is_separator= (wc == (my_wc_t) separator); + if (is_separator || is_last_item) + { + position++; + if (is_last_item && !is_separator) + str_end= substr_end; + if (!cs->strnncoll(str_begin, (uint) (str_end - str_begin), + find_str, find_str_len)) + return (longlong) position; + else + str_begin= substr_end; + } + str_end= substr_end; + } + else if (str_end - str_begin == 0 && + find_str_len == 0 && + wc == (my_wc_t) separator) + return (longlong) ++position; + else + return 0; } - return 0; } diff --git a/utils/funcexp/func_if.cpp b/utils/funcexp/func_if.cpp index ef2827bbe..6d10d6829 100644 --- a/utils/funcexp/func_if.cpp +++ b/utils/funcexp/func_if.cpp @@ -53,22 +53,22 @@ bool boolVal(SPTP& parm, Row& row, const string& timeZone) case CalpontSystemCatalog::TEXT: case CalpontSystemCatalog::VARCHAR: ret = (atoi((char*)(parm->data()->getStrVal(timeZone).c_str())) != 0); - + break; case CalpontSystemCatalog::FLOAT: case CalpontSystemCatalog::UFLOAT: ret = (parm->data()->getFloatVal(row, 
isNull) != 0); - + break; case CalpontSystemCatalog::DOUBLE: case CalpontSystemCatalog::UDOUBLE: ret = (parm->data()->getDoubleVal(row, isNull) != 0); - + break; case CalpontSystemCatalog::LONGDOUBLE: ret = (parm->data()->getLongDoubleVal(row, isNull) != 0); - + break; case CalpontSystemCatalog::DECIMAL: case CalpontSystemCatalog::UDECIMAL: ret = (parm->data()->getDecimalVal(row, isNull).value != 0); - + break; case CalpontSystemCatalog::BIGINT: case CalpontSystemCatalog::SMALLINT: case CalpontSystemCatalog::MEDINT: @@ -83,6 +83,7 @@ bool boolVal(SPTP& parm, Row& row, const string& timeZone) case CalpontSystemCatalog::TIME: default: ret = (parm->data()->getIntVal(row, isNull) != 0); + break; } } diff --git a/utils/funcexp/func_insert.cpp b/utils/funcexp/func_insert.cpp index 20109c27b..bcab9bc40 100644 --- a/utils/funcexp/func_insert.cpp +++ b/utils/funcexp/func_insert.cpp @@ -20,6 +20,9 @@ * * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include #include using namespace std; @@ -48,69 +51,61 @@ CalpontSystemCatalog::ColType Func_insert::operationType(FunctionParm& fp, Calpo return fp[0]->data()->resultType(); } -string insertStr(const string& src, int pos, int len, const string& targ) -{ - int64_t strLen = static_cast(src.length()); - - if ((pos <= 0) || ((pos - 1) >= strLen)) - return src; - - if ((len < 0) || (len > strLen)) - len = strLen; - - const char* srcptr = src.c_str(); - advance(srcptr, pos - 1, srcptr + strLen); - // srcptr now pointing to where we need to insert targ string - - uint32_t srcPos = srcptr - src.c_str(); - - uint32_t finPos = strLen; - const char* finptr = src.c_str(); - - if ((strLen - (pos - 1 + len)) >= 0) - { - advance(finptr, (pos - 1 + len), finptr + strLen); - // finptr now pointing to the end of the string to replace - finPos = finptr - src.c_str(); - } - - string out; - out.reserve(srcPos + targ.length() + 
strLen - finPos + 1); - out.append( src.c_str(), srcPos ); - out.append( targ.c_str(), targ.length() ); - out.append( src.c_str() + finPos, strLen - finPos ); - - return out; -} - std::string Func_insert::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, execplan::CalpontSystemCatalog::ColType&) { - string tstr; + string src; string tnewstr; - stringValue(fp[0], row, isNull, tstr); + int64_t start, length; + + stringValue(fp[0], row, isNull, src); if (isNull) - { return ""; - } stringValue(fp[3], row, isNull, tnewstr); if (isNull) return ""; - int64_t pos = fp[1]->data()->getIntVal(row, isNull); - + start = fp[1]->data()->getIntVal(row, isNull); if (isNull) return ""; - int64_t len = fp[2]->data()->getIntVal(row, isNull); - + length = fp[2]->data()->getIntVal(row, isNull); if (isNull) return ""; - return insertStr( tstr, pos, len, tnewstr ); + start--; // Because SQL syntax is 1 based and we want 0 based. + + CHARSET_INFO* cs = fp[0]->data()->resultType().getCharset(); + + // binLen represents the number of bytes + int64_t binLen = static_cast<int64_t>(src.length()); + const char* pos = src.c_str(); + const char* end = pos + binLen; + // strLen is number of characters + int64_t strLen = cs->numchars(pos, end); + + // Return the original string if start (now 0 based) isn't within the string. + if ((start < 0) || start >= strLen) + return src; + + if ((length < 0) || (length > strLen - start)) + length = strLen - start; // never replace past the end, so the byte math below cannot go negative + + // Convert start and length from characters to bytes. 
+ start = cs->charpos(pos, end, start); + length = cs->charpos(pos+start, end, length); + + string out; + out.reserve(binLen - length + tnewstr.length() + 1); + + out.append(src.c_str(), start); + out.append(tnewstr.c_str(), tnewstr.length()); + out.append(src.c_str() + start + length, binLen - start - length); + + return out; } diff --git a/utils/funcexp/func_left.cpp b/utils/funcexp/func_left.cpp index 3fc0ea403..31588b9e9 100644 --- a/utils/funcexp/func_left.cpp +++ b/utils/funcexp/func_left.cpp @@ -20,6 +20,9 @@ * * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include #include using namespace std; diff --git a/utils/funcexp/func_length.cpp b/utils/funcexp/func_length.cpp index dbcf8eaac..c51c0be32 100644 --- a/utils/funcexp/func_length.cpp +++ b/utils/funcexp/func_length.cpp @@ -20,6 +20,9 @@ * * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include #include #include diff --git a/utils/funcexp/func_lpad.cpp b/utils/funcexp/func_lpad.cpp index 8a40f21d2..458ba747a 100644 --- a/utils/funcexp/func_lpad.cpp +++ b/utils/funcexp/func_lpad.cpp @@ -20,6 +20,10 @@ * * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include + #include "errorids.h" #include using namespace std; diff --git a/utils/funcexp/func_ltrim.cpp b/utils/funcexp/func_ltrim.cpp index 77db579b8..7e340914d 100644 --- a/utils/funcexp/func_ltrim.cpp +++ b/utils/funcexp/func_ltrim.cpp @@ -20,6 +20,9 @@ * * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include #include using namespace std; diff --git 
a/utils/funcexp/func_replace.cpp b/utils/funcexp/func_replace.cpp index 549e5f2a3..3bc6a6aaf 100644 --- a/utils/funcexp/func_replace.cpp +++ b/utils/funcexp/func_replace.cpp @@ -21,6 +21,10 @@ * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include + #include using namespace std; diff --git a/utils/funcexp/func_right.cpp b/utils/funcexp/func_right.cpp index f7a21faed..81d7d190a 100644 --- a/utils/funcexp/func_right.cpp +++ b/utils/funcexp/func_right.cpp @@ -20,6 +20,9 @@ * * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include #include using namespace std; diff --git a/utils/funcexp/func_rpad.cpp b/utils/funcexp/func_rpad.cpp index 37e1d8791..b92030faa 100644 --- a/utils/funcexp/func_rpad.cpp +++ b/utils/funcexp/func_rpad.cpp @@ -20,6 +20,10 @@ * * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include + #include "errorids.h" #include using namespace std; diff --git a/utils/funcexp/func_rtrim.cpp b/utils/funcexp/func_rtrim.cpp index 513567d6d..4bfb9ac40 100644 --- a/utils/funcexp/func_rtrim.cpp +++ b/utils/funcexp/func_rtrim.cpp @@ -20,6 +20,9 @@ * * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include #include using namespace std; diff --git a/utils/funcexp/func_strcmp.cpp b/utils/funcexp/func_strcmp.cpp index 7c7c950c5..cec87f0ae 100644 --- a/utils/funcexp/func_strcmp.cpp +++ b/utils/funcexp/func_strcmp.cpp @@ -20,6 +20,9 @@ * * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is 
incompatible with boost +#include #include #undef set_bits // mariadb.h defines set_bits, which is incompatible with boost diff --git a/utils/funcexp/func_substr.cpp b/utils/funcexp/func_substr.cpp index e99287af8..4d0a4e90d 100644 --- a/utils/funcexp/func_substr.cpp +++ b/utils/funcexp/func_substr.cpp @@ -20,6 +20,9 @@ * * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include #include using namespace std; diff --git a/utils/funcexp/func_substring_index.cpp b/utils/funcexp/func_substring_index.cpp index a3b5c342e..a9f346b65 100644 --- a/utils/funcexp/func_substring_index.cpp +++ b/utils/funcexp/func_substring_index.cpp @@ -21,6 +21,9 @@ * * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include #include using namespace std; diff --git a/utils/funcexp/func_trim.cpp b/utils/funcexp/func_trim.cpp index 83cbe6707..dedb891aa 100644 --- a/utils/funcexp/func_trim.cpp +++ b/utils/funcexp/func_trim.cpp @@ -20,6 +20,9 @@ * * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include #include using namespace std; @@ -47,106 +50,106 @@ CalpontSystemCatalog::ColType Func_trim::operationType(FunctionParm& fp, Calpont std::string Func_trim::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, - execplan::CalpontSystemCatalog::ColType&) + execplan::CalpontSystemCatalog::ColType& type) { - // The number of characters (not bytes) in our input tstr. - // Not all of these are necessarily significant. We need to search for the - // NULL terminator to be sure. - size_t strwclen; - // this holds the number of characters (not bytes) in ourtrim tstr. 
- size_t trimwclen; - + CHARSET_INFO* cs = type.getCharset(); // The original string - const string& tstr = fp[0]->data()->getStrVal(row, isNull); + const string& src = fp[0]->data()->getStrVal(row, isNull); + if (isNull) + return ""; + if (src.empty() || src.length() == 0) + return src; + // binLen represents the number of bytes in src + size_t binLen = src.length(); + const char* pos = src.c_str(); + const char* end = pos + binLen; + // strLen = the number of characters in src + size_t strLen = cs->numchars(pos, end); // The trim characters. const string& trim = (fp.size() > 1 ? fp[1]->data()->getStrVal(row, isNull) : " "); + // binTLen represents the number of bytes in trim + size_t binTLen = trim.length(); + const char* posT = trim.c_str(); + // strTLen = the number of characters in trim + size_t strTLen = cs->numchars(posT, posT+binTLen); + if (strTLen == 0 || strTLen > strLen) + return src; - if (isNull) - return ""; - - if (tstr.empty() || tstr.length() == 0) - return tstr; - - // Rather than calling the wideconvert functions with a null buffer to - // determine the size of buffer to allocate, we can be sure the wide - // char string won't be longer than: - strwclen = tstr.length(); // a guess to start with. This will be >= to the real count. - int bufsize = strwclen + 1; - - // Convert the string to wide characters. Do all further work in wide characters - wchar_t* wcbuf = new wchar_t[bufsize]; - strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen + 1); - - // Bad char in mbc can return -1 - if (strwclen == static_cast(-1)) - strwclen = 0; - - // Convert the trim string to wide - trimwclen = trim.length(); // A guess to start. 
- int trimbufsize = trimwclen + 1; - wchar_t* wctrim = new wchar_t[trimbufsize]; - size_t trimlen = utf8::idb_mbstowcs(wctrim, trim.c_str(), trimwclen + 1); - - // Bad char in mbc can return -1 - if (trimlen == static_cast(-1)) - trimlen = 0; - - size_t trimCmpLen = trimlen * sizeof(wchar_t); - - const wchar_t* oPtr = wcbuf; // To remember the start of the string - const wchar_t* aPtr = oPtr; - const wchar_t* aEnd = wcbuf + strwclen - 1; - size_t trimCnt = 0; - - if (trimlen > 0) + if (binTLen == 1) { - if (trimlen == 1) + // If the trim string is 1 byte, don't waste cpu for memcmp + // Trim leading + while (pos < end && *pos == *posT) { - // If trim is a single char, then don't spend the overhead for memcmp. - wchar_t chr = wctrim[0]; - - // remove leading - while (aPtr != aEnd && *aPtr == chr) - { - aPtr++; - ++trimCnt; - } - - // remove trailing - while (aEnd != aPtr && *aEnd == chr) - { - aEnd--; - ++trimCnt; - } + ++pos; + --binLen; } - else + // Trim trailing (end is one past the last byte, so inspect end[-1]) + while (end > pos && end[-1] == *posT) { - aEnd -= (trimlen - 1); // So we don't compare past the end of the string. - - // remove leading - while (aPtr <= aEnd && !memcmp(aPtr, wctrim, trimCmpLen)) + --end; + --binLen; + } + } + else if (!cs->use_mb()) + { + // This is a one byte per char charset with multiple char trim. + // Trim leading + while (pos+binTLen <= end && memcmp(pos,posT,binTLen) == 0) + { + pos += binTLen; + binLen -= binTLen; + } + // Trim trailing + while (end-binTLen >= pos && memcmp(end-binTLen,posT,binTLen) == 0) + { + end -= binTLen; + binLen -= binTLen; + } + } + else + { + // We're using a multi-byte charset + // Trim leading is easy + while (pos+binTLen <= end && memcmp(pos,posT,binTLen) == 0) + { + pos += binTLen; + binLen -= binTLen; + } + + // Trim trailing + // The problem is that the byte pattern at the end could + // match memcmp, but not be correct since the first byte compared + // may actually be a second or later byte from a previous char. 
+ + // We start at the beginning of the string and move forward + // one character at a time until we reach the end. Then we can + // safely compare. + while (end - binTLen >= pos) + { + const char* p = pos; + uint32 l; + while (p + binTLen < end) { - aPtr += trimlen; - trimCnt += trimlen; + if ((l = my_ismbchar(cs, p, end))) // returns the number of bytes in the leading char or zero if one byte + p += l; + else + ++p; } - - // remove trailing - while (aPtr <= aEnd && !memcmp(aEnd, wctrim, trimCmpLen)) + if (p + binTLen == end && memcmp(p,posT,binTLen) == 0) { - aEnd -= trimlen; //BUG 5241 - trimCnt += trimlen; + end -= binTLen; + binLen -= binTLen; + } + else + { + break; // We've run out of places to look } } } - - // Bug 5110 - error in allocating enough memory for utf8 chars - size_t aLen = strwclen - trimCnt; - wstring trimmed = wstring(aPtr, aLen); // Turn back to a string - std::string ret(utf8::wstring_to_utf8(trimmed.c_str())); - delete [] wctrim; - delete [] wcbuf; + std::string ret(pos, binLen); return ret; }