diff --git a/dbcon/joblist/dictstep-jl.cpp b/dbcon/joblist/dictstep-jl.cpp index b6d7fdb87..cc34f45ac 100644 --- a/dbcon/joblist/dictstep-jl.cpp +++ b/dbcon/joblist/dictstep-jl.cpp @@ -59,6 +59,7 @@ DictStepJL::DictStepJL(const pDictionaryStep& dict) filterString = dict.fFilterString; filterCount = dict.fFilterCount; + charsetNumber = dict.fColType.charsetNumber; } DictStepJL::~DictStepJL() @@ -88,7 +89,7 @@ void DictStepJL::createCommand(ByteStream& bs) const } else bs << filterString; - + bs << charsetNumber; CommandJL::createCommand(bs); } diff --git a/dbcon/joblist/dictstep-jl.h b/dbcon/joblist/dictstep-jl.h index ff5fd8eaa..a9782acf4 100644 --- a/dbcon/joblist/dictstep-jl.h +++ b/dbcon/joblist/dictstep-jl.h @@ -76,6 +76,7 @@ private: std::vector eqFilter; bool hasEqFilter; uint8_t eqOp; // COMPARE_EQ or COMPARE_NE + uint32_t charsetNumber; }; }; // namespace diff --git a/primitives/linux-port/dictionary.cpp b/primitives/linux-port/dictionary.cpp index 87eb6638f..dd46c9739 100644 --- a/primitives/linux-port/dictionary.cpp +++ b/primitives/linux-port/dictionary.cpp @@ -103,7 +103,7 @@ Notes: */ void PrimitiveProcessor::p_TokenByScan(const TokenByScanRequestHeader* h, - TokenByScanResultHeader* ret, unsigned outSize, bool utf8, + TokenByScanResultHeader* ret, unsigned outSize, boost::shared_ptr eqFilter) { const DataValue* args; @@ -113,7 +113,6 @@ void PrimitiveProcessor::p_TokenByScan(const TokenByScanRequestHeader* h, int offsetIndex, argIndex, argsOffset; bool cmpResult = false; int tmp, i, err; - const char* sig; uint16_t siglen; @@ -191,6 +190,8 @@ void PrimitiveProcessor::p_TokenByScan(const TokenByScanRequestHeader* h, if (eqFilter) { // MCOL-1246 Trim whitespace before match + // TODO MCOL-3536 use CHARSET_INFO* cs for collation + // cs->hash_sort(hash_sort(const uchar *key, size_t len, ulong *nr1, ulong *nr2)) string strData(sig, siglen); boost::trim_right_if(strData, boost::is_any_of(" ")); bool gotIt = eqFilter->find(strData) != eqFilter->end(); @@ -215,14 +216,7 @@ void PrimitiveProcessor::p_TokenByScan(const TokenByScanRequestHeader* h, } else { - if (utf8) - { - tmp = cs->strnncoll(sig, siglen, args->data, args->len); - } - else - { - tmp = strncmp(sig, args->data, std::min(siglen, args->len)); - } + tmp = cs->strnncoll(sig, siglen, args->data, args->len); cmpResult = compare(tmp, h->COP1, siglen, args->len); } @@ -263,14 +257,7 @@ void PrimitiveProcessor::p_TokenByScan(const TokenByScanRequestHeader* h, else { - if (utf8) - { - tmp = cs->strnncoll(sig, siglen, args->data, args->len); - } - else - { - tmp = strncmp(sig, args->data, std::min(siglen, args->len)); - } + tmp = cs->strnncoll(sig, siglen, args->data, args->len); cmpResult = compare(tmp, h->COP2, siglen, args->len); } @@ -298,14 +285,7 @@ void PrimitiveProcessor::p_TokenByScan(const TokenByScanRequestHeader* h, else { - if (utf8) - { - tmp = cs->strnncoll(sig, siglen, args->data, args->len); - } - else - { - tmp = strncmp(sig, args->data, std::min(siglen, args->len)); - } + tmp = cs->strnncoll(sig, siglen, args->data, args->len); cmpResult = compare(tmp, h->COP2, siglen, args->len); } @@ -667,8 +647,12 @@ PrimitiveProcessor::makeLikeFilter (const DictFilterElement* filterString, uint3 return ret; } -void PrimitiveProcessor::p_Dictionary(const DictInput* in, vector* out, bool utf8, - bool skipNulls, boost::shared_ptr eqFilter, uint8_t eqOp) +void PrimitiveProcessor::p_Dictionary(const DictInput* in, + vector* out, + bool skipNulls, + uint32_t charsetNumber, + boost::shared_ptr eqFilter, + uint8_t eqOp) { PrimToken* outToken; const DictFilterElement* filter = 0; @@ -679,6 +663,7 @@ void PrimitiveProcessor::p_Dictionary(const DictInput* in, vector* out, uint16_t aggCount; bool cmpResult; DictOutput header; + const CHARSET_INFO* cs = get_charset(charsetNumber, MYF(MY_WME)); // default size of the ouput to something sufficiently large to prevent // excessive reallocation and copy when resizing @@ -714,30 +699,13 @@ void PrimitiveProcessor::p_Dictionary(const DictInput* in, vector* out, nextSig(in->NVALS, in->tokens, &sigptr, in->OutputType, (in->InputFlags ? true : false), skipNulls)) { - - string sig_utf8; - - if (utf8) - { - string tmpString((char*)sigptr.data, sigptr.len); - sig_utf8 = tmpString; - } - // do aggregate processing if (in->OutputType & OT_AGGREGATE) { // len == 0 indicates this is the first pass if (max.len != 0) { - if (utf8 ) - { - string max_utf8((char*)max.data, max.len); - tmp = utf8::idb_strcoll(sig_utf8.c_str(), max_utf8.c_str()); - } - else - { - tmp = strncmp((char*)sigptr.data, (char*)max.data, std::min(sigptr.len, max.len)); - } + tmp = cs->strnncoll(sigptr.data, sigptr.len, max.data, max.len); if (tmp > 0) max = sigptr; @@ -747,15 +715,7 @@ void PrimitiveProcessor::p_Dictionary(const DictInput* in, vector* out, if (min.len != 0) { - if (utf8) - { - string min_utf8((char*)min.data, min.len); - tmp = utf8::idb_strcoll(sig_utf8.c_str(), min_utf8.c_str()); - } - else - { - tmp = strncmp((char*)sigptr.data, (char*)min.data, std::min(sigptr.len, min.len)); - } + tmp = cs->strnncoll(sigptr.data, sigptr.len, min.data, min.len); if (tmp < 0) min = sigptr; @@ -788,15 +748,6 @@ void PrimitiveProcessor::p_Dictionary(const DictInput* in, vector* out, for (filterIndex = 0; filterIndex < in->NOPS; filterIndex++) { filter = reinterpret_cast(&in8[filterOffset]); - string filt_utf8; - size_t filt_utf8_len = 0; - - if (utf8) - { - string tmpString((const char*)filter->data, filter->len); - filt_utf8 = tmpString; - filt_utf8_len = filt_utf8.length(); - } if (filter->COP & COMPARE_LIKE) { @@ -807,18 +758,7 @@ void PrimitiveProcessor::p_Dictionary(const DictInput* in, vector* out, } else { - if (utf8) - { - size_t sig_utf8_len = sig_utf8.length(); - tmp = utf8::idb_strcoll(sig_utf8.c_str(), filt_utf8.c_str()); - cmpResult = compare(tmp, filter->COP, sig_utf8_len, filt_utf8_len); - } - else - { - tmp = strncmp((const char*) sigptr.data, (const char*)filter->data, - std::min(sigptr.len, static_cast(filter->len))); - } - + tmp = cs->strnncoll(sigptr.data, sigptr.len, filter->data, filter->len); cmpResult = compare(tmp, filter->COP, sigptr.len, filter->len); } diff --git a/primitives/linux-port/primitiveprocessor.h b/primitives/linux-port/primitiveprocessor.h index 677d0538e..0e8a93396 100644 --- a/primitives/linux-port/primitiveprocessor.h +++ b/primitives/linux-port/primitiveprocessor.h @@ -185,7 +185,7 @@ public: * @note Throws logic_error if the output buffer is too small for the result. */ void p_TokenByScan(const TokenByScanRequestHeader* t, - TokenByScanResultHeader* out, unsigned outSize, bool utf8, + TokenByScanResultHeader* out, unsigned outSize, boost::shared_ptr eqFilter); /** @brief The p_IdxWalk primitive processor @@ -264,8 +264,9 @@ public: */ // void p_ColAggregate(const NewColAggRequestHeader *in, NewColAggResultHeader *out); - void p_Dictionary(const DictInput* in, std::vector* out, bool utf8, - bool skipNulls, boost::shared_ptr eqFilter, + void p_Dictionary(const DictInput* in, std::vector* out, + bool skipNulls, uint32_t charsetNumber, + boost::shared_ptr eqFilter, uint8_t eqOp); inline void setLogicalBlockMode(bool b) diff --git a/primitives/primproc/dictstep.cpp b/primitives/primproc/dictstep.cpp index 107500d05..50d99c1fa 100644 --- a/primitives/primproc/dictstep.cpp +++ b/primitives/primproc/dictstep.cpp @@ -98,7 +98,8 @@ void DictStep::createCommand(ByteStream& bs) } else bs >> filterString; - + + bs >> charsetNumber; #if 0 cout << "see " << filterCount << " filters\n"; DictFilterElement* filters = (DictFilterElement*) filterString.buf(); @@ -173,8 +174,7 @@ void DictStep::issuePrimitive(bool isFilter) } bpp->pp.setLikeFilter(likeFilter); - // MCOL-3536 We shouldn't need to pass in utf8 -- maybe?? - bpp->pp.p_Dictionary(primMsg, &result, true, isFilter, eqFilter, eqOp); + bpp->pp.p_Dictionary(primMsg, &result, isFilter, charsetNumber, eqFilter, eqOp); } void DictStep::copyResultToTmpSpace(OrderedToken* ot) diff --git a/primitives/primproc/dictstep.h b/primitives/primproc/dictstep.h index 025658d5b..581a4cd14 100644 --- a/primitives/primproc/dictstep.h +++ b/primitives/primproc/dictstep.h @@ -141,8 +141,9 @@ private: messageqcpp::ByteStream filterString; uint32_t filterCount; uint32_t bufferSize; + uint32_t charsetNumber; uint16_t inputRidCount; - + bool hasEqFilter; boost::shared_ptr eqFilter; boost::shared_array likeFilter; diff --git a/primitives/primproc/primitiveserver.cpp b/primitives/primproc/primitiveserver.cpp index 26ae03ffa..b41736566 100644 --- a/primitives/primproc/primitiveserver.cpp +++ b/primitives/primproc/primitiveserver.cpp @@ -1154,7 +1154,6 @@ int DictScanJob::operator()() PrimitiveProcessor pproc(gDebugLevel); TokenByScanResultHeader* output; QueryContext verInfo; - bool bUtf8; try { @@ -1166,25 +1165,6 @@ int DictScanJob::operator()() *fByteStream >> verInfo; cmd = (TokenByScanRequestHeader*) fByteStream->buf(); - // If charset is one of those that can be representedby standard ascii, - // we can get a performance improvement by using strcmp rather than - // the full charset compare system. - switch (cmd->charsetNumber) - { - case 8: // latin1_swedish_ci - case 9: // latin2_general_ci - case 11: // ascii_general_ci - case 47: // latin1_bin - case 48: // latin1_general_ci - case 49: // latin1_general_cs - case 65: // ascii_bin - case 77: // latin2_bin - bUtf8 = false; - break; - default: - bUtf8 = true; - } - session = cmd->Hdr.SessionID; uniqueId = cmd->Hdr.UniqueID; runCount = cmd->Count; @@ -1229,8 +1209,7 @@ int DictScanJob::operator()() fLBIDTraceOn, session); pproc.setBlockPtr((int*) data); - // MCOL-3536 We shouldn't need to pass in utf8 -- maybe?? - pproc.p_TokenByScan(cmd, output, output_buf_size, bUtf8, eqFilter); + pproc.p_TokenByScan(cmd, output, output_buf_size, eqFilter); if (wasBlockInCache) output->CacheIO++; diff --git a/utils/funcexp/func_between.cpp b/utils/funcexp/func_between.cpp index 92abb080f..e58c78c2b 100644 --- a/utils/funcexp/func_between.cpp +++ b/utils/funcexp/func_between.cpp @@ -22,6 +22,10 @@ * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include + #include #include using namespace std; @@ -53,16 +57,16 @@ inline bool numericLE(result_t op1, result_t op2) return op1 <= op2; } -inline bool strGE(const string& op1, const string& op2) +inline bool strGE(uint32_t charsetNumber, const string& op1, const string& op2) { - //return strcoll(op1.c_str(), op2.c_str()) >= 0; - return utf8::idb_strcoll(op1.c_str(), op2.c_str()) >= 0; + const CHARSET_INFO* cs = get_charset(charsetNumber, MYF(MY_WME)); + return cs->strnncoll(op1.c_str(), op1.length(), op2.c_str(), op2.length()) >= 0; } -inline bool strLE(const string& op1, const string& op2) +inline bool strLE(uint32_t charsetNumber, const string& op1, const string& op2) { - //return strcoll(op1.c_str(), op2.c_str()) <= 0; - return utf8::idb_strcoll(op1.c_str(), op2.c_str()) <= 0; + const CHARSET_INFO* cs = get_charset(charsetNumber, MYF(MY_WME)); + return cs->strnncoll(op1.c_str(), op1.length(), op2.c_str(), op2.length()) <= 0; } inline bool getBool(rowgroup::Row& row, @@ -256,16 +260,16 @@ inline bool getBool(rowgroup::Row& row, if (notBetween) { - if (!strGE(val, pm[1]->data()->getStrVal(row, isNull)) && !isNull) + if (!strGE(ct.charsetNumber, val, pm[1]->data()->getStrVal(row, isNull)) && !isNull) return true; isNull = false; - return (!strLE(val, pm[2]->data()->getStrVal(row, isNull)) && !isNull); + return (!strLE(ct.charsetNumber, val, pm[2]->data()->getStrVal(row, isNull)) && !isNull); } - + return !isNull && - strGE(val, pm[1]->data()->getStrVal(row, isNull)) && - strLE(val, pm[2]->data()->getStrVal(row, isNull)); + strGE(ct.charsetNumber, val, pm[1]->data()->getStrVal(row, isNull)) && + strLE(ct.charsetNumber, val, pm[2]->data()->getStrVal(row, isNull)); } default: diff --git a/utils/funcexp/func_char_length.cpp b/utils/funcexp/func_char_length.cpp index 72bdfdfda..fbe8f6074 100644 --- a/utils/funcexp/func_char_length.cpp +++ b/utils/funcexp/func_char_length.cpp @@ -21,6 +21,10 @@ * ****************************************************************************/ +#include +#undef set_bits // mariadb.h defines set_bits, which is incompatible with boost +#include + #include #include #include @@ -78,15 +82,12 @@ int64_t Func_char_length::getIntVal(rowgroup::Row& row, case execplan::CalpontSystemCatalog::UDECIMAL: { const string& tstr = parm[0]->data()->getStrVal(row, isNull); - if (isNull) return 0; - - size_t strwclen = utf8::idb_mbstowcs(0, tstr.c_str(), 0) + 1; - wchar_t* wcbuf = new wchar_t[strwclen]; - strwclen = utf8::idb_mbstowcs(wcbuf, tstr.c_str(), strwclen); - delete [] wcbuf; - return (int64_t)strwclen; + const char* b = tstr.c_str(); + const char* e = tstr.c_str() + tstr.length(); + const CHARSET_INFO* cs = get_charset(parm[0]->data()->resultType().charsetNumber, MYF(MY_WME)); + return (int64_t)cs->numchars(b, e); } case execplan::CalpontSystemCatalog::DATE: diff --git a/utils/funcexp/func_concat_ws.cpp b/utils/funcexp/func_concat_ws.cpp index af3da5245..467fb6845 100644 --- a/utils/funcexp/func_concat_ws.cpp +++ b/utils/funcexp/func_concat_ws.cpp @@ -54,7 +54,6 @@ string Func_concat_ws::getStrVal(Row& row, if (isNull) return ""; -#ifdef STRCOLL_ENH__ wstring wstr; size_t strwclen = utf8::idb_mbstowcs(0, delim.c_str(), 0) + 1; wchar_t* wcbuf = new wchar_t[strwclen]; @@ -96,7 +95,7 @@ string Func_concat_ws::getStrVal(Row& row, delete [] wcbuf; return ret; -#else +#if 0 string str; string tmp; for ( uint32_t i = 1 ; i < parm.size() ; i++)