diff --git a/dbcon/joblist/jlf_execplantojoblist.cpp b/dbcon/joblist/jlf_execplantojoblist.cpp
index 42bbf8904..876f2af32 100644
--- a/dbcon/joblist/jlf_execplantojoblist.cpp
+++ b/dbcon/joblist/jlf_execplantojoblist.cpp
@@ -1668,11 +1668,6 @@ const JobStepVector doSimpleFilter(SimpleFilter* sf, JobInfo& jobInfo)
         }
 
 //X
-        // Because, on a filter, we want to compare ignoring trailing spaces in many cases
-        if (sf->op()->op() != execplan::OP_LIKE)
-        {
-            boost::algorithm::trim_right_if(constval, boost::is_any_of(" "));
-        }
         //@bug 339 nulls are not stored in dictionary
         if ((dictOid = isDictCol(ct)) > 0 && ConstantColumn::NULLDATA != cc->type())
         {
diff --git a/dbcon/joblist/pdictionaryscan.cpp b/dbcon/joblist/pdictionaryscan.cpp
index d4810a0fc..c5e8745ae 100644
--- a/dbcon/joblist/pdictionaryscan.cpp
+++ b/dbcon/joblist/pdictionaryscan.cpp
@@ -904,6 +904,8 @@ void pDictionaryScan::serializeEqualityFilter()
     ism.Command = DICT_CREATE_EQUALITY_FILTER;
     msg.load((uint8_t*) &ism, sizeof(ISMPacketHeader));
     msg << uniqueID;
+    // The charset number goes between uniqueID and the entry count;
+    // createEqualityFilter() in primitiveserver.cpp reads the fields in this order.
     msg << (uint32_t) colType().charsetNumber;
     msg << (uint32_t) equalityFilter.size();
 
     for (i = 0; i < equalityFilter.size(); i++)
diff --git a/primitives/linux-port/dictionary.cpp b/primitives/linux-port/dictionary.cpp
index 6bde42b2e..4d141d4db 100644
--- a/primitives/linux-port/dictionary.cpp
+++ b/primitives/linux-port/dictionary.cpp
@@ -186,11 +186,12 @@ void PrimitiveProcessor::p_TokenByScan(const TokenByScanRequestHeader* h,
 
         if (eqFilter)
         {
-            // MCOL-1246 Trim whitespace before match
-            // TODO MCOL-3536 use CHARSET_INFO* cs for collation
-            // cs->hash_sort(hash_sort(const uchar *key, size_t len, ulong *nr1, ulong *nr2))
+            // NOTE(review): this check has no effect while the throw below stays commented out.
+            if (cs != & eqFilter->getCharset())
+            {
+                //throw runtime_error("Collations mismatch: TokenByScanRequestHeader and DictEqualityFilter");
+            }
             string strData(sig, siglen);
-            boost::trim_right_if(strData, boost::is_any_of(" "));
             bool gotIt = eqFilter->find(strData) != eqFilter->end();
 
             if ((h->COP1 == COMPARE_EQ && gotIt) || (h->COP1 == COMPARE_NE &&
diff --git a/primitives/linux-port/primitiveprocessor.h b/primitives/linux-port/primitiveprocessor.h
index 4c674f95e..9d7e65326 100644
--- a/primitives/linux-port/primitiveprocessor.h
+++ b/primitives/linux-port/primitiveprocessor.h
@@ -102,7 +102,33 @@ public:
 
 typedef std::tr1::unordered_set prestored_set_t;
 typedef std::tr1::unordered_set prestored_set_t_128;
-typedef std::tr1::unordered_set DictEqualityFilter;
+
+
+// Set of std::string keys whose hashing and equality both go through
+// functors built from a single datatypes::Charset.
+class DictEqualityFilter: public std::tr1::unordered_set<std::string,
+                                                         datatypes::CollationAwareHasher,
+                                                         datatypes::CollationAwareComparator>
+{
+public:
+    DictEqualityFilter(const datatypes::Charset &cs)
+     :std::tr1::unordered_set<std::string,
+                              datatypes::CollationAwareHasher,
+                              datatypes::CollationAwareComparator>
+      (10,
+       datatypes::CollationAwareHasher(cs),
+       datatypes::CollationAwareComparator(cs))
+    { }
+    // Charset used by the hasher; asserts that the comparator uses the same one.
+    // Read through the public hash_function()/key_eq() observers rather than
+    // the library-internal _M_h1/_M_eq members.
+    CHARSET_INFO & getCharset() const
+    {
+        idbassert(& hash_function().getCharset() == & key_eq().getCharset());
+        return hash_function().getCharset();
+    }
+};
+
 
 struct idb_regex_t
 {
diff --git a/primitives/primproc/dictstep.cpp b/primitives/primproc/dictstep.cpp
index cc728e770..8d509b006 100644
--- a/primitives/primproc/dictstep.cpp
+++ b/primitives/primproc/dictstep.cpp
@@ -86,7 +86,9 @@ void DictStep::createCommand(ByteStream& bs)
     {
         string strTmp;
 
-        eqFilter.reset(new primitives::DictEqualityFilter());
+        // NOTE(review): the collation is fixed to latin1 here, while createEqualityFilter()
+        // in primitiveserver.cpp takes it from the byte stream -- confirm this is intended.
+        eqFilter.reset(new primitives::DictEqualityFilter(my_charset_latin1));
         bs >> eqOp;
         //cout << "saw the eqfilter count=" << filterCount << endl;
 
diff --git a/primitives/primproc/primitiveserver.cpp b/primitives/primproc/primitiveserver.cpp
index 46af61900..d0d07c341 100644
--- a/primitives/primproc/primitiveserver.cpp
+++ b/primitives/primproc/primitiveserver.cpp
@@ -1804,12 +1804,15 @@ public:
 private:
     void createEqualityFilter()
     {
-        uint32_t uniqueID, count, i;
+        uint32_t uniqueID, count, i, charsetNumber;
         string str;
-        boost::shared_ptr filter(new DictEqualityFilter());
-
         bs->advance(sizeof(ISMPacketHeader));
         *bs >> uniqueID;
+        *bs >> charsetNumber;
+
+        datatypes::Charset cs(charsetNumber);
+        boost::shared_ptr<DictEqualityFilter> filter(new DictEqualityFilter(cs));
+
         *bs >> count;
 
         for (i = 0; i < count; i++)
diff --git a/utils/common/collation.h b/utils/common/collation.h
index 1c0085e03..e007a6407 100644
--- a/utils/common/collation.h
+++ b/utils/common/collation.h
@@ -18,6 +18,7 @@
 #ifndef COLLATION_H_INCLUDED
 #define COLLATION_H_INCLUDED
 
+#include "exceptclasses.h"
 
 /*
   Redefine definitions used by MariaDB m_ctype.h.
@@ -83,6 +84,28 @@ extern "C" MYSQL_PLUGIN_IMPORT CHARSET_INFO *default_charset_info;
 namespace datatypes
 {
 
+// Accumulates a hash over byte strings by delegating to CHARSET_INFO::hash_sort();
+// finalize() returns the first accumulator converted to uint32_t.
+class MariaDBHasher
+{
+    ulong mPart1;
+    ulong mPart2;
+public:
+    MariaDBHasher()
+     :mPart1(1), mPart2(4)
+    { }
+    MariaDBHasher & add(CHARSET_INFO & cs, const char *str, size_t length)
+    {
+        cs.hash_sort((const uchar *) str, length, &mPart1, &mPart2);
+        return *this;
+    }
+    uint32_t finalize() const
+    {
+        return (uint32_t) mPart1;
+    }
+};
+
+
 // A reference to MariaDB CHARSET_INFO.
 
 class Charset
@@ -93,9 +116,54 @@ public:
     Charset(CHARSET_INFO & cs) :mCharset(cs) { }
     Charset(uint32_t charsetNumber);
     CHARSET_INFO & getCharset() const { return mCharset; }
+    // Hash of the len bytes at data, computed with this charset's hash_sort() via MariaDBHasher.
+    uint32_t hash(const char *data, uint64_t len) const
+    {
+        return MariaDBHasher().add(mCharset, data, len).finalize();
+    }
+    // True when this charset's strnncollsp() returns 0 for the two strings.
+    bool eq(const std::string & str1, const std::string & str2) const
+    {
+        return mCharset.strnncollsp(str1.data(), str1.length(),
+                                    str2.data(), str2.length()) == 0;
+    }
+
 };
 
+// Hash functor: forwards to Charset::hash().
+class CollationAwareHasher: public Charset
+{
+public:
+    CollationAwareHasher(const Charset &cs)
+     :Charset(cs)
+    { }
+    inline uint32_t operator()(const std::string& s) const
+    {
+        return operator()(s.data(), s.length());
+    }
+    inline uint32_t operator()(const char* data, uint64_t len) const
+    {
+        return Charset::hash(data, len);
+    }
+};
+
+
+// Equality functor: forwards to Charset::eq().
+class CollationAwareComparator: public Charset
+{
+public:
+    CollationAwareComparator(const Charset &cs)
+     :Charset(cs)
+    { }
+    bool operator()(const std::string & str1, const std::string & str2) const
+    {
+        return Charset::eq(str1, str2);
+    }
+};
+
+
+
 
 } // end of namespace datatypes
 
 #endif