You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-29 08:21:15 +03:00
Part#1 MCOL-495 Make string comparison not case sensitive
Fixing field='str' for long (Dict) string data types.
This commit is contained in:
@ -1668,11 +1668,6 @@ const JobStepVector doSimpleFilter(SimpleFilter* sf, JobInfo& jobInfo)
|
|||||||
}
|
}
|
||||||
//X
|
//X
|
||||||
|
|
||||||
// Because, on a filter, we want to compare ignoring trailing spaces in many cases
|
|
||||||
if (sf->op()->op() != execplan::OP_LIKE)
|
|
||||||
{
|
|
||||||
boost::algorithm::trim_right_if(constval, boost::is_any_of(" "));
|
|
||||||
}
|
|
||||||
//@bug 339 nulls are not stored in dictionary
|
//@bug 339 nulls are not stored in dictionary
|
||||||
if ((dictOid = isDictCol(ct)) > 0 && ConstantColumn::NULLDATA != cc->type())
|
if ((dictOid = isDictCol(ct)) > 0 && ConstantColumn::NULLDATA != cc->type())
|
||||||
{
|
{
|
||||||
|
@ -904,6 +904,7 @@ void pDictionaryScan::serializeEqualityFilter()
|
|||||||
ism.Command = DICT_CREATE_EQUALITY_FILTER;
|
ism.Command = DICT_CREATE_EQUALITY_FILTER;
|
||||||
msg.load((uint8_t*) &ism, sizeof(ISMPacketHeader));
|
msg.load((uint8_t*) &ism, sizeof(ISMPacketHeader));
|
||||||
msg << uniqueID;
|
msg << uniqueID;
|
||||||
|
msg << (uint32_t) colType().charsetNumber;
|
||||||
msg << (uint32_t) equalityFilter.size();
|
msg << (uint32_t) equalityFilter.size();
|
||||||
|
|
||||||
for (i = 0; i < equalityFilter.size(); i++)
|
for (i = 0; i < equalityFilter.size(); i++)
|
||||||
|
@ -186,11 +186,11 @@ void PrimitiveProcessor::p_TokenByScan(const TokenByScanRequestHeader* h,
|
|||||||
|
|
||||||
if (eqFilter)
|
if (eqFilter)
|
||||||
{
|
{
|
||||||
// MCOL-1246 Trim whitespace before match
|
if (cs != & eqFilter->getCharset())
|
||||||
// TODO MCOL-3536 use CHARSET_INFO* cs for collation
|
{
|
||||||
// cs->hash_sort(hash_sort(const uchar *key, size_t len, ulong *nr1, ulong *nr2))
|
//throw runtime_error("Collations mismatch: TokenByScanRequestHeader and DicEqualityFilter");
|
||||||
|
}
|
||||||
string strData(sig, siglen);
|
string strData(sig, siglen);
|
||||||
boost::trim_right_if(strData, boost::is_any_of(" "));
|
|
||||||
bool gotIt = eqFilter->find(strData) != eqFilter->end();
|
bool gotIt = eqFilter->find(strData) != eqFilter->end();
|
||||||
|
|
||||||
if ((h->COP1 == COMPARE_EQ && gotIt) || (h->COP1 == COMPARE_NE &&
|
if ((h->COP1 == COMPARE_EQ && gotIt) || (h->COP1 == COMPARE_NE &&
|
||||||
|
@ -102,7 +102,28 @@ public:
|
|||||||
|
|
||||||
typedef std::tr1::unordered_set<int64_t, pcfHasher, pcfEqual> prestored_set_t;
|
typedef std::tr1::unordered_set<int64_t, pcfHasher, pcfEqual> prestored_set_t;
|
||||||
typedef std::tr1::unordered_set<int128_t, pcfHasher128, pcfEqual128> prestored_set_t_128;
|
typedef std::tr1::unordered_set<int128_t, pcfHasher128, pcfEqual128> prestored_set_t_128;
|
||||||
typedef std::tr1::unordered_set<std::string, utils::Hasher> DictEqualityFilter;
|
|
||||||
|
|
||||||
|
class DictEqualityFilter: public std::tr1::unordered_set<std::string,
|
||||||
|
datatypes::CollationAwareHasher,
|
||||||
|
datatypes::CollationAwareComparator>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
DictEqualityFilter(const datatypes::Charset &cs)
|
||||||
|
:std::tr1::unordered_set<std::string,
|
||||||
|
datatypes::CollationAwareHasher,
|
||||||
|
datatypes::CollationAwareComparator>
|
||||||
|
(10,
|
||||||
|
datatypes::CollationAwareHasher(cs),
|
||||||
|
datatypes::CollationAwareComparator(cs))
|
||||||
|
{ }
|
||||||
|
CHARSET_INFO & getCharset() const
|
||||||
|
{
|
||||||
|
idbassert(& _M_h1.getCharset() == & _M_eq.getCharset());
|
||||||
|
return _M_h1.getCharset();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
struct idb_regex_t
|
struct idb_regex_t
|
||||||
{
|
{
|
||||||
|
@ -86,7 +86,7 @@ void DictStep::createCommand(ByteStream& bs)
|
|||||||
{
|
{
|
||||||
string strTmp;
|
string strTmp;
|
||||||
|
|
||||||
eqFilter.reset(new primitives::DictEqualityFilter());
|
eqFilter.reset(new primitives::DictEqualityFilter(my_charset_latin1));
|
||||||
bs >> eqOp;
|
bs >> eqOp;
|
||||||
|
|
||||||
//cout << "saw the eqfilter count=" << filterCount << endl;
|
//cout << "saw the eqfilter count=" << filterCount << endl;
|
||||||
|
@ -1804,12 +1804,15 @@ public:
|
|||||||
private:
|
private:
|
||||||
void createEqualityFilter()
|
void createEqualityFilter()
|
||||||
{
|
{
|
||||||
uint32_t uniqueID, count, i;
|
uint32_t uniqueID, count, i, charsetNumber;
|
||||||
string str;
|
string str;
|
||||||
boost::shared_ptr<DictEqualityFilter> filter(new DictEqualityFilter());
|
|
||||||
|
|
||||||
bs->advance(sizeof(ISMPacketHeader));
|
bs->advance(sizeof(ISMPacketHeader));
|
||||||
*bs >> uniqueID;
|
*bs >> uniqueID;
|
||||||
|
*bs >> charsetNumber;
|
||||||
|
|
||||||
|
datatypes::Charset cs(charsetNumber);
|
||||||
|
boost::shared_ptr<DictEqualityFilter> filter(new DictEqualityFilter(cs));
|
||||||
|
|
||||||
*bs >> count;
|
*bs >> count;
|
||||||
|
|
||||||
for (i = 0; i < count; i++)
|
for (i = 0; i < count; i++)
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
#ifndef COLLATION_H_INCLUDED
|
#ifndef COLLATION_H_INCLUDED
|
||||||
#define COLLATION_H_INCLUDED
|
#define COLLATION_H_INCLUDED
|
||||||
|
|
||||||
|
#include "exceptclasses.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Redefine definitions used by MariaDB m_ctype.h.
|
Redefine definitions used by MariaDB m_ctype.h.
|
||||||
@ -83,6 +84,26 @@ extern "C" MYSQL_PLUGIN_IMPORT CHARSET_INFO *default_charset_info;
|
|||||||
namespace datatypes
|
namespace datatypes
|
||||||
{
|
{
|
||||||
|
|
||||||
|
class MariaDBHasher
|
||||||
|
{
|
||||||
|
ulong mPart1;
|
||||||
|
ulong mPart2;
|
||||||
|
public:
|
||||||
|
MariaDBHasher()
|
||||||
|
:mPart1(1), mPart2(4)
|
||||||
|
{ }
|
||||||
|
MariaDBHasher & add(CHARSET_INFO & cs, const char *str, size_t length)
|
||||||
|
{
|
||||||
|
cs.hash_sort((const uchar *) str, length, &mPart1, &mPart2);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
uint32_t finalize() const
|
||||||
|
{
|
||||||
|
return (uint32_t) mPart1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
// A reference to MariaDB CHARSET_INFO.
|
// A reference to MariaDB CHARSET_INFO.
|
||||||
|
|
||||||
class Charset
|
class Charset
|
||||||
@ -93,9 +114,50 @@ public:
|
|||||||
Charset(CHARSET_INFO & cs) :mCharset(cs) { }
|
Charset(CHARSET_INFO & cs) :mCharset(cs) { }
|
||||||
Charset(uint32_t charsetNumber);
|
Charset(uint32_t charsetNumber);
|
||||||
CHARSET_INFO & getCharset() const { return mCharset; }
|
CHARSET_INFO & getCharset() const { return mCharset; }
|
||||||
|
uint32_t hash(const char *data, uint64_t len) const
|
||||||
|
{
|
||||||
|
return MariaDBHasher().add(mCharset, data, len).finalize();
|
||||||
|
}
|
||||||
|
bool eq(const std::string & str1, const std::string & str2) const
|
||||||
|
{
|
||||||
|
return mCharset.strnncollsp(str1.data(), str1.length(),
|
||||||
|
str2.data(), str2.length()) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class CollationAwareHasher: public Charset
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
CollationAwareHasher(const Charset &cs)
|
||||||
|
:Charset(cs)
|
||||||
|
{ }
|
||||||
|
inline uint32_t operator()(const std::string& s) const
|
||||||
|
{
|
||||||
|
return operator()(s.data(), s.length());
|
||||||
|
}
|
||||||
|
inline uint32_t operator()(const char* data, uint64_t len) const
|
||||||
|
{
|
||||||
|
return Charset::hash(data, len);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class CollationAwareComparator: public Charset
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
CollationAwareComparator(const Charset &cs)
|
||||||
|
:Charset(cs)
|
||||||
|
{ }
|
||||||
|
bool operator()(const std::string & str1, const std::string & str2) const
|
||||||
|
{
|
||||||
|
return Charset::eq(str1, str2);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} // end of namespace datatypes
|
} // end of namespace datatypes
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Reference in New Issue
Block a user