1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

Part#2 MCOL-495 Make string comparison not case sensitive

Fixing field='str' for short (non-Dict) CHAR and VARCHAR data types.
This commit is contained in:
Alexander Barkov
2020-12-01 12:50:54 +04:00
parent 0ff6a6ec20
commit 52c5af054a
10 changed files with 268 additions and 88 deletions

View File

@ -19,6 +19,7 @@
#define COLLATION_H_INCLUDED
#include "exceptclasses.h"
#include "conststring.h"
/*
Redefine definitions used by MariaDB m_ctype.h.
@ -92,9 +93,9 @@ public:
// Default constructor: seeds the two hash state words, mPart1 with 1 and
// mPart2 with 4.
MariaDBHasher()
:mPart1(1), mPart2(4)
{ }
// Feeds `length` bytes starting at `str` into the hasher by calling the
// charset's hash_sort(); mPart1 and mPart2 are passed by address so the
// call can update them. Returns *this so further calls can be chained.
// NOTE(review): this listing looks like a rendered diff without +/- markers.
// The signature and the hash_sort() call each appear twice: first taking the
// charset by reference (presumably the removed form), then by pointer
// (presumably the added form) - confirm against the repository.
MariaDBHasher & add(CHARSET_INFO & cs, const char *str, size_t length)
MariaDBHasher & add(CHARSET_INFO * cs, const char *str, size_t length)
{
cs.hash_sort((const uchar *) str, length, &mPart1, &mPart2);
cs->hash_sort((const uchar *) str, length, &mPart1, &mPart2);
return *this;
}
uint32_t finalize() const
@ -109,21 +110,33 @@ public:
// Thin wrapper around a charset_info_st that exposes hashing and comparison
// helpers by forwarding to the wrapped charset.
// NOTE(review): this listing looks like a rendered diff without +/- markers.
// Where two near-identical lines are adjacent, the first (reference-based)
// is presumably the removed version and the second (pointer-based) the added
// one - confirm against the repository.
class Charset
{
protected:
// The wrapped charset: held by reference in the first form, by pointer in
// the second.
const struct charset_info_st & mCharset;
const struct charset_info_st * mCharset;
public:
// Wraps an existing charset object; the pointer form stores its address.
Charset(CHARSET_INFO & cs) :mCharset(cs) { }
Charset(CHARSET_INFO & cs) :mCharset(&cs) { }
// Constructs from a numeric charset id; only declared here, defined elsewhere.
Charset(uint32_t charsetNumber);
// Returns the wrapped charset as a reference.
CHARSET_INFO & getCharset() const { return mCharset; }
CHARSET_INFO & getCharset() const { return *mCharset; }
// Hashes `len` bytes at `data` with a fresh MariaDBHasher fed through this
// charset, and returns the finalized 32-bit value.
uint32_t hash(const char *data, uint64_t len) const
{
return MariaDBHasher().add(mCharset, data, len).finalize();
}
// Returns true when the charset's strnncollsp() reports 0 for the two strings.
bool eq(const std::string & str1, const std::string & str2) const
{
return mCharset.strnncollsp(str1.data(), str1.length(),
str2.data(), str2.length()) == 0;
return mCharset->strnncollsp(str1.data(), str1.length(),
str2.data(), str2.length()) == 0;
}
// Forwards two ConstString views to the charset's strnncollsp() and returns
// its int result unchanged.
int strnncollsp(const utils::ConstString &str1,
const utils::ConstString &str2) const
{
return mCharset->strnncollsp(str1.str(), str1.length(),
str2.str(), str2.length());
}
// Tests the byte range [str, end).
// If the charset state has MY_CS_NOPAD set, the result is simply whether the
// range is non-empty. Otherwise the result is whether `str` advanced by the
// value of scan(str, end, MY_SEQ_SPACES) is still before `end`.
// NOTE(review): presumably scan(..., MY_SEQ_SPACES) yields the length of the
// leading run of spaces, making this "anything other than padding spaces
// left?" - confirm against m_ctype.h.
bool test_if_important_data(const char *str, const char *end) const
{
if (mCharset->state & MY_CS_NOPAD)
return str < end;
return str + mCharset->scan(str, end, MY_SEQ_SPACES) < end;
}
};

View File

@ -0,0 +1,47 @@
/* Copyright (C) 2020 MariaDB Corporation.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#ifndef MARIADB_CONSTSTRING_H
#define MARIADB_CONSTSTRING_H
namespace utils
{
/// Lightweight (pointer, length) pair describing a run of characters.
/// The constructor stores the pointer as given; the bytes are not copied.
class ConstString
{
    const char *mStr;
    size_t mLength;

public:
    /// Records the start pointer and the number of bytes of the run.
    ConstString(const char *str, size_t length)
        : mStr(str), mLength(length)
    {
    }

    /// Start of the character run.
    const char *str() const
    {
        return mStr;
    }

    /// Current length of the run in bytes.
    size_t length() const
    {
        return mLength;
    }

    /// Shrinks the length so that trailing '\0' bytes are excluded.
    /// Only the stored length changes; the bytes are left untouched.
    /// Returns *this to allow chaining.
    ConstString &rtrimZero()
    {
        while (mLength != 0 && mStr[mLength - 1] == '\0')
        {
            --mLength;
        }
        return *this;
    }
};
} // namespace utils
#endif // MARIADB_CONSTSTRING_H

View File

@ -31,7 +31,7 @@ static inline CHARSET_INFO & get_charset_or_bin(int32_t charsetNumber)
// Constructs a Charset from a numeric charset id by resolving the id through
// get_charset_or_bin() and wrapping the CHARSET_INFO it returns.
// NOTE(review): the two initializer lines look like the removed (reference)
// and added (address-of) forms of one line in a rendered diff - confirm
// against the repository.
Charset::Charset(uint32_t charsetNumber)
:mCharset(get_charset_or_bin(charsetNumber))
:mCharset(&get_charset_or_bin(charsetNumber))
{
}

View File

@ -1542,8 +1542,14 @@ DataConvert::StringToString(const datatypes::SystemCatalog::TypeAttributesStd& c
//check data length
if ( data.length() > (unsigned int)colType.colWidth )
{
// TODO: charsetNumber should be moved to TypeAttributesStd ASAP
const execplan::CalpontSystemCatalog::ColType &colType2=
static_cast<const execplan::CalpontSystemCatalog::ColType &>(colType);
datatypes::Charset cs(colType2.charsetNumber);
const char *newEnd = data.data() + colType.colWidth;
const char *origEnd = data.data() + data.length();
pushWarning = cs.test_if_important_data(newEnd, origEnd);
data = data.substr(0, colType.colWidth);
pushWarning = true;
boost::any value = data;
return value;
}