You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-02 17:22:27 +03:00
This patch improves the handling of NULLs in textual fields in ColumnStore. Previously, empty strings were treated as NULLs, which was a problem whenever the data schema allowed empty strings. It was also one of the major reasons for behavior differences between ColumnStore and other engines in the MariaDB family. This patch also fixes several other bugs and cases of incorrect behavior, for example the comparison "column <= ''", which effectively evaluated to a constant True before this patch.
257 lines
7.1 KiB
C++
257 lines
7.1 KiB
C++
/*
|
|
Copyright (C) 2020-2022 MariaDB Corporation
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; version 2 of
|
|
the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
MA 02110-1301, USA. */
|
|
#pragma once
|
|
|
|
#if defined(PREFER_MY_CONFIG_H)
|
|
|
|
#if !defined(MY_CONFIG_H)
|
|
#error my_config.h was not included (but PREFER_MY_CONFIG_H was set)
|
|
#endif
|
|
|
|
#include "mcsconfig_conflicting_defs_remember.h"
|
|
#include "mcsconfig_conflicting_defs_undef.h"
|
|
|
|
#else
|
|
#if defined(MY_CONFIG_H)
|
|
#error my_config.h was included before mcsconfig.h (and PREFER_MY_CONFIG_H was not set)
|
|
#endif
|
|
#endif // PREFER_MY_CONFIG_H
|
|
|
|
#include "mcsconfig.h"
|
|
|
|
#include "exceptclasses.h"
|
|
#include "conststring.h"
|
|
|
|
/*
|
|
Redefine definitions used by MariaDB m_ctype.h.
|
|
This is needed to avoid including <mariadb.h> and <my_sys.h>,
|
|
which conflict with many MCS and boost headers.
|
|
*/
|
|
|
|
// Fallback definitions of FALSE, TRUE and DBUG_ASSERT, supplied only when the
// including translation unit has not defined them already.
// FALSE and TRUE expand to the integer constants 0 and 1.
#ifndef FALSE
|
|
#define FALSE (0)
|
|
#endif
|
|
|
|
#ifndef TRUE
|
|
#define TRUE (1)
|
|
#endif
|
|
|
|
// DBUG_ASSERT is mapped onto idbassert(). DBUG_ASSERT_TEMPORARILY_DEFINED
// records that the macro was introduced by this header, so that it can be
// removed again once m_ctype.h has been included (see the matching #ifdef
// further down in this file).
#ifndef DBUG_ASSERT
|
|
#define DBUG_ASSERT(x) idbassert(x)
|
|
#define DBUG_ASSERT_TEMPORARILY_DEFINED
|
|
#endif
|
|
|
|
// Define MYSQL_PLUGIN_IMPORT unless it is already defined:
//  - __declspec(dllimport) when both _WIN32 and MYSQL_DYNAMIC_PLUGIN are defined,
//  - empty in every other configuration.
// It is used in this header to decorate the declaration of default_charset_info.
#ifndef MYSQL_PLUGIN_IMPORT
|
|
#if (defined(_WIN32) && defined(MYSQL_DYNAMIC_PLUGIN))
|
|
#define MYSQL_PLUGIN_IMPORT __declspec(dllimport)
|
|
#else
|
|
#define MYSQL_PLUGIN_IMPORT
|
|
#endif
|
|
#endif
|
|
|
|
// Short type names declared here so that m_ctype.h can be included without
// pulling in <mariadb.h> / <my_sys.h> (see the comment above).
// NOTE(review): uint32_t / uint16_t are not declared by anything visible in
// this header; presumably they arrive through an earlier include - confirm.
typedef long long int longlong;
|
|
typedef unsigned long long int ulonglong;
|
|
typedef uint32_t uint32;
|
|
typedef uint16_t uint16;
|
|
// my_bool is a plain char here, not the C++ bool type.
typedef char my_bool;
|
|
typedef unsigned char uchar;
|
|
|
|
// "p*" typedefs: the types to use for char/bool/short/float parameters in
// prototypes. With GCC (and when not running under lint) the narrow types are
// used as-is; otherwise the promoted types (int / uint / double) are used.
#if defined(__GNUC__) && !defined(_lint)
|
|
typedef char pchar; /* Mixed prototypes can take char */
|
|
typedef char puchar; /* Mixed prototypes can take char */
|
|
typedef char pbool; /* Mixed prototypes can take char */
|
|
typedef short pshort; /* Mixed prototypes can take short int */
|
|
typedef float pfloat; /* Mixed prototypes can take float */
|
|
#else
|
|
typedef int pchar; /* Mixed prototypes can't take char */
|
|
// NOTE(review): `uint` is not declared in this header; presumably it comes
// from a system or previously included header - confirm for non-GCC builds.
typedef uint puchar; /* Mixed prototypes can't take char */
|
|
typedef int pbool; /* Mixed prototypes can't take char */
|
|
typedef int pshort; /* Mixed prototypes can't take short int */
|
|
typedef double pfloat; /* Mixed prototypes can't take float */
|
|
#endif
|
|
|
|
// CHARSET_INFO names a *const* charset_info_st, so a CHARSET_INFO* is a
// pointer to const data. The struct itself is only named here, not defined.
typedef const struct charset_info_st CHARSET_INFO;
|
|
// Declaration (not definition) of the default charset pointer, with C linkage;
// the object itself is defined outside this header.
extern "C" MYSQL_PLUGIN_IMPORT CHARSET_INFO* default_charset_info;
|
|
|
|
// HAVE_PSI_INTERFACE is defined before m_ctype.h is included.
// NOTE(review): presumably m_ctype.h (or a header it includes) is sensitive to
// this macro - confirm; the macro is not undefined again in this header.
#define HAVE_PSI_INTERFACE
|
|
|
|
#include "m_ctype.h"
|
|
|
|
// Clean-up of the helper macros now that m_ctype.h has been processed.
// NOTE(review): FALSE and TRUE are undefined unconditionally, i.e. even when
// they were already defined before this header was included; only DBUG_ASSERT
// is guarded by a "temporarily defined" marker. Confirm this is intentional.
#undef FALSE
|
|
#undef TRUE
|
|
|
|
// Remove DBUG_ASSERT only if it was introduced by this header.
#ifdef DBUG_ASSERT_TEMPORARILY_DEFINED
|
|
#undef DBUG_ASSERT
|
|
#endif
|
|
|
|
// Counterpart of the "remember"/"undef" includes done at the top of the file
// in the PREFER_MY_CONFIG_H configuration.
#if defined(PREFER_MY_CONFIG_H)
|
|
#include "mcsconfig_conflicting_defs_restore.h"
|
|
#endif
|
|
|
|
namespace datatypes
|
|
{
|
|
class MariaDBHasher
|
|
{
|
|
ulong mPart1;
|
|
ulong mPart2;
|
|
|
|
public:
|
|
MariaDBHasher() : mPart1(1), mPart2(4)
|
|
{
|
|
}
|
|
MariaDBHasher& add(CHARSET_INFO* cs, const char* str, size_t length)
|
|
{
|
|
cs->hash_sort((const uchar*)str, length, &mPart1, &mPart2);
|
|
return *this;
|
|
}
|
|
MariaDBHasher& add(CHARSET_INFO* cs, const utils::ConstString& str)
|
|
{
|
|
return add(cs, str.str(), str.length());
|
|
}
|
|
uint32_t finalize() const
|
|
{
|
|
return (uint32_t)mPart1;
|
|
}
|
|
};
|
|
|
|
// A reference to MariaDB CHARSET_INFO.
|
|
|
|
class Charset
|
|
{
|
|
protected:
|
|
const struct charset_info_st* mCharset;
|
|
|
|
private:
|
|
static constexpr const uint flags_ = MY_STRXFRM_PAD_WITH_SPACE | MY_STRXFRM_PAD_TO_MAXLEN;
|
|
|
|
public:
|
|
Charset(CHARSET_INFO& cs) : mCharset(&cs)
|
|
{
|
|
}
|
|
Charset(CHARSET_INFO* cs = nullptr) : mCharset(cs ? cs : &my_charset_bin)
|
|
{
|
|
}
|
|
Charset(uint32_t charsetNumber);
|
|
void setCharset(uint32_t charsetNumber);
|
|
CHARSET_INFO& getCharset() const
|
|
{
|
|
return *mCharset;
|
|
}
|
|
uint32_t hash(const char* data, uint64_t len) const
|
|
{
|
|
return MariaDBHasher().add(mCharset, data, len).finalize();
|
|
}
|
|
bool eq(const std::string& str1, const std::string& str2) const
|
|
{
|
|
return mCharset->strnncollsp(str1.data(), str1.length(), str2.data(), str2.length()) == 0;
|
|
}
|
|
int strnncollsp(const std::string& str1, const std::string& str2) const
|
|
{
|
|
return mCharset->strnncollsp(str1.data(), str1.length(), str2.data(), str2.length());
|
|
}
|
|
int strnncollsp(const utils::ConstString& str1, const utils::ConstString& str2) const
|
|
{
|
|
// nullptr handling below should return values as if nulls are substituted with empty string.
|
|
// please note that ConstString has an assertion so that nullptr data has zero length.
|
|
const char* s1 = str1.str();
|
|
const char* s2 = str2.str();
|
|
return mCharset->strnncollsp(s1 ? s1 : "", str1.length(), s2 ? s2 : "" , str2.length());
|
|
}
|
|
int strnncollsp(const char* str1, size_t length1, const char* str2, size_t length2) const
|
|
{
|
|
return mCharset->strnncollsp(str1, length1, str2, length2);
|
|
}
|
|
int strnncollsp(const unsigned char* str1, size_t length1, const unsigned char* str2, size_t length2) const
|
|
{
|
|
return mCharset->strnncollsp((const char*)str1, length1, (const char*)str2, length2);
|
|
}
|
|
bool test_if_important_data(const char* str, const char* end) const
|
|
{
|
|
if (mCharset->state & MY_CS_NOPAD)
|
|
return str < end;
|
|
return str + mCharset->scan(str, end, MY_SEQ_SPACES) < end;
|
|
}
|
|
bool like(bool neg, const utils::ConstString& subject, const utils::ConstString& pattern) const
|
|
{
|
|
bool res = !mCharset->wildcmp(subject.str(), subject.end(), pattern.str(), pattern.end(), '\\', '_', '%');
|
|
return neg ? !res : res;
|
|
}
|
|
size_t strnxfrm(uchar* dst, size_t dstlen, uint nweights, const uchar* src, size_t srclen, uint flags)
|
|
{
|
|
assert(mCharset->coll);
|
|
return mCharset->coll->strnxfrm(mCharset, dst, dstlen, nweights, src, srclen, flags);
|
|
}
|
|
// The magic check that tells that bytes are mapped to weights as 1:1
|
|
bool strnxfrmIsValid() const
|
|
{
|
|
return (mCharset->state & MY_CS_NON1TO1) == 0;
|
|
}
|
|
template <typename T>
|
|
T strnxfrm(const char* src) const
|
|
{
|
|
T ret = 0;
|
|
size_t len __attribute__((unused)) =
|
|
mCharset->strnxfrm((char*)&ret, sizeof(T), sizeof(T), src, sizeof(T), flags_);
|
|
assert(len <= sizeof(T));
|
|
return ret;
|
|
}
|
|
template <typename T>
|
|
T strnxfrm(const utils::ConstString& src) const
|
|
{
|
|
T ret = 0;
|
|
size_t len __attribute__((unused)) =
|
|
mCharset->strnxfrm((char*)&ret, sizeof(T), sizeof(T), (char*)src.str(), src.length(), flags_);
|
|
assert(len <= sizeof(T));
|
|
return ret;
|
|
}
|
|
static uint getDefaultFlags()
|
|
{
|
|
return flags_;
|
|
}
|
|
};
|
|
|
|
class CollationAwareHasher : public Charset
|
|
{
|
|
public:
|
|
CollationAwareHasher(const Charset& cs) : Charset(cs)
|
|
{
|
|
}
|
|
inline uint32_t operator()(const std::string& s) const
|
|
{
|
|
return operator()(s.data(), s.length());
|
|
}
|
|
inline uint32_t operator()(const char* data, uint64_t len) const
|
|
{
|
|
return Charset::hash(data, len);
|
|
}
|
|
};
|
|
|
|
class CollationAwareComparator : public Charset
|
|
{
|
|
public:
|
|
CollationAwareComparator(const Charset& cs) : Charset(cs)
|
|
{
|
|
}
|
|
bool operator()(const std::string& str1, const std::string& str2) const
|
|
{
|
|
return Charset::eq(str1, str2);
|
|
}
|
|
};
|
|
|
|
} // end of namespace datatypes
|