You've already forked mariadb-columnstore-engine
							
							
				mirror of
				https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
				synced 2025-11-03 17:13:17 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			287 lines
		
	
	
		
			8.1 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			287 lines
		
	
	
		
			8.1 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/*
 | 
						|
   Copyright (C) 2020-2022 MariaDB Corporation
 | 
						|
 | 
						|
   This program is free software; you can redistribute it and/or
 | 
						|
   modify it under the terms of the GNU General Public License
 | 
						|
   as published by the Free Software Foundation; version 2 of
 | 
						|
   the License.
 | 
						|
 | 
						|
   This program is distributed in the hope that it will be useful,
 | 
						|
   but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
   GNU General Public License for more details.
 | 
						|
 | 
						|
   You should have received a copy of the GNU General Public License
 | 
						|
   along with this program; if not, write to the Free Software
 | 
						|
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | 
						|
   MA 02110-1301, USA. */
 | 
						|
#pragma once
 | 
						|
 | 
						|
#if defined(PREFER_MY_CONFIG_H)
 | 
						|
 | 
						|
#if !defined(MY_CONFIG_H)
 | 
						|
#error my_config.h was not included (but PREFER_MY_CONFIG_H was set)
 | 
						|
#endif
 | 
						|
 | 
						|
#include "mcsconfig_conflicting_defs_remember.h"
 | 
						|
#include "mcsconfig_conflicting_defs_undef.h"
 | 
						|
 | 
						|
#else
 | 
						|
#if defined(MY_CONFIG_H)
 | 
						|
#error my_config.h was included before mcsconfig.h (and PREFER_MY_CONFIG_H was not set)
 | 
						|
#endif
 | 
						|
#endif  // PREFER_MY_CONFIG_H
 | 
						|
 | 
						|
#include "mcsconfig.h"
 | 
						|
 | 
						|
#include "exceptclasses.h"
 | 
						|
#include "conststring.h"
 | 
						|
 | 
						|
/*
 | 
						|
  Redefine definitions used by MariaDB m_ctype.h.
 | 
						|
  This is needed to avoid including <mariadb.h> and <my_sys.h>,
 | 
						|
  which conflict with many MCS and boost headers.
 | 
						|
*/
 | 
						|
 | 
						|
// Fallback definitions of macros that m_ctype.h refers to. Each one is only
// provided when the including translation unit has not defined it already.

// FALSE: supplied as (0) when missing.
#ifndef FALSE
 | 
						|
#define FALSE (0)
 | 
						|
#endif
 | 
						|
 | 
						|
// TRUE: supplied as (1) when missing.
#ifndef TRUE
 | 
						|
#define TRUE (1)
 | 
						|
#endif
 | 
						|
 | 
						|
// DBUG_ASSERT: mapped onto idbassert() when missing. The marker macro
// DBUG_ASSERT_TEMPORARILY_DEFINED records that the definition came from this
// header, so that it can be told apart from a definition made elsewhere.
#ifndef DBUG_ASSERT
 | 
						|
#define DBUG_ASSERT(x) idbassert(x)
 | 
						|
#define DBUG_ASSERT_TEMPORARILY_DEFINED
 | 
						|
#endif
 | 
						|
 | 
						|
// MYSQL_PLUGIN_IMPORT: expands to __declspec(dllimport) only for a Windows
// build with MYSQL_DYNAMIC_PLUGIN defined; otherwise it expands to nothing.
#ifndef MYSQL_PLUGIN_IMPORT
 | 
						|
#if (defined(_WIN32) && defined(MYSQL_DYNAMIC_PLUGIN))
 | 
						|
#define MYSQL_PLUGIN_IMPORT __declspec(dllimport)
 | 
						|
#else
 | 
						|
#define MYSQL_PLUGIN_IMPORT
 | 
						|
#endif
 | 
						|
#endif
 | 
						|
 | 
						|
// Integer and character type aliases declared here before m_ctype.h is
// included.
// NOTE(review): presumably these mirror the aliases MariaDB's own global
// headers would provide - confirm they stay in sync with the server headers.
typedef long long int longlong;
 | 
						|
typedef unsigned long long int ulonglong;
 | 
						|
typedef uint32_t uint32;
 | 
						|
typedef uint16_t uint16;
 | 
						|
typedef char my_bool;
 | 
						|
typedef unsigned char uchar;
 | 
						|
 | 
						|
// "Promoted" parameter types: with GCC (and not under lint) the narrow types
// are used directly, otherwise the wider int/uint/double types are used.
#if defined(__GNUC__) && !defined(_lint)
 | 
						|
typedef char pchar;   /* Mixed prototypes can take char */
 | 
						|
typedef char puchar;  /* Mixed prototypes can take char */
 | 
						|
typedef char pbool;   /* Mixed prototypes can take char */
 | 
						|
typedef short pshort; /* Mixed prototypes can take short int */
 | 
						|
typedef float pfloat; /* Mixed prototypes can take float */
 | 
						|
#else
 | 
						|
typedef int pchar;     /* Mixed prototypes can't take char */
 | 
						|
typedef uint puchar;   /* Mixed prototypes can't take char */
 | 
						|
typedef int pbool;     /* Mixed prototypes can't take char */
 | 
						|
typedef int pshort;    /* Mixed prototypes can't take short int */
 | 
						|
typedef double pfloat; /* Mixed prototypes can't take float */
 | 
						|
#endif
 | 
						|
 | 
						|
// CHARSET_INFO is a const-qualified alias: every CHARSET_INFO* below is a
// pointer to an immutable charset descriptor.
typedef const struct charset_info_st CHARSET_INFO;
 | 
						|
// Declared with C linkage; the definition lives outside this header.
extern "C" MYSQL_PLUGIN_IMPORT CHARSET_INFO* default_charset_info;
 | 
						|
 | 
						|
// HAVE_PSI_INTERFACE is defined before m_ctype.h is pulled in.
// NOTE(review): presumably required so the declarations seen here match the
// ones the server was built with - confirm.
#define HAVE_PSI_INTERFACE
 | 
						|
 | 
						|
#include "m_ctype.h"
 | 
						|
 | 
						|
// Drop the helper macros again now that m_ctype.h has been included.
// NOTE(review): FALSE and TRUE are undefined unconditionally, i.e. also when
// they were already defined before this header was included - confirm that
// this is intended.
#undef FALSE
 | 
						|
#undef TRUE
 | 
						|
 | 
						|
// DBUG_ASSERT is only removed when this header was the one that defined it.
// The marker macro itself is left defined.
#ifdef DBUG_ASSERT_TEMPORARILY_DEFINED
 | 
						|
#undef DBUG_ASSERT
 | 
						|
#endif
 | 
						|
 | 
						|
// Counterpart of the "remember"/"undef" includes performed earlier in this
// header when PREFER_MY_CONFIG_H is set.
#if defined(PREFER_MY_CONFIG_H)
 | 
						|
#include "mcsconfig_conflicting_defs_restore.h"
 | 
						|
#endif
 | 
						|
 | 
						|
namespace datatypes
 | 
						|
{
 | 
						|
class MariaDBHasher
 | 
						|
{
 | 
						|
  ulong mPart1;
 | 
						|
  ulong mPart2;
 | 
						|
 | 
						|
 public:
 | 
						|
  MariaDBHasher() : mPart1(1), mPart2(4)
 | 
						|
  {
 | 
						|
  }
 | 
						|
  MariaDBHasher& add(CHARSET_INFO* cs, const char* str, size_t length)
 | 
						|
  {
 | 
						|
    cs->hash_sort((const uchar*)str, length, &mPart1, &mPart2);
 | 
						|
    return *this;
 | 
						|
  }
 | 
						|
  MariaDBHasher& add(CHARSET_INFO* cs, const utils::ConstString& str)
 | 
						|
  {
 | 
						|
    return add(cs, str.str(), str.length());
 | 
						|
  }
 | 
						|
  uint32_t finalize() const
 | 
						|
  {
 | 
						|
    return (uint32_t)mPart1;
 | 
						|
  }
 | 
						|
};
 | 
						|
 | 
						|
// A reference to MariaDB CHARSET_INFO.
 | 
						|
 | 
						|
class Charset
 | 
						|
{
 | 
						|
 protected:
 | 
						|
  const struct charset_info_st* mCharset;
 | 
						|
 | 
						|
 private:
 | 
						|
  static constexpr const uint flags_ = MY_STRXFRM_PAD_WITH_SPACE | MY_STRXFRM_PAD_TO_MAXLEN;
 | 
						|
 | 
						|
 public:
 | 
						|
  Charset(CHARSET_INFO& cs) : mCharset(&cs)
 | 
						|
  {
 | 
						|
  }
 | 
						|
  Charset(CHARSET_INFO* cs = nullptr) : mCharset(cs ? cs : &my_charset_bin)
 | 
						|
  {
 | 
						|
  }
 | 
						|
 | 
						|
  bool operator==(const Charset& rhs) const
 | 
						|
  {
 | 
						|
    return rhs.getCharset().cs_name.str == getCharset().cs_name.str;
 | 
						|
  }
 | 
						|
 | 
						|
  bool operator!=(const Charset& rhs) const
 | 
						|
  {
 | 
						|
    return !(*this == rhs);
 | 
						|
  }
 | 
						|
 | 
						|
  std::string convert(const std::string& from, const datatypes::Charset& fromCs) const
 | 
						|
  {
 | 
						|
    std::string result;
 | 
						|
    uint dummy_errors;
 | 
						|
    result.resize(from.size() * getCharset().mbmaxlen);
 | 
						|
    size_t resultingSize = my_convert(const_cast<char*>(result.c_str()), result.size(), &getCharset(),
 | 
						|
                                      from.c_str(), from.size(), &fromCs.getCharset(), &dummy_errors);
 | 
						|
    result.resize(resultingSize);
 | 
						|
    return result;
 | 
						|
  }
 | 
						|
 | 
						|
  Charset(uint32_t charsetNumber);
 | 
						|
  void setCharset(uint32_t charsetNumber);
 | 
						|
  CHARSET_INFO& getCharset() const
 | 
						|
  {
 | 
						|
    return *mCharset;
 | 
						|
  }
 | 
						|
  uint32_t hash(const char* data, uint64_t len) const
 | 
						|
  {
 | 
						|
    return MariaDBHasher().add(mCharset, data, len).finalize();
 | 
						|
  }
 | 
						|
  bool eq(const std::string& str1, const std::string& str2) const
 | 
						|
  {
 | 
						|
    return mCharset->strnncollsp(str1.data(), str1.length(), str2.data(), str2.length()) == 0;
 | 
						|
  }
 | 
						|
  int strnncollsp(const std::string& str1, const std::string& str2) const
 | 
						|
  {
 | 
						|
    return mCharset->strnncollsp(str1.data(), str1.length(), str2.data(), str2.length());
 | 
						|
  }
 | 
						|
  int strnncollsp(const utils::ConstString& str1, const utils::ConstString& str2) const
 | 
						|
  {
 | 
						|
    // nullptr handling below should return values as if nulls are substituted with empty string.
 | 
						|
    // please note that ConstString has an assertion so that nullptr data has zero length.
 | 
						|
    const char* s1 = str1.str();
 | 
						|
    const char* s2 = str2.str();
 | 
						|
    return mCharset->strnncollsp(s1 ? s1 : "", str1.length(), s2 ? s2 : "", str2.length());
 | 
						|
  }
 | 
						|
  int strnncollsp(const char* str1, size_t length1, const char* str2, size_t length2) const
 | 
						|
  {
 | 
						|
    return mCharset->strnncollsp(str1, length1, str2, length2);
 | 
						|
  }
 | 
						|
  int strnncollsp(const unsigned char* str1, size_t length1, const unsigned char* str2, size_t length2) const
 | 
						|
  {
 | 
						|
    return mCharset->strnncollsp((const char*)str1, length1, (const char*)str2, length2);
 | 
						|
  }
 | 
						|
  bool test_if_important_data(const char* str, const char* end) const
 | 
						|
  {
 | 
						|
    if (mCharset->state & MY_CS_NOPAD)
 | 
						|
      return str < end;
 | 
						|
    return str + mCharset->scan(str, end, MY_SEQ_SPACES) < end;
 | 
						|
  }
 | 
						|
  bool like(bool neg, const utils::ConstString& subject, const utils::ConstString& pattern) const
 | 
						|
  {
 | 
						|
    bool res = !mCharset->wildcmp(subject.str(), subject.end(), pattern.str(), pattern.end(), '\\', '_', '%');
 | 
						|
    return neg ? !res : res;
 | 
						|
  }
 | 
						|
  size_t strnxfrm(uchar* dst, size_t dstlen, uint nweights, const uchar* src, size_t srclen, uint flags)
 | 
						|
  {
 | 
						|
    assert(mCharset->coll);
 | 
						|
    return mCharset->coll->strnxfrm(mCharset, dst, dstlen, nweights, src, srclen, flags);
 | 
						|
  }
 | 
						|
  // The magic check that tells that bytes are mapped to weights as 1:1
 | 
						|
  bool strnxfrmIsValid() const
 | 
						|
  {
 | 
						|
    return (mCharset->state & MY_CS_NON1TO1) == 0;
 | 
						|
  }
 | 
						|
  template <typename T>
 | 
						|
  T strnxfrm(const char* src) const
 | 
						|
  {
 | 
						|
    T ret = 0;
 | 
						|
    size_t len __attribute__((unused)) =
 | 
						|
        mCharset->strnxfrm((char*)&ret, sizeof(T), sizeof(T), src, sizeof(T), flags_);
 | 
						|
    assert(len <= sizeof(T));
 | 
						|
    return ret;
 | 
						|
  }
 | 
						|
  template <typename T>
 | 
						|
  T strnxfrm(const utils::ConstString& src) const
 | 
						|
  {
 | 
						|
    T ret = 0;
 | 
						|
    size_t len __attribute__((unused)) =
 | 
						|
        mCharset->strnxfrm((char*)&ret, sizeof(T), sizeof(T), (char*)src.str(), src.length(), flags_);
 | 
						|
    assert(len <= sizeof(T));
 | 
						|
    return ret;
 | 
						|
  }
 | 
						|
  static uint getDefaultFlags()
 | 
						|
  {
 | 
						|
    return flags_;
 | 
						|
  }
 | 
						|
};
 | 
						|
 | 
						|
class CollationAwareHasher : public Charset
 | 
						|
{
 | 
						|
 public:
 | 
						|
  CollationAwareHasher(const Charset& cs) : Charset(cs)
 | 
						|
  {
 | 
						|
  }
 | 
						|
  inline uint32_t operator()(const std::string& s) const
 | 
						|
  {
 | 
						|
    return operator()(s.data(), s.length());
 | 
						|
  }
 | 
						|
  inline uint32_t operator()(const char* data, uint64_t len) const
 | 
						|
  {
 | 
						|
    return Charset::hash(data, len);
 | 
						|
  }
 | 
						|
};
 | 
						|
 | 
						|
class CollationAwareComparator : public Charset
 | 
						|
{
 | 
						|
 public:
 | 
						|
  CollationAwareComparator(const Charset& cs) : Charset(cs)
 | 
						|
  {
 | 
						|
  }
 | 
						|
  bool operator()(const std::string& str1, const std::string& str2) const
 | 
						|
  {
 | 
						|
    return Charset::eq(str1, str2);
 | 
						|
  }
 | 
						|
};
 | 
						|
 | 
						|
// Compares two strings for equality, ignoring the case of the ASCII letters
// 'a'..'z' / 'A'..'Z' only. All other bytes must match exactly, and strings
// of different lengths are never equal.
inline bool ASCIIStringCaseInsensetiveEquals(const std::string& left, const std::string& right)
{
  if (left.size() != right.size())
    return false;

  // Folds a lowercase ASCII letter to uppercase; leaves anything else as is.
  const auto foldToUpper = [](char ch) -> int
  {
    if (ch >= 'a' && ch <= 'z')
      return ch - 'a' + 'A';
    return ch;
  };

  for (std::string::size_type pos = 0; pos < left.size(); ++pos)
  {
    if (foldToUpper(left[pos]) != foldToUpper(right[pos]))
      return false;
  }
  return true;
}
 | 
						|
 | 
						|
}  // end of namespace datatypes
 |