You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-12-24 14:20:59 +03:00
MCOL-5199 This patch solves the overall performance degradation introduced with a new way of hashing char columns
in aggregation code. The patch disables the padding that forces the hasher to calculate over the whole 2k buffer. This patch also moves the hashing code into the common place where it belongs.
This commit is contained in:
@@ -135,8 +135,10 @@ class Charset
|
||||
{
|
||||
protected:
|
||||
const struct charset_info_st* mCharset;
|
||||
|
||||
private:
|
||||
static constexpr const uint flags_ = MY_STRXFRM_PAD_WITH_SPACE | MY_STRXFRM_PAD_TO_MAXLEN;
|
||||
|
||||
public:
|
||||
Charset(CHARSET_INFO& cs) : mCharset(&cs)
|
||||
{
|
||||
@@ -187,7 +189,7 @@ class Charset
|
||||
}
|
||||
// Thin wrapper over the collation handler's strnxfrm(): transforms `srclen` bytes at `src`
// into at most `dstlen` bytes at `dst`, passing `nweights` and `flags` through unchanged.
// Returns whatever the collation handler returns.
size_t strnxfrm(uchar* dst, size_t dstlen, uint nweights, const uchar* src, size_t srclen, uint flags)
{
  // The collation handler table must be present before it is dereferenced below.
  assert(mCharset->coll);
  return mCharset->coll->strnxfrm(mCharset, dst, dstlen, nweights, src, srclen, flags);
}
|
||||
// The magic check that tells that bytes are mapped to weights as 1:1
|
||||
@@ -195,21 +197,21 @@ class Charset
|
||||
{
|
||||
return (mCharset->state & MY_CS_NON1TO1) == 0;
|
||||
}
|
||||
template<typename T>
|
||||
template <typename T>
|
||||
T strnxfrm(const char* src) const
|
||||
{
|
||||
T ret = 0;
|
||||
size_t len __attribute__((unused)) = mCharset->strnxfrm((char*)&ret, sizeof(T), sizeof(T),
|
||||
src, sizeof(T), flags_);
|
||||
size_t len __attribute__((unused)) =
|
||||
mCharset->strnxfrm((char*)&ret, sizeof(T), sizeof(T), src, sizeof(T), flags_);
|
||||
assert(len <= sizeof(T));
|
||||
return ret;
|
||||
}
|
||||
template<typename T>
|
||||
T strnxfrm(const utils::ConstString &src) const
|
||||
template <typename T>
|
||||
T strnxfrm(const utils::ConstString& src) const
|
||||
{
|
||||
T ret = 0;
|
||||
size_t len __attribute__((unused)) = mCharset->strnxfrm((char*)&ret, sizeof(T), sizeof(T),
|
||||
(char*)src.str(), src.length(), flags_);
|
||||
size_t len __attribute__((unused)) =
|
||||
mCharset->strnxfrm((char*)&ret, sizeof(T), sizeof(T), (char*)src.str(), src.length(), flags_);
|
||||
assert(len <= sizeof(T));
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -66,6 +66,13 @@ class ConstString
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
// Strips trailing ' ' characters by shrinking the recorded length; the bytes that
// mStr points at are not modified. Returns *this so calls can be chained.
ConstString& rtrimSpaces()
{
  while (mLength != 0 && mStr[mLength - 1] == ' ')
  {
    --mLength;
  }
  return *this;
}
|
||||
};
|
||||
|
||||
} // namespace utils
|
||||
|
||||
@@ -26,8 +26,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#include "mcs_basic_types.h"
|
||||
|
||||
namespace utils
|
||||
@@ -203,6 +205,81 @@ class Hasher_r
|
||||
}
|
||||
};
|
||||
|
||||
// This stream hasher was borrowed from RobinHood.
// It folds a byte range into a 64-bit state, eight bytes at a time, and lets the caller
// mix an extra 64-bit value `x` into the state first (presumably the state returned by a
// previous call, so that several ranges can be chained - confirm against callers).
// finalize() applies the closing shift/multiply mixing step to a state.
// The object holds no data members, so every method is const and can be used on a
// const instance.
class Hasher64_r
{
 public:
  // Hashes `len` bytes starting at `ptr`.
  //   ptr - start of the input. It is read only through memcpy / byte access, so it needs
  //         no particular alignment, and it is not dereferenced when len == 0.
  //   len - number of bytes to hash.
  //   x   - optional extra value; it is mixed into the state only when it is non-zero.
  // Returns the (not yet finalized) 64-bit state.
  inline uint64_t operator()(const void* ptr, uint32_t len, uint64_t x = 0ULL) const
  {
    // Walk the input as bytes: forming uint64_t pointers into an arbitrarily aligned
    // buffer is avoided, while the words read are exactly the same as before.
    auto const* const bytes = static_cast<uint8_t const*>(ptr);
    uint64_t h = seed ^ (len * m);

    std::size_t const n_blocks = len / 8;
    if (x)
    {
      x *= m;
      x ^= x >> r;
      x *= m;
      h ^= x;
      h *= m;
    }
    for (std::size_t i = 0; i < n_blocks; ++i)
    {
      // Whole 8-byte words are loaded in native byte order.
      uint64_t k;
      memcpy(&k, bytes + i * sizeof(k), sizeof(k));

      k *= m;
      k ^= k >> r;
      k *= m;

      h ^= k;
      h *= m;
    }

    // The remaining 0..7 bytes are XOR-ed in with fixed shifts; every case deliberately
    // falls through to the next one so that all trailing bytes are consumed.
    auto const* const data8 = bytes + n_blocks * sizeof(uint64_t);
    switch (len & 7U)
    {
      case 7:
        h ^= static_cast<uint64_t>(data8[6]) << 48U;
        // FALLTHROUGH
      case 6:
        h ^= static_cast<uint64_t>(data8[5]) << 40U;
        // FALLTHROUGH
      case 5:
        h ^= static_cast<uint64_t>(data8[4]) << 32U;
        // FALLTHROUGH
      case 4:
        h ^= static_cast<uint64_t>(data8[3]) << 24U;
        // FALLTHROUGH
      case 3:
        h ^= static_cast<uint64_t>(data8[2]) << 16U;
        // FALLTHROUGH
      case 2:
        h ^= static_cast<uint64_t>(data8[1]) << 8U;
        // FALLTHROUGH
      case 1:
        h ^= static_cast<uint64_t>(data8[0]);
        h *= m;
        // FALLTHROUGH
      default: break;
    }
    return h;
  }

  // Applies the final mixing step to state `h` and returns the result.
  // `len` is accepted for interface compatibility but does not influence the result.
  inline uint64_t finalize(uint64_t h, uint64_t len) const
  {
    (void)len;
    h ^= h >> r;
    h *= m;
    h ^= h >> r;
    return h;
  }

 private:
  static constexpr uint64_t m = 0xc6a4a7935bd1e995ULL;  // multiplier used by every mixing step
  static constexpr uint64_t seed = 0xe17a1465ULL;       // initial state before the length is mixed in
  static constexpr unsigned int r = 47;                 // right-shift distance used by every mixing step
};
|
||||
|
||||
class Hasher128
|
||||
{
|
||||
public:
|
||||
|
||||
Reference in New Issue
Block a user