1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

Merge pull request #2004 from drrtuy/MCOL-4759

MCOL-4759 Upmerge for MCOL-4564 code that implements hash merging fam…
This commit is contained in:
Roman Nozdrin
2021-06-28 14:05:16 +03:00
committed by GitHub
2 changed files with 122 additions and 52 deletions

57
utils/common/hashfamily.h Normal file
View File

@ -0,0 +1,57 @@
/* Copyright (C) 2021 Mariadb Corporation.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#ifndef UTILS_HASHFAMILY_H
#define UTILS_HASHFAMILY_H
#include "hasher.h"
#include "collation.h"
namespace utils
{
// Combines the result of a utils::Hasher_r with the result of a
// datatypes::MariaDBHasher into a single 64-bit hash value.
// Both hashers are held by reference, so they must stay alive until
// finalize() has been called on this object.
class HashFamily
{
 public:
  // h                - hasher whose finalize() receives intermediateHash and len
  // intermediateHash - first argument later passed to h.finalize()
  // len              - second argument later passed to h.finalize();
  //                    kept in a uint32_t member
  // hM               - hasher whose finalize() supplies the second component
  HashFamily(const utils::Hasher_r& h, const uint64_t intermediateHash, const uint64_t len,
             const datatypes::MariaDBHasher& hM)
   : mHasher(h), mMariaDBHasher(hM), mHasher_rHash(intermediateHash), mHasher_rLen(len)
  {
  }

  // Returns the combined hash of both hashers.
  // Algorithm, seed and factor are taken from this discussion
  // https://stackoverflow.com/questions/1646807/quick-and-simple-hash-code-combinations
  inline uint64_t finalize() const
  {
    // First step: fold the Hasher_r result into the seeded accumulator.
    const auto afterHasherR = seed * factor + mHasher.finalize(mHasher_rHash, mHasher_rLen);
    // Second step: scale again and add the MariaDBHasher result.
    return afterHasherR * factor + mMariaDBHasher.finalize();
  }

 private:
  static constexpr uint64_t seed = 1009ULL;
  static constexpr uint64_t factor = 9176ULL;

  const utils::Hasher_r& mHasher;
  const datatypes::MariaDBHasher& mMariaDBHasher;
  const uint64_t mHasher_rHash;
  const uint32_t mHasher_rLen;
};
}
#endif
// vim:ts=2 sw=2:

View File

@ -60,7 +60,7 @@
#include "../winport/winport.h"
#include "collation.h"
#include "common/hashfamily.h"
// Workaround for my_global.h #define of isnan(X) causing a std::std namespace
@ -569,18 +569,21 @@ public:
// a fcn to check the type defs separately doesn't exist yet. No normalization.
inline uint64_t hash(uint32_t lastCol) const; // generates a hash for cols [0-lastCol]
inline uint64_t hash() const; // generates a hash for all cols
inline void colUpdateMariaDBHasher(datatypes::MariaDBHasher &hasher, uint32_t col) const;
inline void colUpdateMariaDBHasherTypeless(datatypes::MariaDBHasher &hasher, uint32_t keyColsIdx,
const std::vector<uint32_t>& keyCols,
const std::vector<uint32_t>* smallSideKeyColumnsIds,
const std::vector<uint32_t>* smallSideColumnsWidths) const;
inline void colUpdateHasher(datatypes::MariaDBHasher& hM,
const utils::Hasher_r& h,
const uint32_t col,
uint32_t& intermediateHash) const;
inline void colUpdateHasherTypeless(datatypes::MariaDBHasher &hasher, uint32_t keyColsIdx,
const std::vector<uint32_t>& keyCols,
const std::vector<uint32_t>* smallSideKeyColumnsIds,
const std::vector<uint32_t>* smallSideColumnsWidths) const;
// Hashes the key columns named by keyCols: a local datatypes::MariaDBHasher is
// updated for every index i in [0, keyCols.size()) and its finalize() value is
// returned.
// smallSideKeyColumnsIds and smallSideColumnsWidths are only forwarded to the
// per-column update helper; their meaning is not visible from this function.
inline uint64_t hashTypeless(const std::vector<uint32_t>& keyCols,
const std::vector<uint32_t>* smallSideKeyColumnsIds,
const std::vector<uint32_t>* smallSideColumnsWidths) const
{
datatypes::MariaDBHasher h;
for (uint32_t i = 0; i < keyCols.size(); i++)
// NOTE(review): two helper calls with identical arguments follow
// (colUpdateMariaDBHasherTypeless, then colUpdateHasherTypeless); they look
// like the before/after of a rename - confirm that only one belongs here.
colUpdateMariaDBHasherTypeless(h, i, keyCols, smallSideKeyColumnsIds, smallSideColumnsWidths);
colUpdateHasherTypeless(h, i, keyCols, smallSideKeyColumnsIds, smallSideColumnsWidths);
return h.finalize();
}
@ -946,7 +949,10 @@ inline utils::ConstString Row::getConstString(uint32_t colIndex) const
}
inline void Row::colUpdateMariaDBHasher(datatypes::MariaDBHasher &h, uint32_t col) const
inline void Row::colUpdateHasher(datatypes::MariaDBHasher& hM,
const utils::Hasher_r& h,
const uint32_t col,
uint32_t& intermediateHash) const
{
switch (getColType(col))
{
@ -956,17 +962,19 @@ inline void Row::colUpdateMariaDBHasher(datatypes::MariaDBHasher &h, uint32_t co
case execplan::CalpontSystemCatalog::TEXT:
{
CHARSET_INFO *cs = getCharset(col);
h.add(cs, getConstString(col));
hM.add(cs, getConstString(col));
break;
}
default:
h.add(&my_charset_bin, getShortConstString(col));
{
intermediateHash = h((const char*) &data[offsets[col]], colWidths[col], intermediateHash);
break;
}
}
}
inline void Row::colUpdateMariaDBHasherTypeless(datatypes::MariaDBHasher &h, uint32_t keyColsIdx,
inline void Row::colUpdateHasherTypeless(datatypes::MariaDBHasher &h, uint32_t keyColsIdx,
const std::vector<uint32_t>& keyCols,
const std::vector<uint32_t>* smallSideKeyColumnsIds,
const std::vector<uint32_t>* smallSideColumnsWidths) const
@ -1472,7 +1480,12 @@ inline uint64_t Row::hash() const
inline uint64_t Row::hash(uint32_t lastCol) const
{
datatypes::MariaDBHasher h;
// Use two hash classes. MariaDBHasher for text-based
// collation-aware data types and Hasher_r for all other data types.
// We deliver a hash that is a combination of both hashers' results.
utils::Hasher_r h;
datatypes::MariaDBHasher hM;
uint32_t intermediateHash = 0;
// Sometimes we ask this to hash 0 bytes, and it comes through looking like
// lastCol = -1. Return 0.
@ -1480,9 +1493,9 @@ inline uint64_t Row::hash(uint32_t lastCol) const
return 0;
for (uint32_t i = 0; i <= lastCol; i++)
colUpdateMariaDBHasher(h, i);
colUpdateHasher(hM, h, i, intermediateHash);
return h.finalize();
return utils::HashFamily(h, intermediateHash, lastCol << 2, hM).finalize();
}
inline bool Row::equals(const Row& r2) const