/* Copyright (C) 2014 InfiniDB, Inc.
   Copyright (C) 2019 MariaDB Corporation

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; version 2 of
   the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
   MA 02110-1301, USA. */
#pragma once

#include <iostream>
#include <vector>
#include <memory>     // std::unique_ptr, std::shared_ptr
#include <cstring>    // memcmp
#include <stdexcept>  // std::runtime_error
#include <boost/shared_ptr.hpp>
#include <boost/scoped_ptr.hpp>

#include <boost/scoped_array.hpp>
#include <unordered_map>

#include "countingallocator.h"
#include "resourcemanager.h"
#include "rowgroup.h"
#include "joiner.h"
#include "fixedallocator.h"
#include "joblisttypes.h"
#include "../funcexp/funcexpwrapper.h"
#include "stlpoolallocator.h"
#include "hasher.h"
#include "threadpool.h"
#include "columnwidth.h"
#include "mcs_string.h"

namespace joiner
{
uint32_t calculateKeyLength(const std::vector<uint32_t>& aKeyColumnsIds, const rowgroup::RowGroup& aRowGroup,
                            const std::vector<uint32_t>* aLargeKeyColumnsIds = nullptr,
                            const rowgroup::RowGroup* aLargeRowGroup = nullptr);

constexpr uint8_t IS_SMALLSIDE = 0x01;         // SmallSide of a JOIN w/o a skew in key column widths
constexpr uint8_t IS_SMALLSIDE_SKEWED = 0x02;  // SmallSide of a JOIN with a skew in key column widths
class TypelessDataDecoder;

class TypelessData
{
 public:
  union
  {
    uint8_t* data;
    const rowgroup::Row* mRowPtr;
  };
  uint32_t len;
  // The flags are only locally significant in PP now, so serialize() doesn't send them over the wire.
  uint32_t mFlags;

  TypelessData() : data(nullptr), len(0), mFlags(0)
  {
  }
  TypelessData(const rowgroup::Row* rowPtr) : mRowPtr(rowPtr), len(0), mFlags(0)
  {
  }
  TypelessData(messageqcpp::ByteStream& bs, utils::PoolAllocator& memAllocator)
   : data(nullptr), len(0), mFlags(0)
  {
    deserialize(bs, memAllocator);
  }
  inline bool operator==(const TypelessData&) const;
  void serialize(messageqcpp::ByteStream&) const;
  void deserialize(messageqcpp::ByteStream&, utils::FixedAllocator&);
  void deserialize(messageqcpp::ByteStream&, utils::PoolAllocator&);
  std::string toString() const;
  uint32_t hash(const rowgroup::RowGroup&, const std::vector<uint32_t>& keyCols,
                const std::vector<uint32_t>* smallSideKeyColumnsIds,
                const rowgroup::RowGroup* smallSideRG) const;
  static int cmp(const rowgroup::RowGroup&, const std::vector<uint32_t>& keyCols, const TypelessData& a,
                 const TypelessData& b, const std::vector<uint32_t>* smallSideKeyColumnsIds,
                 const rowgroup::RowGroup* smallSideRG);
  int cmpToRow(const rowgroup::RowGroup& r, const std::vector<uint32_t>& keyCols, const rowgroup::Row& row,
               const std::vector<uint32_t>* smallSideKeyColumnsIds,
               const rowgroup::RowGroup* smallSideRG) const;
  inline void setSmallSide()
  {
    mFlags |= IS_SMALLSIDE;
  }
  inline void setSmallSideWithSkewedData()
  {
    mFlags |= IS_SMALLSIDE_SKEWED;
  }
  inline bool isSmallSide() const
  {
    return mFlags & (IS_SMALLSIDE_SKEWED | IS_SMALLSIDE);
  }
  inline bool isSmallSideWithSkewedData() const
  {
    return mFlags & IS_SMALLSIDE_SKEWED;
  }
};

// This operator is used in the EM only, so it doesn't support the TD cmp operation
// using Row pointers.
inline bool TypelessData::operator==(const TypelessData& t) const
{
  if (len != t.len)
    return false;

  if (len == 0)  // special value to force mismatches
    return false;

  return (memcmp(data, t.data, len) == 0);
}

class TypelessDataDecoder
{
  const uint8_t* mPtr;
  const uint8_t* mEnd;
  void checkAvailableData(uint32_t nbytes) const
  {
    if (mPtr + nbytes > mEnd)
      throw std::runtime_error("TypelessData is too short");
  }

 public:
  TypelessDataDecoder(const uint8_t* ptr, size_t length) : mPtr(ptr), mEnd(ptr + length)
  {
  }
  TypelessDataDecoder(const TypelessData& data) : TypelessDataDecoder(data.data, data.len)
  {
  }
  utils::ConstString scanGeneric(uint32_t length)
  {
    checkAvailableData(length);
    utils::ConstString res((const char*)mPtr, length);
    mPtr += length;
    return res;
  }
  uint32_t scanStringLength()
  {
    checkAvailableData(2);
    uint32_t res = ((uint32_t)mPtr[0]) * 255 + mPtr[1];
    mPtr += 2;
    return res;
  }
  utils::ConstString scanString()
  {
    return scanGeneric(scanStringLength());
  }
  int64_t scanTInt64()
  {
    checkAvailableData(sizeof(int64_t));
    int64_t res = *reinterpret_cast<const int64_t*>(mPtr);
    mPtr += sizeof(int64_t);
    return res;
  }
  datatypes::TSInt128 scanTInt128()
  {
    checkAvailableData(datatypes::MAXDECIMALWIDTH);
    datatypes::TSInt128 res(mPtr);
    mPtr += datatypes::MAXDECIMALWIDTH;
    return res;
  }
};
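
// A minimal decoding sketch (illustrative only; the field order and types are
// hypothetical and must match however the key was actually encoded):
//
//   TypelessDataDecoder decoder(key);              // key is a TypelessData
//   utils::ConstString s = decoder.scanString();   // 2-byte length prefix + bytes
//   int64_t i = decoder.scanTInt64();              // fixed-width 64-bit value
//
// Every scan*() call throws std::runtime_error if the remaining buffer is too short.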

// Comparator for long double in the hash
class LongDoubleEq
{
 public:
  LongDoubleEq() = default;
  inline bool operator()(const long double& pos1, const long double& pos2) const
  {
    return pos1 == pos2;
  }
};

/* This function makes the keys for string & compound joins. The length of the
 * key is limited by keylen. Keys that are longer are assigned a length of 0 on return,
 * signifying that they shouldn't match anything.
 */
// MCOL-1822 SUM/AVG as long double: pass in RG and col so we can determine type conversion
extern TypelessData makeTypelessKey(const rowgroup::Row&, const std::vector<uint32_t>&, uint32_t keylen,
                                    utils::FixedAllocator* fa, const rowgroup::RowGroup&,
                                    const std::vector<uint32_t>&);
extern uint64_t getHashOfTypelessKey(const rowgroup::Row&, const std::vector<uint32_t>&, uint32_t seed = 0);
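
// Illustrative sketch of how these are typically used together (the variable names
// here are hypothetical; the real call sites live in TupleJoiner):
//
//   TypelessData key = makeTypelessKey(row, keyColumns, keyLength, &storedKeyAllocator,
//                                      rowGroup, otherSideKeyColumns);
//   if (key.len == 0)
//     ;  // key exceeded keyLength and will never compare equal (see operator== above)
//   uint64_t h = getHashOfTypelessKey(row, keyColumns);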

class TypelessDataStructure
{
 public:
  const rowgroup::RowGroup* mRowGroup;
  const std::vector<uint32_t>* mMap;
  const std::vector<uint32_t>* mSmallSideKeyColumnsIds;
  const rowgroup::RowGroup* mSmallSideRG;
  TypelessDataStructure(const rowgroup::RowGroup* rg, const std::vector<uint32_t>* map,
                        const std::vector<uint32_t>* smallSideKeyColumnsIds,
                        const rowgroup::RowGroup* smallSideRG)
   : mRowGroup(rg), mMap(map), mSmallSideKeyColumnsIds(smallSideKeyColumnsIds), mSmallSideRG(smallSideRG)
  {
  }
};

using RowPointersVec =
    std::vector<rowgroup::Row::Pointer, allocators::CountingAllocator<rowgroup::Row::Pointer>>;
using RowPointersVecUP = std::unique_ptr<RowPointersVec>;
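
// A rough build-side usage sketch (hypothetical driver code; the real callers are
// the joblist join steps): construct the joiner, feed it the small side, then
// finalize before probing with match() (see the comment at match() below).
//
//   TupleJoiner joiner(smallRG, largeRG, smallKeyCol, largeKeyCol, joblist::INNER,
//                      &jobstepThreadPool, &resourceManager, numCores);
//   for (each small-side rowgroup::Row r)
//     joiner.insert(r);
//   joiner.doneInserting();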

class TupleJoiner
{
 public:
  struct hasher
  {
    inline size_t operator()(int64_t val) const
    {
      return fHasher((char*)&val, 8);
    }
    inline size_t operator()(uint64_t val) const
    {
      return fHasher((char*)&val, 8);
    }
    inline size_t operator()(const TypelessData& e) const
    {
      return fHasher((char*)e.data, e.len);
    }
    inline size_t operator()(long double val) const
    {
      if (sizeof(long double) == 8)  // Probably just MSC, but you never know.
      {
        return fHasher((char*)&val, sizeof(long double));
      }
      else
      {
        // On Linux x86_64, long double is stored in 128 bits, but only 80 are significant.
        return fHasher((char*)&val, 10);
      }
    }

   private:
    utils::Hasher fHasher;
  };

  struct TypelessDataHasher : public TypelessDataStructure
  {
    TypelessDataHasher(const rowgroup::RowGroup* rg, const std::vector<uint32_t>* map,
                       const std::vector<uint32_t>* smallSideKeyColumnsIds,
                       const rowgroup::RowGroup* smallSideRG)
     : TypelessDataStructure(rg, map, smallSideKeyColumnsIds, smallSideRG)
    {
    }
    inline size_t operator()(const TypelessData& e) const
    {
      return e.hash(*mRowGroup, *mMap, mSmallSideKeyColumnsIds, mSmallSideRG);
    }
  };

  struct TypelessDataComparator : public TypelessDataStructure
  {
   public:
    TypelessDataComparator(const rowgroup::RowGroup* rg, const std::vector<uint32_t>* map,
                           const std::vector<uint32_t>* smallSideKeyColumnsIds,
                           const rowgroup::RowGroup* smallSideRG)
     : TypelessDataStructure(rg, map, smallSideKeyColumnsIds, smallSideRG)
    {
    }
    bool operator()(const TypelessData& a, const TypelessData& b) const
    {
      return !TypelessData::cmp(*mRowGroup, *mMap, a, b, mSmallSideKeyColumnsIds, mSmallSideRG);
    }
  };

  /* ctor to use for numeric join */
  TupleJoiner(const rowgroup::RowGroup& smallInput, const rowgroup::RowGroup& largeInput,
              uint32_t smallJoinColumn, uint32_t largeJoinColumn, joblist::JoinType jt,
              threadpool::ThreadPool* jsThreadPool, joblist::ResourceManager* rm, const uint64_t numCores);

  /* ctor to use for string & compound join */
  TupleJoiner(const rowgroup::RowGroup& smallInput, const rowgroup::RowGroup& largeInput,
              const std::vector<uint32_t>& smallJoinColumns, const std::vector<uint32_t>& largeJoinColumns,
              joblist::JoinType jt, threadpool::ThreadPool* jsThreadPool, joblist::ResourceManager* rm,
              const uint64_t numCores);

  ~TupleJoiner();

  size_t size() const;
  void insert(rowgroup::Row& r, bool zeroTheRid = true);  // not thread-safe
  void insertRGData(rowgroup::RowGroup& rg, uint threadID);
  void doneInserting();

  /* match() returns the small-side rows that match the large-side row.
     On a UM join it uses largeSideRow;
     on a PM join it uses index and threadID.
  */
  void match(rowgroup::Row& largeSideRow, uint32_t index, uint32_t threadID,
             std::vector<rowgroup::Row::Pointer>* matches);
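
  // Hedged illustration of the two calling modes (variable names are hypothetical):
  //
  //   // UM join: the probe key is taken from largeSideRow.
  //   joiner.match(largeRow, 0, threadID, &matches);
  //
  //   // PM join: index and threadID select pre-computed results, presumably the
  //   // per-thread arrays handed in via setPMJoinResults().
  //   joiner.match(largeRow, rowIndex, threadID, &matches);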

  /* On a PM left outer join + aggregation, the result is already complete.
     No need to match, just mark.
  */
  void markMatches(uint32_t threadID, uint32_t rowCount);

  /* For small outer joins, this is how matches are marked now. */
  void markMatches(uint32_t threadID, const std::vector<rowgroup::Row::Pointer>& matches);

  /* Some accessors */
  inline bool inPM() const
  {
    return joinAlg == PM;
  }
  inline bool inUM() const
  {
    return joinAlg == UM;
  }
  inline bool onDisk() const
  {
    return _convertToDiskJoin;
  }
  void setInPM();
  void setInUM(std::vector<rowgroup::RGData>& rgs);
  void umJoinConvert(uint threadID, std::vector<rowgroup::RGData>& rgs, size_t begin, size_t end);

  // TODO: these are currently in use by edge cases, e.g., converting to a disk
  // join. Would be nice to make those cases use the rgdata variants above.
  void setInUM();
  void umJoinConvert(size_t begin, size_t end);

  void setThreadCount(uint32_t cnt);
  void setPMJoinResults(std::shared_ptr<std::vector<uint32_t>[]>, uint32_t threadID);
  std::shared_ptr<std::vector<uint32_t>[]> getPMJoinArrays(uint32_t threadID);
  RowPointersVec& getSmallSide()
  {
    return *rows;
  }
  inline bool smallOuterJoin()
  {
    return ((joinType & joblist::SMALLOUTER) != 0);
  }
  inline bool largeOuterJoin()
  {
    return ((joinType & joblist::LARGEOUTER) != 0);
  }
  inline bool innerJoin()
  {
    return joinType == joblist::INNER;
  }
  inline bool fullOuterJoin()
  {
    return (smallOuterJoin() && largeOuterJoin());
  }
  inline joblist::JoinType getJoinType()
  {
    return joinType;
  }
  inline const rowgroup::RowGroup& getSmallRG()
  {
    return smallRG;
  }
  inline const rowgroup::RowGroup& getLargeRG()
  {
    return largeRG;
  }
  inline uint32_t getSmallKeyColumn()
  {
    return smallKeyColumns[0];
  }
  inline uint32_t getLargeKeyColumn()
  {
    return largeKeyColumns[0];
  }
  bool hasNullJoinColumn(const rowgroup::Row& largeRow) const;
  void getUnmarkedRows(std::vector<rowgroup::Row::Pointer>* out);
  std::string getTableName() const;
  void setTableName(const std::string& tname);

  /* To allow sorting */
  bool operator<(const TupleJoiner&) const;

  /* Typeless join interface */
  inline bool isTypelessJoin()
  {
    return typelessJoin;
  }
  inline bool isSignedUnsignedJoin()
  {
    return bSignedUnsignedJoin;
  }
  inline const std::vector<uint32_t>& getSmallKeyColumns()
  {
    return smallKeyColumns;
  }
  inline const std::vector<uint32_t>& getLargeKeyColumns()
  {
    return largeKeyColumns;
  }
  inline uint32_t getKeyLength()
  {
    return keyLength;
  }

  /* Runtime casual partitioning support */
  inline const boost::scoped_array<bool>& discreteCPValues()
  {
    return discreteValues;
  }
  inline const boost::scoped_array<std::vector<int128_t>>& getCPData()
  {
    return cpValues;
  }
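
  // Hedged interpretation of the two CP accessors above (based on the comment on the
  // cpValues member below): for key column i, if discreteCPValues()[i] is set,
  // getCPData()[i] holds the discrete values seen; otherwise getCPData()[i][0] holds
  // the minimum and getCPData()[i][1] the maximum.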

  inline void setUniqueLimit(uint32_t limit)
  {
    uniqueLimit = limit;
  }

  /* Semi-join interface */
  inline bool semiJoin()
  {
    return ((joinType & joblist::SEMI) != 0);
  }
  inline bool antiJoin()
  {
    return ((joinType & joblist::ANTI) != 0);
  }
  inline bool scalar()
  {
    return ((joinType & joblist::SCALAR) != 0);
  }
  inline bool matchnulls()
  {
    return ((joinType & joblist::MATCHNULLS) != 0);
  }
  inline bool hasFEFilter()
  {
    return fe.get();
  }
  inline boost::shared_ptr<funcexp::FuncExpWrapper> getFcnExpFilter()
  {
    return fe;
  }
  void setFcnExpFilter(boost::shared_ptr<funcexp::FuncExpWrapper> fe);
  inline bool evaluateFilter(rowgroup::Row& r, uint32_t index)
  {
    return fes[index].evaluate(&r);
  }
  inline uint64_t getJoinNullValue()
  {
    return joblist::BIGINTNULL;  // a normalized NULL value
  }
  inline uint64_t smallNullValue()
  {
    return nullValueForJoinColumn;
  }

  // Wide-DECIMAL JOIN
  bool joinHasSkewedKeyColumn();
  inline const std::vector<uint32_t>& getSmallSideColumnsWidths() const
  {
    return smallRG.getColWidths();
  }
  // Disk-based join support
  void clearData();
  std::shared_ptr<TupleJoiner> copyForDiskJoin();
  bool isFinished()
  {
    return finished;
  }
  void setConvertToDiskJoin();
  void abort()
  {
    wasAborted_ = true;
  }

  void initHashMaps(uint32_t& smallJoinColumn);
  void clearHashMaps();

 private:
  template <typename K, typename V>
  using HashMapTemplate = std::unordered_multimap<K, V, hasher, std::equal_to<K>,
                                                  utils::STLPoolAllocator<std::pair<const K, V>>>;
  using hash_t = HashMapTemplate<int64_t, uint8_t*>;
  using sthash_t = HashMapTemplate<int64_t, rowgroup::Row::Pointer>;
  using typelesshash_t = HashMapTemplate<TypelessData, rowgroup::Row::Pointer>;
  using ldhash_t = HashMapTemplate<long double, rowgroup::Row::Pointer>;

  typedef hash_t::iterator iterator;
  typedef typelesshash_t::iterator thIterator;
  typedef ldhash_t::iterator ldIterator;

  TupleJoiner();
  TupleJoiner(const TupleJoiner&) = delete;
  TupleJoiner& operator=(const TupleJoiner&) = delete;
  void getBucketCount();

  rowgroup::RGData smallNullMemory;

  std::vector<std::unique_ptr<hash_t>> h;  // used for UM joins on ints
  std::vector<std::unique_ptr<sthash_t>>
      sth;  // used for UM joins on ints where the backing table uses a string table
  std::vector<std::unique_ptr<ldhash_t>> ld;  // used for UM joins on long double
  RowPointersVecUP rows;                      // used for PM joins

  /* This struct is rough. The BPP-JL stores the parsed results for
     the logical block being processed. There are X threads at once, so
     up to X logical blocks being processed. For each of those there's a vector
     of matches. Each match is an index into 'rows'. */
  std::shared_ptr<std::shared_ptr<std::vector<uint32_t>[]>[]> pmJoinResults;
  rowgroup::RowGroup smallRG, largeRG;
  boost::scoped_array<rowgroup::Row> smallRow;

  rowgroup::Row smallNullRow;

  enum JoinAlg
  {
    INSERTING,
    PM,
    UM,
    LARGE
  };
  JoinAlg joinAlg;
  joblist::JoinType joinType;
  uint32_t threadCount;
  std::string tableName;

  /* vars & fcns for the typeless join */
  bool typelessJoin;
  std::vector<uint32_t> smallKeyColumns, largeKeyColumns;
  std::vector<std::unique_ptr<typelesshash_t>> ht;  // used for UM joins on strings
  uint32_t keyLength;
  std::vector<utils::FixedAllocator> storedKeyAlloc;
  std::vector<utils::FixedAllocator> tmpKeyAlloc;
  bool bSignedUnsignedJoin;  // Set if we have a signed vs unsigned compare in a join. When not set,
                             // we can skip checking the sign bit.

  /* semi-join vars & fcns */
  boost::shared_ptr<funcexp::FuncExpWrapper> fe;
  boost::scoped_array<funcexp::FuncExpWrapper> fes;  // holds X copies of fe, one per thread
  // This var is only used to normalize the NULL values for single-column joins;
  // it will have to change when/if we need to support that for compound or string joins.
  int64_t nullValueForJoinColumn;

  /* Runtime casual partitioning support */
  void updateCPData(const rowgroup::Row& r);
  boost::scoped_array<bool> discreteValues;
  boost::scoped_array<std::vector<int128_t>> cpValues;  // if !discreteValues, [0] has min, [1] has max
  uint32_t uniqueLimit;
  bool finished;

  // multithreaded UM hash table construction
  int numCores;
  uint bucketCount;
  uint bucketMask;
  boost::scoped_array<boost::mutex> m_bucketLocks;
  boost::mutex m_cpValuesLock;
  utils::Hasher_r bucketPicker;
  const uint32_t bpSeed = 0x4545e1d7;  // an arbitrary random #
  threadpool::ThreadPool* jobstepThreadPool;
  void um_insertTypeless(uint threadID, uint rowcount, rowgroup::Row& r);
  void um_insertLongDouble(uint rowcount, rowgroup::Row& r);
  void um_insertInlineRows(uint rowcount, rowgroup::Row& r);
  void um_insertStringTable(uint rowcount, rowgroup::Row& r);

  template <typename buckets_t, typename hash_table_t>
  void bucketsToTables(buckets_t*, hash_table_t&);

  bool _convertToDiskJoin;
  joblist::ResourceManager* resourceManager_ = nullptr;
  bool wasAborted_ = false;
  void initRowsVector();
};

}  // namespace joiner