You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-08-01 06:46:55 +03:00
MCOL-641 Refactor initial extent elimination support.
This commit also adds support in TupleHashJoinStep::forwardCPData, although wide decimals are not currently supported as join keys. The row estimation used to determine the large side of the join is also updated.
This commit is contained in:
committed by
Roman Nozdrin
parent
ca53b6348a
commit
d3bc68b02f
@ -30,6 +30,8 @@
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
using int128_t = __int128;
|
||||
|
||||
namespace utils
|
||||
{
|
||||
/** @brief class Hasher
|
||||
@ -346,6 +348,25 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// TODO a copy of these classes also exists in primitiveprocessor.h; consolidate
|
||||
/** @brief Hash functor for 128-bit signed integers.
 *
 * The hash is the first 8 bytes of the value's object representation
 * (the low 64 bits on a little-endian target).  The remaining 8 bytes do
 * not contribute, so values that differ only there hash to the same bucket.
 */
class Hash128
{
public:
    /** @brief Compute the hash of i.
     *
     * @param i the 128-bit value to hash
     * @return the leading 64-bit word of i, converted to size_t
     */
    inline size_t operator()(const int128_t i) const
    {
        // Copy the bytes out instead of dereferencing a uint64_t* that points
        // at an __int128: the result is the same, but reading through the
        // cast pointer violates the strict-aliasing rules.
        uint64_t leadingWord;
        memcpy(&leadingWord, &i, sizeof(leadingWord));
        return leadingWord;
    }
};
|
||||
|
||||
/** @brief Equality predicate for 128-bit signed integers.
 *
 * Compares the two operands with the built-in comparison on int128_t.
 */
class Equal128
{
public:
    /** @brief Tell whether f1 and f2 hold the same value.
     *
     * @param f1 left-hand operand
     * @param f2 right-hand operand
     * @return true when both operands are equal, false otherwise
     */
    inline bool operator()(const int128_t f1, const int128_t f2) const
    {
        const bool identical = !(f1 != f2);
        return identical;
    }
};
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
/** @brief class TupleHasher
|
||||
*
|
||||
|
@ -32,6 +32,9 @@ namespace utils
|
||||
const uint8_t MAXLENGTH16BYTES = 42;
|
||||
const uint8_t MAXLENGTH8BYTES = 23;
|
||||
|
||||
const int128_t minInt128 = int128_t(0x8000000000000000LL) << 64;
|
||||
const int128_t maxInt128 = (int128_t(0x7FFFFFFFFFFFFFFFLL) << 64) + 0xFFFFFFFFFFFFFFFFLL;
|
||||
|
||||
inline bool isWideDecimalNullValue(const int128_t& val)
|
||||
{
|
||||
const uint64_t* ptr = reinterpret_cast<const uint64_t*>(&val);
|
||||
|
@ -161,21 +161,10 @@ const int32_t MIN_TIMESTAMP_VALUE = 0;
|
||||
namespace dataconvert
|
||||
{
|
||||
|
||||
// Decimal has maximum 38 digits with 3 extra chars for dot(.), minus(-), null character(\0)
|
||||
const int MAX_DECIMAL_STRING_LENGTH = 41;
|
||||
|
||||
// WIP MCOL-641
|
||||
using int128_t = __int128;
|
||||
using uint128_t = unsigned __int128;
|
||||
|
||||
// Two-word plain-data view of a 128-bit quantity.  `lo` is declared first
// and `hi` second, so the struct only lines up with the in-memory form of
// __int128 on targets that store the low 64-bit word first.
typedef struct Int128Pod_struct
{
    uint64_t lo;
    uint64_t hi;
} Int128Pod_t;
|
||||
|
||||
enum CalpontDateTimeFormat
|
||||
{
|
||||
CALPONTDATE_ENUM = 1, // date format is: "YYYY-MM-DD"
|
||||
@ -1067,27 +1056,6 @@ public:
|
||||
static size_t writeFractionalPart(int128_t* dec, char* p, const unsigned int buflen,
|
||||
const uint8_t scale);
|
||||
|
||||
/** @brief Set i to the largest value an int128_t can hold (2^127 - 1).
 *
 * @param i [out] receives the maximum value; its previous content is discarded
 */
static inline void int128Max(int128_t& i)
{
    // Build the value arithmetically instead of storing the two 64-bit words
    // through a struct overlay: this avoids aliasing an __int128 as an
    // unrelated type and does not depend on the target's word order.
    i = (static_cast<int128_t>(0x7FFFFFFFFFFFFFFFLL) << 64) |
        static_cast<int128_t>(0xFFFFFFFFFFFFFFFFULL);
}
|
||||
|
||||
/** @brief Set i to the smallest value an int128_t can hold (-2^127).
 *
 * @param i [out] receives the minimum value; its previous content is discarded
 */
static inline void int128Min(int128_t& i)
{
    // Derive the minimum from the maximum (min == -max - 1) so every step
    // stays inside the representable range.  No struct overlay is involved,
    // hence no type aliasing and no dependence on the target's word order.
    const int128_t largest = (static_cast<int128_t>(0x7FFFFFFFFFFFFFFFLL) << 64) |
                             static_cast<int128_t>(0xFFFFFFFFFFFFFFFFULL);
    i = -largest - 1;
}
|
||||
|
||||
/** @brief Set i to the largest value a uint128_t can hold (2^128 - 1).
 *
 * @param i [out] receives the maximum value; its previous content is discarded
 */
static inline void uint128Max(uint128_t& i)
{
    // All 128 bits set.  Computed directly on the integer rather than through
    // a two-word struct overlay, which would alias an unrelated type.
    i = ~static_cast<uint128_t>(0);
}
|
||||
|
||||
static inline std::string constructRegexp(const std::string& str);
|
||||
static inline void trimWhitespace(int64_t& charData);
|
||||
static inline bool isEscapedChar(char c)
|
||||
|
@ -20,16 +20,19 @@
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
#ifdef _MSC_VER
|
||||
#include <unordered_set>
|
||||
#else
|
||||
#ifndef _MSC_VER
|
||||
#include <tr1/unordered_set>
|
||||
#else
|
||||
#include <unordered_set>
|
||||
#endif
|
||||
|
||||
#include "hasher.h"
|
||||
#include "lbidlist.h"
|
||||
#include "spinlock.h"
|
||||
#include "vlarray.h"
|
||||
|
||||
#include "widedecimalutils.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace rowgroup;
|
||||
using namespace utils;
|
||||
@ -102,18 +105,38 @@ TupleJoiner::TupleJoiner(
|
||||
smallKeyColumns.push_back(smallJoinColumn);
|
||||
largeKeyColumns.push_back(largeJoinColumn);
|
||||
discreteValues.reset(new bool[1]);
|
||||
cpValues.reset(new vector<int64_t>[1]);
|
||||
cpValues.reset(new vector<int128_t>[1]);
|
||||
discreteValues[0] = false;
|
||||
|
||||
if (smallRG.isUnsigned(smallKeyColumns[0]))
|
||||
{
|
||||
cpValues[0].push_back(numeric_limits<uint64_t>::max());
|
||||
cpValues[0].push_back(0);
|
||||
if (datatypes::Decimal::isWideDecimalType(
|
||||
smallRG.getColType(smallKeyColumns[0]),
|
||||
smallRG.getColumnWidth(smallKeyColumns[0])))
|
||||
{
|
||||
cpValues[0].push_back((int128_t) -1);
|
||||
cpValues[0].push_back(0);
|
||||
}
|
||||
else
|
||||
{
|
||||
cpValues[0].push_back((int128_t) numeric_limits<uint64_t>::max());
|
||||
cpValues[0].push_back(0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cpValues[0].push_back(numeric_limits<int64_t>::max());
|
||||
cpValues[0].push_back(numeric_limits<int64_t>::min());
|
||||
if (datatypes::Decimal::isWideDecimalType(
|
||||
smallRG.getColType(smallKeyColumns[0]),
|
||||
smallRG.getColumnWidth(smallKeyColumns[0])))
|
||||
{
|
||||
cpValues[0].push_back(utils::maxInt128);
|
||||
cpValues[0].push_back(utils::minInt128);
|
||||
}
|
||||
else
|
||||
{
|
||||
cpValues[0].push_back((int128_t) numeric_limits<int64_t>::max());
|
||||
cpValues[0].push_back((int128_t) numeric_limits<int64_t>::min());
|
||||
}
|
||||
}
|
||||
|
||||
if (smallRG.isUnsigned(smallJoinColumn) != largeRG.isUnsigned(largeJoinColumn))
|
||||
@ -195,20 +218,40 @@ TupleJoiner::TupleJoiner(
|
||||
storedKeyAlloc[i].setAllocSize(keyLength);
|
||||
|
||||
discreteValues.reset(new bool[smallKeyColumns.size()]);
|
||||
cpValues.reset(new vector<int64_t>[smallKeyColumns.size()]);
|
||||
cpValues.reset(new vector<int128_t>[smallKeyColumns.size()]);
|
||||
|
||||
for (i = 0; i < smallKeyColumns.size(); i++)
|
||||
{
|
||||
discreteValues[i] = false;
|
||||
if (isUnsigned(smallRG.getColTypes()[smallKeyColumns[i]]))
|
||||
{
|
||||
cpValues[i].push_back(static_cast<int64_t>(numeric_limits<uint64_t>::max()));
|
||||
cpValues[i].push_back(0);
|
||||
if (datatypes::Decimal::isWideDecimalType(
|
||||
smallRG.getColType(smallKeyColumns[i]),
|
||||
smallRG.getColumnWidth(smallKeyColumns[i])))
|
||||
{
|
||||
cpValues[i].push_back((int128_t) -1);
|
||||
cpValues[i].push_back(0);
|
||||
}
|
||||
else
|
||||
{
|
||||
cpValues[i].push_back((int128_t) numeric_limits<uint64_t>::max());
|
||||
cpValues[i].push_back(0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cpValues[i].push_back(numeric_limits<int64_t>::max());
|
||||
cpValues[i].push_back(numeric_limits<int64_t>::min());
|
||||
if (datatypes::Decimal::isWideDecimalType(
|
||||
smallRG.getColType(smallKeyColumns[i]),
|
||||
smallRG.getColumnWidth(smallKeyColumns[i])))
|
||||
{
|
||||
cpValues[i].push_back(utils::maxInt128);
|
||||
cpValues[i].push_back(utils::minInt128);
|
||||
}
|
||||
else
|
||||
{
|
||||
cpValues[i].push_back(numeric_limits<int64_t>::max());
|
||||
cpValues[i].push_back(numeric_limits<int64_t>::min());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -678,8 +721,9 @@ void TupleJoiner::doneInserting()
|
||||
|
||||
for (col = 0; col < smallKeyColumns.size(); col++)
|
||||
{
|
||||
tr1::unordered_set<int64_t> uniquer;
|
||||
tr1::unordered_set<int64_t>::iterator uit;
|
||||
typedef std::tr1::unordered_set<int128_t, utils::Hash128, utils::Equal128> unordered_set_int128;
|
||||
unordered_set_int128 uniquer;
|
||||
unordered_set_int128::iterator uit;
|
||||
sthash_t::iterator sthit;
|
||||
hash_t::iterator hit;
|
||||
ldhash_t::iterator ldit;
|
||||
@ -758,6 +802,12 @@ void TupleJoiner::doneInserting()
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (datatypes::Decimal::isWideDecimalType(
|
||||
smallRow.getColType(smallKeyColumns[col]),
|
||||
smallRow.getColumnWidth(smallKeyColumns[col])))
|
||||
{
|
||||
uniquer.insert(*((int128_t*)smallRow.getBinaryField<int128_t>(smallKeyColumns[col])));
|
||||
}
|
||||
else if (smallRow.isUnsigned(smallKeyColumns[col]))
|
||||
{
|
||||
uniquer.insert((int64_t)smallRow.getUintField(smallKeyColumns[col]));
|
||||
@ -1080,21 +1130,22 @@ void TupleJoiner::updateCPData(const Row& r)
|
||||
{
|
||||
int64_t val = r.getIntField(colIdx);
|
||||
|
||||
if (order_swap(val) < order_swap(min) ||
|
||||
min == numeric_limits<int64_t>::max())
|
||||
if (order_swap(val) < order_swap((int64_t) min) ||
|
||||
((int64_t) min) == numeric_limits<int64_t>::max())
|
||||
{
|
||||
min = val;
|
||||
}
|
||||
|
||||
if (order_swap(val) > order_swap(max) ||
|
||||
max == numeric_limits<int64_t>::min())
|
||||
if (order_swap(val) > order_swap((int64_t) max) ||
|
||||
((int64_t) max) == numeric_limits<int64_t>::min())
|
||||
{
|
||||
max = val;
|
||||
}
|
||||
}
|
||||
else if (r.isUnsigned(colIdx))
|
||||
{
|
||||
uint64_t uval;
|
||||
uint128_t uval;
|
||||
|
||||
if (r.getColType(colIdx) == CalpontSystemCatalog::LONGDOUBLE)
|
||||
{
|
||||
double dval = (double)roundl(r.getLongDoubleField(smallKeyColumns[col]));
|
||||
@ -1114,20 +1165,27 @@ void TupleJoiner::updateCPData(const Row& r)
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (datatypes::Decimal::isWideDecimalType(
|
||||
r.getColType(colIdx),
|
||||
r.getColumnWidth(colIdx)))
|
||||
{
|
||||
uval = *((int128_t*)r.getBinaryField<int128_t>(colIdx));
|
||||
}
|
||||
else
|
||||
{
|
||||
uval = r.getUintField(colIdx);
|
||||
}
|
||||
|
||||
if (uval > static_cast<uint64_t>(max))
|
||||
max = static_cast<int64_t>(uval);
|
||||
if (uval > static_cast<uint128_t>(max))
|
||||
max = static_cast<int128_t>(uval);
|
||||
|
||||
if (uval < static_cast<uint64_t>(min))
|
||||
min = static_cast<int64_t>(uval);
|
||||
if (uval < static_cast<uint128_t>(min))
|
||||
min = static_cast<int128_t>(uval);
|
||||
}
|
||||
else
|
||||
{
|
||||
int64_t val = 0;
|
||||
int128_t val = 0;
|
||||
|
||||
if (r.getColType(colIdx) == CalpontSystemCatalog::LONGDOUBLE)
|
||||
{
|
||||
double dval = (double)roundl(r.getLongDoubleField(colIdx));
|
||||
@ -1147,13 +1205,12 @@ void TupleJoiner::updateCPData(const Row& r)
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (r.getColumnWidth(colIdx) == datatypes::MAXDECIMALWIDTH
|
||||
&& (r.getColType(colIdx) == CalpontSystemCatalog::DECIMAL
|
||||
|| r.getColType(colIdx) == CalpontSystemCatalog::UDECIMAL))
|
||||
else if (datatypes::Decimal::isWideDecimalType(
|
||||
r.getColType(colIdx),
|
||||
r.getColumnWidth(colIdx)))
|
||||
{
|
||||
// WIP MCOL-641
|
||||
val = *((int128_t*)r.getBinaryField<int128_t>(colIdx));
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
val = r.getIntField(colIdx);
|
||||
@ -1681,20 +1738,40 @@ boost::shared_ptr<TupleJoiner> TupleJoiner::copyForDiskJoin()
|
||||
ret->uniqueLimit = uniqueLimit;
|
||||
|
||||
ret->discreteValues.reset(new bool[smallKeyColumns.size()]);
|
||||
ret->cpValues.reset(new vector<int64_t>[smallKeyColumns.size()]);
|
||||
ret->cpValues.reset(new vector<int128_t>[smallKeyColumns.size()]);
|
||||
|
||||
for (uint32_t i = 0; i < smallKeyColumns.size(); i++)
|
||||
{
|
||||
ret->discreteValues[i] = false;
|
||||
if (isUnsigned(smallRG.getColTypes()[smallKeyColumns[i]]))
|
||||
{
|
||||
ret->cpValues[i].push_back(static_cast<int64_t>(numeric_limits<uint64_t>::max()));
|
||||
ret->cpValues[i].push_back(0);
|
||||
if (datatypes::Decimal::isWideDecimalType(
|
||||
smallRG.getColType(smallKeyColumns[i]),
|
||||
smallRG.getColumnWidth(smallKeyColumns[i])))
|
||||
{
|
||||
ret->cpValues[i].push_back((int128_t) -1);
|
||||
ret->cpValues[i].push_back(0);
|
||||
}
|
||||
else
|
||||
{
|
||||
ret->cpValues[i].push_back((int128_t) numeric_limits<uint64_t>::max());
|
||||
ret->cpValues[i].push_back(0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ret->cpValues[i].push_back(numeric_limits<int64_t>::max());
|
||||
ret->cpValues[i].push_back(numeric_limits<int64_t>::min());
|
||||
if (datatypes::Decimal::isWideDecimalType(
|
||||
smallRG.getColType(smallKeyColumns[i]),
|
||||
smallRG.getColumnWidth(smallKeyColumns[i])))
|
||||
{
|
||||
ret->cpValues[i].push_back(utils::maxInt128);
|
||||
ret->cpValues[i].push_back(utils::minInt128);
|
||||
}
|
||||
else
|
||||
{
|
||||
ret->cpValues[i].push_back(numeric_limits<int64_t>::max());
|
||||
ret->cpValues[i].push_back(numeric_limits<int64_t>::min());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -287,7 +287,7 @@ public:
|
||||
{
|
||||
return discreteValues;
|
||||
}
|
||||
inline const boost::scoped_array<std::vector<int64_t> >& getCPData()
|
||||
inline const boost::scoped_array<std::vector<int128_t> >& getCPData()
|
||||
{
|
||||
return cpValues;
|
||||
}
|
||||
@ -413,7 +413,7 @@ private:
|
||||
/* Runtime casual partitioning support */
|
||||
void updateCPData(const rowgroup::Row& r);
|
||||
boost::scoped_array<bool> discreteValues;
|
||||
boost::scoped_array<std::vector<int64_t> > cpValues; // if !discreteValues, [0] has min, [1] has max
|
||||
boost::scoped_array<std::vector<int128_t> > cpValues; // if !discreteValues, [0] has min, [1] has max
|
||||
uint32_t uniqueLimit;
|
||||
bool finished;
|
||||
|
||||
|
@ -66,6 +66,7 @@ typedef const struct charset_info_st CHARSET_INFO;
|
||||
// Workaround for my_global.h #define of isnan(X) causing a std::std namespace
|
||||
|
||||
using int128_t = __int128;
|
||||
using uint128_t = unsigned __int128;
|
||||
|
||||
namespace rowgroup
|
||||
{
|
||||
|
Reference in New Issue
Block a user