1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-641 Refactor initial extent elimination support.

This commit also adds support in TupleHashJoinStep::forwardCPData,
although we currently do not support wide decimals as join keys.

Row estimation to determine large-side of the join is also updated.
This commit is contained in:
Gagan Goel
2020-07-24 19:04:25 -04:00
committed by Roman Nozdrin
parent ca53b6348a
commit d3bc68b02f
32 changed files with 1221 additions and 386 deletions

View File

@ -20,16 +20,19 @@
#include <algorithm>
#include <vector>
#include <limits>
#ifdef _MSC_VER
#include <unordered_set>
#else
#ifndef _MSC_VER
#include <tr1/unordered_set>
#else
#include <unordered_set>
#endif
#include "hasher.h"
#include "lbidlist.h"
#include "spinlock.h"
#include "vlarray.h"
#include "widedecimalutils.h"
using namespace std;
using namespace rowgroup;
using namespace utils;
@ -102,18 +105,38 @@ TupleJoiner::TupleJoiner(
smallKeyColumns.push_back(smallJoinColumn);
largeKeyColumns.push_back(largeJoinColumn);
discreteValues.reset(new bool[1]);
cpValues.reset(new vector<int64_t>[1]);
cpValues.reset(new vector<int128_t>[1]);
discreteValues[0] = false;
if (smallRG.isUnsigned(smallKeyColumns[0]))
{
cpValues[0].push_back(numeric_limits<uint64_t>::max());
cpValues[0].push_back(0);
if (datatypes::Decimal::isWideDecimalType(
smallRG.getColType(smallKeyColumns[0]),
smallRG.getColumnWidth(smallKeyColumns[0])))
{
cpValues[0].push_back((int128_t) -1);
cpValues[0].push_back(0);
}
else
{
cpValues[0].push_back((int128_t) numeric_limits<uint64_t>::max());
cpValues[0].push_back(0);
}
}
else
{
cpValues[0].push_back(numeric_limits<int64_t>::max());
cpValues[0].push_back(numeric_limits<int64_t>::min());
if (datatypes::Decimal::isWideDecimalType(
smallRG.getColType(smallKeyColumns[0]),
smallRG.getColumnWidth(smallKeyColumns[0])))
{
cpValues[0].push_back(utils::maxInt128);
cpValues[0].push_back(utils::minInt128);
}
else
{
cpValues[0].push_back((int128_t) numeric_limits<int64_t>::max());
cpValues[0].push_back((int128_t) numeric_limits<int64_t>::min());
}
}
if (smallRG.isUnsigned(smallJoinColumn) != largeRG.isUnsigned(largeJoinColumn))
@ -195,20 +218,40 @@ TupleJoiner::TupleJoiner(
storedKeyAlloc[i].setAllocSize(keyLength);
discreteValues.reset(new bool[smallKeyColumns.size()]);
cpValues.reset(new vector<int64_t>[smallKeyColumns.size()]);
cpValues.reset(new vector<int128_t>[smallKeyColumns.size()]);
for (i = 0; i < smallKeyColumns.size(); i++)
{
discreteValues[i] = false;
if (isUnsigned(smallRG.getColTypes()[smallKeyColumns[i]]))
{
cpValues[i].push_back(static_cast<int64_t>(numeric_limits<uint64_t>::max()));
cpValues[i].push_back(0);
if (datatypes::Decimal::isWideDecimalType(
smallRG.getColType(smallKeyColumns[i]),
smallRG.getColumnWidth(smallKeyColumns[i])))
{
cpValues[i].push_back((int128_t) -1);
cpValues[i].push_back(0);
}
else
{
cpValues[i].push_back((int128_t) numeric_limits<uint64_t>::max());
cpValues[i].push_back(0);
}
}
else
{
cpValues[i].push_back(numeric_limits<int64_t>::max());
cpValues[i].push_back(numeric_limits<int64_t>::min());
if (datatypes::Decimal::isWideDecimalType(
smallRG.getColType(smallKeyColumns[i]),
smallRG.getColumnWidth(smallKeyColumns[i])))
{
cpValues[i].push_back(utils::maxInt128);
cpValues[i].push_back(utils::minInt128);
}
else
{
cpValues[i].push_back(numeric_limits<int64_t>::max());
cpValues[i].push_back(numeric_limits<int64_t>::min());
}
}
}
}
@ -678,8 +721,9 @@ void TupleJoiner::doneInserting()
for (col = 0; col < smallKeyColumns.size(); col++)
{
tr1::unordered_set<int64_t> uniquer;
tr1::unordered_set<int64_t>::iterator uit;
typedef std::tr1::unordered_set<int128_t, utils::Hash128, utils::Equal128> unordered_set_int128;
unordered_set_int128 uniquer;
unordered_set_int128::iterator uit;
sthash_t::iterator sthit;
hash_t::iterator hit;
ldhash_t::iterator ldit;
@ -758,6 +802,12 @@ void TupleJoiner::doneInserting()
}
}
}
else if (datatypes::Decimal::isWideDecimalType(
smallRow.getColType(smallKeyColumns[col]),
smallRow.getColumnWidth(smallKeyColumns[col])))
{
uniquer.insert(*((int128_t*)smallRow.getBinaryField<int128_t>(smallKeyColumns[col])));
}
else if (smallRow.isUnsigned(smallKeyColumns[col]))
{
uniquer.insert((int64_t)smallRow.getUintField(smallKeyColumns[col]));
@ -1080,21 +1130,22 @@ void TupleJoiner::updateCPData(const Row& r)
{
int64_t val = r.getIntField(colIdx);
if (order_swap(val) < order_swap(min) ||
min == numeric_limits<int64_t>::max())
if (order_swap(val) < order_swap((int64_t) min) ||
((int64_t) min) == numeric_limits<int64_t>::max())
{
min = val;
}
if (order_swap(val) > order_swap(max) ||
max == numeric_limits<int64_t>::min())
if (order_swap(val) > order_swap((int64_t) max) ||
((int64_t) max) == numeric_limits<int64_t>::min())
{
max = val;
}
}
else if (r.isUnsigned(colIdx))
{
uint64_t uval;
uint128_t uval;
if (r.getColType(colIdx) == CalpontSystemCatalog::LONGDOUBLE)
{
double dval = (double)roundl(r.getLongDoubleField(smallKeyColumns[col]));
@ -1114,20 +1165,27 @@ void TupleJoiner::updateCPData(const Row& r)
}
}
}
else if (datatypes::Decimal::isWideDecimalType(
r.getColType(colIdx),
r.getColumnWidth(colIdx)))
{
uval = *((int128_t*)r.getBinaryField<int128_t>(colIdx));
}
else
{
uval = r.getUintField(colIdx);
}
if (uval > static_cast<uint64_t>(max))
max = static_cast<int64_t>(uval);
if (uval > static_cast<uint128_t>(max))
max = static_cast<int128_t>(uval);
if (uval < static_cast<uint64_t>(min))
min = static_cast<int64_t>(uval);
if (uval < static_cast<uint128_t>(min))
min = static_cast<int128_t>(uval);
}
else
{
int64_t val = 0;
int128_t val = 0;
if (r.getColType(colIdx) == CalpontSystemCatalog::LONGDOUBLE)
{
double dval = (double)roundl(r.getLongDoubleField(colIdx));
@ -1147,13 +1205,12 @@ void TupleJoiner::updateCPData(const Row& r)
}
}
}
else if (r.getColumnWidth(colIdx) == datatypes::MAXDECIMALWIDTH
&& (r.getColType(colIdx) == CalpontSystemCatalog::DECIMAL
|| r.getColType(colIdx) == CalpontSystemCatalog::UDECIMAL))
else if (datatypes::Decimal::isWideDecimalType(
r.getColType(colIdx),
r.getColumnWidth(colIdx)))
{
// WIP MCOL-641
val = *((int128_t*)r.getBinaryField<int128_t>(colIdx));
}
else
{
val = r.getIntField(colIdx);
@ -1681,20 +1738,40 @@ boost::shared_ptr<TupleJoiner> TupleJoiner::copyForDiskJoin()
ret->uniqueLimit = uniqueLimit;
ret->discreteValues.reset(new bool[smallKeyColumns.size()]);
ret->cpValues.reset(new vector<int64_t>[smallKeyColumns.size()]);
ret->cpValues.reset(new vector<int128_t>[smallKeyColumns.size()]);
for (uint32_t i = 0; i < smallKeyColumns.size(); i++)
{
ret->discreteValues[i] = false;
if (isUnsigned(smallRG.getColTypes()[smallKeyColumns[i]]))
{
ret->cpValues[i].push_back(static_cast<int64_t>(numeric_limits<uint64_t>::max()));
ret->cpValues[i].push_back(0);
if (datatypes::Decimal::isWideDecimalType(
smallRG.getColType(smallKeyColumns[i]),
smallRG.getColumnWidth(smallKeyColumns[i])))
{
ret->cpValues[i].push_back((int128_t) -1);
ret->cpValues[i].push_back(0);
}
else
{
ret->cpValues[i].push_back((int128_t) numeric_limits<uint64_t>::max());
ret->cpValues[i].push_back(0);
}
}
else
{
ret->cpValues[i].push_back(numeric_limits<int64_t>::max());
ret->cpValues[i].push_back(numeric_limits<int64_t>::min());
if (datatypes::Decimal::isWideDecimalType(
smallRG.getColType(smallKeyColumns[i]),
smallRG.getColumnWidth(smallKeyColumns[i])))
{
ret->cpValues[i].push_back(utils::maxInt128);
ret->cpValues[i].push_back(utils::minInt128);
}
else
{
ret->cpValues[i].push_back(numeric_limits<int64_t>::max());
ret->cpValues[i].push_back(numeric_limits<int64_t>::min());
}
}
}