|
|
|
|
@@ -18,6 +18,8 @@
|
|
|
|
|
#include <algorithm>
|
|
|
|
|
#include <cstddef>
|
|
|
|
|
#include <cstdint>
|
|
|
|
|
#include <optional>
|
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
|
|
#include "rulebased_optimizer.h"
|
|
|
|
|
|
|
|
|
|
@@ -33,6 +35,9 @@
|
|
|
|
|
namespace optimizer
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
|
using FilterRangeBounds = std::vector<std::pair<T, T>>;
|
|
|
|
|
|
|
|
|
|
void applyParallelCES_exists(execplan::CalpontSelectExecutionPlan& csep, const size_t id);
|
|
|
|
|
|
|
|
|
|
static const std::string RewrittenSubTableAliasPrefix = "$added_sub_";
|
|
|
|
|
@@ -163,6 +168,55 @@ execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPl
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Populates range bounds based on column statistics
|
|
|
|
|
// Returns optional with bounds if successful, nullopt otherwise
|
|
|
|
|
template <typename T>
|
|
|
|
|
std::optional<FilterRangeBounds<T>> populateRangeBounds(execplan::SimpleColumn* keyColumn,
|
|
|
|
|
optimizer::RBOptimizerContext& ctx)
|
|
|
|
|
{
|
|
|
|
|
cal_impl_if::SchemaAndTableName schemaAndTableName = {keyColumn->schemaName(), keyColumn->tableName()};
|
|
|
|
|
auto tableColumnsStatisticsIt = ctx.gwi.tableStatisticsMap.find(schemaAndTableName);
|
|
|
|
|
if (tableColumnsStatisticsIt == ctx.gwi.tableStatisticsMap.end())
|
|
|
|
|
{
|
|
|
|
|
return std::nullopt;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto columnStatisticsIt = tableColumnsStatisticsIt->second.find(keyColumn->columnName());
|
|
|
|
|
if (columnStatisticsIt == tableColumnsStatisticsIt->second.end())
|
|
|
|
|
{
|
|
|
|
|
return std::nullopt;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto columnStatistics = columnStatisticsIt->second;
|
|
|
|
|
|
|
|
|
|
// TODO configurable parallel factor via session variable
|
|
|
|
|
// NB now histogram size is the way to control parallel factor with 16 being the maximum
|
|
|
|
|
size_t numberOfUnionUnits = std::min(columnStatistics.get_json_histogram().size(), MaxParallelFactor);
|
|
|
|
|
size_t numberOfBucketsPerUnionUnit = columnStatistics.get_json_histogram().size() / numberOfUnionUnits;
|
|
|
|
|
|
|
|
|
|
FilterRangeBounds<T> bounds;
|
|
|
|
|
|
|
|
|
|
// Loop over buckets to produce filter ranges
|
|
|
|
|
for (size_t i = 0; i < numberOfUnionUnits - 1; ++i)
|
|
|
|
|
{
|
|
|
|
|
auto bucket = columnStatistics.get_json_histogram().begin() + i * numberOfBucketsPerUnionUnit;
|
|
|
|
|
auto endBucket = columnStatistics.get_json_histogram().begin() + (i + 1) * numberOfBucketsPerUnionUnit;
|
|
|
|
|
T currentLowerBound = *(uint32_t*)bucket->start_value.data();
|
|
|
|
|
T currentUpperBound = *(uint32_t*)endBucket->start_value.data();
|
|
|
|
|
bounds.push_back({currentLowerBound, currentUpperBound});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Add last range
|
|
|
|
|
// NB despite the fact that currently Histogram_json_hb has the last bucket that has end as its start
|
|
|
|
|
auto lastBucket =
|
|
|
|
|
columnStatistics.get_json_histogram().begin() + (numberOfUnionUnits - 1) * numberOfBucketsPerUnionUnit;
|
|
|
|
|
T currentLowerBound = *(uint32_t*)lastBucket->start_value.data();
|
|
|
|
|
T currentUpperBound = *(uint32_t*)columnStatistics.get_last_bucket_end_endp().data();
|
|
|
|
|
bounds.push_back({currentLowerBound, currentUpperBound});
|
|
|
|
|
|
|
|
|
|
return bounds;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TODO char and other numerical types support
|
|
|
|
|
execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
|
|
|
|
|
execplan::CalpontSelectExecutionPlan& csep, execplan::CalpontSystemCatalog::TableAliasName& table,
|
|
|
|
|
@@ -179,46 +233,12 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TODO char and other numerical types support
|
|
|
|
|
std::vector<std::pair<uint64_t, uint64_t>> bounds;
|
|
|
|
|
{
|
|
|
|
|
cal_impl_if::SchemaAndTableName schemaAndTableName = {keyColumn->schemaName(), keyColumn->tableName()};
|
|
|
|
|
auto tableColumnsStatisticsIt = ctx.gwi.tableStatisticsMap.find(schemaAndTableName);
|
|
|
|
|
if (tableColumnsStatisticsIt == ctx.gwi.tableStatisticsMap.end())
|
|
|
|
|
auto boundsOpt = populateRangeBounds<uint64_t>(keyColumn, ctx);
|
|
|
|
|
if (!boundsOpt.has_value())
|
|
|
|
|
{
|
|
|
|
|
return unionVec;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto columnStatisticsIt = tableColumnsStatisticsIt->second.find(keyColumn->columnName());
|
|
|
|
|
if (columnStatisticsIt == tableColumnsStatisticsIt->second.end())
|
|
|
|
|
{
|
|
|
|
|
return unionVec;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto columnStatistics = columnStatisticsIt->second;
|
|
|
|
|
|
|
|
|
|
// TODO configurable parallel factor via session variable
|
|
|
|
|
// NB now histogram size is the way to control parallel factor with 16 being the maximum
|
|
|
|
|
size_t numberOfUnionUnits = std::min(columnStatistics.get_json_histogram().size(), MaxParallelFactor);
|
|
|
|
|
size_t numberOfBucketsPerUnionUnit = columnStatistics.get_json_histogram().size() / numberOfUnionUnits;
|
|
|
|
|
|
|
|
|
|
// Loop over buckets to produce filter ranges
|
|
|
|
|
for (size_t i = 0; i < numberOfUnionUnits - 1; ++i)
|
|
|
|
|
{
|
|
|
|
|
auto bucket = columnStatistics.get_json_histogram().begin() + i * numberOfBucketsPerUnionUnit;
|
|
|
|
|
auto endBucket = columnStatistics.get_json_histogram().begin() + (i + 1) * numberOfBucketsPerUnionUnit;
|
|
|
|
|
uint64_t currentLowerBound = *(uint32_t*)bucket->start_value.data();
|
|
|
|
|
uint64_t currentUpperBound = *(uint32_t*)endBucket->start_value.data();
|
|
|
|
|
bounds.push_back({currentLowerBound, currentUpperBound});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Add last range
|
|
|
|
|
// NB despite the fact that currently Histogram_json_hb has the last bucket that has end as its start
|
|
|
|
|
auto lastBucket = columnStatistics.get_json_histogram().begin() +
|
|
|
|
|
(numberOfUnionUnits - 1) * numberOfBucketsPerUnionUnit;
|
|
|
|
|
uint64_t currentLowerBound = *(uint32_t*)lastBucket->start_value.data();
|
|
|
|
|
uint64_t currentUpperBound = *(uint32_t*)columnStatistics.get_last_bucket_end_endp().data();
|
|
|
|
|
bounds.push_back({currentLowerBound, currentUpperBound});
|
|
|
|
|
}
|
|
|
|
|
auto& bounds = boundsOpt.value();
|
|
|
|
|
|
|
|
|
|
for (auto& bound : bounds)
|
|
|
|
|
{
|
|
|
|
|
@@ -243,15 +263,19 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerCon
|
|
|
|
|
cal_impl_if::SchemaAndTableName schemaAndTableName = {table.schema, table.table};
|
|
|
|
|
std::cout << "Processing table schema " << schemaAndTableName.schema << " table "
|
|
|
|
|
<< schemaAndTableName.table << " alias " << table.alias << std::endl;
|
|
|
|
|
auto columnStatistics = ctx.gwi.findStatisticsForATable(schemaAndTableName);
|
|
|
|
|
std::cout << "Column statistics: " << columnStatistics.has_value() << std::endl;
|
|
|
|
|
auto anyColumnStatistics = ctx.gwi.findStatisticsForATable(schemaAndTableName);
|
|
|
|
|
std::cout << "Column statistics: " << anyColumnStatistics.has_value() << std::endl;
|
|
|
|
|
// TODO add column statistics check to the corresponding match
|
|
|
|
|
if (!table.isColumnstore() && columnStatistics)
|
|
|
|
|
if (!table.isColumnstore() && anyColumnStatistics)
|
|
|
|
|
{
|
|
|
|
|
// Don't copy filters for this
|
|
|
|
|
auto derivedSCEP = csep.cloneForTableWORecursiveSelects(table);
|
|
|
|
|
// Remove the filters as they were pushed down to union units
|
|
|
|
|
// This is inappropriate for EXISTS filter and join conditions
|
|
|
|
|
// WIP replace with filters applied to filters, so that only relevant filters are left
|
|
|
|
|
// WIP Ugly hack to avoid leaks
|
|
|
|
|
auto unusedFilters = derivedSCEP->filters();
|
|
|
|
|
delete unusedFilters;
|
|
|
|
|
derivedSCEP->filters(nullptr);
|
|
|
|
|
auto* derivedCSEP = dynamic_cast<execplan::CalpontSelectExecutionPlan*>(derivedSCEP.get());
|
|
|
|
|
if (!derivedCSEP)
|
|
|
|
|
@@ -283,8 +307,7 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerCon
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns;
|
|
|
|
|
// [[maybe_unused]] size_t colPosition = 0;
|
|
|
|
|
// execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns;
|
|
|
|
|
// replace parent CSEP RCs with derived table RCs using ScheamAndTableName -> tableAlias map
|
|
|
|
|
if (!newDerivedTableList.empty())
|
|
|
|
|
{
|
|
|
|
|
@@ -298,8 +321,6 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerCon
|
|
|
|
|
// If there is an alias in the map then it is a new derived table
|
|
|
|
|
auto sc = dynamic_cast<execplan::SimpleColumn*>(rc.get());
|
|
|
|
|
std::vector<execplan::SimpleColumn*> scs;
|
|
|
|
|
// execplan::ParseTree pt(rc.get());
|
|
|
|
|
// pt.walk(execplan::getSimpleCols, &scs);
|
|
|
|
|
|
|
|
|
|
std::cout << "Processing RC schema " << sc->schemaName() << " table " << sc->tableName() << " alias "
|
|
|
|
|
<< sc->tableAlias() << std::endl;
|
|
|
|
|
@@ -330,6 +351,9 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerCon
|
|
|
|
|
// But this is inappropriate for EXISTS filter and join conditions
|
|
|
|
|
// There must be no derived at this point, so we can replace it with the new derived table list
|
|
|
|
|
|
|
|
|
|
// WIP hardcoded query
|
|
|
|
|
if (csep.filters() && csep.filters()->data())
|
|
|
|
|
{
|
|
|
|
|
auto* left = dynamic_cast<execplan::SimpleFilter*>(csep.filters()->data());
|
|
|
|
|
if (left)
|
|
|
|
|
{
|
|
|
|
|
@@ -339,7 +363,8 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerCon
|
|
|
|
|
auto* lhsSC = dynamic_cast<execplan::SimpleColumn*>(lhs);
|
|
|
|
|
if (lhsSC)
|
|
|
|
|
{
|
|
|
|
|
auto newTableAlias = tableAliasMap.find({lhsSC->schemaName(), lhsSC->tableName(), lhsSC->tableAlias(), "", false});
|
|
|
|
|
auto newTableAlias =
|
|
|
|
|
tableAliasMap.find({lhsSC->schemaName(), lhsSC->tableName(), lhsSC->tableAlias(), "", false});
|
|
|
|
|
// WIP Leak loosing previous lhs
|
|
|
|
|
if (newTableAlias != tableAliasMap.end())
|
|
|
|
|
{
|
|
|
|
|
@@ -352,6 +377,7 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerCon
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
csep.derivedTableList(newDerivedTableList);
|
|
|
|
|
// Replace table list with new table list populated with union units
|
|
|
|
|
|