You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-11-03 17:13:17 +03:00
feat(rbo,rules): preparation to replace derived-based with table-based approach
This commit is contained in:
@@ -437,6 +437,10 @@ class CalpontSystemCatalog : public datatypes::SystemCatalog
|
|||||||
: schema(sch), table(tb), alias(al), view(v), fisColumnStore(true)
|
: schema(sch), table(tb), alias(al), view(v), fisColumnStore(true)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
TableAliasName(const std::string& sch, const std::string& tb, const std::string& al, const std::string& v, const bool isColumnStore)
|
||||||
|
: schema(sch), table(tb), alias(al), view(v), fisColumnStore(isColumnStore)
|
||||||
|
{
|
||||||
|
}
|
||||||
std::string schema;
|
std::string schema;
|
||||||
std::string table;
|
std::string table;
|
||||||
std::string alias;
|
std::string alias;
|
||||||
|
|||||||
@@ -118,7 +118,6 @@ typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap;
|
|||||||
using ColumnName = std::string;
|
using ColumnName = std::string;
|
||||||
using ColumnStatisticsMap = std::unordered_map<ColumnName, Histogram_json_hb>;
|
using ColumnStatisticsMap = std::unordered_map<ColumnName, Histogram_json_hb>;
|
||||||
using TableStatisticsMap = std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>;
|
using TableStatisticsMap = std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>;
|
||||||
using TableAliasMap = std::unordered_map<SchemaAndTableName, std::string, SchemaAndTableNameHash>;
|
|
||||||
|
|
||||||
// This structure is used to store MDB AST -> CSEP translation context.
|
// This structure is used to store MDB AST -> CSEP translation context.
|
||||||
// There is a column statistics for some columns in a query.
|
// There is a column statistics for some columns in a query.
|
||||||
|
|||||||
@@ -18,17 +18,16 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <limits>
|
|
||||||
|
|
||||||
#include "rulebased_optimizer.h"
|
#include "rulebased_optimizer.h"
|
||||||
|
|
||||||
#include "constantcolumn.h"
|
#include "constantcolumn.h"
|
||||||
#include "execplan/calpontselectexecutionplan.h"
|
#include "execplan/calpontselectexecutionplan.h"
|
||||||
#include "execplan/simplecolumn.h"
|
#include "execplan/simplecolumn.h"
|
||||||
#include "existsfilter.h"
|
|
||||||
#include "logicoperator.h"
|
#include "logicoperator.h"
|
||||||
#include "operator.h"
|
#include "operator.h"
|
||||||
#include "predicateoperator.h"
|
#include "predicateoperator.h"
|
||||||
|
#include "rbo_apply_parallel_ces.h"
|
||||||
#include "simplefilter.h"
|
#include "simplefilter.h"
|
||||||
|
|
||||||
namespace optimizer
|
namespace optimizer
|
||||||
@@ -75,7 +74,6 @@ bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep)
|
|||||||
execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, execplan::SimpleColumn& column,
|
execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, execplan::SimpleColumn& column,
|
||||||
std::pair<uint64_t, uint64_t>& bound)
|
std::pair<uint64_t, uint64_t>& bound)
|
||||||
{
|
{
|
||||||
|
|
||||||
auto tableKeyColumnLeftOp = new execplan::SimpleColumn(column);
|
auto tableKeyColumnLeftOp = new execplan::SimpleColumn(column);
|
||||||
tableKeyColumnLeftOp->resultType(column.resultType());
|
tableKeyColumnLeftOp->resultType(column.resultType());
|
||||||
|
|
||||||
@@ -117,15 +115,18 @@ execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, exe
|
|||||||
|
|
||||||
// Looking for a projected column that comes first in an available index and has EI statistics
|
// Looking for a projected column that comes first in an available index and has EI statistics
|
||||||
// INV nullptr signifies that no suitable column was found
|
// INV nullptr signifies that no suitable column was found
|
||||||
execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx)
|
execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPlan& csep,
|
||||||
|
optimizer::RBOptimizerContext& ctx)
|
||||||
{
|
{
|
||||||
for (auto& rc : csep.returnedCols())
|
for (auto& rc : csep.returnedCols())
|
||||||
{
|
{
|
||||||
|
// TODO extract SC from RC
|
||||||
auto* simpleColumn = dynamic_cast<execplan::SimpleColumn*>(rc.get());
|
auto* simpleColumn = dynamic_cast<execplan::SimpleColumn*>(rc.get());
|
||||||
if (simpleColumn)
|
if (simpleColumn)
|
||||||
{
|
{
|
||||||
cal_impl_if::SchemaAndTableName schemaAndTableNam = {simpleColumn->schemaName(), simpleColumn->tableName()};
|
cal_impl_if::SchemaAndTableName schemaAndTableName = {simpleColumn->schemaName(),
|
||||||
auto columnStatistics = ctx.gwi.findStatisticsForATable(schemaAndTableNam);
|
simpleColumn->tableName()};
|
||||||
|
auto columnStatistics = ctx.gwi.findStatisticsForATable(schemaAndTableName);
|
||||||
if (!columnStatistics)
|
if (!columnStatistics)
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
@@ -190,7 +191,8 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
|
|||||||
|
|
||||||
// Add last range
|
// Add last range
|
||||||
// NB despite the fact that currently Histogram_json_hb has the last bucket that has end as its start
|
// NB despite the fact that currently Histogram_json_hb has the last bucket that has end as its start
|
||||||
auto lastBucket = columnStatistics.get_json_histogram().begin() + (numberOfUnionUnits - 1) * numberOfBucketsPerUnionUnit;
|
auto lastBucket =
|
||||||
|
columnStatistics.get_json_histogram().begin() + (numberOfUnionUnits - 1) * numberOfBucketsPerUnionUnit;
|
||||||
uint64_t currentLowerBound = *(uint32_t*)lastBucket->start_value.data();
|
uint64_t currentLowerBound = *(uint32_t*)lastBucket->start_value.data();
|
||||||
uint64_t currentUpperBound = *(uint32_t*)columnStatistics.get_last_bucket_end_endp().data();
|
uint64_t currentUpperBound = *(uint32_t*)columnStatistics.get_last_bucket_end_endp().data();
|
||||||
bounds.push_back({currentLowerBound, currentUpperBound});
|
bounds.push_back({currentLowerBound, currentUpperBound});
|
||||||
@@ -213,17 +215,21 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerCon
|
|||||||
cal_impl_if::TableAliasMap tableAliasMap;
|
cal_impl_if::TableAliasMap tableAliasMap;
|
||||||
bool ruleHasBeenApplied = false;
|
bool ruleHasBeenApplied = false;
|
||||||
|
|
||||||
// ATM Must be only 1 table
|
|
||||||
for (auto& table : tables)
|
for (auto& table : tables)
|
||||||
{
|
{
|
||||||
if (!table.isColumnstore())
|
cal_impl_if::SchemaAndTableName schemaAndTableName = {table.schema, table.table};
|
||||||
|
std::cout << "Processing table schema " << schemaAndTableName.schema << " table "
|
||||||
|
<< schemaAndTableName.table << " alias " << table.alias << std::endl;
|
||||||
|
auto columnStatistics = ctx.gwi.findStatisticsForATable(schemaAndTableName);
|
||||||
|
// TODO add column statistics check to the corresponding match
|
||||||
|
if (!table.isColumnstore() && columnStatistics)
|
||||||
{
|
{
|
||||||
auto derivedSCEP = csep.cloneWORecursiveSelects();
|
auto derivedSCEP = csep.cloneWORecursiveSelects();
|
||||||
// need to add a level here
|
// need to add a level here
|
||||||
std::string tableAlias = RewrittenSubTableAliasPrefix + table.schema + "_" + table.table + "_" +
|
std::string tableAlias = RewrittenSubTableAliasPrefix + table.schema + "_" + table.table + "_" +
|
||||||
std::to_string(ctx.uniqueId);
|
std::to_string(ctx.uniqueId);
|
||||||
// TODO add original alias to support multiple same name tables
|
// TODO add original alias to support multiple same name tables
|
||||||
tableAliasMap.insert({{table.schema, table.table}, tableAlias});
|
tableAliasMap.insert({table, tableAlias});
|
||||||
derivedSCEP->location(execplan::CalpontSelectExecutionPlan::FROM);
|
derivedSCEP->location(execplan::CalpontSelectExecutionPlan::FROM);
|
||||||
derivedSCEP->subType(execplan::CalpontSelectExecutionPlan::FROM_SUBS);
|
derivedSCEP->subType(execplan::CalpontSelectExecutionPlan::FROM_SUBS);
|
||||||
derivedSCEP->derivedTbAlias(tableAlias);
|
derivedSCEP->derivedTbAlias(tableAlias);
|
||||||
@@ -233,8 +239,6 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerCon
|
|||||||
derivedSCEP->unionVec().insert(derivedSCEP->unionVec().end(), additionalUnionVec.begin(),
|
derivedSCEP->unionVec().insert(derivedSCEP->unionVec().end(), additionalUnionVec.begin(),
|
||||||
additionalUnionVec.end());
|
additionalUnionVec.end());
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
newDerivedTableList.push_back(derivedSCEP);
|
newDerivedTableList.push_back(derivedSCEP);
|
||||||
execplan::CalpontSystemCatalog::TableAliasName tn = execplan::make_aliasview("", "", tableAlias, "");
|
execplan::CalpontSystemCatalog::TableAliasName tn = execplan::make_aliasview("", "", tableAlias, "");
|
||||||
newTableList.push_back(tn);
|
newTableList.push_back(tn);
|
||||||
@@ -246,41 +250,75 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerCon
|
|||||||
}
|
}
|
||||||
|
|
||||||
execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns;
|
execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns;
|
||||||
size_t colPosition = 0;
|
[[maybe_unused]] size_t colPosition = 0;
|
||||||
// change parent to derived table columns using ScheamAndTableName -> tableAlias map
|
// replace parent CSEP RCs with derived table RCs using SchemaAndTableName -> tableAlias map
|
||||||
for (auto& rc : csep.returnedCols())
|
for (auto& rc : csep.returnedCols())
|
||||||
{
|
{
|
||||||
// TODO support expressions
|
// TODO support expressions
|
||||||
|
// Find SC for the RC
|
||||||
auto rcCloned = boost::make_shared<execplan::SimpleColumn>(*rc);
|
auto rcCloned = boost::make_shared<execplan::SimpleColumn>(*rc);
|
||||||
// TODO timezone and result type are not copied
|
// TODO timezone and result type are not copied
|
||||||
// TODO add specific ctor for this functionality
|
// TODO add specific ctor for this functionality
|
||||||
auto newTableAlias = tableAliasMap.find({rc->schemaName(), rc->tableName()});
|
// If there is an alias in the map then it is a new derived table
|
||||||
rcCloned->tableName("");
|
auto sc = dynamic_cast<execplan::SimpleColumn*>(rc.get());
|
||||||
rcCloned->schemaName("");
|
std::vector<execplan::SimpleColumn*> scs;
|
||||||
rcCloned->tableAlias(tableAlias);
|
// execplan::ParseTree pt(rc.get());
|
||||||
rcCloned->colPosition(colPosition++);
|
// pt.walk(execplan::getSimpleCols, &scs);
|
||||||
rcCloned->resultType(rc->resultType());
|
|
||||||
|
|
||||||
newReturnedColumns.push_back(rcCloned);
|
// auto sc = scs[0];
|
||||||
|
std::cout << "Processing RC schema " << sc->schemaName() << " table " << sc->tableName() << " alias "
|
||||||
|
<< sc->tableAlias() << std::endl;
|
||||||
|
auto newTableAlias = tableAliasMap.find(
|
||||||
|
{sc->schemaName(), sc->tableName(), sc->tableAlias(), "", false});
|
||||||
|
if (newTableAlias == tableAliasMap.end())
|
||||||
|
{
|
||||||
|
std::cout << "The RC doesn't belong to any of the derived tables, so leave it intact" << std::endl;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
sc->tableName("");
|
||||||
|
sc->schemaName("");
|
||||||
|
sc->tableAlias(newTableAlias->second);
|
||||||
|
sc->isColumnStore(true);
|
||||||
|
sc->colPosition(colPosition++);
|
||||||
|
// rcCloned->colPosition(colPosition++);
|
||||||
|
// rcCloned->resultType(rc->resultType());
|
||||||
|
// newReturnedColumns.push_back(rcCloned);
|
||||||
}
|
}
|
||||||
|
|
||||||
execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns;
|
execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns;
|
||||||
size_t colPosition = 0;
|
[[maybe_unused]] size_t colPosition = 0;
|
||||||
// change parent to derived table columns using ScheamAndTableName -> tableAlias map
|
// replace parent CSEP RCs with derived table RCs using SchemaAndTableName -> tableAlias map
|
||||||
for (auto& rc : csep.returnedCols())
|
for (auto& rc : csep.returnedCols())
|
||||||
{
|
{
|
||||||
// TODO support expressions
|
// TODO support expressions
|
||||||
|
// Find SC for the RC
|
||||||
auto rcCloned = boost::make_shared<execplan::SimpleColumn>(*rc);
|
auto rcCloned = boost::make_shared<execplan::SimpleColumn>(*rc);
|
||||||
// TODO timezone and result type are not copied
|
// TODO timezone and result type are not copied
|
||||||
// TODO add specific ctor for this functionality
|
// TODO add specific ctor for this functionality
|
||||||
auto newTableAlias = tableAliasMap.find({rc->schemaName(), rc->tableName()});
|
// If there is an alias in the map then it is a new derived table
|
||||||
rcCloned->tableName("");
|
auto sc = dynamic_cast<execplan::SimpleColumn*>(rc.get());
|
||||||
rcCloned->schemaName("");
|
std::vector<execplan::SimpleColumn*> scs;
|
||||||
rcCloned->tableAlias(tableAlias);
|
// execplan::ParseTree pt(rc.get());
|
||||||
rcCloned->colPosition(colPosition++);
|
// pt.walk(execplan::getSimpleCols, &scs);
|
||||||
rcCloned->resultType(rc->resultType());
|
|
||||||
|
|
||||||
newReturnedColumns.push_back(rcCloned);
|
// auto sc = scs[0];
|
||||||
|
std::cout << "Processing RC schema " << sc->schemaName() << " table " << sc->tableName() << " alias "
|
||||||
|
<< sc->tableAlias() << std::endl;
|
||||||
|
auto newTableAlias = tableAliasMap.find(
|
||||||
|
{sc->schemaName(), sc->tableName(), sc->tableAlias(), "", false});
|
||||||
|
if (newTableAlias == tableAliasMap.end())
|
||||||
|
{
|
||||||
|
std::cout << "The RC doesn't belong to any of the derived tables, so leave it intact" << std::endl;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
sc->tableName("");
|
||||||
|
sc->schemaName("");
|
||||||
|
sc->tableAlias(newTableAlias->second);
|
||||||
|
sc->isColumnStore(true);
|
||||||
|
sc->colPosition(colPosition++);
|
||||||
|
// rcCloned->colPosition(colPosition++);
|
||||||
|
// rcCloned->resultType(rc->resultType());
|
||||||
|
// newReturnedColumns.push_back(rcCloned);
|
||||||
}
|
}
|
||||||
// Remove the filters if necessary using csep.filters(nullptr) as they were pushed down to union units
|
// Remove the filters if necessary using csep.filters(nullptr) as they were pushed down to union units
|
||||||
// But this is inappropriate for EXISTS filter and join conditions
|
// But this is inappropriate for EXISTS filter and join conditions
|
||||||
|
|||||||
@@ -25,6 +25,38 @@
|
|||||||
#include "rulebased_optimizer.h"
|
#include "rulebased_optimizer.h"
|
||||||
|
|
||||||
namespace optimizer {
|
namespace optimizer {
|
||||||
|
struct LessThan
|
||||||
|
{
|
||||||
|
bool operator()(const execplan::CalpontSystemCatalog::TableAliasName& lhs,
|
||||||
|
const execplan::CalpontSystemCatalog::TableAliasName& rhs) const
|
||||||
|
{
|
||||||
|
if (lhs.schema < rhs.schema)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else if (lhs.schema == rhs.schema)
|
||||||
|
{
|
||||||
|
if (lhs.table < rhs.table)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else if (lhs.table == rhs.table)
|
||||||
|
{
|
||||||
|
if (lhs.alias < rhs.alias)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
using TableAliasMap = std::map<execplan::CalpontSystemCatalog::TableAliasName, std::string,
|
||||||
|
LessThan>;
|
||||||
|
|
||||||
|
bool matchParallelCES(execplan::CalpontSelectExecutionPlan& csep);
|
||||||
|
void applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx);
|
||||||
bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep);
|
bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep);
|
||||||
bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx);
|
bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx);
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user