You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-11-02 06:13:16 +03:00
feat(rbo,rules): preparation to replace derived-based with table-based approach
This commit is contained in:
@@ -437,6 +437,10 @@ class CalpontSystemCatalog : public datatypes::SystemCatalog
|
||||
: schema(sch), table(tb), alias(al), view(v), fisColumnStore(true)
|
||||
{
|
||||
}
|
||||
// Builds a TableAliasName from the given schema, table, alias and view names.
// Unlike the preceding constructor, which hard-codes fisColumnStore to true,
// this overload takes the flag value from the isColumnStore argument.
TableAliasName(const std::string& sch, const std::string& tb, const std::string& al, const std::string& v, const bool isColumnStore)
|
||||
: schema(sch), table(tb), alias(al), view(v), fisColumnStore(isColumnStore)
|
||||
{
|
||||
}
|
||||
std::string schema;
|
||||
std::string table;
|
||||
std::string alias;
|
||||
@@ -458,7 +462,7 @@ class CalpontSystemCatalog : public datatypes::SystemCatalog
|
||||
{
|
||||
return !(*this == rhs);
|
||||
}
|
||||
bool isColumnstore() const
|
||||
bool isColumnstore() const
|
||||
{
|
||||
return fisColumnStore;
|
||||
}
|
||||
|
||||
@@ -118,7 +118,6 @@ typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap;
|
||||
using ColumnName = std::string;
|
||||
using ColumnStatisticsMap = std::unordered_map<ColumnName, Histogram_json_hb>;
|
||||
using TableStatisticsMap = std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>;
|
||||
using TableAliasMap = std::unordered_map<SchemaAndTableName, std::string, SchemaAndTableNameHash>;
|
||||
|
||||
// This structure is used to store MDB AST -> CSEP translation context.
|
||||
// There is a column statistics for some columns in a query.
|
||||
|
||||
@@ -18,17 +18,16 @@
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
|
||||
#include "rulebased_optimizer.h"
|
||||
|
||||
#include "constantcolumn.h"
|
||||
#include "execplan/calpontselectexecutionplan.h"
|
||||
#include "execplan/simplecolumn.h"
|
||||
#include "existsfilter.h"
|
||||
#include "logicoperator.h"
|
||||
#include "operator.h"
|
||||
#include "predicateoperator.h"
|
||||
#include "rbo_apply_parallel_ces.h"
|
||||
#include "simplefilter.h"
|
||||
|
||||
namespace optimizer
|
||||
@@ -75,7 +74,6 @@ bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep)
|
||||
execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, execplan::SimpleColumn& column,
|
||||
std::pair<uint64_t, uint64_t>& bound)
|
||||
{
|
||||
|
||||
auto tableKeyColumnLeftOp = new execplan::SimpleColumn(column);
|
||||
tableKeyColumnLeftOp->resultType(column.resultType());
|
||||
|
||||
@@ -117,15 +115,18 @@ execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, exe
|
||||
|
||||
// Looking for a projected column that comes first in an available index and has EI statistics
|
||||
// INV nullptr signifies that no suitable column was found
|
||||
execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx)
|
||||
execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPlan& csep,
|
||||
optimizer::RBOptimizerContext& ctx)
|
||||
{
|
||||
for (auto& rc : csep.returnedCols())
|
||||
{
|
||||
// TODO extract SC from RC
|
||||
auto* simpleColumn = dynamic_cast<execplan::SimpleColumn*>(rc.get());
|
||||
if (simpleColumn)
|
||||
{
|
||||
cal_impl_if::SchemaAndTableName schemaAndTableNam = {simpleColumn->schemaName(), simpleColumn->tableName()};
|
||||
auto columnStatistics = ctx.gwi.findStatisticsForATable(schemaAndTableNam);
|
||||
cal_impl_if::SchemaAndTableName schemaAndTableName = {simpleColumn->schemaName(),
|
||||
simpleColumn->tableName()};
|
||||
auto columnStatistics = ctx.gwi.findStatisticsForATable(schemaAndTableName);
|
||||
if (!columnStatistics)
|
||||
{
|
||||
continue;
|
||||
@@ -190,7 +191,8 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
|
||||
|
||||
// Add last range
|
||||
// NB despite the fact that currently Histogram_json_hb has the last bucket that has end as its start
|
||||
auto lastBucket = columnStatistics.get_json_histogram().begin() + (numberOfUnionUnits - 1) * numberOfBucketsPerUnionUnit;
|
||||
auto lastBucket =
|
||||
columnStatistics.get_json_histogram().begin() + (numberOfUnionUnits - 1) * numberOfBucketsPerUnionUnit;
|
||||
uint64_t currentLowerBound = *(uint32_t*)lastBucket->start_value.data();
|
||||
uint64_t currentUpperBound = *(uint32_t*)columnStatistics.get_last_bucket_end_endp().data();
|
||||
bounds.push_back({currentLowerBound, currentUpperBound});
|
||||
@@ -213,17 +215,21 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerCon
|
||||
cal_impl_if::TableAliasMap tableAliasMap;
|
||||
bool ruleHasBeenApplied = false;
|
||||
|
||||
// ATM Must be only 1 table
|
||||
for (auto& table : tables)
|
||||
{
|
||||
if (!table.isColumnstore())
|
||||
cal_impl_if::SchemaAndTableName schemaAndTableName = {table.schema, table.table};
|
||||
std::cout << "Processing table schema " << schemaAndTableName.schema << " table "
|
||||
<< schemaAndTableName.table << " alias " << table.alias << std::endl;
|
||||
auto columnStatistics = ctx.gwi.findStatisticsForATable(schemaAndTableName);
|
||||
// TODO add column statistics check to the corresponding match
|
||||
if (!table.isColumnstore() && columnStatistics)
|
||||
{
|
||||
auto derivedSCEP = csep.cloneWORecursiveSelects();
|
||||
// need to add a level here
|
||||
std::string tableAlias = RewrittenSubTableAliasPrefix + table.schema + "_" + table.table + "_" +
|
||||
std::to_string(ctx.uniqueId);
|
||||
// TODO add original alias to support multiple same name tables
|
||||
tableAliasMap.insert({{table.schema, table.table}, tableAlias});
|
||||
tableAliasMap.insert({table, tableAlias});
|
||||
derivedSCEP->location(execplan::CalpontSelectExecutionPlan::FROM);
|
||||
derivedSCEP->subType(execplan::CalpontSelectExecutionPlan::FROM_SUBS);
|
||||
derivedSCEP->derivedTbAlias(tableAlias);
|
||||
@@ -233,8 +239,6 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerCon
|
||||
derivedSCEP->unionVec().insert(derivedSCEP->unionVec().end(), additionalUnionVec.begin(),
|
||||
additionalUnionVec.end());
|
||||
|
||||
|
||||
|
||||
newDerivedTableList.push_back(derivedSCEP);
|
||||
execplan::CalpontSystemCatalog::TableAliasName tn = execplan::make_aliasview("", "", tableAlias, "");
|
||||
newTableList.push_back(tn);
|
||||
@@ -246,41 +250,75 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerCon
|
||||
}
|
||||
|
||||
execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns;
|
||||
size_t colPosition = 0;
|
||||
// change parent to derived table columns using SchemaAndTableName -> tableAlias map
|
||||
[[maybe_unused]] size_t colPosition = 0;
|
||||
// replace parent CSEP RCs with derived table RCs using SchemaAndTableName -> tableAlias map
|
||||
for (auto& rc : csep.returnedCols())
|
||||
{
|
||||
// TODO support expressions
|
||||
// Find SC for the RC
|
||||
auto rcCloned = boost::make_shared<execplan::SimpleColumn>(*rc);
|
||||
// TODO timezone and result type are not copied
|
||||
// TODO add specific ctor for this functionality
|
||||
auto newTableAlias = tableAliasMap.find({rc->schemaName(), rc->tableName()});
|
||||
rcCloned->tableName("");
|
||||
rcCloned->schemaName("");
|
||||
rcCloned->tableAlias(tableAlias);
|
||||
rcCloned->colPosition(colPosition++);
|
||||
rcCloned->resultType(rc->resultType());
|
||||
// If there is an alias in the map then it is a new derived table
|
||||
auto sc = dynamic_cast<execplan::SimpleColumn*>(rc.get());
|
||||
std::vector<execplan::SimpleColumn*> scs;
|
||||
// execplan::ParseTree pt(rc.get());
|
||||
// pt.walk(execplan::getSimpleCols, &scs);
|
||||
|
||||
newReturnedColumns.push_back(rcCloned);
|
||||
// auto sc = scs[0];
|
||||
std::cout << "Processing RC schema " << sc->schemaName() << " table " << sc->tableName() << " alias "
|
||||
<< sc->tableAlias() << std::endl;
|
||||
auto newTableAlias = tableAliasMap.find(
|
||||
{sc->schemaName(), sc->tableName(), sc->tableAlias(), "", false});
|
||||
if (newTableAlias == tableAliasMap.end())
|
||||
{
|
||||
std::cout << "The RC doesn't belong to any of the derived tables, so leave it intact" << std::endl;
|
||||
continue;
|
||||
}
|
||||
sc->tableName("");
|
||||
sc->schemaName("");
|
||||
sc->tableAlias(newTableAlias->second);
|
||||
sc->isColumnStore(true);
|
||||
sc->colPosition(colPosition++);
|
||||
// rcCloned->colPosition(colPosition++);
|
||||
// rcCloned->resultType(rc->resultType());
|
||||
// newReturnedColumns.push_back(rcCloned);
|
||||
}
|
||||
|
||||
execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns;
|
||||
size_t colPosition = 0;
|
||||
// change parent to derived table columns using SchemaAndTableName -> tableAlias map
|
||||
[[maybe_unused]] size_t colPosition = 0;
|
||||
// replace parent CSEP RCs with derived table RCs using SchemaAndTableName -> tableAlias map
|
||||
for (auto& rc : csep.returnedCols())
|
||||
{
|
||||
// TODO support expressions
|
||||
// Find SC for the RC
|
||||
auto rcCloned = boost::make_shared<execplan::SimpleColumn>(*rc);
|
||||
// TODO timezone and result type are not copied
|
||||
// TODO add specific ctor for this functionality
|
||||
auto newTableAlias = tableAliasMap.find({rc->schemaName(), rc->tableName()});
|
||||
rcCloned->tableName("");
|
||||
rcCloned->schemaName("");
|
||||
rcCloned->tableAlias(tableAlias);
|
||||
rcCloned->colPosition(colPosition++);
|
||||
rcCloned->resultType(rc->resultType());
|
||||
// If there is an alias in the map then it is a new derived table
|
||||
auto sc = dynamic_cast<execplan::SimpleColumn*>(rc.get());
|
||||
std::vector<execplan::SimpleColumn*> scs;
|
||||
// execplan::ParseTree pt(rc.get());
|
||||
// pt.walk(execplan::getSimpleCols, &scs);
|
||||
|
||||
newReturnedColumns.push_back(rcCloned);
|
||||
// auto sc = scs[0];
|
||||
std::cout << "Processing RC schema " << sc->schemaName() << " table " << sc->tableName() << " alias "
|
||||
<< sc->tableAlias() << std::endl;
|
||||
auto newTableAlias = tableAliasMap.find(
|
||||
{sc->schemaName(), sc->tableName(), sc->tableAlias(), "", false});
|
||||
if (newTableAlias == tableAliasMap.end())
|
||||
{
|
||||
std::cout << "The RC doesn't belong to any of the derived tables, so leave it intact" << std::endl;
|
||||
continue;
|
||||
}
|
||||
sc->tableName("");
|
||||
sc->schemaName("");
|
||||
sc->tableAlias(newTableAlias->second);
|
||||
sc->isColumnStore(true);
|
||||
sc->colPosition(colPosition++);
|
||||
// rcCloned->colPosition(colPosition++);
|
||||
// rcCloned->resultType(rc->resultType());
|
||||
// newReturnedColumns.push_back(rcCloned);
|
||||
}
|
||||
// Remove the filters if necessary using csep.filters(nullptr) as they were pushed down to union units
|
||||
// But this is inappropriate for EXISTS filter and join conditions
|
||||
|
||||
@@ -25,6 +25,38 @@
|
||||
#include "rulebased_optimizer.h"
|
||||
|
||||
namespace optimizer {
|
||||
struct LessThan
|
||||
{
|
||||
bool operator()(const execplan::CalpontSystemCatalog::TableAliasName& lhs,
|
||||
const execplan::CalpontSystemCatalog::TableAliasName& rhs) const
|
||||
{
|
||||
if (lhs.schema < rhs.schema)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
else if (lhs.schema == rhs.schema)
|
||||
{
|
||||
if (lhs.table < rhs.table)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
else if (lhs.table == rhs.table)
|
||||
{
|
||||
if (lhs.alias < rhs.alias)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
};
|
||||
using TableAliasMap = std::map<execplan::CalpontSystemCatalog::TableAliasName, std::string,
|
||||
LessThan>;
|
||||
|
||||
bool matchParallelCES(execplan::CalpontSelectExecutionPlan& csep);
|
||||
void applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx);
|
||||
bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep);
|
||||
bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx);
|
||||
}
|
||||
Reference in New Issue
Block a user