1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-11-02 06:13:16 +03:00

feat(rbo,rules): preparation to replace derived-based with table-based approach

This commit is contained in:
drrtuy
2025-07-21 19:25:37 +00:00
parent 595b1baad2
commit ebaa6cce5c
4 changed files with 105 additions and 32 deletions

View File

@@ -437,6 +437,10 @@ class CalpontSystemCatalog : public datatypes::SystemCatalog
: schema(sch), table(tb), alias(al), view(v), fisColumnStore(true) : schema(sch), table(tb), alias(al), view(v), fisColumnStore(true)
{ {
} }
// Constructs a name from schema, table, alias and view, taking the ColumnStore
// flag from the caller. The four-string constructor above always initializes
// fisColumnStore to true; use this overload when the flag must be set explicitly.
TableAliasName(const std::string& sch, const std::string& tb, const std::string& al, const std::string& v, const bool isColumnStore)
: schema(sch), table(tb), alias(al), view(v), fisColumnStore(isColumnStore)
{
}
std::string schema; std::string schema;
std::string table; std::string table;
std::string alias; std::string alias;
@@ -458,7 +462,7 @@ class CalpontSystemCatalog : public datatypes::SystemCatalog
{ {
return !(*this == rhs); return !(*this == rhs);
} }
bool isColumnstore() const bool isColumnstore() const
{ {
return fisColumnStore; return fisColumnStore;
} }

View File

@@ -118,7 +118,6 @@ typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap;
using ColumnName = std::string; using ColumnName = std::string;
using ColumnStatisticsMap = std::unordered_map<ColumnName, Histogram_json_hb>; using ColumnStatisticsMap = std::unordered_map<ColumnName, Histogram_json_hb>;
using TableStatisticsMap = std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>; using TableStatisticsMap = std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>;
using TableAliasMap = std::unordered_map<SchemaAndTableName, std::string, SchemaAndTableNameHash>;
// This structure is used to store MDB AST -> CSEP translation context. // This structure is used to store MDB AST -> CSEP translation context.
// There is a column statistics for some columns in a query. // There is a column statistics for some columns in a query.

View File

@@ -18,17 +18,16 @@
#include <algorithm> #include <algorithm>
#include <cstddef> #include <cstddef>
#include <cstdint> #include <cstdint>
#include <limits>
#include "rulebased_optimizer.h" #include "rulebased_optimizer.h"
#include "constantcolumn.h" #include "constantcolumn.h"
#include "execplan/calpontselectexecutionplan.h" #include "execplan/calpontselectexecutionplan.h"
#include "execplan/simplecolumn.h" #include "execplan/simplecolumn.h"
#include "existsfilter.h"
#include "logicoperator.h" #include "logicoperator.h"
#include "operator.h" #include "operator.h"
#include "predicateoperator.h" #include "predicateoperator.h"
#include "rbo_apply_parallel_ces.h"
#include "simplefilter.h" #include "simplefilter.h"
namespace optimizer namespace optimizer
@@ -75,7 +74,6 @@ bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep)
execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, execplan::SimpleColumn& column, execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, execplan::SimpleColumn& column,
std::pair<uint64_t, uint64_t>& bound) std::pair<uint64_t, uint64_t>& bound)
{ {
auto tableKeyColumnLeftOp = new execplan::SimpleColumn(column); auto tableKeyColumnLeftOp = new execplan::SimpleColumn(column);
tableKeyColumnLeftOp->resultType(column.resultType()); tableKeyColumnLeftOp->resultType(column.resultType());
@@ -117,15 +115,18 @@ execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, exe
// Looking for a projected column that comes first in an available index and has EI statistics // Looking for a projected column that comes first in an available index and has EI statistics
// INV nullptr signifies that no suitable column was found // INV nullptr signifies that no suitable column was found
execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx) execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPlan& csep,
optimizer::RBOptimizerContext& ctx)
{ {
for (auto& rc : csep.returnedCols()) for (auto& rc : csep.returnedCols())
{ {
// TODO extract SC from RC
auto* simpleColumn = dynamic_cast<execplan::SimpleColumn*>(rc.get()); auto* simpleColumn = dynamic_cast<execplan::SimpleColumn*>(rc.get());
if (simpleColumn) if (simpleColumn)
{ {
cal_impl_if::SchemaAndTableName schemaAndTableNam = {simpleColumn->schemaName(), simpleColumn->tableName()}; cal_impl_if::SchemaAndTableName schemaAndTableName = {simpleColumn->schemaName(),
auto columnStatistics = ctx.gwi.findStatisticsForATable(schemaAndTableNam); simpleColumn->tableName()};
auto columnStatistics = ctx.gwi.findStatisticsForATable(schemaAndTableName);
if (!columnStatistics) if (!columnStatistics)
{ {
continue; continue;
@@ -190,7 +191,8 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
// Add last range // Add last range
// NB despite the fact that currently Histogram_json_hb has the last bucket that has end as its start // NB despite the fact that currently Histogram_json_hb has the last bucket that has end as its start
auto lastBucket = columnStatistics.get_json_histogram().begin() + (numberOfUnionUnits - 1) * numberOfBucketsPerUnionUnit; auto lastBucket =
columnStatistics.get_json_histogram().begin() + (numberOfUnionUnits - 1) * numberOfBucketsPerUnionUnit;
uint64_t currentLowerBound = *(uint32_t*)lastBucket->start_value.data(); uint64_t currentLowerBound = *(uint32_t*)lastBucket->start_value.data();
uint64_t currentUpperBound = *(uint32_t*)columnStatistics.get_last_bucket_end_endp().data(); uint64_t currentUpperBound = *(uint32_t*)columnStatistics.get_last_bucket_end_endp().data();
bounds.push_back({currentLowerBound, currentUpperBound}); bounds.push_back({currentLowerBound, currentUpperBound});
@@ -213,17 +215,21 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerCon
cal_impl_if::TableAliasMap tableAliasMap; cal_impl_if::TableAliasMap tableAliasMap;
bool ruleHasBeenApplied = false; bool ruleHasBeenApplied = false;
// ATM Must be only 1 table
for (auto& table : tables) for (auto& table : tables)
{ {
if (!table.isColumnstore()) cal_impl_if::SchemaAndTableName schemaAndTableName = {table.schema, table.table};
std::cout << "Processing table schema " << schemaAndTableName.schema << " table "
<< schemaAndTableName.table << " alias " << table.alias << std::endl;
auto columnStatistics = ctx.gwi.findStatisticsForATable(schemaAndTableName);
// TODO add column statistics check to the corresponding match
if (!table.isColumnstore() && columnStatistics)
{ {
auto derivedSCEP = csep.cloneWORecursiveSelects(); auto derivedSCEP = csep.cloneWORecursiveSelects();
// need to add a level here // need to add a level here
std::string tableAlias = RewrittenSubTableAliasPrefix + table.schema + "_" + table.table + "_" + std::string tableAlias = RewrittenSubTableAliasPrefix + table.schema + "_" + table.table + "_" +
std::to_string(ctx.uniqueId); std::to_string(ctx.uniqueId);
// TODO add original alias to support multiple same name tables // TODO add original alias to support multiple same name tables
tableAliasMap.insert({{table.schema, table.table}, tableAlias}); tableAliasMap.insert({table, tableAlias});
derivedSCEP->location(execplan::CalpontSelectExecutionPlan::FROM); derivedSCEP->location(execplan::CalpontSelectExecutionPlan::FROM);
derivedSCEP->subType(execplan::CalpontSelectExecutionPlan::FROM_SUBS); derivedSCEP->subType(execplan::CalpontSelectExecutionPlan::FROM_SUBS);
derivedSCEP->derivedTbAlias(tableAlias); derivedSCEP->derivedTbAlias(tableAlias);
@@ -233,8 +239,6 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerCon
derivedSCEP->unionVec().insert(derivedSCEP->unionVec().end(), additionalUnionVec.begin(), derivedSCEP->unionVec().insert(derivedSCEP->unionVec().end(), additionalUnionVec.begin(),
additionalUnionVec.end()); additionalUnionVec.end());
newDerivedTableList.push_back(derivedSCEP); newDerivedTableList.push_back(derivedSCEP);
execplan::CalpontSystemCatalog::TableAliasName tn = execplan::make_aliasview("", "", tableAlias, ""); execplan::CalpontSystemCatalog::TableAliasName tn = execplan::make_aliasview("", "", tableAlias, "");
newTableList.push_back(tn); newTableList.push_back(tn);
@@ -246,41 +250,75 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerCon
} }
execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns; execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns;
size_t colPosition = 0; [[maybe_unused]] size_t colPosition = 0;
// change parent to derived table columns using ScheamAndTableName -> tableAlias map // replace parent CSEP RCs with derived table RCs using ScheamAndTableName -> tableAlias map
for (auto& rc : csep.returnedCols()) for (auto& rc : csep.returnedCols())
{ {
// TODO support expressions // TODO support expressions
// Find SC for the RC
auto rcCloned = boost::make_shared<execplan::SimpleColumn>(*rc); auto rcCloned = boost::make_shared<execplan::SimpleColumn>(*rc);
// TODO timezone and result type are not copied // TODO timezone and result type are not copied
// TODO add specific ctor for this functionality // TODO add specific ctor for this functionality
auto newTableAlias = tableAliasMap.find({rc->schemaName(), rc->tableName()}); // If there is an alias in the map then it is a new derived table
rcCloned->tableName(""); auto sc = dynamic_cast<execplan::SimpleColumn*>(rc.get());
rcCloned->schemaName(""); std::vector<execplan::SimpleColumn*> scs;
rcCloned->tableAlias(tableAlias); // execplan::ParseTree pt(rc.get());
rcCloned->colPosition(colPosition++); // pt.walk(execplan::getSimpleCols, &scs);
rcCloned->resultType(rc->resultType());
newReturnedColumns.push_back(rcCloned); // auto sc = scs[0];
std::cout << "Processing RC schema " << sc->schemaName() << " table " << sc->tableName() << " alias "
<< sc->tableAlias() << std::endl;
auto newTableAlias = tableAliasMap.find(
{sc->schemaName(), sc->tableName(), sc->tableAlias(), "", false});
if (newTableAlias == tableAliasMap.end())
{
std::cout << "The RC doesn't belong to any of the derived tables, so leave it intact" << std::endl;
continue;
}
sc->tableName("");
sc->schemaName("");
sc->tableAlias(newTableAlias->second);
sc->isColumnStore(true);
sc->colPosition(colPosition++);
// rcCloned->colPosition(colPosition++);
// rcCloned->resultType(rc->resultType());
// newReturnedColumns.push_back(rcCloned);
} }
execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns; execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns;
size_t colPosition = 0; [[maybe_unused]] size_t colPosition = 0;
// change parent to derived table columns using ScheamAndTableName -> tableAlias map // replace parent CSEP RCs with derived table RCs using ScheamAndTableName -> tableAlias map
for (auto& rc : csep.returnedCols()) for (auto& rc : csep.returnedCols())
{ {
// TODO support expressions // TODO support expressions
// Find SC for the RC
auto rcCloned = boost::make_shared<execplan::SimpleColumn>(*rc); auto rcCloned = boost::make_shared<execplan::SimpleColumn>(*rc);
// TODO timezone and result type are not copied // TODO timezone and result type are not copied
// TODO add specific ctor for this functionality // TODO add specific ctor for this functionality
auto newTableAlias = tableAliasMap.find({rc->schemaName(), rc->tableName()}); // If there is an alias in the map then it is a new derived table
rcCloned->tableName(""); auto sc = dynamic_cast<execplan::SimpleColumn*>(rc.get());
rcCloned->schemaName(""); std::vector<execplan::SimpleColumn*> scs;
rcCloned->tableAlias(tableAlias); // execplan::ParseTree pt(rc.get());
rcCloned->colPosition(colPosition++); // pt.walk(execplan::getSimpleCols, &scs);
rcCloned->resultType(rc->resultType());
newReturnedColumns.push_back(rcCloned); // auto sc = scs[0];
std::cout << "Processing RC schema " << sc->schemaName() << " table " << sc->tableName() << " alias "
<< sc->tableAlias() << std::endl;
auto newTableAlias = tableAliasMap.find(
{sc->schemaName(), sc->tableName(), sc->tableAlias(), "", false});
if (newTableAlias == tableAliasMap.end())
{
std::cout << "The RC doesn't belong to any of the derived tables, so leave it intact" << std::endl;
continue;
}
sc->tableName("");
sc->schemaName("");
sc->tableAlias(newTableAlias->second);
sc->isColumnStore(true);
sc->colPosition(colPosition++);
// rcCloned->colPosition(colPosition++);
// rcCloned->resultType(rc->resultType());
// newReturnedColumns.push_back(rcCloned);
} }
// Remove the filters if necessary using csep.filters(nullptr) as they were pushed down to union units // Remove the filters if necessary using csep.filters(nullptr) as they were pushed down to union units
// But this is inappropriate for EXISTS filter and join conditions // But this is inappropriate for EXISTS filter and join conditions

View File

@@ -25,6 +25,38 @@
#include "rulebased_optimizer.h" #include "rulebased_optimizer.h"
namespace optimizer { namespace optimizer {
// Ordering functor for TableAliasName keys.
// Compares lexicographically by schema, then table, then alias. No other
// member of TableAliasName is consulted, so names that agree on these three
// fields are equivalent as far as this comparator is concerned.
struct LessThan
{
  bool operator()(const execplan::CalpontSystemCatalog::TableAliasName& lhs,
                  const execplan::CalpontSystemCatalog::TableAliasName& rhs) const
  {
    // The first field that differs decides the result.
    if (lhs.schema != rhs.schema)
    {
      return lhs.schema < rhs.schema;
    }
    if (lhs.table != rhs.table)
    {
      return lhs.table < rhs.table;
    }
    // Schema and table match: the alias is the final tie-breaker.
    return lhs.alias < rhs.alias;
  }
};
// Ordered map from a TableAliasName to a string, with keys ordered by LessThan
// (schema, then table, then alias).
// NOTE(review): the mapped string is presumably the alias of the rewritten
// derived table - confirm against the code that fills the map.
using TableAliasMap = std::map<execplan::CalpontSystemCatalog::TableAliasName, std::string,
LessThan>;
bool matchParallelCES(execplan::CalpontSelectExecutionPlan& csep);
void applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx);
bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep); bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep);
bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx); bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx);
} }