From ebaa6cce5cde8d810615574f3ab694d2a7ad38ca Mon Sep 17 00:00:00 2001 From: drrtuy Date: Mon, 21 Jul 2025 19:25:37 +0000 Subject: [PATCH] feat(rbo,rules): preparation to replace derived-based with table-based approach --- dbcon/execplan/calpontsystemcatalog.h | 6 +- dbcon/mysql/ha_mcs_impl_if.h | 1 - dbcon/mysql/rbo_apply_parallel_ces.cpp | 98 ++++++++++++++++++-------- dbcon/mysql/rbo_apply_parallel_ces.h | 32 +++++++++ 4 files changed, 105 insertions(+), 32 deletions(-) diff --git a/dbcon/execplan/calpontsystemcatalog.h b/dbcon/execplan/calpontsystemcatalog.h index dee640c3e..3cfaceae1 100644 --- a/dbcon/execplan/calpontsystemcatalog.h +++ b/dbcon/execplan/calpontsystemcatalog.h @@ -437,6 +437,10 @@ class CalpontSystemCatalog : public datatypes::SystemCatalog : schema(sch), table(tb), alias(al), view(v), fisColumnStore(true) { } + TableAliasName(const std::string& sch, const std::string& tb, const std::string& al, const std::string& v, const bool isColumnStore) + : schema(sch), table(tb), alias(al), view(v), fisColumnStore(isColumnStore) + { + } std::string schema; std::string table; std::string alias; @@ -458,7 +462,7 @@ class CalpontSystemCatalog : public datatypes::SystemCatalog { return !(*this == rhs); } - bool isColumnstore() const + bool isColumnstore() const { return fisColumnStore; } diff --git a/dbcon/mysql/ha_mcs_impl_if.h b/dbcon/mysql/ha_mcs_impl_if.h index 6baa7fd74..aed13d202 100644 --- a/dbcon/mysql/ha_mcs_impl_if.h +++ b/dbcon/mysql/ha_mcs_impl_if.h @@ -118,7 +118,6 @@ typedef std::tr1::unordered_map TableOuterJoinMap; using ColumnName = std::string; using ColumnStatisticsMap = std::unordered_map; using TableStatisticsMap = std::unordered_map; -using TableAliasMap = std::unordered_map; // This structure is used to store MDB AST -> CSEP translation context. // There is a column statistics for some columns in a query. diff --git a/dbcon/mysql/rbo_apply_parallel_ces.cpp b/dbcon/mysql/rbo_apply_parallel_ces.cpp index b9accfb3b..86094a3ac 100644 --- a/dbcon/mysql/rbo_apply_parallel_ces.cpp +++ b/dbcon/mysql/rbo_apply_parallel_ces.cpp @@ -18,17 +18,16 @@ #include #include #include -#include #include "rulebased_optimizer.h" #include "constantcolumn.h" #include "execplan/calpontselectexecutionplan.h" #include "execplan/simplecolumn.h" -#include "existsfilter.h" #include "logicoperator.h" #include "operator.h" #include "predicateoperator.h" +#include "rbo_apply_parallel_ces.h" #include "simplefilter.h" namespace optimizer @@ -75,7 +74,6 @@ bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep) execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, execplan::SimpleColumn& column, std::pair& bound) { - auto tableKeyColumnLeftOp = new execplan::SimpleColumn(column); tableKeyColumnLeftOp->resultType(column.resultType()); @@ -117,15 +115,18 @@ execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, exe // Looking for a projected column that comes first in an available index and has EI statistics // INV nullptr signifies that no suitable column was found -execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx) +execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPlan& csep, + optimizer::RBOptimizerContext& ctx) { for (auto& rc : csep.returnedCols()) { + // TODO extract SC from RC auto* simpleColumn = dynamic_cast(rc.get()); if (simpleColumn) { - cal_impl_if::SchemaAndTableName schemaAndTableNam = {simpleColumn->schemaName(), simpleColumn->tableName()}; - auto columnStatistics = ctx.gwi.findStatisticsForATable(schemaAndTableNam); + cal_impl_if::SchemaAndTableName schemaAndTableName = {simpleColumn->schemaName(), + simpleColumn->tableName()}; + auto columnStatistics = ctx.gwi.findStatisticsForATable(schemaAndTableName); if (!columnStatistics) { continue; @@ -190,7 +191,8 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable( // Add last range // NB despite the fact that currently Histogram_json_hb has the last bucket that has end as its start - auto lastBucket = columnStatistics.get_json_histogram().begin() + (numberOfUnionUnits - 1) * numberOfBucketsPerUnionUnit; + auto lastBucket = + columnStatistics.get_json_histogram().begin() + (numberOfUnionUnits - 1) * numberOfBucketsPerUnionUnit; uint64_t currentLowerBound = *(uint32_t*)lastBucket->start_value.data(); uint64_t currentUpperBound = *(uint32_t*)columnStatistics.get_last_bucket_end_endp().data(); bounds.push_back({currentLowerBound, currentUpperBound}); @@ -213,17 +215,21 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerCon cal_impl_if::TableAliasMap tableAliasMap; bool ruleHasBeenApplied = false; - // ATM Must be only 1 table for (auto& table : tables) { - if (!table.isColumnstore()) + cal_impl_if::SchemaAndTableName schemaAndTableName = {table.schema, table.table}; + std::cout << "Processing table schema " << schemaAndTableName.schema << " table " + << schemaAndTableName.table << " alias " << table.alias << std::endl; + auto columnStatistics = ctx.gwi.findStatisticsForATable(schemaAndTableName); + // TODO add column statistics check to the corresponding match + if (!table.isColumnstore() && columnStatistics) { auto derivedSCEP = csep.cloneWORecursiveSelects(); // need to add a level here std::string tableAlias = RewrittenSubTableAliasPrefix + table.schema + "_" + table.table + "_" + std::to_string(ctx.uniqueId); // TODO add original alias to support multiple same name tables - tableAliasMap.insert({{table.schema, table.table}, tableAlias}); + tableAliasMap.insert({table, tableAlias}); derivedSCEP->location(execplan::CalpontSelectExecutionPlan::FROM); derivedSCEP->subType(execplan::CalpontSelectExecutionPlan::FROM_SUBS); derivedSCEP->derivedTbAlias(tableAlias); @@ -233,8 +239,6 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerCon derivedSCEP->unionVec().insert(derivedSCEP->unionVec().end(), additionalUnionVec.begin(), additionalUnionVec.end()); - - newDerivedTableList.push_back(derivedSCEP); execplan::CalpontSystemCatalog::TableAliasName tn = execplan::make_aliasview("", "", tableAlias, ""); newTableList.push_back(tn); @@ -246,41 +250,75 @@ bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerCon } execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns; - size_t colPosition = 0; - // change parent to derived table columns using ScheamAndTableName -> tableAlias map + [[maybe_unused]] size_t colPosition = 0; + // replace parent CSEP RCs with derived table RCs using ScheamAndTableName -> tableAlias map for (auto& rc : csep.returnedCols()) { // TODO support expressions + // Find SC for the RC auto rcCloned = boost::make_shared(*rc); // TODO timezone and result type are not copied // TODO add specific ctor for this functionality - auto newTableAlias = tableAliasMap.find({rc->schemaName(), rc->tableName()}); - rcCloned->tableName(""); - rcCloned->schemaName(""); - rcCloned->tableAlias(tableAlias); - rcCloned->colPosition(colPosition++); - rcCloned->resultType(rc->resultType()); + // If there is an alias in the map then it is a new derived table + auto sc = dynamic_cast(rc.get()); + std::vector scs; + // execplan::ParseTree pt(rc.get()); + // pt.walk(execplan::getSimpleCols, &scs); - newReturnedColumns.push_back(rcCloned); + // auto sc = scs[0]; + std::cout << "Processing RC schema " << sc->schemaName() << " table " << sc->tableName() << " alias " + << sc->tableAlias() << std::endl; + auto newTableAlias = tableAliasMap.find( + {sc->schemaName(), sc->tableName(), sc->tableAlias(), "", false}); + if (newTableAlias == tableAliasMap.end()) + { + std::cout << "The RC doesn't belong to any of the derived tables, so leave it intact" << std::endl; + continue; + } + sc->tableName(""); + sc->schemaName(""); + sc->tableAlias(newTableAlias->second); + sc->isColumnStore(true); + sc->colPosition(colPosition++); + // rcCloned->colPosition(colPosition++); + // rcCloned->resultType(rc->resultType()); + // newReturnedColumns.push_back(rcCloned); } execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns; - size_t colPosition = 0; - // change parent to derived table columns using ScheamAndTableName -> tableAlias map + [[maybe_unused]] size_t colPosition = 0; + // replace parent CSEP RCs with derived table RCs using ScheamAndTableName -> tableAlias map for (auto& rc : csep.returnedCols()) { // TODO support expressions + // Find SC for the RC auto rcCloned = boost::make_shared(*rc); // TODO timezone and result type are not copied // TODO add specific ctor for this functionality - auto newTableAlias = tableAliasMap.find({rc->schemaName(), rc->tableName()}); - rcCloned->tableName(""); - rcCloned->schemaName(""); - rcCloned->tableAlias(tableAlias); - rcCloned->colPosition(colPosition++); - rcCloned->resultType(rc->resultType()); + // If there is an alias in the map then it is a new derived table + auto sc = dynamic_cast(rc.get()); + std::vector scs; + // execplan::ParseTree pt(rc.get()); + // pt.walk(execplan::getSimpleCols, &scs); - newReturnedColumns.push_back(rcCloned); + // auto sc = scs[0]; + std::cout << "Processing RC schema " << sc->schemaName() << " table " << sc->tableName() << " alias " + << sc->tableAlias() << std::endl; + auto newTableAlias = tableAliasMap.find( + {sc->schemaName(), sc->tableName(), sc->tableAlias(), "", false}); + if (newTableAlias == tableAliasMap.end()) + { + std::cout << "The RC doesn't belong to any of the derived tables, so leave it intact" << std::endl; + continue; + } + sc->tableName(""); + sc->schemaName(""); + sc->tableAlias(newTableAlias->second); + sc->isColumnStore(true); + sc->colPosition(colPosition++); + // rcCloned->colPosition(colPosition++); + // rcCloned->resultType(rc->resultType()); + // newReturnedColumns.push_back(rcCloned); } // Remove the filters if necessary using csep.filters(nullptr) as they were pushed down to union units // But this is inappropriate for EXISTS filter and join conditions diff --git a/dbcon/mysql/rbo_apply_parallel_ces.h b/dbcon/mysql/rbo_apply_parallel_ces.h index 1aee6debd..644ca764a 100644 --- a/dbcon/mysql/rbo_apply_parallel_ces.h +++ b/dbcon/mysql/rbo_apply_parallel_ces.h @@ -25,6 +25,38 @@ #include "rulebased_optimizer.h" namespace optimizer { + struct LessThan + { + bool operator()(const execplan::CalpontSystemCatalog::TableAliasName& lhs, + const execplan::CalpontSystemCatalog::TableAliasName& rhs) const + { + if (lhs.schema < rhs.schema) + { + return true; + } + else if (lhs.schema == rhs.schema) + { + if (lhs.table < rhs.table) + { + return true; + } + else if (lhs.table == rhs.table) + { + if (lhs.alias < rhs.alias) + { + return true; + } + } + } + + return false; + } + }; + using TableAliasMap = std::map; + + bool matchParallelCES(execplan::CalpontSelectExecutionPlan& csep); + void applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx); bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep); bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx); } \ No newline at end of file