diff --git a/dbcon/mysql/ha_from_sub.cpp b/dbcon/mysql/ha_from_sub.cpp index 442743c2d..3fc987f82 100644 --- a/dbcon/mysql/ha_from_sub.cpp +++ b/dbcon/mysql/ha_from_sub.cpp @@ -51,9 +51,6 @@ void derivedTableOptimization(gp_walk_info* gwip, SCSEP& csep) // derived tables are not checked for optimization in this scope. CalpontSelectExecutionPlan::SelectList derivedTbList = csep->derivedTableList(); - // @bug6156. Skip horizontal optimization for no table union. - bool horizontalOptimization = true; - for (uint i = 0; i < derivedTbList.size(); i++) { CalpontSelectExecutionPlan* plan = reinterpret_cast(derivedTbList[i].get()); @@ -76,18 +73,6 @@ void derivedTableOptimization(gp_walk_info* gwip, SCSEP& csep) } } - if (plan->tableList().empty()) - horizontalOptimization = false; - - for (uint j = 0; j < plan->unionVec().size(); j++) - { - if (reinterpret_cast(plan->unionVec()[j].get())->tableList().empty()) - { - horizontalOptimization = false; - break; - } - } - if (verticalOptimization) { int64_t val = 1; @@ -203,203 +188,6 @@ void derivedTableOptimization(gp_walk_info* gwip, SCSEP& csep) } } } - - /* - * @bug5635. Move filters that only belongs to a derived table to inside the derived table. - * 1. parse tree walk to populate derivedTableFilterMap and set null candidate on the tree. - * 2. remove the null filters - * 3. and the filters of derivedTableFilterMap and append to the WHERE filter of the derived table - * - * Note: - * 1. Subquery filters is ignored because derived table can not be in subquery - * 2. While walking tree, whenever a single derive simplefilter is encountered, - * this filter is pushed to the corresponding stack - * 2. Whenever an OR operator is encountered, all the filter stack of - * that OR involving derived table are emptied and null candidate of each - * stacked filter needs to be reset (not null) - */ - ParseTree* pt = csep->filters(); - map derivedTbFilterMap; - - if (horizontalOptimization && pt) - { - pt->walk(setDerivedTable); - setDerivedFilter(gwip, pt, derivedTbFilterMap, derivedTbList); - csep->filters(pt); - } - - // AND the filters of individual stack to the derived table filter tree - // @todo union filters. - // @todo outer join complication - map::iterator mapIt; - - for (uint i = 0; i < derivedTbList.size(); i++) - { - CalpontSelectExecutionPlan* plan = reinterpret_cast(derivedTbList[i].get()); - CalpontSelectExecutionPlan::ReturnedColumnList derivedColList = plan->returnedCols(); - mapIt = derivedTbFilterMap.find(plan->derivedTbAlias()); - - if (mapIt != derivedTbFilterMap.end()) - { - // replace all derived column of this filter with real column from - // derived table projection list. - ParseTree* mainFilter = new ParseTree(); - mainFilter->copyTree(*(mapIt->second)); - replaceRefCol(mainFilter, derivedColList); - ParseTree* derivedFilter = plan->filters(); - - if (derivedFilter) - { - LogicOperator* op = new LogicOperator("and"); - ParseTree* filter = new ParseTree(op); - filter->left(derivedFilter); - filter->right(mainFilter); - plan->filters(filter); - } - else - { - plan->filters(mainFilter); - } - - // union filter handling - for (uint j = 0; j < plan->unionVec().size(); j++) - { - CalpontSelectExecutionPlan* unionPlan = - reinterpret_cast(plan->unionVec()[j].get()); - CalpontSelectExecutionPlan::ReturnedColumnList unionColList = unionPlan->returnedCols(); - ParseTree* mainFilterForUnion = new ParseTree(); - mainFilterForUnion->copyTree(*(mapIt->second)); - replaceRefCol(mainFilterForUnion, unionColList); - ParseTree* unionFilter = unionPlan->filters(); - - if (unionFilter) - { - LogicOperator* op = new LogicOperator("and"); - ParseTree* filter = new ParseTree(op); - filter->left(unionFilter); - filter->right(mainFilterForUnion); - unionPlan->filters(filter); - } - else - { - unionPlan->filters(mainFilterForUnion); - } - } - } - } - - // clean derivedTbFilterMap because all the filters are copied - for (mapIt = derivedTbFilterMap.begin(); mapIt != derivedTbFilterMap.end(); ++mapIt) - delete (*mapIt).second; - - // recursively process the nested derived table - for (uint i = 0; i < csep->subSelectList().size(); i++) - { - SCSEP subselect(boost::dynamic_pointer_cast(csep->subSelectList()[i])); - derivedTableOptimization(gwip, subselect); - } -} - -void setDerivedTable(execplan::ParseTree* n) -{ - ParseTree* lhs = n->left(); - ParseTree* rhs = n->right(); - - Operator* op = dynamic_cast(n->data()); - - // if logic operator then lhs and rhs can't be both null - if (op) - { - if (!lhs || lhs->derivedTable() == "*") - { - n->derivedTable(rhs ? rhs->derivedTable() : "*"); - } - else if (!rhs || rhs->derivedTable() == "*") - { - n->derivedTable(lhs->derivedTable()); - } - else if (lhs->derivedTable() == rhs->derivedTable()) - { - n->derivedTable(lhs->derivedTable()); - } - else - { - n->derivedTable(""); - } - } - else - { - n->data()->setDerivedTable(); - n->derivedTable(n->data()->derivedTable()); - } -} - -ParseTree* setDerivedFilter(gp_walk_info* gwip, ParseTree*& n, map& filterMap, - CalpontSelectExecutionPlan::SelectList& derivedTbList) -{ - if (!(n->derivedTable().empty())) - { - // @todo replace virtual column of n to real column - // all simple columns should belong to the same derived table - CalpontSelectExecutionPlan* csep = NULL; - - for (uint i = 0; i < derivedTbList.size(); i++) - { - CalpontSelectExecutionPlan* plan = dynamic_cast(derivedTbList[i].get()); - - if (plan->derivedTbAlias() == n->derivedTable()) - { - csep = plan; - break; - } - } - - // should never be null; if null then give up optimization. - if (!csep) - return n; - - // 2. push the filter to the derived table filter stack, or 'and' with - // the filters in the stack - map::iterator mapIter = filterMap.find(n->derivedTable()); - - if (mapIter == filterMap.end()) - { - filterMap.insert(pair(n->derivedTable(), n)); - } - else - { - ParseTree* pt = new ParseTree(new LogicOperator("and")); - pt->left(mapIter->second); - pt->right(n); - mapIter->second = pt; - } - - int64_t val = 1; - n = new ParseTree(new ConstantColumn(val)); - (dynamic_cast(n->data()))->timeZone(gwip->timeZone); - } - else - { - Operator* op = dynamic_cast(n->data()); - - if (op && (op->op() == OP_OR || op->op() == OP_XOR)) - { - return n; - } - else - { - ParseTree* lhs = n->left(); - ParseTree* rhs = n->right(); - - if (lhs) - n->left(setDerivedFilter(gwip, lhs, filterMap, derivedTbList)); - - if (rhs) - n->right(setDerivedFilter(gwip, rhs, filterMap, derivedTbList)); - } - } - - return n; } FromSubQuery::FromSubQuery(gp_walk_info& gwip) : SubQuery(gwip) diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index 7618dd180..893df3fd9 100644 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -7474,12 +7474,11 @@ int cs_get_select_plan(ha_columnstore_select_handler* handler, THD* thd, SCSEP& cerr << "-------------- EXECUTION PLAN END --------------\n" << endl; } - // Derived table projection and filter optimization. + // Derived table projection list optimization. derivedTableOptimization(&gwi, csep); - if (get_unstable_optimizer(thd)) { - optimizer::RBOptimizerContext ctx(gwi); + optimizer::RBOptimizerContext ctx(gwi, *thd); bool csepWasOptimized = optimizer::optimizeCSEP(*csep, ctx); if (csep->traceOn() && csepWasOptimized) { diff --git a/dbcon/mysql/rbo_predicate_pushdown.cpp b/dbcon/mysql/rbo_predicate_pushdown.cpp new file mode 100644 index 000000000..83bd005b7 --- /dev/null +++ b/dbcon/mysql/rbo_predicate_pushdown.cpp @@ -0,0 +1,236 @@ +/* Copyright (C) 2025 MariaDB Corporation + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#include +#include + +#include "rulebased_optimizer.h" + +#include "constantcolumn.h" +#include "execplan/calpontselectexecutionplan.h" +#include "execplan/simplecolumn.h" +#include "logicoperator.h" +#include "operator.h" + +namespace optimizer +{ + +bool matchPredicatePushdown(execplan::CalpontSelectExecutionPlan& csep) +{ + // The original rule match contains questionable decision to filter out + // queries that contains any UNION UNIT with only derived tables. + // See ha_from_sub.cpp before MCS 23.10.7 for more details and @bug6156. + // All tables are derived thus nothing to optimize. + return !csep.tableList().empty(); +} + + +void setDerivedTable(execplan::ParseTree* n) +{ + execplan::ParseTree* lhs = n->left(); + execplan::ParseTree* rhs = n->right(); + + execplan::Operator* op = dynamic_cast(n->data()); + + // if logic operator then lhs and rhs can't be both null + if (op) + { + if (!lhs || lhs->derivedTable() == "*") + { + n->derivedTable(rhs ? rhs->derivedTable() : "*"); + } + else if (!rhs || rhs->derivedTable() == "*") + { + n->derivedTable(lhs->derivedTable()); + } + else if (lhs->derivedTable() == rhs->derivedTable()) + { + n->derivedTable(lhs->derivedTable()); + } + else + { + n->derivedTable(""); + } + } + else + { + n->data()->setDerivedTable(); + n->derivedTable(n->data()->derivedTable()); + } +} + +execplan::ParseTree* setDerivedFilter(cal_impl_if::gp_walk_info* gwip, execplan::ParseTree*& n, map& filterMap, + const execplan::CalpontSelectExecutionPlan::SelectList& derivedTbList) +{ + if (!(n->derivedTable().empty())) + { + // @todo replace virtual column of n to real column + // all simple columns should belong to the same derived table + execplan::CalpontSelectExecutionPlan* csep = NULL; + + for (uint i = 0; i < derivedTbList.size(); i++) + { + execplan::CalpontSelectExecutionPlan* plan = dynamic_cast(derivedTbList[i].get()); + + if (plan->derivedTbAlias() == n->derivedTable()) + { + csep = plan; + break; + } + } + + // should never be null; if null then give up optimization. + if (!csep) + return n; + + // 2. push the filter to the derived table filter stack, or 'and' with + // the filters in the stack + map::iterator mapIter = filterMap.find(n->derivedTable()); + + if (mapIter == filterMap.end()) + { + filterMap.insert(pair(n->derivedTable(), n)); + } + else + { + execplan::ParseTree* pt = new execplan::ParseTree(new execplan::LogicOperator("and")); + pt->left(mapIter->second); + pt->right(n); + mapIter->second = pt; + } + + int64_t val = 1; + n = new execplan::ParseTree(new execplan::ConstantColumn(val)); + (dynamic_cast(n->data()))->timeZone(gwip->timeZone); + } + else + { + execplan::Operator* op = dynamic_cast(n->data()); + + if (op && (op->op() == execplan::OP_OR || op->op() == execplan::OP_XOR)) + { + return n; + } + else + { + execplan::ParseTree* lhs = n->left(); + execplan::ParseTree* rhs = n->right(); + + if (lhs) + n->left(optimizer::setDerivedFilter(gwip, lhs, filterMap, derivedTbList)); + + if (rhs) + n->right(optimizer::setDerivedFilter(gwip, rhs, filterMap, derivedTbList)); + } + } + + return n; +} + +void applyPredicatePushdown(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx) +{ + /* + * @bug5635. Move filters that only belongs to a derived table to inside the derived table. + * 1. parse tree walk to populate derivedTableFilterMap and set null candidate on the tree. + * 2. remove the null filters + * 3. and the filters of derivedTableFilterMap and append to the WHERE filter of the derived table + * + * Note: + * 1. Subquery filters is ignored because derived table can not be in subquery + * 2. While walking tree, whenever a single derive simplefilter is encountered, + * this filter is pushed to the corresponding stack + * 2. Whenever an OR operator is encountered, all the filter stack of + * that OR involving derived table are emptied and null candidate of each + * stacked filter needs to be reset (not null) + */ + execplan::ParseTree* pt = csep.filters(); + map derivedTbFilterMap; + auto& derivedTbList = csep.derivedTableList(); + + if (pt) + { + pt->walk(setDerivedTable); + setDerivedFilter(&ctx.gwi, pt, derivedTbFilterMap, derivedTbList); + csep.filters(pt); + } + + // AND the filters of individual stack to the derived table filter tree + // @todo union filters. + // @todo outer join complication + map::iterator mapIt; + + for (uint i = 0; i < derivedTbList.size(); i++) + { + execplan::CalpontSelectExecutionPlan* plan = dynamic_cast(derivedTbList[i].get()); + execplan::CalpontSelectExecutionPlan::ReturnedColumnList derivedColList = plan->returnedCols(); + mapIt = derivedTbFilterMap.find(plan->derivedTbAlias()); + + if (mapIt != derivedTbFilterMap.end()) + { + // replace all derived column of this filter with real column from + // derived table projection list. + execplan::ParseTree* mainFilter = new execplan::ParseTree(); + mainFilter->copyTree(*(mapIt->second)); + replaceRefCol(mainFilter, derivedColList); + execplan::ParseTree* derivedFilter = plan->filters(); + + if (derivedFilter) + { + execplan::LogicOperator* op = new execplan::LogicOperator("and"); + execplan::ParseTree* filter = new execplan::ParseTree(op); + filter->left(derivedFilter); + filter->right(mainFilter); + plan->filters(filter); + } + else + { + plan->filters(mainFilter); + } + + // union filter handling + for (uint j = 0; j < plan->unionVec().size(); j++) + { + execplan::CalpontSelectExecutionPlan* unionPlan = + reinterpret_cast(plan->unionVec()[j].get()); + execplan::CalpontSelectExecutionPlan::ReturnedColumnList unionColList = unionPlan->returnedCols(); + execplan::ParseTree* mainFilterForUnion = new execplan::ParseTree(); + mainFilterForUnion->copyTree(*(mapIt->second)); + replaceRefCol(mainFilterForUnion, unionColList); + execplan::ParseTree* unionFilter = unionPlan->filters(); + + if (unionFilter) + { + execplan::LogicOperator* op = new execplan::LogicOperator("and"); + execplan::ParseTree* filter = new execplan::ParseTree(op); + filter->left(unionFilter); + filter->right(mainFilterForUnion); + unionPlan->filters(filter); + } + else + { + unionPlan->filters(mainFilterForUnion); + } + } + } + } + + // clean derivedTbFilterMap because all the filters are copied + for (mapIt = derivedTbFilterMap.begin(); mapIt != derivedTbFilterMap.end(); ++mapIt) + delete (*mapIt).second; +} + +} // namespace optimizer diff --git a/dbcon/mysql/rbo_predicate_pushdown.h b/dbcon/mysql/rbo_predicate_pushdown.h new file mode 100644 index 000000000..31967f0be --- /dev/null +++ b/dbcon/mysql/rbo_predicate_pushdown.h @@ -0,0 +1,30 @@ +/* Copyright (C) 2025 MariaDB Corporation + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#pragma once + +#define PREFER_MY_CONFIG_H +#include +#include "idb_mysql.h" + +#include "execplan/calpontselectexecutionplan.h" +#include "rulebased_optimizer.h" + +namespace optimizer { + bool matchPredicatePushdown(execplan::CalpontSelectExecutionPlan& csep); + void applyPredicatePushdown(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx); +} \ No newline at end of file diff --git a/dbcon/mysql/rulebased_optimizer.cpp b/dbcon/mysql/rulebased_optimizer.cpp index f01f79321..a572d4ce5 100644 --- a/dbcon/mysql/rulebased_optimizer.cpp +++ b/dbcon/mysql/rulebased_optimizer.cpp @@ -50,9 +50,17 @@ bool optimizeCSEPWithRules(execplan::CalpontSelectExecutionPlan& root, const std // high level API call for optimizer bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root, optimizer::RBOptimizerContext& ctx) { - optimizer::Rule parallelCES{"parallelCES", optimizer::matchParallelCES, optimizer::applyParallelCES}; + std::vector rules; - std::vector rules = {parallelCES}; + if (get_unstable_optimizer(&ctx.thd)) + { + optimizer::Rule parallelCES{"parallelCES", optimizer::matchParallelCES, optimizer::applyParallelCES}; + rules.push_back(parallelCES); + } + + optimizer::Rule predicatePushdown{"predicatePushdown", optimizer::matchParallelCES, + optimizer::applyParallelCES}; + rules.push_back(predicatePushdown); return optimizeCSEPWithRules(root, rules, ctx); } @@ -85,6 +93,7 @@ bool Rule::walk(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimiz execplan::CalpontSelectExecutionPlan* current = planStack.top(); planStack.pop(); + // Walk nested derived for (auto& table : current->derivedTableList()) { auto* csepPtr = dynamic_cast(table.get()); @@ -94,6 +103,7 @@ bool Rule::walk(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimiz } } + // Walk nested UNION UNITS for (auto& unionUnit : current->unionVec()) { auto* unionUnitPtr = dynamic_cast(unionUnit.get()); @@ -103,6 +113,18 @@ bool Rule::walk(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimiz } } + // Walk nested subselect in filters, e.g. SEMI-JOIN + for (auto& subselect : csep.subSelectList()) + { + auto* subselectPtr = dynamic_cast(subselect.get()); + if (subselectPtr) + { + planStack.push(subselectPtr); + } + } + + // TODO add walking nested subselect in projection. See CSEP::fSelectSubList + if (matchRule(*current)) { applyRule(*current, ctx); diff --git a/dbcon/mysql/rulebased_optimizer.h b/dbcon/mysql/rulebased_optimizer.h index c047a406e..515d420c9 100644 --- a/dbcon/mysql/rulebased_optimizer.h +++ b/dbcon/mysql/rulebased_optimizer.h @@ -32,10 +32,11 @@ namespace optimizer { class RBOptimizerContext { public: RBOptimizerContext() = delete; - RBOptimizerContext(cal_impl_if::gp_walk_info& walk_info) : gwi(walk_info) {} + RBOptimizerContext(cal_impl_if::gp_walk_info& walk_info, THD& thd) : gwi(walk_info), thd(thd) {} // gwi lifetime should be longer than optimizer context. // In plugin runtime this is always true. cal_impl_if::gp_walk_info& gwi; + THD& thd; uint64_t uniqueId {0}; };