1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-10-31 18:30:33 +03:00

fix(rbo,rules): MCOL-6131 predicate pushdown rule for RBO.

This commit is contained in:
drrtuy
2025-08-07 17:26:13 +00:00
committed by Leonid Fedorov
parent 1341d282ad
commit 67ac7f2f75
6 changed files with 294 additions and 218 deletions

View File

@@ -51,9 +51,6 @@ void derivedTableOptimization(gp_walk_info* gwip, SCSEP& csep)
// derived tables are not checked for optimization in this scope.
CalpontSelectExecutionPlan::SelectList derivedTbList = csep->derivedTableList();
// @bug6156. Skip horizontal optimization for no table union.
bool horizontalOptimization = true;
for (uint i = 0; i < derivedTbList.size(); i++)
{
CalpontSelectExecutionPlan* plan = reinterpret_cast<CalpontSelectExecutionPlan*>(derivedTbList[i].get());
@@ -76,18 +73,6 @@ void derivedTableOptimization(gp_walk_info* gwip, SCSEP& csep)
}
}
if (plan->tableList().empty())
horizontalOptimization = false;
for (uint j = 0; j < plan->unionVec().size(); j++)
{
if (reinterpret_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get())->tableList().empty())
{
horizontalOptimization = false;
break;
}
}
if (verticalOptimization)
{
int64_t val = 1;
@@ -203,203 +188,6 @@ void derivedTableOptimization(gp_walk_info* gwip, SCSEP& csep)
}
}
}
/*
* @bug5635. Move filters that belong only to a derived table inside that derived table.
* 1. Walk the parse tree to populate derivedTableFilterMap and set null candidates on the tree.
* 2. Remove the null filters.
* 3. AND the filters of derivedTableFilterMap and append them to the WHERE filter of the derived table.
*
* Note:
* 1. Subquery filters are ignored because a derived table cannot be in a subquery.
* 2. While walking the tree, whenever a simple filter on a single derived table is encountered,
* this filter is pushed to the corresponding stack.
* 3. Whenever an OR operator is encountered, all the filter stacks of
* that OR involving derived tables are emptied and the null candidate of each
* stacked filter needs to be reset (not null).
*/
ParseTree* pt = csep->filters();
map<string, ParseTree*> derivedTbFilterMap;
if (horizontalOptimization && pt)
{
pt->walk(setDerivedTable);
setDerivedFilter(gwip, pt, derivedTbFilterMap, derivedTbList);
csep->filters(pt);
}
// AND the filters of individual stack to the derived table filter tree
// @todo union filters.
// @todo outer join complication
map<string, ParseTree*>::iterator mapIt;
for (uint i = 0; i < derivedTbList.size(); i++)
{
CalpontSelectExecutionPlan* plan = reinterpret_cast<CalpontSelectExecutionPlan*>(derivedTbList[i].get());
CalpontSelectExecutionPlan::ReturnedColumnList derivedColList = plan->returnedCols();
mapIt = derivedTbFilterMap.find(plan->derivedTbAlias());
if (mapIt != derivedTbFilterMap.end())
{
// replace all derived column of this filter with real column from
// derived table projection list.
ParseTree* mainFilter = new ParseTree();
mainFilter->copyTree(*(mapIt->second));
replaceRefCol(mainFilter, derivedColList);
ParseTree* derivedFilter = plan->filters();
if (derivedFilter)
{
LogicOperator* op = new LogicOperator("and");
ParseTree* filter = new ParseTree(op);
filter->left(derivedFilter);
filter->right(mainFilter);
plan->filters(filter);
}
else
{
plan->filters(mainFilter);
}
// union filter handling
for (uint j = 0; j < plan->unionVec().size(); j++)
{
CalpontSelectExecutionPlan* unionPlan =
reinterpret_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get());
CalpontSelectExecutionPlan::ReturnedColumnList unionColList = unionPlan->returnedCols();
ParseTree* mainFilterForUnion = new ParseTree();
mainFilterForUnion->copyTree(*(mapIt->second));
replaceRefCol(mainFilterForUnion, unionColList);
ParseTree* unionFilter = unionPlan->filters();
if (unionFilter)
{
LogicOperator* op = new LogicOperator("and");
ParseTree* filter = new ParseTree(op);
filter->left(unionFilter);
filter->right(mainFilterForUnion);
unionPlan->filters(filter);
}
else
{
unionPlan->filters(mainFilterForUnion);
}
}
}
}
// clean derivedTbFilterMap because all the filters are copied
for (mapIt = derivedTbFilterMap.begin(); mapIt != derivedTbFilterMap.end(); ++mapIt)
delete (*mapIt).second;
// recursively process the nested derived table
for (uint i = 0; i < csep->subSelectList().size(); i++)
{
SCSEP subselect(boost::dynamic_pointer_cast<CalpontSelectExecutionPlan>(csep->subSelectList()[i]));
derivedTableOptimization(gwip, subselect);
}
}
// Tree-walk callback that tags node `n` with the derived table its subtree
// refers to.
//
// Tag values as used by this function:
//   "*"    - adopted when an operator node has no usable child tag; a "*"
//            child never overrides the tag of its sibling.
//   ""     - both children carry real but different tags.
//   other  - the single tag shared by the subtree.
//
// Operator nodes read the tags already stored on their children, so the
// children are expected to be tagged first (presumably walk() visits them
// before the parent - confirm against ParseTree::walk).
void setDerivedTable(execplan::ParseTree* n)
{
  // Non-operator payload: it computes its own tag, which is mirrored on the node.
  if (!dynamic_cast<Operator*>(n->data()))
  {
    n->data()->setDerivedTable();
    n->derivedTable(n->data()->derivedTable());
    return;
  }

  ParseTree* const left = n->left();
  ParseTree* const right = n->right();

  // A missing or "*" left side contributes nothing: take the right tag, or
  // "*" when there is no right side either.
  if (!left || left->derivedTable() == "*")
  {
    if (right)
      n->derivedTable(right->derivedTable());
    else
      n->derivedTable("*");
    return;
  }

  // Left carries a real tag. It is kept when the right side is missing, is
  // "*", or names the same table; otherwise the subtree is mixed.
  const bool rightCompatible =
      !right || right->derivedTable() == "*" || left->derivedTable() == right->derivedTable();

  if (rightCompatible)
    n->derivedTable(left->derivedTable());
  else
    n->derivedTable("");
}
// Detaches every sub-filter that refers to exactly one known derived table and
// collects it in `filterMap`, keyed by the derived table alias.
//
// gwip          - walk context; only its timeZone is read, to stamp the
//                 placeholder constant.
// n             - current node (in/out). When the node is detached, `n` is
//                 re-pointed at a freshly allocated constant-1 node.
// filterMap     - alias -> detached filter. Several filters for the same
//                 alias are chained under a new "and" node.
// derivedTbList - derived tables of the enclosing plan; a filter is detached
//                 only if its tag matches one of their aliases.
//
// Returns `n` (possibly the replacement node) so callers can re-link it.
//
// Nodes with an empty tag are descended into, except for OR/XOR operators,
// whose subtrees are left untouched.
ParseTree* setDerivedFilter(gp_walk_info* gwip, ParseTree*& n, map<string, ParseTree*>& filterMap,
                            CalpontSelectExecutionPlan::SelectList& derivedTbList)
{
  // Copy the tag: `n` is re-pointed below while the tag is still needed.
  const std::string alias = n->derivedTable();

  if (alias.empty())
  {
    Operator* op = dynamic_cast<Operator*>(n->data());

    // Operands of OR/XOR are not descended into.
    if (op && (op->op() == OP_OR || op->op() == OP_XOR))
      return n;

    ParseTree* lhs = n->left();
    ParseTree* rhs = n->right();

    if (lhs)
      n->left(setDerivedFilter(gwip, lhs, filterMap, derivedTbList));

    if (rhs)
      n->right(setDerivedFilter(gwip, rhs, filterMap, derivedTbList));

    return n;
  }

  // @todo replace virtual column of n to real column
  // The tag must name one of the derived tables of this plan. Entries that are
  // not CalpontSelectExecutionPlan instances are skipped instead of being
  // dereferenced through a null pointer.
  bool aliasIsKnown = false;

  for (auto& derived : derivedTbList)
  {
    CalpontSelectExecutionPlan* plan = dynamic_cast<CalpontSelectExecutionPlan*>(derived.get());

    if (plan && plan->derivedTbAlias() == alias)
    {
      aliasIsKnown = true;
      break;
    }
  }

  // should never happen; if it does, give up the optimization for this node.
  if (!aliasIsKnown)
    return n;

  // Store the filter for this alias, or AND it with the ones stored earlier.
  auto slot = filterMap.find(alias);

  if (slot == filterMap.end())
  {
    filterMap.emplace(alias, n);
  }
  else
  {
    ParseTree* conjunction = new ParseTree(new LogicOperator("and"));
    conjunction->left(slot->second);
    conjunction->right(n);
    slot->second = conjunction;
  }

  // Leave a constant 1 in place of the detached filter.
  int64_t val = 1;
  ConstantColumn* placeholder = new ConstantColumn(val);
  placeholder->timeZone(gwip->timeZone);
  n = new ParseTree(placeholder);
  return n;
}
FromSubQuery::FromSubQuery(gp_walk_info& gwip) : SubQuery(gwip)

View File

@@ -7474,12 +7474,11 @@ int cs_get_select_plan(ha_columnstore_select_handler* handler, THD* thd, SCSEP&
cerr << "-------------- EXECUTION PLAN END --------------\n" << endl;
}
// Derived table projection and filter optimization.
// Derived table projection list optimization.
derivedTableOptimization(&gwi, csep);
if (get_unstable_optimizer(thd))
{
optimizer::RBOptimizerContext ctx(gwi);
optimizer::RBOptimizerContext ctx(gwi, *thd);
bool csepWasOptimized = optimizer::optimizeCSEP(*csep, ctx);
if (csep->traceOn() && csepWasOptimized)
{

View File

@@ -0,0 +1,236 @@
/* Copyright (C) 2025 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <cstddef>
#include <cstdint>
#include "rulebased_optimizer.h"
#include "constantcolumn.h"
#include "execplan/calpontselectexecutionplan.h"
#include "execplan/simplecolumn.h"
#include "logicoperator.h"
#include "operator.h"
namespace optimizer
{
// Match step of the predicate pushdown rule: the rule applies to every plan
// whose table list is not empty.
//
// The original rule match contained a questionable decision to filter out
// queries that contain any UNION UNIT with only derived tables; see
// ha_from_sub.cpp before MCS 23.10.7 and @bug6156 for details.
bool matchPredicatePushdown(execplan::CalpontSelectExecutionPlan& csep)
{
  // Empty table list: all tables are derived, thus nothing to optimize.
  const bool nothingToOptimize = csep.tableList().empty();
  return !nothingToOptimize;
}
// Tree-walk callback that tags node `n` with the derived table its subtree
// refers to.
//
// Tag values as used by this function:
//   "*"    - adopted when an operator node has no usable child tag; a "*"
//            child never overrides the tag of its sibling.
//   ""     - both children carry real but different tags.
//   other  - the single tag shared by the subtree.
//
// Operator nodes read the tags already stored on their children, so the
// children are expected to be tagged first (presumably walk() visits them
// before the parent - confirm against ParseTree::walk).
void setDerivedTable(execplan::ParseTree* n)
{
  const bool isOperatorNode = dynamic_cast<execplan::Operator*>(n->data()) != nullptr;

  if (!isOperatorNode)
  {
    // The payload computes its own tag, which is mirrored on the tree node.
    n->data()->setDerivedTable();
    n->derivedTable(n->data()->derivedTable());
    return;
  }

  execplan::ParseTree* const leftChild = n->left();
  execplan::ParseTree* const rightChild = n->right();

  // A missing or "*" left side contributes nothing: take the right tag, or
  // "*" when there is no right side either.
  if (!leftChild || leftChild->derivedTable() == "*")
  {
    if (rightChild)
      n->derivedTable(rightChild->derivedTable());
    else
      n->derivedTable("*");
    return;
  }

  // The left side carries a real tag. Keep it when the right side is missing,
  // is "*", or names the same table; otherwise mark the subtree as mixed.
  const bool rightCompatible = !rightChild || rightChild->derivedTable() == "*" ||
                               leftChild->derivedTable() == rightChild->derivedTable();

  if (rightCompatible)
    n->derivedTable(leftChild->derivedTable());
  else
    n->derivedTable("");
}
// Detaches every sub-filter that refers to exactly one known derived table and
// collects it in `filterMap`, keyed by the derived table alias.
//
// gwip          - walk context; only its timeZone is read, to stamp the
//                 placeholder constant.
// n             - current node (in/out). When the node is detached, `n` is
//                 re-pointed at a freshly allocated constant-1 node.
// filterMap     - alias -> detached filter. Several filters for the same
//                 alias are chained under a new "and" node.
// derivedTbList - derived tables of the enclosing plan; a filter is detached
//                 only if its tag matches one of their aliases.
//
// Returns `n` (possibly the replacement node) so callers can re-link it.
//
// Nodes with an empty tag are descended into, except for OR/XOR operators,
// whose subtrees are left untouched.
execplan::ParseTree* setDerivedFilter(cal_impl_if::gp_walk_info* gwip, execplan::ParseTree*& n,
                                      std::map<std::string, execplan::ParseTree*>& filterMap,
                                      const execplan::CalpontSelectExecutionPlan::SelectList& derivedTbList)
{
  // Copy the tag: `n` is re-pointed below while the tag is still needed.
  const std::string alias = n->derivedTable();

  if (alias.empty())
  {
    execplan::Operator* op = dynamic_cast<execplan::Operator*>(n->data());

    // Operands of OR/XOR are not descended into.
    if (op && (op->op() == execplan::OP_OR || op->op() == execplan::OP_XOR))
      return n;

    execplan::ParseTree* lhs = n->left();
    execplan::ParseTree* rhs = n->right();

    // The recursive calls stay explicitly qualified, as in the original code.
    if (lhs)
      n->left(optimizer::setDerivedFilter(gwip, lhs, filterMap, derivedTbList));

    if (rhs)
      n->right(optimizer::setDerivedFilter(gwip, rhs, filterMap, derivedTbList));

    return n;
  }

  // @todo replace virtual column of n to real column
  // The tag must name one of the derived tables of this plan. Entries that are
  // not CalpontSelectExecutionPlan instances are skipped instead of being
  // dereferenced through a null pointer.
  bool aliasIsKnown = false;

  for (const auto& derived : derivedTbList)
  {
    auto* plan = dynamic_cast<execplan::CalpontSelectExecutionPlan*>(derived.get());

    if (plan && plan->derivedTbAlias() == alias)
    {
      aliasIsKnown = true;
      break;
    }
  }

  // should never happen; if it does, give up the optimization for this node.
  if (!aliasIsKnown)
    return n;

  // Store the filter for this alias, or AND it with the ones stored earlier.
  auto slot = filterMap.find(alias);

  if (slot == filterMap.end())
  {
    filterMap.emplace(alias, n);
  }
  else
  {
    auto* conjunction = new execplan::ParseTree(new execplan::LogicOperator("and"));
    conjunction->left(slot->second);
    conjunction->right(n);
    slot->second = conjunction;
  }

  // Leave a constant 1 in place of the detached filter.
  int64_t val = 1;
  auto* placeholder = new execplan::ConstantColumn(val);
  placeholder->timeZone(gwip->timeZone);
  n = new execplan::ParseTree(placeholder);
  return n;
}
void applyPredicatePushdown(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx)
{
/*
* @bug5635. Move filters that only belongs to a derived table to inside the derived table.
* 1. parse tree walk to populate derivedTableFilterMap and set null candidate on the tree.
* 2. remove the null filters
* 3. and the filters of derivedTableFilterMap and append to the WHERE filter of the derived table
*
* Note:
* 1. Subquery filters is ignored because derived table can not be in subquery
* 2. While walking tree, whenever a single derive simplefilter is encountered,
* this filter is pushed to the corresponding stack
* 2. Whenever an OR operator is encountered, all the filter stack of
* that OR involving derived table are emptied and null candidate of each
* stacked filter needs to be reset (not null)
*/
execplan::ParseTree* pt = csep.filters();
map<string, execplan::ParseTree*> derivedTbFilterMap;
auto& derivedTbList = csep.derivedTableList();
if (pt)
{
pt->walk(setDerivedTable);
setDerivedFilter(&ctx.gwi, pt, derivedTbFilterMap, derivedTbList);
csep.filters(pt);
}
// AND the filters of individual stack to the derived table filter tree
// @todo union filters.
// @todo outer join complication
map<string, execplan::ParseTree*>::iterator mapIt;
for (uint i = 0; i < derivedTbList.size(); i++)
{
execplan::CalpontSelectExecutionPlan* plan = dynamic_cast<execplan::CalpontSelectExecutionPlan*>(derivedTbList[i].get());
execplan::CalpontSelectExecutionPlan::ReturnedColumnList derivedColList = plan->returnedCols();
mapIt = derivedTbFilterMap.find(plan->derivedTbAlias());
if (mapIt != derivedTbFilterMap.end())
{
// replace all derived column of this filter with real column from
// derived table projection list.
execplan::ParseTree* mainFilter = new execplan::ParseTree();
mainFilter->copyTree(*(mapIt->second));
replaceRefCol(mainFilter, derivedColList);
execplan::ParseTree* derivedFilter = plan->filters();
if (derivedFilter)
{
execplan::LogicOperator* op = new execplan::LogicOperator("and");
execplan::ParseTree* filter = new execplan::ParseTree(op);
filter->left(derivedFilter);
filter->right(mainFilter);
plan->filters(filter);
}
else
{
plan->filters(mainFilter);
}
// union filter handling
for (uint j = 0; j < plan->unionVec().size(); j++)
{
execplan::CalpontSelectExecutionPlan* unionPlan =
reinterpret_cast<execplan::CalpontSelectExecutionPlan*>(plan->unionVec()[j].get());
execplan::CalpontSelectExecutionPlan::ReturnedColumnList unionColList = unionPlan->returnedCols();
execplan::ParseTree* mainFilterForUnion = new execplan::ParseTree();
mainFilterForUnion->copyTree(*(mapIt->second));
replaceRefCol(mainFilterForUnion, unionColList);
execplan::ParseTree* unionFilter = unionPlan->filters();
if (unionFilter)
{
execplan::LogicOperator* op = new execplan::LogicOperator("and");
execplan::ParseTree* filter = new execplan::ParseTree(op);
filter->left(unionFilter);
filter->right(mainFilterForUnion);
unionPlan->filters(filter);
}
else
{
unionPlan->filters(mainFilterForUnion);
}
}
}
}
// clean derivedTbFilterMap because all the filters are copied
for (mapIt = derivedTbFilterMap.begin(); mapIt != derivedTbFilterMap.end(); ++mapIt)
delete (*mapIt).second;
}
} // namespace optimizer

View File

@@ -0,0 +1,30 @@
/* Copyright (C) 2025 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#pragma once
#define PREFER_MY_CONFIG_H
#include <my_config.h>
#include "idb_mysql.h"
#include "execplan/calpontselectexecutionplan.h"
#include "rulebased_optimizer.h"
namespace optimizer {
// Match step of the predicate pushdown rule: returns true when the table list
// of `csep` is not empty.
bool matchPredicatePushdown(execplan::CalpontSelectExecutionPlan& csep);
// Apply step of the predicate pushdown rule: moves filters of `csep` that
// refer to a single derived table into that derived table (and its UNION
// units). Uses ctx.gwi while rewriting the filter tree.
void applyPredicatePushdown(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx);
}

View File

@@ -50,9 +50,17 @@ bool optimizeCSEPWithRules(execplan::CalpontSelectExecutionPlan& root, const std
// high level API call for optimizer
bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root, optimizer::RBOptimizerContext& ctx)
{
optimizer::Rule parallelCES{"parallelCES", optimizer::matchParallelCES, optimizer::applyParallelCES};
std::vector<optimizer::Rule> rules;
std::vector<optimizer::Rule> rules = {parallelCES};
if (get_unstable_optimizer(&ctx.thd))
{
optimizer::Rule parallelCES{"parallelCES", optimizer::matchParallelCES, optimizer::applyParallelCES};
rules.push_back(parallelCES);
}
optimizer::Rule predicatePushdown{"predicatePushdown", optimizer::matchParallelCES,
optimizer::applyParallelCES};
rules.push_back(predicatePushdown);
return optimizeCSEPWithRules(root, rules, ctx);
}
@@ -85,6 +93,7 @@ bool Rule::walk(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimiz
execplan::CalpontSelectExecutionPlan* current = planStack.top();
planStack.pop();
// Walk nested derived
for (auto& table : current->derivedTableList())
{
auto* csepPtr = dynamic_cast<execplan::CalpontSelectExecutionPlan*>(table.get());
@@ -94,6 +103,7 @@ bool Rule::walk(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimiz
}
}
// Walk nested UNION UNITS
for (auto& unionUnit : current->unionVec())
{
auto* unionUnitPtr = dynamic_cast<execplan::CalpontSelectExecutionPlan*>(unionUnit.get());
@@ -103,6 +113,18 @@ bool Rule::walk(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimiz
}
}
// Walk nested subselects in filters, e.g. SEMI-JOIN.
// The subselects must come from the plan being visited (`current`), not from
// the root `csep`: taking them from the root re-pushes the same plans on
// every iteration, so nested subselects are never reached and the stack
// never drains once the root has a subselect.
for (auto& subselect : current->subSelectList())
{
  auto* subselectPtr = dynamic_cast<execplan::CalpontSelectExecutionPlan*>(subselect.get());
  if (subselectPtr)
  {
    planStack.push(subselectPtr);
  }
}
// TODO add walking nested subselect in projection. See CSEP::fSelectSubList
if (matchRule(*current))
{
applyRule(*current, ctx);

View File

@@ -32,10 +32,11 @@ namespace optimizer {
// State shared by the rule-based optimizer rules while a plan is optimized.
// Holds references only; it owns neither the walk info nor the THD.
class RBOptimizerContext {
public:
  RBOptimizerContext() = delete;
  // Both references are bound here. A constructor taking only the walk info
  // cannot exist: it would leave the reference member `thd` unbound, which is
  // ill-formed.
  RBOptimizerContext(cal_impl_if::gp_walk_info& walk_info, THD& thd) : gwi(walk_info), thd(thd) {}
  // gwi lifetime should be longer than optimizer context.
  // In plugin runtime this is always true.
  cal_impl_if::gp_walk_info& gwi;
  // Session handle; read by optimizeCSEP via get_unstable_optimizer(&ctx.thd).
  // Must likewise outlive the context, since only a reference is stored.
  THD& thd;
  // Starts at 0. NOTE(review): presumably a counter for generating unique
  // identifiers inside rules - confirm against its users.
  uint64_t uniqueId {0};
};