1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-01 06:46:55 +03:00

Merge branch 'stable-23.10' into MCOL-4240

This commit is contained in:
Leonid Fedorov
2025-07-30 19:05:41 +04:00
committed by GitHub
176 changed files with 3877 additions and 7205 deletions

View File

@ -43,6 +43,7 @@ set(libcalmysql_SRCS
is_columnstore_extents.cpp
columnstore_dataload.cpp
rulebased_optimizer.cpp
rbo_apply_parallel_ces.cpp
)
set_source_files_properties(ha_mcs.cpp PROPERTIES COMPILE_FLAGS "-fno-implicit-templates")
@ -63,7 +64,7 @@ if(COMMAND mysql_add_plugin)
${PLUGIN_EXEC_LIBS}
${PLUGIN_WRITE_LIBS}
joblist_server
${NETSNMP_LIBRARIES}
statistics_manager
${MARIADB_CLIENT_LIBS}
${S3API_DEPS}
threadpool
@ -89,10 +90,10 @@ else()
${S3API_DEPS}
${ENGINE_LDFLAGS}
${ENGINE_WRITE_LIBS}
${NETSNMP_LIBRARIES}
${SERVER_BUILD_DIR}/libservices/libmysqlservices.a
threadpool
loggingcpp
statistics_manager
marias3
)
# define this dummy target for standalone builds (ie, when mysql_add_plugin doesn't exist)

View File

@ -444,6 +444,9 @@ SCSEP FromSubQuery::transform()
return csep;
}
// Insert column statistics
fGwip.mergeTableStatistics(gwi.tableStatisticsMap);
fGwip.subselectList.push_back(csep);
return csep;
}

View File

@ -1828,7 +1828,7 @@ static int columnstore_init_func(void* p)
fprintf(stderr, "Columnstore: Started; Version: %s-%s\n", columnstore_version.c_str(),
columnstore_release.c_str());
plugin_ref plugin_innodb;
plugin_ref plugin_innodb = nullptr;
LEX_CSTRING name = {STRING_WITH_LEN("INNODB")};
if (get_innodb_queries_uses_mcs())
@ -1841,7 +1841,7 @@ static int columnstore_init_func(void* p)
DBUG_RETURN(HA_ERR_RETRY_INIT);
}
}
strncpy(cs_version, columnstore_version.c_str(), sizeof(cs_version) - 1);
cs_version[sizeof(cs_version) - 1] = 0;
@ -1857,7 +1857,7 @@ static int columnstore_init_func(void* p)
(my_hash_get_key)mcs_get_key, 0, 0);
std::cerr << "Columnstore: init mcs_hton attributes" << std::endl;
mcs_hton->create = ha_mcs_cache_create_handler;
mcs_hton->panic = 0;
mcs_hton->flags = HTON_CAN_RECREATE | HTON_NO_PARTITION;
@ -1873,13 +1873,15 @@ static int columnstore_init_func(void* p)
if (get_innodb_queries_uses_mcs())
{
std::cerr << "Columnstore: innodb_queries_uses_mcs is set, redirecting all InnoDB queries to Columnstore." << std::endl;
std::cerr << "Columnstore: innodb_queries_uses_mcs is set, redirecting all InnoDB queries to Columnstore."
<< std::endl;
auto* innodb_hton = plugin_hton(plugin_innodb);
int error = innodb_hton == nullptr; // Engine must exists!
if (error)
{
std::cerr << "Columnstore: innodb_queries_uses_mcs is set, but could not find InnoDB plugin." << std::endl;
std::cerr << "Columnstore: innodb_queries_uses_mcs is set, but could not find InnoDB plugin."
<< std::endl;
my_error(HA_ERR_INITIALIZATION, MYF(0), "Could not find storage engine %s", name.str);
}
innodb_hton->create_select = create_columnstore_select_handler;

View File

@ -871,12 +871,14 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& /*ta
return rc;
}
#if MYSQL_VERSION_ID < 110400
// For TIMESTAMP, if no constraint is given, default to NOT NULL
if (createTable->fTableDef->fColumns[i]->fType->fType == ddlpackage::DDL_TIMESTAMP &&
createTable->fTableDef->fColumns[i]->fConstraints.empty())
{
createTable->fTableDef->fColumns[i]->fConstraints.push_back(new ColumnConstraintDef(DDL_NOT_NULL));
}
#endif
if (createTable->fTableDef->fColumns[i]->fDefaultValue)
{

View File

@ -49,6 +49,7 @@ using namespace logging;
#define PREFER_MY_CONFIG_H
#include <my_config.h>
#include "idb_mysql.h"
#include "partition_element.h"
#include "partition_info.h"
@ -6287,6 +6288,39 @@ int processLimitAndOffset(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep
return 0;
}
// Loop over available indexes to find and extract corresponding EI column statistics
// for the first column of the index if any.
// Statistics is stored in GWI context.
// Mock for ES 10.6
#if MYSQL_VERSION_ID >= 120401
// Stores the engine-independent histogram of the column referenced by `ifp`
// in gwi.tableStatisticsMap, keyed by {schema, table} and then by column name.
//
// A histogram is recorded only when
//   * the field has read statistics whose histogram is a Histogram_json_hb, and
//   * the field is the FIRST key part of at least one index of its table.
//
// ifp  item to inspect; items without a resolved field/table are ignored.
// gwi  translation context that receives the statistics.
void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi)
{
  auto* field = ifp->field;

  // Validate the whole pointer chain before it is dereferenced
  // (the previous assert ran only after table->s had already been used).
  if (!field || !field->table || !field->table->s || !field->read_stats)
  {
    return;
  }

  // The histogram does not depend on the index being inspected, so resolve it once.
  auto* histogram = dynamic_cast<Histogram_json_hb*>(field->read_stats->histogram);
  if (!histogram)
  {
    return;
  }

  auto* share = field->table->s;
  for (uint keyIdx = 0; keyIdx < share->keys; ++keyIdx)
  {
    auto& keyInfo = share->key_info[keyIdx];

    // Only the leading key part qualifies; fieldnr is 1-based while field_index is 0-based.
    if (keyInfo.usable_key_parts > 0 && keyInfo.key_part[0].fieldnr == field->field_index + 1)
    {
      SchemaAndTableName tableName = {share->db.str, share->table_name.str};
      gwi.tableStatisticsMap[tableName][field->field_name.str] = *histogram;
      // Further matching indexes would store the very same histogram again.
      return;
    }
  }
}
#else
// No-op variant compiled when MYSQL_VERSION_ID is below 120401 (the #else branch
// of the guard above): no column statistics are collected, both arguments are unused.
void extractColumnStatistics(Item_field* /*ifp*/, gp_walk_info& /*gwi*/)
{
}
#endif
/*@brief Process SELECT part of a query or sub-query */
/***********************************************************
* DESCRIPTION:
@ -6376,21 +6410,20 @@ int processSelect(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep, vector
case Item::FIELD_ITEM:
{
Item_field* ifp = (Item_field*)item;
SimpleColumn* sc = NULL;
extractColumnStatistics(ifp, gwi);
// Handle * case
if (ifp->field_name.length && string(ifp->field_name.str) == "*")
{
collectAllCols(gwi, ifp);
break;
}
sc = buildSimpleColumn(ifp, gwi);
SimpleColumn* sc = buildSimpleColumn(ifp, gwi);
if (sc)
{
string fullname;
String str;
ifp->print(&str, QT_ORDINARY);
fullname = str.c_ptr();
string fullname(str.c_ptr());
if (!ifp->is_explicit_name()) // no alias
{
@ -7413,7 +7446,6 @@ int cs_get_derived_plan(ha_columnstore_derived_handler* handler, THD* /*thd*/, S
return 0;
}
int cs_get_select_plan(ha_columnstore_select_handler* handler, THD* thd, SCSEP& csep, gp_walk_info& gwi,
bool isSelectLexUnit)
{
@ -7442,13 +7474,14 @@ int cs_get_select_plan(ha_columnstore_select_handler* handler, THD* thd, SCSEP&
cerr << *csep << endl;
cerr << "-------------- EXECUTION PLAN END --------------\n" << endl;
}
// Derived table projection and filter optimization.
derivedTableOptimization(&gwi, csep);
if (get_unstable_optimizer(thd))
{
bool csepWasOptimized = optimizer::optimizeCSEP(*csep);
optimizer::RBOptimizerContext ctx(gwi);
bool csepWasOptimized = optimizer::optimizeCSEP(*csep, ctx);
if (csep->traceOn() && csepWasOptimized)
{
cerr << "---------------- cs_get_select_plan optimized EXECUTION PLAN ----------------" << endl;

View File

@ -128,13 +128,44 @@ using namespace funcexp;
#include "ha_mcs_sysvars.h"
#include "ha_mcs_datatype.h"
#include "statistics.h"
#include "ha_mcs_logging.h"
#include "ha_subquery.h"
#include "statistics_manager/statistics.h"
namespace cal_impl_if
{
extern bool nonConstFunc(Item_func* ifp);
// Merges the per-table column statistics of `aTableStatisticsMap` into this
// context's tableStatisticsMap.
//
// Tables unknown to this context are copied as a whole. For tables already
// present the incoming histograms are merged column by column, and an incoming
// histogram replaces an existing one for the same column.
void gp_walk_info::mergeTableStatistics(const TableStatisticsMap& aTableStatisticsMap)
{
  for (const auto& [schemaAndTableName, aColumnStatisticsMap] : aTableStatisticsMap)
  {
    // Single lookup: inserts a copy of the column map only if the table is new.
    auto [tableStatisticsMapIt, inserted] =
        tableStatisticsMap.try_emplace(schemaAndTableName, aColumnStatisticsMap);
    if (inserted)
    {
      continue;
    }

    for (const auto& [columnName, histogram] : aColumnStatisticsMap)
    {
      tableStatisticsMapIt->second.insert_or_assign(columnName, histogram);
    }
  }
}
// Looks up the column statistics collected for the given schema/table pair.
// Returns a copy of the table's column -> histogram map, or std::nullopt when
// nothing has been recorded for that table.
std::optional<ColumnStatisticsMap> gp_walk_info::findStatisticsForATable(SchemaAndTableName& schemaAndTableName)
{
  if (auto found = tableStatisticsMap.find(schemaAndTableName); found != tableStatisticsMap.end())
  {
    return found->second;
  }

  return std::nullopt;
}
}
namespace

View File

@ -95,13 +95,36 @@ enum ClauseType
ORDER_BY
};
// Key identifying a table by schema and table name.
struct SchemaAndTableName
{
  std::string schema;
  std::string table;

  bool operator==(const SchemaAndTableName& other) const
  {
    return schema == other.schema && table == other.table;
  }
};

// Hash functor for SchemaAndTableName.
// The two parts are hashed separately and then combined. Hashing the
// concatenation `schema + table` would give {"ab","c"} and {"a","bc"} the same
// hash and would allocate a temporary string on every lookup.
struct SchemaAndTableNameHash
{
  std::size_t operator()(const SchemaAndTableName& k) const
  {
    const std::hash<std::string> hasher;
    std::size_t seed = hasher(k.schema);
    // boost::hash_combine-style mixing step.
    seed ^= hasher(k.table) + static_cast<std::size_t>(0x9e3779b97f4a7c15ULL) + (seed << 6) + (seed >> 2);
    return seed;
  }
};
typedef std::vector<JoinInfo> JoinInfoVec;
typedef dmlpackage::ColValuesList ColValuesList;
typedef dmlpackage::TableValuesMap TableValuesMap;
typedef std::map<execplan::CalpontSystemCatalog::TableAliasName, std::pair<int, TABLE_LIST*>> TableMap;
typedef std::tr1::unordered_map<TABLE_LIST*, std::vector<COND*>> TableOnExprList;
typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap;
using ColumnName = std::string;
using ColumnStatisticsMap = std::unordered_map<ColumnName, Histogram_json_hb>;
using TableStatisticsMap = std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>;
// This structure is used to store the MDB AST -> CSEP translation context.
// It also carries column statistics for some of the columns in a query.
// As of 23.10.5, "some" means the first column of an index that appears in the
// projection list of a CSEP satisfying the condition of the applyParallelCES RBO rule.
// Note that statistics collected for a subquery/derived table must be merged
// into the statistics of the outer query.
struct gp_walk_info
{
execplan::CalpontSelectExecutionPlan::ReturnedColumnList returnedCols;
@ -110,6 +133,7 @@ struct gp_walk_info
execplan::CalpontSelectExecutionPlan::ReturnedColumnList orderByCols;
std::vector<Item*> extSelAggColsItems;
execplan::CalpontSelectExecutionPlan::ColumnMap columnMap;
TableStatisticsMap tableStatisticsMap;
// This vector temporarily hold the projection columns to be added
// to the returnedCols vector for subquery processing. It will be appended
// to the end of returnedCols when the processing is finished.
@ -200,7 +224,8 @@ struct gp_walk_info
SubQuery** subQueriesChain;
gp_walk_info(long timeZone_, SubQuery** subQueriesChain_)
: sessionid(0)
: tableStatisticsMap({})
, sessionid(0)
, fatalParseError(false)
, condPush(false)
, dropCond(false)
@ -230,6 +255,9 @@ struct gp_walk_info
{
}
~gp_walk_info();
void mergeTableStatistics(const TableStatisticsMap& tableStatisticsMap);
std::optional<ColumnStatisticsMap> findStatisticsForATable(SchemaAndTableName& schemaAndTableName);
};
struct SubQueryChainHolder;

View File

@ -96,6 +96,10 @@ SCSEP SelectSubQuery::transform()
return csep;
}
// Insert column statistics
fGwip.mergeTableStatistics(gwi.tableStatisticsMap);
// Insert subselect CSEP
fGwip.subselectList.push_back(csep);
// remove outer query tables

View File

@ -14,10 +14,17 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
// One include file to deal with all the MySQL pollution of the
// global namespace
//
// Don't include ANY mysql headers anywhere except here!
/* One include file to deal with all the MySQL pollution of the
global namespace
Don't include ANY mysql headers anywhere except here!
WARN: if any cmake build target uses this include file,
GenError from server must be added to the target dependencies
to generate mysqld_error.h used below
*/
#pragma once
#ifdef TEST_MCSCONFIG_H
@ -67,6 +74,37 @@
#include "rpl_rli.h"
#include "my_dbug.h"
#include "sql_show.h"
#if MYSQL_VERSION_ID >= 120401
#include "opt_histogram_json.h"
#else
// Mock Histogram_bucket / Histogram_json_hb used when MYSQL_VERSION_ID < 120401,
// i.e. when opt_histogram_json.h is not included above.
// Stand-in for the server's histogram bucket type.
// Numeric members are value-initialized so that a default-constructed bucket
// never exposes indeterminate values.
struct Histogram_bucket
{
  std::string start_value;
  double cum_fract{0.0};
  longlong ndv{0};
};
// Stand-in for the server's JSON histogram class. It exposes only the two
// read accessors used by the plugin; nothing in this mock ever fills the
// containers, so both accessors return empty values.
class Histogram_json_hb
{
 public:
  // Buckets of the histogram.
  const std::vector<Histogram_bucket>& get_json_histogram() const
  {
    return buckets;
  }

  // End point of the last bucket.
  const std::string& get_last_bucket_end_endp() const
  {
    return last_bucket_end_endp;
  }

 private:
  std::vector<Histogram_bucket> buckets;
  std::string last_bucket_end_endp;
};
#endif
#pragma GCC diagnostic pop
// Now clean up the pollution as best we can...

View File

@ -0,0 +1,260 @@
/* Copyright (C) 2025 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include "rulebased_optimizer.h"
#include "constantcolumn.h"
#include "execplan/calpontselectexecutionplan.h"
#include "execplan/simplecolumn.h"
#include "existsfilter.h"
#include "logicoperator.h"
#include "operator.h"
#include "predicateoperator.h"
#include "simplefilter.h"
namespace optimizer
{
// Forward declaration only; neither a definition nor a call site is visible in this file.
void applyParallelCES_exists(execplan::CalpontSelectExecutionPlan& csep, const size_t id);
// Prefix of the alias given to the derived table that replaces a rewritten table
// (see applyParallelCES below).
static const std::string RewrittenSubTableAliasPrefix = "$added_sub_";
// Upper bound on the number of UNION units makeUnionFromTable produces.
static const size_t MaxParallelFactor = 16;
// Returns true when `table` appears in the table list of any UNION unit of `csep`.
// UNION units that are not CalpontSelectExecutionPlan instances are skipped
// instead of being dereferenced through a null pointer.
bool tableIsInUnion(const execplan::CalpontSystemCatalog::TableAliasName& table,
                    execplan::CalpontSelectExecutionPlan& csep)
{
  return std::any_of(csep.unionVec().begin(), csep.unionVec().end(),
                     [&table](const auto& unionUnit)
                     {
                       auto* unionPlan = dynamic_cast<execplan::CalpontSelectExecutionPlan*>(unionUnit.get());
                       if (!unionPlan)
                       {
                         return false;
                       }
                       return std::any_of(unionPlan->tableList().begin(), unionPlan->tableList().end(),
                                          [&table](const auto& unionTable) { return unionTable == table; });
                     });
}
// Matcher of the parallelCES rule.
// A plan matches when it references exactly one table, that table is not a
// Columnstore table, and the table does not occur in any UNION unit of the plan.
// TODO filter out CSEPs with orderBy, groupBy, having
bool matchParallelCES(execplan::CalpontSelectExecutionPlan& csep)
{
  auto tableList = csep.tableList();
  if (tableList.size() != 1)
  {
    return false;
  }

  auto& onlyTable = tableList[0];
  if (onlyTable.isColumnstore())
  {
    return false;
  }

  // A table already present in a UNION unit is filtered out as already re-written.
  return !tableIsInUnion(onlyTable, csep);
}
// This routine produces a new ParseTree that is AND(lowerBound <= column, column < upperBound).
// Note that the upper comparison is strict ("<"), so the range is half-open.
// TODO add engine-independent statistics-derived ranges
// Builds the filter tree `column >= bound.first AND column < bound.second` and,
// when `csep` already has filters, returns AND(existing filters, range filter).
//
// csep    plan whose current filters (if any) are combined with the new range.
// column  key column the range is applied to; it is copied for each comparison.
// bound   {lower, upper} pair; both values are emitted as unsigned constants.
//
// Returns a raw ParseTree pointer allocated with new.
// TODO Nobody owns these allocations; cleanup depends on delete in the ParseTree nodes' dtors.
// TODO the upper bound value itself is excluded unless the predicate is changed to a weak "<=".
execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, execplan::SimpleColumn& column,
                                                      std::pair<uint64_t, uint64_t>& bound)
{
  // NOTE(review): make_shared<Operator>(PredicateOperator(...)) copy-constructs an Operator
  // from a PredicateOperator temporary; confirm that an Operator, not a PredicateOperator, is intended.

  // column < upper bound
  auto* upperSideColumn = new execplan::SimpleColumn(column);
  upperSideColumn->resultType(column.resultType());
  auto* upperConstant = new execplan::ConstantColumnUInt(bound.second, 0, 0);
  execplan::SOP upperOp = boost::make_shared<execplan::Operator>(execplan::PredicateOperator("<"));
  upperOp->setOpType(upperConstant->resultType(), upperSideColumn->resultType());
  upperOp->resultType(upperOp->operationType());
  auto* upperFilter = new execplan::SimpleFilter(upperOp, upperSideColumn, upperConstant);

  // column >= lower bound
  // TODO hardcoded column type and value
  auto* lowerSideColumn = new execplan::SimpleColumn(column);
  lowerSideColumn->resultType(column.resultType());
  auto* lowerConstant = new execplan::ConstantColumnUInt(bound.first, 0, 0);
  execplan::SOP lowerOp = boost::make_shared<execplan::Operator>(execplan::PredicateOperator(">="));
  lowerOp->setOpType(lowerConstant->resultType(), lowerSideColumn->resultType());
  lowerOp->resultType(lowerOp->operationType());
  auto* lowerFilter = new execplan::SimpleFilter(lowerOp, lowerSideColumn, lowerConstant);

  auto* rangeTree = new execplan::ParseTree(new execplan::LogicOperator("and"));
  rangeTree->right(upperFilter);
  rangeTree->left(lowerFilter);

  auto* existingFilters = csep->filters();
  if (!existingFilters)
  {
    return rangeTree;
  }

  return new execplan::ParseTree(new execplan::LogicOperator("and"), existingFilters, rangeTree);
}
// Looking for a projected column that comes first in an available index and has EI statistics
// INV nullptr signifies that no suitable column was found
// Returns the first projected SimpleColumn of `csep` for which a histogram is
// stored in ctx.gwi.tableStatisticsMap, or nullptr when there is none.
//
// The statistics map is searched in place. Going through
// gp_walk_info::findStatisticsForATable would copy the whole per-table
// histogram map once per projected column.
execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx)
{
  const auto& tableStatistics = ctx.gwi.tableStatisticsMap;

  for (auto& rc : csep.returnedCols())
  {
    auto* simpleColumn = dynamic_cast<execplan::SimpleColumn*>(rc.get());
    if (!simpleColumn)
    {
      continue;
    }

    const cal_impl_if::SchemaAndTableName schemaAndTableName = {simpleColumn->schemaName(),
                                                                simpleColumn->tableName()};
    const auto tableStatisticsIt = tableStatistics.find(schemaAndTableName);
    if (tableStatisticsIt == tableStatistics.end())
    {
      continue;
    }

    if (tableStatisticsIt->second.find(simpleColumn->columnName()) != tableStatisticsIt->second.end())
    {
      return simpleColumn;
    }
  }

  return nullptr;
}
// TODO char and other numerical types support
// Decodes the leading 4 bytes of a histogram end point as a native-endian uint32.
// Returns false, leaving `value` untouched, when `raw` holds fewer than 4 bytes.
static bool readUInt32KeyValue(const std::string& raw, uint64_t& value)
{
  uint32_t decoded = 0;
  if (raw.size() < sizeof(decoded))
  {
    return false;
  }
  // Byte-wise copy: no alignment or aliasing assumptions about the string buffer.
  std::copy_n(raw.data(), sizeof(decoded), reinterpret_cast<char*>(&decoded));
  value = decoded;
  return true;
}

// Produces the UNION units that partition the single table of `csep` by ranges
// of its key column. Each unit is a clone of `csep` (without recursive selects)
// whose filters are extended with one range taken from the column histogram.
//
// Returns an empty list when no suitable key column or histogram is available,
// when the histogram has no buckets, or when a bucket end point cannot be
// decoded as a 4-byte integer.
//
// TODO char and other numerical types support
// SC type controls an integral type used to produce suitable filters. The continuation of this
// function should become a template function based on SC type.
execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
    execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx)
{
  execplan::CalpontSelectExecutionPlan::SelectList unionVec;

  execplan::SimpleColumn* keyColumn = findSuitableKeyColumn(csep, ctx);
  if (!keyColumn)
  {
    return unionVec;
  }

  cal_impl_if::SchemaAndTableName schemaAndTableName = {keyColumn->schemaName(), keyColumn->tableName()};
  auto tableColumnsStatisticsIt = ctx.gwi.tableStatisticsMap.find(schemaAndTableName);
  if (tableColumnsStatisticsIt == ctx.gwi.tableStatisticsMap.end())
  {
    return unionVec;
  }

  auto columnStatisticsIt = tableColumnsStatisticsIt->second.find(keyColumn->columnName());
  if (columnStatisticsIt == tableColumnsStatisticsIt->second.end())
  {
    return unionVec;
  }

  // Bind by reference: copying the histogram is not needed.
  const auto& columnStatistics = columnStatisticsIt->second;
  const auto& buckets = columnStatistics.get_json_histogram();

  // An empty histogram would otherwise lead to a division by zero and to an
  // underflow of `numberOfUnionUnits - 1` below.
  if (buckets.empty())
  {
    return unionVec;
  }

  // TODO configurable parallel factor via session variable
  // NB now histogram size is the way to control parallel factor with MaxParallelFactor being the maximum
  const size_t numberOfUnionUnits = std::min(buckets.size(), MaxParallelFactor);
  const size_t numberOfBucketsPerUnionUnit = buckets.size() / numberOfUnionUnits;

  // TODO char and other numerical types support
  std::vector<std::pair<uint64_t, uint64_t>> bounds;
  bounds.reserve(numberOfUnionUnits);

  // Loop over buckets to produce filter ranges: every unit but the last spans
  // [start of its first bucket, start of the next unit's first bucket).
  for (size_t i = 0; i + 1 < numberOfUnionUnits; ++i)
  {
    const auto bucket = buckets.begin() + i * numberOfBucketsPerUnionUnit;
    const auto endBucket = buckets.begin() + (i + 1) * numberOfBucketsPerUnionUnit;
    uint64_t currentLowerBound = 0;
    uint64_t currentUpperBound = 0;
    if (!readUInt32KeyValue(bucket->start_value, currentLowerBound) ||
        !readUInt32KeyValue(endBucket->start_value, currentUpperBound))
    {
      return unionVec;
    }
    bounds.push_back({currentLowerBound, currentUpperBound});
  }

  // Add last range: it ends at the end point of the last histogram bucket.
  const auto lastBucket = buckets.begin() + (numberOfUnionUnits - 1) * numberOfBucketsPerUnionUnit;
  uint64_t lastLowerBound = 0;
  uint64_t lastUpperBound = 0;
  if (!readUInt32KeyValue(lastBucket->start_value, lastLowerBound) ||
      !readUInt32KeyValue(columnStatistics.get_last_bucket_end_endp(), lastUpperBound))
  {
    return unionVec;
  }
  bounds.push_back({lastLowerBound, lastUpperBound});

  unionVec.reserve(bounds.size());
  for (auto& bound : bounds)
  {
    auto clonedCSEP = csep.cloneWORecursiveSelects();
    // Add the range filter based on the key column
    clonedCSEP->filters(filtersWithNewRangeAddedIfNeeded(clonedCSEP, *keyColumn, bound));
    unionVec.push_back(clonedCSEP);
  }

  return unionVec;
}
// Applier of the parallelCES rule.
// Replaces the single non-Columnstore table of `csep` with a derived table
// whose UNION units each read one key-column range of that table
// (see makeUnionFromTable). The projection of `csep` is re-pointed to the
// derived table alias.
//
// When no UNION unit can be produced (no suitable key column, no usable
// histogram) `csep` is left untouched: wrapping the table into a derived table
// that has no UNION units and no filters would not partition anything.
//
// csep  plan to rewrite in place.
// ctx   optimizer context; ctx.uniqueId makes the derived table alias unique.
void applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx)
{
  auto tables = csep.tableList();
  execplan::CalpontSelectExecutionPlan::TableList newTableList;
  execplan::CalpontSelectExecutionPlan::SelectList newDerivedTableList;
  execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns;

  // ATM Must be only 1 table
  for (auto& table : tables)
  {
    if (table.isColumnstore())
    {
      continue;
    }

    // Create copies of the current leaf CSEP with additional filters to partition the key column.
    // Done before anything else so the rewrite can be abandoned without side effects.
    auto additionalUnionVec = makeUnionFromTable(csep, ctx);
    if (additionalUnionVec.empty())
    {
      return;
    }

    auto derivedSCEP = csep.cloneWORecursiveSelects();
    // need to add a level here
    std::string tableAlias = RewrittenSubTableAliasPrefix + table.schema + "_" + table.table + "_" +
                             std::to_string(ctx.uniqueId);

    derivedSCEP->location(execplan::CalpontSelectExecutionPlan::FROM);
    derivedSCEP->subType(execplan::CalpontSelectExecutionPlan::FROM_SUBS);
    derivedSCEP->derivedTbAlias(tableAlias);

    derivedSCEP->unionVec().insert(derivedSCEP->unionVec().end(), additionalUnionVec.begin(),
                                   additionalUnionVec.end());

    size_t colPosition = 0;
    // change parent to derived table columns
    for (auto& rc : csep.returnedCols())
    {
      auto rcCloned = boost::make_shared<execplan::SimpleColumn>(*rc);
      // TODO timezone and result type are not copied
      // TODO add specific ctor for this functionality
      rcCloned->tableName("");
      rcCloned->schemaName("");
      rcCloned->tableAlias(tableAlias);
      rcCloned->colPosition(colPosition++);
      rcCloned->resultType(rc->resultType());
      newReturnedColumns.push_back(rcCloned);
    }

    newDerivedTableList.push_back(derivedSCEP);
    execplan::CalpontSystemCatalog::TableAliasName tn = execplan::make_aliasview("", "", tableAlias, "");
    newTableList.push_back(tn);

    // Remove the filters as they were pushed down to union units
    // This is inappropriate for EXISTS filter and join conditions
    derivedSCEP->filters(nullptr);
  }

  // Remove the filters if necessary using csep.filters(nullptr) as they were pushed down to union units
  // But this is inappropriate for EXISTS filter and join conditions

  // There must be no derived at this point, so we can replace it with the new derived table list
  csep.derivedTableList(newDerivedTableList);
  // Replace table list with new table list populated with union units
  csep.tableList(newTableList);
  csep.returnedCols(newReturnedColumns);
}
} // namespace optimizer

View File

@ -0,0 +1,30 @@
/* Copyright (C) 2025 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#pragma once
#define PREFER_MY_CONFIG_H
#include <my_config.h>
#include "idb_mysql.h"
#include "execplan/calpontselectexecutionplan.h"
#include "rulebased_optimizer.h"
namespace optimizer {
// Matcher of the parallelCES rule: tells whether `csep` qualifies for the rewrite.
bool matchParallelCES(execplan::CalpontSelectExecutionPlan& csep);
// Applier of the parallelCES rule: rewrites `csep` in place using the optimizer context `ctx`.
void applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx);
}

View File

@ -15,49 +15,57 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include "rulebased_optimizer.h"
#include "constantcolumn.h"
#include "execplan/calpontselectexecutionplan.h"
#include "execplan/simplecolumn.h"
#include "existsfilter.h"
#include "logicoperator.h"
#include "operator.h"
#include "predicateoperator.h"
#include "simplefilter.h"
#include "rulebased_optimizer.h"
#include "rbo_apply_parallel_ces.h"
namespace optimizer
{
static const std::string RewrittenSubTableAliasPrefix = "$added_sub_";
// Apply a list of rules to a CSEP
bool optimizeCSEPWithRules(execplan::CalpontSelectExecutionPlan& root, const std::vector<Rule>& rules)
bool optimizeCSEPWithRules(execplan::CalpontSelectExecutionPlan& root, const std::vector<Rule>& rules,
optimizer::RBOptimizerContext& ctx)
{
bool changed = false;
for (const auto& rule : rules)
{
changed |= rule.apply(root);
changed |= rule.apply(root, ctx);
}
return changed;
}
// high level API call for optimizer
bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root)
bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root, optimizer::RBOptimizerContext& ctx)
{
optimizer::Rule parallelCES{"parallelCES", optimizer::matchParallelCES, optimizer::applyParallelCES};
std::vector<Rule> rules = {parallelCES};
std::vector<optimizer::Rule> rules = {parallelCES};
return optimizeCSEPWithRules(root, rules);
return optimizeCSEPWithRules(root, rules, ctx);
}
// Apply iteratively until CSEP is converged by rule
bool Rule::apply(execplan::CalpontSelectExecutionPlan& root) const
bool Rule::apply(execplan::CalpontSelectExecutionPlan& root, optimizer::RBOptimizerContext& ctx) const
{
bool changedThisRound = false;
bool hasBeenApplied = false;
do
{
changedThisRound = walk(root);
changedThisRound = walk(root, ctx);
hasBeenApplied |= changedThisRound;
} while (changedThisRound && !applyOnlyOnce);
@ -65,188 +73,45 @@ bool Rule::apply(execplan::CalpontSelectExecutionPlan& root) const
}
// DFS walk to match CSEP and apply rules if match
bool Rule::walk(execplan::CalpontSelectExecutionPlan& csep) const
bool Rule::walk(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx) const
{
bool rewrite = false;
for (auto& table : csep.derivedTableList())
std::stack<execplan::CalpontSelectExecutionPlan*> planStack;
planStack.push(&csep);
while (!planStack.empty())
{
auto* csepPtr = dynamic_cast<execplan::CalpontSelectExecutionPlan*>(table.get());
if (!csepPtr)
execplan::CalpontSelectExecutionPlan* current = planStack.top();
planStack.pop();
for (auto& table : current->derivedTableList())
{
continue;
auto* csepPtr = dynamic_cast<execplan::CalpontSelectExecutionPlan*>(table.get());
if (csepPtr)
{
planStack.push(csepPtr);
}
}
auto& csepLocal = *csepPtr;
rewrite |= walk(csepLocal);
}
for (auto& unionUnit : csep.unionVec())
{
auto* unionUnitPtr = dynamic_cast<execplan::CalpontSelectExecutionPlan*>(unionUnit.get());
if (!unionUnitPtr)
for (auto& unionUnit : current->unionVec())
{
continue;
auto* unionUnitPtr = dynamic_cast<execplan::CalpontSelectExecutionPlan*>(unionUnit.get());
if (unionUnitPtr)
{
planStack.push(unionUnitPtr);
}
}
auto& unionUnitLocal = *unionUnitPtr;
rewrite |= walk(unionUnitLocal);
}
if (matchRule(csep))
{
applyRule(csep);
rewrite = true;
if (matchRule(*current))
{
applyRule(*current, ctx);
++ctx.uniqueId;
rewrite = true;
}
}
return rewrite;
}
// Returns true when `table` appears in the table list of any UNION unit of `csep`.
// UNION units that are not CalpontSelectExecutionPlan instances are skipped
// instead of being dereferenced through a null pointer.
bool tableIsInUnion(const execplan::CalpontSystemCatalog::TableAliasName& table,
                    execplan::CalpontSelectExecutionPlan& csep)
{
  return std::any_of(csep.unionVec().begin(), csep.unionVec().end(),
                     [&table](const auto& unionUnit)
                     {
                       auto* unionPlan = dynamic_cast<execplan::CalpontSelectExecutionPlan*>(unionUnit.get());
                       if (!unionPlan)
                       {
                         return false;
                       }
                       return std::any_of(unionPlan->tableList().begin(), unionPlan->tableList().end(),
                                          [&table](const auto& unionTable) { return unionTable == table; });
                     });
}
// Matcher of the parallelCES rule.
// A plan matches when it references exactly one table, that table is not a
// Columnstore table, and the table does not occur in any UNION unit of the plan.
// WIP filter out CSEPs with orderBy, groupBy, having
bool matchParallelCES(execplan::CalpontSelectExecutionPlan& csep)
{
  auto tableList = csep.tableList();
  if (tableList.size() != 1)
  {
    return false;
  }

  auto& onlyTable = tableList[0];
  if (onlyTable.isColumnstore())
  {
    return false;
  }

  // A table already present in a UNION unit is filtered out as already re-written.
  return !tableIsInUnion(onlyTable, csep);
}
// This routine produces a new ParseTree that is AND(lowerBand <= column, column <= upperBand)
// TODO add engine-independent statistics-derived ranges
// Builds the filter tree `column >= 30 AND column <= 42` on the first returned
// column of `csep` and, when `csep` already has filters, returns
// AND(existing filters, range filter).
//
// INV the first returned column must be a SimpleColumn (asserted).
// TODO find the suitable column using EI statistics.
// TODO hardcoded column type and values.
// TODO Nobody owns these allocations; cleanup depends on delete in the ParseTree nodes' dtors.
execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep)
{
  auto* keyColumn = dynamic_cast<execplan::SimpleColumn*>(csep->returnedCols().front().get());
  assert(keyColumn);

  // column <= upper constant
  auto* upperSideColumn = new execplan::SimpleColumn(*keyColumn);
  upperSideColumn->resultType(keyColumn->resultType());
  auto* upperConstant = new execplan::ConstantColumnUInt(42ULL, 0, 0);
  execplan::SOP upperOp = boost::make_shared<execplan::Operator>(execplan::PredicateOperator("<="));
  upperOp->setOpType(upperConstant->resultType(), upperSideColumn->resultType());
  upperOp->resultType(upperOp->operationType());
  auto* upperFilter = new execplan::SimpleFilter(upperOp, upperSideColumn, upperConstant);

  // column >= lower constant
  auto* lowerSideColumn = new execplan::SimpleColumn(*keyColumn);
  lowerSideColumn->resultType(keyColumn->resultType());
  auto* lowerConstant = new execplan::ConstantColumnUInt(30ULL, 0, 0);
  execplan::SOP lowerOp = boost::make_shared<execplan::Operator>(execplan::PredicateOperator(">="));
  lowerOp->setOpType(lowerConstant->resultType(), lowerSideColumn->resultType());
  lowerOp->resultType(lowerOp->operationType());
  auto* lowerFilter = new execplan::SimpleFilter(lowerOp, lowerSideColumn, lowerConstant);

  auto* rangeTree = new execplan::ParseTree(new execplan::LogicOperator("and"));
  rangeTree->right(upperFilter);
  rangeTree->left(lowerFilter);

  auto* existingFilters = csep->filters();
  if (!existingFilters)
  {
    return rangeTree;
  }

  return new execplan::ParseTree(new execplan::LogicOperator("and"), existingFilters, rangeTree);
}
// Produces `numberOfLegs` UNION units. Each unit is a clone of `csep` (without
// recursive selects) whose filters are extended by filtersWithNewRangeAddedIfNeeded.
execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
    const size_t numberOfLegs, execplan::CalpontSelectExecutionPlan& csep)
{
  execplan::CalpontSelectExecutionPlan::SelectList legs;
  legs.reserve(numberOfLegs);

  while (legs.size() < numberOfLegs)
  {
    auto leg = csep.cloneWORecursiveSelects();
    // Add BETWEEN based on key column range
    auto* legFilters = filtersWithNewRangeAddedIfNeeded(leg);
    leg->filters(legFilters);
    legs.push_back(leg);
  }

  return legs;
}
// Applier of the parallelCES rule (context-free variant).
// For every non-Columnstore table of `csep` a derived table is created from a
// clone of `csep`, UNION units produced by makeUnionFromTable are appended to it,
// and the projection of `csep` is re-pointed to the derived table alias.
// Finally the filters, derived table list, table list and returned columns of
// `csep` are replaced.
void applyParallelCES(execplan::CalpontSelectExecutionPlan& csep)
{
// Work on a copy of the table list; csep's own list is replaced at the end.
auto tables = csep.tableList();
execplan::CalpontSelectExecutionPlan::TableList newTableList;
execplan::CalpontSelectExecutionPlan::SelectList newDerivedTableList;
execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns;
// ATM Must be only 1 table
for (auto& table : tables)
{
if (!table.isColumnstore())
{
auto derivedSCEP = csep.cloneWORecursiveSelects();
// need to add a level here
// The alias is built from schema and table name only, with no per-rewrite suffix.
std::string tableAlias = RewrittenSubTableAliasPrefix + table.schema + "_" + table.table;
derivedSCEP->location(execplan::CalpontSelectExecutionPlan::FROM);
derivedSCEP->subType(execplan::CalpontSelectExecutionPlan::FROM_SUBS);
derivedSCEP->derivedTbAlias(tableAlias);
// TODO: hardcoded for now
size_t parallelFactor = 2;
// Create a copy of the current leaf CSEP with additional filters to partition the key column
auto additionalUnionVec = makeUnionFromTable(parallelFactor, csep);
derivedSCEP->unionVec().insert(derivedSCEP->unionVec().end(), additionalUnionVec.begin(),
additionalUnionVec.end());
size_t colPosition = 0;
// change parent to derived table columns
for (auto& rc : csep.returnedCols())
{
// NOTE(review): every returned column is turned into a SimpleColumn here — confirm
// that non-SimpleColumn projections cannot reach this point.
auto rcCloned = boost::make_shared<execplan::SimpleColumn>(*rc);
// TODO timezone and result type are not copied
// TODO add specific ctor for this functionality
rcCloned->tableName("");
rcCloned->schemaName("");
rcCloned->tableAlias(tableAlias);
rcCloned->colPosition(colPosition++);
rcCloned->resultType(rc->resultType());
newReturnedColumns.push_back(rcCloned);
}
newDerivedTableList.push_back(derivedSCEP);
execplan::CalpontSystemCatalog::TableAliasName tn = execplan::make_aliasview("", "", tableAlias, "");
newTableList.push_back(tn);
// Remove the filters as they were pushed down to union units
derivedSCEP->filters(nullptr);
}
}
// Remove the filters as they were pushed down to union units
csep.filters(nullptr);
// There must be no derived at this point.
csep.derivedTableList(newDerivedTableList);
// Replace table list with new table list populated with union units
csep.tableList(newTableList);
csep.returnedCols(newReturnedColumns);
}
} // namespace optimizer

View File

@ -18,14 +18,31 @@
#pragma once
#include <string>
#define PREFER_MY_CONFIG_H
#include <my_config.h>
#include "idb_mysql.h"
#include "ha_mcs_impl_if.h"
#include "execplan/calpontselectexecutionplan.h"
namespace optimizer {
// Context handed to every rule of the rule-based optimizer.
// It gives rules access to the AST -> CSEP translation context and to a
// counter (incremented by Rule::walk after each rule application) that
// rules use to build unique names.
class RBOptimizerContext {
public:
  RBOptimizerContext() = delete;
  // explicit: a gp_walk_info must never convert silently into an optimizer context.
  explicit RBOptimizerContext(cal_impl_if::gp_walk_info& walk_info) : gwi(walk_info) {}
  // gwi lifetime should be longer than optimizer context.
  // In plugin runtime this is always true.
  cal_impl_if::gp_walk_info& gwi;
  // Counter used to make generated aliases unique.
  uint64_t uniqueId {0};
};
struct Rule
{
using RuleMatcher = bool (*)(execplan::CalpontSelectExecutionPlan&);
using RuleApplier = void (*)(execplan::CalpontSelectExecutionPlan&);
using RuleApplier = void (*)(execplan::CalpontSelectExecutionPlan&, RBOptimizerContext&);
Rule(std::string&& name, RuleMatcher matchRule, RuleApplier applyRule)
: name(name), matchRule(matchRule), applyRule(applyRule) {};
@ -39,15 +56,18 @@ struct Rule
Rule() = default;
Rule(const Rule&) = default;
Rule(Rule&&) = default;
std::string getName() const
{
return name;
}
Rule& operator=(const Rule&) = default;
Rule& operator=(Rule&&) = default;
bool apply(execplan::CalpontSelectExecutionPlan& csep) const;
bool walk(execplan::CalpontSelectExecutionPlan& csep) const;
bool apply(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx) const;
bool walk(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx) const;
};
bool matchParallelCES(execplan::CalpontSelectExecutionPlan& csep);
void applyParallelCES(execplan::CalpontSelectExecutionPlan& csep);
bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root);
bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root, RBOptimizerContext& ctx);
}