You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-11-03 17:13:17 +03:00
feat(rbo,rules,QA): refactored statistics storage
This commit is contained in:
@@ -5228,7 +5228,27 @@ int processFrom(bool& isUnion, SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP&
|
|||||||
std::cout << " has stats ";
|
std::cout << " has stats ";
|
||||||
SchemaAndTableName tableName = {field->table->s->db.str,
|
SchemaAndTableName tableName = {field->table->s->db.str,
|
||||||
field->table->s->table_name.str};
|
field->table->s->table_name.str};
|
||||||
gwi.tableStatisticsMap[tableName][field->field_name.str] = *histogram;
|
execplan::SimpleColumn simpleColumn = {field->table->s->db.str,
|
||||||
|
field->table->s->table_name.str,
|
||||||
|
field->field_name.str};
|
||||||
|
auto tableStatisticsMapIt = gwi.tableStatisticsMap.find(tableName);
|
||||||
|
if (tableStatisticsMapIt == gwi.tableStatisticsMap.end())
|
||||||
|
{
|
||||||
|
gwi.tableStatisticsMap[tableName][field->field_name.str] = {simpleColumn, {*histogram}};
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto columnStatisticsMapIt = tableStatisticsMapIt->second.find(field->field_name.str);
|
||||||
|
if (columnStatisticsMapIt == tableStatisticsMapIt->second.end())
|
||||||
|
{
|
||||||
|
tableStatisticsMapIt->second[field->field_name.str] = {simpleColumn, {*histogram}};
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto columnStatisticsVec = columnStatisticsMapIt->second.second;
|
||||||
|
columnStatisticsVec.push_back(*histogram);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -6321,43 +6341,43 @@ int processLimitAndOffset(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep
|
|||||||
// for the first column of the index if any.
|
// for the first column of the index if any.
|
||||||
// Statistics is stored in GWI context.
|
// Statistics is stored in GWI context.
|
||||||
// Mock for ES 10.6
|
// Mock for ES 10.6
|
||||||
#if MYSQL_VERSION_ID >= 120401
|
// #if MYSQL_VERSION_ID >= 120401
|
||||||
void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi)
|
// void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi)
|
||||||
{
|
// {
|
||||||
for (uint j = 0; j < ifp->field->table->s->keys; j++)
|
// for (uint j = 0; j < ifp->field->table->s->keys; j++)
|
||||||
{
|
// {
|
||||||
for (uint i = 0; i < ifp->field->table->s->key_info[j].usable_key_parts; i++)
|
// for (uint i = 0; i < ifp->field->table->s->key_info[j].usable_key_parts; i++)
|
||||||
{
|
// {
|
||||||
if (ifp->field->table->s->key_info[j].key_part[i].fieldnr == ifp->field->field_index + 1)
|
// if (ifp->field->table->s->key_info[j].key_part[i].fieldnr == ifp->field->field_index + 1)
|
||||||
{
|
// {
|
||||||
if (i == 0 && ifp->field->read_stats)
|
// if (i == 0 && ifp->field->read_stats)
|
||||||
{
|
// {
|
||||||
assert(ifp->field->table->s);
|
// assert(ifp->field->table->s);
|
||||||
auto* histogram = dynamic_cast<Histogram_json_hb*>(ifp->field->read_stats->histogram);
|
// auto* histogram = dynamic_cast<Histogram_json_hb*>(ifp->field->read_stats->histogram);
|
||||||
if (histogram)
|
// if (histogram)
|
||||||
{
|
// {
|
||||||
SchemaAndTableName tableName = {ifp->field->table->s->db.str,
|
// SchemaAndTableName tableName = {ifp->field->table->s->db.str,
|
||||||
ifp->field->table->s->table_name.str};
|
// ifp->field->table->s->table_name.str};
|
||||||
auto tableStatisticsMapIt = gwi.tableStatisticsMap.find(tableName);
|
// auto tableStatisticsMapIt = gwi.tableStatisticsMap.find(tableName);
|
||||||
if (tableStatisticsMapIt == gwi.tableStatisticsMap.end())
|
// if (tableStatisticsMapIt == gwi.tableStatisticsMap.end())
|
||||||
{
|
// {
|
||||||
gwi.tableStatisticsMap.insert({tableName, {{ifp->field->field_name.str, *histogram}}});
|
// gwi.tableStatisticsMap.insert({tableName, {{ifp->field->field_name.str, *histogram}}});
|
||||||
}
|
// }
|
||||||
else
|
// else
|
||||||
{
|
// {
|
||||||
tableStatisticsMapIt->second.insert({ifp->field->field_name.str, *histogram});
|
// tableStatisticsMapIt->second.insert({ifp->field->field_name.str, *histogram});
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
#else
|
// #else
|
||||||
void extractColumnStatistics(Item_field* /*ifp*/, gp_walk_info& /*gwi*/)
|
// void extractColumnStatistics(Item_field* /*ifp*/, gp_walk_info& /*gwi*/)
|
||||||
{
|
// {
|
||||||
}
|
// }
|
||||||
#endif
|
// #endif
|
||||||
|
|
||||||
/*@brief Process SELECT part of a query or sub-query */
|
/*@brief Process SELECT part of a query or sub-query */
|
||||||
/***********************************************************
|
/***********************************************************
|
||||||
|
|||||||
@@ -116,7 +116,7 @@ typedef std::map<execplan::CalpontSystemCatalog::TableAliasName, std::pair<int,
|
|||||||
typedef std::tr1::unordered_map<TABLE_LIST*, std::vector<COND*>> TableOnExprList;
|
typedef std::tr1::unordered_map<TABLE_LIST*, std::vector<COND*>> TableOnExprList;
|
||||||
typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap;
|
typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap;
|
||||||
using ColumnName = std::string;
|
using ColumnName = std::string;
|
||||||
using ColumnStatisticsMap = std::unordered_map<ColumnName, Histogram_json_hb>;
|
using ColumnStatisticsMap = std::unordered_map<ColumnName, std::pair<execplan::SimpleColumn, std::vector<Histogram_json_hb>>>;
|
||||||
using TableStatisticsMap = std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>;
|
using TableStatisticsMap = std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>;
|
||||||
|
|
||||||
// This structure is used to store MDB AST -> CSEP translation context.
|
// This structure is used to store MDB AST -> CSEP translation context.
|
||||||
|
|||||||
@@ -105,6 +105,7 @@ execplan::ParseTree* filtersWithNewRange(execplan::SCSEP& csep, execplan::Simple
|
|||||||
ltOp->resultType(ltOp->operationType());
|
ltOp->resultType(ltOp->operationType());
|
||||||
|
|
||||||
auto* sfr = new execplan::SimpleFilter(ltOp, tableKeyColumnLeftOp, filterColLeftOp);
|
auto* sfr = new execplan::SimpleFilter(ltOp, tableKeyColumnLeftOp, filterColLeftOp);
|
||||||
|
// TODO new
|
||||||
auto tableKeyColumnRightOp = new execplan::SimpleColumn(column);
|
auto tableKeyColumnRightOp = new execplan::SimpleColumn(column);
|
||||||
tableKeyColumnRightOp->resultType(column.resultType());
|
tableKeyColumnRightOp->resultType(column.resultType());
|
||||||
// TODO hardcoded column type and value
|
// TODO hardcoded column type and value
|
||||||
@@ -114,8 +115,10 @@ execplan::ParseTree* filtersWithNewRange(execplan::SCSEP& csep, execplan::Simple
|
|||||||
gtOp->setOpType(filterColRightOp->resultType(), tableKeyColumnRightOp->resultType());
|
gtOp->setOpType(filterColRightOp->resultType(), tableKeyColumnRightOp->resultType());
|
||||||
gtOp->resultType(gtOp->operationType());
|
gtOp->resultType(gtOp->operationType());
|
||||||
|
|
||||||
|
// TODO new
|
||||||
auto* sfl = new execplan::SimpleFilter(gtOp, tableKeyColumnRightOp, filterColRightOp);
|
auto* sfl = new execplan::SimpleFilter(gtOp, tableKeyColumnRightOp, filterColRightOp);
|
||||||
|
|
||||||
|
// TODO new
|
||||||
execplan::ParseTree* ptp = new execplan::ParseTree(new execplan::LogicOperator("and"));
|
execplan::ParseTree* ptp = new execplan::ParseTree(new execplan::LogicOperator("and"));
|
||||||
ptp->right(sfr);
|
ptp->right(sfr);
|
||||||
ptp->left(sfl);
|
ptp->left(sfl);
|
||||||
@@ -169,6 +172,12 @@ execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPl
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TBD
|
||||||
|
// Selects which of the histograms collected for a column is used.
// Currently this is always the first element of the vector.
// Precondition: columnStatisticsVec must not be empty (accessing element 0 of an
// empty vector is undefined behaviour).
// @param columnStatisticsVec histograms collected for a single column
// @return reference to the chosen histogram, owned by columnStatisticsVec
Histogram_json_hb& chooseStatisticsToUse(std::vector<Histogram_json_hb>& columnStatisticsVec)
{
  return columnStatisticsVec[0];
}
|
||||||
|
|
||||||
// Populates range bounds based on column statistics
|
// Populates range bounds based on column statistics
|
||||||
// Returns optional with bounds if successful, nullopt otherwise
|
// Returns optional with bounds if successful, nullopt otherwise
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@@ -188,7 +197,8 @@ std::optional<FilterRangeBounds<T>> populateRangeBounds(execplan::SimpleColumn*
|
|||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto columnStatistics = columnStatisticsIt->second;
|
auto& [simpleColumn, columnStatisticsVec] = columnStatisticsIt->second;
|
||||||
|
auto& columnStatistics = chooseStatisticsToUse(columnStatisticsVec);
|
||||||
|
|
||||||
// TODO configurable parallel factor via session variable
|
// TODO configurable parallel factor via session variable
|
||||||
// NB now histogram size is the way to control parallel factor with 16 being the maximum
|
// NB now histogram size is the way to control parallel factor with 16 being the maximum
|
||||||
|
|||||||
Reference in New Issue
Block a user