You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-11-02 06:13:16 +03:00
feat(rbo,rules,QA): refactored statistics storage
This commit is contained in:
@@ -5227,8 +5227,28 @@ int processFrom(bool& isUnion, SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP&
|
||||
{
|
||||
std::cout << " has stats ";
|
||||
SchemaAndTableName tableName = {field->table->s->db.str,
|
||||
field->table->s->table_name.str};
|
||||
gwi.tableStatisticsMap[tableName][field->field_name.str] = *histogram;
|
||||
field->table->s->table_name.str};
|
||||
execplan::SimpleColumn simpleColumn = {field->table->s->db.str,
|
||||
field->table->s->table_name.str,
|
||||
field->field_name.str};
|
||||
auto tableStatisticsMapIt = gwi.tableStatisticsMap.find(tableName);
|
||||
if (tableStatisticsMapIt == gwi.tableStatisticsMap.end())
|
||||
{
|
||||
gwi.tableStatisticsMap[tableName][field->field_name.str] = {simpleColumn, {*histogram}};
|
||||
}
|
||||
else
|
||||
{
|
||||
auto columnStatisticsMapIt = tableStatisticsMapIt->second.find(field->field_name.str);
|
||||
if (columnStatisticsMapIt == tableStatisticsMapIt->second.end())
|
||||
{
|
||||
tableStatisticsMapIt->second[field->field_name.str] = {simpleColumn, {*histogram}};
|
||||
}
|
||||
else
|
||||
{
  // Bind by reference: taking the vector by value would append the histogram
  // to a temporary copy that is destroyed at the end of this scope, leaving
  // the statistics stored in gwi.tableStatisticsMap unchanged.
  auto& columnStatisticsVec = columnStatisticsMapIt->second.second;
  columnStatisticsVec.push_back(*histogram);
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -6321,43 +6341,43 @@ int processLimitAndOffset(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep
|
||||
// for the first column of the index if any.
|
||||
// Statistics are stored in the GWI context.
|
||||
// Mock for ES 10.6
|
||||
#if MYSQL_VERSION_ID >= 120401
|
||||
/** @brief Copies the JSON histogram of an indexed column into the GWI context.
 *
 * The histogram is stored only when the field referenced by @p ifp is the
 * first key part of at least one key of its table and the field carries
 * statistics whose histogram is a Histogram_json_hb. The copy is filed in
 * gwi.tableStatisticsMap under {schema, table} -> column name. An entry that
 * already exists for the column is left untouched, because
 * unordered_map::insert does not overwrite.
 *
 * NOTE(review): the histogram is stored directly as the mapped value; confirm
 * this matches the ColumnStatisticsMap alias that is in effect.
 *
 * @param ifp field item whose column statistics are inspected
 * @param gwi translation context that receives the histogram copy
 */
void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi)
{
  auto* field = ifp->field;
  auto* share = field->table->s;
  // Validate the table share before its first use, not after.
  assert(share);

  // Both checks are independent of the key being inspected, so do them once.
  if (!field->read_stats)
  {
    return;
  }

  auto* histogram = dynamic_cast<Histogram_json_hb*>(field->read_stats->histogram);
  if (!histogram)
  {
    return;
  }

  for (uint j = 0; j < share->keys; ++j)
  {
    const auto& key = share->key_info[j];

    // Only the leading key part qualifies, so there is no need to scan the rest.
    if (key.usable_key_parts == 0 || key.key_part[0].fieldnr != field->field_index + 1)
    {
      continue;
    }

    SchemaAndTableName tableName = {share->db.str, share->table_name.str};
    // operator[] creates the per-table map on first use; insert keeps an
    // already stored histogram for this column.
    gwi.tableStatisticsMap[tableName].insert({field->field_name.str, *histogram});
    // Further matching keys would only repeat the same no-op insert.
    return;
  }
}
|
||||
#else
|
||||
// No-op variant selected by the #else branch, i.e. when MYSQL_VERSION_ID is
// below 120401: both arguments are ignored and nothing is stored.
void extractColumnStatistics(Item_field*, gp_walk_info&)
{
}
|
||||
#endif
|
||||
// #if MYSQL_VERSION_ID >= 120401
|
||||
// void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi)
|
||||
// {
|
||||
// for (uint j = 0; j < ifp->field->table->s->keys; j++)
|
||||
// {
|
||||
// for (uint i = 0; i < ifp->field->table->s->key_info[j].usable_key_parts; i++)
|
||||
// {
|
||||
// if (ifp->field->table->s->key_info[j].key_part[i].fieldnr == ifp->field->field_index + 1)
|
||||
// {
|
||||
// if (i == 0 && ifp->field->read_stats)
|
||||
// {
|
||||
// assert(ifp->field->table->s);
|
||||
// auto* histogram = dynamic_cast<Histogram_json_hb*>(ifp->field->read_stats->histogram);
|
||||
// if (histogram)
|
||||
// {
|
||||
// SchemaAndTableName tableName = {ifp->field->table->s->db.str,
|
||||
// ifp->field->table->s->table_name.str};
|
||||
// auto tableStatisticsMapIt = gwi.tableStatisticsMap.find(tableName);
|
||||
// if (tableStatisticsMapIt == gwi.tableStatisticsMap.end())
|
||||
// {
|
||||
// gwi.tableStatisticsMap.insert({tableName, {{ifp->field->field_name.str, *histogram}}});
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// tableStatisticsMapIt->second.insert({ifp->field->field_name.str, *histogram});
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// #else
|
||||
// void extractColumnStatistics(Item_field* /*ifp*/, gp_walk_info& /*gwi*/)
|
||||
// {
|
||||
// }
|
||||
// #endif
|
||||
|
||||
/*@brief Process SELECT part of a query or sub-query */
|
||||
/***********************************************************
|
||||
|
||||
@@ -116,7 +116,7 @@ typedef std::map<execplan::CalpontSystemCatalog::TableAliasName, std::pair<int,
|
||||
typedef std::tr1::unordered_map<TABLE_LIST*, std::vector<COND*>> TableOnExprList;
|
||||
typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap;
|
||||
// Column name; used as the key type of ColumnStatisticsMap.
using ColumnName = std::string;
|
||||
// NOTE(review): ColumnStatisticsMap is declared twice below with different
// mapped types. This looks like the before/after pair of a diff; only one of
// the two declarations can remain in a compilable header — confirm which.
// Variant 1: one histogram per column.
using ColumnStatisticsMap = std::unordered_map<ColumnName, Histogram_json_hb>;
|
||||
// Variant 2: per column, a SimpleColumn paired with a vector of histograms.
using ColumnStatisticsMap = std::unordered_map<ColumnName, std::pair<execplan::SimpleColumn, std::vector<Histogram_json_hb>>>;
|
||||
// Per-table column statistics keyed by SchemaAndTableName, hashed with
// SchemaAndTableNameHash.
using TableStatisticsMap = std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>;
|
||||
|
||||
// This structure is used to store MDB AST -> CSEP translation context.
|
||||
|
||||
@@ -105,6 +105,7 @@ execplan::ParseTree* filtersWithNewRange(execplan::SCSEP& csep, execplan::Simple
|
||||
ltOp->resultType(ltOp->operationType());
|
||||
|
||||
auto* sfr = new execplan::SimpleFilter(ltOp, tableKeyColumnLeftOp, filterColLeftOp);
|
||||
// TODO new
|
||||
auto tableKeyColumnRightOp = new execplan::SimpleColumn(column);
|
||||
tableKeyColumnRightOp->resultType(column.resultType());
|
||||
// TODO hardcoded column type and value
|
||||
@@ -114,8 +115,10 @@ execplan::ParseTree* filtersWithNewRange(execplan::SCSEP& csep, execplan::Simple
|
||||
gtOp->setOpType(filterColRightOp->resultType(), tableKeyColumnRightOp->resultType());
|
||||
gtOp->resultType(gtOp->operationType());
|
||||
|
||||
// TODO new
|
||||
auto* sfl = new execplan::SimpleFilter(gtOp, tableKeyColumnRightOp, filterColRightOp);
|
||||
|
||||
// TODO new
|
||||
execplan::ParseTree* ptp = new execplan::ParseTree(new execplan::LogicOperator("and"));
|
||||
ptp->right(sfr);
|
||||
ptp->left(sfl);
|
||||
@@ -169,6 +172,12 @@ execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPl
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// TBD
|
||||
Histogram_json_hb& chooseStatisticsToUse(std::vector<Histogram_json_hb>& columnStatisticsVec)
|
||||
{
|
||||
return columnStatisticsVec.front();
|
||||
}
|
||||
|
||||
// Populates range bounds based on column statistics
|
||||
// Returns optional with bounds if successful, nullopt otherwise
|
||||
template <typename T>
|
||||
@@ -188,7 +197,8 @@ std::optional<FilterRangeBounds<T>> populateRangeBounds(execplan::SimpleColumn*
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
auto columnStatistics = columnStatisticsIt->second;
|
||||
auto& [simpleColumn, columnStatisticsVec] = columnStatisticsIt->second;
|
||||
auto& columnStatistics = chooseStatisticsToUse(columnStatisticsVec);
|
||||
|
||||
// TODO configurable parallel factor via session variable
|
||||
// NB now histogram size is the way to control parallel factor with 16 being the maximum
|
||||
|
||||
Reference in New Issue
Block a user