1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-11-03 17:13:17 +03:00

feat(rbo,rules,QA): refactored statistics storage

This commit is contained in:
drrtuy
2025-07-31 12:50:24 +00:00
parent 112ba9f162
commit e167082497
3 changed files with 71 additions and 41 deletions

View File

@@ -5228,7 +5228,27 @@ int processFrom(bool& isUnion, SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP&
std::cout << " has stats "; std::cout << " has stats ";
SchemaAndTableName tableName = {field->table->s->db.str, SchemaAndTableName tableName = {field->table->s->db.str,
field->table->s->table_name.str}; field->table->s->table_name.str};
gwi.tableStatisticsMap[tableName][field->field_name.str] = *histogram; execplan::SimpleColumn simpleColumn = {field->table->s->db.str,
field->table->s->table_name.str,
field->field_name.str};
// Store the histogram for this column in the GWI statistics map, keyed by
// (schema, table) and then by column name. A column may accumulate several
// histograms; they are kept in the vector that is the second member of the
// per-column pair.
auto tableStatisticsMapIt = gwi.tableStatisticsMap.find(tableName);
if (tableStatisticsMapIt == gwi.tableStatisticsMap.end())
{
  // First statistics seen for this table: create both map levels.
  gwi.tableStatisticsMap[tableName][field->field_name.str] = {simpleColumn, {*histogram}};
}
else
{
  auto& columnStatisticsMap = tableStatisticsMapIt->second;
  auto columnStatisticsMapIt = columnStatisticsMap.find(field->field_name.str);
  if (columnStatisticsMapIt == columnStatisticsMap.end())
  {
    // Table is known, column is new.
    columnStatisticsMap[field->field_name.str] = {simpleColumn, {*histogram}};
  }
  else
  {
    // Bind by reference: a plain `auto` would copy the vector, and the
    // push_back would then modify the temporary copy instead of the stored
    // statistics, silently discarding the histogram.
    auto& columnStatisticsVec = columnStatisticsMapIt->second.second;
    columnStatisticsVec.push_back(*histogram);
  }
}
} }
else else
{ {
@@ -6321,43 +6341,43 @@ int processLimitAndOffset(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep
// for the first column of the index if any. // for the first column of the index if any.
// Statistics is stored in GWI context. // Statistics is stored in GWI context.
// Mock for ES 10.6 // Mock for ES 10.6
#if MYSQL_VERSION_ID >= 120401 // #if MYSQL_VERSION_ID >= 120401
void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi) // void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi)
{ // {
for (uint j = 0; j < ifp->field->table->s->keys; j++) // for (uint j = 0; j < ifp->field->table->s->keys; j++)
{ // {
for (uint i = 0; i < ifp->field->table->s->key_info[j].usable_key_parts; i++) // for (uint i = 0; i < ifp->field->table->s->key_info[j].usable_key_parts; i++)
{ // {
if (ifp->field->table->s->key_info[j].key_part[i].fieldnr == ifp->field->field_index + 1) // if (ifp->field->table->s->key_info[j].key_part[i].fieldnr == ifp->field->field_index + 1)
{ // {
if (i == 0 && ifp->field->read_stats) // if (i == 0 && ifp->field->read_stats)
{ // {
assert(ifp->field->table->s); // assert(ifp->field->table->s);
auto* histogram = dynamic_cast<Histogram_json_hb*>(ifp->field->read_stats->histogram); // auto* histogram = dynamic_cast<Histogram_json_hb*>(ifp->field->read_stats->histogram);
if (histogram) // if (histogram)
{ // {
SchemaAndTableName tableName = {ifp->field->table->s->db.str, // SchemaAndTableName tableName = {ifp->field->table->s->db.str,
ifp->field->table->s->table_name.str}; // ifp->field->table->s->table_name.str};
auto tableStatisticsMapIt = gwi.tableStatisticsMap.find(tableName); // auto tableStatisticsMapIt = gwi.tableStatisticsMap.find(tableName);
if (tableStatisticsMapIt == gwi.tableStatisticsMap.end()) // if (tableStatisticsMapIt == gwi.tableStatisticsMap.end())
{ // {
gwi.tableStatisticsMap.insert({tableName, {{ifp->field->field_name.str, *histogram}}}); // gwi.tableStatisticsMap.insert({tableName, {{ifp->field->field_name.str, *histogram}}});
} // }
else // else
{ // {
tableStatisticsMapIt->second.insert({ifp->field->field_name.str, *histogram}); // tableStatisticsMapIt->second.insert({ifp->field->field_name.str, *histogram});
} // }
} // }
} // }
} // }
} // }
} // }
} // }
#else // #else
void extractColumnStatistics(Item_field* /*ifp*/, gp_walk_info& /*gwi*/) // void extractColumnStatistics(Item_field* /*ifp*/, gp_walk_info& /*gwi*/)
{ // {
} // }
#endif // #endif
/*@brief Process SELECT part of a query or sub-query */ /*@brief Process SELECT part of a query or sub-query */
/*********************************************************** /***********************************************************

View File

@@ -116,7 +116,7 @@ typedef std::map<execplan::CalpontSystemCatalog::TableAliasName, std::pair<int,
typedef std::tr1::unordered_map<TABLE_LIST*, std::vector<COND*>> TableOnExprList; typedef std::tr1::unordered_map<TABLE_LIST*, std::vector<COND*>> TableOnExprList;
typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap; typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap;
using ColumnName = std::string; using ColumnName = std::string;
using ColumnStatisticsMap = std::unordered_map<ColumnName, Histogram_json_hb>; using ColumnStatisticsMap = std::unordered_map<ColumnName, std::pair<execplan::SimpleColumn, std::vector<Histogram_json_hb>>>;
using TableStatisticsMap = std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>; using TableStatisticsMap = std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>;
// This structure is used to store MDB AST -> CSEP translation context. // This structure is used to store MDB AST -> CSEP translation context.

View File

@@ -105,6 +105,7 @@ execplan::ParseTree* filtersWithNewRange(execplan::SCSEP& csep, execplan::Simple
ltOp->resultType(ltOp->operationType()); ltOp->resultType(ltOp->operationType());
auto* sfr = new execplan::SimpleFilter(ltOp, tableKeyColumnLeftOp, filterColLeftOp); auto* sfr = new execplan::SimpleFilter(ltOp, tableKeyColumnLeftOp, filterColLeftOp);
// TODO new
auto tableKeyColumnRightOp = new execplan::SimpleColumn(column); auto tableKeyColumnRightOp = new execplan::SimpleColumn(column);
tableKeyColumnRightOp->resultType(column.resultType()); tableKeyColumnRightOp->resultType(column.resultType());
// TODO hardcoded column type and value // TODO hardcoded column type and value
@@ -114,8 +115,10 @@ execplan::ParseTree* filtersWithNewRange(execplan::SCSEP& csep, execplan::Simple
gtOp->setOpType(filterColRightOp->resultType(), tableKeyColumnRightOp->resultType()); gtOp->setOpType(filterColRightOp->resultType(), tableKeyColumnRightOp->resultType());
gtOp->resultType(gtOp->operationType()); gtOp->resultType(gtOp->operationType());
// TODO new
auto* sfl = new execplan::SimpleFilter(gtOp, tableKeyColumnRightOp, filterColRightOp); auto* sfl = new execplan::SimpleFilter(gtOp, tableKeyColumnRightOp, filterColRightOp);
// TODO new
execplan::ParseTree* ptp = new execplan::ParseTree(new execplan::LogicOperator("and")); execplan::ParseTree* ptp = new execplan::ParseTree(new execplan::LogicOperator("and"));
ptp->right(sfr); ptp->right(sfr);
ptp->left(sfl); ptp->left(sfl);
@@ -169,6 +172,12 @@ execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPl
return nullptr; return nullptr;
} }
// Picks one histogram out of those stored for a column.
// TBD: no selection policy is implemented yet, so the first stored histogram
// is always returned. The vector must not be empty.
Histogram_json_hb& chooseStatisticsToUse(std::vector<Histogram_json_hb>& columnStatisticsVec)
{
  return *columnStatisticsVec.begin();
}
// Populates range bounds based on column statistics // Populates range bounds based on column statistics
// Returns optional with bounds if successful, nullopt otherwise // Returns optional with bounds if successful, nullopt otherwise
template <typename T> template <typename T>
@@ -188,7 +197,8 @@ std::optional<FilterRangeBounds<T>> populateRangeBounds(execplan::SimpleColumn*
return std::nullopt; return std::nullopt;
} }
auto columnStatistics = columnStatisticsIt->second; auto& [simpleColumn, columnStatisticsVec] = columnStatisticsIt->second;
auto& columnStatistics = chooseStatisticsToUse(columnStatisticsVec);
// TODO configurable parallel factor via session variable // TODO configurable parallel factor via session variable
// NB now histogram size is the way to control parallel factor with 16 being the maximum // NB now histogram size is the way to control parallel factor with 16 being the maximum