1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-08 14:22:09 +03:00

feat(rbo,rules): refactored statistics storage in gwi and implemented statistics based UNION rewrite.

This commit is contained in:
drrtuy
2025-07-17 15:37:20 +00:00
parent 67295b4320
commit 15be33fbc5
6 changed files with 122 additions and 46 deletions

View File

@@ -96,13 +96,29 @@ enum ClauseType
ORDER_BY
};
struct SchemaAndTableName {
std::string schema;
std::string table;
bool operator==(const SchemaAndTableName& other) const {
return schema == other.schema && table == other.table;
}
};
struct SchemaAndTableNameHash {
std::size_t operator()(const SchemaAndTableName& k) const {
return std::hash<std::string>()(k.schema + k.table);
}
};
typedef std::vector<JoinInfo> JoinInfoVec;
typedef dmlpackage::ColValuesList ColValuesList;
typedef dmlpackage::TableValuesMap TableValuesMap;
typedef std::map<execplan::CalpontSystemCatalog::TableAliasName, std::pair<int, TABLE_LIST*>> TableMap;
typedef std::tr1::unordered_map<TABLE_LIST*, std::vector<COND*>> TableOnExprList;
typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap;
using ColumnStatisticsMap = std::unordered_map<std::string, std::vector<Histogram_bucket>>;
using ColumnName = std::string;
using ColumnStatisticsMap = std::unordered_map<ColumnName, Histogram_json_hb>;
using TableStatisticsMap = std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>;
struct gp_walk_info
{
@@ -112,7 +128,7 @@ struct gp_walk_info
execplan::CalpontSelectExecutionPlan::ReturnedColumnList orderByCols;
std::vector<Item*> extSelAggColsItems;
execplan::CalpontSelectExecutionPlan::ColumnMap columnMap;
std::unordered_map<std::string, std::vector<Histogram_bucket>> columnStatisticsMap;
TableStatisticsMap tableStatisticsMap;
// This vector temporarily hold the projection columns to be added
// to the returnedCols vector for subquery processing. It will be appended
// to the end of returnedCols when the processing is finished.
@@ -203,7 +219,8 @@ struct gp_walk_info
SubQuery** subQueriesChain;
gp_walk_info(long timeZone_, SubQuery** subQueriesChain_)
: sessionid(0)
: tableStatisticsMap({})
, sessionid(0)
, fatalParseError(false)
, condPush(false)
, dropCond(false)
@@ -234,7 +251,8 @@ struct gp_walk_info
}
~gp_walk_info();
void mergeColumnStatisticsMap(const std::unordered_map<std::string, std::vector<Histogram_bucket>>& columnStatisticsMap);
void mergeTableStatistics(const TableStatisticsMap& tableStatisticsMap);
std::optional<ColumnStatisticsMap> findStatisticsForATable(SchemaAndTableName& schemaAndTableName);
};
struct SubQueryChainHolder;