1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-11-22 20:42:36 +03:00

chore(QA,stats): adding a wrapper class for the statistics.

This commit is contained in:
drrtuy
2025-09-29 20:59:01 +00:00
parent 66b2a8d19c
commit 47008a2a3f
4 changed files with 34 additions and 11 deletions

View File

@@ -5287,7 +5287,7 @@ void extractColumnStatistics(TABLE_LIST* table_ptr, gp_walk_info& gwi)
} }
else else
{ {
auto columnStatisticsVec = columnStatisticsMapIt->second.second; auto& columnStatisticsVec = columnStatisticsMapIt->second.getHistograms();
columnStatisticsVec.push_back(histogram); columnStatisticsVec.push_back(histogram);
} }
} }

View File

@@ -119,9 +119,31 @@ typedef dmlpackage::TableValuesMap TableValuesMap;
typedef std::map<execplan::CalpontSystemCatalog::TableAliasName, std::pair<int, TABLE_LIST*>> TableMap; typedef std::map<execplan::CalpontSystemCatalog::TableAliasName, std::pair<int, TABLE_LIST*>> TableMap;
typedef std::tr1::unordered_map<TABLE_LIST*, std::vector<COND*>> TableOnExprList; typedef std::tr1::unordered_map<TABLE_LIST*, std::vector<COND*>> TableOnExprList;
typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap; typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap;
/// Bundles a column with the histograms associated with it.
///
/// Only raw `Histogram_json_hb*` pointers are stored; nothing in this struct
/// deletes them.
/// NOTE(review): ownership/lifetime of the pointed-to histograms is not visible
/// here - confirm that whoever fills this struct keeps them alive.
struct ColumnStatistics
{
  ColumnStatistics() = default;

  /// @param column     column the statistics belong to
  /// @param histograms histograms associated with that column
  ///
  /// Both arguments are sink parameters: they are taken by value and moved
  /// into the members, so passing a temporary costs no extra copy.
  ColumnStatistics(execplan::SimpleColumn column, std::vector<Histogram_json_hb*> histograms)
   : column(std::move(column)), histograms(std::move(histograms))
  {
  }

  /// @return mutable access to the stored histogram pointers.
  std::vector<Histogram_json_hb*>& getHistograms()
  {
    return histograms;
  }

  /// @return read-only access to the stored histogram pointers.
  const std::vector<Histogram_json_hb*>& getHistograms() const
  {
    return histograms;
  }

  /// @return mutable access to the stored column.
  execplan::SimpleColumn& getColumn()
  {
    return column;
  }

  /// @return read-only access to the stored column.
  const execplan::SimpleColumn& getColumn() const
  {
    return column;
  }

  // Members stay public so existing code that accesses them directly keeps working.
  execplan::SimpleColumn column;
  std::vector<Histogram_json_hb*> histograms;
};
using ColumnName = std::string; using ColumnName = std::string;
using ColumnStatisticsMap = using ColumnStatisticsMap = std::unordered_map<ColumnName, ColumnStatistics>;
std::unordered_map<ColumnName, std::pair<execplan::SimpleColumn, std::vector<Histogram_json_hb*>>>;
using TableStatisticsMap = using TableStatisticsMap =
std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>; std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>;

View File

@@ -238,11 +238,12 @@ std::optional<std::pair<execplan::SimpleColumn&, Histogram_json_hb*>> chooseKeyC
} }
// TODO take some column and some stats for it!!! // TODO take some column and some stats for it!!!
for (auto& [columnName, scAndStatisticsVec] : tableColumnsStatisticsIt->second) for (auto& [columnName, columnStatistics] : tableColumnsStatisticsIt->second)
{ {
auto& [sc, columnStatisticsVec] = scAndStatisticsVec; auto& sc = columnStatistics.getColumn();
auto* columnStatistics = chooseStatisticsToUse(columnStatisticsVec); auto& columnStatisticsVec = columnStatistics.getHistograms();
return {{sc, columnStatistics}}; auto* bestColumnStatistics = chooseStatisticsToUse(columnStatisticsVec);
return {{sc, bestColumnStatistics}};
} }
return std::nullopt; return std::nullopt;
@@ -262,7 +263,7 @@ bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep, optimizer::RB
// Returns optional with bounds if successful, nullopt otherwise // Returns optional with bounds if successful, nullopt otherwise
template <typename T> template <typename T>
std::optional<details::FilterRangeBounds<T>> populateRangeBounds(Histogram_json_hb* columnStatistics, std::optional<details::FilterRangeBounds<T>> populateRangeBounds(Histogram_json_hb* columnStatistics,
optimizer::RBOptimizerContext& ctx) size_t& maxParallelFactor)
{ {
details::FilterRangeBounds<T> bounds; details::FilterRangeBounds<T> bounds;
@@ -280,7 +281,6 @@ std::optional<details::FilterRangeBounds<T>> populateRangeBounds(Histogram_json_
}; };
// Get parallel factor from context // Get parallel factor from context
size_t maxParallelFactor = ctx.getCesOptimizationParallelFactor();
size_t numberOfUnionUnits = std::min(columnStatistics->get_json_histogram().size(), maxParallelFactor); size_t numberOfUnionUnits = std::min(columnStatistics->get_json_histogram().size(), maxParallelFactor);
size_t numberOfBucketsPerUnionUnit = columnStatistics->get_json_histogram().size() / numberOfUnionUnits; size_t numberOfBucketsPerUnionUnit = columnStatistics->get_json_histogram().size() / numberOfUnionUnits;
@@ -343,7 +343,8 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
std::cout << "makeUnionFromTable RC front " << csep.returnedCols().front()->toString() << std::endl; std::cout << "makeUnionFromTable RC front " << csep.returnedCols().front()->toString() << std::endl;
// TODO char and other numerical types support // TODO char and other numerical types support
auto boundsOpt = populateRangeBounds<uint64_t>(columnStatistics, ctx); size_t configuredMaxParallelFactor = ctx.getCesOptimizationParallelFactor();
auto boundsOpt = populateRangeBounds<uint64_t>(columnStatistics, configuredMaxParallelFactor);
if (!boundsOpt.has_value()) if (!boundsOpt.has_value())
{ {
return unionVec; return unionVec;

View File

@@ -145,4 +145,4 @@ bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root, RBOptimizerContext
std::string getRewrittenSubTableAlias(const execplan::CalpontSystemCatalog::TableAliasName& table, std::string getRewrittenSubTableAlias(const execplan::CalpontSystemCatalog::TableAliasName& table,
const RBOptimizerContext& ctx); const RBOptimizerContext& ctx);
} } // namespace optimizer