1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-11-22 20:42:36 +03:00

chore(QA,stats): adding a wrapper class for the statistics.

This commit is contained in:
drrtuy
2025-09-29 20:59:01 +00:00
parent 66b2a8d19c
commit 47008a2a3f
4 changed files with 34 additions and 11 deletions

View File

@@ -5287,7 +5287,7 @@ void extractColumnStatistics(TABLE_LIST* table_ptr, gp_walk_info& gwi)
}
else
{
auto columnStatisticsVec = columnStatisticsMapIt->second.second;
auto& columnStatisticsVec = columnStatisticsMapIt->second.getHistograms();
columnStatisticsVec.push_back(histogram);
}
}

View File

@@ -119,9 +119,31 @@ typedef dmlpackage::TableValuesMap TableValuesMap;
// Ordered map keyed by table alias name; each value pairs an int with the TABLE_LIST pointer.
// NOTE(review): the meaning of the int is not visible from this declaration — confirm against users.
typedef std::map<execplan::CalpontSystemCatalog::TableAliasName, std::pair<int, TABLE_LIST*>> TableMap;
// Hash map from a TABLE_LIST pointer to a vector of COND pointers
// (presumably the ON-clause conditions attached to that table — verify against callers).
typedef std::tr1::unordered_map<TABLE_LIST*, std::vector<COND*>> TableOnExprList;
// Hash map from a TABLE_LIST pointer to an unsigned value
// (presumably outer-join bookkeeping for that table — verify against callers).
typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap;
struct ColumnStatistics
{
ColumnStatistics(execplan::SimpleColumn column, std::vector<Histogram_json_hb*> histograms)
: column(column), histograms(histograms)
{
}
ColumnStatistics() = default;
execplan::SimpleColumn column;
std::vector<Histogram_json_hb*> histograms;
std::vector<Histogram_json_hb*>& getHistograms()
{
return histograms;
}
execplan::SimpleColumn& getColumn()
{
return column;
}
};
using ColumnName = std::string;
using ColumnStatisticsMap =
std::unordered_map<ColumnName, std::pair<execplan::SimpleColumn, std::vector<Histogram_json_hb*>>>;
using ColumnStatisticsMap = std::unordered_map<ColumnName, ColumnStatistics>;
using TableStatisticsMap =
std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>;

View File

@@ -238,11 +238,12 @@ std::optional<std::pair<execplan::SimpleColumn&, Histogram_json_hb*>> chooseKeyC
}
// TODO take some column and some stats for it!!!
for (auto& [columnName, scAndStatisticsVec] : tableColumnsStatisticsIt->second)
for (auto& [columnName, columnStatistics] : tableColumnsStatisticsIt->second)
{
auto& [sc, columnStatisticsVec] = scAndStatisticsVec;
auto* columnStatistics = chooseStatisticsToUse(columnStatisticsVec);
return {{sc, columnStatistics}};
auto& sc = columnStatistics.getColumn();
auto& columnStatisticsVec = columnStatistics.getHistograms();
auto* bestColumnStatistics = chooseStatisticsToUse(columnStatisticsVec);
return {{sc, bestColumnStatistics}};
}
return std::nullopt;
@@ -262,7 +263,7 @@ bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep, optimizer::RB
// Returns optional with bounds if successful, nullopt otherwise
template <typename T>
std::optional<details::FilterRangeBounds<T>> populateRangeBounds(Histogram_json_hb* columnStatistics,
optimizer::RBOptimizerContext& ctx)
size_t& maxParallelFactor)
{
details::FilterRangeBounds<T> bounds;
@@ -280,7 +281,6 @@ std::optional<details::FilterRangeBounds<T>> populateRangeBounds(Histogram_json_
};
// Get parallel factor from context
size_t maxParallelFactor = ctx.getCesOptimizationParallelFactor();
size_t numberOfUnionUnits = std::min(columnStatistics->get_json_histogram().size(), maxParallelFactor);
size_t numberOfBucketsPerUnionUnit = columnStatistics->get_json_histogram().size() / numberOfUnionUnits;
@@ -343,7 +343,8 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable(
std::cout << "makeUnionFromTable RC front " << csep.returnedCols().front()->toString() << std::endl;
// TODO char and other numerical types support
auto boundsOpt = populateRangeBounds<uint64_t>(columnStatistics, ctx);
size_t configuredMaxParallelFactor = ctx.getCesOptimizationParallelFactor();
auto boundsOpt = populateRangeBounds<uint64_t>(columnStatistics, configuredMaxParallelFactor);
if (!boundsOpt.has_value())
{
return unionVec;

View File

@@ -145,4 +145,4 @@ bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root, RBOptimizerContext
std::string getRewrittenSubTableAlias(const execplan::CalpontSystemCatalog::TableAliasName& table,
const RBOptimizerContext& ctx);
}
} // namespace optimizer