1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-11-21 09:20:51 +03:00

chore(QA,plugin): moving statistics code to prepare for PRIMARY KEY support in QA.

This commit is contained in:
drrtuy
2025-10-07 21:56:46 +00:00
parent 47008a2a3f
commit fb98e46bfc
10 changed files with 100 additions and 78 deletions

View File

@@ -128,7 +128,7 @@ execplan::ParseTree* ExistsSub::transform()
} }
// Insert column statistics // Insert column statistics
fGwip.mergeTableStatistics(gwi.tableStatisticsMap); fGwip.mergeTableStatistics(gwi.tableStatistics);
// remove outer query tables // remove outer query tables
CalpontSelectExecutionPlan::TableList tblist; CalpontSelectExecutionPlan::TableList tblist;

View File

@@ -237,7 +237,7 @@ SCSEP FromSubQuery::transform()
} }
// Insert column statistics // Insert column statistics
fGwip.mergeTableStatistics(gwi.tableStatisticsMap); fGwip.mergeTableStatistics(gwi.tableStatistics);
fGwip.subselectList.push_back(csep); fGwip.subselectList.push_back(csep);
return csep; return csep;

View File

@@ -195,7 +195,7 @@ execplan::ParseTree* InSub::transform()
} }
// Insert column statistics // Insert column statistics
fGwip.mergeTableStatistics(gwi.tableStatisticsMap); fGwip.mergeTableStatistics(gwi.tableStatistics);
// remove outer query tables // remove outer query tables
CalpontSelectExecutionPlan::TableList tblist; CalpontSelectExecutionPlan::TableList tblist;

View File

@@ -5267,31 +5267,11 @@ void extractColumnStatistics(TABLE_LIST* table_ptr, gp_walk_info& gwi)
Field* field = table_ptr->table->key_info[j].key_part[0].field; Field* field = table_ptr->table->key_info[j].key_part[0].field;
if (field->read_stats) if (field->read_stats)
{ {
auto* histogram = dynamic_cast<Histogram_json_hb*>(field->read_stats->histogram); SchemaAndTableName tableName = {field->table->s->db.str, field->table->s->table_name.str};
if (histogram) auto sc =
{ std::unique_ptr<execplan::SimpleColumn>(buildSimpleColumnFromFieldForStatistics(field, gwi));
SchemaAndTableName tableName = {field->table->s->db.str, field->table->s->table_name.str}; assert(field->field_name.str);
auto sc = gwi.tableStatistics.createOrUpdate(tableName, field->field_name.str, *sc, field->read_stats);
std::unique_ptr<execplan::SimpleColumn>(buildSimpleColumnFromFieldForStatistics(field, gwi));
auto tableStatisticsMapIt = gwi.tableStatisticsMap.find(tableName);
if (tableStatisticsMapIt == gwi.tableStatisticsMap.end())
{
gwi.tableStatisticsMap[tableName][field->field_name.str] = {*sc, {histogram}};
}
else
{
auto columnStatisticsMapIt = tableStatisticsMapIt->second.find(field->field_name.str);
if (columnStatisticsMapIt == tableStatisticsMapIt->second.end())
{
tableStatisticsMapIt->second[field->field_name.str] = {*sc, {histogram}};
}
else
{
auto& columnStatisticsVec = columnStatisticsMapIt->second.getHistograms();
columnStatisticsVec.push_back(histogram);
}
}
}
} }
} }
} }

View File

@@ -98,7 +98,6 @@ using namespace execplan;
using namespace joblist; using namespace joblist;
#include "errorcodes.h" #include "errorcodes.h"
#include "idberrorinfo.h" #include "idberrorinfo.h"
#include "errorids.h" #include "errorids.h"
@@ -122,23 +121,9 @@ namespace cal_impl_if
{ {
extern bool nonConstFunc(Item_func* ifp); extern bool nonConstFunc(Item_func* ifp);
void gp_walk_info::mergeTableStatistics(const TableStatisticsMap& aTableStatisticsMap) void gp_walk_info::mergeTableStatistics(const TableStatistics& aTableStatistics)
{ {
for (auto& [schemaAndTableName, aColumnStatisticsMap] : aTableStatisticsMap) return tableStatistics.mergeTableStatistics(aTableStatistics);
{
auto tableStatisticsMapIt = tableStatisticsMap.find(schemaAndTableName);
if (tableStatisticsMapIt == tableStatisticsMap.end())
{
tableStatisticsMap[schemaAndTableName] = aColumnStatisticsMap;
}
else
{
for (auto& [columnName, histogram] : aColumnStatisticsMap)
{
tableStatisticsMapIt->second[columnName] = histogram;
}
}
}
} }
} // namespace cal_impl_if } // namespace cal_impl_if

View File

@@ -122,15 +122,12 @@ typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap;
struct ColumnStatistics struct ColumnStatistics
{ {
ColumnStatistics(execplan::SimpleColumn column, std::vector<Histogram_json_hb*> histograms) ColumnStatistics(execplan::SimpleColumn& column, std::vector<Histogram_json_hb*> histograms)
: column(column), histograms(histograms) : column(column), histograms(histograms)
{ {
} }
ColumnStatistics() = default; ColumnStatistics() = default;
execplan::SimpleColumn column;
std::vector<Histogram_json_hb*> histograms;
std::vector<Histogram_json_hb*>& getHistograms() std::vector<Histogram_json_hb*>& getHistograms()
{ {
return histograms; return histograms;
@@ -140,13 +137,82 @@ struct ColumnStatistics
{ {
return column; return column;
} }
execplan::SimpleColumn column;
std::vector<Histogram_json_hb*> histograms;
Field* min{nullptr};
Field* max{nullptr};
}; };
using ColumnName = std::string; using ColumnName = std::string;
using MDBColumnStatistics = Column_statistics;
using ColumnStatisticsMap = std::unordered_map<ColumnName, ColumnStatistics>; using ColumnStatisticsMap = std::unordered_map<ColumnName, ColumnStatistics>;
using TableStatisticsMap = using TableStatisticsMap =
std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>; std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>;
struct TableStatistics
{
TableStatistics() = default;
void createOrUpdate(SchemaAndTableName tableName, const char* fieldName, execplan::SimpleColumn& sc,
MDBColumnStatistics* statistics)
{
auto* histogram = dynamic_cast<Histogram_json_hb*>(statistics->histogram);
auto tableStatisticsIt = tableStatistics_.find(tableName);
if (tableStatisticsIt == tableStatistics_.end())
{
tableStatistics_[tableName][fieldName] = {sc, {histogram}};
}
else
{
auto columnStatisticsMapIt = tableStatisticsIt->second.find(fieldName);
if (columnStatisticsMapIt == tableStatisticsIt->second.end())
{
tableStatisticsIt->second[fieldName] = {sc, {histogram}};
}
else
{
auto& columnStatisticsVec = columnStatisticsMapIt->second.getHistograms();
columnStatisticsVec.push_back(histogram);
}
}
}
std::optional<ColumnStatisticsMap> findStatisticsForATable(SchemaAndTableName& schemaAndTableName)
{
auto tableStatisticsIt = tableStatistics_.find(schemaAndTableName);
if (tableStatisticsIt == tableStatistics_.end())
{
return std::nullopt;
}
return {tableStatisticsIt->second};
}
void mergeTableStatistics(const TableStatistics& aTableStatistics)
{
for (auto& [schemaAndTableName, aColumnStatisticsMap] : aTableStatistics.tableStatistics_)
{
auto tableStatisticsIt = tableStatistics_.find(schemaAndTableName);
if (tableStatisticsIt == tableStatistics_.end())
{
tableStatistics_[schemaAndTableName] = aColumnStatisticsMap;
}
else
{
for (auto& [columnName, histogram] : aColumnStatisticsMap)
{
tableStatisticsIt->second[columnName] = histogram;
}
}
}
}
TableStatisticsMap tableStatistics_;
};
// This structure is used to store MDB AST -> CSEP translation context. // This structure is used to store MDB AST -> CSEP translation context.
// There is a column statistics for some columns in a query. // There is a column statistics for some columns in a query.
// As per 23.10.5 "some" means first column of the index in projection list of CSEP // As per 23.10.5 "some" means first column of the index in projection list of CSEP
@@ -161,7 +227,7 @@ struct gp_walk_info
execplan::CalpontSelectExecutionPlan::ReturnedColumnList orderByCols; execplan::CalpontSelectExecutionPlan::ReturnedColumnList orderByCols;
std::vector<Item*> extSelAggColsItems; std::vector<Item*> extSelAggColsItems;
execplan::CalpontSelectExecutionPlan::ColumnMap columnMap; execplan::CalpontSelectExecutionPlan::ColumnMap columnMap;
TableStatisticsMap tableStatisticsMap; TableStatistics tableStatistics;
// This vector temporarily hold the projection columns to be added // This vector temporarily hold the projection columns to be added
// to the returnedCols vector for subquery processing. It will be appended // to the returnedCols vector for subquery processing. It will be appended
// to the end of returnedCols when the processing is finished. // to the end of returnedCols when the processing is finished.
@@ -252,7 +318,7 @@ struct gp_walk_info
SubQuery** subQueriesChain; SubQuery** subQueriesChain;
gp_walk_info(long timeZone_, SubQuery** subQueriesChain_) gp_walk_info(long timeZone_, SubQuery** subQueriesChain_)
: tableStatisticsMap({}) : tableStatistics({})
, sessionid(0) , sessionid(0)
, fatalParseError(false) , fatalParseError(false)
, condPush(false) , condPush(false)
@@ -284,17 +350,10 @@ struct gp_walk_info
} }
~gp_walk_info(); ~gp_walk_info();
void mergeTableStatistics(const TableStatisticsMap& tableStatisticsMap); void mergeTableStatistics(const TableStatistics& tableStatistics);
std::optional<ColumnStatisticsMap> findStatisticsForATable(SchemaAndTableName& schemaAndTableName) std::optional<ColumnStatisticsMap> findStatisticsForATable(SchemaAndTableName& schemaAndTableName)
{ {
auto tableStatisticsMapIt = tableStatisticsMap.find(schemaAndTableName); return tableStatistics.findStatisticsForATable(schemaAndTableName);
if (tableStatisticsMapIt == tableStatisticsMap.end())
{
return std::nullopt;
}
return {tableStatisticsMapIt->second};
} }
}; };

View File

@@ -277,7 +277,7 @@ execplan::ParseTree* ScalarSub::buildParseTree(PredicateOperator* op)
} }
// Insert column statistics // Insert column statistics
fGwip.mergeTableStatistics(gwi.tableStatisticsMap); fGwip.mergeTableStatistics(gwi.tableStatistics);
fGwip.subselectList.push_back(csep); fGwip.subselectList.push_back(csep);

View File

@@ -97,7 +97,7 @@ SCSEP SelectSubQuery::transform()
} }
// Insert column statistics // Insert column statistics
fGwip.mergeTableStatistics(gwi.tableStatisticsMap); fGwip.mergeTableStatistics(gwi.tableStatistics);
// Insert subselect CSEP // Insert subselect CSEP
fGwip.subselectList.push_back(csep); fGwip.subselectList.push_back(csep);

View File

@@ -73,14 +73,13 @@ bool someAreForeignTables(execplan::CalpontSelectExecutionPlan& csep)
bool someForeignTablesHasStatisticsAndMbIndex(execplan::CalpontSelectExecutionPlan& csep, bool someForeignTablesHasStatisticsAndMbIndex(execplan::CalpontSelectExecutionPlan& csep,
optimizer::RBOptimizerContext& ctx) optimizer::RBOptimizerContext& ctx)
{ {
return std::any_of( return std::any_of(csep.tableList().begin(), csep.tableList().end(),
csep.tableList().begin(), csep.tableList().end(), [&ctx](const auto& table)
[&ctx](const auto& table) {
{ cal_impl_if::SchemaAndTableName schemaAndTableName = {table.schema, table.table};
cal_impl_if::SchemaAndTableName schemaAndTableName = {table.schema, table.table}; return (!table.isColumnstore() &&
return (!table.isColumnstore() && ctx.getGwi().tableStatisticsMap.find(schemaAndTableName) != ctx.getGwi().tableStatistics.findStatisticsForATable(schemaAndTableName));
ctx.getGwi().tableStatisticsMap.end()); });
});
} }
// This routine produces a new ParseTree that is AND(lowerBand <= column, column <= upperBand) // This routine produces a new ParseTree that is AND(lowerBand <= column, column <= upperBand)
@@ -230,15 +229,14 @@ std::optional<std::pair<execplan::SimpleColumn&, Histogram_json_hb*>> chooseKeyC
{ {
cal_impl_if::SchemaAndTableName schemaAndTableName = {targetTable.schema, targetTable.table}; cal_impl_if::SchemaAndTableName schemaAndTableName = {targetTable.schema, targetTable.table};
auto tableColumnsStatisticsIt = ctx.getGwi().tableStatisticsMap.find(schemaAndTableName); auto tableColumnsStatistics = ctx.getGwi().tableStatistics.findStatisticsForATable(schemaAndTableName);
if (tableColumnsStatisticsIt == ctx.getGwi().tableStatisticsMap.end() || if (!tableColumnsStatistics)
tableColumnsStatisticsIt->second.empty())
{ {
return std::nullopt; return std::nullopt;
} }
// TODO take some column and some stats for it!!! // TODO take some column and some stats for it!!!
for (auto& [columnName, columnStatistics] : tableColumnsStatisticsIt->second) for (auto& [columnName, columnStatistics] : tableColumnsStatistics.value())
{ {
auto& sc = columnStatistics.getColumn(); auto& sc = columnStatistics.getColumn();
auto& columnStatisticsVec = columnStatistics.getHistograms(); auto& columnStatisticsVec = columnStatistics.getHistograms();

View File

@@ -85,14 +85,14 @@ class RBOHybridTest : public ::testing::Test
cal_impl_if::SchemaAndTableName, cal_impl_if::SchemaAndTableName,
std::map<std::string, std::pair<execplan::SimpleColumn, std::vector<Histogram_json_hb*>>>, std::map<std::string, std::pair<execplan::SimpleColumn, std::vector<Histogram_json_hb*>>>,
cal_impl_if::SchemaAndTableNameHash> cal_impl_if::SchemaAndTableNameHash>
tableStatisticsMap; tableStatistics;
// Helper method to find statistics for a table // Helper method to find statistics for a table
std::map<std::string, std::pair<execplan::SimpleColumn, std::vector<Histogram_json_hb*>>>* std::map<std::string, std::pair<execplan::SimpleColumn, std::vector<Histogram_json_hb*>>>*
findStatisticsForATable(const cal_impl_if::SchemaAndTableName& schemaAndTable) findStatisticsForATable(const cal_impl_if::SchemaAndTableName& schemaAndTable)
{ {
auto it = tableStatisticsMap.find(schemaAndTable); auto it = tableStatistics.find(schemaAndTable);
return (it != tableStatisticsMap.end()) ? &(it->second) : nullptr; return (it != tableStatistics.end()) ? &(it->second) : nullptr;
} }
}; };
@@ -116,7 +116,7 @@ class RBOHybridTest : public ::testing::Test
cal_impl_if::SchemaAndTableName schemaAndTable = {schema, table}; cal_impl_if::SchemaAndTableName schemaAndTable = {schema, table};
execplan::SimpleColumn simpleCol; // Mock column execplan::SimpleColumn simpleCol; // Mock column
std::vector<Histogram_json_hb*> histograms = {histogram}; std::vector<Histogram_json_hb*> histograms = {histogram};
mockGWI.tableStatisticsMap[schemaAndTable][column] = std::make_pair(simpleCol, histograms); mockGWI.tableStatistics[schemaAndTable][column] = std::make_pair(simpleCol, histograms);
} }
// Get the mock gateway info for testing helper functions // Get the mock gateway info for testing helper functions