You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-11-21 09:20:51 +03:00
chore(QA,plugin): moving statistics code to prepare for PRIMARY KEY support in QA.
This commit is contained in:
@@ -128,7 +128,7 @@ execplan::ParseTree* ExistsSub::transform()
|
||||
}
|
||||
|
||||
// Insert column statistics
|
||||
fGwip.mergeTableStatistics(gwi.tableStatisticsMap);
|
||||
fGwip.mergeTableStatistics(gwi.tableStatistics);
|
||||
|
||||
// remove outer query tables
|
||||
CalpontSelectExecutionPlan::TableList tblist;
|
||||
|
||||
@@ -237,7 +237,7 @@ SCSEP FromSubQuery::transform()
|
||||
}
|
||||
|
||||
// Insert column statistics
|
||||
fGwip.mergeTableStatistics(gwi.tableStatisticsMap);
|
||||
fGwip.mergeTableStatistics(gwi.tableStatistics);
|
||||
|
||||
fGwip.subselectList.push_back(csep);
|
||||
return csep;
|
||||
|
||||
@@ -195,7 +195,7 @@ execplan::ParseTree* InSub::transform()
|
||||
}
|
||||
|
||||
// Insert column statistics
|
||||
fGwip.mergeTableStatistics(gwi.tableStatisticsMap);
|
||||
fGwip.mergeTableStatistics(gwi.tableStatistics);
|
||||
|
||||
// remove outer query tables
|
||||
CalpontSelectExecutionPlan::TableList tblist;
|
||||
|
||||
@@ -5267,31 +5267,11 @@ void extractColumnStatistics(TABLE_LIST* table_ptr, gp_walk_info& gwi)
|
||||
Field* field = table_ptr->table->key_info[j].key_part[0].field;
|
||||
if (field->read_stats)
|
||||
{
|
||||
auto* histogram = dynamic_cast<Histogram_json_hb*>(field->read_stats->histogram);
|
||||
if (histogram)
|
||||
{
|
||||
SchemaAndTableName tableName = {field->table->s->db.str, field->table->s->table_name.str};
|
||||
auto sc =
|
||||
std::unique_ptr<execplan::SimpleColumn>(buildSimpleColumnFromFieldForStatistics(field, gwi));
|
||||
auto tableStatisticsMapIt = gwi.tableStatisticsMap.find(tableName);
|
||||
if (tableStatisticsMapIt == gwi.tableStatisticsMap.end())
|
||||
{
|
||||
gwi.tableStatisticsMap[tableName][field->field_name.str] = {*sc, {histogram}};
|
||||
}
|
||||
else
|
||||
{
|
||||
auto columnStatisticsMapIt = tableStatisticsMapIt->second.find(field->field_name.str);
|
||||
if (columnStatisticsMapIt == tableStatisticsMapIt->second.end())
|
||||
{
|
||||
tableStatisticsMapIt->second[field->field_name.str] = {*sc, {histogram}};
|
||||
}
|
||||
else
|
||||
{
|
||||
auto& columnStatisticsVec = columnStatisticsMapIt->second.getHistograms();
|
||||
columnStatisticsVec.push_back(histogram);
|
||||
}
|
||||
}
|
||||
}
|
||||
SchemaAndTableName tableName = {field->table->s->db.str, field->table->s->table_name.str};
|
||||
auto sc =
|
||||
std::unique_ptr<execplan::SimpleColumn>(buildSimpleColumnFromFieldForStatistics(field, gwi));
|
||||
assert(field->field_name.str);
|
||||
gwi.tableStatistics.createOrUpdate(tableName, field->field_name.str, *sc, field->read_stats);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -98,7 +98,6 @@ using namespace execplan;
|
||||
|
||||
using namespace joblist;
|
||||
|
||||
|
||||
#include "errorcodes.h"
|
||||
#include "idberrorinfo.h"
|
||||
#include "errorids.h"
|
||||
@@ -122,23 +121,9 @@ namespace cal_impl_if
|
||||
{
|
||||
extern bool nonConstFunc(Item_func* ifp);
|
||||
|
||||
void gp_walk_info::mergeTableStatistics(const TableStatisticsMap& aTableStatisticsMap)
|
||||
void gp_walk_info::mergeTableStatistics(const TableStatistics& aTableStatistics)
|
||||
{
|
||||
for (auto& [schemaAndTableName, aColumnStatisticsMap] : aTableStatisticsMap)
|
||||
{
|
||||
auto tableStatisticsMapIt = tableStatisticsMap.find(schemaAndTableName);
|
||||
if (tableStatisticsMapIt == tableStatisticsMap.end())
|
||||
{
|
||||
tableStatisticsMap[schemaAndTableName] = aColumnStatisticsMap;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (auto& [columnName, histogram] : aColumnStatisticsMap)
|
||||
{
|
||||
tableStatisticsMapIt->second[columnName] = histogram;
|
||||
}
|
||||
}
|
||||
}
|
||||
return tableStatistics.mergeTableStatistics(aTableStatistics);
|
||||
}
|
||||
|
||||
} // namespace cal_impl_if
|
||||
|
||||
@@ -122,15 +122,12 @@ typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap;
|
||||
|
||||
struct ColumnStatistics
|
||||
{
|
||||
ColumnStatistics(execplan::SimpleColumn column, std::vector<Histogram_json_hb*> histograms)
|
||||
ColumnStatistics(execplan::SimpleColumn& column, std::vector<Histogram_json_hb*> histograms)
|
||||
: column(column), histograms(histograms)
|
||||
{
|
||||
}
|
||||
ColumnStatistics() = default;
|
||||
|
||||
execplan::SimpleColumn column;
|
||||
std::vector<Histogram_json_hb*> histograms;
|
||||
|
||||
std::vector<Histogram_json_hb*>& getHistograms()
|
||||
{
|
||||
return histograms;
|
||||
@@ -140,13 +137,82 @@ struct ColumnStatistics
|
||||
{
|
||||
return column;
|
||||
}
|
||||
|
||||
execplan::SimpleColumn column;
|
||||
std::vector<Histogram_json_hb*> histograms;
|
||||
Field* min{nullptr};
|
||||
Field* max{nullptr};
|
||||
};
|
||||
|
||||
using ColumnName = std::string;
|
||||
using MDBColumnStatistics = Column_statistics;
|
||||
using ColumnStatisticsMap = std::unordered_map<ColumnName, ColumnStatistics>;
|
||||
using TableStatisticsMap =
|
||||
std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>;
|
||||
|
||||
struct TableStatistics
|
||||
{
|
||||
TableStatistics() = default;
|
||||
|
||||
void createOrUpdate(SchemaAndTableName tableName, const char* fieldName, execplan::SimpleColumn& sc,
|
||||
MDBColumnStatistics* statistics)
|
||||
{
|
||||
auto* histogram = dynamic_cast<Histogram_json_hb*>(statistics->histogram);
|
||||
|
||||
auto tableStatisticsIt = tableStatistics_.find(tableName);
|
||||
if (tableStatisticsIt == tableStatistics_.end())
|
||||
{
|
||||
tableStatistics_[tableName][fieldName] = {sc, {histogram}};
|
||||
}
|
||||
else
|
||||
{
|
||||
auto columnStatisticsMapIt = tableStatisticsIt->second.find(fieldName);
|
||||
if (columnStatisticsMapIt == tableStatisticsIt->second.end())
|
||||
{
|
||||
tableStatisticsIt->second[fieldName] = {sc, {histogram}};
|
||||
}
|
||||
else
|
||||
{
|
||||
auto& columnStatisticsVec = columnStatisticsMapIt->second.getHistograms();
|
||||
columnStatisticsVec.push_back(histogram);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<ColumnStatisticsMap> findStatisticsForATable(SchemaAndTableName& schemaAndTableName)
|
||||
{
|
||||
auto tableStatisticsIt = tableStatistics_.find(schemaAndTableName);
|
||||
|
||||
if (tableStatisticsIt == tableStatistics_.end())
|
||||
{
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
return {tableStatisticsIt->second};
|
||||
}
|
||||
|
||||
void mergeTableStatistics(const TableStatistics& aTableStatistics)
|
||||
{
|
||||
for (auto& [schemaAndTableName, aColumnStatisticsMap] : aTableStatistics.tableStatistics_)
|
||||
{
|
||||
auto tableStatisticsIt = tableStatistics_.find(schemaAndTableName);
|
||||
if (tableStatisticsIt == tableStatistics_.end())
|
||||
{
|
||||
tableStatistics_[schemaAndTableName] = aColumnStatisticsMap;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (auto& [columnName, histogram] : aColumnStatisticsMap)
|
||||
{
|
||||
tableStatisticsIt->second[columnName] = histogram;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TableStatisticsMap tableStatistics_;
|
||||
};
|
||||
|
||||
// This structure is used to store MDB AST -> CSEP translation context.
|
||||
// There are column statistics for some columns in a query.
|
||||
// As per 23.10.5 "some" means first column of the index in projection list of CSEP
|
||||
@@ -161,7 +227,7 @@ struct gp_walk_info
|
||||
execplan::CalpontSelectExecutionPlan::ReturnedColumnList orderByCols;
|
||||
std::vector<Item*> extSelAggColsItems;
|
||||
execplan::CalpontSelectExecutionPlan::ColumnMap columnMap;
|
||||
TableStatisticsMap tableStatisticsMap;
|
||||
TableStatistics tableStatistics;
|
||||
// This vector temporarily holds the projection columns to be added
|
||||
// to the returnedCols vector for subquery processing. It will be appended
|
||||
// to the end of returnedCols when the processing is finished.
|
||||
@@ -252,7 +318,7 @@ struct gp_walk_info
|
||||
SubQuery** subQueriesChain;
|
||||
|
||||
gp_walk_info(long timeZone_, SubQuery** subQueriesChain_)
|
||||
: tableStatisticsMap({})
|
||||
: tableStatistics({})
|
||||
, sessionid(0)
|
||||
, fatalParseError(false)
|
||||
, condPush(false)
|
||||
@@ -284,17 +350,10 @@ struct gp_walk_info
|
||||
}
|
||||
~gp_walk_info();
|
||||
|
||||
void mergeTableStatistics(const TableStatisticsMap& tableStatisticsMap);
|
||||
void mergeTableStatistics(const TableStatistics& tableStatistics);
|
||||
std::optional<ColumnStatisticsMap> findStatisticsForATable(SchemaAndTableName& schemaAndTableName)
|
||||
{
|
||||
auto tableStatisticsMapIt = tableStatisticsMap.find(schemaAndTableName);
|
||||
|
||||
if (tableStatisticsMapIt == tableStatisticsMap.end())
|
||||
{
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
return {tableStatisticsMapIt->second};
|
||||
return tableStatistics.findStatisticsForATable(schemaAndTableName);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -277,7 +277,7 @@ execplan::ParseTree* ScalarSub::buildParseTree(PredicateOperator* op)
|
||||
}
|
||||
|
||||
// Insert column statistics
|
||||
fGwip.mergeTableStatistics(gwi.tableStatisticsMap);
|
||||
fGwip.mergeTableStatistics(gwi.tableStatistics);
|
||||
|
||||
fGwip.subselectList.push_back(csep);
|
||||
|
||||
|
||||
@@ -97,7 +97,7 @@ SCSEP SelectSubQuery::transform()
|
||||
}
|
||||
|
||||
// Insert column statistics
|
||||
fGwip.mergeTableStatistics(gwi.tableStatisticsMap);
|
||||
fGwip.mergeTableStatistics(gwi.tableStatistics);
|
||||
|
||||
// Insert subselect CSEP
|
||||
fGwip.subselectList.push_back(csep);
|
||||
|
||||
@@ -73,14 +73,13 @@ bool someAreForeignTables(execplan::CalpontSelectExecutionPlan& csep)
|
||||
bool someForeignTablesHasStatisticsAndMbIndex(execplan::CalpontSelectExecutionPlan& csep,
|
||||
optimizer::RBOptimizerContext& ctx)
|
||||
{
|
||||
return std::any_of(
|
||||
csep.tableList().begin(), csep.tableList().end(),
|
||||
[&ctx](const auto& table)
|
||||
{
|
||||
cal_impl_if::SchemaAndTableName schemaAndTableName = {table.schema, table.table};
|
||||
return (!table.isColumnstore() && ctx.getGwi().tableStatisticsMap.find(schemaAndTableName) !=
|
||||
ctx.getGwi().tableStatisticsMap.end());
|
||||
});
|
||||
return std::any_of(csep.tableList().begin(), csep.tableList().end(),
|
||||
[&ctx](const auto& table)
|
||||
{
|
||||
cal_impl_if::SchemaAndTableName schemaAndTableName = {table.schema, table.table};
|
||||
return (!table.isColumnstore() &&
|
||||
ctx.getGwi().tableStatistics.findStatisticsForATable(schemaAndTableName));
|
||||
});
|
||||
}
|
||||
|
||||
// This routine produces a new ParseTree that is AND(lowerBand <= column, column <= upperBand)
|
||||
@@ -230,15 +229,14 @@ std::optional<std::pair<execplan::SimpleColumn&, Histogram_json_hb*>> chooseKeyC
|
||||
{
|
||||
cal_impl_if::SchemaAndTableName schemaAndTableName = {targetTable.schema, targetTable.table};
|
||||
|
||||
auto tableColumnsStatisticsIt = ctx.getGwi().tableStatisticsMap.find(schemaAndTableName);
|
||||
if (tableColumnsStatisticsIt == ctx.getGwi().tableStatisticsMap.end() ||
|
||||
tableColumnsStatisticsIt->second.empty())
|
||||
auto tableColumnsStatistics = ctx.getGwi().tableStatistics.findStatisticsForATable(schemaAndTableName);
|
||||
if (!tableColumnsStatistics)
|
||||
{
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// TODO take some column and some stats for it!!!
|
||||
for (auto& [columnName, columnStatistics] : tableColumnsStatisticsIt->second)
|
||||
for (auto& [columnName, columnStatistics] : tableColumnsStatistics.value())
|
||||
{
|
||||
auto& sc = columnStatistics.getColumn();
|
||||
auto& columnStatisticsVec = columnStatistics.getHistograms();
|
||||
|
||||
@@ -85,14 +85,14 @@ class RBOHybridTest : public ::testing::Test
|
||||
cal_impl_if::SchemaAndTableName,
|
||||
std::map<std::string, std::pair<execplan::SimpleColumn, std::vector<Histogram_json_hb*>>>,
|
||||
cal_impl_if::SchemaAndTableNameHash>
|
||||
tableStatisticsMap;
|
||||
tableStatistics;
|
||||
|
||||
// Helper method to find statistics for a table
|
||||
std::map<std::string, std::pair<execplan::SimpleColumn, std::vector<Histogram_json_hb*>>>*
|
||||
findStatisticsForATable(const cal_impl_if::SchemaAndTableName& schemaAndTable)
|
||||
{
|
||||
auto it = tableStatisticsMap.find(schemaAndTable);
|
||||
return (it != tableStatisticsMap.end()) ? &(it->second) : nullptr;
|
||||
auto it = tableStatistics.find(schemaAndTable);
|
||||
return (it != tableStatistics.end()) ? &(it->second) : nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -116,7 +116,7 @@ class RBOHybridTest : public ::testing::Test
|
||||
cal_impl_if::SchemaAndTableName schemaAndTable = {schema, table};
|
||||
execplan::SimpleColumn simpleCol; // Mock column
|
||||
std::vector<Histogram_json_hb*> histograms = {histogram};
|
||||
mockGWI.tableStatisticsMap[schemaAndTable][column] = std::make_pair(simpleCol, histograms);
|
||||
mockGWI.tableStatistics[schemaAndTable][column] = std::make_pair(simpleCol, histograms);
|
||||
}
|
||||
|
||||
// Get the mock gateway info for testing helper functions
|
||||
|
||||
Reference in New Issue
Block a user