You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-11-21 09:20:51 +03:00
chore(QA,plugin): moving statistics code to prepare for PRIMARY KEY support in QA.
This commit is contained in:
@@ -128,7 +128,7 @@ execplan::ParseTree* ExistsSub::transform()
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Insert column statistics
|
// Insert column statistics
|
||||||
fGwip.mergeTableStatistics(gwi.tableStatisticsMap);
|
fGwip.mergeTableStatistics(gwi.tableStatistics);
|
||||||
|
|
||||||
// remove outer query tables
|
// remove outer query tables
|
||||||
CalpontSelectExecutionPlan::TableList tblist;
|
CalpontSelectExecutionPlan::TableList tblist;
|
||||||
|
|||||||
@@ -237,7 +237,7 @@ SCSEP FromSubQuery::transform()
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Insert column statistics
|
// Insert column statistics
|
||||||
fGwip.mergeTableStatistics(gwi.tableStatisticsMap);
|
fGwip.mergeTableStatistics(gwi.tableStatistics);
|
||||||
|
|
||||||
fGwip.subselectList.push_back(csep);
|
fGwip.subselectList.push_back(csep);
|
||||||
return csep;
|
return csep;
|
||||||
|
|||||||
@@ -195,7 +195,7 @@ execplan::ParseTree* InSub::transform()
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Insert column statistics
|
// Insert column statistics
|
||||||
fGwip.mergeTableStatistics(gwi.tableStatisticsMap);
|
fGwip.mergeTableStatistics(gwi.tableStatistics);
|
||||||
|
|
||||||
// remove outer query tables
|
// remove outer query tables
|
||||||
CalpontSelectExecutionPlan::TableList tblist;
|
CalpontSelectExecutionPlan::TableList tblist;
|
||||||
|
|||||||
@@ -5267,31 +5267,11 @@ void extractColumnStatistics(TABLE_LIST* table_ptr, gp_walk_info& gwi)
|
|||||||
Field* field = table_ptr->table->key_info[j].key_part[0].field;
|
Field* field = table_ptr->table->key_info[j].key_part[0].field;
|
||||||
if (field->read_stats)
|
if (field->read_stats)
|
||||||
{
|
{
|
||||||
auto* histogram = dynamic_cast<Histogram_json_hb*>(field->read_stats->histogram);
|
SchemaAndTableName tableName = {field->table->s->db.str, field->table->s->table_name.str};
|
||||||
if (histogram)
|
auto sc =
|
||||||
{
|
std::unique_ptr<execplan::SimpleColumn>(buildSimpleColumnFromFieldForStatistics(field, gwi));
|
||||||
SchemaAndTableName tableName = {field->table->s->db.str, field->table->s->table_name.str};
|
assert(field->field_name.str);
|
||||||
auto sc =
|
gwi.tableStatistics.createOrUpdate(tableName, field->field_name.str, *sc, field->read_stats);
|
||||||
std::unique_ptr<execplan::SimpleColumn>(buildSimpleColumnFromFieldForStatistics(field, gwi));
|
|
||||||
auto tableStatisticsMapIt = gwi.tableStatisticsMap.find(tableName);
|
|
||||||
if (tableStatisticsMapIt == gwi.tableStatisticsMap.end())
|
|
||||||
{
|
|
||||||
gwi.tableStatisticsMap[tableName][field->field_name.str] = {*sc, {histogram}};
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
auto columnStatisticsMapIt = tableStatisticsMapIt->second.find(field->field_name.str);
|
|
||||||
if (columnStatisticsMapIt == tableStatisticsMapIt->second.end())
|
|
||||||
{
|
|
||||||
tableStatisticsMapIt->second[field->field_name.str] = {*sc, {histogram}};
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
auto& columnStatisticsVec = columnStatisticsMapIt->second.getHistograms();
|
|
||||||
columnStatisticsVec.push_back(histogram);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -98,7 +98,6 @@ using namespace execplan;
|
|||||||
|
|
||||||
using namespace joblist;
|
using namespace joblist;
|
||||||
|
|
||||||
|
|
||||||
#include "errorcodes.h"
|
#include "errorcodes.h"
|
||||||
#include "idberrorinfo.h"
|
#include "idberrorinfo.h"
|
||||||
#include "errorids.h"
|
#include "errorids.h"
|
||||||
@@ -122,23 +121,9 @@ namespace cal_impl_if
|
|||||||
{
|
{
|
||||||
extern bool nonConstFunc(Item_func* ifp);
|
extern bool nonConstFunc(Item_func* ifp);
|
||||||
|
|
||||||
void gp_walk_info::mergeTableStatistics(const TableStatisticsMap& aTableStatisticsMap)
|
void gp_walk_info::mergeTableStatistics(const TableStatistics& aTableStatistics)
|
||||||
{
|
{
|
||||||
for (auto& [schemaAndTableName, aColumnStatisticsMap] : aTableStatisticsMap)
|
return tableStatistics.mergeTableStatistics(aTableStatistics);
|
||||||
{
|
|
||||||
auto tableStatisticsMapIt = tableStatisticsMap.find(schemaAndTableName);
|
|
||||||
if (tableStatisticsMapIt == tableStatisticsMap.end())
|
|
||||||
{
|
|
||||||
tableStatisticsMap[schemaAndTableName] = aColumnStatisticsMap;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (auto& [columnName, histogram] : aColumnStatisticsMap)
|
|
||||||
{
|
|
||||||
tableStatisticsMapIt->second[columnName] = histogram;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace cal_impl_if
|
} // namespace cal_impl_if
|
||||||
|
|||||||
@@ -122,15 +122,12 @@ typedef std::tr1::unordered_map<TABLE_LIST*, uint> TableOuterJoinMap;
|
|||||||
|
|
||||||
struct ColumnStatistics
|
struct ColumnStatistics
|
||||||
{
|
{
|
||||||
ColumnStatistics(execplan::SimpleColumn column, std::vector<Histogram_json_hb*> histograms)
|
ColumnStatistics(execplan::SimpleColumn& column, std::vector<Histogram_json_hb*> histograms)
|
||||||
: column(column), histograms(histograms)
|
: column(column), histograms(histograms)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
ColumnStatistics() = default;
|
ColumnStatistics() = default;
|
||||||
|
|
||||||
execplan::SimpleColumn column;
|
|
||||||
std::vector<Histogram_json_hb*> histograms;
|
|
||||||
|
|
||||||
std::vector<Histogram_json_hb*>& getHistograms()
|
std::vector<Histogram_json_hb*>& getHistograms()
|
||||||
{
|
{
|
||||||
return histograms;
|
return histograms;
|
||||||
@@ -140,13 +137,82 @@ struct ColumnStatistics
|
|||||||
{
|
{
|
||||||
return column;
|
return column;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
execplan::SimpleColumn column;
|
||||||
|
std::vector<Histogram_json_hb*> histograms;
|
||||||
|
Field* min{nullptr};
|
||||||
|
Field* max{nullptr};
|
||||||
};
|
};
|
||||||
|
|
||||||
using ColumnName = std::string;
|
using ColumnName = std::string;
|
||||||
|
using MDBColumnStatistics = Column_statistics;
|
||||||
using ColumnStatisticsMap = std::unordered_map<ColumnName, ColumnStatistics>;
|
using ColumnStatisticsMap = std::unordered_map<ColumnName, ColumnStatistics>;
|
||||||
using TableStatisticsMap =
|
using TableStatisticsMap =
|
||||||
std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>;
|
std::unordered_map<SchemaAndTableName, ColumnStatisticsMap, SchemaAndTableNameHash>;
|
||||||
|
|
||||||
|
struct TableStatistics
|
||||||
|
{
|
||||||
|
TableStatistics() = default;
|
||||||
|
|
||||||
|
void createOrUpdate(SchemaAndTableName tableName, const char* fieldName, execplan::SimpleColumn& sc,
|
||||||
|
MDBColumnStatistics* statistics)
|
||||||
|
{
|
||||||
|
auto* histogram = dynamic_cast<Histogram_json_hb*>(statistics->histogram);
|
||||||
|
|
||||||
|
auto tableStatisticsIt = tableStatistics_.find(tableName);
|
||||||
|
if (tableStatisticsIt == tableStatistics_.end())
|
||||||
|
{
|
||||||
|
tableStatistics_[tableName][fieldName] = {sc, {histogram}};
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto columnStatisticsMapIt = tableStatisticsIt->second.find(fieldName);
|
||||||
|
if (columnStatisticsMapIt == tableStatisticsIt->second.end())
|
||||||
|
{
|
||||||
|
tableStatisticsIt->second[fieldName] = {sc, {histogram}};
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto& columnStatisticsVec = columnStatisticsMapIt->second.getHistograms();
|
||||||
|
columnStatisticsVec.push_back(histogram);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<ColumnStatisticsMap> findStatisticsForATable(SchemaAndTableName& schemaAndTableName)
|
||||||
|
{
|
||||||
|
auto tableStatisticsIt = tableStatistics_.find(schemaAndTableName);
|
||||||
|
|
||||||
|
if (tableStatisticsIt == tableStatistics_.end())
|
||||||
|
{
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
return {tableStatisticsIt->second};
|
||||||
|
}
|
||||||
|
|
||||||
|
void mergeTableStatistics(const TableStatistics& aTableStatistics)
|
||||||
|
{
|
||||||
|
for (auto& [schemaAndTableName, aColumnStatisticsMap] : aTableStatistics.tableStatistics_)
|
||||||
|
{
|
||||||
|
auto tableStatisticsIt = tableStatistics_.find(schemaAndTableName);
|
||||||
|
if (tableStatisticsIt == tableStatistics_.end())
|
||||||
|
{
|
||||||
|
tableStatistics_[schemaAndTableName] = aColumnStatisticsMap;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (auto& [columnName, histogram] : aColumnStatisticsMap)
|
||||||
|
{
|
||||||
|
tableStatisticsIt->second[columnName] = histogram;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TableStatisticsMap tableStatistics_;
|
||||||
|
};
|
||||||
|
|
||||||
// This structure is used to store MDB AST -> CSEP translation context.
|
// This structure is used to store MDB AST -> CSEP translation context.
|
||||||
// There is a column statistics for some columns in a query.
|
// There is a column statistics for some columns in a query.
|
||||||
// As per 23.10.5 "some" means first column of the index in projection list of CSEP
|
// As per 23.10.5 "some" means first column of the index in projection list of CSEP
|
||||||
@@ -161,7 +227,7 @@ struct gp_walk_info
|
|||||||
execplan::CalpontSelectExecutionPlan::ReturnedColumnList orderByCols;
|
execplan::CalpontSelectExecutionPlan::ReturnedColumnList orderByCols;
|
||||||
std::vector<Item*> extSelAggColsItems;
|
std::vector<Item*> extSelAggColsItems;
|
||||||
execplan::CalpontSelectExecutionPlan::ColumnMap columnMap;
|
execplan::CalpontSelectExecutionPlan::ColumnMap columnMap;
|
||||||
TableStatisticsMap tableStatisticsMap;
|
TableStatistics tableStatistics;
|
||||||
// This vector temporarily hold the projection columns to be added
|
// This vector temporarily hold the projection columns to be added
|
||||||
// to the returnedCols vector for subquery processing. It will be appended
|
// to the returnedCols vector for subquery processing. It will be appended
|
||||||
// to the end of returnedCols when the processing is finished.
|
// to the end of returnedCols when the processing is finished.
|
||||||
@@ -252,7 +318,7 @@ struct gp_walk_info
|
|||||||
SubQuery** subQueriesChain;
|
SubQuery** subQueriesChain;
|
||||||
|
|
||||||
gp_walk_info(long timeZone_, SubQuery** subQueriesChain_)
|
gp_walk_info(long timeZone_, SubQuery** subQueriesChain_)
|
||||||
: tableStatisticsMap({})
|
: tableStatistics({})
|
||||||
, sessionid(0)
|
, sessionid(0)
|
||||||
, fatalParseError(false)
|
, fatalParseError(false)
|
||||||
, condPush(false)
|
, condPush(false)
|
||||||
@@ -284,17 +350,10 @@ struct gp_walk_info
|
|||||||
}
|
}
|
||||||
~gp_walk_info();
|
~gp_walk_info();
|
||||||
|
|
||||||
void mergeTableStatistics(const TableStatisticsMap& tableStatisticsMap);
|
void mergeTableStatistics(const TableStatistics& tableStatistics);
|
||||||
std::optional<ColumnStatisticsMap> findStatisticsForATable(SchemaAndTableName& schemaAndTableName)
|
std::optional<ColumnStatisticsMap> findStatisticsForATable(SchemaAndTableName& schemaAndTableName)
|
||||||
{
|
{
|
||||||
auto tableStatisticsMapIt = tableStatisticsMap.find(schemaAndTableName);
|
return tableStatistics.findStatisticsForATable(schemaAndTableName);
|
||||||
|
|
||||||
if (tableStatisticsMapIt == tableStatisticsMap.end())
|
|
||||||
{
|
|
||||||
return std::nullopt;
|
|
||||||
}
|
|
||||||
|
|
||||||
return {tableStatisticsMapIt->second};
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -277,7 +277,7 @@ execplan::ParseTree* ScalarSub::buildParseTree(PredicateOperator* op)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Insert column statistics
|
// Insert column statistics
|
||||||
fGwip.mergeTableStatistics(gwi.tableStatisticsMap);
|
fGwip.mergeTableStatistics(gwi.tableStatistics);
|
||||||
|
|
||||||
fGwip.subselectList.push_back(csep);
|
fGwip.subselectList.push_back(csep);
|
||||||
|
|
||||||
|
|||||||
@@ -97,7 +97,7 @@ SCSEP SelectSubQuery::transform()
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Insert column statistics
|
// Insert column statistics
|
||||||
fGwip.mergeTableStatistics(gwi.tableStatisticsMap);
|
fGwip.mergeTableStatistics(gwi.tableStatistics);
|
||||||
|
|
||||||
// Insert subselect CSEP
|
// Insert subselect CSEP
|
||||||
fGwip.subselectList.push_back(csep);
|
fGwip.subselectList.push_back(csep);
|
||||||
|
|||||||
@@ -73,14 +73,13 @@ bool someAreForeignTables(execplan::CalpontSelectExecutionPlan& csep)
|
|||||||
bool someForeignTablesHasStatisticsAndMbIndex(execplan::CalpontSelectExecutionPlan& csep,
|
bool someForeignTablesHasStatisticsAndMbIndex(execplan::CalpontSelectExecutionPlan& csep,
|
||||||
optimizer::RBOptimizerContext& ctx)
|
optimizer::RBOptimizerContext& ctx)
|
||||||
{
|
{
|
||||||
return std::any_of(
|
return std::any_of(csep.tableList().begin(), csep.tableList().end(),
|
||||||
csep.tableList().begin(), csep.tableList().end(),
|
[&ctx](const auto& table)
|
||||||
[&ctx](const auto& table)
|
{
|
||||||
{
|
cal_impl_if::SchemaAndTableName schemaAndTableName = {table.schema, table.table};
|
||||||
cal_impl_if::SchemaAndTableName schemaAndTableName = {table.schema, table.table};
|
return (!table.isColumnstore() &&
|
||||||
return (!table.isColumnstore() && ctx.getGwi().tableStatisticsMap.find(schemaAndTableName) !=
|
ctx.getGwi().tableStatistics.findStatisticsForATable(schemaAndTableName));
|
||||||
ctx.getGwi().tableStatisticsMap.end());
|
});
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// This routine produces a new ParseTree that is AND(lowerBand <= column, column <= upperBand)
|
// This routine produces a new ParseTree that is AND(lowerBand <= column, column <= upperBand)
|
||||||
@@ -230,15 +229,14 @@ std::optional<std::pair<execplan::SimpleColumn&, Histogram_json_hb*>> chooseKeyC
|
|||||||
{
|
{
|
||||||
cal_impl_if::SchemaAndTableName schemaAndTableName = {targetTable.schema, targetTable.table};
|
cal_impl_if::SchemaAndTableName schemaAndTableName = {targetTable.schema, targetTable.table};
|
||||||
|
|
||||||
auto tableColumnsStatisticsIt = ctx.getGwi().tableStatisticsMap.find(schemaAndTableName);
|
auto tableColumnsStatistics = ctx.getGwi().tableStatistics.findStatisticsForATable(schemaAndTableName);
|
||||||
if (tableColumnsStatisticsIt == ctx.getGwi().tableStatisticsMap.end() ||
|
if (!tableColumnsStatistics)
|
||||||
tableColumnsStatisticsIt->second.empty())
|
|
||||||
{
|
{
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO take some column and some stats for it!!!
|
// TODO take some column and some stats for it!!!
|
||||||
for (auto& [columnName, columnStatistics] : tableColumnsStatisticsIt->second)
|
for (auto& [columnName, columnStatistics] : tableColumnsStatistics.value())
|
||||||
{
|
{
|
||||||
auto& sc = columnStatistics.getColumn();
|
auto& sc = columnStatistics.getColumn();
|
||||||
auto& columnStatisticsVec = columnStatistics.getHistograms();
|
auto& columnStatisticsVec = columnStatistics.getHistograms();
|
||||||
|
|||||||
@@ -85,14 +85,14 @@ class RBOHybridTest : public ::testing::Test
|
|||||||
cal_impl_if::SchemaAndTableName,
|
cal_impl_if::SchemaAndTableName,
|
||||||
std::map<std::string, std::pair<execplan::SimpleColumn, std::vector<Histogram_json_hb*>>>,
|
std::map<std::string, std::pair<execplan::SimpleColumn, std::vector<Histogram_json_hb*>>>,
|
||||||
cal_impl_if::SchemaAndTableNameHash>
|
cal_impl_if::SchemaAndTableNameHash>
|
||||||
tableStatisticsMap;
|
tableStatistics;
|
||||||
|
|
||||||
// Helper method to find statistics for a table
|
// Helper method to find statistics for a table
|
||||||
std::map<std::string, std::pair<execplan::SimpleColumn, std::vector<Histogram_json_hb*>>>*
|
std::map<std::string, std::pair<execplan::SimpleColumn, std::vector<Histogram_json_hb*>>>*
|
||||||
findStatisticsForATable(const cal_impl_if::SchemaAndTableName& schemaAndTable)
|
findStatisticsForATable(const cal_impl_if::SchemaAndTableName& schemaAndTable)
|
||||||
{
|
{
|
||||||
auto it = tableStatisticsMap.find(schemaAndTable);
|
auto it = tableStatistics.find(schemaAndTable);
|
||||||
return (it != tableStatisticsMap.end()) ? &(it->second) : nullptr;
|
return (it != tableStatistics.end()) ? &(it->second) : nullptr;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -116,7 +116,7 @@ class RBOHybridTest : public ::testing::Test
|
|||||||
cal_impl_if::SchemaAndTableName schemaAndTable = {schema, table};
|
cal_impl_if::SchemaAndTableName schemaAndTable = {schema, table};
|
||||||
execplan::SimpleColumn simpleCol; // Mock column
|
execplan::SimpleColumn simpleCol; // Mock column
|
||||||
std::vector<Histogram_json_hb*> histograms = {histogram};
|
std::vector<Histogram_json_hb*> histograms = {histogram};
|
||||||
mockGWI.tableStatisticsMap[schemaAndTable][column] = std::make_pair(simpleCol, histograms);
|
mockGWI.tableStatistics[schemaAndTable][column] = std::make_pair(simpleCol, histograms);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the mock gateway info for testing helper functions
|
// Get the mock gateway info for testing helper functions
|
||||||
|
|||||||
Reference in New Issue
Block a user