From fb98e46bfc3dc0b3152fd9d3ece2a4ea26379bd7 Mon Sep 17 00:00:00 2001 From: drrtuy Date: Tue, 7 Oct 2025 21:56:46 +0000 Subject: [PATCH] chore(QA,plugin): moving statistics code to prepare for PRIMARY KEY support in QA. --- dbcon/mysql/ha_exists_sub.cpp | 2 +- dbcon/mysql/ha_from_sub.cpp | 2 +- dbcon/mysql/ha_in_sub.cpp | 2 +- dbcon/mysql/ha_mcs_execplan.cpp | 30 ++-------- dbcon/mysql/ha_mcs_impl.cpp | 19 +----- dbcon/mysql/ha_mcs_impl_if.h | 89 +++++++++++++++++++++++----- dbcon/mysql/ha_scalar_sub.cpp | 2 +- dbcon/mysql/ha_select_sub.cpp | 2 +- dbcon/rbo/rbo_apply_parallel_ces.cpp | 22 ++++--- tests/rbo_hybrid.cpp | 8 +-- 10 files changed, 100 insertions(+), 78 deletions(-) diff --git a/dbcon/mysql/ha_exists_sub.cpp b/dbcon/mysql/ha_exists_sub.cpp index c6d649322..9b08a0a7e 100644 --- a/dbcon/mysql/ha_exists_sub.cpp +++ b/dbcon/mysql/ha_exists_sub.cpp @@ -128,7 +128,7 @@ execplan::ParseTree* ExistsSub::transform() } // Insert column statistics - fGwip.mergeTableStatistics(gwi.tableStatisticsMap); + fGwip.mergeTableStatistics(gwi.tableStatistics); // remove outer query tables CalpontSelectExecutionPlan::TableList tblist; diff --git a/dbcon/mysql/ha_from_sub.cpp b/dbcon/mysql/ha_from_sub.cpp index 6a309aff5..8f9da0c76 100644 --- a/dbcon/mysql/ha_from_sub.cpp +++ b/dbcon/mysql/ha_from_sub.cpp @@ -237,7 +237,7 @@ SCSEP FromSubQuery::transform() } // Insert column statistics - fGwip.mergeTableStatistics(gwi.tableStatisticsMap); + fGwip.mergeTableStatistics(gwi.tableStatistics); fGwip.subselectList.push_back(csep); return csep; diff --git a/dbcon/mysql/ha_in_sub.cpp b/dbcon/mysql/ha_in_sub.cpp index ed8b17daa..d0397f804 100644 --- a/dbcon/mysql/ha_in_sub.cpp +++ b/dbcon/mysql/ha_in_sub.cpp @@ -195,7 +195,7 @@ execplan::ParseTree* InSub::transform() } // Insert column statistics - fGwip.mergeTableStatistics(gwi.tableStatisticsMap); + fGwip.mergeTableStatistics(gwi.tableStatistics); // remove outer query tables CalpontSelectExecutionPlan::TableList tblist; diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index 0fde2aedc..014b79a81 100644 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -5267,31 +5267,11 @@ void extractColumnStatistics(TABLE_LIST* table_ptr, gp_walk_info& gwi) Field* field = table_ptr->table->key_info[j].key_part[0].field; if (field->read_stats) { - auto* histogram = dynamic_cast(field->read_stats->histogram); - if (histogram) - { - SchemaAndTableName tableName = {field->table->s->db.str, field->table->s->table_name.str}; - auto sc = - std::unique_ptr(buildSimpleColumnFromFieldForStatistics(field, gwi)); - auto tableStatisticsMapIt = gwi.tableStatisticsMap.find(tableName); - if (tableStatisticsMapIt == gwi.tableStatisticsMap.end()) - { - gwi.tableStatisticsMap[tableName][field->field_name.str] = {*sc, {histogram}}; - } - else - { - auto columnStatisticsMapIt = tableStatisticsMapIt->second.find(field->field_name.str); - if (columnStatisticsMapIt == tableStatisticsMapIt->second.end()) - { - tableStatisticsMapIt->second[field->field_name.str] = {*sc, {histogram}}; - } - else - { - auto& columnStatisticsVec = columnStatisticsMapIt->second.getHistograms(); - columnStatisticsVec.push_back(histogram); - } - } - } + SchemaAndTableName tableName = {field->table->s->db.str, field->table->s->table_name.str}; + auto sc = + std::unique_ptr(buildSimpleColumnFromFieldForStatistics(field, gwi)); + assert(field->field_name.str); + gwi.tableStatistics.createOrUpdate(tableName, field->field_name.str, *sc, field->read_stats); } } } diff --git a/dbcon/mysql/ha_mcs_impl.cpp b/dbcon/mysql/ha_mcs_impl.cpp index 73a5a7513..e2b47e997 100644 --- a/dbcon/mysql/ha_mcs_impl.cpp +++ b/dbcon/mysql/ha_mcs_impl.cpp @@ -98,7 +98,6 @@ using namespace execplan; using namespace joblist; - #include "errorcodes.h" #include "idberrorinfo.h" #include "errorids.h" @@ -122,23 +121,9 @@ namespace cal_impl_if { extern bool nonConstFunc(Item_func* ifp); -void gp_walk_info::mergeTableStatistics(const TableStatisticsMap& aTableStatisticsMap) +void gp_walk_info::mergeTableStatistics(const TableStatistics& aTableStatistics) { - for (auto& [schemaAndTableName, aColumnStatisticsMap] : aTableStatisticsMap) - { - auto tableStatisticsMapIt = tableStatisticsMap.find(schemaAndTableName); - if (tableStatisticsMapIt == tableStatisticsMap.end()) - { - tableStatisticsMap[schemaAndTableName] = aColumnStatisticsMap; - } - else - { - for (auto& [columnName, histogram] : aColumnStatisticsMap) - { - tableStatisticsMapIt->second[columnName] = histogram; - } - } - } + return tableStatistics.mergeTableStatistics(aTableStatistics); } } // namespace cal_impl_if diff --git a/dbcon/mysql/ha_mcs_impl_if.h b/dbcon/mysql/ha_mcs_impl_if.h index 5ccdb65e9..d2ca6ae30 100644 --- a/dbcon/mysql/ha_mcs_impl_if.h +++ b/dbcon/mysql/ha_mcs_impl_if.h @@ -122,15 +122,12 @@ typedef std::tr1::unordered_map TableOuterJoinMap; struct ColumnStatistics { - ColumnStatistics(execplan::SimpleColumn column, std::vector histograms) + ColumnStatistics(execplan::SimpleColumn& column, std::vector histograms) : column(column), histograms(histograms) { } ColumnStatistics() = default; - execplan::SimpleColumn column; - std::vector histograms; - std::vector& getHistograms() { return histograms; @@ -140,13 +137,82 @@ struct ColumnStatistics { return column; } + + execplan::SimpleColumn column; + std::vector histograms; + Field* min{nullptr}; + Field* max{nullptr}; }; using ColumnName = std::string; +using MDBColumnStatistics = Column_statistics; using ColumnStatisticsMap = std::unordered_map; using TableStatisticsMap = std::unordered_map; +struct TableStatistics +{ + TableStatistics() = default; + + void createOrUpdate(SchemaAndTableName tableName, const char* fieldName, execplan::SimpleColumn& sc, + MDBColumnStatistics* statistics) + { + auto* histogram = dynamic_cast(statistics->histogram); + + auto tableStatisticsIt = tableStatistics_.find(tableName); + if (tableStatisticsIt == tableStatistics_.end()) + { + tableStatistics_[tableName][fieldName] = {sc, {histogram}}; + } + else + { + auto columnStatisticsMapIt = tableStatisticsIt->second.find(fieldName); + if (columnStatisticsMapIt == tableStatisticsIt->second.end()) + { + tableStatisticsIt->second[fieldName] = {sc, {histogram}}; + } + else + { + auto& columnStatisticsVec = columnStatisticsMapIt->second.getHistograms(); + columnStatisticsVec.push_back(histogram); + } + } + } + + std::optional findStatisticsForATable(SchemaAndTableName& schemaAndTableName) + { + auto tableStatisticsIt = tableStatistics_.find(schemaAndTableName); + + if (tableStatisticsIt == tableStatistics_.end()) + { + return std::nullopt; + } + + return {tableStatisticsIt->second}; + } + + void mergeTableStatistics(const TableStatistics& aTableStatistics) + { + for (auto& [schemaAndTableName, aColumnStatisticsMap] : aTableStatistics.tableStatistics_) + { + auto tableStatisticsIt = tableStatistics_.find(schemaAndTableName); + if (tableStatisticsIt == tableStatistics_.end()) + { + tableStatistics_[schemaAndTableName] = aColumnStatisticsMap; + } + else + { + for (auto& [columnName, histogram] : aColumnStatisticsMap) + { + tableStatisticsIt->second[columnName] = histogram; + } + } + } + } + + TableStatisticsMap tableStatistics_; +}; + // This structure is used to store MDB AST -> CSEP translation context. // There is a column statistics for some columns in a query. // As per 23.10.5 "some" means first column of the index in projection list of CSEP @@ -161,7 +227,7 @@ struct gp_walk_info execplan::CalpontSelectExecutionPlan::ReturnedColumnList orderByCols; std::vector extSelAggColsItems; execplan::CalpontSelectExecutionPlan::ColumnMap columnMap; - TableStatisticsMap tableStatisticsMap; + TableStatistics tableStatistics; // This vector temporarily hold the projection columns to be added // to the returnedCols vector for subquery processing. It will be appended // to the end of returnedCols when the processing is finished. @@ -252,7 +318,7 @@ struct gp_walk_info SubQuery** subQueriesChain; gp_walk_info(long timeZone_, SubQuery** subQueriesChain_) - : tableStatisticsMap({}) + : tableStatistics({}) , sessionid(0) , fatalParseError(false) , condPush(false) @@ -284,17 +350,10 @@ struct gp_walk_info } ~gp_walk_info(); - void mergeTableStatistics(const TableStatisticsMap& tableStatisticsMap); + void mergeTableStatistics(const TableStatistics& tableStatistics); std::optional findStatisticsForATable(SchemaAndTableName& schemaAndTableName) { - auto tableStatisticsMapIt = tableStatisticsMap.find(schemaAndTableName); - - if (tableStatisticsMapIt == tableStatisticsMap.end()) - { - return std::nullopt; - } - - return {tableStatisticsMapIt->second}; + return tableStatistics.findStatisticsForATable(schemaAndTableName); } }; diff --git a/dbcon/mysql/ha_scalar_sub.cpp b/dbcon/mysql/ha_scalar_sub.cpp index ccf7e5d43..6b4ff63ab 100644 --- a/dbcon/mysql/ha_scalar_sub.cpp +++ b/dbcon/mysql/ha_scalar_sub.cpp @@ -277,7 +277,7 @@ execplan::ParseTree* ScalarSub::buildParseTree(PredicateOperator* op) } // Insert column statistics - fGwip.mergeTableStatistics(gwi.tableStatisticsMap); + fGwip.mergeTableStatistics(gwi.tableStatistics); fGwip.subselectList.push_back(csep); diff --git a/dbcon/mysql/ha_select_sub.cpp b/dbcon/mysql/ha_select_sub.cpp index 8b5bde3bd..dcb7e3963 100644 --- a/dbcon/mysql/ha_select_sub.cpp +++ b/dbcon/mysql/ha_select_sub.cpp @@ -97,7 +97,7 @@ SCSEP SelectSubQuery::transform() } // Insert column statistics - fGwip.mergeTableStatistics(gwi.tableStatisticsMap); + fGwip.mergeTableStatistics(gwi.tableStatistics); // Insert subselect CSEP fGwip.subselectList.push_back(csep); diff --git a/dbcon/rbo/rbo_apply_parallel_ces.cpp b/dbcon/rbo/rbo_apply_parallel_ces.cpp index d85c95609..e7c8c4037 100644 --- a/dbcon/rbo/rbo_apply_parallel_ces.cpp +++ b/dbcon/rbo/rbo_apply_parallel_ces.cpp @@ -73,14 +73,13 @@ bool someAreForeignTables(execplan::CalpontSelectExecutionPlan& csep) bool someForeignTablesHasStatisticsAndMbIndex(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx) { - return std::any_of( - csep.tableList().begin(), csep.tableList().end(), - [&ctx](const auto& table) - { - cal_impl_if::SchemaAndTableName schemaAndTableName = {table.schema, table.table}; - return (!table.isColumnstore() && ctx.getGwi().tableStatisticsMap.find(schemaAndTableName) != - ctx.getGwi().tableStatisticsMap.end()); - }); + return std::any_of(csep.tableList().begin(), csep.tableList().end(), + [&ctx](const auto& table) + { + cal_impl_if::SchemaAndTableName schemaAndTableName = {table.schema, table.table}; + return (!table.isColumnstore() && + ctx.getGwi().tableStatistics.findStatisticsForATable(schemaAndTableName)); + }); } // This routine produces a new ParseTree that is AND(lowerBand <= column, column <= upperBand) @@ -230,15 +229,14 @@ std::optional> chooseKeyC { cal_impl_if::SchemaAndTableName schemaAndTableName = {targetTable.schema, targetTable.table}; - auto tableColumnsStatisticsIt = ctx.getGwi().tableStatisticsMap.find(schemaAndTableName); - if (tableColumnsStatisticsIt == ctx.getGwi().tableStatisticsMap.end() || - tableColumnsStatisticsIt->second.empty()) + auto tableColumnsStatistics = ctx.getGwi().tableStatistics.findStatisticsForATable(schemaAndTableName); + if (!tableColumnsStatistics) { return std::nullopt; } // TODO take some column and some stats for it!!! - for (auto& [columnName, columnStatistics] : tableColumnsStatisticsIt->second) + for (auto& [columnName, columnStatistics] : tableColumnsStatistics.value()) { auto& sc = columnStatistics.getColumn(); auto& columnStatisticsVec = columnStatistics.getHistograms(); diff --git a/tests/rbo_hybrid.cpp b/tests/rbo_hybrid.cpp index 39b83bf5d..18196631a 100644 --- a/tests/rbo_hybrid.cpp +++ b/tests/rbo_hybrid.cpp @@ -85,14 +85,14 @@ class RBOHybridTest : public ::testing::Test cal_impl_if::SchemaAndTableName, std::map>>, cal_impl_if::SchemaAndTableNameHash> - tableStatisticsMap; + tableStatistics; // Helper method to find statistics for a table std::map>>* findStatisticsForATable(const cal_impl_if::SchemaAndTableName& schemaAndTable) { - auto it = tableStatisticsMap.find(schemaAndTable); - return (it != tableStatisticsMap.end()) ? &(it->second) : nullptr; + auto it = tableStatistics.find(schemaAndTable); + return (it != tableStatistics.end()) ? &(it->second) : nullptr; } }; @@ -116,7 +116,7 @@ class RBOHybridTest : public ::testing::Test cal_impl_if::SchemaAndTableName schemaAndTable = {schema, table}; execplan::SimpleColumn simpleCol; // Mock column std::vector histograms = {histogram}; - mockGWI.tableStatisticsMap[schemaAndTable][column] = std::make_pair(simpleCol, histograms); + mockGWI.tableStatistics[schemaAndTable][column] = std::make_pair(simpleCol, histograms); } // Get the mock gateway info for testing helper functions