From 112ba9f162804adb5e847fa11747cd1499e8075c Mon Sep 17 00:00:00 2001 From: drrtuy Date: Wed, 30 Jul 2025 22:36:03 +0000 Subject: [PATCH] feat(rbo,rules,QA): changed the way statistics are collected --- dbcon/mysql/ha_mcs_execplan.cpp | 36 +++++++++++++++++++++++--- dbcon/mysql/rbo_apply_parallel_ces.cpp | 13 ++++++++-- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index b5dec04b0..321671eea 100644 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -5207,9 +5207,38 @@ int processFrom(bool& isUnion, SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& // trigger system catalog cache if (columnStore) + { gwi.csc->columnRIDs( make_table(table_ptr->db.str, table_ptr->table_name.str, lower_case_table_names), true); - + } + else + { + for (uint j = 0; j < table_ptr->table->s->keys; j++) + { + // for (uint i = 0; i < table_ptr->table->s->key_info[j].usable_key_parts; i++) + { + Field* field = table_ptr->table->key_info[j].key_part[0].field; + std::cout << "j index " << j << " i column " << 0 << " fieldnr " + << table_ptr->table->key_info[j].key_part[0].fieldnr << " " << field->field_name.str; + if (field->read_stats) + { + auto* histogram = dynamic_cast(field->read_stats->histogram); + if (histogram) + { + std::cout << " has stats "; + SchemaAndTableName tableName = {field->table->s->db.str, + field->table->s->table_name.str}; + gwi.tableStatisticsMap[tableName][field->field_name.str] = *histogram; + } + else + { + std::cout << " no stats "; + } + } + std::cout << std::endl; + } + } + } string table_name = table_ptr->table_name.str; // @bug5523 @@ -6307,7 +6336,8 @@ void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi) auto* histogram = dynamic_cast(ifp->field->read_stats->histogram); if (histogram) { - SchemaAndTableName tableName = {ifp->field->table->s->db.str, ifp->field->table->s->table_name.str}; + SchemaAndTableName tableName = 
{ifp->field->table->s->db.str, + ifp->field->table->s->table_name.str}; auto tableStatisticsMapIt = gwi.tableStatisticsMap.find(tableName); if (tableStatisticsMapIt == gwi.tableStatisticsMap.end()) { @@ -6418,7 +6448,7 @@ int processSelect(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep, vector case Item::FIELD_ITEM: { Item_field* ifp = (Item_field*)item; - extractColumnStatistics(ifp, gwi); + // extractColumnStatistics(ifp, gwi); // Handle * case if (ifp->field_name.length && string(ifp->field_name.str) == "*") { diff --git a/dbcon/mysql/rbo_apply_parallel_ces.cpp b/dbcon/mysql/rbo_apply_parallel_ces.cpp index 6cb5d7638..2dbadf773 100644 --- a/dbcon/mysql/rbo_apply_parallel_ces.cpp +++ b/dbcon/mysql/rbo_apply_parallel_ces.cpp @@ -64,7 +64,7 @@ bool someAreForeignTables(execplan::CalpontSelectExecutionPlan& csep) [](const auto& table) { return !table.isColumnstore(); }); } -bool someForeignTablesHasIndex(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx) +bool someForeignTablesHasStatisticsAndMbIndex(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx) { return std::any_of( csep.tableList().begin(), csep.tableList().end(), @@ -83,7 +83,7 @@ bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep, optimizer::RB // TODO filter out CSEPs with orderBy, groupBy, having // Filter out tables that were re-written. 
// return tables.size() == 1 && !tables[0].isColumnstore() && !tableIsInUnion(tables[0], csep); - return someAreForeignTables(csep) && someForeignTablesHasIndex(csep, ctx); + return someAreForeignTables(csep) && someForeignTablesHasStatisticsAndMbIndex(csep, ctx); } // This routine produces a new ParseTree that is AND(lowerBand <= column, column <= upperBand) @@ -210,6 +210,15 @@ std::optional> populateRangeBounds(execplan::SimpleColumn* T currentUpperBound = *(uint32_t*)endBucket->start_value.data(); bounds.push_back({currentLowerBound, currentUpperBound}); } + for (auto& bucket : columnStatistics.get_json_histogram()) + { + T currentLowerBound = *(uint32_t*)bucket.start_value.data(); + std::cout << "Bucket: " << currentLowerBound << std::endl; + } + // auto penultimateBucket = columnStatistics.get_json_histogram().begin() + numberOfUnionUnits * numberOfBucketsPerUnionUnit; + // T currentLowerBound = *(uint32_t*)penultimateBucket->start_value.data(); + // T currentUpperBound = *(uint32_t*)columnStatistics.get_last_bucket_end_endp().data(); + // bounds.push_back({currentLowerBound, currentUpperBound}); for (auto& bound : bounds) {