diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index 321671eea..f508b89a8 100644 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -5227,8 +5227,28 @@ int processFrom(bool& isUnion, SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& { std::cout << " has stats "; SchemaAndTableName tableName = {field->table->s->db.str, - field->table->s->table_name.str}; - gwi.tableStatisticsMap[tableName][field->field_name.str] = *histogram; + field->table->s->table_name.str}; + execplan::SimpleColumn simpleColumn = {field->table->s->db.str, + field->table->s->table_name.str, + field->field_name.str}; + auto tableStatisticsMapIt = gwi.tableStatisticsMap.find(tableName); + if (tableStatisticsMapIt == gwi.tableStatisticsMap.end()) + { + gwi.tableStatisticsMap[tableName][field->field_name.str] = {simpleColumn, {*histogram}}; + } + else + { + auto columnStatisticsMapIt = tableStatisticsMapIt->second.find(field->field_name.str); + if (columnStatisticsMapIt == tableStatisticsMapIt->second.end()) + { + tableStatisticsMapIt->second[field->field_name.str] = {simpleColumn, {*histogram}}; + } + else + { + auto& columnStatisticsVec = columnStatisticsMapIt->second.second; /* bind by reference: a by-value copy would be destroyed at the closing brace and the appended histogram lost */ + columnStatisticsVec.push_back(*histogram); + } + } } else { @@ -6321,43 +6341,43 @@ int processLimitAndOffset(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep // for the first column of the index if any. // Statistics is stored in GWI context. 
// Mock for ES 10.6 -#if MYSQL_VERSION_ID >= 120401 -void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi) -{ - for (uint j = 0; j < ifp->field->table->s->keys; j++) - { - for (uint i = 0; i < ifp->field->table->s->key_info[j].usable_key_parts; i++) - { - if (ifp->field->table->s->key_info[j].key_part[i].fieldnr == ifp->field->field_index + 1) - { - if (i == 0 && ifp->field->read_stats) - { - assert(ifp->field->table->s); - auto* histogram = dynamic_cast(ifp->field->read_stats->histogram); - if (histogram) - { - SchemaAndTableName tableName = {ifp->field->table->s->db.str, - ifp->field->table->s->table_name.str}; - auto tableStatisticsMapIt = gwi.tableStatisticsMap.find(tableName); - if (tableStatisticsMapIt == gwi.tableStatisticsMap.end()) - { - gwi.tableStatisticsMap.insert({tableName, {{ifp->field->field_name.str, *histogram}}}); - } - else - { - tableStatisticsMapIt->second.insert({ifp->field->field_name.str, *histogram}); - } - } - } - } - } - } -} -#else -void extractColumnStatistics(Item_field* /*ifp*/, gp_walk_info& /*gwi*/) -{ -} -#endif +// #if MYSQL_VERSION_ID >= 120401 +// void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi) +// { +// for (uint j = 0; j < ifp->field->table->s->keys; j++) +// { +// for (uint i = 0; i < ifp->field->table->s->key_info[j].usable_key_parts; i++) +// { +// if (ifp->field->table->s->key_info[j].key_part[i].fieldnr == ifp->field->field_index + 1) +// { +// if (i == 0 && ifp->field->read_stats) +// { +// assert(ifp->field->table->s); +// auto* histogram = dynamic_cast(ifp->field->read_stats->histogram); +// if (histogram) +// { +// SchemaAndTableName tableName = {ifp->field->table->s->db.str, +// ifp->field->table->s->table_name.str}; +// auto tableStatisticsMapIt = gwi.tableStatisticsMap.find(tableName); +// if (tableStatisticsMapIt == gwi.tableStatisticsMap.end()) +// { +// gwi.tableStatisticsMap.insert({tableName, {{ifp->field->field_name.str, *histogram}}}); +// } +// else +// { +// 
tableStatisticsMapIt->second.insert({ifp->field->field_name.str, *histogram}); +// } +// } +// } +// } +// } +// } +// } +// #else +// void extractColumnStatistics(Item_field* /*ifp*/, gp_walk_info& /*gwi*/) +// { +// } +// #endif /*@brief Process SELECT part of a query or sub-query */ /*********************************************************** diff --git a/dbcon/mysql/ha_mcs_impl_if.h b/dbcon/mysql/ha_mcs_impl_if.h index aed13d202..0548e021a 100644 --- a/dbcon/mysql/ha_mcs_impl_if.h +++ b/dbcon/mysql/ha_mcs_impl_if.h @@ -116,7 +116,7 @@ typedef std::map> TableOnExprList; typedef std::tr1::unordered_map TableOuterJoinMap; using ColumnName = std::string; -using ColumnStatisticsMap = std::unordered_map; +using ColumnStatisticsMap = std::unordered_map>>; using TableStatisticsMap = std::unordered_map; // This structure is used to store MDB AST -> CSEP translation context. diff --git a/dbcon/mysql/rbo_apply_parallel_ces.cpp b/dbcon/mysql/rbo_apply_parallel_ces.cpp index 2dbadf773..02454e401 100644 --- a/dbcon/mysql/rbo_apply_parallel_ces.cpp +++ b/dbcon/mysql/rbo_apply_parallel_ces.cpp @@ -105,6 +105,7 @@ execplan::ParseTree* filtersWithNewRange(execplan::SCSEP& csep, execplan::Simple ltOp->resultType(ltOp->operationType()); auto* sfr = new execplan::SimpleFilter(ltOp, tableKeyColumnLeftOp, filterColLeftOp); + // TODO new: revisit the raw new on the next statement (NOTE(review): intent inferred from that allocation - confirm) auto tableKeyColumnRightOp = new execplan::SimpleColumn(column); tableKeyColumnRightOp->resultType(column.resultType()); // TODO hardcoded column type and value @@ -114,8 +115,10 @@ execplan::ParseTree* filtersWithNewRange(execplan::SCSEP& csep, execplan::Simple gtOp->setOpType(filterColRightOp->resultType(), tableKeyColumnRightOp->resultType()); gtOp->resultType(gtOp->operationType()); + // TODO new: revisit the raw new on the next statement (NOTE(review): intent inferred from that allocation - confirm) auto* sfl = new execplan::SimpleFilter(gtOp, tableKeyColumnRightOp, filterColRightOp); + // TODO new: revisit the two raw new expressions on the next statement (NOTE(review): intent inferred from those allocations - confirm) execplan::ParseTree* ptp = new execplan::ParseTree(new execplan::LogicOperator("and")); ptp->right(sfr); ptp->left(sfl); @@ -169,6 +172,12 @@ 
execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPl return nullptr; } +// TBD: the selection policy is not implemented yet; for now the first histogram in columnStatisticsVec is returned. The vector must be non-empty, because front() on an empty std::vector is undefined behavior. +Histogram_json_hb& chooseStatisticsToUse(std::vector& columnStatisticsVec) +{ + return columnStatisticsVec.front(); +} + // Populates range bounds based on column statistics // Returns optional with bounds if successful, nullopt otherwise template @@ -188,7 +197,8 @@ std::optional> populateRangeBounds(execplan::SimpleColumn* return std::nullopt; } - auto columnStatistics = columnStatisticsIt->second; + auto& [simpleColumn, columnStatisticsVec] = columnStatisticsIt->second; + auto& columnStatistics = chooseStatisticsToUse(columnStatisticsVec); // TODO configurable parallel factor via session variable // NB now histogram size is the way to control parallel factor with 16 being the maximum