diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index 66e365d1e..f5fa7db42 100644 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -2673,8 +2673,7 @@ CalpontSystemCatalog::ColType colType_MysqlToIDB(const Field* field) ct.colWidth = 8; break; - case STRING_RESULT: - ct.colDataType = CalpontSystemCatalog::VARCHAR; + case STRING_RESULT: ct.colDataType = CalpontSystemCatalog::VARCHAR; default: IDEBUG(cerr << "colType_MysqlToIDB:: Unknown result type of MySQL " << item->result_type() << endl); @@ -5205,6 +5204,64 @@ void setExecutionParams(gp_walk_info& gwi, SCSEP& csep) csep->umMemLimit(get_um_mem_limit(gwi.thd) * 1024ULL * 1024); } +// Loop over the table's indexes and, for the first column of each index, extract the EI column statistics +// (JSON histogram) if any; only key_part[0] of every key is inspected. +// Statistics are stored in the GWI context (gwi.tableStatisticsMap), keyed by schema/table and then column name. +// The #else branch is a no-op mock for servers with MYSQL_VERSION_ID < 110401 (e.g. ES 10.6). +// TODO clean up extra logging when the feature is ready +#if MYSQL_VERSION_ID >= 110401 +void extractColumnStatistics(TABLE_LIST* table_ptr, gp_walk_info& gwi) +{ + for (uint j = 0; j < table_ptr->table->s->keys; j++) + { + { + Field* field = table_ptr->table->key_info[j].key_part[0].field; + std::cout << "j index " << j << " i column " << 0 << " fieldnr " + << table_ptr->table->key_info[j].key_part[0].fieldnr << " " << field->field_name.str; + if (field->read_stats) + { + auto* histogram = dynamic_cast<Histogram_json_hb*>(field->read_stats->histogram); + if (histogram) + { + std::cout << " has stats with " << histogram->buckets.size() << " buckets"; + SchemaAndTableName tableName = {field->table->s->db.str, field->table->s->table_name.str}; + auto* sc = buildSimpleColumnFromFieldForStatistics(field, gwi); + std::cout << "sc with stats !!!!! 
" << sc->toString() << std::endl; + + auto tableStatisticsMapIt = gwi.tableStatisticsMap.find(tableName); + if (tableStatisticsMapIt == gwi.tableStatisticsMap.end()) + { + gwi.tableStatisticsMap[tableName][field->field_name.str] = {*sc, {histogram}}; + } + else + { + auto columnStatisticsMapIt = tableStatisticsMapIt->second.find(field->field_name.str); + if (columnStatisticsMapIt == tableStatisticsMapIt->second.end()) + { + tableStatisticsMapIt->second[field->field_name.str] = {*sc, {histogram}}; + } + else + { + auto& columnStatisticsVec = columnStatisticsMapIt->second.second;  // reference: append to the stored vector, not a copy + columnStatisticsVec.push_back(histogram); + } + } + } + else + { + std::cout << " no stats "; + } + } + std::cout << std::endl; + } + } +} +#else +void extractColumnStatistics(TABLE_LIST* /*table_ptr*/, gp_walk_info& /*gwi*/) +{ +} +#endif + /*@brief Process FROM part of the query or sub-query */ /*********************************************************** * DESCRIPTION: @@ -5302,51 +5359,8 @@ int processFrom(bool& isUnion, SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& } else { - for (uint j = 0; j < table_ptr->table->s->keys; j++) - { - { - Field* field = table_ptr->table->key_info[j].key_part[0].field; - std::cout << "j index " << j << " i column " << 0 << " fieldnr " - << table_ptr->table->key_info[j].key_part[0].fieldnr << " " << field->field_name.str; - if (field->read_stats) - { - auto* histogram = dynamic_cast<Histogram_json_hb*>(field->read_stats->histogram); - if (histogram) - { - std::cout << " has stats with " << histogram->buckets.size() << " buckets"; - SchemaAndTableName tableName = {field->table->s->db.str, field->table->s->table_name.str}; - auto* sc = buildSimpleColumnFromFieldForStatistics(field, gwi); - std::cout << "sc with stats !!!!! 
" << sc->toString() << std::endl; - // execplan::SimpleColumn simpleColumn = { - // field->table->s->db.str, field->table->s->table_name.str, field->field_name.str, false}; - - auto tableStatisticsMapIt = gwi.tableStatisticsMap.find(tableName); - if (tableStatisticsMapIt == gwi.tableStatisticsMap.end()) - { - gwi.tableStatisticsMap[tableName][field->field_name.str] = {*sc, {histogram}}; - } - else - { - auto columnStatisticsMapIt = tableStatisticsMapIt->second.find(field->field_name.str); - if (columnStatisticsMapIt == tableStatisticsMapIt->second.end()) - { - tableStatisticsMapIt->second[field->field_name.str] = {*sc, {histogram}}; - } - else - { - auto columnStatisticsVec = columnStatisticsMapIt->second.second; - columnStatisticsVec.push_back(histogram); - } - } - } - else - { - std::cout << " no stats "; - } - } - std::cout << std::endl; - } - } + // TODO move extractColumnStatistics up when statistics is supported in MCS + extractColumnStatistics(table_ptr, gwi); } string table_name = table_ptr->table_name.str; @@ -6557,7 +6571,6 @@ int processSelect(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep, vector case Item::FIELD_ITEM: { Item_field* ifp = (Item_field*)item; - // extractColumnStatistics(ifp, gwi); // Handle * case if (ifp->field_name.length && string(ifp->field_name.str) == "*") { diff --git a/dbcon/mysql/rbo_apply_parallel_ces.cpp b/dbcon/mysql/rbo_apply_parallel_ces.cpp index f44931b20..039ee9dd1 100644 --- a/dbcon/mysql/rbo_apply_parallel_ces.cpp +++ b/dbcon/mysql/rbo_apply_parallel_ces.cpp @@ -79,11 +79,8 @@ bool someForeignTablesHasStatisticsAndMbIndex(execplan::CalpontSelectExecutionPl bool parallelCESFilter(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx) { - auto tables = csep.tableList(); - // This is leaf and there are no other tables at this level in neither UNION, nor derived table. 
- // TODO filter out CSEPs with orderBy, groupBy, having + // TODO filter out CSEPs with orderBy, groupBy, having || or clean up OB,GB,HAVING cloning CSEP // Filter out tables that were re-written. - // return tables.size() == 1 && !tables[0].isColumnstore() && !tableIsInUnion(tables[0], csep); return someAreForeignTables(csep) && someForeignTablesHasStatisticsAndMbIndex(csep, ctx); } @@ -106,7 +103,7 @@ execplan::ParseTree* filtersWithNewRange(execplan::SCSEP& csep, execplan::Simple ltOp->resultType(ltOp->operationType()); auto* sfr = new execplan::SimpleFilter(ltOp, tableKeyColumnLeftOp, filterColLeftOp); - // TODO new + // TODO new // TODO remove new and re-use tableKeyColumnLeftOp auto tableKeyColumnRightOp = new execplan::SimpleColumn(column); tableKeyColumnRightOp->resultType(column.resultType()); @@ -214,8 +211,8 @@ std::optional> populateRangeBounds(Histogram_json_hb* colum // TODO configurable parallel factor via session variable // NB now histogram size is the way to control parallel factor with 16 being the maximum - std::cout << "populateRangeBounds() columnStatistics->buckets.size() " << columnStatistics->get_json_histogram().size() - << std::endl; + std::cout << "populateRangeBounds() columnStatistics->buckets.size() " + << columnStatistics->get_json_histogram().size() << std::endl; size_t numberOfUnionUnits = std::min(columnStatistics->get_json_histogram().size(), MaxParallelFactor); size_t numberOfBucketsPerUnionUnit = columnStatistics->get_json_histogram().size() / numberOfUnionUnits; @@ -237,12 +234,12 @@ std::optional> populateRangeBounds(Histogram_json_hb* colum T currentLowerBound = *(uint32_t*)bucket.start_value.data(); std::cout << "Bucket: " << currentLowerBound << std::endl; } + // TODO leave this here b/c there is a corresponding JIRA about the last upper range bound. 
// auto penultimateBucket = columnStatistics.get_json_histogram().begin() + numberOfUnionUnits * // numberOfBucketsPerUnionUnit; T currentLowerBound = *(uint32_t*)penultimateBucket->start_value.data(); T // currentUpperBound = *(uint32_t*)columnStatistics.get_last_bucket_end_endp().data(); // bounds.push_back({currentLowerBound, currentUpperBound}); - for (auto& bound : bounds) { std::cout << "Bound: " << bound.first << " " << bound.second << std::endl; @@ -304,7 +301,7 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable( clonedCSEP->filters(filter); unionVec.push_back(clonedCSEP); } - + return unionVec; } bool applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx)