From c630e23a186c7ecfe0afac21163cb4fa2cdc5f7a Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Sat, 7 Sep 2024 17:17:44 +0300 Subject: [PATCH] MDEV-34894: Poor query plan, because range estimates are not reused for ref(const) (Variant 4, with @@optimizer_adjust_secondary_key_costs, reuse in two places, and conditions are replaced with equivalent simpler forms in two more) In best_access_path(), ReuseRangeEstimateForRef-3, the check for whether "all used key_part_i used key_part_i=const" was incorrect: it may produced a "NO" answer for cases when we had: key_part1= const // some key parts are usable key_part2= value_not_in_join_prefix //present but unusable key_part3= non_const_value // unusable due to gap in key parts. This caused the optimizer to fail to apply ReuseRangeEstimateForRef heuristics. The consequence is poor query plan choice when the index in question has very skewed data distribution. The fix is enabled if its @@optimizer_adjust_secondary_key_costs flag is set. --- mysql-test/main/join.result | 84 +++++++++++++++++++ mysql-test/main/join.test | 78 +++++++++++++++++ mysql-test/main/mysqld--help.result | 8 +- mysql-test/main/secondary_key_costs.result | 2 +- .../sys_vars/r/sysvars_server_embedded.result | 4 +- .../r/sysvars_server_notembedded.result | 4 +- sql/sql_priv.h | 1 + sql/sql_select.cc | 48 ++++++++--- sql/sys_vars.cc | 7 +- 9 files changed, 214 insertions(+), 22 deletions(-) diff --git a/mysql-test/main/join.result b/mysql-test/main/join.result index 34c58910f07..9f843a7e722 100644 --- a/mysql-test/main/join.result +++ b/mysql-test/main/join.result @@ -3495,3 +3495,87 @@ a b c SET OPTIMIZER_USE_CONDITION_SELECTIVITY=@tmp; DROP TABLE t1,t2; # End of 10.6 tests +# +# MDEV-34894: Poor query plan, because range estimates are not reused for ref(const) +# +create table t0 ( +a int, +b int, +dummy int +); +insert into t0 select seq,seq,seq from seq_1_to_10; +create table t1 ( +pk1 int, +pk2 int, +pk3 int, +key1 int, +key(key1), +filler char(100), +primary key(pk1,pk2,pk3) +); +insert into t1 +select +seq, seq, seq, +FLOOR(seq/2), +'filler-data' +from seq_1_to_10000; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +update t1 set pk1=1 where pk1 between 1 and 200; +explain select * from t1 where pk1=1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref PRIMARY PRIMARY 4 const 231 +explain select * from t0,t1 where t1.pk1=t0.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t0 ALL NULL NULL NULL NULL 10 Using where +1 SIMPLE t1 ref PRIMARY PRIMARY 4 test.t0.a 1 +create table t2 ( +col int +); +insert into t2 select seq from seq_1_to_10000; +set optimizer_adjust_secondary_key_costs='fix_reuse_range_for_ref'; +# This must use this good query plan: +# t0 - ALL +# t1 - ref, key=key1, not PRIMARY as pk1=1 is true for 20% of all rows +# t2 - ALL +explain select * from t0, t1, t2 +where +t1.pk1=1 and t1.pk2=t2.col and t1.pk3=t0.dummy and +t1.key1=t0.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t0 ALL NULL NULL NULL NULL 10 Using where +1 SIMPLE t1 ref PRIMARY,key1 key1 5 test.t0.b 1 Using where +1 SIMPLE t2 ALL NULL NULL NULL NULL 10000 Using where; Using join buffer (flat, BNL join) +set optimizer_adjust_secondary_key_costs=''; +# Bad query: +# t0 - ALL +# t1 - ref, key=PRIMARY +# t2 - ALL +explain select * from t0, t1, t2 +where +t1.pk1=1 and t1.pk2=t2.col and t1.pk3=t0.dummy and +t1.key1=t0.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t0 ALL NULL NULL NULL NULL 10 +1 SIMPLE t1 ref PRIMARY,key1 PRIMARY 4 const 1 Using index condition; Using where +1 SIMPLE t2 ALL NULL NULL NULL NULL 10000 Using where; Using join buffer (flat, BNL join) +drop table t0,t1,t2; +set @@optimizer_adjust_secondary_key_costs="fix_reuse_range_for_ref"; +CREATE OR REPLACE TABLE t1 (a INT NOT NULL, b INT NOT NULL, c INT, key(a,b,c)) ENGINE=Aria; +INSERT INTO t1 select seq/10,mod(seq,2),seq from seq_1_to_1000; +update t1 set a=10 WHERE c < 100; +update t1 set a=12 WHERE a=11; +insert into t1 values (11,1,11), (11,2,11); +create or replace table t2 select seq from seq_1_to_10; +explain select count(*) from t1, t2 as seq where a=10 and b=seq.seq; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE seq ALL NULL NULL NULL NULL 10 +1 SIMPLE t1 ref a a 8 const,test.seq.seq 5 Using where; Using index +explain select count(*) from t1, t2 as seq where a=11 and b=seq.seq; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref a a 4 const 2 Using index +1 SIMPLE seq ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) +drop table t1,t2; +set @@optimizer_adjust_secondary_key_costs=default; diff --git a/mysql-test/main/join.test b/mysql-test/main/join.test index f8abc3fac22..6e037100ae5 100644 --- a/mysql-test/main/join.test +++ b/mysql-test/main/join.test @@ -1904,3 +1904,81 @@ SELECT * FROM SET OPTIMIZER_USE_CONDITION_SELECTIVITY=@tmp; DROP TABLE t1,t2; --echo # End of 10.6 tests + +--source include/have_sequence.inc + +--echo # +--echo # MDEV-34894: Poor query plan, because range estimates are not reused for ref(const) +--echo # +create table t0 ( + a int, + b int, + dummy int +); +insert into t0 select seq,seq,seq from seq_1_to_10; + +create table t1 ( + pk1 int, + pk2 int, + pk3 int, + key1 int, + key(key1), + filler char(100), + primary key(pk1,pk2,pk3) +); + +insert into t1 +select + seq, seq, seq, + FLOOR(seq/2), + 'filler-data' +from seq_1_to_10000; +analyze table t1; + +update t1 set pk1=1 where pk1 between 1 and 200; + +explain select * from t1 where pk1=1; + +explain select * from t0,t1 where t1.pk1=t0.a; + +create table t2 ( + col int +); +insert into t2 select seq from seq_1_to_10000; + +set optimizer_adjust_secondary_key_costs='fix_reuse_range_for_ref'; +--echo # This must use this good query plan: +--echo # t0 - ALL +--echo # t1 - ref, key=key1, not PRIMARY as pk1=1 is true for 20% of all rows +--echo # t2 - ALL +explain select * from t0, t1, t2 +where + t1.pk1=1 and t1.pk2=t2.col and t1.pk3=t0.dummy and + t1.key1=t0.b; + +set optimizer_adjust_secondary_key_costs=''; +--echo # Bad query: +--echo # t0 - ALL +--echo # t1 - ref, key=PRIMARY +--echo # t2 - ALL +explain select * from t0, t1, t2 +where + t1.pk1=1 and t1.pk2=t2.col and t1.pk3=t0.dummy and + t1.key1=t0.b; + +drop table t0,t1,t2; + +set @@optimizer_adjust_secondary_key_costs="fix_reuse_range_for_ref"; +CREATE OR REPLACE TABLE t1 (a INT NOT NULL, b INT NOT NULL, c INT, key(a,b,c)) ENGINE=Aria; +INSERT INTO t1 select seq/10,mod(seq,2),seq from seq_1_to_1000; +update t1 set a=10 WHERE c < 100; +update t1 set a=12 WHERE a=11; +insert into t1 values (11,1,11), (11,2,11); +create or replace table t2 select seq from seq_1_to_10; + +explain select count(*) from t1, t2 as seq where a=10 and b=seq.seq; +# This will execute code in ReuseRangeEstimateForRef-4 +explain select count(*) from t1, t2 as seq where a=11 and b=seq.seq; +drop table t1,t2; + +set @@optimizer_adjust_secondary_key_costs=default; diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result index 08cd1202655..1e5fad44a5b 100644 --- a/mysql-test/main/mysqld--help.result +++ b/mysql-test/main/mysqld--help.result @@ -721,8 +721,10 @@ The following specify which files/extra groups are read (specified before remain disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary - keys. This variable will be deleted in MariaDB 11.0 as it - is not needed with the new 11.0 optimizer. + keys. fix_reuse_range_for_ref = Do a better job at + reusing range access estimates when estimating ref + access. This variable will be deleted in MariaDB 11.0 as + it is not needed with the new 11.0 optimizer. Use 'ALL' to set all combinations. --optimizer-join-limit-pref-ratio=# For queries with JOIN and ORDER BY LIMIT : make the @@ -1705,7 +1707,7 @@ old-alter-table DEFAULT old-mode UTF8_IS_UTF8MB3 old-passwords FALSE old-style-user-limits FALSE -optimizer-adjust-secondary-key-costs +optimizer-adjust-secondary-key-costs fix_reuse_range_for_ref optimizer-join-limit-pref-ratio 0 optimizer-max-sel-arg-weight 32000 optimizer-max-sel-args 16000 diff --git a/mysql-test/main/secondary_key_costs.result b/mysql-test/main/secondary_key_costs.result index b246b666115..3a5b883068c 100644 --- a/mysql-test/main/secondary_key_costs.result +++ b/mysql-test/main/secondary_key_costs.result @@ -82,7 +82,7 @@ json_detailed(json_extract(@trace, '$**.considered_access_paths')) drop table t1, name, flag2; select @@optimizer_adjust_secondary_key_costs; @@optimizer_adjust_secondary_key_costs - +fix_reuse_range_for_ref set @@optimizer_adjust_secondary_key_costs=7; select @@optimizer_adjust_secondary_key_costs; @@optimizer_adjust_secondary_key_costs diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result index 42b661bc448..850497191d0 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result @@ -2275,11 +2275,11 @@ COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME OPTIMIZER_ADJUST_SECONDARY_KEY_COSTS VARIABLE_SCOPE SESSION VARIABLE_TYPE SET -VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer. +VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. fix_reuse_range_for_ref = Do a better job at reusing range access estimates when estimating ref access. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer. NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL -ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality +ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality,fix_reuse_range_for_ref READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME OPTIMIZER_JOIN_LIMIT_PREF_RATIO diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result index b626551a570..fcbe4f51fc5 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result @@ -2435,11 +2435,11 @@ COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME OPTIMIZER_ADJUST_SECONDARY_KEY_COSTS VARIABLE_SCOPE SESSION VARIABLE_TYPE SET -VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer. +VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. fix_reuse_range_for_ref = Do a better job at reusing range access estimates when estimating ref access. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer. NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL -ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality +ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality,fix_reuse_range_for_ref READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME OPTIMIZER_JOIN_LIMIT_PREF_RATIO diff --git a/sql/sql_priv.h b/sql/sql_priv.h index 0bcb09a80ad..8f7962fc6b6 100644 --- a/sql/sql_priv.h +++ b/sql/sql_priv.h @@ -275,6 +275,7 @@ #define OPTIMIZER_ADJ_DISABLE_MAX_SEEKS (2) #define OPTIMIZER_ADJ_DISABLE_FORCE_INDEX_GROUP_BY (4) #define OPTIMIZER_FIX_INNODB_CARDINALITY (8) +#define OPTIMIZER_ADJ_FIX_REUSE_RANGE_FOR_REF (16) /* Replication uses 8 bytes to store SQL_MODE in the binary log. The day you diff --git a/sql/sql_select.cc b/sql/sql_select.cc index bf80b360ab2..57e391dc82d 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -8125,6 +8125,7 @@ best_access_path(JOIN *join, key_part_map notnull_part=0; // key parts which won't have NULL in lookup tuple. table_map found_ref= 0; uint key= keyuse->key; + uint max_const_parts; filter= 0; bool ft_key= (keyuse->keypart == FT_KEYPART); /* Bitmap of keyparts where the ref access is over 'keypart=const': */ @@ -8228,6 +8229,8 @@ best_access_path(JOIN *join, rec= MATCHING_ROWS_IN_OTHER_TABLE; // Fix for small tables Json_writer_object trace_access_idx(thd); + max_const_parts= max_part_bit(const_part); + /* full text keys require special treatment */ @@ -8344,9 +8347,7 @@ best_access_path(JOIN *join, in ReuseRangeEstimateForRef-3. */ if (table->opt_range_keys.is_set(key) && - (const_part & - (((key_part_map)1 << table->opt_range[key].key_parts)-1)) == - (((key_part_map)1 << table->opt_range[key].key_parts)-1) && + table->opt_range[key].key_parts <= max_const_parts && table->opt_range[key].ranges == 1 && records > (double) table->opt_range[key].rows) { @@ -8394,6 +8395,17 @@ best_access_path(JOIN *join, found_part == PREV_BITS(uint,keyinfo->user_defined_key_parts))) { max_key_part= max_part_bit(found_part); + bool all_used_equalities_are_const; + if ((thd->variables.optimizer_adjust_secondary_key_costs & + OPTIMIZER_ADJ_FIX_REUSE_RANGE_FOR_REF)) + { + all_used_equalities_are_const= (max_key_part == max_const_parts); + } + else + { + // Old, incorrect check: + all_used_equalities_are_const= !found_ref; + } /* ReuseRangeEstimateForRef-3: We're now considering a ref[or_null] access via @@ -8408,7 +8420,7 @@ best_access_path(JOIN *join, create quick select over another index), so we can't compare them to (**). We'll make indirect judgements instead. The sufficient conditions for re-use are: - (C1) All e_i in (**) are constants, i.e. found_ref==FALSE. (if + (C1) All e_i in (**) are constants (if this is not satisfied we have no way to know which ranges will be actually scanned by 'ref' until we execute the join) @@ -8418,7 +8430,7 @@ best_access_path(JOIN *join, We also have a property that "range optimizer produces equal or tighter set of scan intervals than ref(const) optimizer". Each of the intervals in (**) are "tightest possible" intervals when - one limits itself to using keyparts 1..K (which we do in #2). + one limits itself to using keyparts 1..K (which we do in #2). From here it follows that range access used either one, or both of the (I1) and (I2) intervals: @@ -8433,7 +8445,8 @@ best_access_path(JOIN *join, (C3) "range optimizer used (have ref_or_null?2:1) intervals" */ - if (table->opt_range_keys.is_set(key) && !found_ref && //(C1) + if (table->opt_range_keys.is_set(key) && + all_used_equalities_are_const && // (C1) table->opt_range[key].key_parts == max_key_part && //(C2) table->opt_range[key].ranges == 1 + MY_TEST(ref_or_null_part)) //(C3) { @@ -8466,10 +8479,10 @@ best_access_path(JOIN *join, */ if (table->opt_range_keys.is_set(key)) { + double rows= (double) table->opt_range[key].rows; if (table->opt_range[key].key_parts >= max_key_part) // (2) { - double rows= (double) table->opt_range[key].rows; - if (!found_ref && // (1) + if (all_used_equalities_are_const && // (1) records < rows) // (3) { trace_access_idx.add("used_range_estimates", "clipped up"); @@ -8537,15 +8550,26 @@ best_access_path(JOIN *join, */ if (table->opt_range_keys.is_set(key) && table->opt_range[key].key_parts <= max_key_part && - const_part & - ((key_part_map)1 << table->opt_range[key].key_parts) && table->opt_range[key].ranges == (1 + MY_TEST(ref_or_null_part & const_part)) && records > (double) table->opt_range[key].rows) { - trace_access_idx.add("used_range_estimates", true); - records= (double) table->opt_range[key].rows; + bool all_parts_used; + if ((thd->variables.optimizer_adjust_secondary_key_costs & + OPTIMIZER_ADJ_FIX_REUSE_RANGE_FOR_REF)) + { + all_parts_used= table->opt_range[key].key_parts <= max_const_parts; + } + else + all_parts_used= (bool) (const_part & + ((key_part_map)1 + << table->opt_range[key].key_parts)); + if (all_parts_used) + { + trace_access_idx.add("used_range_estimates", true); + records= (double) table->opt_range[key].rows; + } } } diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index c5b507b936d..77d0c2daf78 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -2837,7 +2837,8 @@ static Sys_var_ulong Sys_optimizer_trace_max_mem_size( */ static const char *adjust_secondary_key_cost[]= { - "adjust_secondary_key_cost", "disable_max_seek", "disable_forced_index_in_group_by", "fix_innodb_cardinality",0 + "adjust_secondary_key_cost", "disable_max_seek", "disable_forced_index_in_group_by", + "fix_innodb_cardinality", "fix_reuse_range_for_ref", 0 }; @@ -2852,10 +2853,12 @@ static Sys_var_set Sys_optimizer_adjust_secondary_key_costs( "GROUP BY. " "fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB " "secondary keys. " + "fix_reuse_range_for_ref = Do a better job at reusing range access estimates " + "when estimating ref access. " "This variable will be deleted in MariaDB 11.0 as it is not needed with the " "new 11.0 optimizer.", SESSION_VAR(optimizer_adjust_secondary_key_costs), CMD_LINE(REQUIRED_ARG), - adjust_secondary_key_cost, DEFAULT(0)); + adjust_secondary_key_cost, DEFAULT(OPTIMIZER_ADJ_FIX_REUSE_RANGE_FOR_REF)); static Sys_var_charptr_fscs Sys_pid_file(