diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result index 219bad22528..d8a8fe449b6 100644 --- a/mysql-test/main/mysqld--help.result +++ b/mysql-test/main/mysqld--help.result @@ -712,6 +712,13 @@ The following specify which files/extra groups are read (specified before remain max_connections*5 or max_connections + table_cache*2 (whichever is larger) number of file descriptors (Automatically configured unless set explicitly) + --optimizer-adjust-secondary-key-costs=# + 0 = No changes. 1 = Update secondary key costs for ranges + to be at least 5x of clustered primary key costs. 2 = + Remove 'max_seek optimization' for secondary keys and + slight adjustment of filter cost. This option will be + deleted in MariaDB 11.0 as it is not needed with the new + 11.0 optimizer. --optimizer-max-sel-arg-weight=# The maximum weight of the SEL_ARG graph. Set to 0 for no limit @@ -1684,6 +1691,7 @@ old-alter-table DEFAULT old-mode UTF8_IS_UTF8MB3 old-passwords FALSE old-style-user-limits FALSE +optimizer-adjust-secondary-key-costs 0 optimizer-max-sel-arg-weight 32000 optimizer-max-sel-args 16000 optimizer-prune-level 1 diff --git a/mysql-test/main/secondary_key_costs.result b/mysql-test/main/secondary_key_costs.result new file mode 100644 index 00000000000..55c84705abf --- /dev/null +++ b/mysql-test/main/secondary_key_costs.result @@ -0,0 +1,82 @@ +create table t1 ( +pk int primary key auto_increment, +nm varchar(32), +fl1 tinyint default 0, +fl2 tinyint default 0, +index idx1(nm, fl1), +index idx2(fl2) +) engine=myisam; +create table name ( +pk int primary key auto_increment, +nm bigint +) engine=myisam; +create table flag2 ( +pk int primary key auto_increment, +fl2 tinyint +) engine=myisam; +insert into name(nm) select seq from seq_1_to_1000 order by rand(17); +insert into flag2(fl2) select seq mod 2 from seq_1_to_1000 order by rand(19); +insert into t1(nm,fl2) +select nm, fl2 from name, flag2 where name.pk = flag2.pk; +analyze table t1 persistent for all; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status Table is already up to date +set optimizer_trace="enabled=on"; +set optimizer_switch='rowid_filter=on'; +set statement optimizer_adjust_secondary_key_costs=0 for +explain select * from t1 where nm like '500%' AND fl2 = 0; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx1,idx2 idx1 35 NULL 1 Using index condition; Using where +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.considered_access_paths')); +json_detailed(json_extract(@trace, '$**.considered_access_paths')) +[ + [ + { + "access_type": "ref", + "index": "idx2", + "used_range_estimates": true, + "rowid_filter_skipped": "worst/max seeks clipping", + "rows": 492, + "cost": 492.3171406, + "chosen": true + }, + { + "access_type": "range", + "resulting_rows": 0.492, + "cost": 1.448699097, + "chosen": true + } + ] +] + +The following trace should have a different rowid_filter_key cost + +set statement optimizer_adjust_secondary_key_costs=2 for +explain select * from t1 where nm like '500%' AND fl2 = 0; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx1,idx2 idx1 35 NULL 1 Using index condition; Using where +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.considered_access_paths')); +json_detailed(json_extract(@trace, '$**.considered_access_paths')) +[ + [ + { + "access_type": "ref", + "index": "idx2", + "used_range_estimates": true, + "rowid_filter_key": "idx1", + "rows": 492, + "cost": 3.814364688, + "chosen": true + }, + { + "access_type": "range", + "resulting_rows": 0.492, + "cost": 1.448699097, + "chosen": true + } + ] +] +drop table t1, name, flag2; diff --git a/mysql-test/main/secondary_key_costs.test b/mysql-test/main/secondary_key_costs.test new file mode 100644 index 00000000000..d3db137653b --- /dev/null +++ b/mysql-test/main/secondary_key_costs.test @@ -0,0 +1,53 @@ +--source include/have_sequence.inc +--source include/not_embedded.inc + +# +# Show the costs for rowid filter +# + +create table t1 ( + pk int primary key auto_increment, + nm varchar(32), + fl1 tinyint default 0, + fl2 tinyint default 0, + index idx1(nm, fl1), + index idx2(fl2) +) engine=myisam; + +create table name ( + pk int primary key auto_increment, + nm bigint +) engine=myisam; + +create table flag2 ( + pk int primary key auto_increment, + fl2 tinyint +) engine=myisam; + +insert into name(nm) select seq from seq_1_to_1000 order by rand(17); +insert into flag2(fl2) select seq mod 2 from seq_1_to_1000 order by rand(19); + +insert into t1(nm,fl2) + select nm, fl2 from name, flag2 where name.pk = flag2.pk; + +analyze table t1 persistent for all; + +--disable_ps_protocol +set optimizer_trace="enabled=on"; +set optimizer_switch='rowid_filter=on'; +set statement optimizer_adjust_secondary_key_costs=0 for +explain select * from t1 where nm like '500%' AND fl2 = 0; +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.considered_access_paths')); + +--echo +--echo The following trace should have a different rowid_filter_key cost +--echo +set statement optimizer_adjust_secondary_key_costs=2 for +explain select * from t1 where nm like '500%' AND fl2 = 0; +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.considered_access_paths')); + +--enable_ps_protocol + +drop table t1, name, flag2; diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result index 8e50d8e7268..aa88a8f74a8 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result @@ -2272,6 +2272,16 @@ NUMERIC_BLOCK_SIZE 1 ENUM_VALUE_LIST NULL READ_ONLY YES COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME OPTIMIZER_ADJUST_SECONDARY_KEY_COSTS +VARIABLE_SCOPE SESSION +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT 0 = No changes. 1 = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. 2 = Remove 'max_seek optimization' for secondary keys and slight adjustment of filter cost. This option will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer. +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 2 +NUMERIC_BLOCK_SIZE 1 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME OPTIMIZER_MAX_SEL_ARGS VARIABLE_SCOPE SESSION VARIABLE_TYPE BIGINT UNSIGNED diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result index 00c66ddf072..c07b7169608 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result @@ -2432,6 +2432,16 @@ NUMERIC_BLOCK_SIZE 1 ENUM_VALUE_LIST NULL READ_ONLY YES COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME OPTIMIZER_ADJUST_SECONDARY_KEY_COSTS +VARIABLE_SCOPE SESSION +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT 0 = No changes. 1 = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. 2 = Remove 'max_seek optimization' for secondary keys and slight adjustment of filter cost. This option will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer. +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 2 +NUMERIC_BLOCK_SIZE 1 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME OPTIMIZER_MAX_SEL_ARGS VARIABLE_SCOPE SESSION VARIABLE_TYPE BIGINT UNSIGNED diff --git a/sql/sql_class.h b/sql/sql_class.h index d61ee1545b6..5175d440136 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -698,7 +698,6 @@ typedef struct system_variables ulonglong max_statement_time; ulonglong optimizer_switch; ulonglong optimizer_trace; - ulong optimizer_trace_max_mem_size; sql_mode_t sql_mode; ///< which non-standard SQL behaviour should be enabled sql_mode_t old_behavior; ///< which old SQL behaviour should be enabled ulonglong option_bits; ///< OPTION_xxx constants, e.g. OPTION_PROFILING @@ -761,6 +760,8 @@ typedef struct system_variables ulong optimizer_use_condition_selectivity; ulong optimizer_max_sel_arg_weight; ulong optimizer_max_sel_args; + ulong optimizer_trace_max_mem_size; + ulong optimizer_adjust_secondary_key_costs; ulong use_stat_tables; double sample_percentage; ulong histogram_size; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 62c7f52748a..b1779ebc24d 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -5895,11 +5895,15 @@ make_join_statistics(JOIN *join, List &tables_list, This is can't be to high as otherwise we are likely to use table scan. */ - s->worst_seeks= MY_MIN((double) s->found_records / 10, - (double) s->read_time*3); - if (s->worst_seeks < 2.0) // Fix for small tables - s->worst_seeks=2.0; - + /* Largest integer that can be stored in double (no compiler warning) */ + s->worst_seeks= (double) (1ULL << 53); + if (thd->variables.optimizer_adjust_secondary_key_costs != 2) + { + s->worst_seeks= MY_MIN((double) s->found_records / 10, + (double) s->read_time*3); + if (s->worst_seeks < 2.0) // Fix for small tables + s->worst_seeks=2.0; + } /* Add to stat->const_keys those indexes for which all group fields or all select distinct fields participate in one index. @@ -7817,8 +7821,27 @@ double cost_for_index_read(const THD *thd, const TABLE *table, uint key, if (table->covering_keys.is_set(key)) cost= file->keyread_time(key, 1, records); else + { cost= ((file->keyread_time(key, 0, records) + file->read_time(key, 1, MY_MIN(records, worst_seeks)))); + if (thd->variables.optimizer_adjust_secondary_key_costs == 1 && + file->is_clustering_key(0)) + { + /* + According to benchmarks done in 11.0 to calculate the new cost + model secondary key ranges are about 7x slower than primary + key ranges for big tables. Here we are a bit conservative and + only calculate with 5x. The reason for having it only 5x and + not for example 7x is is that choosing plans with more rows + that are read (ignored by the WHERE clause) causes the 10.x + optimizer to believe that there are more rows in the result + set, which can cause problems in finding the best join order. + Note: A clustering primary key is always key 0. + */ + double clustering_key_cost= file->read_time(0, 1, records); + cost= MY_MAX(cost, clustering_key_cost * 5); + } + } DBUG_PRINT("statistics", ("cost: %.3f", cost)); DBUG_RETURN(cost); @@ -7992,6 +8015,14 @@ best_access_path(JOIN *join, double keyread_tmp= 0; ha_rows rec; bool best_uses_jbuf= FALSE; + /* + if optimizer_use_condition_selectivity adjust filter cost to be slightly + higher to ensure that ref|filter is not less than range over same + number of rows + */ + double filter_setup_cost= (thd->variables. + optimizer_adjust_secondary_key_costs == 2 ? + 1.0 : 0.0); MY_BITMAP *eq_join_set= &s->table->eq_join_set; KEYUSE *hj_start_key= 0; SplM_plan_info *spl_plan= 0; @@ -8548,6 +8579,7 @@ best_access_path(JOIN *join, type == JT_EQ_REF ? 0.5 * tmp : MY_MIN(tmp, keyread_tmp); double access_cost_factor= MY_MIN((tmp - key_access_cost) / rows, 1.0); + if (!(records < s->worst_seeks && records <= thd->variables.max_seeks_for_key)) { @@ -8564,7 +8596,9 @@ best_access_path(JOIN *join, } if (filter) { - tmp-= filter->get_adjusted_gain(rows) - filter->get_cmp_gain(rows); + tmp-= (filter->get_adjusted_gain(rows) - + filter->get_cmp_gain(rows) - + filter_setup_cost); DBUG_ASSERT(tmp >= 0); trace_access_idx.add("rowid_filter_key", table->key_info[filter->key_no].name); @@ -8810,7 +8844,7 @@ best_access_path(JOIN *join, access_cost_factor); if (filter) { - tmp-= filter->get_adjusted_gain(rows); + tmp-= filter->get_adjusted_gain(rows) - filter_setup_cost; DBUG_ASSERT(tmp >= 0); } diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index dea04bf3bc5..61911d877e5 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -2817,6 +2817,19 @@ static Sys_var_ulong Sys_optimizer_trace_max_mem_size( SESSION_VAR(optimizer_trace_max_mem_size), CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, ULONG_MAX), DEFAULT(1024 * 1024), BLOCK_SIZE(1)); +static Sys_var_ulong Sys_optimizer_adjust_secondary_key_costs( + "optimizer_adjust_secondary_key_costs", + "0 = No changes. " + "1 = Update secondary key costs for ranges to be at least 5x of clustered " + "primary key costs. " + "2 = Remove 'max_seek optimization' for secondary keys and slight " + "adjustment of filter cost. " + "This option will be deleted in MariaDB 11.0 as it is not needed with the " + "new 11.0 optimizer.", + SESSION_VAR(optimizer_adjust_secondary_key_costs), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 2), DEFAULT(0), BLOCK_SIZE(1)); + + static Sys_var_charptr_fscs Sys_pid_file( "pid_file", "Pid file used by safe_mysqld", READ_ONLY GLOBAL_VAR(pidfile_name_ptr), CMD_LINE(REQUIRED_ARG),