diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result
index 7a9c31ef2c9..10945400e75 100644
--- a/mysql-test/main/mysqld--help.result
+++ b/mysql-test/main/mysqld--help.result
@@ -752,6 +752,15 @@ The following specify which files/extra groups are read (specified before remain
 If the optimizer needs to enumerate join prefix of this
 size or larger, then it will try aggressively prune away
 the search space.
+ --optimizer-join-limit-pref-ratio=#
+ For queries with JOIN and ORDER BY LIMIT: make the
+ optimizer consider a join order that allows short-cutting
+ execution after producing #LIMIT matches if that promises
+ an N times speedup. (A conservative setting here would be
+ a high value, like 100, so the short-cutting plan is used
+ if it promises a speedup of 100x or more). Short-cutting
+ plans are inherently risky so the default is 0 which
+ means do not consider this optimization
 --optimizer-max-sel-arg-weight=#
 The maximum weight of the SEL_ARG graph. Set to 0 for no
 limit
@@ -1739,6 +1748,7 @@ old-passwords FALSE
 old-style-user-limits FALSE
 optimizer-adjust-secondary-key-costs 
 optimizer-extra-pruning-depth 8
+optimizer-join-limit-pref-ratio 0
 optimizer-max-sel-arg-weight 32000
 optimizer-max-sel-args 16000
 optimizer-prune-level 2
diff --git a/mysql-test/main/order_by_limit_join.result b/mysql-test/main/order_by_limit_join.result
new file mode 100644
index 00000000000..d853d82092a
--- /dev/null
+++ b/mysql-test/main/order_by_limit_join.result
@@ -0,0 +1,465 @@
+#
+# MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT
+#
+create table t1 (
+a int,
+b int,
+c int,
+col1 int,
+col2 int,
+index(a),
+index(b),
+index(col1)
+);
+insert into t1 select
+mod(seq, 100),
+mod(seq, 95),
+seq,
+seq,
+seq
+from
+seq_1_to_10000;
+create table t10 (
+a int,
+a_value char(10),
+key(a)
+);
+insert into t10 select seq, seq from seq_1_to_100;
+create table t11 (
+b int,
+b_value char(10),
+key(b)
+);
+insert into t11 select seq, seq from seq_1_to_100;
+set @tmp_os=@@optimizer_trace;
+set optimizer_trace=1;
+#
+# Query 1 - basic example.
+# +# Table t1 is not the first, have to use temporary+filesort: +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col1 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort +1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where +1 SIMPLE t11 ref b b 5 test.t1.b 1 +set optimizer_join_limit_pref_ratio=10; +# t1 is first, key=col1 produces ordering, no filesort or temporary: +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col1 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index a,b col1 5 NULL 10 Using where +1 SIMPLE t10 ref a a 5 test.t1.a 1 +1 SIMPLE t11 ref b b 5 test.t1.b 1 +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; +JS +[ + { + "limit_fraction": 0.001, + "test_if_skip_sort_order_early": + [ + { + "reconsidering_access_paths_for_index_ordering": + { + "clause": "ORDER BY", + "fanout": 1, + "read_time": 53.27053125, + "table": "t1", + "rows_estimation": 10000, + "possible_keys": + [ + { + "index": "a", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "b", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "col1", + "can_resolve_order": true, + "direction": 1, + "updated_limit": 10, + "index_scan_time": 10, + "records": 10000, + "chosen": true + } + ] + } + } + ], + "can_skip_filesort": true, + "full_join_cost": 46064.98442, + "risk_ratio": 10, + "shortcut_join_cost": 97.28224614, + "shortcut_cost_with_risk": 972.8224614, + "use_shortcut_cost": true + } +] +# +# Query 2 - same as above but without a suitable index. 
+# +# Table t1 is not the first, have to use temporary+filesort: +set optimizer_join_limit_pref_ratio=0; +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col2 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort +1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where +1 SIMPLE t11 ref b b 5 test.t1.b 1 +# t1 is first but there's no suitable index, +# so we use filesort but using temporary: +set optimizer_join_limit_pref_ratio=10; +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col2 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL a,b NULL NULL NULL 10000 Using where; Using filesort +1 SIMPLE t10 ref a a 5 test.t1.a 1 +1 SIMPLE t11 ref b b 5 test.t1.b 1 +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; +JS +[ + { + "limit_fraction": 0.001, + "test_if_skip_sort_order_early": + [], + "can_skip_filesort": false, + "full_join_cost": 46064.98442, + "risk_ratio": 10, + "shortcut_join_cost": 2097.281246, + "shortcut_cost_with_risk": 20972.81246, + "use_shortcut_cost": true + } +] +# +# Query 3: Counter example with large limit +# +# Table t1 is not the first, have to use temporary+filesort: +set optimizer_join_limit_pref_ratio=0; +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col1 +limit 5000; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort +1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where +1 SIMPLE t11 ref b b 5 test.t1.b 1 +# Same plan as above: +# Table t1 is not the first, have to use temporary+filesort: +set optimizer_join_limit_pref_ratio=10; +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col1 +limit 5000; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort +1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where +1 SIMPLE t11 ref b b 5 test.t1.b 1 +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; +JS +[ + { + "limit_fraction": 0.5, + "test_if_skip_sort_order_early": + [ + { + "reconsidering_access_paths_for_index_ordering": + { + "clause": "ORDER BY", + "fanout": 1, + "read_time": 53.27053125, + "table": "t1", + "rows_estimation": 10000, + "possible_keys": + [ + { + "index": "a", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "b", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "col1", + "can_resolve_order": true, + "direction": 1, + "updated_limit": 5000, + "index_scan_time": 5000, + "usable": false, + "cause": "cost" + } + ] + } + } + ], + "can_skip_filesort": false, + "full_join_cost": 46064.98442, + "risk_ratio": 10, + "shortcut_join_cost": 24059.12698, + "shortcut_cost_with_risk": 240591.2698, + "use_shortcut_cost": false + } +] +# +# Query 4: LEFT JOIN makes it impossible to put ORDER-BY-table first, +# however the optimizer still puts it as sort_by_table. 
+# +set optimizer_join_limit_pref_ratio=10; +explain +select +* +from +t10 left join (t1 join t11 on t1.b=t11.b ) on t1.a=t10.a +order by +t1.col2 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10 ALL NULL NULL NULL NULL 100 Using temporary; Using filesort +1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where +1 SIMPLE t11 ref b b 5 test.t1.b 1 +set @trace=(select trace from information_schema.optimizer_trace); +# This will show nothing as limit shortcut code figures that +# it's not possible to use t1 to construct shortcuts: +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; +JS +NULL +# +# Query 5: Same as Q1 but also with a semi-join +# +set optimizer_join_limit_pref_ratio=default; +# Table t1 is not the first, have to use temporary+filesort: +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +where +t1.a in (select a from t10) and +t1.b in (select b from t11) +order by +t1.col1 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort +1 PRIMARY t10 ref a a 5 test.t10.a 1 Using index; LooseScan +1 PRIMARY t1 ref a,b a 5 test.t10.a 100 Using where +1 PRIMARY t11 ref b b 5 test.t1.b 1 +1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 +3 MATERIALIZED t11 index b b 5 NULL 100 Using index +set optimizer_join_limit_pref_ratio=10; +# t1 is first, key=col1 produces ordering, no filesort or temporary: +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +where +t1.a in (select a from t10) and +t1.b in (select b from t11) +order by +t1.col1 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 index a,b col1 5 NULL 1 Using where +1 PRIMARY t10 ref a a 5 test.t1.a 1 +1 PRIMARY t11 ref b b 5 test.t1.b 1 +1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 +1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 +2 MATERIALIZED t10 index a a 5 NULL 100 Using index +3 MATERIALIZED t11 index b b 5 NULL 100 Using index +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; +JS +[ + { + "limit_fraction": 0.001, + "test_if_skip_sort_order_early": + [ + { + "reconsidering_access_paths_for_index_ordering": + { + "clause": "ORDER BY", + "fanout": 1, + "read_time": 53.27053125, + "table": "t1", + "rows_estimation": 10000, + "possible_keys": + [ + { + "index": "a", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "b", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "col1", + "can_resolve_order": true, + "direction": 1, + "updated_limit": 10, + "index_scan_time": 10, + "records": 10000, + "chosen": true + } + ] + } + } + ], + "can_skip_filesort": true, + "full_join_cost": 47079.71684, + "risk_ratio": 10, + "shortcut_join_cost": 98.29697856, + "shortcut_cost_with_risk": 982.9697856, + "use_shortcut_cost": true + } +] +# +# Query 6: same as Query 1 but let's limit the search depth +# +set @tmp_osd=@@optimizer_search_depth; +set optimizer_search_depth=1; +set optimizer_join_limit_pref_ratio=default; +# Table t1 is not the first, have to use temporary+filesort: +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col1 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10 ALL a NULL 
NULL NULL 100 Using where; Using temporary; Using filesort +1 SIMPLE t11 ALL b NULL NULL NULL 100 Using join buffer (flat, BNL join) +1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where +set optimizer_join_limit_pref_ratio=10; +# t1 is first, key=col1 produces ordering, no filesort or temporary: +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col1 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index a,b col1 5 NULL 10 Using where +1 SIMPLE t10 ref a a 5 test.t1.a 1 +1 SIMPLE t11 ref b b 5 test.t1.b 1 +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; +JS +[ + { + "limit_fraction": 0.001, + "test_if_skip_sort_order_early": + [ + { + "reconsidering_access_paths_for_index_ordering": + { + "clause": "ORDER BY", + "fanout": 1, + "read_time": 53.27053125, + "table": "t1", + "rows_estimation": 10000, + "possible_keys": + [ + { + "index": "a", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "b", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "col1", + "can_resolve_order": true, + "direction": 1, + "updated_limit": 10, + "index_scan_time": 10, + "records": 10000, + "chosen": true + } + ] + } + } + ], + "can_skip_filesort": true, + "full_join_cost": 46064.98442, + "risk_ratio": 10, + "shortcut_join_cost": 97.28224614, + "shortcut_cost_with_risk": 972.8224614, + "use_shortcut_cost": true + } +] +set optimizer_search_depth=@tmp_osd; +set optimizer_trace=@tmp_os; +set optimizer_join_limit_pref_ratio=default; +drop table t1, t10, t11; diff --git a/mysql-test/main/order_by_limit_join.test b/mysql-test/main/order_by_limit_join.test new file mode 100644 index 00000000000..da05cdc30c3 --- /dev/null +++ b/mysql-test/main/order_by_limit_join.test @@ -0,0 +1,207 @@ +--echo # +--echo # MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT +--echo # + +--source include/have_sequence.inc + +# We need optimizer trace +--source include/not_embedded.inc + +create table t1 ( + a int, + b int, + c int, + col1 int, + col2 int, + index(a), + index(b), + index(col1) +); + +insert into t1 select + mod(seq, 100), + mod(seq, 95), + seq, + seq, + seq +from + seq_1_to_10000; + + +create table t10 ( + a int, + a_value char(10), + key(a) +); +insert into t10 select seq, seq from seq_1_to_100; + +create table t11 ( + b int, + b_value char(10), + key(b) +); +insert into t11 select seq, seq from seq_1_to_100; + +set @tmp_os=@@optimizer_trace; +set optimizer_trace=1; + +--echo # +--echo # Query 1 - basic example. +--echo # +let $query= explain +select + * +from + t1 + join t10 on t1.a=t10.a + join t11 on t1.b=t11.b +order by + t1.col1 +limit 10; + +--echo # Table t1 is not the first, have to use temporary+filesort: +eval $query; + +set optimizer_join_limit_pref_ratio=10; + +--echo # t1 is first, key=col1 produces ordering, no filesort or temporary: +eval $query; + +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; + +--echo # +--echo # Query 2 - same as above but without a suitable index. 
+--echo # +let $query= +explain +select + * +from + t1 + join t10 on t1.a=t10.a + join t11 on t1.b=t11.b +order by + t1.col2 +limit 10; + +--echo # Table t1 is not the first, have to use temporary+filesort: +set optimizer_join_limit_pref_ratio=0; +eval $query; + +--echo # t1 is first but there's no suitable index, +--echo # so we use filesort but using temporary: +set optimizer_join_limit_pref_ratio=10; +eval $query; + +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; + +--echo # +--echo # Query 3: Counter example with large limit +--echo # +let $query= explain +select + * +from + t1 + join t10 on t1.a=t10.a + join t11 on t1.b=t11.b +order by + t1.col1 +limit 5000; + +--echo # Table t1 is not the first, have to use temporary+filesort: +set optimizer_join_limit_pref_ratio=0; +eval $query; + +--echo # Same plan as above: +--echo # Table t1 is not the first, have to use temporary+filesort: +set optimizer_join_limit_pref_ratio=10; +eval $query; + +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; + +--echo # +--echo # Query 4: LEFT JOIN makes it impossible to put ORDER-BY-table first, +--echo # however the optimizer still puts it as sort_by_table. +--echo # +set optimizer_join_limit_pref_ratio=10; +explain +select + * +from + t10 left join (t1 join t11 on t1.b=t11.b ) on t1.a=t10.a +order by + t1.col2 +limit 10; + +set @trace=(select trace from information_schema.optimizer_trace); +--echo # This will show nothing as limit shortcut code figures that +--echo # it's not possible to use t1 to construct shortcuts: +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; + +--echo # +--echo # Query 5: Same as Q1 but also with a semi-join +--echo # +set optimizer_join_limit_pref_ratio=default; +let $query= explain +select + * +from + t1 + join t10 on t1.a=t10.a + join t11 on t1.b=t11.b +where + t1.a in (select a from t10) and + t1.b in (select b from t11) +order by + t1.col1 +limit 10; + +--echo # Table t1 is not the first, have to use temporary+filesort: +eval $query; + +set optimizer_join_limit_pref_ratio=10; + +--echo # t1 is first, key=col1 produces ordering, no filesort or temporary: +eval $query; + +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; + +--echo # +--echo # Query 6: same as Query 1 but let's limit the search depth +--echo # +set @tmp_osd=@@optimizer_search_depth; +set optimizer_search_depth=1; +let $query= explain +select + * +from + t1 + join t10 on t1.a=t10.a + join t11 on t1.b=t11.b +order by + t1.col1 +limit 10; + +set optimizer_join_limit_pref_ratio=default; +--echo # Table t1 is not the first, have to use temporary+filesort: +eval $query; + +set optimizer_join_limit_pref_ratio=10; + +--echo # t1 is first, key=col1 produces ordering, no filesort or temporary: +eval $query; + +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; + + +set optimizer_search_depth=@tmp_osd; +set optimizer_trace=@tmp_os; +set optimizer_join_limit_pref_ratio=default; +drop table t1, t10, t11; + diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result index 34a258fe2d5..7a0963af685 100644 --- 
a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result
+++ b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result
@@ -2342,6 +2342,16 @@ NUMERIC_BLOCK_SIZE 1
 ENUM_VALUE_LIST NULL
 READ_ONLY NO
 COMMAND_LINE_ARGUMENT REQUIRED
+VARIABLE_NAME OPTIMIZER_JOIN_LIMIT_PREF_RATIO
+VARIABLE_SCOPE SESSION
+VARIABLE_TYPE BIGINT UNSIGNED
+VARIABLE_COMMENT For queries with JOIN and ORDER BY LIMIT: make the optimizer consider a join order that allows short-cutting execution after producing #LIMIT matches if that promises an N times speedup. (A conservative setting here would be a high value, like 100, so the short-cutting plan is used if it promises a speedup of 100x or more). Short-cutting plans are inherently risky so the default is 0 which means do not consider this optimization
+NUMERIC_MIN_VALUE 0
+NUMERIC_MAX_VALUE 4294967295
+NUMERIC_BLOCK_SIZE 1
+ENUM_VALUE_LIST NULL
+READ_ONLY NO
+COMMAND_LINE_ARGUMENT REQUIRED
 VARIABLE_NAME OPTIMIZER_MAX_SEL_ARGS
 VARIABLE_SCOPE SESSION
 VARIABLE_TYPE BIGINT UNSIGNED
diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
index 0d643c03384..d25980e9902 100644
--- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
+++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
@@ -2512,6 +2512,16 @@ NUMERIC_BLOCK_SIZE 1
 ENUM_VALUE_LIST NULL
 READ_ONLY NO
 COMMAND_LINE_ARGUMENT REQUIRED
+VARIABLE_NAME OPTIMIZER_JOIN_LIMIT_PREF_RATIO
+VARIABLE_SCOPE SESSION
+VARIABLE_TYPE BIGINT UNSIGNED
+VARIABLE_COMMENT For queries with JOIN and ORDER BY LIMIT: make the optimizer consider a join order that allows short-cutting execution after producing #LIMIT matches if that promises an N times speedup. (A conservative setting here would be a high value, like 100, so the short-cutting plan is used if it promises a speedup of 100x or more).
Short-cutting plans are inherently risky so the default is 0 which means do not consider this optimization +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 4294967295 +NUMERIC_BLOCK_SIZE 1 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME OPTIMIZER_MAX_SEL_ARGS VARIABLE_SCOPE SESSION VARIABLE_TYPE BIGINT UNSIGNED diff --git a/sql/sql_class.h b/sql/sql_class.h index 0b8cabd3dbd..906f897415a 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -766,6 +766,7 @@ typedef struct system_variables ulong net_wait_timeout; ulong net_write_timeout; ulong optimizer_extra_pruning_depth; + ulonglong optimizer_join_limit_pref_ratio; ulong optimizer_prune_level; ulong optimizer_search_depth; ulong optimizer_selectivity_sampling_limit; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index b78f296be1c..b6d1c6a1482 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -234,12 +234,14 @@ static COND *make_cond_for_table_from_pred(THD *thd, Item *root_cond, static Item* part_of_refkey(TABLE *form,Field *field); uint find_shortest_key(TABLE *table, const key_map *usable_keys); -static bool test_if_cheaper_ordering(const JOIN_TAB *tab, +static bool test_if_cheaper_ordering(bool in_join_optimizer, + const JOIN_TAB *tab, ORDER *order, TABLE *table, key_map usable_keys, int key, ha_rows select_limit, int *new_key, int *new_key_direction, ha_rows *new_select_limit, + double *new_read_time, uint *new_used_key_parts= NULL, uint *saved_best_key_parts= NULL); static int test_if_order_by_key(JOIN *, ORDER *, TABLE *, uint, uint *); @@ -329,6 +331,18 @@ static void optimize_rownum(THD *thd, SELECT_LEX_UNIT *unit, Item *cond); static bool process_direct_rownum_comparison(THD *thd, SELECT_LEX_UNIT *unit, Item *cond); +static +bool join_limit_shortcut_is_applicable(const JOIN *join); +POSITION *join_limit_shortcut_finalize_plan(JOIN *join, double *cost); + +static +bool find_indexes_matching_order(JOIN *join, TABLE *table, ORDER *order, + key_map *usable_keys); +static +void compute_part_of_sort_key_for_equals(JOIN *join, TABLE *table, + Item_field *item_field, + key_map *col_keys); + #ifndef DBUG_OFF /* @@ -5860,6 +5874,7 @@ make_join_statistics(JOIN *join, List &tables_list, join->sort_by_table= get_sort_by_table(join->order, join->group_list, join->select_lex->leaf_tables, join->const_table_map); + join->limit_shortcut_applicable= join_limit_shortcut_is_applicable(join); /* Update info on indexes that can be used for search lookups as reading const tables may has added new sargable predicates. @@ -9282,6 +9297,7 @@ choose_plan(JOIN *join, table_map join_tables) THD *thd= join->thd; DBUG_ENTER("choose_plan"); + join->limit_optimization_mode= false; join->cur_embedding_map= 0; join->extra_heuristic_pruning= false; join->prune_level= join->thd->variables.optimizer_prune_level; @@ -9349,8 +9365,43 @@ choose_plan(JOIN *join, table_map join_tables) join->extra_heuristic_pruning= true; } + double limit_cost= DBL_MAX; + POSITION *limit_plan= NULL; + + /* First, build a join plan that can short-cut ORDER BY...LIMIT */ + if (join->limit_shortcut_applicable && !join->emb_sjm_nest) + { + bool res; + Json_writer_object wrapper(join->thd); + Json_writer_array trace(join->thd, "join_limit_shortcut_plan_search"); + join->limit_optimization_mode= true; + res= greedy_search(join, join_tables, search_depth, + use_cond_selectivity); + join->limit_optimization_mode= false; + + if (res) + DBUG_RETURN(TRUE); + DBUG_ASSERT(join->best_read != DBL_MAX); + + /* + We've built a join order. 
Adjust its cost based on ORDER BY...LIMIT
+      short-cutting.
+    */
+    limit_plan= join_limit_shortcut_finalize_plan(join, &limit_cost);
+  }
+
+  /* The main call to search for the query plan: */
   if (greedy_search(join, join_tables, search_depth, use_cond_selectivity))
     DBUG_RETURN(TRUE);
+
+  DBUG_ASSERT(join->best_read != DBL_MAX);
+  if (limit_plan && limit_cost < join->best_read)
+  {
+    /* Plan that uses ORDER BY ... LIMIT shortcutting is better. */
+    memcpy((uchar*)join->best_positions, (uchar*)limit_plan,
+           sizeof(POSITION)*join->table_count);
+    join->best_read= limit_cost;
+  }
 }
 
 /*
@@ -10578,6 +10629,315 @@ get_costs_for_tables(JOIN *join, table_map remaining_tables, uint idx,
   DBUG_RETURN(found_eq_ref);
 }
 
+
+/*
+  @brief
+  Check if it is potentially possible to short-cut the JOIN execution due to
+  the ORDER BY ... LIMIT clause
+
+  @detail
+  It is possible when the join has an "ORDER BY ... LIMIT n" clause, and the
+  sort+limit operation is done right after the join operation (there's no
+  grouping or DISTINCT in between).
+  Then we can potentially build a join plan that enumerates rows in the
+  ORDER BY order and so will be able to terminate as soon as it has produced
+  #limit rows.
+
+  Note that it is not a requirement that sort_by_table has an index that
+  matches ORDER BY. If it doesn't have one, the optimizer will pass
+  sort_by_table to filesort. Reading from sort_by_table won't use
+  short-cutting but the rest of the join will.
+*/
+
+static
+bool join_limit_shortcut_is_applicable(const JOIN *join)
+{
+  /*
+    Any post-join operation like GROUP BY or DISTINCT or window functions
+    means we cannot short-cut join execution
+  */
+  if (!join->thd->variables.optimizer_join_limit_pref_ratio ||
+      !join->order ||
+      join->select_limit == HA_POS_ERROR ||
+      join->group_list ||
+      join->select_distinct ||
+      join->select_options & SELECT_BIG_RESULT ||
+      join->rollup.state != ROLLUP::STATE_NONE ||
+      join->select_lex->have_window_funcs() ||
+      join->select_lex->with_sum_func)
+  {
+    return false;
+  }
+
+  /*
+    Cannot do short-cutting if
+    (1) ORDER BY refers to more than one table or
+    (2) the table it refers to cannot be the first table in the join order
+  */
+  if (!join->sort_by_table ||                            // (1)
+      join->sort_by_table->reginfo.join_tab->dependent)  // (2)
+    return false;
+
+  Json_writer_object wrapper(join->thd);
+  Json_writer_object trace(join->thd, "join_limit_shortcut_is_applicable");
+  trace.add("applicable", 1);
+  /* It looks like we can short-cut limit due to join */
+  return true;
+}
+
+
+/*
+  @brief
+  Check if we could use an index-based access method to produce rows
+  in the order required by ORDER BY ... LIMIT.
+
+  @detail
+  This should do what test_if_skip_sort_order() does. We can't use that
+  function directly, because:
+
+  1. We're at the join optimization stage and have not yet done the query
+     plan fix-ups performed in get_best_combination() and co.
+
+  2. The code in test_if_skip_sort_order() does modify query plan structures,
+     for example it may change the table's quick select. This is done even if
+     it's called with the no_changes=true parameter.
+
+  @param access_method_changed OUT  Whether the function changed the access
+                                    method to get rows in the desired order.
+  @param new_access_cost       OUT  If the access method changed: its cost.
+
+  @return
+    true  - Can skip sorting
+    false - Cannot skip sorting
+*/
+
+bool test_if_skip_sort_order_early(JOIN *join,
+                                   bool *access_method_changed,
+                                   double *new_access_cost)
+{
+  const POSITION *pos= &join->best_positions[join->const_tables];
+  TABLE *table= pos->table->table;
+  key_map usable_keys= table->keys_in_use_for_order_by;
+
+  *access_method_changed= false;
+
+  // Step #1: Find indexes that produce the required ordering.
+  if (find_indexes_matching_order(join, table, join->order, &usable_keys))
+  {
+    return false; // Cannot skip sorting
+  }
+
+  // Step #2: Check if the index we're using produces the needed ordering
+  uint ref_key;
+  if (pos->key)
+  {
+    // Mirror the (wrong) logic in test_if_skip_sort_order:
+    if (pos->spl_plan || pos->type == JT_REF_OR_NULL)
+      return false; // Use filesort
+
+    ref_key= pos->key->key;
+  }
+  else
+  {
+    if (pos->table->quick)
+    {
+      if (pos->table->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE)
+        ref_key= pos->table->quick->index;
+      else
+        ref_key= MAX_KEY;
+    }
+    else
+      ref_key= MAX_KEY;
+  }
+
+  if (ref_key != MAX_KEY && usable_keys.is_set(ref_key))
+  {
+    return true; // we're using an index that produces the required ordering.
+  }
+
+  /*
+    Step #3: check if we can switch to using an index that would produce the
+    ordering.
+    (But don't actually switch, this will be done by test_if_skip_sort_order)
+  */
+  int best_key= -1;
+  uint UNINIT_VAR(best_key_parts);
+  uint saved_best_key_parts= 0;
+  int best_key_direction= 0;
+  JOIN_TAB *tab= pos->table;
+  ha_rows new_limit;
+  double new_read_time;
+  if (test_if_cheaper_ordering(/*in_join_optimizer*/ TRUE,
+                               tab, join->order, table, usable_keys,
+                               ref_key, join->select_limit,
+                               &best_key, &best_key_direction,
+                               &new_limit, &new_read_time,
+                               &best_key_parts,
+                               &saved_best_key_parts))
+  {
+    // Ok, we have found a way to skip sorting
+    *access_method_changed= true;
+    *new_access_cost= new_read_time;
+    return true;
+  }
+
+  return false;
+}
+
+
+/*
+  Compute the cost of the join assuming we only need a fraction of its output.
+*/
+
+double recompute_join_cost_with_limit(const JOIN *join, bool skip_sorting,
+                                      double *first_table_cost,
+                                      double fraction)
+{
+  POSITION *pos= join->best_positions + join->const_tables;
+  /*
+    Generally, we assume that producing X% of the output takes X% of the cost.
+  */
+  double partial_join_cost= join->best_read * fraction;
+
+  if (skip_sorting)
+  {
+    /*
+      The first table produces rows in the required order. Two options:
+
+      A. first_table_cost=NULL means we use whatever access method the join
+         optimizer has picked. Its cost was included in join->best_read and
+         we have already taken a fraction of it.
+
+      B. first_table_cost!=NULL means we will need to switch to another
+         access method, and we have the cost of reading enough rows to
+         produce #LIMIT rows of join output.
+    */
+    if (first_table_cost)
+    {
+      /*
+        Subtract the remainder of the first table's cost we had in
+        join->best_read:
+      */
+      partial_join_cost -= pos->read_time*fraction;
+      partial_join_cost -= pos->records_read*fraction / TIME_FOR_COMPARE;
+
+      /* Add the cost of the new access method we've got: */
+      partial_join_cost= COST_ADD(partial_join_cost, *first_table_cost);
+    }
+  }
+  else
+  {
+    DBUG_ASSERT(!first_table_cost);
+    /*
+      Cannot skip sorting. We read the first table entirely, then sort it.
+
+      partial_join_cost includes pos->read_time*fraction. Add to it
+      pos->read_time*(1-fraction) so that we have the cost of reading the
+      entire first table. Do the same for the cost of checking the WHERE.
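+
+      Illustration with made-up numbers (not taken from any real plan): if
+      fraction=0.1, pos->read_time=100 and pos->records_read=1000, then
+      partial_join_cost already contains 100*0.1 + 1000*0.1/TIME_FOR_COMPARE
+      for this table; below we add the remaining 100*0.9 and
+      1000*0.9/TIME_FOR_COMPARE, so the full scan and the WHERE checks are
+      costed in full before the filesort.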
+ */ + double extra_first_table_cost= pos->read_time * (1.0 - fraction); + double extra_first_table_where= pos->records_read * (1.0 - fraction) / + TIME_FOR_COMPARE; + + partial_join_cost= COST_ADD(partial_join_cost, + COST_ADD(extra_first_table_cost, + extra_first_table_where)); + } + return partial_join_cost; +} + + +/* + @brief + Finalize building the join order which allows to short-cut the join + execution. + + @detail + This is called after we have produced a join order that allows short- + cutting. + Here, we decide if it is cheaper to use this one or the original join + order. +*/ + +POSITION *join_limit_shortcut_finalize_plan(JOIN *join, double *cost) +{ + Json_writer_object wrapper(join->thd); + Json_writer_object trace(join->thd, "join_limit_shortcut_choice"); + + double fraction= join->select_limit / join->join_record_count; + trace.add("limit_fraction", fraction); + + /* Check which fraction of join output we need */ + if (fraction >= 1.0) + { + trace.add("skip_adjustment", "no short-cutting"); + return NULL; + } + + /* + Check if the first table's access method produces the required ordering. + Possible options: + 1. Yes: we can just take a fraction of the execution cost. + 2A No: change the access method to one that does produce the required + ordering, update the costs. + 2B No: Need to pass the first table to filesort(). + */ + bool skip_sorting; + bool access_method_changed; + double new_access_cost; + { + Json_writer_array tmp(join->thd, "test_if_skip_sort_order_early"); + skip_sorting= test_if_skip_sort_order_early(join, + &access_method_changed, + &new_access_cost); + } + trace.add("can_skip_filesort", skip_sorting); + + double cost_with_shortcut= + recompute_join_cost_with_limit(join, skip_sorting, + access_method_changed ? + &new_access_cost : (double*)0, + fraction); + double risk_ratio= + (double)join->thd->variables.optimizer_join_limit_pref_ratio; + trace.add("full_join_cost", join->best_read); + trace.add("risk_ratio", risk_ratio); + trace.add("shortcut_join_cost", cost_with_shortcut); + cost_with_shortcut *= risk_ratio; + trace.add("shortcut_cost_with_risk", cost_with_shortcut); + if (cost_with_shortcut < join->best_read) + { + trace.add("use_shortcut_cost", true); + POSITION *pos= (POSITION*)memdup_root(join->thd->mem_root, + join->best_positions, + sizeof(POSITION)* + (join->table_count + 1)); + *cost= cost_with_shortcut; + return pos; + } + trace.add("use_shortcut_cost", false); + return NULL; +} + + +/* + @brief + If we're in Limit Optimization Mode, allow only join->sort_by_table as + the first table in the join order +*/ + +static +bool join_limit_shortcut_limits_tables(const JOIN *join, uint idx, table_map *map) +{ + if (join->limit_optimization_mode && idx == join->const_tables) + { + *map= join->sort_by_table->map; + return true; + } + return false; +} + + /** Find a good, possibly optimal, query execution plan (QEP) by a possibly exhaustive search. @@ -10760,6 +11120,9 @@ best_extension_by_limited_search(JOIN *join, */ allowed_tables= remaining_tables; allowed_current_tables= join->get_allowed_nj_tables(idx) & remaining_tables; + table_map sort_table; + if (join_limit_shortcut_limits_tables(join, idx, &sort_table)) + allowed_current_tables= sort_table; } DBUG_ASSERT(allowed_tables & remaining_tables); @@ -25106,6 +25469,7 @@ find_field_in_item_list (Field *field, void *data) that belong to 'table' and are equal to 'item_field'. 
*/ +static void compute_part_of_sort_key_for_equals(JOIN *join, TABLE *table, Item_field *item_field, key_map *col_keys) @@ -25250,6 +25614,59 @@ static void prepare_for_reverse_ordered_access(JOIN_TAB *tab) } +/* + @brief + Given a table and order, find indexes that produce rows in the order + + @param usable_keys IN Bitmap of keys we can use + OUT Bitmap of indexes that produce rows in order. + + @return + false Some indexes were found + true No indexes found +*/ + +static +bool find_indexes_matching_order(JOIN *join, TABLE *table, ORDER *order, + key_map *usable_keys) +{ + /* Find indexes that cover all ORDER/GROUP BY fields */ + for (ORDER *tmp_order=order; tmp_order ; tmp_order=tmp_order->next) + { + Item *item= (*tmp_order->item)->real_item(); + if (item->type() != Item::FIELD_ITEM) + { + usable_keys->clear_all(); + return true; /* No suitable keys */ + } + + /* + Take multiple-equalities into account. Suppose we have + ORDER BY col1, col10 + and there are + multiple-equal(col1, col2, col3), + multiple-equal(col10, col11). + + Then, + - when item=col1, we find the set of indexes that cover one of {col1, + col2, col3} + - when item=col10, we find the set of indexes that cover one of {col10, + col11} + + And we compute an intersection of these sets to find set of indexes that + cover all ORDER BY components. + */ + key_map col_keys; + compute_part_of_sort_key_for_equals(join, table, (Item_field*)item, + &col_keys); + usable_keys->intersect(col_keys); + if (usable_keys->is_clear_all()) + return true; // No usable keys + } + return false; + +} + /** Test if we can skip the ORDER BY by using an index. @@ -25311,41 +25728,17 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, been taken into account. */ usable_keys= *map; - - /* Find indexes that cover all ORDER/GROUP BY fields */ - for (ORDER *tmp_order=order; tmp_order ; tmp_order=tmp_order->next) + + // Step #1: Find indexes that produce the required ordering. + if (find_indexes_matching_order(tab->join, table, order, &usable_keys)) { - Item *item= (*tmp_order->item)->real_item(); - if (item->type() != Item::FIELD_ITEM) - { - usable_keys.clear_all(); - DBUG_RETURN(0); - } - - /* - Take multiple-equalities into account. Suppose we have - ORDER BY col1, col10 - and there are - multiple-equal(col1, col2, col3), - multiple-equal(col10, col11). - - Then, - - when item=col1, we find the set of indexes that cover one of {col1, - col2, col3} - - when item=col10, we find the set of indexes that cover one of {col10, - col11} - - And we compute an intersection of these sets to find set of indexes that - cover all ORDER BY components. - */ - key_map col_keys; - compute_part_of_sort_key_for_equals(tab->join, table, (Item_field*)item, - &col_keys); - usable_keys.intersect(col_keys); - if (usable_keys.is_clear_all()) - goto use_filesort; // No usable keys + DBUG_RETURN(false); // Cannot skip sorting } + /* + Step #2: Analyze the current access method. Note the used index as ref_key + and #used keyparts in ref_key_parts. 
+ */ ref_key= -1; /* Test if constant range in WHERE */ if (tab->ref.key >= 0 && tab->ref.key_parts) @@ -25389,6 +25782,12 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, } } + /* + Step #3: Check if index ref_key that we're using produces the required + ordering or if there is another index new_ref_key such that + - ref_key is a prefix of new_ref_key (so, access method can be reused) + - new_ref_key produces the required ordering + */ if (ref_key >= 0 && ref_key != MAX_KEY) { /* Current access method uses index ref_key with ref_key_parts parts */ @@ -25508,17 +25907,24 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, &used_key_parts))) goto check_reverse_order; } + + /* + Step #4: Go through all indexes that produce required ordering (in + usable_keys) and check if any of them is cheaper than ref_key + */ { uint UNINIT_VAR(best_key_parts); uint saved_best_key_parts= 0; int best_key_direction= 0; JOIN *join= tab->join; ha_rows table_records= table->stat_records(); + double new_read_time_dummy; - test_if_cheaper_ordering(tab, order, table, usable_keys, + test_if_cheaper_ordering(FALSE, tab, order, table, usable_keys, ref_key, select_limit, &best_key, &best_key_direction, - &select_limit, &best_key_parts, + &select_limit, &new_read_time_dummy, + &best_key_parts, &saved_best_key_parts); /* @@ -30761,11 +31167,13 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab, */ static bool -test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, +test_if_cheaper_ordering(bool in_join_optimizer, + const JOIN_TAB *tab, ORDER *order, TABLE *table, key_map usable_keys, int ref_key, ha_rows select_limit_arg, int *new_key, int *new_key_direction, - ha_rows *new_select_limit, uint *new_used_key_parts, + ha_rows *new_select_limit, double *new_read_time, + uint *new_used_key_parts, uint *saved_best_key_parts) { DBUG_ENTER("test_if_cheaper_ordering"); @@ -30823,7 +31231,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, if (join) { - uint tablenr= (uint)(tab - join->join_tab); + uint tablenr= join->const_tables; read_time= join->best_positions[tablenr].read_time; records= join->best_positions[tablenr].records_read; for (uint i= tablenr+1; i < join->table_count; i++) @@ -30861,12 +31269,27 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, Calculate the selectivity of the ref_key for REF_ACCESS. For RANGE_ACCESS we use table->opt_range_condition_rows. */ - if (ref_key >= 0 && ref_key != MAX_KEY && tab->type == JT_REF) + if (in_join_optimizer) + { + if (ref_key >= 0 && ref_key != MAX_KEY && + join->best_positions[join->const_tables].type == JT_REF) + { + refkey_rows_estimate= + (ha_rows)join->best_positions[join->const_tables].records_read; + set_if_bigger(refkey_rows_estimate, 1); + } + } + else if (ref_key >= 0 && ref_key != MAX_KEY && tab->type == JT_REF) { /* If ref access uses keypart=const for all its key parts, and quick select uses the same # of key parts, then they are equivalent. Reuse #rows estimate from quick select as it is more precise. + + Note: we could just have used + join->best_positions[join->const_tables].records_read + here. That number was computed in best_access_path() and it already + includes adjustments based on table->opt_range[ref_key].rows. 
     */
     if (tab->ref.const_ref_part_map ==
         make_prev_keypart_map(tab->ref.key_parts) &&
@@ -31193,6 +31616,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
   *new_key= best_key;
   *new_key_direction= best_key_direction;
   *new_select_limit= has_limit ? best_select_limit : table_records;
+  *new_read_time= read_time;
   if (new_used_key_parts != NULL)
     *new_used_key_parts= best_key_parts;
   DBUG_RETURN(TRUE);
@@ -31292,10 +31716,11 @@ uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select,
     table->opt_range_condition_rows= table->stat_records();
 
   int key, direction;
-  if (test_if_cheaper_ordering(NULL, order, table,
+  double new_cost;
+  if (test_if_cheaper_ordering(FALSE, NULL, order, table,
                                table->keys_in_use_for_order_by, -1, limit,
-                               &key, &direction, &limit) &&
+                               &key, &direction, &limit, &new_cost) &&
       !is_key_used(table, key, table->write_set))
   {
     *need_sort= FALSE;
diff --git a/sql/sql_select.h b/sql/sql_select.h
index df60f1632b1..05201000803 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -1212,6 +1212,20 @@ public:
     passing 1st non-const table to filesort(). NULL means no such table exists.
   */
   TABLE *sort_by_table;
+
+  /*
+    If true, there is an ORDER BY x LIMIT n clause and for certain join orders,
+    it is possible to short-cut the join execution, i.e. stop it as soon as n
+    output rows have been produced. See join_limit_shortcut_is_applicable().
+  */
+  bool limit_shortcut_applicable;
+
+  /*
+    Used during join optimization: if true, we're building a join order that
+    will short-cut join execution as soon as #LIMIT rows are produced.
+  */
+  bool limit_optimization_mode;
+
   /*
     Number of tables in the join.
     (In MySQL, it is named 'tables' and is also the number of elements in
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index 78017603b51..6d52dfd516a 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -2816,6 +2816,20 @@ static Sys_var_ulong Sys_optimizer_selectivity_sampling_limit(
        VALID_RANGE(SELECTIVITY_SAMPLING_THRESHOLD, UINT_MAX),
        DEFAULT(SELECTIVITY_SAMPLING_LIMIT), BLOCK_SIZE(1));
 
+static Sys_var_ulonglong Sys_optimizer_join_limit_pref_ratio(
+       "optimizer_join_limit_pref_ratio",
+       "For queries with JOIN and ORDER BY LIMIT: make the optimizer "
+       "consider a join order that allows short-cutting execution after "
+       "producing #LIMIT matches if that promises an N times speedup. "
+       "(A conservative setting here would be a high value, like 100, so "
+       "the short-cutting plan is used if it promises a speedup of 100x "
+       "or more). Short-cutting plans are inherently risky so the default "
+       "is 0 which means do not consider this optimization",
+       SESSION_VAR(optimizer_join_limit_pref_ratio),
+       CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(0, UINT_MAX),
+       DEFAULT(0), BLOCK_SIZE(1));
+
 static Sys_var_ulong Sys_optimizer_use_condition_selectivity(
        "optimizer_use_condition_selectivity",
        "Controls selectivity of which conditions the optimizer takes into "
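
For reviewers who want to try the patch out, here is a minimal usage sketch modeled on the
order_by_limit_join.test scenario above. The tables and query are taken from that test; the
ratio of 100 is only an illustrative (conservative) choice, not a recommendation:

# Assumes t1/t10/t11 created as in mysql-test/main/order_by_limit_join.test
set session optimizer_join_limit_pref_ratio=100;  # shortcut only if ~100x cheaper
explain
select *
from t1
  join t10 on t1.a=t10.a
  join t11 on t1.b=t11.b
order by t1.col1
limit 10;
# With this setting, t1 is expected to be placed first and read via index col1,
# so the plan should not show "Using temporary; Using filesort".
set session optimizer_join_limit_pref_ratio=default;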