From c8d040938a7ebe10e62506a726702c5990ef4dda Mon Sep 17 00:00:00 2001
From: Sergei Petrunia
Date: Sun, 18 Aug 2024 20:10:19 +0300
Subject: [PATCH] MDEV-34720: Poor plan choice for large JOIN with ORDER BY and
 small LIMIT

(Variant 2b: call greedy_search() twice, correct handling for limited
search_depth)

Modify the join optimizer to specifically try to produce join orders that
can short-cut their execution for the ORDER BY ... LIMIT clause.

The optimization is controlled by @@optimizer_join_limit_pref_ratio.
The default value 0 means: do not construct short-cutting join orders.
Any other value means: construct a short-cutting join order, and prefer it
only if it promises a speedup of more than #value times.

In Optimizer Trace, look for these names:
* join_limit_shortcut_is_applicable
* join_limit_shortcut_plan_search
* join_limit_shortcut_choice
---
 mysql-test/main/mysqld--help.result           |  10 +
 mysql-test/main/order_by_limit_join.result    | 461 ++++++++++++++++++
 mysql-test/main/order_by_limit_join.test      | 207 ++++++++
 .../sys_vars/r/sysvars_server_embedded.result |  10 +
 .../r/sysvars_server_notembedded.result       |  10 +
 sql/sql_class.h                               |   1 +
 sql/sql_select.cc                             | 394 ++++++++++++++-
 sql/sql_select.h                              |  14 +
 sql/sys_vars.cc                               |  14 +
 9 files changed, 1110 insertions(+), 11 deletions(-)
 create mode 100644 mysql-test/main/order_by_limit_join.result
 create mode 100644 mysql-test/main/order_by_limit_join.test

diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result
index cabf8037508..08cd1202655 100644
--- a/mysql-test/main/mysqld--help.result
+++ b/mysql-test/main/mysqld--help.result
@@ -724,6 +724,15 @@ The following specify which files/extra groups are read (specified before remain
 keys. This variable will be deleted in MariaDB 11.0 as it
 is not needed with the new 11.0 optimizer. Use 'ALL' to
 set all combinations.
+ --optimizer-join-limit-pref-ratio=#
+ For queries with JOIN and ORDER BY LIMIT : make the
+ optimizer consider a join order that allows to short-cut
+ execution after producing #LIMIT matches if that promises
+ N times speedup. (A conservative setting here would be is
+ a high value, like 100 so the short-cutting plan is used
+ if it promises a speedup of 100x or more). Short-cutting
+ plans are inherently risky so the default is 0 which
+ means do not consider this optimization
 --optimizer-max-sel-arg-weight=#
 The maximum weight of the SEL_ARG graph. Set to 0 for no
 limit
@@ -1697,6 +1706,7 @@ old-mode UTF8_IS_UTF8MB3
 old-passwords FALSE
 old-style-user-limits FALSE
 optimizer-adjust-secondary-key-costs
+optimizer-join-limit-pref-ratio 0
 optimizer-max-sel-arg-weight 32000
 optimizer-max-sel-args 16000
 optimizer-prune-level 1
diff --git a/mysql-test/main/order_by_limit_join.result b/mysql-test/main/order_by_limit_join.result
new file mode 100644
index 00000000000..3f48aa3f65f
--- /dev/null
+++ b/mysql-test/main/order_by_limit_join.result
@@ -0,0 +1,461 @@
+#
+# MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT
+#
+create table t1 (
+a int,
+b int,
+c int,
+col1 int,
+col2 int,
+index(a),
+index(b),
+index(col1)
+);
+insert into t1 select
+mod(seq, 100),
+mod(seq, 95),
+seq,
+seq,
+seq
+from
+seq_1_to_10000;
+create table t10 (
+a int,
+a_value char(10),
+key(a)
+);
+insert into t10 select seq, seq from seq_1_to_100;
+create table t11 (
+b int,
+b_value char(10),
+key(b)
+);
+insert into t11 select seq, seq from seq_1_to_100;
+set @tmp_os=@@optimizer_trace;
+set optimizer_trace=1;
+#
+# Query 1 - basic example.
+# +# Table t1 is not the first, have to use temporary+filesort: +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col1 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort +1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where +1 SIMPLE t11 ref b b 5 test.t1.b 1 +set optimizer_join_limit_pref_ratio=10; +# t1 is first, key=col1 produces ordering, no filesort or temporary: +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col1 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index a,b col1 5 NULL 10 Using where +1 SIMPLE t10 ref a a 5 test.t1.a 1 +1 SIMPLE t11 ref b b 5 test.t1.b 1 +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; +JS +[ + { + "limit_fraction": 0.001, + "test_if_skip_sort_order_early": + [ + { + "reconsidering_access_paths_for_index_ordering": + { + "clause": "ORDER BY", + "fanout": 1, + "read_time": 53.27053125, + "table": "t1", + "rows_estimation": 10000, + "possible_keys": + [ + { + "index": "a", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "b", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "col1", + "can_resolve_order": true, + "updated_limit": 10, + "index_scan_time": 10, + "records": 10000, + "chosen": true + } + ] + } + } + ], + "can_skip_filesort": true, + "full_join_cost": 46064.98442, + "risk_ratio": 10, + "shortcut_join_cost": 97.28224614, + "shortcut_cost_with_risk": 972.8224614, + "use_shortcut_cost": true + } +] +# +# Query 2 - same as above but without a suitable index. 
+# +# Table t1 is not the first, have to use temporary+filesort: +set optimizer_join_limit_pref_ratio=0; +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col2 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort +1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where +1 SIMPLE t11 ref b b 5 test.t1.b 1 +# t1 is first but there's no suitable index, +# so we use filesort but using temporary: +set optimizer_join_limit_pref_ratio=10; +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col2 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL a,b NULL NULL NULL 10000 Using where; Using filesort +1 SIMPLE t10 ref a a 5 test.t1.a 1 +1 SIMPLE t11 ref b b 5 test.t1.b 1 +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; +JS +[ + { + "limit_fraction": 0.001, + "test_if_skip_sort_order_early": + [], + "can_skip_filesort": false, + "full_join_cost": 46064.98442, + "risk_ratio": 10, + "shortcut_join_cost": 2097.281246, + "shortcut_cost_with_risk": 20972.81246, + "use_shortcut_cost": true + } +] +# +# Query 3: Counter example with large limit +# +# Table t1 is not the first, have to use temporary+filesort: +set optimizer_join_limit_pref_ratio=0; +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col1 +limit 5000; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort +1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where +1 SIMPLE t11 ref b b 5 test.t1.b 1 +# Same plan as above: +# Table t1 is not the first, have to use temporary+filesort: +set optimizer_join_limit_pref_ratio=10; +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col1 +limit 5000; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort +1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where +1 SIMPLE t11 ref b b 5 test.t1.b 1 +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; +JS +[ + { + "limit_fraction": 0.5, + "test_if_skip_sort_order_early": + [ + { + "reconsidering_access_paths_for_index_ordering": + { + "clause": "ORDER BY", + "fanout": 1, + "read_time": 53.27053125, + "table": "t1", + "rows_estimation": 10000, + "possible_keys": + [ + { + "index": "a", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "b", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "col1", + "can_resolve_order": true, + "updated_limit": 5000, + "index_scan_time": 5000, + "usable": false, + "cause": "cost" + } + ] + } + } + ], + "can_skip_filesort": false, + "full_join_cost": 46064.98442, + "risk_ratio": 10, + "shortcut_join_cost": 24059.12698, + "shortcut_cost_with_risk": 240591.2698, + "use_shortcut_cost": false + } +] +# +# Query 4: LEFT JOIN makes it impossible to put ORDER-BY-table first, +# however the optimizer still puts it as sort_by_table. 
+# +set optimizer_join_limit_pref_ratio=10; +explain +select +* +from +t10 left join (t1 join t11 on t1.b=t11.b ) on t1.a=t10.a +order by +t1.col2 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10 ALL NULL NULL NULL NULL 100 Using temporary; Using filesort +1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where +1 SIMPLE t11 ref b b 5 test.t1.b 1 +set @trace=(select trace from information_schema.optimizer_trace); +# This will show nothing as limit shortcut code figures that +# it's not possible to use t1 to construct shortcuts: +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; +JS +NULL +# +# Query 5: Same as Q1 but also with a semi-join +# +set optimizer_join_limit_pref_ratio=default; +# Table t1 is not the first, have to use temporary+filesort: +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +where +t1.a in (select a from t10) and +t1.b in (select b from t11) +order by +t1.col1 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort +1 PRIMARY t10 ref a a 5 test.t10.a 1 Using index; LooseScan +1 PRIMARY t1 ref a,b a 5 test.t10.a 100 Using where +1 PRIMARY t11 ref b b 5 test.t1.b 1 +1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 +3 MATERIALIZED t11 index b b 5 NULL 100 Using index +set optimizer_join_limit_pref_ratio=10; +# t1 is first, key=col1 produces ordering, no filesort or temporary: +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +where +t1.a in (select a from t10) and +t1.b in (select b from t11) +order by +t1.col1 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 index a,b col1 5 NULL 1 Using where +1 PRIMARY t10 ref a a 5 test.t1.a 1 +1 PRIMARY t11 ref b b 5 test.t1.b 1 +1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 +1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 +3 MATERIALIZED t11 index b b 5 NULL 100 Using index +2 MATERIALIZED t10 index a a 5 NULL 100 Using index +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; +JS +[ + { + "limit_fraction": 0.001, + "test_if_skip_sort_order_early": + [ + { + "reconsidering_access_paths_for_index_ordering": + { + "clause": "ORDER BY", + "fanout": 1, + "read_time": 53.27053125, + "table": "t1", + "rows_estimation": 10000, + "possible_keys": + [ + { + "index": "a", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "b", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "col1", + "can_resolve_order": true, + "updated_limit": 10, + "index_scan_time": 10, + "records": 10000, + "chosen": true + } + ] + } + } + ], + "can_skip_filesort": true, + "full_join_cost": 47079.71684, + "risk_ratio": 10, + "shortcut_join_cost": 98.29697856, + "shortcut_cost_with_risk": 982.9697856, + "use_shortcut_cost": true + } +] +# +# Query 6: same as Query 1 but let's limit the search depth +# +set @tmp_osd=@@optimizer_search_depth; +set optimizer_search_depth=1; +set optimizer_join_limit_pref_ratio=default; +# Table t1 is not the first, have to use temporary+filesort: +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col1 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10 ALL a NULL NULL NULL 100 Using 
where; Using temporary; Using filesort +1 SIMPLE t11 ALL b NULL NULL NULL 100 Using join buffer (flat, BNL join) +1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where +set optimizer_join_limit_pref_ratio=10; +# t1 is first, key=col1 produces ordering, no filesort or temporary: +explain +select +* +from +t1 +join t10 on t1.a=t10.a +join t11 on t1.b=t11.b +order by +t1.col1 +limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index a,b col1 5 NULL 10 Using where +1 SIMPLE t10 ref a a 5 test.t1.a 1 +1 SIMPLE t11 ref b b 5 test.t1.b 1 +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; +JS +[ + { + "limit_fraction": 0.001, + "test_if_skip_sort_order_early": + [ + { + "reconsidering_access_paths_for_index_ordering": + { + "clause": "ORDER BY", + "fanout": 1, + "read_time": 53.27053125, + "table": "t1", + "rows_estimation": 10000, + "possible_keys": + [ + { + "index": "a", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "b", + "can_resolve_order": false, + "cause": "not usable index for the query" + }, + { + "index": "col1", + "can_resolve_order": true, + "updated_limit": 10, + "index_scan_time": 10, + "records": 10000, + "chosen": true + } + ] + } + } + ], + "can_skip_filesort": true, + "full_join_cost": 46064.98442, + "risk_ratio": 10, + "shortcut_join_cost": 97.28224614, + "shortcut_cost_with_risk": 972.8224614, + "use_shortcut_cost": true + } +] +set optimizer_search_depth=@tmp_osd; +set optimizer_trace=@tmp_os; +set optimizer_join_limit_pref_ratio=default; +drop table t1, t10, t11; diff --git a/mysql-test/main/order_by_limit_join.test b/mysql-test/main/order_by_limit_join.test new file mode 100644 index 00000000000..da05cdc30c3 --- /dev/null +++ b/mysql-test/main/order_by_limit_join.test @@ -0,0 +1,207 @@ +--echo # +--echo # MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT +--echo # + +--source include/have_sequence.inc + +# We need optimizer trace +--source include/not_embedded.inc + +create table t1 ( + a int, + b int, + c int, + col1 int, + col2 int, + index(a), + index(b), + index(col1) +); + +insert into t1 select + mod(seq, 100), + mod(seq, 95), + seq, + seq, + seq +from + seq_1_to_10000; + + +create table t10 ( + a int, + a_value char(10), + key(a) +); +insert into t10 select seq, seq from seq_1_to_100; + +create table t11 ( + b int, + b_value char(10), + key(b) +); +insert into t11 select seq, seq from seq_1_to_100; + +set @tmp_os=@@optimizer_trace; +set optimizer_trace=1; + +--echo # +--echo # Query 1 - basic example. +--echo # +let $query= explain +select + * +from + t1 + join t10 on t1.a=t10.a + join t11 on t1.b=t11.b +order by + t1.col1 +limit 10; + +--echo # Table t1 is not the first, have to use temporary+filesort: +eval $query; + +set optimizer_join_limit_pref_ratio=10; + +--echo # t1 is first, key=col1 produces ordering, no filesort or temporary: +eval $query; + +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; + +--echo # +--echo # Query 2 - same as above but without a suitable index. 
+--echo # +let $query= +explain +select + * +from + t1 + join t10 on t1.a=t10.a + join t11 on t1.b=t11.b +order by + t1.col2 +limit 10; + +--echo # Table t1 is not the first, have to use temporary+filesort: +set optimizer_join_limit_pref_ratio=0; +eval $query; + +--echo # t1 is first but there's no suitable index, +--echo # so we use filesort but using temporary: +set optimizer_join_limit_pref_ratio=10; +eval $query; + +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; + +--echo # +--echo # Query 3: Counter example with large limit +--echo # +let $query= explain +select + * +from + t1 + join t10 on t1.a=t10.a + join t11 on t1.b=t11.b +order by + t1.col1 +limit 5000; + +--echo # Table t1 is not the first, have to use temporary+filesort: +set optimizer_join_limit_pref_ratio=0; +eval $query; + +--echo # Same plan as above: +--echo # Table t1 is not the first, have to use temporary+filesort: +set optimizer_join_limit_pref_ratio=10; +eval $query; + +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; + +--echo # +--echo # Query 4: LEFT JOIN makes it impossible to put ORDER-BY-table first, +--echo # however the optimizer still puts it as sort_by_table. +--echo # +set optimizer_join_limit_pref_ratio=10; +explain +select + * +from + t10 left join (t1 join t11 on t1.b=t11.b ) on t1.a=t10.a +order by + t1.col2 +limit 10; + +set @trace=(select trace from information_schema.optimizer_trace); +--echo # This will show nothing as limit shortcut code figures that +--echo # it's not possible to use t1 to construct shortcuts: +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; + +--echo # +--echo # Query 5: Same as Q1 but also with a semi-join +--echo # +set optimizer_join_limit_pref_ratio=default; +let $query= explain +select + * +from + t1 + join t10 on t1.a=t10.a + join t11 on t1.b=t11.b +where + t1.a in (select a from t10) and + t1.b in (select b from t11) +order by + t1.col1 +limit 10; + +--echo # Table t1 is not the first, have to use temporary+filesort: +eval $query; + +set optimizer_join_limit_pref_ratio=10; + +--echo # t1 is first, key=col1 produces ordering, no filesort or temporary: +eval $query; + +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; + +--echo # +--echo # Query 6: same as Query 1 but let's limit the search depth +--echo # +set @tmp_osd=@@optimizer_search_depth; +set optimizer_search_depth=1; +let $query= explain +select + * +from + t1 + join t10 on t1.a=t10.a + join t11 on t1.b=t11.b +order by + t1.col1 +limit 10; + +set optimizer_join_limit_pref_ratio=default; +--echo # Table t1 is not the first, have to use temporary+filesort: +eval $query; + +set optimizer_join_limit_pref_ratio=10; + +--echo # t1 is first, key=col1 produces ordering, no filesort or temporary: +eval $query; + +set @trace=(select trace from information_schema.optimizer_trace); +select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS; + + +set optimizer_search_depth=@tmp_osd; +set optimizer_trace=@tmp_os; +set optimizer_join_limit_pref_ratio=default; +drop table t1, t10, t11; + diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result index b5c47ac5ecd..42b661bc448 100644 --- 
a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result
+++ b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result
@@ -2282,6 +2282,16 @@ NUMERIC_BLOCK_SIZE NULL
 ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality
 READ_ONLY NO
 COMMAND_LINE_ARGUMENT REQUIRED
+VARIABLE_NAME OPTIMIZER_JOIN_LIMIT_PREF_RATIO
+VARIABLE_SCOPE SESSION
+VARIABLE_TYPE BIGINT UNSIGNED
+VARIABLE_COMMENT For queries with JOIN and ORDER BY LIMIT : make the optimizer consider a join order that allows to short-cut execution after producing #LIMIT matches if that promises N times speedup. (A conservative setting here would be is a high value, like 100 so the short-cutting plan is used if it promises a speedup of 100x or more). Short-cutting plans are inherently risky so the default is 0 which means do not consider this optimization
+NUMERIC_MIN_VALUE 0
+NUMERIC_MAX_VALUE 4294967295
+NUMERIC_BLOCK_SIZE 1
+ENUM_VALUE_LIST NULL
+READ_ONLY NO
+COMMAND_LINE_ARGUMENT REQUIRED
 VARIABLE_NAME OPTIMIZER_MAX_SEL_ARGS
 VARIABLE_SCOPE SESSION
 VARIABLE_TYPE BIGINT UNSIGNED
diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
index 59b9cbb8d96..b626551a570 100644
--- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
+++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
@@ -2442,6 +2442,16 @@ NUMERIC_BLOCK_SIZE NULL
 ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality
 READ_ONLY NO
 COMMAND_LINE_ARGUMENT REQUIRED
+VARIABLE_NAME OPTIMIZER_JOIN_LIMIT_PREF_RATIO
+VARIABLE_SCOPE SESSION
+VARIABLE_TYPE BIGINT UNSIGNED
+VARIABLE_COMMENT For queries with JOIN and ORDER BY LIMIT : make the optimizer consider a join order that allows to short-cut execution after producing #LIMIT matches if that promises N times speedup. (A conservative setting here would be is a high value, like 100 so the short-cutting plan is used if it promises a speedup of 100x or more).
Short-cutting plans are inherently risky so the default is 0 which means do not consider this optimization +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 4294967295 +NUMERIC_BLOCK_SIZE 1 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME OPTIMIZER_MAX_SEL_ARGS VARIABLE_SCOPE SESSION VARIABLE_TYPE BIGINT UNSIGNED diff --git a/sql/sql_class.h b/sql/sql_class.h index a347700f72f..615bf97a0ef 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -760,6 +760,7 @@ typedef struct system_variables ulong net_retry_count; ulong net_wait_timeout; ulong net_write_timeout; + ulonglong optimizer_join_limit_pref_ratio; ulong optimizer_prune_level; ulong optimizer_search_depth; ulong optimizer_selectivity_sampling_limit; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 1b8552a805e..3be4adb092f 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -233,12 +233,14 @@ static COND *make_cond_for_table_from_pred(THD *thd, Item *root_cond, static Item* part_of_refkey(TABLE *form,Field *field); uint find_shortest_key(TABLE *table, const key_map *usable_keys); -static bool test_if_cheaper_ordering(const JOIN_TAB *tab, +static bool test_if_cheaper_ordering(bool in_join_optimizer, + const JOIN_TAB *tab, ORDER *order, TABLE *table, key_map usable_keys, int key, ha_rows select_limit, int *new_key, int *new_key_direction, ha_rows *new_select_limit, + double *new_read_time, uint *new_used_key_parts= NULL, uint *saved_best_key_parts= NULL); static int test_if_order_by_key(JOIN *join, @@ -330,6 +332,10 @@ static void optimize_rownum(THD *thd, SELECT_LEX_UNIT *unit, Item *cond); static bool process_direct_rownum_comparison(THD *thd, SELECT_LEX_UNIT *unit, Item *cond); +static +bool join_limit_shortcut_is_applicable(const JOIN *join); +POSITION *join_limit_shortcut_finalize_plan(JOIN *join, double *cost); + static bool find_indexes_matching_order(JOIN *join, TABLE *table, ORDER *order, key_map *usable_keys); @@ -5817,6 +5823,7 @@ make_join_statistics(JOIN *join, List &tables_list, join->sort_by_table= get_sort_by_table(join->order, join->group_list, join->select_lex->leaf_tables, join->const_table_map); + join->limit_shortcut_applicable= join_limit_shortcut_is_applicable(join); /* Update info on indexes that can be used for search lookups as reading const tables may has added new sargable predicates. @@ -9175,6 +9182,7 @@ choose_plan(JOIN *join, table_map join_tables) THD *thd= join->thd; DBUG_ENTER("choose_plan"); + join->limit_optimization_mode= false; join->cur_embedding_map= 0; reset_nj_counters(join, join->join_list); qsort2_cmp jtab_sort_func; @@ -9232,9 +9240,45 @@ choose_plan(JOIN *join, table_map join_tables) if (search_depth == 0) /* Automatically determine a reasonable value for 'search_depth' */ search_depth= determine_search_depth(join); + + double limit_cost= DBL_MAX; + POSITION *limit_plan= NULL; + + /* First, build a join plan that can short-cut ORDER BY...LIMIT */ + if (join->limit_shortcut_applicable && !join->emb_sjm_nest) + { + bool res; + Json_writer_object wrapper(join->thd); + Json_writer_array trace(join->thd, "join_limit_shortcut_plan_search"); + join->limit_optimization_mode= true; + res= greedy_search(join, join_tables, search_depth, prune_level, + use_cond_selectivity); + join->limit_optimization_mode= false; + + if (res) + DBUG_RETURN(TRUE); + DBUG_ASSERT(join->best_read != DBL_MAX); + + /* + We've built a join order. Adjust its cost based on ORDER BY...LIMIT + short-cutting. 
+ */ + limit_plan= join_limit_shortcut_finalize_plan(join, &limit_cost); + } + + /* The main call to search for the query plan: */ if (greedy_search(join, join_tables, search_depth, prune_level, use_cond_selectivity)) DBUG_RETURN(TRUE); + + DBUG_ASSERT(join->best_read != DBL_MAX); + if (limit_plan && limit_cost < join->best_read) + { + /* Plan that uses ORDER BY ... LIMIT shortcutting is better. */ + memcpy((uchar*)join->best_positions, (uchar*)limit_plan, + sizeof(POSITION)*join->table_count); + join->best_read= limit_cost; + } } /* @@ -10344,6 +10388,311 @@ check_if_edge_table(POSITION *pos, } +/* + @brief + Check if it is potentally possible to short-cut the JOIN execution due to + ORDER BY ... LIMIT clause + + @detail + It is possible when the join has "ORDER BY ... LIMIT n" clause, and the + sort+limit operation is done right after the join operation (there's no + grouping or DISTINCT in between). + Then we can potentially build a join plan that enumerates rows in the + ORDER BY order and so will be able to terminate as soon as it has produced + #limit rows. + + Note that it is not a requirement that sort_by_table has an index that + matches ORDER BY. If it doesn't have one, the optimizer will pass + sort_by_table to filesort. Reading from sort_by_table won't use + short-cutting but the rest of the join will. +*/ + +static +bool join_limit_shortcut_is_applicable(const JOIN *join) +{ + /* + Any post-join operation like GROUP BY or DISTINCT or window functions + means we cannot short-cut join execution + */ + if (!join->thd->variables.optimizer_join_limit_pref_ratio || + !join->order || + join->select_limit == HA_POS_ERROR || + join->group_list || + join->select_distinct || + join->select_options & SELECT_BIG_RESULT || + join->rollup.state != ROLLUP::STATE_NONE || + join->select_lex->have_window_funcs() || + join->select_lex->with_sum_func) + { + return false; + } + + /* + Cannot do short-cutting if + (1) ORDER BY refers to more than one table or + (2) the table it refers to cannot be first table in the join order + */ + if (!join->sort_by_table || // (1) + join->sort_by_table->reginfo.join_tab->dependent) // (2) + return false; + + Json_writer_object wrapper(join->thd); + Json_writer_object trace(join->thd, "join_limit_shortcut_is_applicable"); + trace.add("applicable", 1); + /* It looks like we can short-cut limit due to join */ + return true; +} + + +/* + @brief + Check if we could use an index-based access method to produce rows + in the order for ORDER BY ... LIMIT. + + @detail + This should do what test_if_skip_sort_order() does. We can't use that + function directly, because: + + 1. We're at the join optimization stage and have not done query plan + fix-ups done in get_best_combination() and co. + + 2. The code in test_if_skip_sort_order() does modify query plan structures, + for example it may change the table's quick select. This is done even if + it's called with no_changes=true parameter. + + @param access_method_changed OUT Whether the function changed the access + method to get rows in desired order. + @param new_access_cost OUT if access method changed: its cost. 
+ + @return + true - Can skip sorting + false - Cannot skip sorting +*/ + +bool test_if_skip_sort_order_early(JOIN *join, + bool *access_method_changed, + double *new_access_cost) +{ + const POSITION *pos= &join->best_positions[join->const_tables]; + TABLE *table= pos->table->table; + key_map usable_keys= table->keys_in_use_for_order_by; + + *access_method_changed= false; + + // Step #1: Find indexes that produce the required ordering. + if (find_indexes_matching_order(join, table, join->order, &usable_keys)) + { + return false; // Cannot skip sorting + } + + // Step #2: Check if the index we're using produces the needed ordering + uint ref_key; + if (pos->key) + { + // Mirror the (wrong) logic in test_if_skip_sort_order: + if (pos->spl_plan || pos->type == JT_REF_OR_NULL) + return false; // Use filesort + + ref_key= pos->key->key; + } + else + { + if (pos->table->quick) + { + if (pos->table->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE) + ref_key= pos->table->quick->index; + else + ref_key= MAX_KEY; + } + else + ref_key= MAX_KEY; + } + + if (ref_key != MAX_KEY && usable_keys.is_set(ref_key)) + { + return true; // we're using an index that produces the reqired ordering. + } + + /* + Step #3: check if we can switch to using an index that would produce the + ordering. + (But don't actually switch, this will be done by test_if_skip_sort_order) + */ + int best_key= -1; + uint UNINIT_VAR(best_key_parts); + uint saved_best_key_parts= 0; + int best_key_direction= 0; + JOIN_TAB *tab= pos->table; + ha_rows new_limit; + double new_read_time; + if (test_if_cheaper_ordering(/*in_join_optimizer */TRUE, + tab, join->order, table, usable_keys, + ref_key, join->select_limit, + &best_key, &best_key_direction, + &new_limit, &new_read_time, + &best_key_parts, + &saved_best_key_parts)) + { + // Ok found a way to skip sorting + *access_method_changed= true; + *new_access_cost= new_read_time; + return true; + } + + return false; +} + + +/* + Compute the cost of join assuming we only need fraction of the output. +*/ + +double recompute_join_cost_with_limit(const JOIN *join, bool skip_sorting, + double *first_table_cost, + double fraction) +{ + POSITION *pos= join->best_positions + join->const_tables; + /* + Generally, we assume that producing X% of output takes X% of the cost. + */ + double partial_join_cost= join->best_read * fraction; + + if (skip_sorting) + { + /* + First table produces rows in required order. Two options: + + A. first_table_cost=NULL means we use whatever access method the join + optimizer has picked. Its cost was included in join->best_read and + we've already took a fraction of it. + + B. first_table_cost!=NULL means we will need to switch to another access + method, we have the cost to read rows to produce #LIMIT rows in join + output. + */ + if (first_table_cost) + { + /* + Subtract the remainder of the first table's cost we had in + join->best_read: + */ + partial_join_cost -= pos->read_time*fraction; + partial_join_cost -= pos->records_read*fraction / TIME_FOR_COMPARE; + + /* Add the cost of the new access method we've got: */ + partial_join_cost= COST_ADD(partial_join_cost, *first_table_cost); + } + } + else + { + DBUG_ASSERT(!first_table_cost); + /* + Cannot skip sorting. We read the first table entirely, then sort it. + + partial_join_cost includes pos->read_time*fraction. Add to it + pos->read_time*(1-fraction) so we have the cost to read the entire first + table. Do the same for costs of checking the WHERE. 
+ */ + double extra_first_table_cost= pos->read_time * (1.0 - fraction); + double extra_first_table_where= pos->records_read * (1.0 - fraction) / + TIME_FOR_COMPARE; + + partial_join_cost= COST_ADD(partial_join_cost, + COST_ADD(extra_first_table_cost, + extra_first_table_where)); + } + return partial_join_cost; +} + + +/* + @brief + Finalize building the join order which allows to short-cut the join + execution. + + @detail + This is called after we have produced a join order that allows short- + cutting. + Here, we decide if it is cheaper to use this one or the original join + order. +*/ + +POSITION *join_limit_shortcut_finalize_plan(JOIN *join, double *cost) +{ + Json_writer_object wrapper(join->thd); + Json_writer_object trace(join->thd, "join_limit_shortcut_choice"); + + double fraction= join->select_limit / join->join_record_count; + trace.add("limit_fraction", fraction); + + /* Check which fraction of join output we need */ + if (fraction >= 1.0) + { + trace.add("skip_adjustment", "no short-cutting"); + return NULL; + } + + /* + Check if the first table's access method produces the required ordering. + Possible options: + 1. Yes: we can just take a fraction of the execution cost. + 2A No: change the access method to one that does produce the required + ordering, update the costs. + 2B No: Need to pass the first table to filesort(). + */ + bool skip_sorting; + bool access_method_changed; + double new_access_cost; + { + Json_writer_array tmp(join->thd, "test_if_skip_sort_order_early"); + skip_sorting= test_if_skip_sort_order_early(join, + &access_method_changed, + &new_access_cost); + } + trace.add("can_skip_filesort", skip_sorting); + + double cost_with_shortcut= + recompute_join_cost_with_limit(join, skip_sorting, + access_method_changed ? + &new_access_cost : (double*)0, + fraction); + double risk_ratio= + (double)join->thd->variables.optimizer_join_limit_pref_ratio; + trace.add("full_join_cost", join->best_read); + trace.add("risk_ratio", risk_ratio); + trace.add("shortcut_join_cost", cost_with_shortcut); + cost_with_shortcut *= risk_ratio; + trace.add("shortcut_cost_with_risk", cost_with_shortcut); + if (cost_with_shortcut < join->best_read) + { + trace.add("use_shortcut_cost", true); + POSITION *pos= (POSITION*)memdup_root(join->thd->mem_root, + join->best_positions, + sizeof(POSITION)* + (join->table_count + 1)); + *cost= cost_with_shortcut; + return pos; + } + trace.add("use_shortcut_cost", false); + return NULL; +} + + +/* + @brief + If we're in Limit Optimization Mode, allow only join->sort_by_table as + the first table in the join order +*/ + +static +bool join_limit_shortcut_allows_table(const JOIN *join, uint idx, JOIN_TAB *s) +{ + if (join->limit_optimization_mode && idx == join->const_tables) + return (join->sort_by_table == s->table); + return true; +} + + /** Find a good, possibly optimal, query execution plan (QEP) by a possibly exhaustive search. @@ -10520,7 +10869,8 @@ best_extension_by_limited_search(JOIN *join, if ((allowed_tables & real_table_bit) && !(remaining_tables & s->dependent) && - !check_interleaving_with_nj(s)) + !check_interleaving_with_nj(s) && + join_limit_shortcut_allows_table(join, idx, s)) { double current_record_count, current_read_time; double partial_join_cardinality; @@ -24790,7 +25140,8 @@ static void prepare_for_reverse_ordered_access(JOIN_TAB *tab) @brief Given a table and order, find indexes that produce rows in the order - @param usable_keys OUT Bitmap of indexes that produce rows in order. 
+ @param usable_keys IN Bitmap of keys we can use + OUT Bitmap of indexes that produce rows in order. @return false Some indexes were found @@ -25089,11 +25440,13 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, int best_key_direction= 0; JOIN *join= tab->join; ha_rows table_records= table->stat_records(); + double new_read_time_dummy; - test_if_cheaper_ordering(tab, order, table, usable_keys, + test_if_cheaper_ordering(FALSE, tab, order, table, usable_keys, ref_key, select_limit, &best_key, &best_key_direction, - &select_limit, &best_key_parts, + &select_limit, &new_read_time_dummy, + &best_key_parts, &saved_best_key_parts); /* @@ -30324,11 +30677,13 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab, */ static bool -test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, +test_if_cheaper_ordering(bool in_join_optimizer, + const JOIN_TAB *tab, ORDER *order, TABLE *table, key_map usable_keys, int ref_key, ha_rows select_limit_arg, int *new_key, int *new_key_direction, - ha_rows *new_select_limit, uint *new_used_key_parts, + ha_rows *new_select_limit, double *new_read_time, + uint *new_used_key_parts, uint *saved_best_key_parts) { DBUG_ENTER("test_if_cheaper_ordering"); @@ -30386,7 +30741,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, if (join) { - uint tablenr= (uint)(tab - join->join_tab); + uint tablenr= join->const_tables; read_time= join->best_positions[tablenr].read_time; records= join->best_positions[tablenr].records_read; for (uint i= tablenr+1; i < join->table_count; i++) @@ -30424,12 +30779,27 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, Calculate the selectivity of the ref_key for REF_ACCESS. For RANGE_ACCESS we use table->opt_range_condition_rows. */ - if (ref_key >= 0 && ref_key != MAX_KEY && tab->type == JT_REF) + if (in_join_optimizer) + { + if (ref_key >= 0 && ref_key != MAX_KEY && + join->best_positions[join->const_tables].type == JT_REF) + { + refkey_rows_estimate= + (ha_rows)join->best_positions[join->const_tables].records_read; + set_if_bigger(refkey_rows_estimate, 1); + } + } + else if (ref_key >= 0 && ref_key != MAX_KEY && tab->type == JT_REF) { /* If ref access uses keypart=const for all its key parts, and quick select uses the same # of key parts, then they are equivalent. Reuse #rows estimate from quick select as it is more precise. + + Note: we could just have used + join->best_positions[join->const_tables].records_read + here. That number was computed in best_access_path() and it already + includes adjustments based on table->opt_range[ref_key].rows. */ if (tab->ref.const_ref_part_map == make_prev_keypart_map(tab->ref.key_parts) && @@ -30755,6 +31125,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, *new_key= best_key; *new_key_direction= best_key_direction; *new_select_limit= has_limit ? 
best_select_limit : table_records;
+    *new_read_time= read_time;
     if (new_used_key_parts != NULL)
       *new_used_key_parts= best_key_parts;
     DBUG_RETURN(TRUE);
@@ -30854,10 +31225,11 @@ uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select,
     table->opt_range_condition_rows= table->stat_records();
 
     int key, direction;
-    if (test_if_cheaper_ordering(NULL, order, table,
+    double new_cost;
+    if (test_if_cheaper_ordering(FALSE, NULL, order, table,
                                  table->keys_in_use_for_order_by, -1, limit,
-                                 &key, &direction, &limit) &&
+                                 &key, &direction, &limit, &new_cost) &&
         !is_key_used(table, key, table->write_set))
     {
       *need_sort= FALSE;
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 128426993ac..59c4ebfc14e 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -1205,6 +1205,20 @@ public:
     passing 1st non-const table to filesort(). NULL means no such table exists.
   */
   TABLE *sort_by_table;
+
+  /*
+    If true, there is an ORDER BY x LIMIT n clause and for certain join orders
+    it is possible to short-cut the join execution, i.e. stop it as soon as n
+    output rows have been produced. See join_limit_shortcut_is_applicable().
+  */
+  bool limit_shortcut_applicable;
+
+  /*
+    Used during join optimization: if true, we're building a join order that
+    will short-cut join execution as soon as #LIMIT rows are produced.
+  */
+  bool limit_optimization_mode;
+
   /*
     Number of tables in the join. (In MySQL, it is named 'tables' and is
     also the number of elements in
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index 42ec52b38a9..c5b507b936d 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -2702,6 +2702,20 @@ static Sys_var_ulong Sys_optimizer_selectivity_sampling_limit(
        VALID_RANGE(SELECTIVITY_SAMPLING_THRESHOLD, UINT_MAX),
        DEFAULT(SELECTIVITY_SAMPLING_LIMIT), BLOCK_SIZE(1));
 
+static Sys_var_ulonglong Sys_optimizer_join_limit_pref_ratio(
+       "optimizer_join_limit_pref_ratio",
+       "For queries with JOIN and ORDER BY LIMIT : make the optimizer "
+       "consider a join order that allows to short-cut execution after "
+       "producing #LIMIT matches if that promises N times speedup. "
+       "(A conservative setting here would be is a high value, like 100 so "
+       "the short-cutting plan is used if it promises a speedup of 100x or "
+       "more). Short-cutting plans are inherently risky so the default is 0 "
+       "which means do not consider this optimization",
+       SESSION_VAR(optimizer_join_limit_pref_ratio),
+       CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(0, UINT_MAX),
+       DEFAULT(0), BLOCK_SIZE(1));
+
 static Sys_var_ulong Sys_optimizer_use_condition_selectivity(
        "optimizer_use_condition_selectivity",
        "Controls selectivity of which conditions the optimizer takes into "
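
A minimal usage sketch, assuming a server built with this patch and the
t1/t10/t11 tables created by mysql-test/main/order_by_limit_join.test above;
it mirrors Query 1 from the test and shows where the new trace node appears:

  -- Require a promised speedup of at least 100x before the riskier
  -- short-cutting plan is preferred (0, the default, disables it).
  set optimizer_join_limit_pref_ratio=100;
  set optimizer_trace=1;
  explain
  select *
  from t1
  join t10 on t1.a=t10.a
  join t11 on t1.b=t11.b
  order by t1.col1
  limit 10;
  -- The cost comparison made by the new code is recorded under
  -- "join_limit_shortcut_choice" in the optimizer trace:
  select json_detailed(json_extract(trace, '$**.join_limit_shortcut_choice'))
  from information_schema.optimizer_trace;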