MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT

(Variant 2b: call greedy_search() twice, correct handling for limited search_depth) Modify the join optimizer to specifically try to produce join orders that can short-cut their execution for ORDER BY..LIMIT clause. The optimization is controlled by @@optimizer_join_limit_pref_ratio. Default value 0 means don't construct short-cutting join orders. Other value means construct short-cutting join order, and prefer it only if it promises speedup of more than #value times. In Optimizer Trace, look for these names: * join_limit_shortcut_is_applicable * join_limit_shortcut_plan_search * join_limit_shortcut_choice
2025-07-29 05:21:33 +03:00 · 2024-08-18 20:10:19 +03:00
parent 819765a47d
commit c8d040938a
9 changed files with 1110 additions and 11 deletions
--- a/mysql-test/main/mysqld--help.result
+++ b/mysql-test/main/mysqld--help.result
@ -724,6 +724,15 @@ The following specify which files/extra groups are read (specified before remain
 keys. This variable will be deleted in MariaDB 11.0 as it
 is not needed with the new 11.0 optimizer.
 Use 'ALL' to set all combinations.
+ --optimizer-join-limit-pref-ratio=# 
+ For queries with JOIN and ORDER BY LIMIT : make the
+ optimizer consider a join order that allows to short-cut
+ execution after producing #LIMIT matches if that promises
+ N times speedup. (A conservative setting here would be is
+ a high value, like 100 so the short-cutting plan is used
+ if it promises a speedup of 100x or more). Short-cutting
+ plans are inherently risky so the default is 0 which
+ means do not consider this optimization
 --optimizer-max-sel-arg-weight=# 
 The maximum weight of the SEL_ARG graph. Set to 0 for no
 limit
@ -1697,6 +1706,7 @@ old-mode UTF8_IS_UTF8MB3
 old-passwords FALSE
 old-style-user-limits FALSE
 optimizer-adjust-secondary-key-costs 
+optimizer-join-limit-pref-ratio 0
 optimizer-max-sel-arg-weight 32000
 optimizer-max-sel-args 16000
 optimizer-prune-level 1
--- a/mysql-test/main/order_by_limit_join.result
+++ b/mysql-test/main/order_by_limit_join.result
@ -0,0 +1,461 @@
+#
+# MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT
+#
+create table t1 (
+a int,
+b int,
+c int,
+col1 int,
+col2 int,
+index(a),
+index(b),
+index(col1)
+);
+insert into t1 select
+mod(seq, 100),
+mod(seq, 95),
+seq,
+seq,
+seq
+from
+seq_1_to_10000;
+create table t10 (
+a int,
+a_value char(10),
+key(a)
+);
+insert into t10 select seq, seq from seq_1_to_100;
+create table t11 (
+b int,
+b_value char(10),
+key(b)
+);
+insert into t11 select seq, seq from seq_1_to_100;
+set @tmp_os=@@optimizer_trace;
+set optimizer_trace=1;
+#
+# Query 1 - basic example.
+#
+# Table t1 is not the first, have to use temporary+filesort:
+explain
+select
+*
+from
+t1
+join t10 on t1.a=t10.a
+join t11 on t1.b=t11.b
+order by
+t1.col1
+limit 10;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t10	ALL	a	NULL	NULL	NULL	100	Using where; Using temporary; Using filesort
+1	SIMPLE	t1	ref	a,b	a	5	test.t10.a	100	Using where
+1	SIMPLE	t11	ref	b	b	5	test.t1.b	1	
+set optimizer_join_limit_pref_ratio=10;
+# t1 is first, key=col1 produces ordering, no filesort or temporary:
+explain
+select
+*
+from
+t1
+join t10 on t1.a=t10.a
+join t11 on t1.b=t11.b
+order by
+t1.col1
+limit 10;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	a,b	col1	5	NULL	10	Using where
+1	SIMPLE	t10	ref	a	a	5	test.t1.a	1	
+1	SIMPLE	t11	ref	b	b	5	test.t1.b	1	
+set @trace=(select trace from information_schema.optimizer_trace);
+select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
+JS
+[
+    {
+        "limit_fraction": 0.001,
+        "test_if_skip_sort_order_early": 
+        [
+            {
+                "reconsidering_access_paths_for_index_ordering": 
+                {
+                    "clause": "ORDER BY",
+                    "fanout": 1,
+                    "read_time": 53.27053125,
+                    "table": "t1",
+                    "rows_estimation": 10000,
+                    "possible_keys": 
+                    [
+                        {
+                            "index": "a",
+                            "can_resolve_order": false,
+                            "cause": "not usable index for the query"
+                        },
+                        {
+                            "index": "b",
+                            "can_resolve_order": false,
+                            "cause": "not usable index for the query"
+                        },
+                        {
+                            "index": "col1",
+                            "can_resolve_order": true,
+                            "updated_limit": 10,
+                            "index_scan_time": 10,
+                            "records": 10000,
+                            "chosen": true
+                        }
+                    ]
+                }
+            }
+        ],
+        "can_skip_filesort": true,
+        "full_join_cost": 46064.98442,
+        "risk_ratio": 10,
+        "shortcut_join_cost": 97.28224614,
+        "shortcut_cost_with_risk": 972.8224614,
+        "use_shortcut_cost": true
+    }
+]
+#
+# Query 2 - same as above but without a suitable index.
+#
+# Table t1 is not the first, have to use temporary+filesort:
+set optimizer_join_limit_pref_ratio=0;
+explain
+select
+*
+from
+t1
+join t10 on t1.a=t10.a
+join t11 on t1.b=t11.b
+order by
+t1.col2
+limit 10;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t10	ALL	a	NULL	NULL	NULL	100	Using where; Using temporary; Using filesort
+1	SIMPLE	t1	ref	a,b	a	5	test.t10.a	100	Using where
+1	SIMPLE	t11	ref	b	b	5	test.t1.b	1	
+# t1 is first but there's no suitable index,
+#   so we use filesort but using temporary:
+set optimizer_join_limit_pref_ratio=10;
+explain
+select
+*
+from
+t1
+join t10 on t1.a=t10.a
+join t11 on t1.b=t11.b
+order by
+t1.col2
+limit 10;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	a,b	NULL	NULL	NULL	10000	Using where; Using filesort
+1	SIMPLE	t10	ref	a	a	5	test.t1.a	1	
+1	SIMPLE	t11	ref	b	b	5	test.t1.b	1	
+set @trace=(select trace from information_schema.optimizer_trace);
+select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
+JS
+[
+    {
+        "limit_fraction": 0.001,
+        "test_if_skip_sort_order_early": 
+        [],
+        "can_skip_filesort": false,
+        "full_join_cost": 46064.98442,
+        "risk_ratio": 10,
+        "shortcut_join_cost": 2097.281246,
+        "shortcut_cost_with_risk": 20972.81246,
+        "use_shortcut_cost": true
+    }
+]
+#
+# Query 3: Counter example with large limit
+#
+# Table t1 is not the first, have to use temporary+filesort:
+set optimizer_join_limit_pref_ratio=0;
+explain
+select
+*
+from
+t1
+join t10 on t1.a=t10.a
+join t11 on t1.b=t11.b
+order by
+t1.col1
+limit 5000;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t10	ALL	a	NULL	NULL	NULL	100	Using where; Using temporary; Using filesort
+1	SIMPLE	t1	ref	a,b	a	5	test.t10.a	100	Using where
+1	SIMPLE	t11	ref	b	b	5	test.t1.b	1	
+# Same plan as above:
+# Table t1 is not the first, have to use temporary+filesort:
+set optimizer_join_limit_pref_ratio=10;
+explain
+select
+*
+from
+t1
+join t10 on t1.a=t10.a
+join t11 on t1.b=t11.b
+order by
+t1.col1
+limit 5000;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t10	ALL	a	NULL	NULL	NULL	100	Using where; Using temporary; Using filesort
+1	SIMPLE	t1	ref	a,b	a	5	test.t10.a	100	Using where
+1	SIMPLE	t11	ref	b	b	5	test.t1.b	1	
+set @trace=(select trace from information_schema.optimizer_trace);
+select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
+JS
+[
+    {
+        "limit_fraction": 0.5,
+        "test_if_skip_sort_order_early": 
+        [
+            {
+                "reconsidering_access_paths_for_index_ordering": 
+                {
+                    "clause": "ORDER BY",
+                    "fanout": 1,
+                    "read_time": 53.27053125,
+                    "table": "t1",
+                    "rows_estimation": 10000,
+                    "possible_keys": 
+                    [
+                        {
+                            "index": "a",
+                            "can_resolve_order": false,
+                            "cause": "not usable index for the query"
+                        },
+                        {
+                            "index": "b",
+                            "can_resolve_order": false,
+                            "cause": "not usable index for the query"
+                        },
+                        {
+                            "index": "col1",
+                            "can_resolve_order": true,
+                            "updated_limit": 5000,
+                            "index_scan_time": 5000,
+                            "usable": false,
+                            "cause": "cost"
+                        }
+                    ]
+                }
+            }
+        ],
+        "can_skip_filesort": false,
+        "full_join_cost": 46064.98442,
+        "risk_ratio": 10,
+        "shortcut_join_cost": 24059.12698,
+        "shortcut_cost_with_risk": 240591.2698,
+        "use_shortcut_cost": false
+    }
+]
+#
+# Query 4: LEFT JOIN makes it impossible to put ORDER-BY-table first,
+#          however the optimizer still puts it as sort_by_table.
+#
+set optimizer_join_limit_pref_ratio=10;
+explain
+select
+*
+from
+t10 left join (t1 join t11 on t1.b=t11.b ) on t1.a=t10.a
+order by
+t1.col2
+limit 10;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t10	ALL	NULL	NULL	NULL	NULL	100	Using temporary; Using filesort
+1	SIMPLE	t1	ref	a,b	a	5	test.t10.a	100	Using where
+1	SIMPLE	t11	ref	b	b	5	test.t1.b	1	
+set @trace=(select trace from information_schema.optimizer_trace);
+# This will show nothing as limit shortcut code figures that
+#   it's not possible to use t1 to construct shortcuts:
+select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
+JS
+NULL
+#
+# Query 5: Same as Q1 but also with a semi-join
+#
+set optimizer_join_limit_pref_ratio=default;
+# Table t1 is not the first, have to use temporary+filesort:
+explain
+select
+*
+from
+t1
+join t10 on t1.a=t10.a
+join t11 on t1.b=t11.b
+where
+t1.a in (select a from t10) and
+t1.b in (select b from t11)
+order by
+t1.col1
+limit 10;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t10	ALL	a	NULL	NULL	NULL	100	Using where; Using temporary; Using filesort
+1	PRIMARY	t10	ref	a	a	5	test.t10.a	1	Using index; LooseScan
+1	PRIMARY	t1	ref	a,b	a	5	test.t10.a	100	Using where
+1	PRIMARY	t11	ref	b	b	5	test.t1.b	1	
+1	PRIMARY	<subquery3>	eq_ref	distinct_key	distinct_key	4	func	1	
+3	MATERIALIZED	t11	index	b	b	5	NULL	100	Using index
+set optimizer_join_limit_pref_ratio=10;
+# t1 is first, key=col1 produces ordering, no filesort or temporary:
+explain
+select
+*
+from
+t1
+join t10 on t1.a=t10.a
+join t11 on t1.b=t11.b
+where
+t1.a in (select a from t10) and
+t1.b in (select b from t11)
+order by
+t1.col1
+limit 10;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	index	a,b	col1	5	NULL	1	Using where
+1	PRIMARY	t10	ref	a	a	5	test.t1.a	1	
+1	PRIMARY	t11	ref	b	b	5	test.t1.b	1	
+1	PRIMARY	<subquery3>	eq_ref	distinct_key	distinct_key	4	func	1	
+1	PRIMARY	<subquery2>	eq_ref	distinct_key	distinct_key	4	func	1	
+3	MATERIALIZED	t11	index	b	b	5	NULL	100	Using index
+2	MATERIALIZED	t10	index	a	a	5	NULL	100	Using index
+set @trace=(select trace from information_schema.optimizer_trace);
+select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
+JS
+[
+    {
+        "limit_fraction": 0.001,
+        "test_if_skip_sort_order_early": 
+        [
+            {
+                "reconsidering_access_paths_for_index_ordering": 
+                {
+                    "clause": "ORDER BY",
+                    "fanout": 1,
+                    "read_time": 53.27053125,
+                    "table": "t1",
+                    "rows_estimation": 10000,
+                    "possible_keys": 
+                    [
+                        {
+                            "index": "a",
+                            "can_resolve_order": false,
+                            "cause": "not usable index for the query"
+                        },
+                        {
+                            "index": "b",
+                            "can_resolve_order": false,
+                            "cause": "not usable index for the query"
+                        },
+                        {
+                            "index": "col1",
+                            "can_resolve_order": true,
+                            "updated_limit": 10,
+                            "index_scan_time": 10,
+                            "records": 10000,
+                            "chosen": true
+                        }
+                    ]
+                }
+            }
+        ],
+        "can_skip_filesort": true,
+        "full_join_cost": 47079.71684,
+        "risk_ratio": 10,
+        "shortcut_join_cost": 98.29697856,
+        "shortcut_cost_with_risk": 982.9697856,
+        "use_shortcut_cost": true
+    }
+]
+#
+# Query 6: same as Query 1 but let's limit the search depth
+#
+set @tmp_osd=@@optimizer_search_depth;
+set optimizer_search_depth=1;
+set optimizer_join_limit_pref_ratio=default;
+# Table t1 is not the first, have to use temporary+filesort:
+explain
+select
+*
+from
+t1
+join t10 on t1.a=t10.a
+join t11 on t1.b=t11.b
+order by
+t1.col1
+limit 10;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t10	ALL	a	NULL	NULL	NULL	100	Using where; Using temporary; Using filesort
+1	SIMPLE	t11	ALL	b	NULL	NULL	NULL	100	Using join buffer (flat, BNL join)
+1	SIMPLE	t1	ref	a,b	a	5	test.t10.a	100	Using where
+set optimizer_join_limit_pref_ratio=10;
+# t1 is first, key=col1 produces ordering, no filesort or temporary:
+explain
+select
+*
+from
+t1
+join t10 on t1.a=t10.a
+join t11 on t1.b=t11.b
+order by
+t1.col1
+limit 10;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	a,b	col1	5	NULL	10	Using where
+1	SIMPLE	t10	ref	a	a	5	test.t1.a	1	
+1	SIMPLE	t11	ref	b	b	5	test.t1.b	1	
+set @trace=(select trace from information_schema.optimizer_trace);
+select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
+JS
+[
+    {
+        "limit_fraction": 0.001,
+        "test_if_skip_sort_order_early": 
+        [
+            {
+                "reconsidering_access_paths_for_index_ordering": 
+                {
+                    "clause": "ORDER BY",
+                    "fanout": 1,
+                    "read_time": 53.27053125,
+                    "table": "t1",
+                    "rows_estimation": 10000,
+                    "possible_keys": 
+                    [
+                        {
+                            "index": "a",
+                            "can_resolve_order": false,
+                            "cause": "not usable index for the query"
+                        },
+                        {
+                            "index": "b",
+                            "can_resolve_order": false,
+                            "cause": "not usable index for the query"
+                        },
+                        {
+                            "index": "col1",
+                            "can_resolve_order": true,
+                            "updated_limit": 10,
+                            "index_scan_time": 10,
+                            "records": 10000,
+                            "chosen": true
+                        }
+                    ]
+                }
+            }
+        ],
+        "can_skip_filesort": true,
+        "full_join_cost": 46064.98442,
+        "risk_ratio": 10,
+        "shortcut_join_cost": 97.28224614,
+        "shortcut_cost_with_risk": 972.8224614,
+        "use_shortcut_cost": true
+    }
+]
+set optimizer_search_depth=@tmp_osd;
+set optimizer_trace=@tmp_os;
+set optimizer_join_limit_pref_ratio=default;
+drop table t1, t10, t11;
--- a/mysql-test/main/order_by_limit_join.test
+++ b/mysql-test/main/order_by_limit_join.test
@ -0,0 +1,207 @@
+--echo #
+--echo # MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT
+--echo #
+
+--source include/have_sequence.inc
+
+# We need optimizer trace
+--source include/not_embedded.inc
+
+create table t1 (
+  a int,
+  b int,
+  c int,
+  col1 int,
+  col2 int,
+  index(a),
+  index(b),
+  index(col1)
+);
+
+insert into t1 select
+ mod(seq, 100),
+ mod(seq, 95),
+ seq,
+ seq,
+ seq
+from
+  seq_1_to_10000;
+
+
+create table t10 (
+ a int,
+ a_value char(10),
+ key(a)
+);
+insert into t10 select seq, seq from seq_1_to_100;
+
+create table t11 (
+ b int,
+ b_value char(10),
+ key(b)
+);
+insert into t11 select seq, seq from seq_1_to_100;
+
+set @tmp_os=@@optimizer_trace;
+set optimizer_trace=1;
+
+--echo #
+--echo # Query 1 - basic example.
+--echo #
+let $query= explain
+select
+  *
+from
+  t1
+  join t10 on t1.a=t10.a
+  join t11 on t1.b=t11.b
+order by
+  t1.col1
+limit 10;
+
+--echo # Table t1 is not the first, have to use temporary+filesort:
+eval $query;
+
+set optimizer_join_limit_pref_ratio=10;
+
+--echo # t1 is first, key=col1 produces ordering, no filesort or temporary:
+eval $query;
+
+set @trace=(select trace from information_schema.optimizer_trace);
+select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
+
+--echo #
+--echo # Query 2 - same as above but without a suitable index.
+--echo #
+let $query=
+explain
+select
+  *
+from
+  t1
+  join t10 on t1.a=t10.a
+  join t11 on t1.b=t11.b
+order by
+  t1.col2
+limit 10;
+
+--echo # Table t1 is not the first, have to use temporary+filesort:
+set optimizer_join_limit_pref_ratio=0;
+eval $query;
+
+--echo # t1 is first but there's no suitable index,
+--echo #   so we use filesort but using temporary:
+set optimizer_join_limit_pref_ratio=10;
+eval $query;
+
+set @trace=(select trace from information_schema.optimizer_trace);
+select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
+
+--echo #
+--echo # Query 3: Counter example with large limit
+--echo #
+let $query= explain
+select
+  *
+from
+  t1
+  join t10 on t1.a=t10.a
+  join t11 on t1.b=t11.b
+order by
+  t1.col1
+limit 5000;
+
+--echo # Table t1 is not the first, have to use temporary+filesort:
+set optimizer_join_limit_pref_ratio=0;
+eval $query;
+
+--echo # Same plan as above:
+--echo # Table t1 is not the first, have to use temporary+filesort:
+set optimizer_join_limit_pref_ratio=10;
+eval $query;
+
+set @trace=(select trace from information_schema.optimizer_trace);
+select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
+
+--echo #
+--echo # Query 4: LEFT JOIN makes it impossible to put ORDER-BY-table first,
+--echo #          however the optimizer still puts it as sort_by_table.
+--echo #
+set optimizer_join_limit_pref_ratio=10;
+explain
+select
+  *
+from
+  t10 left join (t1 join t11 on t1.b=t11.b ) on t1.a=t10.a
+order by
+  t1.col2
+limit 10;
+
+set @trace=(select trace from information_schema.optimizer_trace);
+--echo # This will show nothing as limit shortcut code figures that
+--echo #   it's not possible to use t1 to construct shortcuts:
+select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
+
+--echo #
+--echo # Query 5: Same as Q1 but also with a semi-join
+--echo #
+set optimizer_join_limit_pref_ratio=default;
+let $query= explain
+select
+  *
+from
+  t1
+  join t10 on t1.a=t10.a
+  join t11 on t1.b=t11.b
+where
+  t1.a in (select a from t10) and
+  t1.b in (select b from t11)
+order by
+  t1.col1
+limit 10;
+
+--echo # Table t1 is not the first, have to use temporary+filesort:
+eval $query;
+
+set optimizer_join_limit_pref_ratio=10;
+
+--echo # t1 is first, key=col1 produces ordering, no filesort or temporary:
+eval $query;
+
+set @trace=(select trace from information_schema.optimizer_trace);
+select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
+
+--echo #
+--echo # Query 6: same as Query 1 but let's limit the search depth
+--echo #
+set @tmp_osd=@@optimizer_search_depth;
+set optimizer_search_depth=1;
+let $query= explain
+select
+  *
+from
+  t1
+  join t10 on t1.a=t10.a
+  join t11 on t1.b=t11.b
+order by
+  t1.col1
+limit 10;
+
+set optimizer_join_limit_pref_ratio=default;
+--echo # Table t1 is not the first, have to use temporary+filesort:
+eval $query;
+
+set optimizer_join_limit_pref_ratio=10;
+
+--echo # t1 is first, key=col1 produces ordering, no filesort or temporary:
+eval $query;
+
+set @trace=(select trace from information_schema.optimizer_trace);
+select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
+
+
+set optimizer_search_depth=@tmp_osd;
+set optimizer_trace=@tmp_os;
+set optimizer_join_limit_pref_ratio=default;
+drop table t1, t10, t11;
+
--- a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result
+++ b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result
@ -2282,6 +2282,16 @@ NUMERIC_BLOCK_SIZE	NULL
 ENUM_VALUE_LIST	adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality
 READ_ONLY	NO
 COMMAND_LINE_ARGUMENT	REQUIRED
+VARIABLE_NAME	OPTIMIZER_JOIN_LIMIT_PREF_RATIO
+VARIABLE_SCOPE	SESSION
+VARIABLE_TYPE	BIGINT UNSIGNED
+VARIABLE_COMMENT	For queries with JOIN and ORDER BY LIMIT : make the optimizer consider a join order that allows to short-cut execution after producing #LIMIT matches if that promises N times speedup. (A conservative setting here would be is a high value, like 100 so the short-cutting plan is used if it promises a speedup of 100x or more). Short-cutting plans are inherently risky so the default is 0 which means do not consider this optimization
+NUMERIC_MIN_VALUE	0
+NUMERIC_MAX_VALUE	4294967295
+NUMERIC_BLOCK_SIZE	1
+ENUM_VALUE_LIST	NULL
+READ_ONLY	NO
+COMMAND_LINE_ARGUMENT	REQUIRED
 VARIABLE_NAME	OPTIMIZER_MAX_SEL_ARGS
 VARIABLE_SCOPE	SESSION
 VARIABLE_TYPE	BIGINT UNSIGNED
--- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
+++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
@ -2442,6 +2442,16 @@ NUMERIC_BLOCK_SIZE	NULL
 ENUM_VALUE_LIST	adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality
 READ_ONLY	NO
 COMMAND_LINE_ARGUMENT	REQUIRED
+VARIABLE_NAME	OPTIMIZER_JOIN_LIMIT_PREF_RATIO
+VARIABLE_SCOPE	SESSION
+VARIABLE_TYPE	BIGINT UNSIGNED
+VARIABLE_COMMENT	For queries with JOIN and ORDER BY LIMIT : make the optimizer consider a join order that allows to short-cut execution after producing #LIMIT matches if that promises N times speedup. (A conservative setting here would be is a high value, like 100 so the short-cutting plan is used if it promises a speedup of 100x or more). Short-cutting plans are inherently risky so the default is 0 which means do not consider this optimization
+NUMERIC_MIN_VALUE	0
+NUMERIC_MAX_VALUE	4294967295
+NUMERIC_BLOCK_SIZE	1
+ENUM_VALUE_LIST	NULL
+READ_ONLY	NO
+COMMAND_LINE_ARGUMENT	REQUIRED
 VARIABLE_NAME	OPTIMIZER_MAX_SEL_ARGS
 VARIABLE_SCOPE	SESSION
 VARIABLE_TYPE	BIGINT UNSIGNED
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@ -760,6 +760,7 @@ typedef struct system_variables
  ulong net_retry_count;
  ulong net_wait_timeout;
  ulong net_write_timeout;
+  ulonglong optimizer_join_limit_pref_ratio;
  ulong optimizer_prune_level;
  ulong optimizer_search_depth;
  ulong optimizer_selectivity_sampling_limit;
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@ -233,12 +233,14 @@ static COND *make_cond_for_table_from_pred(THD *thd, Item *root_cond,

 static Item* part_of_refkey(TABLE *form,Field *field);
 uint find_shortest_key(TABLE *table, const key_map *usable_keys);
-static bool test_if_cheaper_ordering(const JOIN_TAB *tab,
+static bool test_if_cheaper_ordering(bool in_join_optimizer,
+                                     const JOIN_TAB *tab,
                                     ORDER *order, TABLE *table,
                                     key_map usable_keys, int key,
                                     ha_rows select_limit,
                                     int *new_key, int *new_key_direction,
                                     ha_rows *new_select_limit,
+                                     double *new_read_time,
                                     uint *new_used_key_parts= NULL,
                                     uint *saved_best_key_parts= NULL);
 static int test_if_order_by_key(JOIN *join,
@ -330,6 +332,10 @@ static void optimize_rownum(THD *thd, SELECT_LEX_UNIT *unit, Item *cond);
 static bool process_direct_rownum_comparison(THD *thd, SELECT_LEX_UNIT *unit,
                                             Item *cond);

+static
+bool join_limit_shortcut_is_applicable(const JOIN *join);
+POSITION *join_limit_shortcut_finalize_plan(JOIN *join, double *cost);
+
 static
 bool find_indexes_matching_order(JOIN *join, TABLE *table, ORDER *order,
                                 key_map *usable_keys);
@ -5817,6 +5823,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
  join->sort_by_table= get_sort_by_table(join->order, join->group_list,
                                         join->select_lex->leaf_tables,
                                         join->const_table_map);
+  join->limit_shortcut_applicable= join_limit_shortcut_is_applicable(join);
  /* 
    Update info on indexes that can be used for search lookups as
    reading const tables may has added new sargable predicates. 
@ -9175,6 +9182,7 @@ choose_plan(JOIN *join, table_map join_tables)
  THD *thd= join->thd;
  DBUG_ENTER("choose_plan");

+  join->limit_optimization_mode= false;
  join->cur_embedding_map= 0;
  reset_nj_counters(join, join->join_list);
  qsort2_cmp jtab_sort_func;
@ -9232,9 +9240,45 @@ choose_plan(JOIN *join, table_map join_tables)
    if (search_depth == 0)
      /* Automatically determine a reasonable value for 'search_depth' */
      search_depth= determine_search_depth(join);
+
+    double limit_cost= DBL_MAX;
+    POSITION *limit_plan= NULL;
+
+    /* First, build a join plan that can short-cut ORDER BY...LIMIT */
+    if (join->limit_shortcut_applicable && !join->emb_sjm_nest)
+    {
+      bool res;
+      Json_writer_object wrapper(join->thd);
+      Json_writer_array trace(join->thd, "join_limit_shortcut_plan_search");
+      join->limit_optimization_mode= true;
+      res= greedy_search(join, join_tables, search_depth, prune_level,
+                         use_cond_selectivity);
+      join->limit_optimization_mode= false;
+
+      if (res)
+        DBUG_RETURN(TRUE);
+      DBUG_ASSERT(join->best_read != DBL_MAX);
+
+      /*
+        We've built a join order. Adjust its cost based on ORDER BY...LIMIT
+        short-cutting.
+      */
+      limit_plan= join_limit_shortcut_finalize_plan(join, &limit_cost);
+    }
+
+    /* The main call to search for the query plan: */
    if (greedy_search(join, join_tables, search_depth, prune_level,
                      use_cond_selectivity))
      DBUG_RETURN(TRUE);
+
+    DBUG_ASSERT(join->best_read != DBL_MAX);
+    if (limit_plan && limit_cost < join->best_read)
+    {
+      /* Plan that uses ORDER BY ... LIMIT shortcutting is better. */
+      memcpy((uchar*)join->best_positions, (uchar*)limit_plan,
+             sizeof(POSITION)*join->table_count);
+      join->best_read= limit_cost;
+    }
  }

  /* 
@ -10344,6 +10388,311 @@ check_if_edge_table(POSITION *pos,
 }


+/*
+  @brief
+    Check if it is potentally possible to short-cut the JOIN execution due to
+    ORDER BY ... LIMIT clause
+
+  @detail
+    It is possible when the join has "ORDER BY ... LIMIT n" clause, and the
+    sort+limit operation is done right after the join operation (there's no
+    grouping or DISTINCT in between).
+    Then we can potentially build a join plan that enumerates rows in the
+    ORDER BY order and so will be able to terminate as soon as it has produced
+    #limit rows.
+
+    Note that it is not a requirement that sort_by_table has an index that
+    matches ORDER BY. If it doesn't have one, the optimizer will pass
+    sort_by_table to filesort. Reading from sort_by_table won't use
+    short-cutting but the rest of the join will.
+*/
+
+static
+bool join_limit_shortcut_is_applicable(const JOIN *join)
+{
+  /*
+    Any post-join operation like GROUP BY or DISTINCT or window functions
+    means we cannot short-cut join execution
+  */
+  if (!join->thd->variables.optimizer_join_limit_pref_ratio ||
+      !join->order ||
+      join->select_limit == HA_POS_ERROR ||
+      join->group_list ||
+      join->select_distinct ||
+      join->select_options & SELECT_BIG_RESULT ||
+      join->rollup.state != ROLLUP::STATE_NONE ||
+      join->select_lex->have_window_funcs() ||
+      join->select_lex->with_sum_func)
+  {
+    return false;
+  }
+
+  /*
+    Cannot do short-cutting if
+    (1) ORDER BY refers to more than one table or
+    (2) the table it refers to cannot be first table in the join order
+  */
+  if (!join->sort_by_table ||                           // (1)
+      join->sort_by_table->reginfo.join_tab->dependent) // (2)
+    return false;
+
+  Json_writer_object wrapper(join->thd);
+  Json_writer_object trace(join->thd, "join_limit_shortcut_is_applicable");
+  trace.add("applicable", 1);
+  /* It looks like we can short-cut limit due to join */
+  return true;
+}
+
+
+/*
+  @brief
+    Check if we could use an index-based access method to produce rows
+    in the order for ORDER BY ... LIMIT.
+
+  @detail
+  This should do what test_if_skip_sort_order() does. We can't use that
+  function directly, because:
+
+  1. We're at the join optimization stage and have not done query plan
+     fix-ups done in get_best_combination() and co.
+
+  2. The code in test_if_skip_sort_order() does modify query plan structures,
+     for example it may change the table's quick select. This is done even if
+     it's called with no_changes=true parameter.
+
+  @param  access_method_changed  OUT Whether the function changed the access
+                                     method to get rows in desired order.
+  @param  new_access_cost        OUT if access method changed: its cost.
+
+  @return
+    true  - Can skip sorting
+    false - Cannot skip sorting
+*/
+
+bool test_if_skip_sort_order_early(JOIN *join,
+                                   bool *access_method_changed,
+                                   double *new_access_cost)
+{
+  const POSITION *pos= &join->best_positions[join->const_tables];
+  TABLE *table= pos->table->table;
+  key_map usable_keys= table->keys_in_use_for_order_by;
+
+  *access_method_changed= false;
+
+  // Step #1: Find indexes that produce the required ordering.
+  if (find_indexes_matching_order(join, table, join->order, &usable_keys))
+  {
+    return false; // Cannot skip sorting
+  }
+
+  // Step #2: Check if the index we're using produces the needed ordering
+  uint ref_key;
+  if (pos->key)
+  {
+    // Mirror the (wrong) logic in test_if_skip_sort_order:
+    if (pos->spl_plan || pos->type == JT_REF_OR_NULL)
+      return false; // Use filesort
+
+    ref_key= pos->key->key;
+  }
+  else
+  {
+    if (pos->table->quick)
+    {
+      if (pos->table->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE)
+        ref_key= pos->table->quick->index;
+      else
+        ref_key= MAX_KEY;
+    }
+    else
+      ref_key= MAX_KEY;
+  }
+
+  if (ref_key != MAX_KEY && usable_keys.is_set(ref_key))
+  {
+    return true;  // we're using an index that produces the reqired ordering.
+  }
+
+  /*
+    Step #3: check if we can switch to using an index that would produce the
+    ordering.
+    (But don't actually switch, this will be done by test_if_skip_sort_order)
+  */
+  int best_key= -1;
+  uint UNINIT_VAR(best_key_parts);
+  uint saved_best_key_parts= 0;
+  int best_key_direction= 0;
+  JOIN_TAB *tab= pos->table;
+  ha_rows new_limit;
+  double new_read_time;
+  if (test_if_cheaper_ordering(/*in_join_optimizer */TRUE,
+                               tab, join->order, table, usable_keys,
+                               ref_key, join->select_limit,
+                               &best_key, &best_key_direction,
+                               &new_limit, &new_read_time,
+                               &best_key_parts,
+                               &saved_best_key_parts))
+  {
+    // Ok found a way to skip sorting
+    *access_method_changed= true;
+    *new_access_cost= new_read_time;
+    return true;
+  }
+
+  return false;
+}
+
+
+/*
+  Compute the cost of join assuming we only need fraction of the output.
+*/
+
+double recompute_join_cost_with_limit(const JOIN *join, bool skip_sorting,
+                                      double *first_table_cost,
+                                      double fraction)
+{
+  POSITION *pos= join->best_positions + join->const_tables;
+  /*
+    Generally, we assume that producing X% of output takes X% of the cost.
+  */
+  double partial_join_cost= join->best_read * fraction;
+
+  if (skip_sorting)
+  {
+    /*
+      First table produces rows in required order. Two options:
+
+      A. first_table_cost=NULL means we use whatever access method the join
+        optimizer has picked. Its cost was included in join->best_read and
+        we've already took a fraction of it.
+
+      B. first_table_cost!=NULL means we will need to switch to another access
+        method, we have the cost to read rows to produce #LIMIT rows in join
+        output.
+    */
+    if (first_table_cost)
+    {
+      /*
+        Subtract the remainder of the first table's cost we had in
+        join->best_read:
+      */
+      partial_join_cost -= pos->read_time*fraction;
+      partial_join_cost -= pos->records_read*fraction / TIME_FOR_COMPARE;
+
+      /* Add the cost of the new access method we've got: */
+      partial_join_cost= COST_ADD(partial_join_cost, *first_table_cost);
+    }
+  }
+  else
+  {
+    DBUG_ASSERT(!first_table_cost);
+    /*
+      Cannot skip sorting. We read the first table entirely, then sort it.
+
+      partial_join_cost includes pos->read_time*fraction. Add to it
+      pos->read_time*(1-fraction) so we have the cost to read the entire first
+      table.  Do the same for costs of checking the WHERE.
+    */
+    double extra_first_table_cost= pos->read_time * (1.0 - fraction);
+    double extra_first_table_where= pos->records_read * (1.0 - fraction) /
+                                      TIME_FOR_COMPARE;
+
+    partial_join_cost= COST_ADD(partial_join_cost,
+                            COST_ADD(extra_first_table_cost,
+                                     extra_first_table_where));
+  }
+  return partial_join_cost;
+}
+
+
+/*
+  @brief
+    Finalize building the join order which allows to short-cut the join
+    execution.
+
+  @detail
+    This is called after we have produced a join order that allows short-
+    cutting.
+    Here, we decide if it is cheaper to use this one or the original join
+    order.
+*/
+
+POSITION *join_limit_shortcut_finalize_plan(JOIN *join, double *cost)
+{
+  Json_writer_object wrapper(join->thd);
+  Json_writer_object trace(join->thd, "join_limit_shortcut_choice");
+
+  double fraction= join->select_limit / join->join_record_count;
+  trace.add("limit_fraction", fraction);
+
+  /* Check which fraction of join output we need */
+  if (fraction >= 1.0)
+  {
+    trace.add("skip_adjustment", "no short-cutting");
+    return NULL;
+  }
+
+  /*
+    Check if the first table's access method produces the required ordering.
+    Possible options:
+    1. Yes: we can just take a fraction of the execution cost.
+    2A No: change the access method to one that does produce the required
+           ordering, update the costs.
+    2B No: Need to pass the first table to filesort().
+  */
+  bool skip_sorting;
+  bool access_method_changed;
+  double new_access_cost;
+  {
+    Json_writer_array tmp(join->thd, "test_if_skip_sort_order_early");
+    skip_sorting= test_if_skip_sort_order_early(join,
+                                                &access_method_changed,
+                                                &new_access_cost);
+  }
+  trace.add("can_skip_filesort", skip_sorting);
+
+  double cost_with_shortcut=
+    recompute_join_cost_with_limit(join, skip_sorting,
+                                   access_method_changed ?
+                                     &new_access_cost : (double*)0,
+                                   fraction);
+  double risk_ratio=
+    (double)join->thd->variables.optimizer_join_limit_pref_ratio;
+  trace.add("full_join_cost", join->best_read);
+  trace.add("risk_ratio", risk_ratio);
+  trace.add("shortcut_join_cost", cost_with_shortcut);
+  cost_with_shortcut *= risk_ratio;
+  trace.add("shortcut_cost_with_risk", cost_with_shortcut);
+  if (cost_with_shortcut < join->best_read)
+  {
+    trace.add("use_shortcut_cost", true);
+    POSITION *pos= (POSITION*)memdup_root(join->thd->mem_root,
+                                          join->best_positions,
+                                          sizeof(POSITION)*
+                                          (join->table_count + 1));
+    *cost= cost_with_shortcut;
+    return pos;
+  }
+  trace.add("use_shortcut_cost", false);
+  return NULL;
+}
+
+
+/*
+  @brief
+    If we're in Limit Optimization Mode, allow only join->sort_by_table as
+    the first table in the join order
+*/
+
+static
+bool join_limit_shortcut_allows_table(const JOIN *join, uint idx, JOIN_TAB *s)
+{
+  if (join->limit_optimization_mode && idx == join->const_tables)
+    return (join->sort_by_table == s->table);
+  return true;
+}
+
+
 /**
  Find a good, possibly optimal, query execution plan (QEP) by a possibly
  exhaustive search.
@ -10520,7 +10869,8 @@ best_extension_by_limited_search(JOIN      *join,

    if ((allowed_tables & real_table_bit) &&
        !(remaining_tables & s->dependent) &&
-        !check_interleaving_with_nj(s))
+        !check_interleaving_with_nj(s) &&
+        join_limit_shortcut_allows_table(join, idx, s))
    {
      double current_record_count, current_read_time;
      double partial_join_cardinality;
@ -24790,7 +25140,8 @@ static void prepare_for_reverse_ordered_access(JOIN_TAB *tab)
  @brief
    Given a table and order, find indexes that produce rows in the order

-  @param  usable_keys OUT  Bitmap of indexes that produce rows in order.
+  @param  usable_keys  IN   Bitmap of keys we can use
+                       OUT  Bitmap of indexes that produce rows in order.

  @return
     false  Some indexes were found
@ -25089,11 +25440,13 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
    int best_key_direction= 0;
    JOIN *join= tab->join;
    ha_rows table_records= table->stat_records();
+    double new_read_time_dummy;

-    test_if_cheaper_ordering(tab, order, table, usable_keys,
+    test_if_cheaper_ordering(FALSE, tab, order, table, usable_keys,
                             ref_key, select_limit,
                             &best_key, &best_key_direction,
-                             &select_limit, &best_key_parts,
+                             &select_limit, &new_read_time_dummy,
+                             &best_key_parts,
                             &saved_best_key_parts);

    /*
@ -30324,11 +30677,13 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab,
 */

 static bool
-test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
+test_if_cheaper_ordering(bool in_join_optimizer,
+                         const JOIN_TAB *tab, ORDER *order, TABLE *table,
                         key_map usable_keys,  int ref_key,
                         ha_rows select_limit_arg,
                         int *new_key, int *new_key_direction,
-                         ha_rows *new_select_limit, uint *new_used_key_parts,
+                         ha_rows *new_select_limit, double *new_read_time,
+                         uint *new_used_key_parts,
                         uint *saved_best_key_parts)
 {
  DBUG_ENTER("test_if_cheaper_ordering");
@ -30386,7 +30741,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,

  if (join)
  {
-    uint tablenr= (uint)(tab - join->join_tab);
+    uint tablenr= join->const_tables;
    read_time= join->best_positions[tablenr].read_time;
    records= join->best_positions[tablenr].records_read;
    for (uint i= tablenr+1; i < join->table_count; i++)
@ -30424,12 +30779,27 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
    Calculate the selectivity of the ref_key for REF_ACCESS. For
    RANGE_ACCESS we use table->opt_range_condition_rows.
  */
-  if (ref_key >= 0 && ref_key != MAX_KEY && tab->type == JT_REF)
+  if (in_join_optimizer)
+  {
+    if (ref_key >= 0 && ref_key != MAX_KEY &&
+        join->best_positions[join->const_tables].type == JT_REF)
+    {
+      refkey_rows_estimate=
+        (ha_rows)join->best_positions[join->const_tables].records_read;
+      set_if_bigger(refkey_rows_estimate, 1);
+    }
+  }
+  else if (ref_key >= 0 && ref_key != MAX_KEY && tab->type == JT_REF)
  {
    /*
      If ref access uses keypart=const for all its key parts,
      and quick select uses the same # of key parts, then they are equivalent.
      Reuse #rows estimate from quick select as it is more precise.
+
+      Note: we could just have used
+        join->best_positions[join->const_tables].records_read
+      here. That number was computed in best_access_path() and it already
+      includes adjustments based on table->opt_range[ref_key].rows.
    */
    if (tab->ref.const_ref_part_map ==
        make_prev_keypart_map(tab->ref.key_parts) &&
@ -30755,6 +31125,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
  *new_key= best_key;
  *new_key_direction= best_key_direction;
  *new_select_limit= has_limit ? best_select_limit : table_records;
+  *new_read_time= read_time;
  if (new_used_key_parts != NULL)
    *new_used_key_parts= best_key_parts;
  DBUG_RETURN(TRUE);
@ -30854,10 +31225,11 @@ uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select,
    table->opt_range_condition_rows= table->stat_records();
    
    int key, direction;
-    if (test_if_cheaper_ordering(NULL, order, table,
+    double new_cost;
+    if (test_if_cheaper_ordering(FALSE, NULL, order, table,
                                 table->keys_in_use_for_order_by, -1,
                                 limit,
-                                 &key, &direction, &limit) &&
+                                 &key, &direction, &limit, &new_cost) &&
        !is_key_used(table, key, table->write_set))
    {
      *need_sort= FALSE;
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@ -1205,6 +1205,20 @@ public:
    passing 1st non-const table to filesort(). NULL means no such table exists.
  */
  TABLE    *sort_by_table;
+
+  /*
+    If true, there is ORDER BY x LIMIT n clause and for certain join orders, it
+    is possible to short-cut the join execution, i.e. stop it as soon as n
+    output rows were produced. See join_limit_shortcut_is_applicable().
+  */
+  bool    limit_shortcut_applicable;
+
+  /*
+    Used during join optimization: if true, we're building a join order that
+    will short-cut join execution as soon as #LIMIT rows are produced.
+  */
+  bool    limit_optimization_mode;
+
  /* 
    Number of tables in the join. 
    (In MySQL, it is named 'tables' and is also the number of elements in 
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@ -2702,6 +2702,20 @@ static Sys_var_ulong Sys_optimizer_selectivity_sampling_limit(
       VALID_RANGE(SELECTIVITY_SAMPLING_THRESHOLD, UINT_MAX),
       DEFAULT(SELECTIVITY_SAMPLING_LIMIT), BLOCK_SIZE(1));

+static Sys_var_ulonglong Sys_optimizer_join_limit_pref_ratio(
+       "optimizer_join_limit_pref_ratio",
+       "For queries with JOIN and ORDER BY LIMIT : make the optimizer "
+       "consider a join order that allows to short-cut execution after "
+       "producing #LIMIT matches if that promises N times speedup. "
+       "(A conservative setting here would be is a high value, like 100 so "
+       "the short-cutting plan is used if it promises a speedup of 100x or "
+       "more). Short-cutting plans are inherently risky so the default is 0 "
+       "which means do not consider this optimization",
+       SESSION_VAR(optimizer_join_limit_pref_ratio),
+       CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(0, UINT_MAX),
+       DEFAULT(0), BLOCK_SIZE(1));
+
 static Sys_var_ulong Sys_optimizer_use_condition_selectivity(
       "optimizer_use_condition_selectivity",
       "Controls selectivity of which conditions the optimizer takes into "