1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-30 16:24:05 +03:00

MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT

(Variant 2b: call greedy_search() twice, correct handling for limited
 search_depth)

Modify the join optimizer to specifically try to produce join orders that
can short-cut their execution for ORDER BY..LIMIT clause.

The optimization is controlled by @@optimizer_join_limit_pref_ratio.
Default value 0 means don't construct short-cutting join orders.
Other value means construct short-cutting join order, and prefer it only
if it promises speedup of more than #value times.

In Optimizer Trace, look for these names:
* join_limit_shortcut_is_applicable
* join_limit_shortcut_plan_search
* join_limit_shortcut_choice
This commit is contained in:
Sergei Petrunia
2024-08-18 20:10:19 +03:00
parent 819765a47d
commit c8d040938a
9 changed files with 1110 additions and 11 deletions

View File

@ -0,0 +1,207 @@
--echo #
--echo # MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT
--echo #
--source include/have_sequence.inc
# We need optimizer trace
--source include/not_embedded.inc
create table t1 (
a int,
b int,
c int,
col1 int,
col2 int,
index(a),
index(b),
index(col1)
);
insert into t1 select
mod(seq, 100),
mod(seq, 95),
seq,
seq,
seq
from
seq_1_to_10000;
create table t10 (
a int,
a_value char(10),
key(a)
);
insert into t10 select seq, seq from seq_1_to_100;
create table t11 (
b int,
b_value char(10),
key(b)
);
insert into t11 select seq, seq from seq_1_to_100;
set @tmp_os=@@optimizer_trace;
set optimizer_trace=1;
--echo #
--echo # Query 1 - basic example.
--echo #
let $query= explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col1
limit 10;
--echo # Table t1 is not the first, have to use temporary+filesort:
eval $query;
set optimizer_join_limit_pref_ratio=10;
--echo # t1 is first, key=col1 produces ordering, no filesort or temporary:
eval $query;
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
--echo #
--echo # Query 2 - same as above but without a suitable index.
--echo #
let $query=
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col2
limit 10;
--echo # Table t1 is not the first, have to use temporary+filesort:
set optimizer_join_limit_pref_ratio=0;
eval $query;
--echo # t1 is first but there's no suitable index,
--echo # so we use filesort but using temporary:
set optimizer_join_limit_pref_ratio=10;
eval $query;
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
--echo #
--echo # Query 3: Counter example with large limit
--echo #
let $query= explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col1
limit 5000;
--echo # Table t1 is not the first, have to use temporary+filesort:
set optimizer_join_limit_pref_ratio=0;
eval $query;
--echo # Same plan as above:
--echo # Table t1 is not the first, have to use temporary+filesort:
set optimizer_join_limit_pref_ratio=10;
eval $query;
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
--echo #
--echo # Query 4: LEFT JOIN makes it impossible to put ORDER-BY-table first,
--echo # however the optimizer still puts it as sort_by_table.
--echo #
set optimizer_join_limit_pref_ratio=10;
explain
select
*
from
t10 left join (t1 join t11 on t1.b=t11.b ) on t1.a=t10.a
order by
t1.col2
limit 10;
set @trace=(select trace from information_schema.optimizer_trace);
--echo # This will show nothing as limit shortcut code figures that
--echo # it's not possible to use t1 to construct shortcuts:
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
--echo #
--echo # Query 5: Same as Q1 but also with a semi-join
--echo #
set optimizer_join_limit_pref_ratio=default;
let $query= explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
where
t1.a in (select a from t10) and
t1.b in (select b from t11)
order by
t1.col1
limit 10;
--echo # Table t1 is not the first, have to use temporary+filesort:
eval $query;
set optimizer_join_limit_pref_ratio=10;
--echo # t1 is first, key=col1 produces ordering, no filesort or temporary:
eval $query;
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
--echo #
--echo # Query 6: same as Query 1 but let's limit the search depth
--echo #
set @tmp_osd=@@optimizer_search_depth;
set optimizer_search_depth=1;
let $query= explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col1
limit 10;
set optimizer_join_limit_pref_ratio=default;
--echo # Table t1 is not the first, have to use temporary+filesort:
eval $query;
set optimizer_join_limit_pref_ratio=10;
--echo # t1 is first, key=col1 produces ordering, no filesort or temporary:
eval $query;
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.join_limit_shortcut_choice')) as JS;
set optimizer_search_depth=@tmp_osd;
set optimizer_trace=@tmp_os;
set optimizer_join_limit_pref_ratio=default;
drop table t1, t10, t11;