1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-29 05:21:33 +03:00

Added EQ_REF chaining to the greedy_optimizer

MDEV-28073 Slow query performance in MariaDB when using many table

The idea is to prefer and chain EQ_REF tables (tables that uses an
unique key to find a row) when searching for the best table combination.
This significantly reduces row combinations that has to be examined.
This is optimization is enabled when setting optimizer_prune_level=2
(which is now default).

Implementation:
- optimizer_prune_level has a new level, 2, which enables EQ_REF
  optimization in addition to the pruning done by level 1.
  Level 2 is now default.
- Added JOIN::eq_ref_tables that contains bits of tables that could use
  potentially use EQ_REF access in the query.  This is calculated
  in sort_and_filter_keyuse()

Under optimizer_prune_level=2:
- When the greedy_optimizer notices that the preceding table was an
  EQ_REF table, it tries to add an EQ_REF table next. If an EQ_REF
  table exists, only this one will be considered at this level.
  We also collect all EQ_REF tables chained by the next levels and these
  are ignored on the starting level as we have already examined these.
  If no EQ_REF table exists, we continue as normal.

This optimization speeds up the greedy_optimizer combination test with
~25%

Other things:
- I ported the changes in MySQL 5.7 to greedy_optimizer.test to MariaDB
  to be able to ensure we can handle all cases that MySQL can do.
- I have run all tests with --mysqld=--optimizer_prune_level=1 to verify that
  there where no test changes.
This commit is contained in:
Monty
2022-06-02 19:47:23 +03:00
committed by Oleg Smirnov
parent 6e7376eb59
commit 515b9ad05a
33 changed files with 6187 additions and 2582 deletions

View File

@ -1,3 +1,5 @@
--source include/have_innodb.inc
#
# A simple test of the greedy query optimization algorithm and the switches that
# control the optimizationprocess.
@ -248,7 +250,7 @@ explain select t1.c11 from t7, t6, t5, t4, t3, t2, t1 where t1.c11 = t2.c21 and
show status like 'Last_query_cost';
set optimizer_prune_level=1;
set optimizer_prune_level=2;
select @@optimizer_prune_level;
set optimizer_search_depth=0;
@ -381,6 +383,573 @@ LEFT JOIN (
SET optimizer_search_depth = DEFAULT;
DROP TABLE t1,t2,t2_1,t3,t3_1,t4,t4_1,t5,t5_1;
set join_cache_level=@save_join_cache_level;
--echo End of 5.0 tests
set join_cache_level=@save_join_cache_level;
--echo #
--echo # Bug #59326: Greedy optimizer produce stupid query execution plans.
--echo #
CREATE TABLE t10(
K INT NOT NULL AUTO_INCREMENT,
I INT,
PRIMARY KEY(K)
);
INSERT INTO t10(I) VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(0);
CREATE TABLE t100 LIKE t10;
INSERT INTO t100(I)
SELECT X.I FROM t10 AS X,t10 AS Y;
CREATE TABLE t10000 LIKE t10;
INSERT INTO t10000(I)
SELECT X.I FROM t100 AS X, t100 AS Y;
--disable_warnings
let $total_handler_reads=
select sum(variable_value) from information_schema.session_status
where VARIABLE_NAME like 'Handler_read%';
--enable_warnings
## All crossproducts should be executed in order t10,t100,t10000
EXPLAIN SELECT * FROM t10,t100,t10000;
EXPLAIN SELECT * FROM t10,t10000,t100;
EXPLAIN SELECT * FROM t100,t10,t10000;
EXPLAIN SELECT * FROM t100,t10000,t10;
EXPLAIN SELECT * FROM t10000,t10,t100;
EXPLAIN SELECT * FROM t10000,t100,t10;
######
## Ordering between T100,T10000 EQ-joined T10 will
## normally be with smallest EQ-table joined first
######
let $query=
SELECT STRAIGHT_JOIN COUNT(*) FROM t10,t100,t10000
WHERE t100.K=t10.I
AND t10000.K=t10.I;
--source include/expect_qep.inc
## However, swapping EQ_REF-joined tables gives the same cost
let $query=
SELECT STRAIGHT_JOIN COUNT(*) FROM t10,t10000,t100
WHERE t100.K=t10.I
AND t10000.K=t10.I;
--source include/check_qep.inc
#####
# Expect all variants of EQ joining t100 & t10000 with T10
# to have same cost # handler_reads:
let $query=
SELECT COUNT(*) FROM t10,t100,t10000
WHERE t100.K=t10.I
AND t10000.K=t10.I;
--source include/check_qep.inc
let $query=
SELECT COUNT(*) FROM t10,t10000,t100
WHERE t100.K=t10.I
AND t10000.K=t10.I;
--source include/check_qep.inc
let $query=
SELECT COUNT(*) FROM t10,t100,t10000
WHERE t100.K=t10.I
AND t10000.K=t10.K;
--source include/check_qep.inc
let $query=
SELECT COUNT(*) FROM t10,t10000,t100
WHERE t100.K=t10.I
AND t10000.K=t10.K;
--source include/check_qep.inc
let $query=
SELECT COUNT(*) FROM t100,t10,t10000
WHERE t100.K=t10.I
AND t10000.K=t10.K;
--source include/check_qep.inc
let $query=
SELECT COUNT(*) FROM t100,t10000,t10
WHERE t100.K=t10.I
AND t10000.K=t10.K;
--source include/check_qep.inc
let $query=
SELECT COUNT(*) FROM t10000,t10,t100
WHERE t100.K=t10.I
AND t10000.K=t10.K;
--source include/check_qep.inc
let $query=
SELECT COUNT(*) FROM t10000,t100,t10
WHERE t100.K=t10.I
AND t10000.K=t10.K;
--source include/check_qep.inc
#####
## EQ_REF Should be executed before table scan(ALL)
## - Independent of #records in table being EQ_REF-joined
#####
#####
# Expect: Join EQ_REF(t100) before ALL(t10000)
let $query=
SELECT STRAIGHT_JOIN COUNT(*) FROM t10,t100,t10000
WHERE t100.K=t10.I
AND t10000.I=t10.I;
--source include/expect_qep.inc
let $query=
SELECT COUNT(*) FROM t10,t100,t10000
WHERE t100.K=t10.I
AND t10000.I=t10.I;
--source include/check_qep.inc
let $query=
SELECT COUNT(*) FROM t10,t10000,t100
WHERE t100.K=t10.I
AND t10000.I=t10.I;
--source include/check_qep.inc
#####
# Expect: Join EQ_REF(t10000) before ALL(t100) (star-join)
let $query=
SELECT STRAIGHT_JOIN COUNT(*) FROM t10,t10000,t100
WHERE t100.I=t10.I
AND t10000.K=t10.I;
--source include/expect_qep.inc
--echo # See BUG#18352936
let $query=
SELECT COUNT(*) FROM t10,t100,t10000
WHERE t100.I=t10.I
AND t10000.K=t10.I;
--source include/check_qep.inc
--echo # See BUG#18352936
let $query=
SELECT COUNT(*) FROM t10,t10000,t100
WHERE t100.I=t10.I
AND t10000.K=t10.I;
--source include/check_qep.inc
#####
# Expect: Join EQ_REF(t10000) before ALL(t100)
let $query=
SELECT STRAIGHT_JOIN COUNT(*) FROM t10,t10000,t100
WHERE t100.I=t10.I
AND t10000.K=t100.I;
--source include/expect_qep.inc
let $query=
SELECT COUNT(*) FROM t10,t100,t10000
WHERE t100.I=t10.I
AND t10000.K=t100.I;
--source include/check_qep.inc
let $query=
SELECT COUNT(*) FROM t10,t10000,t100
WHERE t100.I=t10.I
AND t10000.K=t100.I;
--source include/check_qep.inc
#####
## EQ_REF & ALL join two instances of t10000 with t10:
## Always EQ_REF join first before producing cross product
#####
#####
# Expected QEP: 'join EQ_REF(X) on X.K=t10.I' before 'cross' ALL(Y)
let $query=
SELECT STRAIGHT_JOIN COUNT(*) FROM t10,t10000 x,t10000 y
WHERE x.k=t10.i;
--source include/expect_qep.inc
let $query=
SELECT COUNT(*) FROM t10,t10000 x,t10000 y
WHERE x.k=t10.i;
--source include/check_qep.inc
let $query=
SELECT COUNT(*) FROM t10,t10000 y,t10000 x
WHERE x.k=t10.i;
--source include/check_qep.inc
#####
# Expected QEP: 'join EQ_REF(X) on X.K=t10.I' before ALL(Y)
let $query=
SELECT STRAIGHT_JOIN COUNT(*) FROM t10,t10000 x,t10000 y
WHERE x.k=t10.i
AND y.i=t10.i;
--source include/expect_qep.inc
let $query=
SELECT COUNT(*) FROM t10,t10000 x,t10000 y
WHERE x.k=t10.i
AND y.i=t10.i;
--source include/check_qep.inc
let $query=
SELECT COUNT(*) FROM t10,t10000 y,t10000 x
WHERE x.k=t10.i
AND y.i=t10.i;
--source include/check_qep.inc
#####
# Expected QEP: 'join EQ_REF(X) on X.K=t10.I' before ALL(Y)
let $query=
SELECT STRAIGHT_JOIN COUNT(*) FROM t10,t10000 x,t10000 y
WHERE x.k=t10.i
AND y.i=x.k;
--source include/expect_qep.inc
let $query=
SELECT COUNT(*) FROM t10,t10000 x,t10000 y
WHERE x.k=t10.i
AND y.i=x.k;
--source include/check_qep.inc
let $query=
SELECT COUNT(*) FROM t10,t10000 y,t10000 x
WHERE x.k=t10.i
AND y.i=x.k;
--source include/check_qep.inc
## Create indexes to test REF access
CREATE INDEX IX ON t10(I);
CREATE INDEX IX ON t100(I);
CREATE INDEX IX ON t10000(I);
########
## EQ_REF Should be executed before 'REF'
## - Independent of #records in table being EQ_REF-joined
####
# Expected QEP: 'join EQ_REF(t100) on t100.K=t10.I' before REF(t10000)
let $query=
SELECT STRAIGHT_JOIN COUNT(*) FROM t10,t100,t10000
WHERE t100.K=t10.I
AND t10000.I=t10.I;
--source include/expect_qep.inc
let $query=
SELECT COUNT(*) FROM t10,t100,t10000
WHERE t100.K=t10.I
AND t10000.I=t10.I;
--source include/check_qep.inc
let $query=
SELECT COUNT(*) FROM t10,t10000,t100
WHERE t100.K=t10.I
AND t10000.I=t10.I;
--source include/check_qep.inc
#####
## EQ_REF & REF join two instances of t10000 with t10:
#####
#####
## Expect this QEP, cost & #handler_read
# Expected QEP: 'join EQ_REF(X) on X.K=t10.I' before 'cross' ALL(Y)
let $query=
SELECT STRAIGHT_JOIN COUNT(*) FROM t10,t10000 x,t10000 y
WHERE x.k=t10.i;
--source include/expect_qep.inc
let $query=
SELECT COUNT(*) FROM t10,t10000 x,t10000 y
WHERE x.k=t10.i;
--source include/check_qep.inc
let $query=
SELECT COUNT(*) FROM t10,t10000 y,t10000 x
WHERE x.k=t10.i;
--source include/check_qep.inc
#####
# Expected QEP: 'join EQ_REF(X) on X.K=t10.I' before REF(Y)
let $query=
SELECT STRAIGHT_JOIN COUNT(*) FROM t10,t10000 x,t10000 y
WHERE x.k=t10.i
AND y.i=t10.i;
--source include/expect_qep.inc
let $query=
SELECT COUNT(*) FROM t10,t10000 x,t10000 y
WHERE x.k=t10.i
AND y.i=t10.i;
--source include/check_qep.inc
let $query=
SELECT COUNT(*) FROM t10,t10000 y,t10000 x
WHERE x.k=t10.i
AND y.i=t10.i;
--source include/check_qep.inc
#####
# Expected QEP: 'join EQ_REF(X) on X.K=t10.I' before REF(Y)
let $query=
SELECT STRAIGHT_JOIN COUNT(*) FROM t10,t10000 x,t10000 y
WHERE x.k=t10.i
AND y.i=x.k;
--source include/expect_qep.inc
let $query=
SELECT COUNT(*) FROM t10,t10000 x,t10000 y
WHERE x.k=t10.i
AND y.i=x.k;
--source include/check_qep.inc
let $query=
SELECT COUNT(*) FROM t10,t10000 y,t10000 x
WHERE x.k=t10.i
AND y.i=x.k;
--source include/check_qep.inc
########
########
--echo #
--echo # Test improved capabilities of analyzing complex query
--echo # plans without restricting 'optimizer_search_depth'.
--echo # Fix problems like those reported as bug#41740 & bug#58225.
--echo #
--echo # EPLAIN of queries using T1-T62 will timeout/hang wo/ fixes
--echo #
DROP TABLE t10, t10000;
--disable_result_log
let $tabledef=
( K INT NOT NULL AUTO_INCREMENT,
I INT,
A INT,
PRIMARY KEY(K), KEY IX(A)
) engine = InnoDB;
let $analyze = ANALYZE TABLE t100;
let $i= 1;
while ($i < 62)
{
let $create= CREATE TABLE t$i $tabledef;
eval $create;
let $insert =
INSERT INTO t$i(I,A) SELECT X.K,X.K FROM t100 AS X, t100 AS Y WHERE X.K < 20 AND Y.K <= $i;
eval $insert;
let $analyze = $analyze, t$i;
inc $i;
}
eval $analyze;
set optimizer_prune_level=default;
#--enable_result_log
#select @@optimizer_prune_level;
#--disable_result_log
flush status;
#################
## The EXPLAIN'ed query itself can't be part of the verified
## result as the QEP is not 100% predictable due to variation
## in statistics from the engines. This is believed to be
## caused by:
## - Variations in table fill degree.
## - 'Fuzzy' statistics provided by engines.
## - Round errors caused by 'cost' calculation using
## 'only' 64-bit double precision.
## - Other bugs...?
##
###############
## Will test with optimizer_search_depth= [0,1,3,62]
let $depth= 0;
while ($depth<4)
{
if ($depth==0)
{
set optimizer_search_depth=0;
}
if ($depth==1)
{
set optimizer_search_depth=1;
}
if ($depth==2)
{
set optimizer_search_depth=3;
}
if ($depth==3)
{
set optimizer_search_depth=62;
}
inc $depth;
## Test pruning of joined table scans (ALL)
## Prepare of QEP without timeout is heavily dependent
## on maintaining correctly '#rows-sorted' plan
##
let $query= SELECT COUNT(*) FROM t1 AS x;
let $i= 1;
while ($i < 61)
{
let $query= $query JOIN t$i ON t$i.I=x.I;
inc $i;
select @@optimizer_prune_level;
select @@optimizer_search_depth;
eval EXPLAIN $query;
}
## Test pruning of joined table scans (ALL)
## with multiple instances of same table.
## (All instances being equally expensive)
let $query= SELECT COUNT(*) FROM t1 AS x;
let $i= 1;
while ($i <= 56)
{
let $t= t$i;
let $query= $query JOIN $t as t$i ON t$i.I=x.I;
inc $i;
let $query= $query JOIN $t as t$i ON t$i.I=x.I;
inc $i;
let $query= $query JOIN $t as t$i ON t$i.I=x.I;
inc $i;
let $query= $query JOIN $t as t$i ON t$i.I=x.I;
inc $i;
let $query= $query JOIN $t as t$i ON t$i.I=x.I;
inc $i;
let $query= $query JOIN $t as t$i ON t$i.I=x.I;
inc $i;
let $query= $query JOIN $t as t$i ON t$i.I=x.I;
inc $i;
let $query= $query JOIN $t as t$i ON t$i.I=x.I;
inc $i;
select @@optimizer_prune_level;
select @@optimizer_search_depth;
eval EXPLAIN $query;
}
## A mix of 25% EQ_REF / 75% ALL joins
##
let $query= SELECT COUNT(*) FROM t1 AS x;
let $i= 1;
while ($i < 60)
{
let $query= $query JOIN t$i ON t$i.I = x.I;
inc $i;
let $query= $query JOIN t$i ON t$i.K = x.I;
inc $i;
let $query= $query JOIN t$i ON t$i.I = x.I;
inc $i;
let $query= $query JOIN t$i ON t$i.I = x.I;
inc $i;
eval EXPLAIN $query;
}
## A mix of 50% EQ_REF / 50% ALL joins
##
let $query= SELECT COUNT(*) FROM t1 AS x;
let $i= 1;
while ($i < 60)
{
let $query= $query JOIN t$i ON t$i.I = x.I;
inc $i;
let $query= $query JOIN t$i ON t$i.K = x.I;
inc $i;
let $query= $query JOIN t$i ON t$i.I = x.I;
inc $i;
let $query= $query JOIN t$i ON t$i.K = x.I;
inc $i;
eval EXPLAIN $query;
}
## A mix of 75% EQ_REF / 25% ALL joins
##
let $query= SELECT COUNT(*) FROM t1 AS x;
let $i= 1;
while ($i < 60)
{
let $query= $query JOIN t$i ON t$i.I = x.I;
inc $i;
let $query= $query JOIN t$i ON t$i.K = x.I;
inc $i;
let $query= $query JOIN t$i ON t$i.K = x.I;
inc $i;
let $query= $query JOIN t$i ON t$i.K = x.I;
inc $i;
eval EXPLAIN $query;
}
## 100% EQ_REF joins
##
let $query= SELECT COUNT(*) FROM t1 AS x;
let $i= 1;
while ($i < 61)
{
let $query= $query JOIN t$i ON t$i.K=x.I;
inc $i;
eval EXPLAIN $query;
}
}
let $drop = DROP TABLE t100;
let $i= 1;
while ($i < 62)
{
let $drop = $drop, t$i;
inc $i;
}
eval $drop;
--enable_result_log
show status like "optimizer%";
SET OPTIMIZER_SEARCH_DEPTH = DEFAULT;
--echo #
--echo # Bug found when testing greedy optimizer tests
--echo #
CREATE TABLE t1 (pk INTEGER,
col_int_key INTEGER,
col_varchar_key VARCHAR(8),
PRIMARY KEY (pk),
KEY (col_varchar_key, col_int_key, pk));
INSERT INTO t1 values (1,1,"A"),(2,2,"B");
explain SELECT * FROM t1 AS alias1
WHERE alias1.col_varchar_key IN (SELECT COUNT(*) FROM t1 AS SQ3_alias2 JOIN t1 AS SQ3_alias3 ON (SQ3_alias3.col_varchar_key = SQ3_alias2.col_varchar_key AND SQ3_alias3.pk = SQ3_alias2.pk));
drop table t1;
--echo #
--echo # This triggered an assert failure while testing
--echo #
CREATE TABLE t1 (a int, b int, key(b));
INSERT INTO t1 VALUES (7,4),(1,1);
CREATE TABLE t2 (d int);
INSERT INTO t2 VALUES (2),(3);
CREATE TABLE t3 (c int);
INSERT INTO t3 VALUES (5),(6);
SELECT * FROM t1 WHERE 5 IN (SELECT t1_a.a FROM t1 as t1_a WHERE 1 IN (SELECT t1_b.a FROM t1 as t1_b LEFT JOIN (t2 JOIN t3) ON (t1_b.a = t2.d) WHERE t1_b.b < 1));
drop table t1,t2,t3;
--echo End of 10.0 tests