1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-27 18:02:13 +03:00

MDEV-26278 Add functionality to eliminate derived tables from the query

Elimination of unnecessary tables from SQL queries is already present
in MariaDB. But it only works for regular tables and not for derived ones.

Imagine we have a view:
  CREATE VIEW v1 AS SELECT a, b, max(c) AS maxc FROM t1 GROUP BY a, b

Due to "GROUP BY a, b" the values of combinations {a, b} are unique,
and this fact allows the derived table "v1" to be treated as if it had
a unique key on fields {a, b}.

Suppose we have a SQL query:
  SELECT t2.* FROM t2 LEFT JOIN v1 ON t2.a=v1.a and t2.b=v1.b

1. Since {v1.a, v1.b} is unique and both these fields are bound to t2,
   "v1" is functionally dependent on t2.
   This means every record of "t2" will be either joined with
   a single record of "v1" or NULL-complemented.
2. No fields of "v1" are present in the SELECT list.

These two facts allow the server to completely exclude (eliminate)
the derived table "v1" from the query.
This commit is contained in:
Oleg Smirnov
2022-02-17 22:53:37 +07:00
parent 1f0187ff8d
commit 7f0201a2b5
5 changed files with 650 additions and 20 deletions

View File

@ -1,6 +1,7 @@
#
# Table elimination (MWL#17) tests
#
# Prerequisite check: later statements in this file read from seq_N_to_M
# tables (presumably provided by the Sequence engine - confirm in the include).
--source include/have_sequence.inc
# Defensive cleanup of objects possibly left behind by other tests. Warnings
# are switched off first, presumably because dropping a missing object with
# IF EXISTS would otherwise add noise to the recorded output.
--disable_warnings
drop table if exists t0, t1, t2, t3, t4, t5, t6;
drop view if exists v1, v2;
@ -641,3 +642,120 @@ LIMIT 1;
DROP TABLE t1,t2;
--echo #
--echo # MDEV-26278: Table elimination does not work across derived tables
--echo #
# Base tables shared by the MDEV-26278 scenarios.
# t1 is the outer (left) side of the LEFT JOINs that follow. It is filled
# from seq_1_to_10 with b = a + 10 (assuming seq_1_to_10 yields 1..10).
create table t1 (a int, b int);
insert into t1 select seq, seq+10 from seq_1_to_10;
# t11 is the table that the grouped views aggregate over. Column a has a
# plain (non-unique) key, and the cross join of the two sequences below
# repeats every value of a once per row of seq_1_to_100.
create table t11 (
a int not null,
b int,
key(a)
);
insert into t11 select A.seq, A.seq+B.seq
from
seq_1_to_10 A,
seq_1_to_100 B;
# t12 is the lookup table joined to t11 inside the views through its
# primary key. It is filled so that col1 equals pk.
create table t12 (
pk int primary key,
col1 int
);
insert into t12 select seq, seq from seq_1_to_1000;
# v2b groups by t11.a alone, so each value of v2b.a appears in at most one
# row of the view. v2b.b is an aggregate (count) and has no such guarantee.
create view v2b as
select t11.a as a, count(*) as b
from t11 left join t12 on t12.pk=t11.b
group by t11.a;
--echo # The whole v2b is eliminated
# The join condition binds v2b.a, the only GROUP BY column, to t1.a and the
# select list takes columns from t1 only.
explain select t1.* from t1 left join v2b on v2b.a=t1.a;
--echo # Check format JSON as well
# Same join, with the operands of the equality written in the other order.
explain format=JSON select t1.* from t1 left join v2b on t1.a=v2b.a;
--echo # Elimination of a whole subquery
# Same definition as v2b, written inline as a derived table instead of a view.
explain select t1.* from t1 left join
(select t11.a as a, count(*) as b
from t11 left join t12 on t12.pk=t11.b
group by t11.a) v2b on v2b.a=t1.a;
--echo # In this case v2b cannot be eliminated (since v2b.b is not unique)!
# Negative case: the join goes through the aggregate column v2b.b.
explain select t1.* from t1 left join v2b on t1.a=v2b.b;
--echo # Check format JSON as well
explain format=JSON select t1.* from t1 left join v2b on t1.a=v2b.b;
# v2c has the same shape as v2b (GROUP BY t11.a), but its aggregate reads a
# column of the inner, left-joined table t12 (max(t12.col1)) where v2b
# uses count(*).
create view v2c as
select t11.a as a, max(t12.col1) as b
from t11 left join t12 on t12.pk=t11.b
group by t11.a;
--echo # The whole v2c is eliminated
# Positive case: the join binds the single GROUP BY column v2c.a to t1.a.
explain select t1.* from t1 left join v2c on v2c.a=t1.a;
--echo # Check format JSON as well
explain format=JSON select t1.* from t1 left join v2c on v2c.a=t1.a;
--echo # In this case v2c cannot be eliminated (since v2c.b is not unique)!
# Negative case: the join goes through the aggregate column v2c.b.
explain select t1.* from t1 left join v2c on t1.a=v2c.b;
--echo # Check format JSON as well
explain format=JSON select t1.* from t1 left join v2c on t1.a=v2c.b;
--echo # Create a view with multiple fields in the GROUP BY clause:
# v2d groups by the pair (t11.a, t11.b), so only the combination of both
# columns is unique in the view, not either column on its own.
create view v2d as
select t11.a as a, t11.b as b, max(t12.col1) as max_col1
from t11 left join t12 on t12.pk=t11.b
group by t11.a, t11.b;
--echo # This one must not be eliminated since only one of the GROUP BY fields is bound:
# Only v2d.a is bound here and v2d.b is left free.
explain select t1.* from t1 left join v2d on v2d.a=t1.a;
--echo # This must be eliminated since both fields are bound:
# Both GROUP BY columns of v2d are equated to columns of t1.
explain select t1.* from t1 left join v2d on v2d.a=t1.a and v2d.b=t1.b;
# t13 supplies a date column for the expression-based GROUP BY case below.
# No rows are inserted into it in this section, and the only statement
# that reads it is an explain.
create table t13 (dt date, b int);
--echo # Function year() in the GROUP BY list prevents treating this field
--echo # as a unique key
# v2e groups by the alias yyy, which stands for year(t13.dt), a function
# result instead of a plain column. Unlike the earlier views it uses an
# inner join between its two tables.
create view v2e as
select year(t13.dt) as yyy, max(t12.col1) as max_col1
from t13 join t12 on t12.pk=t13.b
group by yyy;
--echo # No elimination here since function year() is used
explain select t1.* from t1 left join v2e on v2e.yyy=t1.a;
# t2 backs the inline derived-table cases below. It is filled with the cross
# join of seq_1_to_3 with itself (columns a and b) and a constant c = 123.
create table t2 (a int, b int, c int);
insert into t2 select A.seq, B.seq, 123 from seq_1_to_3 A, seq_1_to_3 B;
--echo # No elimination here since not all fields of the derived table's
--echo # GROUP BY are on the SELECT list so D.a is not unique
# D groups by (a, b) but exposes only a, so several rows of D can share one a.
explain select t1.* from t1 left join
(select a, count(*) as cnt from t2 group by a, b) D on D.a=t1.a;
--echo # Still no elimination 'cause field D.b is just an alias for t2.a
# D exposes a column named b, but it is defined as "a as b" in the select list.
explain select t1.* from t1 left join
(select a, a as b, count(*) as cnt from t2 group by a, b) D on D.a=t1.a and D.b=t1.b;
--echo # Now both a and b fields are on the SELECT list and they are bound to t1
--echo # so derived D must be eliminated
# The GROUP BY columns are exposed under the aliases a1 and b1, and the join
# condition refers to them through those aliases.
explain select t1.* from t1 left join
(select a as a1, b as b1, count(*) as cnt from t2 group by a, b) D
on D.a1=t1.a and D.b1=t1.b;
--echo # Different order of fields in GROUP BY and SELECT lists
--echo # must not hamper the elimination
# The select list order is (cnt, b, a) while the GROUP BY order is (a, b).
explain select t1.* from t1 left join
(select count(*) as cnt, b, a from t2 group by a, b) D on D.a=t1.a and D.b=t1.b;
# Cleanup: drop the views and tables created for the MDEV-26278 cases
# (views v2b, v2c, v2d, v2e and tables t1, t11, t12, t13, t2).
drop view v2b, v2c, v2d, v2e;
drop table t1, t11, t12, t13, t2;
--echo #
--echo # End of MDEV-26278: Table elimination does not work across derived tables
--echo #