MDEV-26278 Add functionality to eliminate derived tables from the query

Elimination of unnecessary tables from SQL queries is already present in MariaDB. But it only works for regular tables and not for derived ones. Imagine we have a view: CREATE VIEW v1 AS SELECT a, b, max(c) AS maxc FROM t1 GROUP BY a, b Due to "GROUP BY a, b" the values of combinations {a, b} are unique, and this fact can be treated as like derived table "v1" has a unique key on fields {a, b}. Suppose we have a SQL query: SELECT t2.* FROM t2 LEFT JOIN v1 ON t2.a=v1.a and t2.b=v1.b 1. Since {v1.a, v1.b} is unique and both these fields are bound to t2, "v1" is functionally dependent on t2. This means every record of "t2" will be either joined with a single record of "v1" or NULL-complemented. 2. No fields of "v1" are present on the SELECT list These two facts allow the server to completely exclude (eliminate) the derived table "v1" from the query.
2025-07-27 18:02:13 +03:00 · 2022-02-17 22:53:37 +07:00
parent 1f0187ff8d
commit 7f0201a2b5
5 changed files with 650 additions and 20 deletions
--- a/mysql-test/main/table_elim.test
+++ b/mysql-test/main/table_elim.test
@ -1,6 +1,7 @@
 #
 # Table elimination (MWL#17) tests
 #
+--source include/have_sequence.inc
 --disable_warnings
 drop table if exists t0, t1, t2, t3, t4, t5, t6;
 drop view if exists v1, v2;
@ -641,3 +642,120 @@ LIMIT 1;

 DROP TABLE t1,t2;

+--echo #
+--echo # MDEV-26278: Table elimination does not work across derived tables
+--echo #
+create table t1 (a int, b int);
+insert into t1 select seq, seq+10 from seq_1_to_10;
+
+create table t11 (
+  a int not null,
+  b int,
+  key(a)
+);
+
+insert into t11 select A.seq, A.seq+B.seq
+from
+  seq_1_to_10 A,
+  seq_1_to_100 B;
+create table t12 (
+  pk int primary key,
+  col1 int
+);
+
+insert into t12 select seq, seq from seq_1_to_1000;
+
+create view v2b as 
+select t11.a as a, count(*) as b
+from t11 left join t12 on t12.pk=t11.b
+group by t11.a;
+
+--echo # The whole v2b is eliminated
+explain select t1.* from t1 left join v2b on v2b.a=t1.a;
+
+--echo # Check format JSON as well
+explain format=JSON select t1.* from t1 left join v2b on t1.a=v2b.a;
+
+--echo # Elimination of a whole subquery 
+explain select t1.* from t1 left join 
+  (select t11.a as a, count(*) as b
+   from t11 left join t12 on t12.pk=t11.b
+   group by t11.a) v2b on v2b.a=t1.a;
+
+--echo # In this case v2b cannot be eliminated (since v2b.b is not unique)!
+explain select t1.* from t1 left join v2b on t1.a=v2b.b;
+
+--echo # Check format JSON as well
+explain format=JSON select t1.* from t1 left join v2b on t1.a=v2b.b;
+
+create view v2c as 
+select t11.a as a, max(t12.col1) as b
+from t11 left join t12 on t12.pk=t11.b
+group by t11.a;
+
+--echo # The whole v2c is eliminated
+explain select t1.* from t1 left join v2c on v2c.a=t1.a;
+
+--echo # Check format JSON as well
+explain format=JSON select t1.* from t1 left join v2c on v2c.a=t1.a;
+
+--echo # In this case v2c cannot be eliminated (since v2c.b is not unique)!
+explain select t1.* from t1 left join v2c on t1.a=v2c.b;
+
+--echo # Check format JSON as well
+explain format=JSON select t1.* from t1 left join v2c on t1.a=v2c.b;
+
+--echo # Create a view with multiple fields in the GROUP BY clause:
+create view v2d as 
+select t11.a as a, t11.b as b, max(t12.col1) as max_col1
+from t11 left join t12 on t12.pk=t11.b
+group by t11.a, t11.b;
+
+--echo # This one must not be eliminated since only one of the GROUP BY fields is bound:
+explain select t1.* from t1 left join v2d on v2d.a=t1.a;
+
+--echo # This must be eliminated since both fields are bound:
+explain select t1.* from t1 left join v2d on v2d.a=t1.a and v2d.b=t1.b;
+
+create table t13 (dt date, b int);
+
+--echo # Function year() in the GROUP BY list prevents treating this field 
+--echo # as a unique key
+create view v2e as
+select year(t13.dt) as yyy, max(t12.col1) as max_col1
+from t13 join t12 on t12.pk=t13.b
+group by yyy;
+
+--echo # No elimination here since function year() is used
+explain select t1.* from t1 left join v2e on v2e.yyy=t1.a;
+
+create table t2 (a int, b int, c int);
+insert into t2 select A.seq, B.seq, 123 from seq_1_to_3 A, seq_1_to_3 B;
+
+--echo # No elimination here since not all fields of the derived table's 
+--echo # GROUP BY are on the SELECT list so D.a is not unique
+explain select t1.* from t1 left join 
+  (select a, count(*) as cnt from t2 group by a, b) D on D.a=t1.a;
+  
+--echo # Still no elimination 'cause field D.b is just an alias for t2.a
+explain select t1.* from t1 left join 
+  (select a, a as b, count(*) as cnt from t2 group by a, b) D on D.a=t1.a and D.b=t1.b;
+
+--echo # Now both a and b fields are on the SELECT list and they are bound to t1
+--echo # so derived D must be eliminated
+explain select t1.* from t1 left join 
+  (select a as a1, b as b1, count(*) as cnt from t2 group by a, b) D 
+  on D.a1=t1.a and D.b1=t1.b;
+  
+--echo # Different order of fields in GROUP BY and SELECT lists
+--echo # must not hamper the elimination
+explain select t1.* from t1 left join 
+  (select count(*) as cnt, b, a from t2 group by a, b) D on D.a=t1.a and D.b=t1.b;
+  
+
+drop view v2b, v2c, v2d, v2e;
+drop table t1, t11, t12, t13, t2;
+
+--echo #
+--echo # End of MDEV-26278: Table elimination does not work across derived tables
+--echo #