MDEV-26278 Add functionality to eliminate derived tables from the query

Elimination of unnecessary tables from SQL queries is already present in MariaDB. But it only works for regular tables and not for derived ones. Imagine we have a view: CREATE VIEW v1 AS SELECT a, b, max(c) AS maxc FROM t1 GROUP BY a, b Due to "GROUP BY a, b" the values of combinations {a, b} are unique, and this fact can be treated as like derived table "v1" has a unique key on fields {a, b}. Suppose we have a SQL query: SELECT t2.* FROM t2 LEFT JOIN v1 ON t2.a=v1.a and t2.b=v1.b 1. Since {v1.a, v1.b} is unique and both these fields are bound to t2, "v1" is functionally dependent on t2. This means every record of "t2" will be either joined with a single record of "v1" or NULL-complemented. 2. No fields of "v1" are present on the SELECT list These two facts allow the server to completely exclude (eliminate) the derived table "v1" from the query.
2025-07-29 05:21:33 +03:00 · 2022-02-17 22:53:37 +07:00
parent 1f0187ff8d
commit 7f0201a2b5
5 changed files with 650 additions and 20 deletions
--- a/mysql-test/main/table_elim.result
+++ b/mysql-test/main/table_elim.result
@ -704,3 +704,284 @@ LIMIT 1;
 PostID	Voted
 1	NULL
 DROP TABLE t1,t2;
+#
+# MDEV-26278: Table elimination does not work across derived tables
+#
+create table t1 (a int, b int);
+insert into t1 select seq, seq+10 from seq_1_to_10;
+create table t11 (
+a int not null,
+b int,
+key(a)
+);
+insert into t11 select A.seq, A.seq+B.seq
+from
+seq_1_to_10 A,
+seq_1_to_100 B;
+create table t12 (
+pk int primary key,
+col1 int
+);
+insert into t12 select seq, seq from seq_1_to_1000;
+create view v2b as 
+select t11.a as a, count(*) as b
+from t11 left join t12 on t12.pk=t11.b
+group by t11.a;
+# The whole v2b is eliminated
+explain select t1.* from t1 left join v2b on v2b.a=t1.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+# Check format JSON as well
+explain format=JSON select t1.* from t1 left join v2b on t1.a=v2b.a;
+EXPLAIN
+{
+  "query_block": {
+    "select_id": 1,
+    "const_condition": "1",
+    "nested_loop": [
+      {
+        "table": {
+          "table_name": "t1",
+          "access_type": "ALL",
+          "rows": 10,
+          "filtered": 100
+        }
+      }
+    ]
+  }
+}
+# Elimination of a whole subquery 
+explain select t1.* from t1 left join 
+(select t11.a as a, count(*) as b
+from t11 left join t12 on t12.pk=t11.b
+group by t11.a) v2b on v2b.a=t1.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+# In this case v2b cannot be eliminated (since v2b.b is not unique)!
+explain select t1.* from t1 left join v2b on t1.a=v2b.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+1	PRIMARY	<derived2>	ref	key0	key0	8	test.t1.a	10	Using where
+2	DERIVED	t11	ALL	NULL	NULL	NULL	NULL	1000	Using temporary; Using filesort
+# Check format JSON as well
+explain format=JSON select t1.* from t1 left join v2b on t1.a=v2b.b;
+EXPLAIN
+{
+  "query_block": {
+    "select_id": 1,
+    "const_condition": "1",
+    "nested_loop": [
+      {
+        "table": {
+          "table_name": "t1",
+          "access_type": "ALL",
+          "rows": 10,
+          "filtered": 100
+        }
+      },
+      {
+        "table": {
+          "table_name": "<derived2>",
+          "access_type": "ref",
+          "possible_keys": ["key0"],
+          "key": "key0",
+          "key_length": "8",
+          "used_key_parts": ["b"],
+          "ref": ["test.t1.a"],
+          "rows": 10,
+          "filtered": 100,
+          "attached_condition": "trigcond(t1.a = v2b.b and trigcond(t1.a is not null))",
+          "materialized": {
+            "query_block": {
+              "select_id": 2,
+              "const_condition": "1",
+              "filesort": {
+                "sort_key": "t11.a",
+                "temporary_table": {
+                  "nested_loop": [
+                    {
+                      "table": {
+                        "table_name": "t11",
+                        "access_type": "ALL",
+                        "rows": 1000,
+                        "filtered": 100
+                      }
+                    }
+                  ]
+                }
+              }
+            }
+          }
+        }
+      }
+    ]
+  }
+}
+create view v2c as 
+select t11.a as a, max(t12.col1) as b
+from t11 left join t12 on t12.pk=t11.b
+group by t11.a;
+# The whole v2c is eliminated
+explain select t1.* from t1 left join v2c on v2c.a=t1.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+# Check format JSON as well
+explain format=JSON select t1.* from t1 left join v2c on v2c.a=t1.a;
+EXPLAIN
+{
+  "query_block": {
+    "select_id": 1,
+    "const_condition": "1",
+    "nested_loop": [
+      {
+        "table": {
+          "table_name": "t1",
+          "access_type": "ALL",
+          "rows": 10,
+          "filtered": 100
+        }
+      }
+    ]
+  }
+}
+# In this case v2c cannot be eliminated (since v2c.b is not unique)!
+explain select t1.* from t1 left join v2c on t1.a=v2c.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.a	10	Using where
+2	DERIVED	t11	ALL	NULL	NULL	NULL	NULL	1000	Using temporary; Using filesort
+2	DERIVED	t12	eq_ref	PRIMARY	PRIMARY	4	test.t11.b	1	Using where
+# Check format JSON as well
+explain format=JSON select t1.* from t1 left join v2c on t1.a=v2c.b;
+EXPLAIN
+{
+  "query_block": {
+    "select_id": 1,
+    "const_condition": "1",
+    "nested_loop": [
+      {
+        "table": {
+          "table_name": "t1",
+          "access_type": "ALL",
+          "rows": 10,
+          "filtered": 100
+        }
+      },
+      {
+        "table": {
+          "table_name": "<derived2>",
+          "access_type": "ref",
+          "possible_keys": ["key0"],
+          "key": "key0",
+          "key_length": "5",
+          "used_key_parts": ["b"],
+          "ref": ["test.t1.a"],
+          "rows": 10,
+          "filtered": 100,
+          "attached_condition": "trigcond(trigcond(t1.a is not null))",
+          "materialized": {
+            "query_block": {
+              "select_id": 2,
+              "const_condition": "1",
+              "filesort": {
+                "sort_key": "t11.a",
+                "temporary_table": {
+                  "nested_loop": [
+                    {
+                      "table": {
+                        "table_name": "t11",
+                        "access_type": "ALL",
+                        "rows": 1000,
+                        "filtered": 100
+                      }
+                    },
+                    {
+                      "table": {
+                        "table_name": "t12",
+                        "access_type": "eq_ref",
+                        "possible_keys": ["PRIMARY"],
+                        "key": "PRIMARY",
+                        "key_length": "4",
+                        "used_key_parts": ["pk"],
+                        "ref": ["test.t11.b"],
+                        "rows": 1,
+                        "filtered": 100,
+                        "attached_condition": "trigcond(trigcond(t11.b is not null))"
+                      }
+                    }
+                  ]
+                }
+              }
+            }
+          }
+        }
+      }
+    ]
+  }
+}
+# Create a view with multiple fields in the GROUP BY clause:
+create view v2d as 
+select t11.a as a, t11.b as b, max(t12.col1) as max_col1
+from t11 left join t12 on t12.pk=t11.b
+group by t11.a, t11.b;
+# This one must not be eliminated since only one of the GROUP BY fields is bound:
+explain select t1.* from t1 left join v2d on v2d.a=t1.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.a	10	Using where
+2	DERIVED	t11	ALL	a	NULL	NULL	NULL	1000	Using temporary; Using filesort
+2	DERIVED	t12	eq_ref	PRIMARY	PRIMARY	4	test.t11.b	1	Using where
+# This must be eliminated since both fields are bound:
+explain select t1.* from t1 left join v2d on v2d.a=t1.a and v2d.b=t1.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+create table t13 (dt date, b int);
+# Function year() in the GROUP BY list prevents treating this field 
+# as a unique key
+create view v2e as
+select year(t13.dt) as yyy, max(t12.col1) as max_col1
+from t13 join t12 on t12.pk=t13.b
+group by yyy;
+# No elimination here since function year() is used
+explain select t1.* from t1 left join v2e on v2e.yyy=t1.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.a	2	Using where
+2	DERIVED	NULL	NULL	NULL	NULL	NULL	NULL	NULL	no matching row in const table
+create table t2 (a int, b int, c int);
+insert into t2 select A.seq, B.seq, 123 from seq_1_to_3 A, seq_1_to_3 B;
+# No elimination here since not all fields of the derived table's 
+# GROUP BY are on the SELECT list so D.a is not unique
+explain select t1.* from t1 left join 
+(select a, count(*) as cnt from t2 group by a, b) D on D.a=t1.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.a	2	Using where
+2	DERIVED	t2	ALL	NULL	NULL	NULL	NULL	9	Using temporary; Using filesort
+# Still no elimination 'cause field D.b is just an alias for t2.a
+explain select t1.* from t1 left join 
+(select a, a as b, count(*) as cnt from t2 group by a, b) D on D.a=t1.a and D.b=t1.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+1	PRIMARY	<derived2>	ref	key0	key0	10	test.t1.a,test.t1.b	2	Using where
+2	DERIVED	t2	ALL	NULL	NULL	NULL	NULL	9	Using temporary; Using filesort
+Warnings:
+Warning	1052	Column 'b' in group statement is ambiguous
+# Now both a and b fields are on the SELECT list and they are bound to t1
+# so derived D must be eliminated
+explain select t1.* from t1 left join 
+(select a as a1, b as b1, count(*) as cnt from t2 group by a, b) D 
+on D.a1=t1.a and D.b1=t1.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+# Different order of fields in GROUP BY and SELECT lists
+# must not hamper the elimination
+explain select t1.* from t1 left join 
+(select count(*) as cnt, b, a from t2 group by a, b) D on D.a=t1.a and D.b=t1.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+drop view v2b, v2c, v2d, v2e;
+drop table t1, t11, t12, t13, t2;
+#
+# End of MDEV-26278: Table elimination does not work across derived tables
+#
--- a/mysql-test/main/table_elim.test
+++ b/mysql-test/main/table_elim.test
@ -1,6 +1,7 @@
 #
 # Table elimination (MWL#17) tests
 #
+--source include/have_sequence.inc
 --disable_warnings
 drop table if exists t0, t1, t2, t3, t4, t5, t6;
 drop view if exists v1, v2;
@ -641,3 +642,120 @@ LIMIT 1;

 DROP TABLE t1,t2;

+--echo #
+--echo # MDEV-26278: Table elimination does not work across derived tables
+--echo #
+create table t1 (a int, b int);
+insert into t1 select seq, seq+10 from seq_1_to_10;
+
+create table t11 (
+  a int not null,
+  b int,
+  key(a)
+);
+
+insert into t11 select A.seq, A.seq+B.seq
+from
+  seq_1_to_10 A,
+  seq_1_to_100 B;
+create table t12 (
+  pk int primary key,
+  col1 int
+);
+
+insert into t12 select seq, seq from seq_1_to_1000;
+
+create view v2b as 
+select t11.a as a, count(*) as b
+from t11 left join t12 on t12.pk=t11.b
+group by t11.a;
+
+--echo # The whole v2b is eliminated
+explain select t1.* from t1 left join v2b on v2b.a=t1.a;
+
+--echo # Check format JSON as well
+explain format=JSON select t1.* from t1 left join v2b on t1.a=v2b.a;
+
+--echo # Elimination of a whole subquery 
+explain select t1.* from t1 left join 
+  (select t11.a as a, count(*) as b
+   from t11 left join t12 on t12.pk=t11.b
+   group by t11.a) v2b on v2b.a=t1.a;
+
+--echo # In this case v2b cannot be eliminated (since v2b.b is not unique)!
+explain select t1.* from t1 left join v2b on t1.a=v2b.b;
+
+--echo # Check format JSON as well
+explain format=JSON select t1.* from t1 left join v2b on t1.a=v2b.b;
+
+create view v2c as 
+select t11.a as a, max(t12.col1) as b
+from t11 left join t12 on t12.pk=t11.b
+group by t11.a;
+
+--echo # The whole v2c is eliminated
+explain select t1.* from t1 left join v2c on v2c.a=t1.a;
+
+--echo # Check format JSON as well
+explain format=JSON select t1.* from t1 left join v2c on v2c.a=t1.a;
+
+--echo # In this case v2c cannot be eliminated (since v2c.b is not unique)!
+explain select t1.* from t1 left join v2c on t1.a=v2c.b;
+
+--echo # Check format JSON as well
+explain format=JSON select t1.* from t1 left join v2c on t1.a=v2c.b;
+
+--echo # Create a view with multiple fields in the GROUP BY clause:
+create view v2d as 
+select t11.a as a, t11.b as b, max(t12.col1) as max_col1
+from t11 left join t12 on t12.pk=t11.b
+group by t11.a, t11.b;
+
+--echo # This one must not be eliminated since only one of the GROUP BY fields is bound:
+explain select t1.* from t1 left join v2d on v2d.a=t1.a;
+
+--echo # This must be eliminated since both fields are bound:
+explain select t1.* from t1 left join v2d on v2d.a=t1.a and v2d.b=t1.b;
+
+create table t13 (dt date, b int);
+
+--echo # Function year() in the GROUP BY list prevents treating this field 
+--echo # as a unique key
+create view v2e as
+select year(t13.dt) as yyy, max(t12.col1) as max_col1
+from t13 join t12 on t12.pk=t13.b
+group by yyy;
+
+--echo # No elimination here since function year() is used
+explain select t1.* from t1 left join v2e on v2e.yyy=t1.a;
+
+create table t2 (a int, b int, c int);
+insert into t2 select A.seq, B.seq, 123 from seq_1_to_3 A, seq_1_to_3 B;
+
+--echo # No elimination here since not all fields of the derived table's 
+--echo # GROUP BY are on the SELECT list so D.a is not unique
+explain select t1.* from t1 left join 
+  (select a, count(*) as cnt from t2 group by a, b) D on D.a=t1.a;
+  
+--echo # Still no elimination 'cause field D.b is just an alias for t2.a
+explain select t1.* from t1 left join 
+  (select a, a as b, count(*) as cnt from t2 group by a, b) D on D.a=t1.a and D.b=t1.b;
+
+--echo # Now both a and b fields are on the SELECT list and they are bound to t1
+--echo # so derived D must be eliminated
+explain select t1.* from t1 left join 
+  (select a as a1, b as b1, count(*) as cnt from t2 group by a, b) D 
+  on D.a1=t1.a and D.b1=t1.b;
+  
+--echo # Different order of fields in GROUP BY and SELECT lists
+--echo # must not hamper the elimination
+explain select t1.* from t1 left join 
+  (select count(*) as cnt, b, a from t2 group by a, b) D on D.a=t1.a and D.b=t1.b;
+  
+
+drop view v2b, v2c, v2d, v2e;
+drop table t1, t11, t12, t13, t2;
+
+--echo #
+--echo # End of MDEV-26278: Table elimination does not work across derived tables
+--echo #
--- a/sql/opt_table_elimination.cc
+++ b/sql/opt_table_elimination.cc
@ -33,6 +33,7 @@
 #include "sql_select.h"
 #include "opt_trace.h"
 #include "my_json_writer.h"
+#include <set>

 /*
  OVERVIEW
@ -134,6 +135,11 @@
   - Nodes representing unique keys. Unique key has
      = incoming edges from key component value modules
      = outgoing edge to key's table module
+   - Nodes representing unique pseudo-keys for derived tables.
+     Unique pseudo-keys are composed as a result of GROUP BY expressions.
+     Like normal unique keys, they have:
+      = incoming edges from key component value modules
+      = outgoing edge to key's table module
   - Inner side of outer join module. Outer join module has
      = incoming edges from table value modules
      = No outgoing edges. Once we reach it, we know we can eliminate the 
@ -205,6 +211,7 @@ class Dep_module;
  class Dep_module_expr;
  class Dep_module_goal;
  class Dep_module_key;
+  class Dep_module_pseudo_key;

 class Dep_analysis_context;

@ -278,6 +285,8 @@ private:
    Dep_module_key *key_dep;
    /* Otherwise, this and advance */
    uint equality_no;
+    /* Or this one and advance */
+    Dep_module_pseudo_key *pseudo_key_dep;
  };
  friend class Dep_analysis_context;
  friend class Field_dependency_recorder; 
@ -302,12 +311,20 @@ class Dep_value_table : public Dep_value
 {
 public:
  Dep_value_table(TABLE *table_arg) : 
-    table(table_arg), fields(NULL), keys(NULL)
+    table(table_arg), fields(NULL), keys(NULL), pseudo_key(NULL)
  {}
  TABLE *table;  /* Table this object is representing */
  /* Ordered list of fields that belong to this table */
  Dep_value_field *fields;
-  Dep_module_key *keys; /* Ordered list of Unique keys in this table */
+
+  /* Ordered list of Unique keys in this table */
+  Dep_module_key *keys;
+
+  /*
+    Possible unique pseudo-key applicable for this table
+    (only none or a single one is possible)
+  */
+  Dep_module_pseudo_key *pseudo_key;

  /* Iteration over unbound modules that are our dependencies */
  Iterator init_unbound_modules_iter(char *buf);
@ -443,9 +460,62 @@ private:
 const size_t Dep_module_key::iterator_size= 
  ALIGN_SIZE(sizeof(Dep_module_key::Value_iter));

-const size_t Dep_module::iterator_size=
-  MY_MAX(Dep_module_expr::iterator_size, Dep_module_key::iterator_size);

+/*
+  A unique pseudo-key module for a derived table.
+  For example, a derived table
+  "SELECT a, count(*) from t1 GROUP BY a"
+  has unique values in its first field "a" due to GROUP BY expression
+  so this can be considered as a unique key for this derived table
+*/
+
+class Dep_module_pseudo_key : public Dep_module
+{
+public:
+  Dep_module_pseudo_key(Dep_value_table *table_arg,
+                        std::set<field_index_t>&& field_indexes)
+      : table(table_arg), derived_table_field_indexes(field_indexes)
+  {
+    unbound_args= static_cast<uint>(field_indexes.size());
+  }
+
+  Dep_value_table *table;
+
+  Iterator init_unbound_values_iter(char *buf) override;
+
+  Dep_value *get_next_unbound_value(Dep_analysis_context *dac,
+                                    Iterator iter) override;
+
+  bool covers_field(int field_index);
+
+  static const size_t iterator_size;
+
+private:
+  /*
+    Set of field numbers (indexes) in the derived table's SELECT list
+    which are included in the GROUP BY expression.
+    For example, unique pseudo-key for SQL
+    "SELECT count(*), b, a FROM t1 GROUP BY a, b"
+    will include two elements: {2} and {1}, since "a" and "b" are on the
+    GROUP BY list and also are present on the SELECT list with indexes 2 and 1
+    (numeration starts from 0).
+  */
+  std::set<field_index_t> derived_table_field_indexes;
+
+  class Value_iter
+  {
+  public:
+    Dep_value_table *table;
+  };
+};
+
+const size_t Dep_module_pseudo_key::iterator_size=
+  ALIGN_SIZE(sizeof(Dep_module_pseudo_key::Value_iter));
+
+const size_t Dep_module::iterator_size=
+  MY_MAX(Dep_module_expr::iterator_size,
+         MY_MAX(Dep_module_key::iterator_size,
+                Dep_module_pseudo_key::iterator_size));

 /*
  A module that represents outer join that we're trying to eliminate. If we 
@ -508,12 +578,17 @@ public:
  */
  MY_BITMAP expr_deps;
  
-  Dep_value_table *create_table_value(TABLE *table);
+  Dep_value_table *create_table_value(TABLE_LIST *table_list);
  Dep_value_field *get_field_value(Field *field);

 #ifndef DBUG_OFF
  void dbug_print_deps();
 #endif 
+
+private:
+  void create_unique_pseudo_key_if_needed(TABLE_LIST *table_list,
+                                          Dep_value_table *tbl_dep);
+  int find_field_in_list(List<Item> &fields_list, Item *field);
 };


@ -851,7 +926,7 @@ bool check_func_dependency(JOIN *join,
  /* Create Dep_value_table objects for all tables we're trying to eliminate */
  if (oj_tbl)
  {
-    if (!dac.create_table_value(oj_tbl->table))
+    if (!dac.create_table_value(oj_tbl))
      return FALSE; /* purecov: inspected */
  }
  else
@ -861,7 +936,7 @@ bool check_func_dependency(JOIN *join,
    {
      if (tbl->table && (tbl->table->map & dep_tables))
      {
-        if (!dac.create_table_value(tbl->table))
+        if (!dac.create_table_value(tbl))
          return FALSE; /* purecov: inspected */
      }
    }
@ -1577,33 +1652,139 @@ void add_module_expr(Dep_analysis_context *ctx, Dep_module_expr **eq_mod,
  DESCRIPTION
    Create a Dep_value_table object for the given table. Also create
    Dep_module_key objects for all unique keys in the table.
+    Create a unique pseudo-key if this table is derived and has
+    a GROUP BY expression.

  RETURN
    Created table value object
    NULL if out of memory
 */

-Dep_value_table *Dep_analysis_context::create_table_value(TABLE *table)
+Dep_value_table *
+Dep_analysis_context::create_table_value(TABLE_LIST *table_list)
 {
  Dep_value_table *tbl_dep;
-  if (!(tbl_dep= new Dep_value_table(table)))
+  if (!(tbl_dep= new Dep_value_table(table_list->table)))
    return NULL; /* purecov: inspected */

  Dep_module_key **key_list= &(tbl_dep->keys);
  /* Add dependencies for unique keys */
-  for (uint i=0; i < table->s->keys; i++)
+  for (uint i= 0; i < table_list->table->s->keys; i++)
  {
-    KEY *key= table->key_info + i; 
+    KEY *key= table_list->table->key_info + i;
    if (key->flags & HA_NOSAME)
    {
      Dep_module_key *key_dep;
-      if (!(key_dep= new Dep_module_key(tbl_dep, i, key->user_defined_key_parts)))
+      if (!(key_dep= new Dep_module_key(tbl_dep, i,
+                                        key->user_defined_key_parts)))
        return NULL;
      *key_list= key_dep;
      key_list= &(key_dep->next_table_key);
    }
  }
-  return table_deps[table->tablenr]= tbl_dep;
+
+  create_unique_pseudo_key_if_needed(table_list, tbl_dep);
+  return table_deps[table_list->table->tablenr]= tbl_dep;
+}
+
+
+/*
+  @brief
+    Check if we can create a unique pseudo-key for the passed table.
+    If we can, create a dependency for it
+
+  @detail
+    Currently, pseudo-key is created for the list of GROUP BY columns.
+
+    TODO: also it can be created if the query uses
+     - SELECT DISTINCT
+     - UNION DISTINCT (not UNION ALL)
+*/
+
+void Dep_analysis_context::create_unique_pseudo_key_if_needed(
+    TABLE_LIST *table_list, Dep_value_table *tbl_dep)
+{
+  auto select_unit= table_list->get_unit();
+  SELECT_LEX *first_select= nullptr;
+  if (select_unit)
+  {
+    first_select= select_unit->first_select();
+
+    /*
+      Exclude UNION (ALL) queries from consideration by checking
+      next_select() == nullptr
+    */
+    if (unlikely(select_unit->first_select()->next_select()))
+      first_select= nullptr;
+  }
+
+  /*
+    GROUP BY expression is considered as a unique pseudo-key
+    for the derived table. Add this pseudo key as a dependency
+  */
+  if (first_select && first_select->group_list.elements > 0)
+  {
+    bool valid= true;
+    std::set<field_index_t> exposed_fields_indexes;
+    for (auto cur_group= first_select->group_list.first;
+         cur_group;
+         cur_group= cur_group->next)
+    {
+      auto elem= *(cur_group->item);
+      /*
+        Make sure GROUP BY elements contain only fields
+        and no functions or other expressions
+      */
+      if (elem->type() != Item::FIELD_ITEM)
+      {
+        valid= false;
+        break;
+      }
+      auto field_idx= find_field_in_list(first_select->join->fields_list, elem);
+      if (field_idx == -1)
+      {
+        /*
+          This GROUP BY element is not present in the select list. This is a
+          case like this:
+             (SELECT a FROM t1 GROUP by a,b) as TBL
+          Here, the combination of (a,b) is unique, but the select doesn't
+          include "b". "a" alone is not unique, so TBL doesn't have a unique
+          pseudo-key.
+        */
+        valid= false;
+        break;
+      }
+      exposed_fields_indexes.insert(field_idx);
+    }
+    if (valid)
+    {
+      Dep_module_pseudo_key *pseudo_key;
+      pseudo_key= new Dep_module_pseudo_key(tbl_dep,
+                                            std::move(exposed_fields_indexes));
+      tbl_dep->pseudo_key= pseudo_key;
+    }
+  }
+}
+
+
+/*
+  Iterate the list of fields and look for the given field.
+  Returns the index of the field if it is found on the list
+  and -1 otherwise
+*/
+
+int Dep_analysis_context::find_field_in_list(List<Item> &fields_list,
+                                             Item *field)
+{
+  List_iterator<Item> it(fields_list);
+  int field_idx= 0;
+  while (auto next_field= it++)
+  {
+    if (next_field->eq(field, false))
+      return field_idx;
+    field_idx++;
+  }
+  return -1; /*not found*/
 }


@ -1746,11 +1927,39 @@ Dep_value* Dep_module_key::get_next_unbound_value(Dep_analysis_context *dac,
 }


+char *Dep_module_pseudo_key::init_unbound_values_iter(char *buf)
+{
+  Value_iter *iter= ALIGN_PTR(my_ptrdiff_t(buf), Value_iter);
+  iter->table= table;
+  return (char *) iter;
+}
+
+Dep_value *
+Dep_module_pseudo_key::get_next_unbound_value(Dep_analysis_context *dac,
+                                                  Dep_module::Iterator iter)
+{
+  Dep_value *res= ((Value_iter *) iter)->table;
+  ((Value_iter *) iter)->table= NULL;
+  return res;
+}
+
+
+/*
+  Check if column number field_index is covered by the pseudo-key.
+*/
+
+bool Dep_module_pseudo_key::covers_field(int field_index)
+{
+  return derived_table_field_indexes.count(field_index) > 0;
+}
+
+
 Dep_value::Iterator Dep_value_field::init_unbound_modules_iter(char *buf)
 {
  Module_iter *iter= ALIGN_PTR(my_ptrdiff_t(buf), Module_iter);
  iter->key_dep= table->keys;
  iter->equality_no= 0;
+  iter->pseudo_key_dep= table->pseudo_key;
  return (char*)iter;
 }

@ -1758,7 +1967,8 @@ Dep_value::Iterator Dep_value_field::init_unbound_modules_iter(char *buf)
 void 
 Dep_value_field::make_unbound_modules_iter_skip_keys(Dep_value::Iterator iter)
 {
-  ((Module_iter*)iter)->key_dep= NULL;
+  ((Module_iter*) iter)->key_dep= NULL;
+  ((Module_iter*) iter)->pseudo_key_dep= NULL;
 }


@ -1786,6 +1996,16 @@ Dep_module* Dep_value_field::get_next_unbound_module(Dep_analysis_context *dac,
  }
  else 
    di->key_dep= NULL;
+
+  Dep_module_pseudo_key *pseudo_key_dep= di->pseudo_key_dep;
+  if (pseudo_key_dep && !pseudo_key_dep->is_applicable() &&
+      pseudo_key_dep->covers_field(field->field_index))
+  {
+    di->pseudo_key_dep= NULL;
+    return pseudo_key_dep;
+  }
+  else
+    di->pseudo_key_dep= NULL;
  
  /*
    Then walk through [multi]equalities and find those that
@ -1819,7 +2039,7 @@ static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl,
  TABLE *table;
  /*
    NOTE: there are TABLE_LIST object that have
-    tbl->table!= NULL && tbl->nested_join!=NULL and 
+    tbl->table!= NULL && tbl->nested_join!=NULL and
    tbl->table == tbl->nested_join->join_list->element(..)->table
  */
  if (tbl->nested_join)
@ -1848,7 +2068,6 @@ static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl,
    tbl->on_expr->walk(&Item::mark_as_eliminated_processor, FALSE, NULL);
 }

-
 #ifndef DBUG_OFF
 /* purecov: begin inspected */
 void Dep_analysis_context::dbug_print_deps()
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@ -3400,7 +3400,7 @@ bool st_select_lex::test_limit()



-st_select_lex* st_select_lex_unit::outer_select()
+st_select_lex* st_select_lex_unit::outer_select() const
 {
  return (st_select_lex*) master;
 }
@ -11917,15 +11917,24 @@ bool SELECT_LEX_UNIT::explainable() const
    EXPLAIN/ANALYZE unit, when:
    (1) if it's a subquery - it's not part of eliminated WHERE/ON clause.
    (2) if it's a CTE - it's not hanging (needed for execution)
-    (3) if it's a derived - it's not merged
+    (3) if it's a derived - it's not merged or eliminated
    if it's not 1/2/3 - it's some weird internal thing, ignore it
  */
+
  return item ?
           !item->eliminated :                        // (1)
           with_element ?
             derived && derived->derived_result &&
               !with_element->is_hanging_recursive(): // (2)
             derived ?
-               derived->is_materialized_derived() :   // (3)
+               derived->is_materialized_derived() && // (3)
+                 !is_derived_eliminated() :
               false;
 }
+
+bool SELECT_LEX_UNIT::is_derived_eliminated() const
+{
+  if (!derived)
+    return false;
+  return derived->table->map & outer_select()->join->eliminated_tables;
+}
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@ -971,7 +971,7 @@ public:
  };

  void init_query();
-  st_select_lex* outer_select();
+  st_select_lex* outer_select() const;
  const st_select_lex* first_select() const
  {
    return reinterpret_cast<const st_select_lex*>(slave);
@ -1039,6 +1039,9 @@ public:
  bool set_lock_to_the_last_select(Lex_select_lock l);

  friend class st_select_lex;
+
+private:
+  bool is_derived_eliminated() const;
 };

 typedef class st_select_lex_unit SELECT_LEX_UNIT;