MDEV-36321 keys generated on derived tables produce wrong out_rows estimates

Record per key statistics on keys generated for derived tables are missing, causing a fallback to the usage of MATCHING_ROWS_IN_OTHER_TABLE in best_access_path(). This can grossly overestimate the number of rows expected during a join and cause a bad join order. This patch tries to infer a few simple things about the derived table, like when the key being created matches the group by list (if any) inside the derived table. Approved by Sergei Petrunia (sergey@mariadb.com). Tested by Lena Startseva (lena.startseva@mariadb.com).
2026-01-06 05:22:24 +03:00 · 2025-08-21 14:10:13 +11:00
parent 28f5322a44
commit 8d08350dd3
13 changed files with 1116 additions and 189 deletions
--- a/mysql-test/main/derived_cond_pushdown.result
+++ b/mysql-test/main/derived_cond_pushdown.result
@@ -1930,7 +1930,7 @@ a	b	max_c	avg_c	a	b	c	d
 explain select * from v1,t2 where (v1.a=v1.b) and (v1.a=t2.a);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
-1	PRIMARY	<derived2>	ref	key0	key0	10	test.t2.a,test.t2.a	2	
+1	PRIMARY	<derived2>	ref	key0	key0	10	test.t2.a,test.t2.a	1	
 2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	20	Using where; Using temporary; Using filesort
 explain format=json select * from v1,t2 where (v1.a=v1.b) and (v1.a=t2.a);
 EXPLAIN
@@ -1960,7 +1960,7 @@ EXPLAIN
          "used_key_parts": ["a", "b"],
          "ref": ["test.t2.a", "test.t2.a"],
          "loops": 9,
-          "rows": 2,
+          "rows": 1,
          "cost": "COST_REPLACED",
          "filtered": 100,
          "materialized": {
@@ -2993,7 +2993,7 @@ where t1.a>5 group by a,b having max_c < 707) v1,
 t2 where (v1.a=t2.a) and (v1.max_c>300) and (v1.a=v1.b);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
-1	PRIMARY	<derived2>	ref	key0	key0	10	test.t2.a,test.t2.a	2	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	10	test.t2.a,test.t2.a	1	Using where
 2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	20	Using where; Using temporary; Using filesort
 explain format=json select * from
 (select a, b, max(c) as max_c, avg(c) as avg_c from t1
@@ -3026,7 +3026,7 @@ EXPLAIN
          "used_key_parts": ["a", "b"],
          "ref": ["test.t2.a", "test.t2.a"],
          "loops": 9,
-          "rows": 2,
+          "rows": 1,
          "cost": "COST_REPLACED",
          "filtered": 100,
          "attached_condition": "v1.max_c > 300",
@@ -3155,7 +3155,7 @@ a	b	max_c	avg_c	a	b	c	d
 explain select * from v1,t2 where (v1.a=t2.a) and (v1.b=t2.b);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
-1	PRIMARY	<derived2>	ref	key0	key0	10	test.t2.a,test.t2.b	2	
+1	PRIMARY	<derived2>	ref	key0	key0	10	test.t2.a,test.t2.b	1	
 2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	20	Using temporary; Using filesort
 explain format=json select * from v1,t2 where (v1.a=t2.a) and (v1.b=t2.b);
 EXPLAIN
@@ -3185,7 +3185,7 @@ EXPLAIN
          "used_key_parts": ["a", "b"],
          "ref": ["test.t2.a", "test.t2.b"],
          "loops": 9,
-          "rows": 2,
+          "rows": 1,
          "cost": "COST_REPLACED",
          "filtered": 100,
          "materialized": {
@@ -4041,7 +4041,7 @@ explain select * from v1,v2,t2 where
 (v1.a=t2.a) and (v1.a=v1.b) and (v1.a=v2.a) and (v2.max_c<300);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
-1	PRIMARY	<derived2>	ref	key1	key1	10	test.t2.a,test.t2.a	2	
+1	PRIMARY	<derived2>	ref	key1	key1	10	test.t2.a,test.t2.a	1	
 1	PRIMARY	<derived3>	ref	key0	key0	5	test.t2.a	2	Using where
 3	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	20	Using where; Using temporary; Using filesort
 2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	20	Using where; Using temporary; Using filesort
@@ -4074,7 +4074,7 @@ EXPLAIN
          "used_key_parts": ["a", "b"],
          "ref": ["test.t2.a", "test.t2.a"],
          "loops": 9,
-          "rows": 2,
+          "rows": 1,
          "cost": "COST_REPLACED",
          "filtered": 100,
          "materialized": {
@@ -4113,7 +4113,7 @@ EXPLAIN
          "key_length": "5",
          "used_key_parts": ["a"],
          "ref": ["test.t2.a"],
-          "loops": 18,
+          "loops": 9,
          "rows": 2,
          "cost": "COST_REPLACED",
          "filtered": 100,
@@ -8165,8 +8165,8 @@ a	b	min_c	a	b	max_c	avg_c
 explain select * from v4,v2 where
 (v4.a=v2.b) and (v4.a=v4.b) and (v4.min_c<100);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	<derived2>	ALL	NULL	NULL	NULL	NULL	20	Using where
-1	PRIMARY	<derived4>	ref	key0	key0	5	v4.a	2	
+1	PRIMARY	<derived4>	ALL	NULL	NULL	NULL	NULL	20	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	10	v2.b,v2.b	1	Using where
 4	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	20	Using where; Using temporary; Using filesort
 2	DERIVED	<derived3>	ALL	NULL	NULL	NULL	NULL	20	Using where; Using temporary; Using filesort
 3	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	20	Using where; Using temporary; Using filesort
@@ -8180,13 +8180,54 @@ EXPLAIN
    "nested_loop": [
      {
        "table": {
-          "table_name": "<derived2>",
+          "table_name": "<derived4>",
          "access_type": "ALL",
          "loops": 1,
          "rows": 20,
          "cost": "COST_REPLACED",
          "filtered": 100,
-          "attached_condition": "v4.b = v4.a and v4.min_c < 100 and v4.a is not null",
+          "attached_condition": "v2.b is not null and v2.b is not null",
+          "materialized": {
+            "query_block": {
+              "select_id": 4,
+              "cost": "COST_REPLACED",
+              "having_condition": "max_c < 707",
+              "filesort": {
+                "sort_key": "t1.a, t1.b",
+                "temporary_table": {
+                  "nested_loop": [
+                    {
+                      "table": {
+                        "table_name": "t1",
+                        "access_type": "ALL",
+                        "loops": 1,
+                        "rows": 20,
+                        "cost": "COST_REPLACED",
+                        "filtered": 100,
+                        "attached_condition": "t1.a > 5"
+                      }
+                    }
+                  ]
+                }
+              }
+            }
+          }
+        }
+      },
+      {
+        "table": {
+          "table_name": "<derived2>",
+          "access_type": "ref",
+          "possible_keys": ["key0"],
+          "key": "key0",
+          "key_length": "10",
+          "used_key_parts": ["a", "b"],
+          "ref": ["v2.b", "v2.b"],
+          "loops": 20,
+          "rows": 1,
+          "cost": "COST_REPLACED",
+          "filtered": 100,
+          "attached_condition": "v4.min_c < 100",
          "materialized": {
            "query_block": {
              "select_id": 2,
@@ -8238,46 +8279,6 @@ EXPLAIN
            }
          }
        }
-      },
-      {
-        "table": {
-          "table_name": "<derived4>",
-          "access_type": "ref",
-          "possible_keys": ["key0"],
-          "key": "key0",
-          "key_length": "5",
-          "used_key_parts": ["b"],
-          "ref": ["v4.a"],
-          "loops": 20,
-          "rows": 2,
-          "cost": "COST_REPLACED",
-          "filtered": 100,
-          "materialized": {
-            "query_block": {
-              "select_id": 4,
-              "cost": "COST_REPLACED",
-              "having_condition": "max_c < 707",
-              "filesort": {
-                "sort_key": "t1.a, t1.b",
-                "temporary_table": {
-                  "nested_loop": [
-                    {
-                      "table": {
-                        "table_name": "t1",
-                        "access_type": "ALL",
-                        "loops": 1,
-                        "rows": 20,
-                        "cost": "COST_REPLACED",
-                        "filtered": 100,
-                        "attached_condition": "t1.a > 5"
-                      }
-                    }
-                  ]
-                }
-              }
-            }
-          }
-        }
      }
    ]
  }
@@ -8296,8 +8297,8 @@ a	b	min_c	a	b	max_c	avg_c
 explain select * from v4,v2 where
 (v4.a=v2.b) and (v4.a=v4.b) and (v2.b<30);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	<derived2>	ALL	NULL	NULL	NULL	NULL	20	Using where
-1	PRIMARY	<derived4>	ref	key0	key0	5	v4.a	2	
+1	PRIMARY	<derived4>	ALL	NULL	NULL	NULL	NULL	20	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	10	v2.b,v2.b	1	
 4	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	20	Using where; Using temporary; Using filesort
 2	DERIVED	<derived3>	ALL	NULL	NULL	NULL	NULL	20	Using where; Using temporary; Using filesort
 3	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	20	Using where; Using temporary; Using filesort
@@ -8311,13 +8312,53 @@ EXPLAIN
    "nested_loop": [
      {
        "table": {
-          "table_name": "<derived2>",
+          "table_name": "<derived4>",
          "access_type": "ALL",
          "loops": 1,
          "rows": 20,
          "cost": "COST_REPLACED",
          "filtered": 100,
-          "attached_condition": "v4.b = v4.a and v4.a < 30 and v4.a is not null",
+          "attached_condition": "v2.b < 30 and v2.b is not null and v2.b is not null",
+          "materialized": {
+            "query_block": {
+              "select_id": 4,
+              "cost": "COST_REPLACED",
+              "having_condition": "max_c < 707",
+              "filesort": {
+                "sort_key": "t1.a, t1.b",
+                "temporary_table": {
+                  "nested_loop": [
+                    {
+                      "table": {
+                        "table_name": "t1",
+                        "access_type": "ALL",
+                        "loops": 1,
+                        "rows": 20,
+                        "cost": "COST_REPLACED",
+                        "filtered": 100,
+                        "attached_condition": "t1.a > 5 and t1.b < 30"
+                      }
+                    }
+                  ]
+                }
+              }
+            }
+          }
+        }
+      },
+      {
+        "table": {
+          "table_name": "<derived2>",
+          "access_type": "ref",
+          "possible_keys": ["key0"],
+          "key": "key0",
+          "key_length": "10",
+          "used_key_parts": ["a", "b"],
+          "ref": ["v2.b", "v2.b"],
+          "loops": 20,
+          "rows": 1,
+          "cost": "COST_REPLACED",
+          "filtered": 100,
          "materialized": {
            "query_block": {
              "select_id": 2,
@@ -8368,46 +8409,6 @@ EXPLAIN
            }
          }
        }
-      },
-      {
-        "table": {
-          "table_name": "<derived4>",
-          "access_type": "ref",
-          "possible_keys": ["key0"],
-          "key": "key0",
-          "key_length": "5",
-          "used_key_parts": ["b"],
-          "ref": ["v4.a"],
-          "loops": 20,
-          "rows": 2,
-          "cost": "COST_REPLACED",
-          "filtered": 100,
-          "materialized": {
-            "query_block": {
-              "select_id": 4,
-              "cost": "COST_REPLACED",
-              "having_condition": "max_c < 707",
-              "filesort": {
-                "sort_key": "t1.a, t1.b",
-                "temporary_table": {
-                  "nested_loop": [
-                    {
-                      "table": {
-                        "table_name": "t1",
-                        "access_type": "ALL",
-                        "loops": 1,
-                        "rows": 20,
-                        "cost": "COST_REPLACED",
-                        "filtered": 100,
-                        "attached_condition": "t1.a > 5 and t1.b < 30"
-                      }
-                    }
-                  ]
-                }
-              }
-            }
-          }
-        }
      }
    ]
  }
@@ -8426,8 +8427,8 @@ a	b	min_c	a	b	max_c	avg_c
 explain select * from v4,v2 where
 (v4.a=v2.b) and (v4.a=v4.b) and ((v2.b<30) or (v4.a>2));
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	<derived2>	ALL	NULL	NULL	NULL	NULL	20	Using where
-1	PRIMARY	<derived4>	ref	key0	key0	5	v4.a	2	
+1	PRIMARY	<derived4>	ALL	NULL	NULL	NULL	NULL	20	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	10	v2.b,v2.b	1	
 4	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	20	Using where; Using temporary; Using filesort
 2	DERIVED	<derived3>	ALL	NULL	NULL	NULL	NULL	20	Using where; Using temporary; Using filesort
 3	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	20	Using where; Using temporary; Using filesort
@@ -8441,13 +8442,53 @@ EXPLAIN
    "nested_loop": [
      {
        "table": {
-          "table_name": "<derived2>",
+          "table_name": "<derived4>",
          "access_type": "ALL",
          "loops": 1,
          "rows": 20,
          "cost": "COST_REPLACED",
          "filtered": 100,
-          "attached_condition": "v4.b = v4.a and (v4.a < 30 or v4.a > 2) and v4.a is not null",
+          "attached_condition": "(v2.b < 30 or v2.b > 2) and v2.b is not null and v2.b is not null",
+          "materialized": {
+            "query_block": {
+              "select_id": 4,
+              "cost": "COST_REPLACED",
+              "having_condition": "max_c < 707",
+              "filesort": {
+                "sort_key": "t1.a, t1.b",
+                "temporary_table": {
+                  "nested_loop": [
+                    {
+                      "table": {
+                        "table_name": "t1",
+                        "access_type": "ALL",
+                        "loops": 1,
+                        "rows": 20,
+                        "cost": "COST_REPLACED",
+                        "filtered": 100,
+                        "attached_condition": "t1.a > 5 and (t1.b < 30 or t1.b > 2)"
+                      }
+                    }
+                  ]
+                }
+              }
+            }
+          }
+        }
+      },
+      {
+        "table": {
+          "table_name": "<derived2>",
+          "access_type": "ref",
+          "possible_keys": ["key0"],
+          "key": "key0",
+          "key_length": "10",
+          "used_key_parts": ["a", "b"],
+          "ref": ["v2.b", "v2.b"],
+          "loops": 20,
+          "rows": 1,
+          "cost": "COST_REPLACED",
+          "filtered": 100,
          "materialized": {
            "query_block": {
              "select_id": 2,
@@ -8498,46 +8539,6 @@ EXPLAIN
            }
          }
        }
-      },
-      {
-        "table": {
-          "table_name": "<derived4>",
-          "access_type": "ref",
-          "possible_keys": ["key0"],
-          "key": "key0",
-          "key_length": "5",
-          "used_key_parts": ["b"],
-          "ref": ["v4.a"],
-          "loops": 20,
-          "rows": 2,
-          "cost": "COST_REPLACED",
-          "filtered": 100,
-          "materialized": {
-            "query_block": {
-              "select_id": 4,
-              "cost": "COST_REPLACED",
-              "having_condition": "max_c < 707",
-              "filesort": {
-                "sort_key": "t1.a, t1.b",
-                "temporary_table": {
-                  "nested_loop": [
-                    {
-                      "table": {
-                        "table_name": "t1",
-                        "access_type": "ALL",
-                        "loops": 1,
-                        "rows": 20,
-                        "cost": "COST_REPLACED",
-                        "filtered": 100,
-                        "attached_condition": "t1.a > 5 and (t1.b < 30 or t1.b > 2)"
-                      }
-                    }
-                  ]
-                }
-              }
-            }
-          }
-        }
      }
    ]
  }
@@ -20148,7 +20149,7 @@ where t2.b < 40 and t2.a=t3.a and t3.c=t.c;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
 1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	90	60.00	Using where
 1	PRIMARY	t3	ref	idx_a	idx_a	5	test.t2.a	1	100.00	Using where
-1	PRIMARY	<derived2>	ref	key0	key0	128	test.t3.c	5	100.00	
+1	PRIMARY	<derived2>	ref	key0	key0	128	test.t3.c	1	100.00	
 2	DERIVED	t4	ALL	idx_c	NULL	NULL	NULL	160	100.00	Using temporary; Using filesort
 Warnings:
 Note	1003	/* select#1 */ select `test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t2`.`c` AS `c`,`t`.`c` AS `t_c`,`t`.`max` AS `max`,`t`.`min` AS `min` from `test`.`t2` join `test`.`t3` join (/* select#2 */ select `test`.`t4`.`c` AS `c`,max(`test`.`t4`.`b`) AS `max`,min(`test`.`t4`.`b`) AS `min` from `test`.`t4` group by `test`.`t4`.`c`) `t` where `test`.`t3`.`a` = `test`.`t2`.`a` and `t`.`c` = `test`.`t3`.`c` and `test`.`t2`.`b` < 40
@@ -20198,7 +20199,7 @@ EXPLAIN
          "used_key_parts": ["c"],
          "ref": ["test.t3.c"],
          "loops": 80.99999987,
-          "rows": 5,
+          "rows": 1,
          "cost": "COST_REPLACED",
          "filtered": 100,
          "materialized": {
@@ -21318,7 +21319,7 @@ id	a
 explain extended select id, a from t1 where  id in (select id from v1);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
 1	PRIMARY	t1	ALL	PRIMARY	NULL	NULL	NULL	20	100.00	
-1	PRIMARY	<derived3>	ref	key0	key0	4	test.t1.id	2	50.00	FirstMatch(t1)
+1	PRIMARY	<derived3>	ref	key0	key0	4	test.t1.id	1	100.00	FirstMatch(t1)
 3	DERIVED	t1	ALL	PRIMARY	NULL	NULL	NULL	20	100.00	Using temporary; Using filesort
 3	DERIVED	t2	ref	ro_id	ro_id	4	test.t1.id	1	100.00	Using where
 Warnings:
@@ -21356,7 +21357,7 @@ on (t1.id = t2.ro_id AND t2.flag = 1)
 group by t1.id) dt);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
 1	PRIMARY	t1	ALL	PRIMARY	NULL	NULL	NULL	20	100.00	
-1	PRIMARY	<derived3>	ref	key1,distinct_key	key1	4	test.t1.id	2	50.00	FirstMatch(t1)
+1	PRIMARY	<derived3>	ref	key1,distinct_key	key1	4	test.t1.id	1	100.00	FirstMatch(t1)
 3	DERIVED	t1	ALL	PRIMARY	NULL	NULL	NULL	20	100.00	Using temporary; Using filesort
 3	DERIVED	t2	ref	ro_id	ro_id	4	test.t1.id	1	100.00	Using where
 Warnings:
@@ -22025,7 +22026,7 @@ WHERE t1.id BETWEEN 200 AND 100000;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t3	range	t1_id	t1_id	5	NULL	47	Using where; Using index
 1	PRIMARY	t1	eq_ref	PRIMARY	PRIMARY	4	test.t3.t1_id	1	Using index
-1	PRIMARY	<derived2>	ref	key0	key0	5	test.t3.t1_id	10	
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t3.t1_id	1	
 2	DERIVED	t2	ALL	t1_id	NULL	NULL	NULL	2408	Using where; Using temporary; Using filesort
 set  optimizer_switch='split_materialized=default';
 DROP TABLE t1,t2,t3;
@@ -22302,7 +22303,7 @@ from_agg_items.ledger_id = charges.from_ledger_id
 WHERE charges.to_ledger_id = 2;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	charges	ref	PRIMARY,fk_charge_from_ledger,fk_charge_to_ledger	fk_charge_to_ledger	8	const	8	
-1	PRIMARY	<derived2>	ref	key0	key0	17	test.charges.from_ledger_id,test.charges.id	4	
+1	PRIMARY	<derived2>	ref	key0	key0	17	test.charges.from_ledger_id,test.charges.id	1	
 2	DERIVED	transaction_items	ALL	fk_items_transaction	NULL	NULL	NULL	40	Using temporary; Using filesort
 2	DERIVED	transactions	eq_ref	PRIMARY	PRIMARY	8	test.transaction_items.transaction_id	1	
 INSERT INTO charges (id, from_ledger_id, to_ledger_id, amount) VALUES
@@ -22507,7 +22508,7 @@ from_agg_items.ledger_id = charges.from_ledger_id
 WHERE charges.to_ledger_id = 2;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	charges	ref	fk_charge_to_ledger	fk_charge_to_ledger	8	const	10	
-1	PRIMARY	<derived2>	ref	key0	key0	18	test.charges.from_ledger_id,test.charges.id	4	
+1	PRIMARY	<derived2>	ref	key0	key0	18	test.charges.from_ledger_id,test.charges.id	1	
 2	DERIVED	transaction_items	ALL	fk_items_transaction	NULL	NULL	NULL	40	Using temporary; Using filesort
 2	DERIVED	transactions	eq_ref	PRIMARY	PRIMARY	8	test.transaction_items.transaction_id	1	
 set optimizer_switch='split_materialized=default';
@@ -22845,6 +22846,8 @@ INSERT INTO t1 VALUES
 (95,3290880,487,'2021-02-15 18:59:35'),(96,3290798,0,'2021-02-15 18:59:52'),
 (97,3290777,983,'2021-02-15 19:00:10'),(98,3290811,488,'2021-02-15 19:00:10'),
 (99,3290917,1283,'2021-02-15 19:00:36'),(100,3290858,482,'2021-02-15 19:00:42');
+insert into t1 select seq, 3300000+seq, 100+seq, '2021-02-09 18:31:35'
+from seq_101_to_1000;
 CREATE TABLE t2 (a int) ENGINE=MYISAM;
 INSERT INTO t2 VALUES
 (3289475),(3289496),(3289562),(3289593),(3289594),(3289595),(3289626),
@@ -22853,7 +22856,7 @@ INSERT INTO t2 VALUES
 ANALYZE TABLE t1,t2;
 Table	Op	Msg_type	Msg_text
 test.t1	analyze	status	Engine-independent statistics collected
-test.t1	analyze	status	Table is already up to date
+test.t1	analyze	status	OK
 test.t2	analyze	status	Engine-independent statistics collected
 test.t2	analyze	status	OK
 EXPLAIN SELECT t1.valdouble, t1.valint1
@@ -22865,9 +22868,9 @@ t1.valdate = dt.maxdate AND
 t1.valint1 IN (SELECT * FROM t2);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	21	Using where; Start temporary
-1	PRIMARY	t1	ref	valint1,valint1_2	valint1	5	test.t2.a	2	Using index condition; Using where; End temporary
+1	PRIMARY	t1	ref	valint1,valint1_2	valint1	5	test.t2.a	1	Using index condition; Using where; End temporary
 1	PRIMARY	<derived2>	ref	key0	key0	11	test.t1.valdate,test.t1.valint1	1	
-2	LATERAL DERIVED	t	ref	valint1,valint1_2	valint1	5	test.t2.a	2	Using index condition
+2	LATERAL DERIVED	t	ref	valint1,valint1_2	valint1	5	test.t2.a	1	Using index condition
 SELECT t1.valdouble, t1.valint1
 FROM t1,
 (SELECT max(t.valdate) AS maxdate, t.valint1 FROM t1 t GROUP BY t.valint1)
--- a/mysql-test/main/derived_cond_pushdown.test
+++ b/mysql-test/main/derived_cond_pushdown.test
@@ -4280,6 +4280,9 @@ INSERT INTO t1 VALUES
 (97,3290777,983,'2021-02-15 19:00:10'),(98,3290811,488,'2021-02-15 19:00:10'),
 (99,3290917,1283,'2021-02-15 19:00:36'),(100,3290858,482,'2021-02-15 19:00:42');

+insert into t1 select seq, 3300000+seq, 100+seq, '2021-02-09 18:31:35'
+from seq_101_to_1000;
+
 CREATE TABLE t2 (a int) ENGINE=MYISAM;
 INSERT INTO t2 VALUES
 (3289475),(3289496),(3289562),(3289593),(3289594),(3289595),(3289626),
--- a/mysql-test/main/derived_opt.result
+++ b/mysql-test/main/derived_opt.result
@@ -566,4 +566,501 @@ DROP TABLE t1, t2;
 #
 # End of 10.3 tests
 #
+#
+# MDEV-36321 keys generated on derived tables produce wrong out_rows estimates
+#
+create table t1 (
+grp_id int,
+value int,
+index (grp_id)
+);
+insert into t1 select
+A.seq, B.seq
+from
+seq_1_to_100 A,
+seq_1_to_100 B;
+create table t2 (a int);
+insert into t2 select seq from seq_1_to_5;
+create table t3 (b int);
+insert into t3 select seq from seq_1_to_5;
+analyze table t1,t2;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	Engine-independent statistics collected
+test.t1	analyze	status	Table is already up to date
+test.t2	analyze	status	Engine-independent statistics collected
+test.t2	analyze	status	OK
+SET optimizer_trace='enabled=on';
+SET optimizer_trace_max_mem_size=10485760;
+select * from
+t2,
+(select max(value), grp_id from t1 group by grp_id) DT
+where
+t2.a= DT.grp_id;
+a	max(value)	grp_id
+1	100	1
+2	100	2
+3	100	3
+4	100	4
+5	100	5
+select
+json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
+from information_schema.optimizer_trace;
+t
+[
+    {
+        "table_alias": "DT",
+        "key_name": "key0",
+        "key_parts": 1,
+        "select": 
+        ["group_list_in_key"],
+        "rec_per_key_estimate": 1
+    }
+]
+# Same as above, but try a UNION:
+select * from
+t2,
+(select max(value), grp_id from t1 group by grp_id
+union all
+select max(value), grp_id from t1 group by grp_id) DT
+where
+t2.a= DT.grp_id;
+a	max(value)	grp_id
+1	100	1
+1	100	1
+2	100	2
+2	100	2
+3	100	3
+3	100	3
+4	100	4
+4	100	4
+5	100	5
+5	100	5
+select
+json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
+from information_schema.optimizer_trace;
+t
+[
+    {
+        "table_alias": "DT",
+        "key_name": "key0",
+        "key_parts": 1,
+        "select": 
+        [
+            "group_list_in_key",
+            "group_list_in_key"
+        ],
+        "rec_per_key_estimate": 2
+    }
+]
+# Same as the previous query but unhandled group by expression
+explain
+select * from
+t2,
+(select max(value), grp_id from t1 group by grp_id
+union all
+select max(value), grp_id from t1 group by MOD(grp_id,2)) DT
+where
+t2.a= DT.grp_id;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	5	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	101	
+2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	10000	Using temporary; Using filesort
+3	UNION	t1	ALL	NULL	NULL	NULL	NULL	10000	Using temporary; Using filesort
+select
+json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
+from information_schema.optimizer_trace;
+t
+[
+    {
+        "table_alias": "DT",
+        "key_name": "key0",
+        "key_parts": 1,
+        "select": 
+        [
+            "group_list_in_key",
+            "unhandled query"
+        ]
+    }
+]
+# view/cte/derived merged inside our derived table
+create view v1 as select * from t1;
+explain
+select * from
+t2,
+(select grp_id, max(value) as maxval from v1 group by grp_id) DT
+where
+t2.a= DT.grp_id;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	5	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	
+2	DERIVED	t1	ALL	grp_id	NULL	NULL	NULL	10000	Using temporary; Using filesort
+drop view v1;
+explain
+with cte1 as (select * from t1)
+select * from
+t2,
+(select grp_id, max(value) as maxval from cte1 group by grp_id) DT
+where
+t2.a= DT.grp_id;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	5	Using where
+1	PRIMARY	<derived3>	ref	key0	key0	5	test.t2.a	1	
+3	DERIVED	t1	ALL	grp_id	NULL	NULL	NULL	10000	Using temporary; Using filesort
+explain
+select * from
+t2,
+(
+select grp_id, max(value) as maxval from
+(
+select * from t1, t3
+where t1.grp_id = t3.b
+) dt1
+group by grp_id
+) DT
+where
+t2.a= DT.grp_id;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	5	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	
+2	DERIVED	t3	ALL	NULL	NULL	NULL	NULL	5	Using where; Using temporary; Using filesort
+2	DERIVED	t1	ref	grp_id	grp_id	5	test.t3.b	100	
+# Example with equalities on GROUP BY columns and other columns
+# Must produce {table=<derived2>, ref=test.t2.col2,test.t2.a, rows=1}
+alter table t2 add col2 int;
+explain
+select * from
+t2,
+(select max(value) as maxval, grp_id from t1 group by grp_id) DT
+where
+t2.col2=maxval and
+t2.a= DT.grp_id;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	5	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	10	test.t2.col2,test.t2.a	1	
+2	DERIVED	t1	ALL	grp_id	NULL	NULL	NULL	10000	Using temporary; Using filesort
+select
+json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
+from information_schema.optimizer_trace;
+t
+[
+    {
+        "table_alias": "DT",
+        "key_name": "key0",
+        "key_parts": 2,
+        "select": 
+        ["group_list_in_key"],
+        "rec_per_key_estimate": 1
+    }
+]
+explain
+select * from
+t2,
+(select grp_id, max(value) as maxval from t1 group by grp_id) DT
+where
+t2.col2=maxval and
+t2.a= DT.grp_id;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	5	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	10	test.t2.a,test.t2.col2	1	
+2	DERIVED	t1	ALL	grp_id	NULL	NULL	NULL	10000	Using temporary; Using filesort
+delete from t1;
+insert into t1 select 1, a.seq from seq_1_to_10 a;
+analyze table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	Engine-independent statistics collected
+test.t1	analyze	status	Table is already up to date
+explain
+select * from
+t2,
+(select distinct grp_id from t1
+union all
+select distinct value from t1) DT
+where
+t2.a= DT.grp_id;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	5	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	2	
+2	DERIVED	t1	range	NULL	grp_id	5	NULL	2	Using index for group-by
+3	UNION	t1	ALL	NULL	NULL	NULL	NULL	10	Using temporary
+select
+json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
+from information_schema.optimizer_trace;
+t
+[
+    {
+        "table_alias": "DT",
+        "key_name": "key0",
+        "key_parts": 1,
+        "select": 
+        [
+            "distinct_in_query_block",
+            "distinct_in_query_block"
+        ],
+        "rec_per_key_estimate": 2
+    }
+]
+drop table t1, t2;
+create table t1
+(
+a int not null,
+b int,
+c int,
+d int,
+amount decimal,
+key t1_ix1 (a,b)
+);
+# More complex examples
+insert into t1 values (0, NULL, 0, NULL, 10.0000), (1, 1, 1, 1, 10.0000),
+(2, 2, 2, 2, 20.0000), (3, 3, 3, 3, 30.0000), (4, 4, 4, 4, 40.0000),
+(5, 5, 5, 5, NULL), (6, 6, 6, 6, NULL), (7, 7, 7, 7, 70.0000),
+(8, 8, 8, 8, 80.0000);
+create table t2
+(
+a int NOT NULL,
+b int,
+name varchar(50),
+key t2_ix1 (a,b)
+) engine = innodb;
+insert into t2 values (0, NULL, 'a'), (1, NULL, 'A'), (2, 2, 'B'), (3,3, 'C'),
+(4,4, 'D'), (5,5, NULL), (6,6, NULL), (7,7, 'E'), (8,8, 'F'), (9,9, 'G'),
+(10,10,'H'), (11,11, NULL), (12,12, NULL);
+drop table t3;
+create table t3
+(
+a int not null,
+b int,
+description varchar(50),
+key t3_ix1 (a,b)
+) engine = innodb;
+insert into t3 values (1, 1, 'bar'),(2,2,'buz'), (3,3, 'silver');
+insert into t3 select seq, seq, 'junk' from seq_3_to_13;
+create table t4
+(
+c int not null,
+d int,
+descr varchar(50),
+key t4_ix1 (c,d)
+) engine = innodb;
+insert into t4 values (1, 1, 'iron'), (2,2,'aluminium'), (3,3, 'silver');
+insert into t4 select seq, seq, 'junk' from seq_3_to_13;
+# split materialized
+explain
+select * from t1
+join t2 on t1.a = t2.a and t1.b = t2.b
+join
+(
+select a, b, description from t3 group by a, b
+) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b
+where dt.a < 1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	range	t1_ix1	t1_ix1	4	NULL	1	Using index condition
+1	PRIMARY	t2	ref	t2_ix1	t2_ix1	9	test.t1.a,test.t1.b	1	
+1	PRIMARY	<derived2>	ref	key0	key0	9	test.t1.a,test.t1.b	1	
+2	LATERAL DERIVED	t3	ref	t3_ix1	t3_ix1	9	test.t1.a,test.t1.b	1	Using index condition
+select
+json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
+from information_schema.optimizer_trace;
+t
+[
+    {
+        "table_alias": "dt",
+        "key_name": "key0",
+        "key_parts": 2,
+        "select": 
+        ["group_list_in_key"],
+        "rec_per_key_estimate": 1
+    },
+    {
+        "table_alias": "dt",
+        "key_name": "key1",
+        "key_parts": 2,
+        "select": 
+        ["group_list_in_key"],
+        "rec_per_key_estimate": 1
+    }
+]
+# union with distinct rows
+explain
+select * from t1
+join t2 on t1.a = t2.a and t1.b = t2.b
+join
+(
+select a, b, description from t3 group by a, b
+union
+select c, d, descr from t4 group by c, d
+) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b
+where dt.a < 1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	range	t1_ix1	t1_ix1	4	NULL	1	Using index condition
+1	PRIMARY	t2	ref	t2_ix1	t2_ix1	9	test.t1.a,test.t1.b	1	
+1	PRIMARY	<derived2>	ref	key1,distinct_key	key1	9	test.t1.a,test.t1.b	2	
+2	DERIVED	t3	range	t3_ix1	t3_ix1	4	NULL	1	Using index condition
+3	UNION	t4	range	t4_ix1	t4_ix1	4	NULL	1	Using index condition
+NULL	UNION RESULT	<union2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
+select
+json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
+from information_schema.optimizer_trace;
+t
+[
+    {
+        "table_alias": "dt",
+        "key_name": "key1",
+        "key_parts": 2,
+        "select": 
+        [
+            "group_list_in_key",
+            "group_list_in_key"
+        ],
+        "rec_per_key_estimate": 2
+    },
+    {
+        "table_alias": "dt",
+        "key_name": "key2",
+        "key_parts": 2,
+        "select": 
+        [
+            "group_list_in_key",
+            "group_list_in_key"
+        ],
+        "rec_per_key_estimate": 2
+    }
+]
+# union without distinct rows
+explain
+select * from t1
+join t2 on t1.a = t2.a and t1.b = t2.b
+join
+(
+select a, b, description from t3 group by a, b
+union all
+select c, d, descr from t4 group by c, d
+) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b
+where dt.a < 1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	range	t1_ix1	t1_ix1	4	NULL	1	Using index condition
+1	PRIMARY	t2	ref	t2_ix1	t2_ix1	9	test.t1.a,test.t1.b	1	
+1	PRIMARY	<derived2>	ref	key0	key0	9	test.t1.a,test.t1.b	2	
+2	DERIVED	t3	range	t3_ix1	t3_ix1	4	NULL	1	Using index condition
+3	UNION	t4	range	t4_ix1	t4_ix1	4	NULL	1	Using index condition
+select
+json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
+from information_schema.optimizer_trace;
+t
+[
+    {
+        "table_alias": "dt",
+        "key_name": "key0",
+        "key_parts": 2,
+        "select": 
+        [
+            "group_list_in_key",
+            "group_list_in_key"
+        ],
+        "rec_per_key_estimate": 2
+    },
+    {
+        "table_alias": "dt",
+        "key_name": "key1",
+        "key_parts": 2,
+        "select": 
+        [
+            "group_list_in_key",
+            "group_list_in_key"
+        ],
+        "rec_per_key_estimate": 2
+    }
+]
+# union without distinct rows with simple non grouping 2nd select
+explain
+select * from t1
+join t2 on t1.a = t2.a and t1.b = t2.b
+join
+(
+select a, b, description from t3 group by a, b
+union all
+select c, d, descr from t4
+) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b
+where dt.a < 1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	range	t1_ix1	t1_ix1	4	NULL	1	Using index condition
+1	PRIMARY	t2	ref	t2_ix1	t2_ix1	9	test.t1.a,test.t1.b	1	
+1	PRIMARY	<derived2>	ref	key0	key0	9	test.t1.a,test.t1.b	1	
+2	DERIVED	t3	range	t3_ix1	t3_ix1	4	NULL	1	Using index condition
+3	UNION	t4	range	t4_ix1	t4_ix1	4	NULL	1	Using index condition
+select
+json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
+from information_schema.optimizer_trace;
+t
+[
+    {
+        "table_alias": "dt",
+        "key_name": "key0",
+        "key_parts": 2,
+        "select": 
+        [
+            "group_list_in_key",
+            "unhandled query"
+        ]
+    },
+    {
+        "table_alias": "dt",
+        "key_name": "key1",
+        "key_parts": 2,
+        "select": 
+        [
+            "group_list_in_key",
+            "unhandled query"
+        ]
+    }
+]
+# intersect
+explain
+select * from t1
+join t2 on t1.a = t2.a and t1.b = t2.b
+join
+(
+select a, b, description from t3 group by a, b
+intersect
+select c, d, descr from t4 group by c, d
+) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b
+where dt.a > 2 and dt.a < 4;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	range	t1_ix1	t1_ix1	4	NULL	1	Using index condition
+1	PRIMARY	t2	ref	t2_ix1	t2_ix1	9	test.t1.a,test.t1.b	1	
+1	PRIMARY	<derived2>	ref	key1,distinct_key	key1	9	test.t1.a,test.t1.b	2	
+2	DERIVED	t3	range	t3_ix1	t3_ix1	4	NULL	2	Using index condition
+3	INTERSECT	t4	range	t4_ix1	t4_ix1	4	NULL	2	Using index condition
+NULL	INTERSECT RESULT	<intersect2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
+select
+json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
+from information_schema.optimizer_trace;
+t
+[
+    {
+        "table_alias": "dt",
+        "key_name": "key1",
+        "key_parts": 2,
+        "select": 
+        [
+            "group_list_in_key",
+            "group_list_in_key"
+        ],
+        "rec_per_key_estimate": 2
+    },
+    {
+        "table_alias": "dt",
+        "key_name": "key2",
+        "key_parts": 2,
+        "select": 
+        [
+            "group_list_in_key",
+            "group_list_in_key"
+        ],
+        "rec_per_key_estimate": 2
+    }
+]
+drop table t1, t2, t3, t4;
+#
+# End of 11.4 tests
+#
 set optimizer_switch=@exit_optimizer_switch;
--- a/mysql-test/main/derived_opt.test
+++ b/mysql-test/main/derived_opt.test
@@ -1,4 +1,6 @@
 # Initialize
+--source include/not_embedded.inc
+--source include/have_innodb.inc
 --disable_warnings
 drop table if exists t0,t1,t2,t3;
 drop database if exists test1;
@@ -439,5 +441,277 @@ DROP TABLE t1, t2;
 --echo # End of 10.3 tests
 --echo #

+--echo #
+--echo # MDEV-36321 keys generated on derived tables produce wrong out_rows estimates
+--echo #
+--source include/have_sequence.inc
+
+create table t1 (
+  grp_id int,
+  value int,
+  index (grp_id)
+);
+
+insert into t1 select
+  A.seq, B.seq
+from
+  seq_1_to_100 A,
+  seq_1_to_100 B;
+
+create table t2 (a int);
+insert into t2 select seq from seq_1_to_5;
+
+create table t3 (b int);
+insert into t3 select seq from seq_1_to_5;
+
+analyze table t1,t2;
+
+SET optimizer_trace='enabled=on';
+SET optimizer_trace_max_mem_size=10485760;
+
+select * from
+  t2,
+  (select max(value), grp_id from t1 group by grp_id) DT
+where
+  t2.a= DT.grp_id;
+
+select
+  json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
+from information_schema.optimizer_trace;
+
+
+--echo # Same as above, but try a UNION:
+select * from
+  t2,
+  (select max(value), grp_id from t1 group by grp_id
+   union all
+   select max(value), grp_id from t1 group by grp_id) DT
+where
+  t2.a= DT.grp_id;
+select
+  json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
+from information_schema.optimizer_trace;
+
+--echo # Same as the previous query but unhandled group by expression
+explain
+select * from
+  t2,
+  (select max(value), grp_id from t1 group by grp_id
+   union all
+   select max(value), grp_id from t1 group by MOD(grp_id,2)) DT
+where
+  t2.a= DT.grp_id;
+select
+  json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
+from information_schema.optimizer_trace;
+
+--echo # view/cte/derived merged inside our derived table
+
+create view v1 as select * from t1;
+
+explain
+select * from
+  t2,
+  (select grp_id, max(value) as maxval from v1 group by grp_id) DT
+where
+  t2.a= DT.grp_id;
+
+drop view v1;
+
+explain
+with cte1 as (select * from t1)
+select * from
+  t2,
+  (select grp_id, max(value) as maxval from cte1 group by grp_id) DT
+where
+  t2.a= DT.grp_id;
+
+explain
+select * from
+  t2,
+  (
+    select grp_id, max(value) as maxval from
+    (
+      select * from t1, t3
+        where t1.grp_id = t3.b
+    ) dt1
+    group by grp_id
+  ) DT
+where
+  t2.a= DT.grp_id;
+
+--echo # Example with equalities on GROUP BY columns and other columns
+--echo # Must produce {table=<derived2>, ref=test.t2.col2,test.t2.a, rows=1}
+alter table t2 add col2 int;
+explain
+select * from
+  t2,
+  (select max(value) as maxval, grp_id from t1 group by grp_id) DT
+where
+  t2.col2=maxval and
+  t2.a= DT.grp_id;
+select
+  json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
+from information_schema.optimizer_trace;
+
+# Same as above but swap the column order.
+#  Now we'll get {table=<derived2>, ref=test.t2.a,test.t2.col2, rows=1}
+explain
+select * from
+  t2,
+  (select grp_id, max(value) as maxval from t1 group by grp_id) DT
+where
+  t2.col2=maxval and
+  t2.a= DT.grp_id;
+
+
+delete from t1;
+insert into t1 select 1, a.seq from seq_1_to_10 a;
+
+analyze table t1;
+
+explain
+select * from
+  t2,
+  (select distinct grp_id from t1
+   union all
+   select distinct value from t1) DT
+where
+  t2.a= DT.grp_id;
+select
+  json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
+from information_schema.optimizer_trace;
+
+drop table t1, t2;
+
+create table t1
+(
+  a int not null,
+  b int,
+  c int,
+  d int,
+  amount decimal,
+  key t1_ix1 (a,b)
+);
+
+--echo # More complex examples
+insert into t1 values (0, NULL, 0, NULL, 10.0000), (1, 1, 1, 1, 10.0000),
+(2, 2, 2, 2, 20.0000), (3, 3, 3, 3, 30.0000), (4, 4, 4, 4, 40.0000),
+(5, 5, 5, 5, NULL), (6, 6, 6, 6, NULL), (7, 7, 7, 7, 70.0000),
+(8, 8, 8, 8, 80.0000);
+
+create table t2
+(
+  a int NOT NULL,
+  b int,
+  name varchar(50),
+  key t2_ix1 (a,b)
+) engine = innodb;
+
+insert into t2 values (0, NULL, 'a'), (1, NULL, 'A'), (2, 2, 'B'), (3,3, 'C'),
+(4,4, 'D'), (5,5, NULL), (6,6, NULL), (7,7, 'E'), (8,8, 'F'), (9,9, 'G'),
+(10,10,'H'), (11,11, NULL), (12,12, NULL);
+
+drop table t3;
+
+create table t3
+(
+  a int not null,
+  b int,
+  description varchar(50),
+  key t3_ix1 (a,b)
+) engine = innodb;
+insert into t3 values (1, 1, 'bar'),(2,2,'buz'), (3,3, 'silver');
+insert into t3 select seq, seq, 'junk' from seq_3_to_13;
+
+create table t4
+(
+  c int not null,
+  d int,
+  descr varchar(50),
+  key t4_ix1 (c,d)
+) engine = innodb;
+insert into t4 values (1, 1, 'iron'), (2,2,'aluminium'), (3,3, 'silver');
+insert into t4 select seq, seq, 'junk' from seq_3_to_13;
+
+--echo # split materialized
+explain
+select * from t1
+join t2 on t1.a = t2.a and t1.b = t2.b
+join
+(
+  select a, b, description from t3 group by a, b
+) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b
+where dt.a < 1;
+select
+  json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
+from information_schema.optimizer_trace;
+
+--echo # union with distinct rows
+explain
+select * from t1
+join t2 on t1.a = t2.a and t1.b = t2.b
+join
+(
+  select a, b, description from t3 group by a, b
+  union
+  select c, d, descr from t4 group by c, d
+) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b
+where dt.a < 1;
+select
+  json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
+from information_schema.optimizer_trace;
+
+--echo # union without distinct rows
+explain
+select * from t1
+join t2 on t1.a = t2.a and t1.b = t2.b
+join
+(
+  select a, b, description from t3 group by a, b
+  union all
+  select c, d, descr from t4 group by c, d
+) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b
+where dt.a < 1;
+select
+  json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
+from information_schema.optimizer_trace;
+
+--echo # union without distinct rows with simple non grouping 2nd select
+explain
+select * from t1
+join t2 on t1.a = t2.a and t1.b = t2.b
+join
+(
+  select a, b, description from t3 group by a, b
+  union all
+  select c, d, descr from t4
+) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b
+where dt.a < 1;
+select
+  json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
+from information_schema.optimizer_trace;
+
+--echo # intersect
+explain
+select * from t1
+join t2 on t1.a = t2.a and t1.b = t2.b
+join
+(
+  select a, b, description from t3 group by a, b
+  intersect
+  select c, d, descr from t4 group by c, d
+) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b
+where dt.a > 2 and dt.a < 4;
+select
+  json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
+from information_schema.optimizer_trace;
+
+drop table t1, t2, t3, t4;
+
+--echo #
+--echo # End of 11.4 tests
+--echo #
+
 # The following command must be the last one the file 
 set optimizer_switch=@exit_optimizer_switch;
--- a/mysql-test/main/derived_split_innodb.result
+++ b/mysql-test/main/derived_split_innodb.result
@@ -283,7 +283,7 @@ on t3.a=t.a and t3.c=t.c
 where t3.b > 15;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t3	range	idx_b	idx_b	5	NULL	2	Using index condition; Using where
-1	PRIMARY	<derived2>	ref	key0	key0	133	test.t3.a,test.t3.c	2	
+1	PRIMARY	<derived2>	ref	key0	key0	133	test.t3.a,test.t3.c	1	
 2	DERIVED	t4	ALL	NULL	NULL	NULL	NULL	40	Using filesort
 drop table t3, t4;
 # End of 10.3 tests
@@ -348,7 +348,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
 1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
 1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
-1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	1	Using where
 2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t1.b	100	
 2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
 # The important part in the below output is:
@@ -449,7 +449,7 @@ ANALYZE
          "ref": ["test.t1.b"],
          "loops": 30,
          "r_loops": 30,
-          "rows": 10,
+          "rows": 1,
          "r_rows": 1,
          "cost": "REPLACED",
          "r_table_time_ms": "REPLACED",
@@ -546,7 +546,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
 1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
 1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
-1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	1	Using where
 2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t1.b	100	
 2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
 explain
@@ -568,7 +568,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
 1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
 1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
-1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	1	Using where
 2	LATERAL DERIVED	t22	const	PRIMARY	PRIMARY	4	const	1	Using index
 2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t1.b	100	
 2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
@@ -596,7 +596,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t5	eq_ref	PRIMARY	PRIMARY	4	test.t1.b	1	Using index
 1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	
 1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	
-1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	1	Using where
 2	LATERAL DERIVED	t22	const	PRIMARY	PRIMARY	4	const	1	Using index
 2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t5.pk	100	Using index condition
 2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
@@ -660,7 +660,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
 1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
 1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
-1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	1	Using where
 2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t1.b	100	
 2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
 select *
@@ -737,7 +737,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
 1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
 1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
-1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	1	Using where
 2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t1.b	100	
 2	LATERAL DERIVED	t11	hash_ALL	NULL	#hash#$hj	5	test.t10.col1	100	Using where; Using join buffer (flat, BNLH join)
 select *
@@ -815,7 +815,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
 1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	50	Using where; Using join buffer (flat, BNL join)
 1	PRIMARY	t3	ALL	NULL	NULL	NULL	NULL	15	Using where; Using join buffer (incremental, BNL join)
-1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	1	Using where
 2	DERIVED	t10	ALL	grp_id	NULL	NULL	NULL	10000	Using temporary; Using filesort
 2	DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
 select *
@@ -997,7 +997,7 @@ T.grp_id=v1.COL10;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	2	
 1	PRIMARY	t2	ref	a	a	5	test.t1.a	1	Using where; Using index
-1	PRIMARY	<derived2>	ref	key0	key0	5	func	10	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	func	1	Using where
 2	DERIVED	t10	index	grp_id	grp_id	5	NULL	10000	Using index; Using temporary; Using filesort
 drop table t1,t2, t10;
 drop view v1;
--- a/mysql-test/main/derived_view.result
+++ b/mysql-test/main/derived_view.result
@@ -1242,7 +1242,7 @@ SELECT * FROM t1, t2, v1 WHERE t2.a=t1.a AND t2.a=v1.a AND t2.a=v1.b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	system	NULL	NULL	NULL	NULL	1	
 1	PRIMARY	t2	ref	a	a	4	const	1	Using index
-1	PRIMARY	<derived2>	ref	key0	key0	8	const,const	1	
+1	PRIMARY	<derived2>	ref	key1	key1	8	func,func	1	
 2	DERIVED	t3	ALL	NULL	NULL	NULL	NULL	12	Using temporary; Using filesort
 SELECT * FROM t1, t2, v1 WHERE t2.a=t1.a AND t2.a=v1.a AND t2.a=v1.b;
 a	a	a	b
@@ -2431,7 +2431,7 @@ GROUP BY TABLE_SCHEMA) AS UNIQUES
 ON ( COLUMNS.TABLE_SCHEMA = UNIQUES.TABLE_SCHEMA);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	COLUMNS	ALL	NULL	NULL	NULL	NULL	NULL	Open_frm_only; Scanned all databases
-1	PRIMARY	<derived2>	ref	key0	key0	194	information_schema.COLUMNS.TABLE_SCHEMA	10	
+1	PRIMARY	<derived2>	ref	key0	key0	194	information_schema.COLUMNS.TABLE_SCHEMA	1	
 2	DERIVED	STATISTICS	ALL	NULL	NULL	NULL	NULL	NULL	Open_frm_only; Scanned all databases; Using filesort
 SELECT COUNT(*) > 0
 FROM INFORMATION_SCHEMA.COLUMNS
--- a/mysql-test/main/opt_hints_split_materialized.result
+++ b/mysql-test/main/opt_hints_split_materialized.result
@@ -375,7 +375,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
 1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
 1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
-1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	1	Using where
 2	LATERAL DERIVED	t22	const	PRIMARY	PRIMARY	4	const	1	Using index
 2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t1.b	100	
 2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
@@ -443,7 +443,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
 1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
 1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
-1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	1	Using where
 2	DERIVED	t22	const	PRIMARY	PRIMARY	4	const	1	Using index; Using temporary; Using filesort
 2	DERIVED	t10	ALL	NULL	NULL	NULL	NULL	10000	
 2	DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
@@ -513,7 +513,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
 1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
 1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
-1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	1	Using where
 2	DERIVED	t22	const	PRIMARY	PRIMARY	4	const	1	Using index; Using temporary; Using filesort
 2	DERIVED	t10	ALL	NULL	NULL	NULL	NULL	10000	
 2	DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
@@ -581,7 +581,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
 1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
 1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
-1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	1	Using where
 2	LATERAL DERIVED	t22	const	PRIMARY	PRIMARY	4	const	1	Using index
 2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t1.b	100	
 2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
@@ -762,7 +762,7 @@ from
 one_k T1, (select grp, count(*) from t1000 group by grp) TBL where TBL.grp=T1.a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	T1	ALL	NULL	NULL	NULL	NULL	1000	Using where
-1	PRIMARY	<derived2>	ref	key0	key0	5	test.T1.a	10	
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.T1.a	1	
 2	DERIVED	t1000	index	grp	grp	5	NULL	1000	Using index; Using temporary; Using filesort
 explain
 select /*+ SPLIT_MATERIALIZED(TBL) */ *
--- a/mysql-test/main/opt_trace.result
+++ b/mysql-test/main/opt_trace.result
@@ -12129,7 +12129,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
 1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
 1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
-1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	1	Using where
 2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t1.b	100	
 2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
 select json_detailed(json_extract(trace, '$**.check_split_materialized')) as JS
--- a/sql/opt_group_by_cardinality.cc
+++ b/sql/opt_group_by_cardinality.cc
@@ -17,8 +17,13 @@
 /**
  @file

-    Contains estimate_post_group_cardinality() which estimates cardinality
-    after GROUP BY operation is applied.
+    Contains
+    - estimate_post_group_cardinality() which estimates cardinality
+      after GROUP BY operation is applied.
+
+    - infer_derived_key_statistics() to infer index statistics for
+      potential indexes on derived tables that have data produced with
+      a GROUP BY operation.
 */

 #include "mariadb.h"
@@ -26,6 +31,8 @@
 #include "sql_select.h"
 #include "sql_statistics.h"
 #include "opt_trace.h"
+#include "sql_lex.h"
+#include "opt_group_by_cardinality.h"

 static
 double estimate_table_group_cardinality(JOIN *join, Item ***group_list,
@@ -374,3 +381,148 @@ whole_table:
  goto normal_exit;
 }

+
+/**
+  @brief
+    Find the key part whose field is named like the item.
+  @return
+    Index of the first matching key part among the first key_parts parts,
+    or -1 if the item is not a field item or no key part has its name.
+*/
+
+static int item_index_in_key(Item *item, const KEY *keyinfo, uint key_parts)
+{
+  if (item->real_item()->type() != Item::FIELD_ITEM)
+    return -1;
+  for (uint part= 0; part < key_parts; part++)
+    if (!cmp(item->name, keyinfo->key_part[part].field->field_name))
+      return (int) part;
+  return -1;
+}
+
+
+/**
+  @brief
+    Return TRUE if item_index_in_key() finds a key part for every item of
+    the list, FALSE as soon as one item has none.
+    An empty list yields TRUE.
+*/
+
+static
+bool all_list_contained_in_keyparts(const KEY *keyinfo,
+                                    uint key_parts,
+                                    SQL_I_List<st_order> *list)
+{
+  ORDER *elem= list->first;
+  while (elem && item_index_in_key(*elem->item, keyinfo, key_parts) >= 0)
+    elem= elem->next;
+  return elem == NULL;
+}
+
+
+/**
+  @brief
+  When adding a key to a materialized derived table, we can determine some
+  key statistics from the query block.
+
+  @param derived    unit (query expression) defining the derived table
+  @param keyinfo    the generated key; rec_per_key[key_parts - 1] may be set
+  @param key_parts  number of key parts in the generated key
+
+  @detail
+  Currently, we can infer this
+
+  1) rec_per_key[n-1]  (# records for each full key value), when :-
+       a) the last query set operation in the chain is not a UNION ALL, implying
+          that duplicate rows are removed, so if the select list matches the
+          key, we will have one record per distinct key
+       b) the query within the block has the DISTINCT flag set, and the select
+          list matches our key, we will have one record per distinct key.
+       c) The group by list in the query is a subset of our key, we will have
+          one record per key.
+
+  @todo
+    It is also possible to use predicates combined with existing key or
+    histogram statistics on the base tables in our derived table to fill in
+    this and other attributes of our generated key
+*/
+
+void infer_derived_key_statistics(st_select_lex_unit* derived,
+                                  KEY *keyinfo,
+                                  uint key_parts)
+{
+  st_select_lex* select= derived->first_select();
+  Json_writer_object wrapper(derived->thd);
+  Json_writer_object trace(derived->thd, "infer_derived_key_statistics");
+
+  trace.add("table_alias", keyinfo->table->alias.c_ptr());
+  trace.add("key_name", keyinfo->name);
+  trace.add("key_parts", key_parts);
+  /*
+    This whole union/intersect of selects does NOT have the ALL flag, so if
+    we have the same number of select list items as key parts, we can guarantee
+    that each line in the result set is unique
+  */
+  if (key_parts == select->item_list.elements &&
+      derived->check_distinct_in_union())
+  {
+    trace.add("distinct_in_query_expression", TRUE);
+    keyinfo->rec_per_key[key_parts - 1]= 1;
+  }
+  else
+  {
+    Json_writer_array select_proc(derived->thd, "select");
+    ulong rec_per_key= 0;
+    bool all_selects_covered= TRUE;
+    do
+    {
+      /*
+        This is a SELECT DISTINCT query with $key_parts elements in the
+        select list.  This select in the union will produce one record
+        per key.
+        @todo
+        If we come across multiple SELECT DISTINCT selects in this union
+        we have a problem in that we do not know anything about how they
+        might intersect
+      */
+      if (key_parts == select->item_list.elements &&
+          (select->options & SELECT_DISTINCT))
+      {
+        select_proc.add("distinct_in_query_block");
+        rec_per_key++;
+      }
+      else if (select->group_list.elements &&
+               all_list_contained_in_keyparts(keyinfo,
+                                              key_parts,
+                                              &select->group_list))
+      {
+        /*
+          This is a grouping select and the group list is a subset of our key.
+          Our key can have additional fields, the rows will still be unique.
+          Only reached when the DISTINCT rule above did not apply, so that
+          one select never adds more than one record per key to the estimate.
+        */
+        select_proc.add("group_list_in_key");
+        rec_per_key++;
+      }
+      else
+      {
+        select_proc.add("unhandled query");
+        all_selects_covered= FALSE;
+      }
+
+    } while ((select= select->next_select()));
+    select_proc.end();
+
+    /*
+      If we do not cover all selects here, do not update
+      keyinfo->rec_per_key[key_parts - 1] at all
+    */
+    if (all_selects_covered)
+    {
+      keyinfo->rec_per_key[key_parts - 1]= rec_per_key;
+      trace.add("rec_per_key_estimate", rec_per_key);
+    }
+  }
+}
+
--- a/sql/opt_group_by_cardinality.h
+++ b/sql/opt_group_by_cardinality.h
@@ -0,0 +1,11 @@
+#ifndef OPT_GROUP_BY_CARDINALITY
+#define OPT_GROUP_BY_CARDINALITY
+
+/* Declarations of the functions defined in opt_group_by_cardinality.cc */
+double estimate_post_group_cardinality(JOIN *join, double join_output_card);
+
+void infer_derived_key_statistics(st_select_lex_unit* derived,
+                                  KEY *keyinfo,
+                                  uint key_parts);
+
+#endif /* OPT_GROUP_BY_CARDINALITY */
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -69,6 +69,7 @@
 #include "optimizer_defaults.h"
 #include "derived_handler.h"
 #include "opt_hints.h"
+#include "opt_group_by_cardinality.h"

 /*
  A key part number that means we're using a fulltext scan.
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -2756,8 +2756,6 @@ void propagate_new_equalities(THD *thd, Item *cond,

 #define PREV_BITS(type, N_BITS) ((type)my_set_bits(N_BITS))

-double estimate_post_group_cardinality(JOIN *join, double join_output_card);
-
 bool dbug_user_var_equals_str(THD *thd, const char *name, const char *value);

 #include "opt_vcol_substitution.h"
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -51,6 +51,7 @@
 #include "rpl_rli.h"             // class rpl_group_info
 #include "rpl_mi.h"              // class Master_info
 #include "vector_mhnsw.h"
+#include "opt_group_by_cardinality.h"

 #ifdef WITH_WSREP
 #include "wsrep_schema.h"
@@ -8670,6 +8671,7 @@ bool TABLE::check_tmp_key(uint key, uint key_parts,
         key_parts <= tmp_table_max_key_parts();
 }

+
 /**
  @brief
  Add one key to a temporary table
@@ -8728,6 +8730,7 @@ bool TABLE::add_tmp_key(uint key, uint key_parts,
  bzero(keyinfo->rec_per_key, sizeof(ulong)*key_parts);
  keyinfo->read_stats= NULL;
  keyinfo->collected_stats= NULL;
+  keyinfo->table= this;

  for (i= 0; i < key_parts; i++)
  {
@@ -8748,25 +8751,10 @@ bool TABLE::add_tmp_key(uint key, uint key_parts,
  */
  keyinfo->index_flags= file->index_flags(key, 0, 1);

-  /*
-    For the case when there is a derived table that would give distinct rows,
-    the index statistics are passed to the join optimizer to tell that a ref
-    access to all the fields of the derived table will produce only one row.
-  */
-
  st_select_lex_unit* derived= pos_in_table_list ?
                               pos_in_table_list->derived: NULL;
  if (derived)
-  {
-    st_select_lex* first= derived->first_select();
-    uint select_list_items= first->get_item_list()->elements;
-    if (key_parts == select_list_items)
-    {
-      if ((!first->is_part_of_union() && (first->options & SELECT_DISTINCT)) ||
-          derived->check_distinct_in_union())
-        keyinfo->rec_per_key[key_parts - 1]= 1;
-    }
-  }
+    infer_derived_key_statistics(derived, keyinfo, key_parts);

  set_if_bigger(s->max_key_length, keyinfo->key_length);
  s->keys++;