diff --git a/mysql-test/main/derived_cond_pushdown.result b/mysql-test/main/derived_cond_pushdown.result index 1f0d1514cb9..0709eeca5d9 100644 --- a/mysql-test/main/derived_cond_pushdown.result +++ b/mysql-test/main/derived_cond_pushdown.result @@ -1930,7 +1930,7 @@ a b max_c avg_c a b c d explain select * from v1,t2 where (v1.a=v1.b) and (v1.a=t2.a); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY ref key0 key0 10 test.t2.a,test.t2.a 2 +1 PRIMARY ref key0 key0 10 test.t2.a,test.t2.a 1 2 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort explain format=json select * from v1,t2 where (v1.a=v1.b) and (v1.a=t2.a); EXPLAIN @@ -1960,7 +1960,7 @@ EXPLAIN "used_key_parts": ["a", "b"], "ref": ["test.t2.a", "test.t2.a"], "loops": 9, - "rows": 2, + "rows": 1, "cost": "COST_REPLACED", "filtered": 100, "materialized": { @@ -2993,7 +2993,7 @@ where t1.a>5 group by a,b having max_c < 707) v1, t2 where (v1.a=t2.a) and (v1.max_c>300) and (v1.a=v1.b); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY ref key0 key0 10 test.t2.a,test.t2.a 2 Using where +1 PRIMARY ref key0 key0 10 test.t2.a,test.t2.a 1 Using where 2 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort explain format=json select * from (select a, b, max(c) as max_c, avg(c) as avg_c from t1 @@ -3026,7 +3026,7 @@ EXPLAIN "used_key_parts": ["a", "b"], "ref": ["test.t2.a", "test.t2.a"], "loops": 9, - "rows": 2, + "rows": 1, "cost": "COST_REPLACED", "filtered": 100, "attached_condition": "v1.max_c > 300", @@ -3155,7 +3155,7 @@ a b max_c avg_c a b c d explain select * from v1,t2 where (v1.a=t2.a) and (v1.b=t2.b); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY ref key0 key0 10 test.t2.a,test.t2.b 2 +1 PRIMARY ref key0 key0 10 
test.t2.a,test.t2.b 1 2 DERIVED t1 ALL NULL NULL NULL NULL 20 Using temporary; Using filesort explain format=json select * from v1,t2 where (v1.a=t2.a) and (v1.b=t2.b); EXPLAIN @@ -3185,7 +3185,7 @@ EXPLAIN "used_key_parts": ["a", "b"], "ref": ["test.t2.a", "test.t2.b"], "loops": 9, - "rows": 2, + "rows": 1, "cost": "COST_REPLACED", "filtered": 100, "materialized": { @@ -4041,7 +4041,7 @@ explain select * from v1,v2,t2 where (v1.a=t2.a) and (v1.a=v1.b) and (v1.a=v2.a) and (v2.max_c<300); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY ref key1 key1 10 test.t2.a,test.t2.a 2 +1 PRIMARY ref key1 key1 10 test.t2.a,test.t2.a 1 1 PRIMARY ref key0 key0 5 test.t2.a 2 Using where 3 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort 2 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort @@ -4074,7 +4074,7 @@ EXPLAIN "used_key_parts": ["a", "b"], "ref": ["test.t2.a", "test.t2.a"], "loops": 9, - "rows": 2, + "rows": 1, "cost": "COST_REPLACED", "filtered": 100, "materialized": { @@ -4113,7 +4113,7 @@ EXPLAIN "key_length": "5", "used_key_parts": ["a"], "ref": ["test.t2.a"], - "loops": 18, + "loops": 9, "rows": 2, "cost": "COST_REPLACED", "filtered": 100, @@ -8165,8 +8165,8 @@ a b min_c a b max_c avg_c explain select * from v4,v2 where (v4.a=v2.b) and (v4.a=v4.b) and (v4.min_c<100); id select_type table type possible_keys key key_len ref rows Extra -1 PRIMARY ALL NULL NULL NULL NULL 20 Using where -1 PRIMARY ref key0 key0 5 v4.a 2 +1 PRIMARY ALL NULL NULL NULL NULL 20 Using where +1 PRIMARY ref key0 key0 10 v2.b,v2.b 1 Using where 4 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort 2 DERIVED ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort 3 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort @@ -8180,13 +8180,54 @@ EXPLAIN "nested_loop": [ { "table": { 
- "table_name": "", + "table_name": "", "access_type": "ALL", "loops": 1, "rows": 20, "cost": "COST_REPLACED", "filtered": 100, - "attached_condition": "v4.b = v4.a and v4.min_c < 100 and v4.a is not null", + "attached_condition": "v2.b is not null and v2.b is not null", + "materialized": { + "query_block": { + "select_id": 4, + "cost": "COST_REPLACED", + "having_condition": "max_c < 707", + "filesort": { + "sort_key": "t1.a, t1.b", + "temporary_table": { + "nested_loop": [ + { + "table": { + "table_name": "t1", + "access_type": "ALL", + "loops": 1, + "rows": 20, + "cost": "COST_REPLACED", + "filtered": 100, + "attached_condition": "t1.a > 5" + } + } + ] + } + } + } + } + } + }, + { + "table": { + "table_name": "", + "access_type": "ref", + "possible_keys": ["key0"], + "key": "key0", + "key_length": "10", + "used_key_parts": ["a", "b"], + "ref": ["v2.b", "v2.b"], + "loops": 20, + "rows": 1, + "cost": "COST_REPLACED", + "filtered": 100, + "attached_condition": "v4.min_c < 100", "materialized": { "query_block": { "select_id": 2, @@ -8238,46 +8279,6 @@ EXPLAIN } } } - }, - { - "table": { - "table_name": "", - "access_type": "ref", - "possible_keys": ["key0"], - "key": "key0", - "key_length": "5", - "used_key_parts": ["b"], - "ref": ["v4.a"], - "loops": 20, - "rows": 2, - "cost": "COST_REPLACED", - "filtered": 100, - "materialized": { - "query_block": { - "select_id": 4, - "cost": "COST_REPLACED", - "having_condition": "max_c < 707", - "filesort": { - "sort_key": "t1.a, t1.b", - "temporary_table": { - "nested_loop": [ - { - "table": { - "table_name": "t1", - "access_type": "ALL", - "loops": 1, - "rows": 20, - "cost": "COST_REPLACED", - "filtered": 100, - "attached_condition": "t1.a > 5" - } - } - ] - } - } - } - } - } } ] } @@ -8296,8 +8297,8 @@ a b min_c a b max_c avg_c explain select * from v4,v2 where (v4.a=v2.b) and (v4.a=v4.b) and (v2.b<30); id select_type table type possible_keys key key_len ref rows Extra -1 PRIMARY ALL NULL NULL NULL NULL 20 Using where -1 
PRIMARY ref key0 key0 5 v4.a 2 +1 PRIMARY ALL NULL NULL NULL NULL 20 Using where +1 PRIMARY ref key0 key0 10 v2.b,v2.b 1 4 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort 2 DERIVED ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort 3 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort @@ -8311,13 +8312,53 @@ EXPLAIN "nested_loop": [ { "table": { - "table_name": "", + "table_name": "", "access_type": "ALL", "loops": 1, "rows": 20, "cost": "COST_REPLACED", "filtered": 100, - "attached_condition": "v4.b = v4.a and v4.a < 30 and v4.a is not null", + "attached_condition": "v2.b < 30 and v2.b is not null and v2.b is not null", + "materialized": { + "query_block": { + "select_id": 4, + "cost": "COST_REPLACED", + "having_condition": "max_c < 707", + "filesort": { + "sort_key": "t1.a, t1.b", + "temporary_table": { + "nested_loop": [ + { + "table": { + "table_name": "t1", + "access_type": "ALL", + "loops": 1, + "rows": 20, + "cost": "COST_REPLACED", + "filtered": 100, + "attached_condition": "t1.a > 5 and t1.b < 30" + } + } + ] + } + } + } + } + } + }, + { + "table": { + "table_name": "", + "access_type": "ref", + "possible_keys": ["key0"], + "key": "key0", + "key_length": "10", + "used_key_parts": ["a", "b"], + "ref": ["v2.b", "v2.b"], + "loops": 20, + "rows": 1, + "cost": "COST_REPLACED", + "filtered": 100, "materialized": { "query_block": { "select_id": 2, @@ -8368,46 +8409,6 @@ EXPLAIN } } } - }, - { - "table": { - "table_name": "", - "access_type": "ref", - "possible_keys": ["key0"], - "key": "key0", - "key_length": "5", - "used_key_parts": ["b"], - "ref": ["v4.a"], - "loops": 20, - "rows": 2, - "cost": "COST_REPLACED", - "filtered": 100, - "materialized": { - "query_block": { - "select_id": 4, - "cost": "COST_REPLACED", - "having_condition": "max_c < 707", - "filesort": { - "sort_key": "t1.a, t1.b", - "temporary_table": { - "nested_loop": [ - { - "table": { - "table_name": "t1", - 
"access_type": "ALL", - "loops": 1, - "rows": 20, - "cost": "COST_REPLACED", - "filtered": 100, - "attached_condition": "t1.a > 5 and t1.b < 30" - } - } - ] - } - } - } - } - } } ] } @@ -8426,8 +8427,8 @@ a b min_c a b max_c avg_c explain select * from v4,v2 where (v4.a=v2.b) and (v4.a=v4.b) and ((v2.b<30) or (v4.a>2)); id select_type table type possible_keys key key_len ref rows Extra -1 PRIMARY ALL NULL NULL NULL NULL 20 Using where -1 PRIMARY ref key0 key0 5 v4.a 2 +1 PRIMARY ALL NULL NULL NULL NULL 20 Using where +1 PRIMARY ref key0 key0 10 v2.b,v2.b 1 4 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort 2 DERIVED ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort 3 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort @@ -8441,13 +8442,53 @@ EXPLAIN "nested_loop": [ { "table": { - "table_name": "", + "table_name": "", "access_type": "ALL", "loops": 1, "rows": 20, "cost": "COST_REPLACED", "filtered": 100, - "attached_condition": "v4.b = v4.a and (v4.a < 30 or v4.a > 2) and v4.a is not null", + "attached_condition": "(v2.b < 30 or v2.b > 2) and v2.b is not null and v2.b is not null", + "materialized": { + "query_block": { + "select_id": 4, + "cost": "COST_REPLACED", + "having_condition": "max_c < 707", + "filesort": { + "sort_key": "t1.a, t1.b", + "temporary_table": { + "nested_loop": [ + { + "table": { + "table_name": "t1", + "access_type": "ALL", + "loops": 1, + "rows": 20, + "cost": "COST_REPLACED", + "filtered": 100, + "attached_condition": "t1.a > 5 and (t1.b < 30 or t1.b > 2)" + } + } + ] + } + } + } + } + } + }, + { + "table": { + "table_name": "", + "access_type": "ref", + "possible_keys": ["key0"], + "key": "key0", + "key_length": "10", + "used_key_parts": ["a", "b"], + "ref": ["v2.b", "v2.b"], + "loops": 20, + "rows": 1, + "cost": "COST_REPLACED", + "filtered": 100, "materialized": { "query_block": { "select_id": 2, @@ -8498,46 +8539,6 @@ EXPLAIN } } } - }, - { - "table": 
{ - "table_name": "", - "access_type": "ref", - "possible_keys": ["key0"], - "key": "key0", - "key_length": "5", - "used_key_parts": ["b"], - "ref": ["v4.a"], - "loops": 20, - "rows": 2, - "cost": "COST_REPLACED", - "filtered": 100, - "materialized": { - "query_block": { - "select_id": 4, - "cost": "COST_REPLACED", - "having_condition": "max_c < 707", - "filesort": { - "sort_key": "t1.a, t1.b", - "temporary_table": { - "nested_loop": [ - { - "table": { - "table_name": "t1", - "access_type": "ALL", - "loops": 1, - "rows": 20, - "cost": "COST_REPLACED", - "filtered": 100, - "attached_condition": "t1.a > 5 and (t1.b < 30 or t1.b > 2)" - } - } - ] - } - } - } - } - } } ] } @@ -20148,7 +20149,7 @@ where t2.b < 40 and t2.a=t3.a and t3.c=t.c; id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 90 60.00 Using where 1 PRIMARY t3 ref idx_a idx_a 5 test.t2.a 1 100.00 Using where -1 PRIMARY ref key0 key0 128 test.t3.c 5 100.00 +1 PRIMARY ref key0 key0 128 test.t3.c 1 100.00 2 DERIVED t4 ALL idx_c NULL NULL NULL 160 100.00 Using temporary; Using filesort Warnings: Note 1003 /* select#1 */ select `test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t2`.`c` AS `c`,`t`.`c` AS `t_c`,`t`.`max` AS `max`,`t`.`min` AS `min` from `test`.`t2` join `test`.`t3` join (/* select#2 */ select `test`.`t4`.`c` AS `c`,max(`test`.`t4`.`b`) AS `max`,min(`test`.`t4`.`b`) AS `min` from `test`.`t4` group by `test`.`t4`.`c`) `t` where `test`.`t3`.`a` = `test`.`t2`.`a` and `t`.`c` = `test`.`t3`.`c` and `test`.`t2`.`b` < 40 @@ -20198,7 +20199,7 @@ EXPLAIN "used_key_parts": ["c"], "ref": ["test.t3.c"], "loops": 80.99999987, - "rows": 5, + "rows": 1, "cost": "COST_REPLACED", "filtered": 100, "materialized": { @@ -21318,7 +21319,7 @@ id a explain extended select id, a from t1 where id in (select id from v1); id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY t1 ALL PRIMARY NULL NULL NULL 20 100.00 -1 PRIMARY ref 
key0 key0 4 test.t1.id 2 50.00 FirstMatch(t1) +1 PRIMARY ref key0 key0 4 test.t1.id 1 100.00 FirstMatch(t1) 3 DERIVED t1 ALL PRIMARY NULL NULL NULL 20 100.00 Using temporary; Using filesort 3 DERIVED t2 ref ro_id ro_id 4 test.t1.id 1 100.00 Using where Warnings: @@ -21356,7 +21357,7 @@ on (t1.id = t2.ro_id AND t2.flag = 1) group by t1.id) dt); id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY t1 ALL PRIMARY NULL NULL NULL 20 100.00 -1 PRIMARY ref key1,distinct_key key1 4 test.t1.id 2 50.00 FirstMatch(t1) +1 PRIMARY ref key1,distinct_key key1 4 test.t1.id 1 100.00 FirstMatch(t1) 3 DERIVED t1 ALL PRIMARY NULL NULL NULL 20 100.00 Using temporary; Using filesort 3 DERIVED t2 ref ro_id ro_id 4 test.t1.id 1 100.00 Using where Warnings: @@ -22025,7 +22026,7 @@ WHERE t1.id BETWEEN 200 AND 100000; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t3 range t1_id t1_id 5 NULL 47 Using where; Using index 1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t3.t1_id 1 Using index -1 PRIMARY ref key0 key0 5 test.t3.t1_id 10 +1 PRIMARY ref key0 key0 5 test.t3.t1_id 1 2 DERIVED t2 ALL t1_id NULL NULL NULL 2408 Using where; Using temporary; Using filesort set optimizer_switch='split_materialized=default'; DROP TABLE t1,t2,t3; @@ -22302,7 +22303,7 @@ from_agg_items.ledger_id = charges.from_ledger_id WHERE charges.to_ledger_id = 2; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY charges ref PRIMARY,fk_charge_from_ledger,fk_charge_to_ledger fk_charge_to_ledger 8 const 8 -1 PRIMARY ref key0 key0 17 test.charges.from_ledger_id,test.charges.id 4 +1 PRIMARY ref key0 key0 17 test.charges.from_ledger_id,test.charges.id 1 2 DERIVED transaction_items ALL fk_items_transaction NULL NULL NULL 40 Using temporary; Using filesort 2 DERIVED transactions eq_ref PRIMARY PRIMARY 8 test.transaction_items.transaction_id 1 INSERT INTO charges (id, from_ledger_id, to_ledger_id, amount) VALUES @@ -22507,7 +22508,7 @@ 
from_agg_items.ledger_id = charges.from_ledger_id WHERE charges.to_ledger_id = 2; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY charges ref fk_charge_to_ledger fk_charge_to_ledger 8 const 10 -1 PRIMARY ref key0 key0 18 test.charges.from_ledger_id,test.charges.id 4 +1 PRIMARY ref key0 key0 18 test.charges.from_ledger_id,test.charges.id 1 2 DERIVED transaction_items ALL fk_items_transaction NULL NULL NULL 40 Using temporary; Using filesort 2 DERIVED transactions eq_ref PRIMARY PRIMARY 8 test.transaction_items.transaction_id 1 set optimizer_switch='split_materialized=default'; @@ -22845,6 +22846,8 @@ INSERT INTO t1 VALUES (95,3290880,487,'2021-02-15 18:59:35'),(96,3290798,0,'2021-02-15 18:59:52'), (97,3290777,983,'2021-02-15 19:00:10'),(98,3290811,488,'2021-02-15 19:00:10'), (99,3290917,1283,'2021-02-15 19:00:36'),(100,3290858,482,'2021-02-15 19:00:42'); +insert into t1 select seq, 3300000+seq, 100+seq, '2021-02-09 18:31:35' +from seq_101_to_1000; CREATE TABLE t2 (a int) ENGINE=MYISAM; INSERT INTO t2 VALUES (3289475),(3289496),(3289562),(3289593),(3289594),(3289595),(3289626), @@ -22853,7 +22856,7 @@ INSERT INTO t2 VALUES ANALYZE TABLE t1,t2; Table Op Msg_type Msg_text test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status Table is already up to date +test.t1 analyze status OK test.t2 analyze status Engine-independent statistics collected test.t2 analyze status OK EXPLAIN SELECT t1.valdouble, t1.valint1 @@ -22865,9 +22868,9 @@ t1.valdate = dt.maxdate AND t1.valint1 IN (SELECT * FROM t2); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 21 Using where; Start temporary -1 PRIMARY t1 ref valint1,valint1_2 valint1 5 test.t2.a 2 Using index condition; Using where; End temporary +1 PRIMARY t1 ref valint1,valint1_2 valint1 5 test.t2.a 1 Using index condition; Using where; End temporary 1 PRIMARY ref key0 key0 11 test.t1.valdate,test.t1.valint1 1 -2 LATERAL 
DERIVED t ref valint1,valint1_2 valint1 5 test.t2.a 2 Using index condition +2 LATERAL DERIVED t ref valint1,valint1_2 valint1 5 test.t2.a 1 Using index condition SELECT t1.valdouble, t1.valint1 FROM t1, (SELECT max(t.valdate) AS maxdate, t.valint1 FROM t1 t GROUP BY t.valint1) diff --git a/mysql-test/main/derived_cond_pushdown.test b/mysql-test/main/derived_cond_pushdown.test index 45840e710a3..70d5582e62c 100644 --- a/mysql-test/main/derived_cond_pushdown.test +++ b/mysql-test/main/derived_cond_pushdown.test @@ -4280,6 +4280,9 @@ INSERT INTO t1 VALUES (97,3290777,983,'2021-02-15 19:00:10'),(98,3290811,488,'2021-02-15 19:00:10'), (99,3290917,1283,'2021-02-15 19:00:36'),(100,3290858,482,'2021-02-15 19:00:42'); +insert into t1 select seq, 3300000+seq, 100+seq, '2021-02-09 18:31:35' +from seq_101_to_1000; + CREATE TABLE t2 (a int) ENGINE=MYISAM; INSERT INTO t2 VALUES (3289475),(3289496),(3289562),(3289593),(3289594),(3289595),(3289626), diff --git a/mysql-test/main/derived_opt.result b/mysql-test/main/derived_opt.result index 446de335131..984c8070870 100644 --- a/mysql-test/main/derived_opt.result +++ b/mysql-test/main/derived_opt.result @@ -566,4 +566,501 @@ DROP TABLE t1, t2; # # End of 10.3 tests # +# +# MDEV-36321 keys generated on derived tables produce wrong out_rows estimates +# +create table t1 ( +grp_id int, +value int, +index (grp_id) +); +insert into t1 select +A.seq, B.seq +from +seq_1_to_100 A, +seq_1_to_100 B; +create table t2 (a int); +insert into t2 select seq from seq_1_to_5; +create table t3 (b int); +insert into t3 select seq from seq_1_to_5; +analyze table t1,t2; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status Table is already up to date +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status OK +SET optimizer_trace='enabled=on'; +SET optimizer_trace_max_mem_size=10485760; +select * from +t2, +(select max(value), grp_id from t1 group by grp_id) DT 
+where +t2.a= DT.grp_id; +a max(value) grp_id +1 100 1 +2 100 2 +3 100 3 +4 100 4 +5 100 5 +select +json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t +from information_schema.optimizer_trace; +t +[ + { + "table_alias": "DT", + "key_name": "key0", + "key_parts": 1, + "select": + ["group_list_in_key"], + "rec_per_key_estimate": 1 + } +] +# Same as above, but try a UNION: +select * from +t2, +(select max(value), grp_id from t1 group by grp_id +union all +select max(value), grp_id from t1 group by grp_id) DT +where +t2.a= DT.grp_id; +a max(value) grp_id +1 100 1 +1 100 1 +2 100 2 +2 100 2 +3 100 3 +3 100 3 +4 100 4 +4 100 4 +5 100 5 +5 100 5 +select +json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t +from information_schema.optimizer_trace; +t +[ + { + "table_alias": "DT", + "key_name": "key0", + "key_parts": 1, + "select": + [ + "group_list_in_key", + "group_list_in_key" + ], + "rec_per_key_estimate": 2 + } +] +# Same as the previous query but unhandled group by expression +explain +select * from +t2, +(select max(value), grp_id from t1 group by grp_id +union all +select max(value), grp_id from t1 group by MOD(grp_id,2)) DT +where +t2.a= DT.grp_id; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where +1 PRIMARY ref key0 key0 5 test.t2.a 101 +2 DERIVED t1 ALL NULL NULL NULL NULL 10000 Using temporary; Using filesort +3 UNION t1 ALL NULL NULL NULL NULL 10000 Using temporary; Using filesort +select +json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t +from information_schema.optimizer_trace; +t +[ + { + "table_alias": "DT", + "key_name": "key0", + "key_parts": 1, + "select": + [ + "group_list_in_key", + "unhandled query" + ] + } +] +# view/cte/derived merged inside our derived table +create view v1 as select * from t1; +explain +select * from +t2, +(select grp_id, max(value) as maxval from v1 group by grp_id) DT +where +t2.a= 
DT.grp_id; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where +1 PRIMARY ref key0 key0 5 test.t2.a 1 +2 DERIVED t1 ALL grp_id NULL NULL NULL 10000 Using temporary; Using filesort +drop view v1; +explain +with cte1 as (select * from t1) +select * from +t2, +(select grp_id, max(value) as maxval from cte1 group by grp_id) DT +where +t2.a= DT.grp_id; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where +1 PRIMARY ref key0 key0 5 test.t2.a 1 +3 DERIVED t1 ALL grp_id NULL NULL NULL 10000 Using temporary; Using filesort +explain +select * from +t2, +( +select grp_id, max(value) as maxval from +( +select * from t1, t3 +where t1.grp_id = t3.b +) dt1 +group by grp_id +) DT +where +t2.a= DT.grp_id; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where +1 PRIMARY ref key0 key0 5 test.t2.a 1 +2 DERIVED t3 ALL NULL NULL NULL NULL 5 Using where; Using temporary; Using filesort +2 DERIVED t1 ref grp_id grp_id 5 test.t3.b 100 +# Example with equalities on GROUP BY columns and other columns +# Must produce {table=, ref=test.t2.col2,test.t2.a, rows=1} +alter table t2 add col2 int; +explain +select * from +t2, +(select max(value) as maxval, grp_id from t1 group by grp_id) DT +where +t2.col2=maxval and +t2.a= DT.grp_id; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where +1 PRIMARY ref key0 key0 10 test.t2.col2,test.t2.a 1 +2 DERIVED t1 ALL grp_id NULL NULL NULL 10000 Using temporary; Using filesort +select +json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t +from information_schema.optimizer_trace; +t +[ + { + "table_alias": "DT", + "key_name": "key0", + "key_parts": 2, + "select": + ["group_list_in_key"], + "rec_per_key_estimate": 1 + } +] +explain +select * from +t2, +(select grp_id, 
max(value) as maxval from t1 group by grp_id) DT +where +t2.col2=maxval and +t2.a= DT.grp_id; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where +1 PRIMARY ref key0 key0 10 test.t2.a,test.t2.col2 1 +2 DERIVED t1 ALL grp_id NULL NULL NULL 10000 Using temporary; Using filesort +delete from t1; +insert into t1 select 1, a.seq from seq_1_to_10 a; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status Table is already up to date +explain +select * from +t2, +(select distinct grp_id from t1 +union all +select distinct value from t1) DT +where +t2.a= DT.grp_id; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where +1 PRIMARY ref key0 key0 5 test.t2.a 2 +2 DERIVED t1 range NULL grp_id 5 NULL 2 Using index for group-by +3 UNION t1 ALL NULL NULL NULL NULL 10 Using temporary +select +json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t +from information_schema.optimizer_trace; +t +[ + { + "table_alias": "DT", + "key_name": "key0", + "key_parts": 1, + "select": + [ + "distinct_in_query_block", + "distinct_in_query_block" + ], + "rec_per_key_estimate": 2 + } +] +drop table t1, t2; +create table t1 +( +a int not null, +b int, +c int, +d int, +amount decimal, +key t1_ix1 (a,b) +); +# More complex examples +insert into t1 values (0, NULL, 0, NULL, 10.0000), (1, 1, 1, 1, 10.0000), +(2, 2, 2, 2, 20.0000), (3, 3, 3, 3, 30.0000), (4, 4, 4, 4, 40.0000), +(5, 5, 5, 5, NULL), (6, 6, 6, 6, NULL), (7, 7, 7, 7, 70.0000), +(8, 8, 8, 8, 80.0000); +create table t2 +( +a int NOT NULL, +b int, +name varchar(50), +key t2_ix1 (a,b) +) engine = innodb; +insert into t2 values (0, NULL, 'a'), (1, NULL, 'A'), (2, 2, 'B'), (3,3, 'C'), +(4,4, 'D'), (5,5, NULL), (6,6, NULL), (7,7, 'E'), (8,8, 'F'), (9,9, 'G'), +(10,10,'H'), (11,11, NULL), (12,12, NULL); +drop table t3; 
+create table t3 +( +a int not null, +b int, +description varchar(50), +key t3_ix1 (a,b) +) engine = innodb; +insert into t3 values (1, 1, 'bar'),(2,2,'buz'), (3,3, 'silver'); +insert into t3 select seq, seq, 'junk' from seq_3_to_13; +create table t4 +( +c int not null, +d int, +descr varchar(50), +key t4_ix1 (c,d) +) engine = innodb; +insert into t4 values (1, 1, 'iron'), (2,2,'aluminium'), (3,3, 'silver'); +insert into t4 select seq, seq, 'junk' from seq_3_to_13; +# split materialized +explain +select * from t1 +join t2 on t1.a = t2.a and t1.b = t2.b +join +( +select a, b, description from t3 group by a, b +) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b +where dt.a < 1; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 range t1_ix1 t1_ix1 4 NULL 1 Using index condition +1 PRIMARY t2 ref t2_ix1 t2_ix1 9 test.t1.a,test.t1.b 1 +1 PRIMARY ref key0 key0 9 test.t1.a,test.t1.b 1 +2 LATERAL DERIVED t3 ref t3_ix1 t3_ix1 9 test.t1.a,test.t1.b 1 Using index condition +select +json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t +from information_schema.optimizer_trace; +t +[ + { + "table_alias": "dt", + "key_name": "key0", + "key_parts": 2, + "select": + ["group_list_in_key"], + "rec_per_key_estimate": 1 + }, + { + "table_alias": "dt", + "key_name": "key1", + "key_parts": 2, + "select": + ["group_list_in_key"], + "rec_per_key_estimate": 1 + } +] +# union with distinct rows +explain +select * from t1 +join t2 on t1.a = t2.a and t1.b = t2.b +join +( +select a, b, description from t3 group by a, b +union +select c, d, descr from t4 group by c, d +) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b +where dt.a < 1; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 range t1_ix1 t1_ix1 4 NULL 1 Using index condition +1 PRIMARY t2 ref t2_ix1 t2_ix1 9 test.t1.a,test.t1.b 1 +1 PRIMARY ref key1,distinct_key key1 9 test.t1.a,test.t1.b 2 +2 DERIVED t3 range t3_ix1 t3_ix1 4 NULL 1 Using index 
condition +3 UNION t4 range t4_ix1 t4_ix1 4 NULL 1 Using index condition +NULL UNION RESULT ALL NULL NULL NULL NULL NULL +select +json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t +from information_schema.optimizer_trace; +t +[ + { + "table_alias": "dt", + "key_name": "key1", + "key_parts": 2, + "select": + [ + "group_list_in_key", + "group_list_in_key" + ], + "rec_per_key_estimate": 2 + }, + { + "table_alias": "dt", + "key_name": "key2", + "key_parts": 2, + "select": + [ + "group_list_in_key", + "group_list_in_key" + ], + "rec_per_key_estimate": 2 + } +] +# union without distinct rows +explain +select * from t1 +join t2 on t1.a = t2.a and t1.b = t2.b +join +( +select a, b, description from t3 group by a, b +union all +select c, d, descr from t4 group by c, d +) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b +where dt.a < 1; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 range t1_ix1 t1_ix1 4 NULL 1 Using index condition +1 PRIMARY t2 ref t2_ix1 t2_ix1 9 test.t1.a,test.t1.b 1 +1 PRIMARY ref key0 key0 9 test.t1.a,test.t1.b 2 +2 DERIVED t3 range t3_ix1 t3_ix1 4 NULL 1 Using index condition +3 UNION t4 range t4_ix1 t4_ix1 4 NULL 1 Using index condition +select +json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t +from information_schema.optimizer_trace; +t +[ + { + "table_alias": "dt", + "key_name": "key0", + "key_parts": 2, + "select": + [ + "group_list_in_key", + "group_list_in_key" + ], + "rec_per_key_estimate": 2 + }, + { + "table_alias": "dt", + "key_name": "key1", + "key_parts": 2, + "select": + [ + "group_list_in_key", + "group_list_in_key" + ], + "rec_per_key_estimate": 2 + } +] +# union without distinct rows with simple non grouping 2nd select +explain +select * from t1 +join t2 on t1.a = t2.a and t1.b = t2.b +join +( +select a, b, description from t3 group by a, b +union all +select c, d, descr from t4 +) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b +where dt.a < 1; 
+id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 range t1_ix1 t1_ix1 4 NULL 1 Using index condition +1 PRIMARY t2 ref t2_ix1 t2_ix1 9 test.t1.a,test.t1.b 1 +1 PRIMARY ref key0 key0 9 test.t1.a,test.t1.b 1 +2 DERIVED t3 range t3_ix1 t3_ix1 4 NULL 1 Using index condition +3 UNION t4 range t4_ix1 t4_ix1 4 NULL 1 Using index condition +select +json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t +from information_schema.optimizer_trace; +t +[ + { + "table_alias": "dt", + "key_name": "key0", + "key_parts": 2, + "select": + [ + "group_list_in_key", + "unhandled query" + ] + }, + { + "table_alias": "dt", + "key_name": "key1", + "key_parts": 2, + "select": + [ + "group_list_in_key", + "unhandled query" + ] + } +] +# intersect +explain +select * from t1 +join t2 on t1.a = t2.a and t1.b = t2.b +join +( +select a, b, description from t3 group by a, b +intersect +select c, d, descr from t4 group by c, d +) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b +where dt.a > 2 and dt.a < 4; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 range t1_ix1 t1_ix1 4 NULL 1 Using index condition +1 PRIMARY t2 ref t2_ix1 t2_ix1 9 test.t1.a,test.t1.b 1 +1 PRIMARY ref key1,distinct_key key1 9 test.t1.a,test.t1.b 2 +2 DERIVED t3 range t3_ix1 t3_ix1 4 NULL 2 Using index condition +3 INTERSECT t4 range t4_ix1 t4_ix1 4 NULL 2 Using index condition +NULL INTERSECT RESULT ALL NULL NULL NULL NULL NULL +select +json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t +from information_schema.optimizer_trace; +t +[ + { + "table_alias": "dt", + "key_name": "key1", + "key_parts": 2, + "select": + [ + "group_list_in_key", + "group_list_in_key" + ], + "rec_per_key_estimate": 2 + }, + { + "table_alias": "dt", + "key_name": "key2", + "key_parts": 2, + "select": + [ + "group_list_in_key", + "group_list_in_key" + ], + "rec_per_key_estimate": 2 + } +] +drop table t1, t2, t3, t4; +# +# End of 11.4 tests 
+# set optimizer_switch=@exit_optimizer_switch; diff --git a/mysql-test/main/derived_opt.test b/mysql-test/main/derived_opt.test index 778a4f0dbe8..99c18a840d1 100644 --- a/mysql-test/main/derived_opt.test +++ b/mysql-test/main/derived_opt.test @@ -1,4 +1,6 @@ # Initialize +--source include/not_embedded.inc +--source include/have_innodb.inc --disable_warnings drop table if exists t0,t1,t2,t3; drop database if exists test1; @@ -439,5 +441,277 @@ DROP TABLE t1, t2; --echo # End of 10.3 tests --echo # +--echo # +--echo # MDEV-36321 keys generated on derived tables produce wrong out_rows estimates +--echo # +--source include/have_sequence.inc + +create table t1 ( + grp_id int, + value int, + index (grp_id) +); + +insert into t1 select + A.seq, B.seq +from + seq_1_to_100 A, + seq_1_to_100 B; + +create table t2 (a int); +insert into t2 select seq from seq_1_to_5; + +create table t3 (b int); +insert into t3 select seq from seq_1_to_5; + +analyze table t1,t2; + +SET optimizer_trace='enabled=on'; +SET optimizer_trace_max_mem_size=10485760; + +select * from + t2, + (select max(value), grp_id from t1 group by grp_id) DT +where + t2.a= DT.grp_id; + +select + json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t +from information_schema.optimizer_trace; + + +--echo # Same as above, but try a UNION: +select * from + t2, + (select max(value), grp_id from t1 group by grp_id + union all + select max(value), grp_id from t1 group by grp_id) DT +where + t2.a= DT.grp_id; +select + json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t +from information_schema.optimizer_trace; + +--echo # Same as the previous query but unhandled group by expression +explain +select * from + t2, + (select max(value), grp_id from t1 group by grp_id + union all + select max(value), grp_id from t1 group by MOD(grp_id,2)) DT +where + t2.a= DT.grp_id; +select + json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t +from 
information_schema.optimizer_trace; + +--echo # view/cte/derived merged inside our derived table + +create view v1 as select * from t1; + +explain +select * from + t2, + (select grp_id, max(value) as maxval from v1 group by grp_id) DT +where + t2.a= DT.grp_id; + +drop view v1; + +explain +with cte1 as (select * from t1) +select * from + t2, + (select grp_id, max(value) as maxval from cte1 group by grp_id) DT +where + t2.a= DT.grp_id; + +explain +select * from + t2, + ( + select grp_id, max(value) as maxval from + ( + select * from t1, t3 + where t1.grp_id = t3.b + ) dt1 + group by grp_id + ) DT +where + t2.a= DT.grp_id; + +--echo # Example with equalities on GROUP BY columns and other columns +--echo # Must produce {table=, ref=test.t2.col2,test.t2.a, rows=1} +alter table t2 add col2 int; +explain +select * from + t2, + (select max(value) as maxval, grp_id from t1 group by grp_id) DT +where + t2.col2=maxval and + t2.a= DT.grp_id; +select + json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t +from information_schema.optimizer_trace; + +# Same as above but swap the column order. 
+# Now we'll get {table=, ref=test.t2.a,test.t2.col2, rows=1} +explain +select * from + t2, + (select grp_id, max(value) as maxval from t1 group by grp_id) DT +where + t2.col2=maxval and + t2.a= DT.grp_id; + + +delete from t1; +insert into t1 select 1, a.seq from seq_1_to_10 a; + +analyze table t1; + +explain +select * from + t2, + (select distinct grp_id from t1 + union all + select distinct value from t1) DT +where + t2.a= DT.grp_id; +select + json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t +from information_schema.optimizer_trace; + +drop table t1, t2; + +create table t1 +( + a int not null, + b int, + c int, + d int, + amount decimal, + key t1_ix1 (a,b) +); + +--echo # More complex examples +insert into t1 values (0, NULL, 0, NULL, 10.0000), (1, 1, 1, 1, 10.0000), +(2, 2, 2, 2, 20.0000), (3, 3, 3, 3, 30.0000), (4, 4, 4, 4, 40.0000), +(5, 5, 5, 5, NULL), (6, 6, 6, 6, NULL), (7, 7, 7, 7, 70.0000), +(8, 8, 8, 8, 80.0000); + +create table t2 +( + a int NOT NULL, + b int, + name varchar(50), + key t2_ix1 (a,b) +) engine = innodb; + +insert into t2 values (0, NULL, 'a'), (1, NULL, 'A'), (2, 2, 'B'), (3,3, 'C'), +(4,4, 'D'), (5,5, NULL), (6,6, NULL), (7,7, 'E'), (8,8, 'F'), (9,9, 'G'), +(10,10,'H'), (11,11, NULL), (12,12, NULL); + +drop table t3; + +create table t3 +( + a int not null, + b int, + description varchar(50), + key t3_ix1 (a,b) +) engine = innodb; +insert into t3 values (1, 1, 'bar'),(2,2,'buz'), (3,3, 'silver'); +insert into t3 select seq, seq, 'junk' from seq_3_to_13; + +create table t4 +( + c int not null, + d int, + descr varchar(50), + key t4_ix1 (c,d) +) engine = innodb; +insert into t4 values (1, 1, 'iron'), (2,2,'aluminium'), (3,3, 'silver'); +insert into t4 select seq, seq, 'junk' from seq_3_to_13; + +--echo # split materialized +explain +select * from t1 +join t2 on t1.a = t2.a and t1.b = t2.b +join +( + select a, b, description from t3 group by a, b +) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b +where dt.a < 1; 
+select + json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t +from information_schema.optimizer_trace; + +--echo # union with distinct rows +explain +select * from t1 +join t2 on t1.a = t2.a and t1.b = t2.b +join +( + select a, b, description from t3 group by a, b + union + select c, d, descr from t4 group by c, d +) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b +where dt.a < 1; +select + json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t +from information_schema.optimizer_trace; + +--echo # union without distinct rows +explain +select * from t1 +join t2 on t1.a = t2.a and t1.b = t2.b +join +( + select a, b, description from t3 group by a, b + union all + select c, d, descr from t4 group by c, d +) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b +where dt.a < 1; +select + json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t +from information_schema.optimizer_trace; + +--echo # union without distinct rows with simple non grouping 2nd select +explain +select * from t1 +join t2 on t1.a = t2.a and t1.b = t2.b +join +( + select a, b, description from t3 group by a, b + union all + select c, d, descr from t4 +) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b +where dt.a < 1; +select + json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t +from information_schema.optimizer_trace; + +--echo # intersect +explain +select * from t1 +join t2 on t1.a = t2.a and t1.b = t2.b +join +( + select a, b, description from t3 group by a, b + intersect + select c, d, descr from t4 group by c, d +) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b +where dt.a > 2 and dt.a < 4; +select + json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t +from information_schema.optimizer_trace; + +drop table t1, t2, t3, t4; + +--echo # +--echo # End of 11.4 tests +--echo # + # The following command must be the last one the file set optimizer_switch=@exit_optimizer_switch; diff 
--git a/mysql-test/main/derived_split_innodb.result b/mysql-test/main/derived_split_innodb.result index faf83e490df..8ca82eb429c 100644 --- a/mysql-test/main/derived_split_innodb.result +++ b/mysql-test/main/derived_split_innodb.result @@ -283,7 +283,7 @@ on t3.a=t.a and t3.c=t.c where t3.b > 15; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t3 range idx_b idx_b 5 NULL 2 Using index condition; Using where -1 PRIMARY ref key0 key0 133 test.t3.a,test.t3.c 2 +1 PRIMARY ref key0 key0 133 test.t3.a,test.t3.c 1 2 DERIVED t4 ALL NULL NULL NULL NULL 40 Using filesort drop table t3, t4; # End of 10.3 tests @@ -348,7 +348,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 5 1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where 1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where -1 PRIMARY ref key0 key0 5 test.t1.b 10 Using where +1 PRIMARY ref key0 key0 5 test.t1.b 1 Using where 2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100 2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) # The important part in the below output is: @@ -449,7 +449,7 @@ ANALYZE "ref": ["test.t1.b"], "loops": 30, "r_loops": 30, - "rows": 10, + "rows": 1, "r_rows": 1, "cost": "REPLACED", "r_table_time_ms": "REPLACED", @@ -546,7 +546,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 5 1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where 1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where -1 PRIMARY ref key0 key0 5 test.t1.b 10 Using where +1 PRIMARY ref key0 key0 5 test.t1.b 1 Using where 2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100 2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) explain @@ -568,7 +568,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 5 1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where 1 PRIMARY t3 ref 
a a 5 test.t1.b 3 Using where -1 PRIMARY ref key0 key0 5 test.t1.b 10 Using where +1 PRIMARY ref key0 key0 5 test.t1.b 1 Using where 2 LATERAL DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index 2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100 2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) @@ -596,7 +596,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t5 eq_ref PRIMARY PRIMARY 4 test.t1.b 1 Using index 1 PRIMARY t2 ref a a 5 test.t1.b 2 1 PRIMARY t3 ref a a 5 test.t1.b 3 -1 PRIMARY ref key0 key0 5 test.t1.b 10 Using where +1 PRIMARY ref key0 key0 5 test.t1.b 1 Using where 2 LATERAL DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index 2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t5.pk 100 Using index condition 2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) @@ -660,7 +660,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 5 1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where 1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where -1 PRIMARY ref key0 key0 5 test.t1.b 10 Using where +1 PRIMARY ref key0 key0 5 test.t1.b 1 Using where 2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100 2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) select * @@ -737,7 +737,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 5 1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where 1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where -1 PRIMARY ref key0 key0 5 test.t1.b 10 Using where +1 PRIMARY ref key0 key0 5 test.t1.b 1 Using where 2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100 2 LATERAL DERIVED t11 hash_ALL NULL #hash#$hj 5 test.t10.col1 100 Using where; Using join buffer (flat, BNLH join) select * @@ -815,7 +815,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 
PRIMARY t1 ALL NULL NULL NULL NULL 5 1 PRIMARY t2 ALL NULL NULL NULL NULL 50 Using where; Using join buffer (flat, BNL join) 1 PRIMARY t3 ALL NULL NULL NULL NULL 15 Using where; Using join buffer (incremental, BNL join) -1 PRIMARY ref key0 key0 5 test.t1.b 10 Using where +1 PRIMARY ref key0 key0 5 test.t1.b 1 Using where 2 DERIVED t10 ALL grp_id NULL NULL NULL 10000 Using temporary; Using filesort 2 DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) select * @@ -997,7 +997,7 @@ T.grp_id=v1.COL10; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 2 1 PRIMARY t2 ref a a 5 test.t1.a 1 Using where; Using index -1 PRIMARY ref key0 key0 5 func 10 Using where +1 PRIMARY ref key0 key0 5 func 1 Using where 2 DERIVED t10 index grp_id grp_id 5 NULL 10000 Using index; Using temporary; Using filesort drop table t1,t2, t10; drop view v1; diff --git a/mysql-test/main/derived_view.result b/mysql-test/main/derived_view.result index c819460c079..9f64e03dd53 100644 --- a/mysql-test/main/derived_view.result +++ b/mysql-test/main/derived_view.result @@ -1242,7 +1242,7 @@ SELECT * FROM t1, t2, v1 WHERE t2.a=t1.a AND t2.a=v1.a AND t2.a=v1.b; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 system NULL NULL NULL NULL 1 1 PRIMARY t2 ref a a 4 const 1 Using index -1 PRIMARY ref key0 key0 8 const,const 1 +1 PRIMARY ref key1 key1 8 func,func 1 2 DERIVED t3 ALL NULL NULL NULL NULL 12 Using temporary; Using filesort SELECT * FROM t1, t2, v1 WHERE t2.a=t1.a AND t2.a=v1.a AND t2.a=v1.b; a a a b @@ -2431,7 +2431,7 @@ GROUP BY TABLE_SCHEMA) AS UNIQUES ON ( COLUMNS.TABLE_SCHEMA = UNIQUES.TABLE_SCHEMA); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY COLUMNS ALL NULL NULL NULL NULL NULL Open_frm_only; Scanned all databases -1 PRIMARY ref key0 key0 194 information_schema.COLUMNS.TABLE_SCHEMA 10 +1 PRIMARY ref key0 key0 194 
information_schema.COLUMNS.TABLE_SCHEMA 1 2 DERIVED STATISTICS ALL NULL NULL NULL NULL NULL Open_frm_only; Scanned all databases; Using filesort SELECT COUNT(*) > 0 FROM INFORMATION_SCHEMA.COLUMNS diff --git a/mysql-test/main/opt_hints_split_materialized.result b/mysql-test/main/opt_hints_split_materialized.result index c4193980f1b..798b3b53c7c 100644 --- a/mysql-test/main/opt_hints_split_materialized.result +++ b/mysql-test/main/opt_hints_split_materialized.result @@ -375,7 +375,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 5 1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where 1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where -1 PRIMARY ref key0 key0 5 test.t1.b 10 Using where +1 PRIMARY ref key0 key0 5 test.t1.b 1 Using where 2 LATERAL DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index 2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100 2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) @@ -443,7 +443,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 5 1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where 1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where -1 PRIMARY ref key0 key0 5 test.t1.b 10 Using where +1 PRIMARY ref key0 key0 5 test.t1.b 1 Using where 2 DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index; Using temporary; Using filesort 2 DERIVED t10 ALL NULL NULL NULL NULL 10000 2 DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) @@ -513,7 +513,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 5 1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where 1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where -1 PRIMARY ref key0 key0 5 test.t1.b 10 Using where +1 PRIMARY ref key0 key0 5 test.t1.b 1 Using where 2 DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index; Using temporary; Using filesort 2 DERIVED t10 ALL NULL 
NULL NULL NULL 10000 2 DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) @@ -581,7 +581,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 5 1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where 1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where -1 PRIMARY ref key0 key0 5 test.t1.b 10 Using where +1 PRIMARY ref key0 key0 5 test.t1.b 1 Using where 2 LATERAL DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index 2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100 2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) @@ -762,7 +762,7 @@ from one_k T1, (select grp, count(*) from t1000 group by grp) TBL where TBL.grp=T1.a; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY T1 ALL NULL NULL NULL NULL 1000 Using where -1 PRIMARY ref key0 key0 5 test.T1.a 10 +1 PRIMARY ref key0 key0 5 test.T1.a 1 2 DERIVED t1000 index grp grp 5 NULL 1000 Using index; Using temporary; Using filesort explain select /*+ SPLIT_MATERIALIZED(TBL) */ * diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result index 1b0e18341ba..92ea3044186 100644 --- a/mysql-test/main/opt_trace.result +++ b/mysql-test/main/opt_trace.result @@ -12129,7 +12129,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 5 1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where 1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where -1 PRIMARY ref key0 key0 5 test.t1.b 10 Using where +1 PRIMARY ref key0 key0 5 test.t1.b 1 Using where 2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100 2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) select json_detailed(json_extract(trace, '$**.check_split_materialized')) as JS diff --git a/sql/opt_group_by_cardinality.cc b/sql/opt_group_by_cardinality.cc index 8b58096cfd1..4ecb14884c2 100644 --- 
a/sql/opt_group_by_cardinality.cc +++ b/sql/opt_group_by_cardinality.cc @@ -17,8 +17,13 @@ /** @file - Contains estimate_post_group_cardinality() which estimates cardinality - after GROUP BY operation is applied. + Contains + - estimate_post_group_cardinality() which estimates cardinality + after GROUP BY operation is applied. + + - infer_derived_key_statistics() to infer index statistics for + potential indexes on derived tables that have data produced with + a GROUP BY operation. */ #include "mariadb.h" @@ -26,6 +31,8 @@ #include "sql_select.h" #include "sql_statistics.h" #include "opt_trace.h" +#include "sql_lex.h" +#include "opt_group_by_cardinality.h" static double estimate_table_group_cardinality(JOIN *join, Item ***group_list, @@ -374,3 +381,148 @@ whole_table: goto normal_exit; } + +/** + @brief Return the index of the first key part whose field name compares equal + to the item's name; -1 if the item is not a field or no key part matches +*/ + +static int item_index_in_key(Item *item, const KEY *keyinfo, uint key_parts) +{ + if (item->real_item()->type() == Item::FIELD_ITEM) + { + for (uint i= 0; i < key_parts; i++) + { + if (!cmp(item->name, keyinfo->key_part[i].field->field_name)) + return (int)i; + } + } + return -1; +} + + +/** + @brief Return TRUE if every element of the ORDER list matches some key part + (as decided by item_index_in_key()); an empty list yields TRUE +*/ + +static +bool all_list_contained_in_keyparts(const KEY *keyinfo, + uint key_parts, + SQL_I_List<ORDER> *list) +{ + for (ORDER *grp= list->first; grp; grp= grp->next) + { + if (item_index_in_key((*grp->item), keyinfo, key_parts) < 0) + return FALSE; + } + return TRUE; +} + + +/** + @brief + When adding a key to a materialized derived table, we can determine some + key statistics from the query block. 
+ + @detail + Currently, we can infer this + + 1) rec_per_key[n-1] (# records for each full key value), when :- + a) the last query set operation in the chain is not a UNION ALL, implying + that duplicate rows are removed, so if the select list matches the + key, we will have one record per distinct key + b) the query within the block has the DISTINCT flag set, and the select + list matches our key, we will have one record per distinct key. + c) The group by list in the query is a subset of our key, we will have + one record per key. + + @todo + It is also possible to use predicates combined with existing key or + histogram statistics on the base tables in our derived table to fill in + this and other attributes of our generated key +*/ + +void infer_derived_key_statistics(st_select_lex_unit* derived, + KEY *keyinfo, + uint key_parts) +{ + st_select_lex* select= derived->first_select(); + Json_writer_object wrapper(derived->thd); + Json_writer_object trace(derived->thd, "infer_derived_key_statistics"); + + + trace.add("table_alias", keyinfo->table->alias.c_ptr()); + trace.add("key_name", keyinfo->name); + trace.add("key_parts", key_parts); + /* + This whole union/intersect of selects does NOT have the ALL flag, so if + we have the same number of select list items as key parts, we can guarantee + that each line in the result set is unique + */ + if (key_parts == select->item_list.elements && + derived->check_distinct_in_union()) + { + trace.add("distinct_in_query_expression", TRUE); + keyinfo->rec_per_key[key_parts - 1]= 1; + } + else + { + Json_writer_array select_proc(derived->thd, "select"); + ulong rec_per_key= 0; + bool all_selects_covered= TRUE; + do + { + bool this_select_covered= FALSE; + /* + This is a SELECT DISTINCT query with $key_parts elements in the + select list. This select in the union will produce one record + per key. 
+ @todo + If we come across multiple SELECT DISTINCT selects in this union + we have a problem in that we do not know anything about how they + might intersect + */ + if (key_parts == select->item_list.elements && + (select->options & SELECT_DISTINCT)) + { + select_proc.add("distinct_in_query_block"); + this_select_covered= TRUE; + rec_per_key++; + } + + /* + Otherwise, a grouping select whose group list is a subset of our key: + extra key fields keep rows unique. A select is counted only once. + */ + else if (select->group_list.elements && + all_list_contained_in_keyparts(keyinfo, + key_parts, + &select->group_list)) + { + select_proc.add("group_list_in_key"); + this_select_covered= TRUE; + rec_per_key++; + } + + if (!this_select_covered) + { + select_proc.add("unhandled query"); + all_selects_covered= FALSE; + } + + } while ((select= select->next_select())); + select_proc.end(); + + /* + Only publish the estimate when every select was covered; otherwise + keyinfo->rec_per_key[key_parts - 1] is left untouched + */ + if (all_selects_covered) + { + keyinfo->rec_per_key[key_parts - 1]= rec_per_key; + trace.add("rec_per_key_estimate", rec_per_key); + } + } +} + diff --git a/sql/opt_group_by_cardinality.h b/sql/opt_group_by_cardinality.h new file mode 100644 index 00000000000..5dcb2589e72 --- /dev/null +++ b/sql/opt_group_by_cardinality.h @@ -0,0 +1,11 @@ +#ifndef OPT_GROUP_BY_CARDINALITY +#define OPT_GROUP_BY_CARDINALITY + + +double estimate_post_group_cardinality(JOIN *join, double join_output_card); + +void infer_derived_key_statistics(st_select_lex_unit* derived, + KEY *keyinfo, + uint key_parts); + +#endif diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 84f91df1590..f31955730e6 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -69,6 +69,7 @@ #include "optimizer_defaults.h" #include "derived_handler.h" #include "opt_hints.h" +#include "opt_group_by_cardinality.h" /* A key part number that means we're using a fulltext scan. 
diff --git a/sql/sql_select.h b/sql/sql_select.h index ea0f43f81c4..18564430892 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -2756,8 +2756,6 @@ void propagate_new_equalities(THD *thd, Item *cond, #define PREV_BITS(type, N_BITS) ((type)my_set_bits(N_BITS)) -double estimate_post_group_cardinality(JOIN *join, double join_output_card); - bool dbug_user_var_equals_str(THD *thd, const char *name, const char *value); #include "opt_vcol_substitution.h" diff --git a/sql/table.cc b/sql/table.cc index f9140c14b0d..ae25eb396d0 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -51,6 +51,7 @@ #include "rpl_rli.h" // class rpl_group_info #include "rpl_mi.h" // class Master_info #include "vector_mhnsw.h" +#include "opt_group_by_cardinality.h" #ifdef WITH_WSREP #include "wsrep_schema.h" @@ -8670,6 +8671,7 @@ bool TABLE::check_tmp_key(uint key, uint key_parts, key_parts <= tmp_table_max_key_parts(); } + /** @brief Add one key to a temporary table @@ -8728,6 +8730,7 @@ bool TABLE::add_tmp_key(uint key, uint key_parts, bzero(keyinfo->rec_per_key, sizeof(ulong)*key_parts); keyinfo->read_stats= NULL; keyinfo->collected_stats= NULL; + keyinfo->table= this; for (i= 0; i < key_parts; i++) { @@ -8748,25 +8751,10 @@ bool TABLE::add_tmp_key(uint key, uint key_parts, */ keyinfo->index_flags= file->index_flags(key, 0, 1); - /* - For the case when there is a derived table that would give distinct rows, - the index statistics are passed to the join optimizer to tell that a ref - access to all the fields of the derived table will produce only one row. - */ - st_select_lex_unit* derived= pos_in_table_list ? 
pos_in_table_list->derived: NULL; if (derived) - { - st_select_lex* first= derived->first_select(); - uint select_list_items= first->get_item_list()->elements; - if (key_parts == select_list_items) - { - if ((!first->is_part_of_union() && (first->options & SELECT_DISTINCT)) || - derived->check_distinct_in_union()) - keyinfo->rec_per_key[key_parts - 1]= 1; - } - } + infer_derived_key_statistics(derived, keyinfo, key_parts); set_if_bigger(s->max_key_length, keyinfo->key_length); s->keys++;