1
0
mirror of https://github.com/MariaDB/server.git synced 2026-01-06 05:22:24 +03:00

MDEV-36321 keys generated on derived tables produce wrong out_rows estimates

Record-per-key statistics on keys generated for derived tables are missing,
causing a fallback to the usage of MATCHING_ROWS_IN_OTHER_TABLE in
best_access_path().
This can grossly overestimate the number of rows expected during a join
and cause a bad join order.  This patch tries to infer a few simple
things about the derived table, such as when the key being created matches
the GROUP BY list (if any) inside the derived table.

Approved by Sergei Petrunia (sergey@mariadb.com)
Tested by Lena Startseva (lena.startseva@mariadb.com)
This commit is contained in:
Rex Johnston
2025-08-21 14:10:13 +11:00
committed by Rex Johnston
parent 28f5322a44
commit 8d08350dd3
13 changed files with 1116 additions and 189 deletions

View File

@@ -1930,7 +1930,7 @@ a b max_c avg_c a b c d
explain select * from v1,t2 where (v1.a=v1.b) and (v1.a=t2.a);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where
1 PRIMARY <derived2> ref key0 key0 10 test.t2.a,test.t2.a 2
1 PRIMARY <derived2> ref key0 key0 10 test.t2.a,test.t2.a 1
2 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort
explain format=json select * from v1,t2 where (v1.a=v1.b) and (v1.a=t2.a);
EXPLAIN
@@ -1960,7 +1960,7 @@ EXPLAIN
"used_key_parts": ["a", "b"],
"ref": ["test.t2.a", "test.t2.a"],
"loops": 9,
"rows": 2,
"rows": 1,
"cost": "COST_REPLACED",
"filtered": 100,
"materialized": {
@@ -2993,7 +2993,7 @@ where t1.a>5 group by a,b having max_c < 707) v1,
t2 where (v1.a=t2.a) and (v1.max_c>300) and (v1.a=v1.b);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where
1 PRIMARY <derived2> ref key0 key0 10 test.t2.a,test.t2.a 2 Using where
1 PRIMARY <derived2> ref key0 key0 10 test.t2.a,test.t2.a 1 Using where
2 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort
explain format=json select * from
(select a, b, max(c) as max_c, avg(c) as avg_c from t1
@@ -3026,7 +3026,7 @@ EXPLAIN
"used_key_parts": ["a", "b"],
"ref": ["test.t2.a", "test.t2.a"],
"loops": 9,
"rows": 2,
"rows": 1,
"cost": "COST_REPLACED",
"filtered": 100,
"attached_condition": "v1.max_c > 300",
@@ -3155,7 +3155,7 @@ a b max_c avg_c a b c d
explain select * from v1,t2 where (v1.a=t2.a) and (v1.b=t2.b);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where
1 PRIMARY <derived2> ref key0 key0 10 test.t2.a,test.t2.b 2
1 PRIMARY <derived2> ref key0 key0 10 test.t2.a,test.t2.b 1
2 DERIVED t1 ALL NULL NULL NULL NULL 20 Using temporary; Using filesort
explain format=json select * from v1,t2 where (v1.a=t2.a) and (v1.b=t2.b);
EXPLAIN
@@ -3185,7 +3185,7 @@ EXPLAIN
"used_key_parts": ["a", "b"],
"ref": ["test.t2.a", "test.t2.b"],
"loops": 9,
"rows": 2,
"rows": 1,
"cost": "COST_REPLACED",
"filtered": 100,
"materialized": {
@@ -4041,7 +4041,7 @@ explain select * from v1,v2,t2 where
(v1.a=t2.a) and (v1.a=v1.b) and (v1.a=v2.a) and (v2.max_c<300);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where
1 PRIMARY <derived2> ref key1 key1 10 test.t2.a,test.t2.a 2
1 PRIMARY <derived2> ref key1 key1 10 test.t2.a,test.t2.a 1
1 PRIMARY <derived3> ref key0 key0 5 test.t2.a 2 Using where
3 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort
2 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort
@@ -4074,7 +4074,7 @@ EXPLAIN
"used_key_parts": ["a", "b"],
"ref": ["test.t2.a", "test.t2.a"],
"loops": 9,
"rows": 2,
"rows": 1,
"cost": "COST_REPLACED",
"filtered": 100,
"materialized": {
@@ -4113,7 +4113,7 @@ EXPLAIN
"key_length": "5",
"used_key_parts": ["a"],
"ref": ["test.t2.a"],
"loops": 18,
"loops": 9,
"rows": 2,
"cost": "COST_REPLACED",
"filtered": 100,
@@ -8165,8 +8165,8 @@ a b min_c a b max_c avg_c
explain select * from v4,v2 where
(v4.a=v2.b) and (v4.a=v4.b) and (v4.min_c<100);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 20 Using where
1 PRIMARY <derived4> ref key0 key0 5 v4.a 2
1 PRIMARY <derived4> ALL NULL NULL NULL NULL 20 Using where
1 PRIMARY <derived2> ref key0 key0 10 v2.b,v2.b 1 Using where
4 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort
2 DERIVED <derived3> ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort
3 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort
@@ -8180,13 +8180,54 @@ EXPLAIN
"nested_loop": [
{
"table": {
"table_name": "<derived2>",
"table_name": "<derived4>",
"access_type": "ALL",
"loops": 1,
"rows": 20,
"cost": "COST_REPLACED",
"filtered": 100,
"attached_condition": "v4.b = v4.a and v4.min_c < 100 and v4.a is not null",
"attached_condition": "v2.b is not null and v2.b is not null",
"materialized": {
"query_block": {
"select_id": 4,
"cost": "COST_REPLACED",
"having_condition": "max_c < 707",
"filesort": {
"sort_key": "t1.a, t1.b",
"temporary_table": {
"nested_loop": [
{
"table": {
"table_name": "t1",
"access_type": "ALL",
"loops": 1,
"rows": 20,
"cost": "COST_REPLACED",
"filtered": 100,
"attached_condition": "t1.a > 5"
}
}
]
}
}
}
}
}
},
{
"table": {
"table_name": "<derived2>",
"access_type": "ref",
"possible_keys": ["key0"],
"key": "key0",
"key_length": "10",
"used_key_parts": ["a", "b"],
"ref": ["v2.b", "v2.b"],
"loops": 20,
"rows": 1,
"cost": "COST_REPLACED",
"filtered": 100,
"attached_condition": "v4.min_c < 100",
"materialized": {
"query_block": {
"select_id": 2,
@@ -8238,46 +8279,6 @@ EXPLAIN
}
}
}
},
{
"table": {
"table_name": "<derived4>",
"access_type": "ref",
"possible_keys": ["key0"],
"key": "key0",
"key_length": "5",
"used_key_parts": ["b"],
"ref": ["v4.a"],
"loops": 20,
"rows": 2,
"cost": "COST_REPLACED",
"filtered": 100,
"materialized": {
"query_block": {
"select_id": 4,
"cost": "COST_REPLACED",
"having_condition": "max_c < 707",
"filesort": {
"sort_key": "t1.a, t1.b",
"temporary_table": {
"nested_loop": [
{
"table": {
"table_name": "t1",
"access_type": "ALL",
"loops": 1,
"rows": 20,
"cost": "COST_REPLACED",
"filtered": 100,
"attached_condition": "t1.a > 5"
}
}
]
}
}
}
}
}
}
]
}
@@ -8296,8 +8297,8 @@ a b min_c a b max_c avg_c
explain select * from v4,v2 where
(v4.a=v2.b) and (v4.a=v4.b) and (v2.b<30);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 20 Using where
1 PRIMARY <derived4> ref key0 key0 5 v4.a 2
1 PRIMARY <derived4> ALL NULL NULL NULL NULL 20 Using where
1 PRIMARY <derived2> ref key0 key0 10 v2.b,v2.b 1
4 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort
2 DERIVED <derived3> ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort
3 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort
@@ -8311,13 +8312,53 @@ EXPLAIN
"nested_loop": [
{
"table": {
"table_name": "<derived2>",
"table_name": "<derived4>",
"access_type": "ALL",
"loops": 1,
"rows": 20,
"cost": "COST_REPLACED",
"filtered": 100,
"attached_condition": "v4.b = v4.a and v4.a < 30 and v4.a is not null",
"attached_condition": "v2.b < 30 and v2.b is not null and v2.b is not null",
"materialized": {
"query_block": {
"select_id": 4,
"cost": "COST_REPLACED",
"having_condition": "max_c < 707",
"filesort": {
"sort_key": "t1.a, t1.b",
"temporary_table": {
"nested_loop": [
{
"table": {
"table_name": "t1",
"access_type": "ALL",
"loops": 1,
"rows": 20,
"cost": "COST_REPLACED",
"filtered": 100,
"attached_condition": "t1.a > 5 and t1.b < 30"
}
}
]
}
}
}
}
}
},
{
"table": {
"table_name": "<derived2>",
"access_type": "ref",
"possible_keys": ["key0"],
"key": "key0",
"key_length": "10",
"used_key_parts": ["a", "b"],
"ref": ["v2.b", "v2.b"],
"loops": 20,
"rows": 1,
"cost": "COST_REPLACED",
"filtered": 100,
"materialized": {
"query_block": {
"select_id": 2,
@@ -8368,46 +8409,6 @@ EXPLAIN
}
}
}
},
{
"table": {
"table_name": "<derived4>",
"access_type": "ref",
"possible_keys": ["key0"],
"key": "key0",
"key_length": "5",
"used_key_parts": ["b"],
"ref": ["v4.a"],
"loops": 20,
"rows": 2,
"cost": "COST_REPLACED",
"filtered": 100,
"materialized": {
"query_block": {
"select_id": 4,
"cost": "COST_REPLACED",
"having_condition": "max_c < 707",
"filesort": {
"sort_key": "t1.a, t1.b",
"temporary_table": {
"nested_loop": [
{
"table": {
"table_name": "t1",
"access_type": "ALL",
"loops": 1,
"rows": 20,
"cost": "COST_REPLACED",
"filtered": 100,
"attached_condition": "t1.a > 5 and t1.b < 30"
}
}
]
}
}
}
}
}
}
]
}
@@ -8426,8 +8427,8 @@ a b min_c a b max_c avg_c
explain select * from v4,v2 where
(v4.a=v2.b) and (v4.a=v4.b) and ((v2.b<30) or (v4.a>2));
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 20 Using where
1 PRIMARY <derived4> ref key0 key0 5 v4.a 2
1 PRIMARY <derived4> ALL NULL NULL NULL NULL 20 Using where
1 PRIMARY <derived2> ref key0 key0 10 v2.b,v2.b 1
4 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort
2 DERIVED <derived3> ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort
3 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort
@@ -8441,13 +8442,53 @@ EXPLAIN
"nested_loop": [
{
"table": {
"table_name": "<derived2>",
"table_name": "<derived4>",
"access_type": "ALL",
"loops": 1,
"rows": 20,
"cost": "COST_REPLACED",
"filtered": 100,
"attached_condition": "v4.b = v4.a and (v4.a < 30 or v4.a > 2) and v4.a is not null",
"attached_condition": "(v2.b < 30 or v2.b > 2) and v2.b is not null and v2.b is not null",
"materialized": {
"query_block": {
"select_id": 4,
"cost": "COST_REPLACED",
"having_condition": "max_c < 707",
"filesort": {
"sort_key": "t1.a, t1.b",
"temporary_table": {
"nested_loop": [
{
"table": {
"table_name": "t1",
"access_type": "ALL",
"loops": 1,
"rows": 20,
"cost": "COST_REPLACED",
"filtered": 100,
"attached_condition": "t1.a > 5 and (t1.b < 30 or t1.b > 2)"
}
}
]
}
}
}
}
}
},
{
"table": {
"table_name": "<derived2>",
"access_type": "ref",
"possible_keys": ["key0"],
"key": "key0",
"key_length": "10",
"used_key_parts": ["a", "b"],
"ref": ["v2.b", "v2.b"],
"loops": 20,
"rows": 1,
"cost": "COST_REPLACED",
"filtered": 100,
"materialized": {
"query_block": {
"select_id": 2,
@@ -8498,46 +8539,6 @@ EXPLAIN
}
}
}
},
{
"table": {
"table_name": "<derived4>",
"access_type": "ref",
"possible_keys": ["key0"],
"key": "key0",
"key_length": "5",
"used_key_parts": ["b"],
"ref": ["v4.a"],
"loops": 20,
"rows": 2,
"cost": "COST_REPLACED",
"filtered": 100,
"materialized": {
"query_block": {
"select_id": 4,
"cost": "COST_REPLACED",
"having_condition": "max_c < 707",
"filesort": {
"sort_key": "t1.a, t1.b",
"temporary_table": {
"nested_loop": [
{
"table": {
"table_name": "t1",
"access_type": "ALL",
"loops": 1,
"rows": 20,
"cost": "COST_REPLACED",
"filtered": 100,
"attached_condition": "t1.a > 5 and (t1.b < 30 or t1.b > 2)"
}
}
]
}
}
}
}
}
}
]
}
@@ -20148,7 +20149,7 @@ where t2.b < 40 and t2.a=t3.a and t3.c=t.c;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t2 ALL NULL NULL NULL NULL 90 60.00 Using where
1 PRIMARY t3 ref idx_a idx_a 5 test.t2.a 1 100.00 Using where
1 PRIMARY <derived2> ref key0 key0 128 test.t3.c 5 100.00
1 PRIMARY <derived2> ref key0 key0 128 test.t3.c 1 100.00
2 DERIVED t4 ALL idx_c NULL NULL NULL 160 100.00 Using temporary; Using filesort
Warnings:
Note 1003 /* select#1 */ select `test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t2`.`c` AS `c`,`t`.`c` AS `t_c`,`t`.`max` AS `max`,`t`.`min` AS `min` from `test`.`t2` join `test`.`t3` join (/* select#2 */ select `test`.`t4`.`c` AS `c`,max(`test`.`t4`.`b`) AS `max`,min(`test`.`t4`.`b`) AS `min` from `test`.`t4` group by `test`.`t4`.`c`) `t` where `test`.`t3`.`a` = `test`.`t2`.`a` and `t`.`c` = `test`.`t3`.`c` and `test`.`t2`.`b` < 40
@@ -20198,7 +20199,7 @@ EXPLAIN
"used_key_parts": ["c"],
"ref": ["test.t3.c"],
"loops": 80.99999987,
"rows": 5,
"rows": 1,
"cost": "COST_REPLACED",
"filtered": 100,
"materialized": {
@@ -21318,7 +21319,7 @@ id a
explain extended select id, a from t1 where id in (select id from v1);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1 ALL PRIMARY NULL NULL NULL 20 100.00
1 PRIMARY <derived3> ref key0 key0 4 test.t1.id 2 50.00 FirstMatch(t1)
1 PRIMARY <derived3> ref key0 key0 4 test.t1.id 1 100.00 FirstMatch(t1)
3 DERIVED t1 ALL PRIMARY NULL NULL NULL 20 100.00 Using temporary; Using filesort
3 DERIVED t2 ref ro_id ro_id 4 test.t1.id 1 100.00 Using where
Warnings:
@@ -21356,7 +21357,7 @@ on (t1.id = t2.ro_id AND t2.flag = 1)
group by t1.id) dt);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1 ALL PRIMARY NULL NULL NULL 20 100.00
1 PRIMARY <derived3> ref key1,distinct_key key1 4 test.t1.id 2 50.00 FirstMatch(t1)
1 PRIMARY <derived3> ref key1,distinct_key key1 4 test.t1.id 1 100.00 FirstMatch(t1)
3 DERIVED t1 ALL PRIMARY NULL NULL NULL 20 100.00 Using temporary; Using filesort
3 DERIVED t2 ref ro_id ro_id 4 test.t1.id 1 100.00 Using where
Warnings:
@@ -22025,7 +22026,7 @@ WHERE t1.id BETWEEN 200 AND 100000;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t3 range t1_id t1_id 5 NULL 47 Using where; Using index
1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t3.t1_id 1 Using index
1 PRIMARY <derived2> ref key0 key0 5 test.t3.t1_id 10
1 PRIMARY <derived2> ref key0 key0 5 test.t3.t1_id 1
2 DERIVED t2 ALL t1_id NULL NULL NULL 2408 Using where; Using temporary; Using filesort
set optimizer_switch='split_materialized=default';
DROP TABLE t1,t2,t3;
@@ -22302,7 +22303,7 @@ from_agg_items.ledger_id = charges.from_ledger_id
WHERE charges.to_ledger_id = 2;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY charges ref PRIMARY,fk_charge_from_ledger,fk_charge_to_ledger fk_charge_to_ledger 8 const 8
1 PRIMARY <derived2> ref key0 key0 17 test.charges.from_ledger_id,test.charges.id 4
1 PRIMARY <derived2> ref key0 key0 17 test.charges.from_ledger_id,test.charges.id 1
2 DERIVED transaction_items ALL fk_items_transaction NULL NULL NULL 40 Using temporary; Using filesort
2 DERIVED transactions eq_ref PRIMARY PRIMARY 8 test.transaction_items.transaction_id 1
INSERT INTO charges (id, from_ledger_id, to_ledger_id, amount) VALUES
@@ -22507,7 +22508,7 @@ from_agg_items.ledger_id = charges.from_ledger_id
WHERE charges.to_ledger_id = 2;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY charges ref fk_charge_to_ledger fk_charge_to_ledger 8 const 10
1 PRIMARY <derived2> ref key0 key0 18 test.charges.from_ledger_id,test.charges.id 4
1 PRIMARY <derived2> ref key0 key0 18 test.charges.from_ledger_id,test.charges.id 1
2 DERIVED transaction_items ALL fk_items_transaction NULL NULL NULL 40 Using temporary; Using filesort
2 DERIVED transactions eq_ref PRIMARY PRIMARY 8 test.transaction_items.transaction_id 1
set optimizer_switch='split_materialized=default';
@@ -22845,6 +22846,8 @@ INSERT INTO t1 VALUES
(95,3290880,487,'2021-02-15 18:59:35'),(96,3290798,0,'2021-02-15 18:59:52'),
(97,3290777,983,'2021-02-15 19:00:10'),(98,3290811,488,'2021-02-15 19:00:10'),
(99,3290917,1283,'2021-02-15 19:00:36'),(100,3290858,482,'2021-02-15 19:00:42');
insert into t1 select seq, 3300000+seq, 100+seq, '2021-02-09 18:31:35'
from seq_101_to_1000;
CREATE TABLE t2 (a int) ENGINE=MYISAM;
INSERT INTO t2 VALUES
(3289475),(3289496),(3289562),(3289593),(3289594),(3289595),(3289626),
@@ -22853,7 +22856,7 @@ INSERT INTO t2 VALUES
ANALYZE TABLE t1,t2;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status Table is already up to date
test.t1 analyze status OK
test.t2 analyze status Engine-independent statistics collected
test.t2 analyze status OK
EXPLAIN SELECT t1.valdouble, t1.valint1
@@ -22865,9 +22868,9 @@ t1.valdate = dt.maxdate AND
t1.valint1 IN (SELECT * FROM t2);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t2 ALL NULL NULL NULL NULL 21 Using where; Start temporary
1 PRIMARY t1 ref valint1,valint1_2 valint1 5 test.t2.a 2 Using index condition; Using where; End temporary
1 PRIMARY t1 ref valint1,valint1_2 valint1 5 test.t2.a 1 Using index condition; Using where; End temporary
1 PRIMARY <derived2> ref key0 key0 11 test.t1.valdate,test.t1.valint1 1
2 LATERAL DERIVED t ref valint1,valint1_2 valint1 5 test.t2.a 2 Using index condition
2 LATERAL DERIVED t ref valint1,valint1_2 valint1 5 test.t2.a 1 Using index condition
SELECT t1.valdouble, t1.valint1
FROM t1,
(SELECT max(t.valdate) AS maxdate, t.valint1 FROM t1 t GROUP BY t.valint1)

View File

@@ -4280,6 +4280,9 @@ INSERT INTO t1 VALUES
(97,3290777,983,'2021-02-15 19:00:10'),(98,3290811,488,'2021-02-15 19:00:10'),
(99,3290917,1283,'2021-02-15 19:00:36'),(100,3290858,482,'2021-02-15 19:00:42');
insert into t1 select seq, 3300000+seq, 100+seq, '2021-02-09 18:31:35'
from seq_101_to_1000;
CREATE TABLE t2 (a int) ENGINE=MYISAM;
INSERT INTO t2 VALUES
(3289475),(3289496),(3289562),(3289593),(3289594),(3289595),(3289626),

View File

@@ -566,4 +566,501 @@ DROP TABLE t1, t2;
#
# End of 10.3 tests
#
#
# MDEV-36321 keys generated on derived tables produce wrong out_rows estimates
#
create table t1 (
grp_id int,
value int,
index (grp_id)
);
insert into t1 select
A.seq, B.seq
from
seq_1_to_100 A,
seq_1_to_100 B;
create table t2 (a int);
insert into t2 select seq from seq_1_to_5;
create table t3 (b int);
insert into t3 select seq from seq_1_to_5;
analyze table t1,t2;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status Table is already up to date
test.t2 analyze status Engine-independent statistics collected
test.t2 analyze status OK
SET optimizer_trace='enabled=on';
SET optimizer_trace_max_mem_size=10485760;
select * from
t2,
(select max(value), grp_id from t1 group by grp_id) DT
where
t2.a= DT.grp_id;
a max(value) grp_id
1 100 1
2 100 2
3 100 3
4 100 4
5 100 5
select
json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
from information_schema.optimizer_trace;
t
[
{
"table_alias": "DT",
"key_name": "key0",
"key_parts": 1,
"select":
["group_list_in_key"],
"rec_per_key_estimate": 1
}
]
# Same as above, but try a UNION:
select * from
t2,
(select max(value), grp_id from t1 group by grp_id
union all
select max(value), grp_id from t1 group by grp_id) DT
where
t2.a= DT.grp_id;
a max(value) grp_id
1 100 1
1 100 1
2 100 2
2 100 2
3 100 3
3 100 3
4 100 4
4 100 4
5 100 5
5 100 5
select
json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
from information_schema.optimizer_trace;
t
[
{
"table_alias": "DT",
"key_name": "key0",
"key_parts": 1,
"select":
[
"group_list_in_key",
"group_list_in_key"
],
"rec_per_key_estimate": 2
}
]
# Same as the previous query but unhandled group by expression
explain
select * from
t2,
(select max(value), grp_id from t1 group by grp_id
union all
select max(value), grp_id from t1 group by MOD(grp_id,2)) DT
where
t2.a= DT.grp_id;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 101
2 DERIVED t1 ALL NULL NULL NULL NULL 10000 Using temporary; Using filesort
3 UNION t1 ALL NULL NULL NULL NULL 10000 Using temporary; Using filesort
select
json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
from information_schema.optimizer_trace;
t
[
{
"table_alias": "DT",
"key_name": "key0",
"key_parts": 1,
"select":
[
"group_list_in_key",
"unhandled query"
]
}
]
# view/cte/derived merged inside our derived table
create view v1 as select * from t1;
explain
select * from
t2,
(select grp_id, max(value) as maxval from v1 group by grp_id) DT
where
t2.a= DT.grp_id;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 1
2 DERIVED t1 ALL grp_id NULL NULL NULL 10000 Using temporary; Using filesort
drop view v1;
explain
with cte1 as (select * from t1)
select * from
t2,
(select grp_id, max(value) as maxval from cte1 group by grp_id) DT
where
t2.a= DT.grp_id;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where
1 PRIMARY <derived3> ref key0 key0 5 test.t2.a 1
3 DERIVED t1 ALL grp_id NULL NULL NULL 10000 Using temporary; Using filesort
explain
select * from
t2,
(
select grp_id, max(value) as maxval from
(
select * from t1, t3
where t1.grp_id = t3.b
) dt1
group by grp_id
) DT
where
t2.a= DT.grp_id;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 1
2 DERIVED t3 ALL NULL NULL NULL NULL 5 Using where; Using temporary; Using filesort
2 DERIVED t1 ref grp_id grp_id 5 test.t3.b 100
# Example with equalities on GROUP BY columns and other columns
# Must produce {table=<derived2>, ref=test.t2.col2,test.t2.a, rows=1}
alter table t2 add col2 int;
explain
select * from
t2,
(select max(value) as maxval, grp_id from t1 group by grp_id) DT
where
t2.col2=maxval and
t2.a= DT.grp_id;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where
1 PRIMARY <derived2> ref key0 key0 10 test.t2.col2,test.t2.a 1
2 DERIVED t1 ALL grp_id NULL NULL NULL 10000 Using temporary; Using filesort
select
json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
from information_schema.optimizer_trace;
t
[
{
"table_alias": "DT",
"key_name": "key0",
"key_parts": 2,
"select":
["group_list_in_key"],
"rec_per_key_estimate": 1
}
]
explain
select * from
t2,
(select grp_id, max(value) as maxval from t1 group by grp_id) DT
where
t2.col2=maxval and
t2.a= DT.grp_id;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where
1 PRIMARY <derived2> ref key0 key0 10 test.t2.a,test.t2.col2 1
2 DERIVED t1 ALL grp_id NULL NULL NULL 10000 Using temporary; Using filesort
delete from t1;
insert into t1 select 1, a.seq from seq_1_to_10 a;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status Table is already up to date
explain
select * from
t2,
(select distinct grp_id from t1
union all
select distinct value from t1) DT
where
t2.a= DT.grp_id;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 2
2 DERIVED t1 range NULL grp_id 5 NULL 2 Using index for group-by
3 UNION t1 ALL NULL NULL NULL NULL 10 Using temporary
select
json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
from information_schema.optimizer_trace;
t
[
{
"table_alias": "DT",
"key_name": "key0",
"key_parts": 1,
"select":
[
"distinct_in_query_block",
"distinct_in_query_block"
],
"rec_per_key_estimate": 2
}
]
drop table t1, t2;
create table t1
(
a int not null,
b int,
c int,
d int,
amount decimal,
key t1_ix1 (a,b)
);
# More complex examples
insert into t1 values (0, NULL, 0, NULL, 10.0000), (1, 1, 1, 1, 10.0000),
(2, 2, 2, 2, 20.0000), (3, 3, 3, 3, 30.0000), (4, 4, 4, 4, 40.0000),
(5, 5, 5, 5, NULL), (6, 6, 6, 6, NULL), (7, 7, 7, 7, 70.0000),
(8, 8, 8, 8, 80.0000);
create table t2
(
a int NOT NULL,
b int,
name varchar(50),
key t2_ix1 (a,b)
) engine = innodb;
insert into t2 values (0, NULL, 'a'), (1, NULL, 'A'), (2, 2, 'B'), (3,3, 'C'),
(4,4, 'D'), (5,5, NULL), (6,6, NULL), (7,7, 'E'), (8,8, 'F'), (9,9, 'G'),
(10,10,'H'), (11,11, NULL), (12,12, NULL);
drop table t3;
create table t3
(
a int not null,
b int,
description varchar(50),
key t3_ix1 (a,b)
) engine = innodb;
insert into t3 values (1, 1, 'bar'),(2,2,'buz'), (3,3, 'silver');
insert into t3 select seq, seq, 'junk' from seq_3_to_13;
create table t4
(
c int not null,
d int,
descr varchar(50),
key t4_ix1 (c,d)
) engine = innodb;
insert into t4 values (1, 1, 'iron'), (2,2,'aluminium'), (3,3, 'silver');
insert into t4 select seq, seq, 'junk' from seq_3_to_13;
# split materialized
explain
select * from t1
join t2 on t1.a = t2.a and t1.b = t2.b
join
(
select a, b, description from t3 group by a, b
) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b
where dt.a < 1;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 range t1_ix1 t1_ix1 4 NULL 1 Using index condition
1 PRIMARY t2 ref t2_ix1 t2_ix1 9 test.t1.a,test.t1.b 1
1 PRIMARY <derived2> ref key0 key0 9 test.t1.a,test.t1.b 1
2 LATERAL DERIVED t3 ref t3_ix1 t3_ix1 9 test.t1.a,test.t1.b 1 Using index condition
select
json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
from information_schema.optimizer_trace;
t
[
{
"table_alias": "dt",
"key_name": "key0",
"key_parts": 2,
"select":
["group_list_in_key"],
"rec_per_key_estimate": 1
},
{
"table_alias": "dt",
"key_name": "key1",
"key_parts": 2,
"select":
["group_list_in_key"],
"rec_per_key_estimate": 1
}
]
# union with distinct rows
explain
select * from t1
join t2 on t1.a = t2.a and t1.b = t2.b
join
(
select a, b, description from t3 group by a, b
union
select c, d, descr from t4 group by c, d
) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b
where dt.a < 1;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 range t1_ix1 t1_ix1 4 NULL 1 Using index condition
1 PRIMARY t2 ref t2_ix1 t2_ix1 9 test.t1.a,test.t1.b 1
1 PRIMARY <derived2> ref key1,distinct_key key1 9 test.t1.a,test.t1.b 2
2 DERIVED t3 range t3_ix1 t3_ix1 4 NULL 1 Using index condition
3 UNION t4 range t4_ix1 t4_ix1 4 NULL 1 Using index condition
NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL
select
json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
from information_schema.optimizer_trace;
t
[
{
"table_alias": "dt",
"key_name": "key1",
"key_parts": 2,
"select":
[
"group_list_in_key",
"group_list_in_key"
],
"rec_per_key_estimate": 2
},
{
"table_alias": "dt",
"key_name": "key2",
"key_parts": 2,
"select":
[
"group_list_in_key",
"group_list_in_key"
],
"rec_per_key_estimate": 2
}
]
# union without distinct rows
explain
select * from t1
join t2 on t1.a = t2.a and t1.b = t2.b
join
(
select a, b, description from t3 group by a, b
union all
select c, d, descr from t4 group by c, d
) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b
where dt.a < 1;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 range t1_ix1 t1_ix1 4 NULL 1 Using index condition
1 PRIMARY t2 ref t2_ix1 t2_ix1 9 test.t1.a,test.t1.b 1
1 PRIMARY <derived2> ref key0 key0 9 test.t1.a,test.t1.b 2
2 DERIVED t3 range t3_ix1 t3_ix1 4 NULL 1 Using index condition
3 UNION t4 range t4_ix1 t4_ix1 4 NULL 1 Using index condition
select
json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
from information_schema.optimizer_trace;
t
[
{
"table_alias": "dt",
"key_name": "key0",
"key_parts": 2,
"select":
[
"group_list_in_key",
"group_list_in_key"
],
"rec_per_key_estimate": 2
},
{
"table_alias": "dt",
"key_name": "key1",
"key_parts": 2,
"select":
[
"group_list_in_key",
"group_list_in_key"
],
"rec_per_key_estimate": 2
}
]
# union without distinct rows with simple non grouping 2nd select
explain
select * from t1
join t2 on t1.a = t2.a and t1.b = t2.b
join
(
select a, b, description from t3 group by a, b
union all
select c, d, descr from t4
) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b
where dt.a < 1;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 range t1_ix1 t1_ix1 4 NULL 1 Using index condition
1 PRIMARY t2 ref t2_ix1 t2_ix1 9 test.t1.a,test.t1.b 1
1 PRIMARY <derived2> ref key0 key0 9 test.t1.a,test.t1.b 1
2 DERIVED t3 range t3_ix1 t3_ix1 4 NULL 1 Using index condition
3 UNION t4 range t4_ix1 t4_ix1 4 NULL 1 Using index condition
select
json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
from information_schema.optimizer_trace;
t
[
{
"table_alias": "dt",
"key_name": "key0",
"key_parts": 2,
"select":
[
"group_list_in_key",
"unhandled query"
]
},
{
"table_alias": "dt",
"key_name": "key1",
"key_parts": 2,
"select":
[
"group_list_in_key",
"unhandled query"
]
}
]
# intersect
explain
select * from t1
join t2 on t1.a = t2.a and t1.b = t2.b
join
(
select a, b, description from t3 group by a, b
intersect
select c, d, descr from t4 group by c, d
) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b
where dt.a > 2 and dt.a < 4;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 range t1_ix1 t1_ix1 4 NULL 1 Using index condition
1 PRIMARY t2 ref t2_ix1 t2_ix1 9 test.t1.a,test.t1.b 1
1 PRIMARY <derived2> ref key1,distinct_key key1 9 test.t1.a,test.t1.b 2
2 DERIVED t3 range t3_ix1 t3_ix1 4 NULL 2 Using index condition
3 INTERSECT t4 range t4_ix1 t4_ix1 4 NULL 2 Using index condition
NULL INTERSECT RESULT <intersect2,3> ALL NULL NULL NULL NULL NULL
select
json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
from information_schema.optimizer_trace;
t
[
{
"table_alias": "dt",
"key_name": "key1",
"key_parts": 2,
"select":
[
"group_list_in_key",
"group_list_in_key"
],
"rec_per_key_estimate": 2
},
{
"table_alias": "dt",
"key_name": "key2",
"key_parts": 2,
"select":
[
"group_list_in_key",
"group_list_in_key"
],
"rec_per_key_estimate": 2
}
]
drop table t1, t2, t3, t4;
#
# End of 11.4 tests
#
set optimizer_switch=@exit_optimizer_switch;

View File

@@ -1,4 +1,6 @@
# Initialize
--source include/not_embedded.inc
--source include/have_innodb.inc
--disable_warnings
drop table if exists t0,t1,t2,t3;
drop database if exists test1;
@@ -439,5 +441,277 @@ DROP TABLE t1, t2;
--echo # End of 10.3 tests
--echo #
--echo #
--echo # MDEV-36321 keys generated on derived tables produce wrong out_rows estimates
--echo #
--source include/have_sequence.inc
create table t1 (
grp_id int,
value int,
index (grp_id)
);
insert into t1 select
A.seq, B.seq
from
seq_1_to_100 A,
seq_1_to_100 B;
create table t2 (a int);
insert into t2 select seq from seq_1_to_5;
create table t3 (b int);
insert into t3 select seq from seq_1_to_5;
analyze table t1,t2;
SET optimizer_trace='enabled=on';
SET optimizer_trace_max_mem_size=10485760;
select * from
t2,
(select max(value), grp_id from t1 group by grp_id) DT
where
t2.a= DT.grp_id;
select
json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
from information_schema.optimizer_trace;
--echo # Same as above, but try a UNION:
select * from
t2,
(select max(value), grp_id from t1 group by grp_id
union all
select max(value), grp_id from t1 group by grp_id) DT
where
t2.a= DT.grp_id;
select
json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
from information_schema.optimizer_trace;
--echo # Same as the previous query but unhandled group by expression
explain
select * from
t2,
(select max(value), grp_id from t1 group by grp_id
union all
select max(value), grp_id from t1 group by MOD(grp_id,2)) DT
where
t2.a= DT.grp_id;
select
json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
from information_schema.optimizer_trace;
--echo # view/cte/derived merged inside our derived table
create view v1 as select * from t1;
explain
select * from
t2,
(select grp_id, max(value) as maxval from v1 group by grp_id) DT
where
t2.a= DT.grp_id;
drop view v1;
explain
with cte1 as (select * from t1)
select * from
t2,
(select grp_id, max(value) as maxval from cte1 group by grp_id) DT
where
t2.a= DT.grp_id;
explain
select * from
t2,
(
select grp_id, max(value) as maxval from
(
select * from t1, t3
where t1.grp_id = t3.b
) dt1
group by grp_id
) DT
where
t2.a= DT.grp_id;
--echo # Example with equalities on GROUP BY columns and other columns
--echo # Must produce {table=<derived2>, ref=test.t2.col2,test.t2.a, rows=1}
alter table t2 add col2 int;
explain
select * from
t2,
(select max(value) as maxval, grp_id from t1 group by grp_id) DT
where
t2.col2=maxval and
t2.a= DT.grp_id;
select
json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
from information_schema.optimizer_trace;
# Same as above but swap the column order.
# Now we'll get {table=<derived2>, ref=test.t2.a,test.t2.col2, rows=1}
explain
select * from
t2,
(select grp_id, max(value) as maxval from t1 group by grp_id) DT
where
t2.col2=maxval and
t2.a= DT.grp_id;
delete from t1;
insert into t1 select 1, a.seq from seq_1_to_10 a;
analyze table t1;
explain
select * from
t2,
(select distinct grp_id from t1
union all
select distinct value from t1) DT
where
t2.a= DT.grp_id;
select
json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
from information_schema.optimizer_trace;
drop table t1, t2;
create table t1
(
a int not null,
b int,
c int,
d int,
amount decimal,
key t1_ix1 (a,b)
);
--echo # More complex examples
insert into t1 values (0, NULL, 0, NULL, 10.0000), (1, 1, 1, 1, 10.0000),
(2, 2, 2, 2, 20.0000), (3, 3, 3, 3, 30.0000), (4, 4, 4, 4, 40.0000),
(5, 5, 5, 5, NULL), (6, 6, 6, 6, NULL), (7, 7, 7, 7, 70.0000),
(8, 8, 8, 8, 80.0000);
create table t2
(
a int NOT NULL,
b int,
name varchar(50),
key t2_ix1 (a,b)
) engine = innodb;
insert into t2 values (0, NULL, 'a'), (1, NULL, 'A'), (2, 2, 'B'), (3,3, 'C'),
(4,4, 'D'), (5,5, NULL), (6,6, NULL), (7,7, 'E'), (8,8, 'F'), (9,9, 'G'),
(10,10,'H'), (11,11, NULL), (12,12, NULL);
drop table t3;
create table t3
(
a int not null,
b int,
description varchar(50),
key t3_ix1 (a,b)
) engine = innodb;
insert into t3 values (1, 1, 'bar'),(2,2,'buz'), (3,3, 'silver');
insert into t3 select seq, seq, 'junk' from seq_3_to_13;
create table t4
(
c int not null,
d int,
descr varchar(50),
key t4_ix1 (c,d)
) engine = innodb;
insert into t4 values (1, 1, 'iron'), (2,2,'aluminium'), (3,3, 'silver');
insert into t4 select seq, seq, 'junk' from seq_3_to_13;
--echo # split materialized
explain
select * from t1
join t2 on t1.a = t2.a and t1.b = t2.b
join
(
select a, b, description from t3 group by a, b
) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b
where dt.a < 1;
select
json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
from information_schema.optimizer_trace;
--echo # union with distinct rows
explain
select * from t1
join t2 on t1.a = t2.a and t1.b = t2.b
join
(
select a, b, description from t3 group by a, b
union
select c, d, descr from t4 group by c, d
) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b
where dt.a < 1;
select
json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
from information_schema.optimizer_trace;
--echo # union without distinct rows
explain
select * from t1
join t2 on t1.a = t2.a and t1.b = t2.b
join
(
select a, b, description from t3 group by a, b
union all
select c, d, descr from t4 group by c, d
) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b
where dt.a < 1;
select
json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
from information_schema.optimizer_trace;
--echo # union without distinct rows with simple non grouping 2nd select
explain
select * from t1
join t2 on t1.a = t2.a and t1.b = t2.b
join
(
select a, b, description from t3 group by a, b
union all
select c, d, descr from t4
) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b
where dt.a < 1;
select
json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
from information_schema.optimizer_trace;
--echo # intersect
explain
select * from t1
join t2 on t1.a = t2.a and t1.b = t2.b
join
(
select a, b, description from t3 group by a, b
intersect
select c, d, descr from t4 group by c, d
) dt on dt.a = t1.a and dt.b = t1.b and dt.b = t2.b
where dt.a > 2 and dt.a < 4;
select
json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
from information_schema.optimizer_trace;
drop table t1, t2, t3, t4;
--echo #
--echo # End of 11.4 tests
--echo #
# The following command must be the last one in the file
set optimizer_switch=@exit_optimizer_switch;

View File

@@ -283,7 +283,7 @@ on t3.a=t.a and t3.c=t.c
where t3.b > 15;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t3 range idx_b idx_b 5 NULL 2 Using index condition; Using where
1 PRIMARY <derived2> ref key0 key0 133 test.t3.a,test.t3.c 2
1 PRIMARY <derived2> ref key0 key0 133 test.t3.a,test.t3.c 1
2 DERIVED t4 ALL NULL NULL NULL NULL 40 Using filesort
drop table t3, t4;
# End of 10.3 tests
@@ -348,7 +348,7 @@ id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 5
1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 1 Using where
2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
# The important part in the below output is:
@@ -449,7 +449,7 @@ ANALYZE
"ref": ["test.t1.b"],
"loops": 30,
"r_loops": 30,
"rows": 10,
"rows": 1,
"r_rows": 1,
"cost": "REPLACED",
"r_table_time_ms": "REPLACED",
@@ -546,7 +546,7 @@ id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 5
1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 1 Using where
2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
explain
@@ -568,7 +568,7 @@ id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 5
1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 1 Using where
2 LATERAL DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index
2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
@@ -596,7 +596,7 @@ id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t5 eq_ref PRIMARY PRIMARY 4 test.t1.b 1 Using index
1 PRIMARY t2 ref a a 5 test.t1.b 2
1 PRIMARY t3 ref a a 5 test.t1.b 3
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 1 Using where
2 LATERAL DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index
2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t5.pk 100 Using index condition
2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
@@ -660,7 +660,7 @@ id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 5
1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 1 Using where
2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
select *
@@ -737,7 +737,7 @@ id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 5
1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 1 Using where
2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
2 LATERAL DERIVED t11 hash_ALL NULL #hash#$hj 5 test.t10.col1 100 Using where; Using join buffer (flat, BNLH join)
select *
@@ -815,7 +815,7 @@ id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 5
1 PRIMARY t2 ALL NULL NULL NULL NULL 50 Using where; Using join buffer (flat, BNL join)
1 PRIMARY t3 ALL NULL NULL NULL NULL 15 Using where; Using join buffer (incremental, BNL join)
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 1 Using where
2 DERIVED t10 ALL grp_id NULL NULL NULL 10000 Using temporary; Using filesort
2 DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
select *
@@ -997,7 +997,7 @@ T.grp_id=v1.COL10;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 2
1 PRIMARY t2 ref a a 5 test.t1.a 1 Using where; Using index
1 PRIMARY <derived2> ref key0 key0 5 func 10 Using where
1 PRIMARY <derived2> ref key0 key0 5 func 1 Using where
2 DERIVED t10 index grp_id grp_id 5 NULL 10000 Using index; Using temporary; Using filesort
drop table t1,t2, t10;
drop view v1;

View File

@@ -1242,7 +1242,7 @@ SELECT * FROM t1, t2, v1 WHERE t2.a=t1.a AND t2.a=v1.a AND t2.a=v1.b;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 system NULL NULL NULL NULL 1
1 PRIMARY t2 ref a a 4 const 1 Using index
1 PRIMARY <derived2> ref key0 key0 8 const,const 1
1 PRIMARY <derived2> ref key1 key1 8 func,func 1
2 DERIVED t3 ALL NULL NULL NULL NULL 12 Using temporary; Using filesort
SELECT * FROM t1, t2, v1 WHERE t2.a=t1.a AND t2.a=v1.a AND t2.a=v1.b;
a a a b
@@ -2431,7 +2431,7 @@ GROUP BY TABLE_SCHEMA) AS UNIQUES
ON ( COLUMNS.TABLE_SCHEMA = UNIQUES.TABLE_SCHEMA);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY COLUMNS ALL NULL NULL NULL NULL NULL Open_frm_only; Scanned all databases
1 PRIMARY <derived2> ref key0 key0 194 information_schema.COLUMNS.TABLE_SCHEMA 10
1 PRIMARY <derived2> ref key0 key0 194 information_schema.COLUMNS.TABLE_SCHEMA 1
2 DERIVED STATISTICS ALL NULL NULL NULL NULL NULL Open_frm_only; Scanned all databases; Using filesort
SELECT COUNT(*) > 0
FROM INFORMATION_SCHEMA.COLUMNS

View File

@@ -375,7 +375,7 @@ id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 5
1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 1 Using where
2 LATERAL DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index
2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
@@ -443,7 +443,7 @@ id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 5
1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 1 Using where
2 DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index; Using temporary; Using filesort
2 DERIVED t10 ALL NULL NULL NULL NULL 10000
2 DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
@@ -513,7 +513,7 @@ id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 5
1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 1 Using where
2 DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index; Using temporary; Using filesort
2 DERIVED t10 ALL NULL NULL NULL NULL 10000
2 DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
@@ -581,7 +581,7 @@ id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 5
1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 1 Using where
2 LATERAL DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index
2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
@@ -762,7 +762,7 @@ from
one_k T1, (select grp, count(*) from t1000 group by grp) TBL where TBL.grp=T1.a;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY T1 ALL NULL NULL NULL NULL 1000 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.T1.a 10
1 PRIMARY <derived2> ref key0 key0 5 test.T1.a 1
2 DERIVED t1000 index grp grp 5 NULL 1000 Using index; Using temporary; Using filesort
explain
select /*+ SPLIT_MATERIALIZED(TBL) */ *

View File

@@ -12129,7 +12129,7 @@ id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 5
1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 1 Using where
2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
select json_detailed(json_extract(trace, '$**.check_split_materialized')) as JS

View File

@@ -17,8 +17,13 @@
/**
@file
Contains estimate_post_group_cardinality() which estimates cardinality
after GROUP BY operation is applied.
Contains
- estimate_post_group_cardinality() which estimates cardinality
after GROUP BY operation is applied.
- infer_derived_key_statistics() to infer index statistics for
potential indexes on derived tables that have data produced with
a GROUP BY operation.
*/
#include "mariadb.h"
@@ -26,6 +31,8 @@
#include "sql_select.h"
#include "sql_statistics.h"
#include "opt_trace.h"
#include "sql_lex.h"
#include "opt_group_by_cardinality.h"
static
double estimate_table_group_cardinality(JOIN *join, Item ***group_list,
@@ -374,3 +381,148 @@ whole_table:
goto normal_exit;
}
/**
  @brief
    Locate the key part whose field has the same name as the given item.

  @param item       item to look up; it is only considered when its
                    real_item() is a field reference
  @param keyinfo    key whose key parts are searched
  @param key_parts  number of leading key parts of keyinfo to examine

  @return
    zero-based index of the first key part matching by name, or -1 when the
    item is not a field or none of the examined key parts matches
*/

static int item_index_in_key(Item *item, const KEY *keyinfo, uint key_parts)
{
  /* Only plain field references can be matched against key parts */
  if (item->real_item()->type() != Item::FIELD_ITEM)
    return -1;

  uint part_no= 0;
  while (part_no < key_parts)
  {
    /* A zero result from cmp() is treated as a name match */
    if (!cmp(item->name, keyinfo->key_part[part_no].field->field_name))
      return (int)part_no;
    part_no++;
  }
  return -1;
}
/**
@brief
Return TRUE if every item in the list appears in our key
*/
static
bool all_list_contained_in_keyparts(const KEY *keyinfo,
uint key_parts,
SQL_I_List<st_order> *list)
{
for (ORDER *grp= list->first; grp; grp= grp->next)
{
if (item_index_in_key((*grp->item), keyinfo, key_parts) < 0)
return FALSE;
}
return TRUE;
}
/**
  @brief
    When adding a key to a materialized derived table, we can determine some
    key statistics from the query block.

  @param derived    the unit (query expression) that defines the derived table
  @param keyinfo    the key being added; keyinfo->table must already be set,
                    as its alias is written to the optimizer trace
  @param key_parts  number of key parts in keyinfo; expected to be at least 1
                    because rec_per_key[key_parts - 1] is written

  @detail
    Currently, we can infer this
    1) rec_per_key[n-1] (# records for each full key value), when :-
      a) the last query set operation in the chain is not a UNION ALL, implying
         that duplicate rows are removed, so if the select list matches the
         key, we will have one record per distinct key
      b) the query within the block has the DISTINCT flag set, and the select
         list matches our key, we will have one record per distinct key.
      c) The group by list in the query is a subset of our key, we will have
         one record per key.

    When cases b) and c) are applied to the selects of a union, every select
    that is covered contributes one record per key value, and the
    contributions are summed up.  A select that satisfies both b) and c) is
    still counted only once, as it cannot produce more than one record per
    key value.  If any select is covered by neither b) nor c),
    keyinfo->rec_per_key is left untouched.

  @todo
    It is also possible to use predicates combined with existing key or
    histogram statistics on the base tables in our derived table to fill in
    this and other attributes of our generated key
*/

void infer_derived_key_statistics(st_select_lex_unit* derived,
                                  KEY *keyinfo,
                                  uint key_parts)
{
  st_select_lex* select= derived->first_select();
  Json_writer_object wrapper(derived->thd);
  Json_writer_object trace(derived->thd, "infer_derived_key_statistics");

  trace.add("table_alias", keyinfo->table->alias.c_ptr());
  trace.add("key_name", keyinfo->name);
  trace.add("key_parts", key_parts);

  /*
    This whole union/intersect of selects does NOT have the ALL flag, so if
    we have the same number of select list items as key parts, we can guarantee
    that each line in the result set is unique
  */
  if (key_parts == select->item_list.elements &&
      derived->check_distinct_in_union())
  {
    trace.add("distinct_in_query_expression", TRUE);
    keyinfo->rec_per_key[key_parts - 1]= 1;
  }
  else
  {
    Json_writer_array select_proc(derived->thd, "select");
    ulong rec_per_key= 0;
    bool all_selects_covered= TRUE;
    do
    {
      bool this_select_covered= FALSE;
      /*
        This is a SELECT DISTINCT query with $key_parts elements in the
        select list.  This select in the union will produce one record
        per key.
        @todo
        If we come across multiple SELECT DISTINCT selects in this union
        we have a problem in that we do not know anything about how they
        might intersect
      */
      if (key_parts == select->item_list.elements &&
          (select->options & SELECT_DISTINCT))
      {
        select_proc.add("distinct_in_query_block");
        this_select_covered= TRUE;
      }

      /*
        This is a grouping select and the group list is a subset of our key.
        Our key can have additional fields, the rows will still be unique.
      */
      if (select->group_list.elements &&
          all_list_contained_in_keyparts(keyinfo,
                                         key_parts,
                                         &select->group_list))
      {
        select_proc.add("group_list_in_key");
        this_select_covered= TRUE;
      }

      if (this_select_covered)
      {
        /*
          Count the select exactly once, even if both of the conditions
          above hold: it still yields at most one record per key value.
        */
        rec_per_key++;
      }
      else
      {
        select_proc.add("unhandled query");
        all_selects_covered= FALSE;
      }
    } while ((select= select->next_select()));
    /* Close the "select" array before adding more members to the object */
    select_proc.end();

    /*
      If we do not cover all selects here, do not update
      keyinfo->rec_per_key[key_parts - 1] at all
    */
    if (all_selects_covered)
    {
      keyinfo->rec_per_key[key_parts - 1]= rec_per_key;
      trace.add("rec_per_key_estimate", rec_per_key);
    }
  }
}

View File

@@ -0,0 +1,11 @@
#ifndef OPT_GROUP_BY_CARDINALITY
#define OPT_GROUP_BY_CARDINALITY
/*
  Declarations for the GROUP BY related estimates implemented in
  opt_group_by_cardinality.cc.

  NOTE(review): this header uses JOIN, st_select_lex_unit, KEY and uint
  without declaring them, so it presumably must be included after the headers
  that do - confirm against the including files.
*/

/*
  Estimate the cardinality after the GROUP BY operation is applied.
  NOTE(review): join_output_card is presumably the join output cardinality
  before grouping - confirm against the definition.
*/
double estimate_post_group_cardinality(JOIN *join, double join_output_card);

/*
  Infer index statistics (keyinfo->rec_per_key[key_parts - 1]) for a key
  that is being added to a materialized derived table, based on the
  DISTINCT / GROUP BY properties of the selects in 'derived'.
*/
void infer_derived_key_statistics(st_select_lex_unit* derived,
KEY *keyinfo,
uint key_parts);
#endif /* OPT_GROUP_BY_CARDINALITY */

View File

@@ -69,6 +69,7 @@
#include "optimizer_defaults.h"
#include "derived_handler.h"
#include "opt_hints.h"
#include "opt_group_by_cardinality.h"
/*
A key part number that means we're using a fulltext scan.

View File

@@ -2756,8 +2756,6 @@ void propagate_new_equalities(THD *thd, Item *cond,
#define PREV_BITS(type, N_BITS) ((type)my_set_bits(N_BITS))
double estimate_post_group_cardinality(JOIN *join, double join_output_card);
bool dbug_user_var_equals_str(THD *thd, const char *name, const char *value);
#include "opt_vcol_substitution.h"

View File

@@ -51,6 +51,7 @@
#include "rpl_rli.h" // class rpl_group_info
#include "rpl_mi.h" // class Master_info
#include "vector_mhnsw.h"
#include "opt_group_by_cardinality.h"
#ifdef WITH_WSREP
#include "wsrep_schema.h"
@@ -8670,6 +8671,7 @@ bool TABLE::check_tmp_key(uint key, uint key_parts,
key_parts <= tmp_table_max_key_parts();
}
/**
@brief
Add one key to a temporary table
@@ -8728,6 +8730,7 @@ bool TABLE::add_tmp_key(uint key, uint key_parts,
bzero(keyinfo->rec_per_key, sizeof(ulong)*key_parts);
keyinfo->read_stats= NULL;
keyinfo->collected_stats= NULL;
keyinfo->table= this;
for (i= 0; i < key_parts; i++)
{
@@ -8748,25 +8751,10 @@ bool TABLE::add_tmp_key(uint key, uint key_parts,
*/
keyinfo->index_flags= file->index_flags(key, 0, 1);
/*
For the case when there is a derived table that would give distinct rows,
the index statistics are passed to the join optimizer to tell that a ref
access to all the fields of the derived table will produce only one row.
*/
st_select_lex_unit* derived= pos_in_table_list ?
pos_in_table_list->derived: NULL;
if (derived)
{
st_select_lex* first= derived->first_select();
uint select_list_items= first->get_item_list()->elements;
if (key_parts == select_list_items)
{
if ((!first->is_part_of_union() && (first->options & SELECT_DISTINCT)) ||
derived->check_distinct_in_union())
keyinfo->rec_per_key[key_parts - 1]= 1;
}
}
infer_derived_key_statistics(derived, keyinfo, key_parts);
set_if_bigger(s->max_key_length, keyinfo->key_length);
s->keys++;