MDEV-26301 Split optimization refills temporary table too many times

This patch optimizes the number of refills for the lateral derived table to which a materialized derived table subject to split optimization is converted. This optimized number of refills is now considered as the expected number of refills of the materialized derived table when searching for the best possible splitting of the table.
2025-07-29 05:21:33 +03:00 · 2023-05-02 23:17:07 -07:00
parent ec79f37718
commit ce7ffe61d8
5 changed files with 910 additions and 32 deletions
--- a/mysql-test/main/derived_split_innodb.result
+++ b/mysql-test/main/derived_split_innodb.result
@@ -284,3 +284,514 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 2	DERIVED	t4	ALL	NULL	NULL	NULL	NULL	40	Using filesort
 drop table t3, t4;
 # End of 10.3 tests
+#
+# MDEV-26301: Split optimization refills temporary table too many times
+#
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+analyze table t1,t2,t3 persistent for all;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	Engine-independent statistics collected
+test.t1	analyze	status	OK
+test.t2	analyze	status	Engine-independent statistics collected
+test.t2	analyze	status	Table is already up to date
+test.t3	analyze	status	Engine-independent statistics collected
+test.t3	analyze	status	Table is already up to date
+explain
+select * from
+(t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	5	
+1	SIMPLE	t2	ref	a	a	5	test.t1.b	2	Using where
+1	SIMPLE	t3	ref	a	a	5	test.t1.b	3	Using where
+create table t10 (
+grp_id int,
+col1 int,
+key(grp_id)
+);
+insert into t10
+select
+A.seq,
+B.seq
+from
+seq_1_to_100 A,
+seq_1_to_100 B;
+create table t11 (
+col1 int,
+col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+analyze table t10,t11 persistent for all;
+Table	Op	Msg_type	Msg_text
+test.t10	analyze	status	Engine-independent statistics collected
+test.t10	analyze	status	Table is already up to date
+test.t11	analyze	status	Engine-independent statistics collected
+test.t11	analyze	status	OK
+explain select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
+1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
+1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t1.b	100	
+2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
+# The important part in the below output is:
+#        "lateral": 1,
+#        "query_block": {
+#          "select_id": 2,
+#          "r_loops": 5,  <-- must be 5, not 30.
+analyze format=json select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+ANALYZE
+{
+  "query_block": {
+    "select_id": 1,
+    "r_loops": 1,
+    "r_total_time_ms": "REPLACED",
+    "const_condition": "1",
+    "table": {
+      "table_name": "t1",
+      "access_type": "ALL",
+      "r_loops": 1,
+      "rows": 5,
+      "r_rows": 5,
+      "r_total_time_ms": "REPLACED",
+      "filtered": 100,
+      "r_filtered": 100
+    },
+    "table": {
+      "table_name": "t2",
+      "access_type": "ref",
+      "possible_keys": ["a"],
+      "key": "a",
+      "key_length": "5",
+      "used_key_parts": ["a"],
+      "ref": ["test.t1.b"],
+      "r_loops": 5,
+      "rows": 2,
+      "r_rows": 2,
+      "r_total_time_ms": "REPLACED",
+      "filtered": 100,
+      "r_filtered": 100,
+      "attached_condition": "trigcond(trigcond(t1.b is not null))"
+    },
+    "table": {
+      "table_name": "t3",
+      "access_type": "ref",
+      "possible_keys": ["a"],
+      "key": "a",
+      "key_length": "5",
+      "used_key_parts": ["a"],
+      "ref": ["test.t1.b"],
+      "r_loops": 10,
+      "rows": 3,
+      "r_rows": 3,
+      "r_total_time_ms": "REPLACED",
+      "filtered": 100,
+      "r_filtered": 100,
+      "attached_condition": "trigcond(trigcond(t1.b is not null))"
+    },
+    "table": {
+      "table_name": "<derived2>",
+      "access_type": "ref",
+      "possible_keys": ["key0"],
+      "key": "key0",
+      "key_length": "5",
+      "used_key_parts": ["grp_id"],
+      "ref": ["test.t1.b"],
+      "r_loops": 30,
+      "rows": 10,
+      "r_rows": 1,
+      "r_total_time_ms": "REPLACED",
+      "filtered": 100,
+      "r_filtered": 100,
+      "attached_condition": "trigcond(trigcond(t1.b is not null))",
+      "materialized": {
+        "lateral": 1,
+        "query_block": {
+          "select_id": 2,
+          "r_loops": 5,
+          "r_total_time_ms": "REPLACED",
+          "outer_ref_condition": "t1.b is not null",
+          "table": {
+            "table_name": "t10",
+            "access_type": "ref",
+            "possible_keys": ["grp_id"],
+            "key": "grp_id",
+            "key_length": "5",
+            "used_key_parts": ["grp_id"],
+            "ref": ["test.t1.b"],
+            "r_loops": 5,
+            "rows": 100,
+            "r_rows": 100,
+            "r_total_time_ms": "REPLACED",
+            "filtered": 100,
+            "r_filtered": 100
+          },
+          "block-nl-join": {
+            "table": {
+              "table_name": "t11",
+              "access_type": "ALL",
+              "r_loops": 5,
+              "rows": 10,
+              "r_rows": 10,
+              "r_total_time_ms": "REPLACED",
+              "filtered": 100,
+              "r_filtered": 100
+            },
+            "buffer_type": "flat",
+            "buffer_size": "1Kb",
+            "join_type": "BNL",
+            "attached_condition": "trigcond(t11.col1 = t10.col1)",
+            "r_filtered": 10
+          }
+        }
+      }
+    }
+  }
+}
+create table t21 (pk int primary key);
+insert into t21 values (1),(2),(3);
+create table t22 (pk int primary key);
+insert into t22 values (1),(2),(3);
+explain
+select * from
+t21, t22,
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1 and t22.pk=2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t21	const	PRIMARY	PRIMARY	4	const	1	Using index
+1	PRIMARY	t22	const	PRIMARY	PRIMARY	4	const	1	Using index
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
+1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
+1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t1.b	100	
+2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
+explain
+select * from
+t21,
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from
+t22 join t10 left join t11 on t11.col1=t10.col1
+where
+t22.pk=1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t21	const	PRIMARY	PRIMARY	4	const	1	Using index
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
+1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
+1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+2	LATERAL DERIVED	t22	const	PRIMARY	PRIMARY	4	const	1	Using index
+2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t1.b	100	
+2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
+create table t5 (
+pk int primary key
+);
+insert into t5 select seq from seq_1_to_1000;
+explain
+select * from
+t21,
+(
+(((t1 join t5 on t5.pk=t1.b)) left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from
+t22 join t10 left join t11 on t11.col1=t10.col1
+where
+t22.pk=1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t21	const	PRIMARY	PRIMARY	4	const	1	Using index
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	Using where
+1	PRIMARY	t5	eq_ref	PRIMARY	PRIMARY	4	test.t1.b	1	Using index
+1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	
+1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+2	LATERAL DERIVED	t22	const	PRIMARY	PRIMARY	4	const	1	Using index
+2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t5.pk	100	Using index condition
+2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
+drop table t1,t2,t3,t5, t10, t11, t21, t22;
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+analyze table t1,t2,t3 persistent for all;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	Engine-independent statistics collected
+test.t1	analyze	status	OK
+test.t2	analyze	status	Engine-independent statistics collected
+test.t2	analyze	status	Table is already up to date
+test.t3	analyze	status	Engine-independent statistics collected
+test.t3	analyze	status	Table is already up to date
+create table t10 (
+grp_id int,
+col1 int,
+key(grp_id)
+);
+insert into t10
+select
+A.seq,
+B.seq
+from
+seq_1_to_100 A,
+seq_1_to_100 B;
+create table t11 (
+col1 int,
+col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+analyze table t10,t11 persistent for all;
+Table	Op	Msg_type	Msg_text
+test.t10	analyze	status	Engine-independent statistics collected
+test.t10	analyze	status	Table is already up to date
+test.t11	analyze	status	Engine-independent statistics collected
+test.t11	analyze	status	OK
+explain select *
+from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join
+t3
+on t3.a=t1.b
+) 
+left join
+( 
+select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id
+)dt
+on dt.grp_id=t1.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
+1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
+1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t1.b	100	
+2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
+select *
+from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join
+t3
+on t3.a=t1.b
+) 
+left join
+( 
+select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id
+)dt
+on dt.grp_id=t1.b;
+a	b	a	b	a	b	grp_id	count(*)
+1	1	1	1	1	1	1	100
+1	1	1	1	1	2	1	100
+1	1	1	1	1	3	1	100
+1	1	1	2	1	1	1	100
+1	1	1	2	1	2	1	100
+1	1	1	2	1	3	1	100
+2	2	2	1	2	1	2	100
+2	2	2	1	2	2	2	100
+2	2	2	1	2	3	2	100
+2	2	2	2	2	1	2	100
+2	2	2	2	2	2	2	100
+2	2	2	2	2	3	2	100
+3	3	3	1	3	1	3	100
+3	3	3	1	3	2	3	100
+3	3	3	1	3	3	3	100
+3	3	3	2	3	1	3	100
+3	3	3	2	3	2	3	100
+3	3	3	2	3	3	3	100
+4	4	4	1	4	1	4	100
+4	4	4	1	4	2	4	100
+4	4	4	1	4	3	4	100
+4	4	4	2	4	1	4	100
+4	4	4	2	4	2	4	100
+4	4	4	2	4	3	4	100
+5	5	5	1	5	1	5	100
+5	5	5	1	5	2	5	100
+5	5	5	1	5	3	5	100
+5	5	5	2	5	1	5	100
+5	5	5	2	5	2	5	100
+5	5	5	2	5	3	5	100
+set join_cache_level=4;
+explain select *
+from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join
+t3
+on t3.a=t1.b
+) 
+left join
+( 
+select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id
+)dt
+on dt.grp_id=t1.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
+1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
+1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t1.b	100	
+2	LATERAL DERIVED	t11	hash_ALL	NULL	#hash#$hj	5	test.t10.col1	10	Using where; Using join buffer (flat, BNLH join)
+select *
+from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join
+t3
+on t3.a=t1.b
+) 
+left join
+( 
+select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id
+)dt
+on dt.grp_id=t1.b;
+a	b	a	b	a	b	grp_id	count(*)
+1	1	1	1	1	1	1	100
+1	1	1	1	1	2	1	100
+1	1	1	1	1	3	1	100
+1	1	1	2	1	1	1	100
+1	1	1	2	1	2	1	100
+1	1	1	2	1	3	1	100
+2	2	2	1	2	1	2	100
+2	2	2	1	2	2	2	100
+2	2	2	1	2	3	2	100
+2	2	2	2	2	1	2	100
+2	2	2	2	2	2	2	100
+2	2	2	2	2	3	2	100
+3	3	3	1	3	1	3	100
+3	3	3	1	3	2	3	100
+3	3	3	1	3	3	3	100
+3	3	3	2	3	1	3	100
+3	3	3	2	3	2	3	100
+3	3	3	2	3	3	3	100
+4	4	4	1	4	1	4	100
+4	4	4	1	4	2	4	100
+4	4	4	1	4	3	4	100
+4	4	4	2	4	1	4	100
+4	4	4	2	4	2	4	100
+4	4	4	2	4	3	4	100
+5	5	5	1	5	1	5	100
+5	5	5	1	5	2	5	100
+5	5	5	1	5	3	5	100
+5	5	5	2	5	1	5	100
+5	5	5	2	5	2	5	100
+5	5	5	2	5	3	5	100
+set join_cache_level=default;
+drop index a on t2;
+drop index a on t3;
+explain select *
+from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join
+t3
+on t3.a=t1.b
+) 
+left join
+( 
+select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id
+)dt
+on dt.grp_id=t1.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	50	Using where; Using join buffer (flat, BNL join)
+1	PRIMARY	t3	ALL	NULL	NULL	NULL	NULL	15	Using where; Using join buffer (incremental, BNL join)
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	1000	Using where
+2	DERIVED	t10	ALL	grp_id	NULL	NULL	NULL	10000	Using temporary; Using filesort
+2	DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
+select *
+from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join
+t3
+on t3.a=t1.b
+) 
+left join
+( 
+select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id
+)dt
+on dt.grp_id=t1.b;
+a	b	a	b	a	b	grp_id	count(*)
+1	1	1	1	1	1	1	100
+1	1	1	2	1	1	1	100
+1	1	1	1	1	2	1	100
+1	1	1	2	1	2	1	100
+1	1	1	1	1	3	1	100
+1	1	1	2	1	3	1	100
+2	2	2	1	2	1	2	100
+2	2	2	2	2	1	2	100
+2	2	2	1	2	2	2	100
+2	2	2	2	2	2	2	100
+2	2	2	1	2	3	2	100
+2	2	2	2	2	3	2	100
+3	3	3	1	3	1	3	100
+3	3	3	2	3	1	3	100
+3	3	3	1	3	2	3	100
+3	3	3	2	3	2	3	100
+3	3	3	1	3	3	3	100
+3	3	3	2	3	3	3	100
+4	4	4	1	4	1	4	100
+4	4	4	2	4	1	4	100
+4	4	4	1	4	2	4	100
+4	4	4	2	4	2	4	100
+4	4	4	1	4	3	4	100
+4	4	4	2	4	3	4	100
+5	5	5	1	5	1	5	100
+5	5	5	2	5	1	5	100
+5	5	5	1	5	2	5	100
+5	5	5	2	5	2	5	100
+5	5	5	1	5	3	5	100
+5	5	5	2	5	3	5	100
+drop table t1,t2,t3;
+drop table t10, t11;
+# End of 10.4 tests