MDEV-34894: Poor query plan, because range estimates are not reused for ref(const)

(Variant 4, with @@optimizer_adjust_secondary_key_costs, reuse in two places, and conditions are replaced with equivalent simpler forms in two more.) In best_access_path(), ReuseRangeEstimateForRef-3, the check for whether "every used key_part_i has the form key_part_i=const" was incorrect: it could produce a "NO" answer in cases such as: key_part1= const (some key parts are usable); key_part2= value_not_in_join_prefix (present but unusable); key_part3= non_const_value (unusable due to the gap in key parts). This caused the optimizer to fail to apply the ReuseRangeEstimateForRef heuristics. The consequence is a poor query plan choice when the index in question has a very skewed data distribution. The fix is enabled only when the fix_reuse_range_for_ref flag is set in @@optimizer_adjust_secondary_key_costs.
2025-07-30 16:24:05 +03:00 · 2024-09-07 17:17:44 +03:00
parent c41ab95a38
commit c630e23a18
9 changed files with 214 additions and 22 deletions
--- a/mysql-test/main/join.result
+++ b/mysql-test/main/join.result
@ -3495,3 +3495,87 @@ a	b	c
 SET OPTIMIZER_USE_CONDITION_SELECTIVITY=@tmp;
 DROP TABLE t1,t2;
 # End of 10.6 tests
+#
+# MDEV-34894: Poor query plan, because range estimates are not reused for ref(const)
+#
+create table t0 (
+a int,
+b int,
+dummy int
+);
+insert into t0 select seq,seq,seq from seq_1_to_10;
+create table t1 (
+pk1 int,
+pk2 int,
+pk3 int,
+key1 int,
+key(key1),
+filler char(100),
+primary key(pk1,pk2,pk3)
+);
+insert into t1
+select
+seq, seq, seq,
+FLOOR(seq/2),
+'filler-data'
+from seq_1_to_10000;
+analyze table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	Engine-independent statistics collected
+test.t1	analyze	status	OK
+update t1 set pk1=1 where pk1 between 1 and 200;
+explain select * from t1 where pk1=1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	PRIMARY	PRIMARY	4	const	231	
+explain select * from t0,t1 where t1.pk1=t0.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t0	ALL	NULL	NULL	NULL	NULL	10	Using where
+1	SIMPLE	t1	ref	PRIMARY	PRIMARY	4	test.t0.a	1	
+create table t2 (
+col int
+);
+insert into t2 select seq from seq_1_to_10000;
+set optimizer_adjust_secondary_key_costs='fix_reuse_range_for_ref';
+# This must use this good query plan:
+#  t0 - ALL
+#  t1 - ref, key=key1, not PRIMARY as pk1=1 is true for 20% of all rows
+#  t2 - ALL
+explain select * from t0, t1, t2
+where
+t1.pk1=1 and t1.pk2=t2.col and t1.pk3=t0.dummy and
+t1.key1=t0.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t0	ALL	NULL	NULL	NULL	NULL	10	Using where
+1	SIMPLE	t1	ref	PRIMARY,key1	key1	5	test.t0.b	1	Using where
+1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	10000	Using where; Using join buffer (flat, BNL join)
+set optimizer_adjust_secondary_key_costs='';
+# Bad query:
+#  t0 - ALL
+#  t1 - ref, key=PRIMARY
+#  t2 - ALL
+explain select * from t0, t1, t2
+where
+t1.pk1=1 and t1.pk2=t2.col and t1.pk3=t0.dummy and
+t1.key1=t0.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t0	ALL	NULL	NULL	NULL	NULL	10	
+1	SIMPLE	t1	ref	PRIMARY,key1	PRIMARY	4	const	1	Using index condition; Using where
+1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	10000	Using where; Using join buffer (flat, BNL join)
+drop table t0,t1,t2;
+set @@optimizer_adjust_secondary_key_costs="fix_reuse_range_for_ref";
+CREATE OR REPLACE TABLE t1 (a INT NOT NULL, b INT NOT NULL, c INT, key(a,b,c)) ENGINE=Aria;
+INSERT INTO t1 select seq/10,mod(seq,2),seq from seq_1_to_1000;
+update t1 set a=10 WHERE c < 100;
+update t1 set a=12 WHERE a=11;
+insert into t1 values (11,1,11), (11,2,11);
+create or replace table t2 select seq from seq_1_to_10;
+explain select count(*) from t1, t2 as seq where a=10 and b=seq.seq;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	seq	ALL	NULL	NULL	NULL	NULL	10	
+1	SIMPLE	t1	ref	a	a	8	const,test.seq.seq	5	Using where; Using index
+explain select count(*) from t1, t2 as seq where a=11 and b=seq.seq;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ref	a	a	4	const	2	Using index
+1	SIMPLE	seq	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
+drop table t1,t2;
+set @@optimizer_adjust_secondary_key_costs=default;