MDEV-35180: ref_to_range rewrite causes poor query plan

(Variant 2: only allow rewrite for ref(const)) make_join_select() has a "ref_to_range" rewrite: it would rewrite any ref access to a range access on the same index if the latter uses more keyparts. It seems, he initial intent of this was to fix poor query plan choice in cases like t.keypart1=const AND t.keypart2 < 'foo' Due to deficiency in cost model, ref access could be picked while range would enumerate fewer rows and be cheaper. However, the condition also forces a rewrite in cases like: t.keypart1=prev_table.col AND t.keypart1<='foo' AND t.keypart2<'bar' Here, it can be that * keypart1=prev_table.col is highly selective * (keypart1, keypart2) <= ('foo', 'bar') is not at all selective. Still, the rewrite would be made and poor query plan chosen. Fixed this by only doing the rewrite if ref access was ref(const) so we can be certain that quick select also used these restrictions and will scan a subset of rows that ref access would scan.
2025-07-29 05:21:33 +03:00 · 2024-10-16 18:35:37 +03:00
parent 3a1cf2c85b
commit 0540eac05c
5 changed files with 99 additions and 10 deletions
--- a/mysql-test/main/join.result
+++ b/mysql-test/main/join.result
@ -3579,3 +3579,35 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	seq	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
 drop table t1,t2;
 set @@optimizer_adjust_secondary_key_costs=default;
+#
+# MDEV-35180: ref_to_range rewrite causes poor query plan
+#
+create table t1 (a int);
+insert into t1 select seq from seq_1_to_100;
+create table t2 (
+kp1 int,
+kp2 int,
+filler char(100),
+key(kp1, kp2)
+);
+insert into t2
+select
+seq, seq,
+'filler-data'
+from seq_1_to_10000;
+analyze table t1,t2;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	Engine-independent statistics collected
+test.t1	analyze	status	OK
+test.t2	analyze	status	Engine-independent statistics collected
+test.t2	analyze	status	Table is already up to date
+# For t2, this must use type=ref, key_len=5 (not type=range, key_len=10)
+explain
+select *
+from t1, t2
+where
+t2.kp1=t1.a and t2.kp1<=100 and t2.kp2<=20;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	100	Using where
+1	SIMPLE	t2	ref	kp1	kp1	5	test.t1.a	1	Using index condition
+drop table t1,t2;
--- a/mysql-test/main/join.test
+++ b/mysql-test/main/join.test
@ -1986,3 +1986,32 @@ explain select count(*) from t1, t2 as seq where a=11 and b=seq.seq;
 drop table t1,t2;

 set @@optimizer_adjust_secondary_key_costs=default;
+
+--echo #
+--echo # MDEV-35180: ref_to_range rewrite causes poor query plan
+--echo #
+create table t1 (a int);
+insert into t1 select seq from seq_1_to_100;
+
+create table t2 (
+  kp1 int,
+  kp2 int,
+  filler char(100),
+  key(kp1, kp2)
+);
+insert into t2
+select
+  seq, seq,
+  'filler-data'
+from seq_1_to_10000;
+
+analyze table t1,t2;
+
+--echo # For t2, this must use type=ref, key_len=5 (not type=range, key_len=10)
+explain
+select *
+from t1, t2
+where
+  t2.kp1=t1.a and t2.kp1<=100 and t2.kp2<=20;
+
+drop table t1,t2;
--- a/mysql-test/main/join_cache.result
+++ b/mysql-test/main/join_cache.result
@ -6187,7 +6187,7 @@ explain format=json
 select *
 from t1 a, t3 b
 where
-b.kp1=a.a and
+b.kp1+1=a.a+1 and
 b.kp1 <= 10 and
 b.kp2 <= 10 and
 b.col1 +1 < 33333;
@ -6197,14 +6197,12 @@ EXPLAIN
    "select_id": 1,
    "table": {
      "table_name": "a",
-      "access_type": "range",
-      "possible_keys": ["PRIMARY"],
+      "access_type": "index",
      "key": "PRIMARY",
      "key_length": "4",
      "used_key_parts": ["a"],
      "rows": 10,
      "filtered": 100,
-      "attached_condition": "a.a <= 10",
      "using_index": true
    },
    "block-nl-join": {
@ -6216,14 +6214,14 @@ EXPLAIN
        "key_length": "10",
        "used_key_parts": ["kp1", "kp2"],
        "rows": 836,
-        "filtered": 76.43428802,
-        "index_condition": "b.kp2 <= 10",
-        "attached_condition": "b.kp2 <= 10 and b.col1 + 1 < 33333"
+        "filtered": 100,
+        "index_condition": "b.kp1 <= 10 and b.kp2 <= 10",
+        "attached_condition": "b.kp1 <= 10 and b.kp2 <= 10 and b.col1 + 1 < 33333"
      },
      "buffer_type": "flat",
      "buffer_size": "54",
      "join_type": "BNL",
-      "attached_condition": "b.kp1 = a.a"
+      "attached_condition": "b.kp1 + 1 = a.a + 1"
    }
  }
 }
--- a/mysql-test/main/join_cache.test
+++ b/mysql-test/main/join_cache.test
@ -4169,7 +4169,7 @@ explain format=json
 select *
 from t1 a, t3 b
 where
-  b.kp1=a.a and
+  b.kp1+1=a.a+1 and
  b.kp1 <= 10 and
  b.kp2 <= 10 and
  b.col1 +1 < 33333;
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@ -12969,11 +12969,41 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)

      used_tables|=current_map;

+      /*
+        Change from using ref access to using quick select on the same index
+        if the quick select uses more key parts.
+
+        There are two cases.
+        A. ref access is ref(const). quick select was also constructed using
+        equality restrictions that ref used, and so it will scan a subset of
+        rows that ref access scans.
+        Example: suppose the index is INDEX(kp1, kp2) and the WHERE has:
+
+          kp1='foo' and kp2 <= 10
+
+        here, ref access will use kp1='foo' and quick select will use
+        (foo) <= (kp1,kp2) <=(foo,10)
+
+        B. ref access is not constant.  In this case, quick select was
+        constructed from some other restriction and in general will scan
+        totally different set of rows (it maybe larger or smaller).
+        Example: for INDEX(kp1, kp2) and the WHERE:
+
+        kp1 <='foo' and kp1=prev_table.col and kp2 <= 10
+
+        the ref access will use kp1=prev_table.col, while quick select will
+        use (-inf) < (kp1, kp2) <= ('foo',10).
+
+        Because of the above, we perform the rewrite ONLY when ref is
+        ref(const).
+      */
      if (tab->type == JT_REF && tab->quick &&
 	  (((uint) tab->ref.key == tab->quick->index &&
 	    tab->ref.key_length < tab->quick->max_used_key_length) ||
           (!is_hash_join_key_no(tab->ref.key) &&
-            tab->table->intersect_keys.is_set(tab->ref.key))))
+            tab->table->intersect_keys.is_set(tab->ref.key))) &&
+          tab->ref.const_ref_part_map ==                       // (ref-is-const)
+              make_prev_keypart_map(tab->ref.key_parts))       // (ref-is-const)
      {
        /* Range uses longer key;  Use this instead of ref on key */
        Json_writer_object ref_to_range(thd);