MDEV-17761: Odd optimizer choice with ORDER BY LIMIT and condition selectivity

Make the "ORDER BY ... LIMIT n" optimizer take into account condition selectivity data from EITS (not just from potential range accesses).
2025-07-27 18:02:13 +03:00 · 2019-01-23 15:49:49 +03:00
parent 36a2a185fe
commit b7a784ae25
5 changed files with 145 additions and 0 deletions
--- a/mysql-test/main/order_by.result
+++ b/mysql-test/main/order_by.result
@ -3253,3 +3253,40 @@ Warnings:
 Note	1003	select `test`.`wings`.`id` AS `wing_id`,`test`.`wings`.`department_id` AS `department_id` from `test`.`wings` semi join (`test`.`books`) where `test`.`books`.`library_id` = 8663 and `test`.`books`.`scheduled_for_removal` = 0 and `test`.`wings`.`id` = `test`.`books`.`wings_id` order by `test`.`wings`.`id`
 set optimizer_switch= @save_optimizer_switch;
 DROP TABLE books, wings;
+#
+# MDEV-17761: Odd optimizer choice with ORDER BY LIMIT and condition selectivity
+#
+create table t1(a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t2(a int);
+insert into t2 select A.a + B.a* 10 + C.a * 100 from t1 A, t1 B, t1 C;
+create table t3(a int);
+insert into t3 select A.a + 1000 *B.a from t2 A, t1 B;
+create table t4 (
+a int,
+b int,
+c int,
+filler1 char(255),
+filler2 char(255),
+key(a)
+);
+insert into t4 select a,a,a, a,a from t3;
+set @tmp_h=@@histogram_size, @tmp_u=@@use_stat_tables, 
+@tmp_o=@@optimizer_use_condition_selectivity;
+set histogram_size=100;
+set use_stat_tables=preferably;
+set optimizer_use_condition_selectivity=4;
+analyze table t4 persistent for columns(b) indexes ();
+Table	Op	Msg_type	Msg_text
+test.t4	analyze	status	Engine-independent statistics collected
+test.t4	analyze	status	Table is already up to date
+# rows must be around 1200, not 600:
+explain extended 
+select * from t4 where b < 5000 order by a limit 600;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t4	index	NULL	a	5	NULL	1188	100.00	Using where
+Warnings:
+Note	1003	select `test`.`t4`.`a` AS `a`,`test`.`t4`.`b` AS `b`,`test`.`t4`.`c` AS `c`,`test`.`t4`.`filler1` AS `filler1`,`test`.`t4`.`filler2` AS `filler2` from `test`.`t4` where `test`.`t4`.`b` < 5000 order by `test`.`t4`.`a` limit 600
+set histogram_size=@tmp_h, use_stat_tables=@tmp_u,
+optimizer_use_condition_selectivity=@tmp_o;
+drop table t1,t2,t3,t4;
--- a/mysql-test/main/order_by.test
+++ b/mysql-test/main/order_by.test
@ -2187,3 +2187,40 @@ eval explain extended $q;
 set optimizer_switch= @save_optimizer_switch;

 DROP TABLE books, wings;
+
+--echo #
+--echo # MDEV-17761: Odd optimizer choice with ORDER BY LIMIT and condition selectivity
+--echo #
+create table t1(a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t2(a int);
+insert into t2 select A.a + B.a* 10 + C.a * 100 from t1 A, t1 B, t1 C;
+create table t3(a int);
+insert into t3 select A.a + 1000 *B.a from t2 A, t1 B;
+ 
+create table t4 (
+  a int,
+  b int,
+  c int,
+  filler1 char(255),
+  filler2 char(255),
+  key(a)
+);
+insert into t4 select a,a,a, a,a from t3;
+
+set @tmp_h=@@histogram_size, @tmp_u=@@use_stat_tables, 
+    @tmp_o=@@optimizer_use_condition_selectivity; 
+set histogram_size=100;
+set use_stat_tables=preferably;
+set optimizer_use_condition_selectivity=4;
+analyze table t4 persistent for columns(b) indexes ();
+
+--echo # rows must be around 1200, not 600:
+explain extended 
+select * from t4 where b < 5000 order by a limit 600;
+
+set histogram_size=@tmp_h, use_stat_tables=@tmp_u,
+    optimizer_use_condition_selectivity=@tmp_o; 
+
+drop table t1,t2,t3,t4;
+
--- a/mysql-test/main/range_vs_index_merge.result
+++ b/mysql-test/main/range_vs_index_merge.result
@ -1320,6 +1320,37 @@ WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'H'))
 AND (Population >= 100000 AND Population < 120000)
 ORDER BY Population LIMIT 5;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	City	index_merge	Country,Name,Population	Name,Country	35,3	NULL	#	Using sort_union(Name,Country); Using where; Using filesort
+FLUSH STATUS;
+SELECT * FROM City
+WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'H'))
+AND (Population >= 100000 AND Population < 120000)
+ORDER BY Population LIMIT 5;
+ID	Name	Country	Population
+519	Worthing	GBR	100000
+638	al-Arish	EGY	100447
+518	Basildon	GBR	100924
+707	Marbella	ESP	101144
+3792	Tartu	EST	101246
+SHOW STATUS LIKE 'Handler_read_%';
+Variable_name	Value
+Handler_read_first	0
+Handler_read_key	2
+Handler_read_last	0
+Handler_read_next	385
+Handler_read_prev	0
+Handler_read_retry	0
+Handler_read_rnd	377
+Handler_read_rnd_deleted	0
+Handler_read_rnd_next	0
+set @tmp_mdev585=@@optimizer_use_condition_selectivity;
+set optimizer_use_condition_selectivity=1;
+EXPLAIN 
+SELECT * FROM City
+WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'H'))
+AND (Population >= 100000 AND Population < 120000)
+ORDER BY Population LIMIT 5;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	City	range	Country,Name,Population	Population	4	NULL	#	Using where
 FLUSH STATUS;
 SELECT * FROM City
@ -1343,6 +1374,7 @@ Handler_read_retry	0
 Handler_read_rnd	0
 Handler_read_rnd_deleted	0
 Handler_read_rnd_next	0
+set optimizer_use_condition_selectivity=@tmp_mdev585;
 set optimizer_switch='index_merge=off';
 EXPLAIN 
 SELECT * FROM City
--- a/mysql-test/main/range_vs_index_merge.test
+++ b/mysql-test/main/range_vs_index_merge.test
@ -718,6 +718,23 @@ SELECT * FROM City
 ORDER BY Population LIMIT 5;
 SHOW STATUS LIKE 'Handler_read_%';

+set @tmp_mdev585=@@optimizer_use_condition_selectivity;
+set optimizer_use_condition_selectivity=1;
+--replace_column 9 #
+EXPLAIN 
+SELECT * FROM City
+  WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'H'))
+        AND (Population >= 100000 AND Population < 120000)
+ORDER BY Population LIMIT 5;
+
+FLUSH STATUS;
+SELECT * FROM City
+  WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'H'))
+        AND (Population >= 100000 AND Population < 120000)
+ORDER BY Population LIMIT 5;
+SHOW STATUS LIKE 'Handler_read_%';
+
+set optimizer_use_condition_selectivity=@tmp_mdev585;

 set optimizer_switch='index_merge=off';

--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@ -26884,7 +26884,11 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
    uint tablenr= (uint)(tab - join->join_tab);
    read_time= join->best_positions[tablenr].read_time;
    for (uint i= tablenr+1; i < join->table_count; i++)
+    {
      fanout*= join->best_positions[i].records_read; // fanout is always >= 1
+      // But selectivity is =< 1 :
+      fanout*= join->best_positions[i].cond_selectivity;
+    }
  }
  else
    read_time= table->file->scan_time();
@ -27022,6 +27026,23 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
        */
        select_limit= (ha_rows) (select_limit < fanout ?
                                 1 : select_limit/fanout);
+
+        /*
+          refkey_rows_estimate is E(#rows) produced by the table access
+          strategy that was picked without regard to ORDER BY ... LIMIT.
+
+          It will be used as the source of selectivity data. 
+          Use table->cond_selectivity as a better estimate which includes
+          condition selectivity too.
+        */
+        {
+          // we use MIN(...), because "Using LooseScan" queries have
+          // cond_selectivity=1 while refkey_rows_estimate has a better
+          // estimate.
+          refkey_rows_estimate= MY_MIN(refkey_rows_estimate,
+                                       table_records * table->cond_selectivity);
+        }
+
        /*
          We assume that each of the tested indexes is not correlated
          with ref_key. Thus, to select first N records we have to scan
@ -27032,6 +27053,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
          N/(refkey_rows_estimate/table_records) > table_records
          <=> N > refkey_rows_estimate.
         */
+
        if (select_limit > refkey_rows_estimate)
          select_limit= table_records;
        else