From b7a784ae259c0fd8ed1adc88b84dcdaa2441987c Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Wed, 23 Jan 2019 15:49:49 +0300 Subject: [PATCH] MDEV-17761: Odd optimizer choice with ORDER BY LIMIT and condition selectivity Make the "ORDER BY ... LIMIT n" optimizer take into account condition selectivity data from EITS (not just from potential range accesses). --- mysql-test/main/order_by.result | 37 +++++++++++++++++++++ mysql-test/main/order_by.test | 37 +++++++++++++++++++++ mysql-test/main/range_vs_index_merge.result | 32 ++++++++++++++++++ mysql-test/main/range_vs_index_merge.test | 17 ++++++++++ sql/sql_select.cc | 22 ++++++++++++ 5 files changed, 145 insertions(+) diff --git a/mysql-test/main/order_by.result b/mysql-test/main/order_by.result index e2b05ccec9e..f466e20bb92 100644 --- a/mysql-test/main/order_by.result +++ b/mysql-test/main/order_by.result @@ -3253,3 +3253,40 @@ Warnings: Note 1003 select `test`.`wings`.`id` AS `wing_id`,`test`.`wings`.`department_id` AS `department_id` from `test`.`wings` semi join (`test`.`books`) where `test`.`books`.`library_id` = 8663 and `test`.`books`.`scheduled_for_removal` = 0 and `test`.`wings`.`id` = `test`.`books`.`wings_id` order by `test`.`wings`.`id` set optimizer_switch= @save_optimizer_switch; DROP TABLE books, wings; +# +# MDEV-17761: Odd optimizer choice with ORDER BY LIMIT and condition selectivity +# +create table t1(a int); +insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t2(a int); +insert into t2 select A.a + B.a* 10 + C.a * 100 from t1 A, t1 B, t1 C; +create table t3(a int); +insert into t3 select A.a + 1000 *B.a from t2 A, t1 B; +create table t4 ( +a int, +b int, +c int, +filler1 char(255), +filler2 char(255), +key(a) +); +insert into t4 select a,a,a, a,a from t3; +set @tmp_h=@@histogram_size, @tmp_u=@@use_stat_tables, +@tmp_o=@@optimizer_use_condition_selectivity; +set histogram_size=100; +set use_stat_tables=preferably; +set optimizer_use_condition_selectivity=4; +analyze table t4 persistent for columns(b) indexes (); +Table Op Msg_type Msg_text +test.t4 analyze status Engine-independent statistics collected +test.t4 analyze status Table is already up to date +# rows must be around 1200, not 600: +explain extended +select * from t4 where b < 5000 order by a limit 600; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t4 index NULL a 5 NULL 1188 100.00 Using where +Warnings: +Note 1003 select `test`.`t4`.`a` AS `a`,`test`.`t4`.`b` AS `b`,`test`.`t4`.`c` AS `c`,`test`.`t4`.`filler1` AS `filler1`,`test`.`t4`.`filler2` AS `filler2` from `test`.`t4` where `test`.`t4`.`b` < 5000 order by `test`.`t4`.`a` limit 600 +set histogram_size=@tmp_h, use_stat_tables=@tmp_u, +optimizer_use_condition_selectivity=@tmp_o; +drop table t1,t2,t3,t4; diff --git a/mysql-test/main/order_by.test b/mysql-test/main/order_by.test index b047a31c863..aab3b024371 100644 --- a/mysql-test/main/order_by.test +++ b/mysql-test/main/order_by.test @@ -2187,3 +2187,40 @@ eval explain extended $q; set optimizer_switch= @save_optimizer_switch; DROP TABLE books, wings; + +--echo # +--echo # MDEV-17761: Odd optimizer choice with ORDER BY LIMIT and condition selectivity +--echo # +create table t1(a int); +insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t2(a int); +insert into t2 select A.a + B.a* 10 + C.a * 100 from t1 A, t1 B, t1 C; +create table t3(a int); +insert into t3 select A.a + 1000 *B.a from t2 A, t1 B; + +create table t4 ( + a int, + b int, + c int, + filler1 char(255), + filler2 char(255), + key(a) +); +insert into t4 select a,a,a, a,a from t3; + +set @tmp_h=@@histogram_size, @tmp_u=@@use_stat_tables, + @tmp_o=@@optimizer_use_condition_selectivity; +set histogram_size=100; +set use_stat_tables=preferably; +set optimizer_use_condition_selectivity=4; +analyze table t4 persistent for columns(b) indexes (); + +--echo # rows must be around 1200, not 600: +explain extended +select * from t4 where b < 5000 order by a limit 600; + +set histogram_size=@tmp_h, use_stat_tables=@tmp_u, + optimizer_use_condition_selectivity=@tmp_o; + +drop table t1,t2,t3,t4; + diff --git a/mysql-test/main/range_vs_index_merge.result b/mysql-test/main/range_vs_index_merge.result index b10e499aad8..59c093ff010 100644 --- a/mysql-test/main/range_vs_index_merge.result +++ b/mysql-test/main/range_vs_index_merge.result @@ -1320,6 +1320,37 @@ WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'H')) AND (Population >= 100000 AND Population < 120000) ORDER BY Population LIMIT 5; id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE City index_merge Country,Name,Population Name,Country 35,3 NULL # Using sort_union(Name,Country); Using where; Using filesort +FLUSH STATUS; +SELECT * FROM City +WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'H')) +AND (Population >= 100000 AND Population < 120000) +ORDER BY Population LIMIT 5; +ID Name Country Population +519 Worthing GBR 100000 +638 al-Arish EGY 100447 +518 Basildon GBR 100924 +707 Marbella ESP 101144 +3792 Tartu EST 101246 +SHOW STATUS LIKE 'Handler_read_%'; +Variable_name Value +Handler_read_first 0 +Handler_read_key 2 +Handler_read_last 0 +Handler_read_next 385 +Handler_read_prev 0 +Handler_read_retry 0 +Handler_read_rnd 377 +Handler_read_rnd_deleted 0 +Handler_read_rnd_next 0 +set @tmp_mdev585=@@optimizer_use_condition_selectivity; +set optimizer_use_condition_selectivity=1; +EXPLAIN +SELECT * FROM City +WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'H')) +AND (Population >= 100000 AND Population < 120000) +ORDER BY Population LIMIT 5; +id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE City range Country,Name,Population Population 4 NULL # Using where FLUSH STATUS; SELECT * FROM City @@ -1343,6 +1374,7 @@ Handler_read_retry 0 Handler_read_rnd 0 Handler_read_rnd_deleted 0 Handler_read_rnd_next 0 +set optimizer_use_condition_selectivity=@tmp_mdev585; set optimizer_switch='index_merge=off'; EXPLAIN SELECT * FROM City diff --git a/mysql-test/main/range_vs_index_merge.test b/mysql-test/main/range_vs_index_merge.test index 84b87579e85..762fb481b39 100644 --- a/mysql-test/main/range_vs_index_merge.test +++ b/mysql-test/main/range_vs_index_merge.test @@ -718,6 +718,23 @@ SELECT * FROM City ORDER BY Population LIMIT 5; SHOW STATUS LIKE 'Handler_read_%'; +set @tmp_mdev585=@@optimizer_use_condition_selectivity; +set optimizer_use_condition_selectivity=1; +--replace_column 9 # +EXPLAIN +SELECT * FROM City + WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'H')) + AND (Population >= 100000 AND Population < 120000) +ORDER BY Population LIMIT 5; + +FLUSH STATUS; +SELECT * FROM City + WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'H')) + AND (Population >= 100000 AND Population < 120000) +ORDER BY Population LIMIT 5; +SHOW STATUS LIKE 'Handler_read_%'; + +set optimizer_use_condition_selectivity=@tmp_mdev585; set optimizer_switch='index_merge=off'; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 978f0785887..4da7cd1a004 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -26884,7 +26884,11 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, uint tablenr= (uint)(tab - join->join_tab); read_time= join->best_positions[tablenr].read_time; for (uint i= tablenr+1; i < join->table_count; i++) + { fanout*= join->best_positions[i].records_read; // fanout is always >= 1 + // But selectivity is =< 1 : + fanout*= join->best_positions[i].cond_selectivity; + } } else read_time= table->file->scan_time(); @@ -27022,6 +27026,23 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, */ select_limit= (ha_rows) (select_limit < fanout ? 1 : select_limit/fanout); + + /* + refkey_rows_estimate is E(#rows) produced by the table access + strategy that was picked without regard to ORDER BY ... LIMIT. + + It will be used as the source of selectivity data. + Use table->cond_selectivity as a better estimate which includes + condition selectivity too. + */ + { + // we use MIN(...), because "Using LooseScan" queries have + // cond_selectivity=1 while refkey_rows_estimate has a better + // estimate. + refkey_rows_estimate= MY_MIN(refkey_rows_estimate, + table_records * table->cond_selectivity); + } + /* We assume that each of the tested indexes is not correlated with ref_key. Thus, to select first N records we have to scan @@ -27032,6 +27053,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, N/(refkey_rows_estimate/table_records) > table_records <=> N > refkey_rows_estimate. */ + if (select_limit > refkey_rows_estimate) select_limit= table_records; else