From dae20dde4e7ae3ad6558eac9aa642fcccecaf13b Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Tue, 11 Jan 2022 17:09:55 +0300 Subject: [PATCH] MDEV-26901: Estimation for filtered rows less precise ... #4 In Histogram_json_hb::point_selectivity(), do return selectivity of 0.0 when the histogram says so. The logic of "Do not return 0.0 estimate as it causes a multiply-by-zero meltdown in cost and cardinality calculations" is moved into records_in_column_ranges() where it is one *once* per column pair (as opposed to doing once per range, which can cause the error to add-up to large number when there are many ranges) --- mysql-test/main/selectivity.result | 4 ++-- mysql-test/main/selectivity_innodb.result | 4 ++-- mysql-test/main/statistics_json.result | 20 ++++++++++++++++++++ mysql-test/main/statistics_json.test | 16 ++++++++++++++++ sql/opt_histogram_json.cc | 2 +- sql/opt_range.cc | 2 +- 6 files changed, 42 insertions(+), 6 deletions(-) diff --git a/mysql-test/main/selectivity.result b/mysql-test/main/selectivity.result index d559d3c39f7..003b91a6a28 100644 --- a/mysql-test/main/selectivity.result +++ b/mysql-test/main/selectivity.result @@ -1245,8 +1245,8 @@ EXPLAIN EXTENDED SELECT * FROM language, country, continent WHERE country_group = lang_group AND lang_group IS NULL; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE country ALL NULL NULL NULL NULL 2 0.00 Using where -1 SIMPLE language ALL NULL NULL NULL NULL 6 0.00 Using where; Using join buffer (flat, BNL join) +1 SIMPLE country ALL NULL NULL NULL NULL 2 50.00 Using where +1 SIMPLE language ALL NULL NULL NULL NULL 6 16.67 Using where; Using join buffer (flat, BNL join) 1 SIMPLE continent ALL NULL NULL NULL NULL 6 100.00 Using join buffer (incremental, BNL join) Warnings: Note 1003 select `test`.`language`.`lang_group` AS `lang_group`,`test`.`language`.`lang` AS `lang`,`test`.`country`.`code` AS `code`,`test`.`country`.`country_group` AS `country_group`,`test`.`continent`.`cont_group` AS `cont_group`,`test`.`continent`.`cont` AS `cont` from `test`.`language` join `test`.`country` join `test`.`continent` where `test`.`language`.`lang_group` = `test`.`country`.`country_group` and `test`.`country`.`country_group` is null diff --git a/mysql-test/main/selectivity_innodb.result b/mysql-test/main/selectivity_innodb.result index 6e6517f7e08..8db83c6bcaf 100644 --- a/mysql-test/main/selectivity_innodb.result +++ b/mysql-test/main/selectivity_innodb.result @@ -1257,8 +1257,8 @@ EXPLAIN EXTENDED SELECT * FROM language, country, continent WHERE country_group = lang_group AND lang_group IS NULL; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE country ALL NULL NULL NULL NULL 2 0.00 Using where -1 SIMPLE language ALL NULL NULL NULL NULL 6 0.00 Using where; Using join buffer (flat, BNL join) +1 SIMPLE country ALL NULL NULL NULL NULL 2 50.00 Using where +1 SIMPLE language ALL NULL NULL NULL NULL 6 16.67 Using where; Using join buffer (flat, BNL join) 1 SIMPLE continent ALL NULL NULL NULL NULL 6 100.00 Using join buffer (incremental, BNL join) Warnings: Note 1003 select `test`.`language`.`lang_group` AS `lang_group`,`test`.`language`.`lang` AS `lang`,`test`.`country`.`code` AS `code`,`test`.`country`.`country_group` AS `country_group`,`test`.`continent`.`cont_group` AS `cont_group`,`test`.`continent`.`cont` AS `cont` from `test`.`language` join `test`.`country` join `test`.`continent` where `test`.`language`.`lang_group` = `test`.`country`.`country_group` and `test`.`country`.`country_group` is null diff --git a/mysql-test/main/statistics_json.result b/mysql-test/main/statistics_json.result index 600b2ddd4ee..9eb8cf87c44 100644 --- a/mysql-test/main/statistics_json.result +++ b/mysql-test/main/statistics_json.result @@ -8264,3 +8264,23 @@ ANALYZE SELECT * FROM t1 WHERE f > '00:01:00'; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra 1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 50.00 50.00 Using where drop table t1; +# +# MDEV-26901: Estimation for filtered rows less precise ... #4 +# +create table t1 (f int); +insert into t1 values +(7),(5),(0),(5),(112),(9),(9),(7),(5),(9), +(1),(7),(0),(6),(6),(2),(1),(6),(169),(7); +select f from t1 where f in (77, 1, 144, 73, 14, 12); +f +1 +1 +set histogram_type= JSON_HB; +analyze table t1 persistent for all; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +analyze select f from t1 where f in (77, 1, 144, 73, 14, 12); +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 20 20.00 10.00 10.00 Using where +drop table t1; diff --git a/mysql-test/main/statistics_json.test b/mysql-test/main/statistics_json.test index 93a526dbfb0..0a1b886f8fd 100644 --- a/mysql-test/main/statistics_json.test +++ b/mysql-test/main/statistics_json.test @@ -429,3 +429,19 @@ ANALYZE TABLE t1 PERSISTENT FOR ALL; ANALYZE SELECT * FROM t1 WHERE f > '00:01:00'; drop table t1; +--echo # +--echo # MDEV-26901: Estimation for filtered rows less precise ... #4 +--echo # +create table t1 (f int); +insert into t1 values + (7),(5),(0),(5),(112),(9),(9),(7),(5),(9), + (1),(7),(0),(6),(6),(2),(1),(6),(169),(7); + +select f from t1 where f in (77, 1, 144, 73, 14, 12); + +set histogram_type= JSON_HB; +analyze table t1 persistent for all; + +analyze select f from t1 where f in (77, 1, 144, 73, 14, 12); +drop table t1; + diff --git a/sql/opt_histogram_json.cc b/sql/opt_histogram_json.cc index 4305737df1c..73284025511 100644 --- a/sql/opt_histogram_json.cc +++ b/sql/opt_histogram_json.cc @@ -921,7 +921,7 @@ double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint, The bucket has a single value and it doesn't match! Return a very small value. */ - sel= 1.0 / total_rows; + sel= 0.0; } else { diff --git a/sql/opt_range.cc b/sql/opt_range.cc index bb5043eecd7..37cf054dfb0 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -3279,7 +3279,7 @@ double records_in_column_ranges(PARAM *param, uint idx, total_rows += rows; } if (total_rows == 0) - total_rows= MY_MIN(1, param->table->stat_records()); + total_rows= MY_MIN(1, rows2double(param->table->stat_records())); return total_rows; }