MDEV-26901: Estimation for filtered rows less precise ... #4

In Histogram_json_hb::point_selectivity(), do return selectivity of 0.0 when the histogram says so. The logic of "Do not return 0.0 estimate as it causes a multiply-by-zero meltdown in cost and cardinality calculations" is moved into records_in_column_ranges() where it is one *once* per column pair (as opposed to doing once per range, which can cause the error to add-up to large number when there are many ranges)
2025-07-30 16:24:05 +03:00 · 2022-01-11 17:09:55 +03:00
parent db8f15be93
commit dae20dde4e
6 changed files with 42 additions and 6 deletions
--- a/mysql-test/main/selectivity.result
+++ b/mysql-test/main/selectivity.result
@ -1245,8 +1245,8 @@ EXPLAIN EXTENDED
 SELECT * FROM language, country, continent
 WHERE country_group = lang_group AND lang_group IS NULL;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	country	ALL	NULL	NULL	NULL	NULL	2	0.00	Using where
-1	SIMPLE	language	ALL	NULL	NULL	NULL	NULL	6	0.00	Using where; Using join buffer (flat, BNL join)
+1	SIMPLE	country	ALL	NULL	NULL	NULL	NULL	2	50.00	Using where
+1	SIMPLE	language	ALL	NULL	NULL	NULL	NULL	6	16.67	Using where; Using join buffer (flat, BNL join)
 1	SIMPLE	continent	ALL	NULL	NULL	NULL	NULL	6	100.00	Using join buffer (incremental, BNL join)
 Warnings:
 Note	1003	select `test`.`language`.`lang_group` AS `lang_group`,`test`.`language`.`lang` AS `lang`,`test`.`country`.`code` AS `code`,`test`.`country`.`country_group` AS `country_group`,`test`.`continent`.`cont_group` AS `cont_group`,`test`.`continent`.`cont` AS `cont` from `test`.`language` join `test`.`country` join `test`.`continent` where `test`.`language`.`lang_group` = `test`.`country`.`country_group` and `test`.`country`.`country_group` is null
--- a/mysql-test/main/selectivity_innodb.result
+++ b/mysql-test/main/selectivity_innodb.result
@ -1257,8 +1257,8 @@ EXPLAIN EXTENDED
 SELECT * FROM language, country, continent
 WHERE country_group = lang_group AND lang_group IS NULL;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	country	ALL	NULL	NULL	NULL	NULL	2	0.00	Using where
-1	SIMPLE	language	ALL	NULL	NULL	NULL	NULL	6	0.00	Using where; Using join buffer (flat, BNL join)
+1	SIMPLE	country	ALL	NULL	NULL	NULL	NULL	2	50.00	Using where
+1	SIMPLE	language	ALL	NULL	NULL	NULL	NULL	6	16.67	Using where; Using join buffer (flat, BNL join)
 1	SIMPLE	continent	ALL	NULL	NULL	NULL	NULL	6	100.00	Using join buffer (incremental, BNL join)
 Warnings:
 Note	1003	select `test`.`language`.`lang_group` AS `lang_group`,`test`.`language`.`lang` AS `lang`,`test`.`country`.`code` AS `code`,`test`.`country`.`country_group` AS `country_group`,`test`.`continent`.`cont_group` AS `cont_group`,`test`.`continent`.`cont` AS `cont` from `test`.`language` join `test`.`country` join `test`.`continent` where `test`.`language`.`lang_group` = `test`.`country`.`country_group` and `test`.`country`.`country_group` is null
--- a/mysql-test/main/statistics_json.result
+++ b/mysql-test/main/statistics_json.result
@ -8264,3 +8264,23 @@ ANALYZE SELECT * FROM t1 WHERE f > '00:01:00';
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	r_rows	filtered	r_filtered	Extra
 1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	1000.00	50.00	50.00	Using where
 drop table t1;
+#
+# MDEV-26901: Estimation for filtered rows less precise ... #4
+#
+create table t1 (f int);
+insert into t1 values
+(7),(5),(0),(5),(112),(9),(9),(7),(5),(9),
+(1),(7),(0),(6),(6),(2),(1),(6),(169),(7);
+select f from t1 where f in (77, 1, 144, 73, 14, 12);
+f
+1
+1
+set histogram_type= JSON_HB;
+analyze table t1 persistent for all;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	Engine-independent statistics collected
+test.t1	analyze	status	OK
+analyze select f from t1 where f in (77, 1, 144, 73, 14, 12);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	r_rows	filtered	r_filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	20	20.00	10.00	10.00	Using where
+drop table t1;
--- a/mysql-test/main/statistics_json.test
+++ b/mysql-test/main/statistics_json.test
@ -429,3 +429,19 @@ ANALYZE TABLE t1 PERSISTENT FOR ALL;
 ANALYZE SELECT * FROM t1 WHERE f > '00:01:00';
 drop table t1;

+--echo #
+--echo # MDEV-26901: Estimation for filtered rows less precise ... #4
+--echo #
+create table t1 (f int);
+insert into t1 values
+  (7),(5),(0),(5),(112),(9),(9),(7),(5),(9),
+  (1),(7),(0),(6),(6),(2),(1),(6),(169),(7);
+
+select f from t1 where f in (77, 1, 144, 73, 14, 12);
+
+set histogram_type= JSON_HB;
+analyze table t1 persistent for all;
+
+analyze select f from t1 where f in (77, 1, 144, 73, 14, 12);
+drop table t1;
+
--- a/sql/opt_histogram_json.cc
+++ b/sql/opt_histogram_json.cc
@ -921,7 +921,7 @@ double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
      The bucket has a single value and it doesn't match! Return a very
      small value.
    */
-    sel= 1.0 / total_rows;
+    sel= 0.0;
  }
  else
  {
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@ -3279,7 +3279,7 @@ double records_in_column_ranges(PARAM *param, uint idx,
    total_rows += rows;
  }
  if (total_rows == 0)
-    total_rows= MY_MIN(1, param->table->stat_records());
+    total_rows= MY_MIN(1, rows2double(param->table->stat_records()));

  return total_rows;
 }