From 9d4a0dde0ae3e0d46b4c5c0967c25862d467e94e Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Mon, 24 Oct 2016 10:15:11 -0700 Subject: [PATCH] Fixed bug mdev-11096. 1. When min/max value is provided the null flag for it must be set to 0 in the bitmap Culumn_statistics::column_stat_nulls. 2. When the calculation of the selectivity of the range condition over a column requires min and max values for the column then we have to check that these values are provided. --- mysql-test/r/selectivity.result | 24 ++++++++++++++++++++++ mysql-test/r/selectivity_innodb.result | 28 ++++++++++++++++++++++++-- mysql-test/t/selectivity.test | 21 ++++++++++++++++++- sql/sql_statistics.cc | 15 ++++---------- sql/sql_statistics.h | 5 +++++ 5 files changed, 79 insertions(+), 14 deletions(-) diff --git a/mysql-test/r/selectivity.result b/mysql-test/r/selectivity.result index 620bdc6bd50..c2364e11ceb 100644 --- a/mysql-test/r/selectivity.result +++ b/mysql-test/r/selectivity.result @@ -1446,3 +1446,27 @@ a b i set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; DROP TABLE t1,t2; set use_stat_tables=@save_use_stat_tables; +# +# Bug mdev-11096: range condition over column without statistical data +# +set use_stat_tables='preferably'; +set optimizer_use_condition_selectivity=3; +create table t1(col1 char(32)); +insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h'); +analyze table t1 persistent for columns () indexes (); +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +explain extended +select * from t1 where col1 > 'b' and col1 < 'e'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 8 100.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where ((`test`.`t1`.`col1` > 'b') and (`test`.`t1`.`col1` < 'e')) +select * from t1 where col1 > 'b' and col1 < 'e'; +col1 +c +d +drop table t1; +set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; +set use_stat_tables=@save_use_stat_tables; diff --git a/mysql-test/r/selectivity_innodb.result b/mysql-test/r/selectivity_innodb.result index 0acbb465ba8..882f51515b2 100644 --- a/mysql-test/r/selectivity_innodb.result +++ b/mysql-test/r/selectivity_innodb.result @@ -802,9 +802,9 @@ insert into t2 values (2),(3); explain extended select * from t1 where a in ( select b from t2 ) AND ( a > 3 ); id select_type table type possible_keys key key_len ref rows filtered Extra -1 PRIMARY t1 ALL NULL NULL NULL NULL 1 0.00 Using where +1 PRIMARY t1 ALL NULL NULL NULL NULL 1 100.00 Using where 1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 100.00 -2 MATERIALIZED t2 ALL NULL NULL NULL NULL 2 0.00 +2 MATERIALIZED t2 ALL NULL NULL NULL NULL 2 100.00 Warnings: Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` semi join (`test`.`t2`) where ((`test`.`t1`.`a` > 3)) select * from t1 where a in ( select b from t2 ) AND ( a > 3 ); @@ -1450,6 +1450,30 @@ a b i set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; DROP TABLE t1,t2; set use_stat_tables=@save_use_stat_tables; +# +# Bug mdev-11096: range condition over column without statistical data +# +set use_stat_tables='preferably'; +set optimizer_use_condition_selectivity=3; +create table t1(col1 char(32)); +insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h'); +analyze table t1 persistent for columns () indexes (); +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +explain extended +select * from t1 where col1 > 'b' and col1 < 'e'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 8 100.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where ((`test`.`t1`.`col1` > 'b') and (`test`.`t1`.`col1` < 'e')) +select * from t1 where col1 > 'b' and col1 < 'e'; +col1 +c +d +drop table t1; +set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; +set use_stat_tables=@save_use_stat_tables; set optimizer_switch=@save_optimizer_switch_for_selectivity_test; set @tmp_ust= @@use_stat_tables; set @tmp_oucs= @@optimizer_use_condition_selectivity; diff --git a/mysql-test/t/selectivity.test b/mysql-test/t/selectivity.test index c46ff69295f..1321046009e 100644 --- a/mysql-test/t/selectivity.test +++ b/mysql-test/t/selectivity.test @@ -970,6 +970,25 @@ set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivit DROP TABLE t1,t2; - +set use_stat_tables=@save_use_stat_tables; + +--echo # +--echo # Bug mdev-11096: range condition over column without statistical data +--echo # + +set use_stat_tables='preferably'; +set optimizer_use_condition_selectivity=3; + +create table t1(col1 char(32)); +insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h'); +analyze table t1 persistent for columns () indexes (); + +explain extended +select * from t1 where col1 > 'b' and col1 < 'e'; +select * from t1 where col1 > 'b' and col1 < 'e'; + +drop table t1; + +set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; set use_stat_tables=@save_use_stat_tables; diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 47a5a40ebeb..70080a6b4f1 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -1003,11 +1003,13 @@ public: switch (i) { case COLUMN_STAT_MIN_VALUE: + table_field->read_stats->min_value->set_notnull(); stat_field->val_str(&val); table_field->read_stats->min_value->store(val.ptr(), val.length(), &my_charset_bin); break; case COLUMN_STAT_MAX_VALUE: + table_field->read_stats->max_value->set_notnull(); stat_field->val_str(&val); table_field->read_stats->max_value->store(val.ptr(), val.length(), &my_charset_bin); @@ -3659,17 +3661,8 @@ double get_column_range_cardinality(Field *field, { double avg_frequency= col_stats->get_avg_frequency(); res= avg_frequency; - /* - psergey-todo: what does check for min_value, max_value mean? - min/max_value are set to NULL in alloc_statistics_for_table() and - alloc_statistics_for_table_share(). Both functions will immediately - call create_min_max_statistical_fields_for_table and - create_min_max_statistical_fields_for_table_share() respectively, - which will set min/max_value to be valid pointers, unless OOM - occurs. - */ if (avg_frequency > 1.0 + 0.000001 && - col_stats->min_value && col_stats->max_value) + col_stats->min_max_values_are_provided()) { Histogram *hist= &col_stats->histogram; if (hist->is_available()) @@ -3692,7 +3685,7 @@ double get_column_range_cardinality(Field *field, } else { - if (col_stats->min_value && col_stats->max_value) + if (col_stats->min_max_values_are_provided()) { double sel, min_mp_pos, max_mp_pos; diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h index 46e5cef22d1..8e5f8107849 100644 --- a/sql/sql_statistics.h +++ b/sql/sql_statistics.h @@ -388,6 +388,11 @@ public: avg_frequency= (ulong) (val * Scale_factor_avg_frequency); } + bool min_max_values_are_provided() + { + return !is_null(COLUMN_STAT_MIN_VALUE) && + !is_null(COLUMN_STAT_MIN_VALUE); + } };