From ad842b5f058d5342c22cdc86542baa2ae9db5e70 Mon Sep 17 00:00:00 2001
From: Sergey Petrunya <psergey@askmonty.org>
Date: Wed, 26 Mar 2014 17:55:00 +0400
Subject: [PATCH 1/3] MDEV-5926: EITS: Histogram estimates for
 column=least_possible_value are wrong [Attempt #2] - Use a new selectivity
 calculation formula in Histogram::point_selectivity.   The formula is
 different from the old one because it was developed from scratch.   it
 doesn't have any possible division-by-zero problems.

---
 mysql-test/r/selectivity.result        | 47 +++++++++++--
 mysql-test/r/selectivity_innodb.result | 47 +++++++++++--
 mysql-test/t/selectivity.test          | 25 ++++++-
 sql/sql_statistics.h                   | 92 +++++++++++++++++++++++---
 4 files changed, 191 insertions(+), 20 deletions(-)

diff --git a/mysql-test/r/selectivity.result b/mysql-test/r/selectivity.result
index 2c96ae4ae90..c91e4345f68 100644
--- a/mysql-test/r/selectivity.result
+++ b/mysql-test/r/selectivity.result
@@ -1,4 +1,4 @@
-drop table if exists t1,t2,t3;
+drop table if exists t0,t1,t2,t3;
 select @@global.use_stat_tables;
 @@global.use_stat_tables
 COMPLEMENTARY
@@ -826,7 +826,7 @@ flush table t1;
 set optimizer_use_condition_selectivity=4;
 explain extended select * from t1 where a=0;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1025	49.61	Using where
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1025	0.39	Using where
 Warnings:
 Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0)
 drop table t1;
@@ -1308,15 +1308,54 @@ test.t2	analyze	status	OK
 # The following two must have the same in 'Extra' column:
 explain extended select * from t2 where col1 IN (20, 180);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	1100	1.37	Using where
+1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	1100	1.35	Using where
 Warnings:
 Note	1003	select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (20,180))
 explain extended select * from t2 where col1 IN (180, 20);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	1100	1.37	Using where
+1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	1100	1.35	Using where
 Warnings:
 Note	1003	select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (180,20))
 drop table t1, t2;
+#
+# MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong
+#
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int);
+insert into t1 select A.a from t0 A, t0 B, t0 C;
+set histogram_size=20;
+set histogram_type='single_prec_hb';
+analyze table t1 persistent for all;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	OK
+set use_stat_tables='preferably';
+set optimizer_use_condition_selectivity=4;
+# Should select about 10%:
+explain extended select * from t1 where a=2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	9.52	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 2)
+# Should select about 10%:
+explain extended select * from t1 where a=1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	9.52	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 1)
+# Must not have filtered=100%:
+explain extended select * from t1 where a=0;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	9.52	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0)
+# Again, must not have filtered=100%:
+explain extended select * from t1 where a=-1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	9.52	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = <cache>(-(1)))
+drop table t0, t1;
 set histogram_type=@save_histogram_type;
 set histogram_size=@save_histogram_size;
 set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
diff --git a/mysql-test/r/selectivity_innodb.result b/mysql-test/r/selectivity_innodb.result
index 70ce55b50c4..a348836783e 100644
--- a/mysql-test/r/selectivity_innodb.result
+++ b/mysql-test/r/selectivity_innodb.result
@@ -1,7 +1,7 @@
 SET SESSION STORAGE_ENGINE='InnoDB';
 set @save_optimizer_switch_for_selectivity_test=@@optimizer_switch;
 set optimizer_switch='extended_keys=on';
-drop table if exists t1,t2,t3;
+drop table if exists t0,t1,t2,t3;
 select @@global.use_stat_tables;
 @@global.use_stat_tables
 COMPLEMENTARY
@@ -835,7 +835,7 @@ flush table t1;
 set optimizer_use_condition_selectivity=4;
 explain extended select * from t1 where a=0;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1025	49.61	Using where
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1025	0.39	Using where
 Warnings:
 Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0)
 drop table t1;
@@ -1318,15 +1318,54 @@ test.t2	analyze	status	OK
 # The following two must have the same in 'Extra' column:
 explain extended select * from t2 where col1 IN (20, 180);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	1100	1.37	Using where
+1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	1100	1.35	Using where
 Warnings:
 Note	1003	select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (20,180))
 explain extended select * from t2 where col1 IN (180, 20);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	1100	1.37	Using where
+1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	1100	1.35	Using where
 Warnings:
 Note	1003	select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (180,20))
 drop table t1, t2;
+#
+# MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong
+#
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int);
+insert into t1 select A.a from t0 A, t0 B, t0 C;
+set histogram_size=20;
+set histogram_type='single_prec_hb';
+analyze table t1 persistent for all;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	OK
+set use_stat_tables='preferably';
+set optimizer_use_condition_selectivity=4;
+# Should select about 10%:
+explain extended select * from t1 where a=2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	9.52	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 2)
+# Should select about 10%:
+explain extended select * from t1 where a=1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	9.52	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 1)
+# Must not have filtered=100%:
+explain extended select * from t1 where a=0;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	9.52	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0)
+# Again, must not have filtered=100%:
+explain extended select * from t1 where a=-1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	9.52	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = <cache>(-(1)))
+drop table t0, t1;
 set histogram_type=@save_histogram_type;
 set histogram_size=@save_histogram_size;
 set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
diff --git a/mysql-test/t/selectivity.test b/mysql-test/t/selectivity.test
index 8b7dfdff09f..fe35d9652ff 100644
--- a/mysql-test/t/selectivity.test
+++ b/mysql-test/t/selectivity.test
@@ -1,7 +1,7 @@
 --source include/have_stat_tables.inc
 
 --disable_warnings
-drop table if exists t1,t2,t3;
+drop table if exists t0,t1,t2,t3;
 --enable_warnings
 
 select @@global.use_stat_tables;
@@ -885,6 +885,29 @@ explain extended select * from t2 where col1 IN (180, 20);
 
 drop table t1, t2;
 
+--echo #
+--echo # MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong
+--echo #
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int);
+insert into t1 select A.a from t0 A, t0 B, t0 C;
+set histogram_size=20;
+set histogram_type='single_prec_hb';
+analyze table t1 persistent for all;
+set use_stat_tables='preferably';
+set optimizer_use_condition_selectivity=4;
+--echo # Should select about 10%:
+explain extended select * from t1 where a=2;
+--echo # Should select about 10%:
+explain extended select * from t1 where a=1;
+--echo # Must not have filtered=100%:
+explain extended select * from t1 where a=0;
+--echo # Again, must not have filtered=100%:
+explain extended select * from t1 where a=-1;
+
+drop table t0, t1;
+
 set histogram_type=@save_histogram_type;
 set histogram_size=@save_histogram_size;
 set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
index 68aacd69d98..936f23f1091 100644
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -113,7 +113,7 @@ class Histogram
 
 private:
   Histogram_type type;
-  uint8 size;
+  uint8 size; /* Size of values array, in bytes */
   uchar *values;
 
   uint prec_factor()
@@ -142,6 +142,7 @@ public:
 private:
   uint get_value(uint i)
   {
+    DBUG_ASSERT(i < get_width());
     switch (type) {
     case SINGLE_PREC_HB:
       return (uint) (((uint8 *) values)[i]);
@@ -150,7 +151,7 @@ private:
     }
     return 0;
   }
-
+  /* Find the bucket which value 'pos' falls into. */
   uint find_bucket(double pos, bool first)
   {
     uint val= (uint) (pos * prec_factor());
@@ -169,6 +170,10 @@ private:
       else
         break;
     }
+
+    if (val > get_value(i))
+      i++;
+
     if (val == get_value(i))
     {
       if (first)
@@ -234,24 +239,89 @@ public:
     sel= bucket_sel * (max - min + 1);
     return sel;
   } 
+  
+  
+  /*
+    Estimate selectivity of "col=const" using a histogram
+    
+    @param pos      Position of the "const" between column's min_value and 
+                    max_value.  This is a number in [0..1] range.
+    @param avg_sel  Average selectivity of condition "col=const" in this table.
+                    It is calculated as (#non_null_values / #distinct_values).
+    
+    @return
+       Expected condition selectivity (a number between 0 and 1)
+  */
 
   double point_selectivity(double pos, double avg_sel)
   {
     double sel;
-    double bucket_sel= 1.0/(get_width() + 1);  
+    /* Find the bucket that contains the value 'pos'. */
     uint min= find_bucket(pos, TRUE);
+    uint pos_value= (uint) (pos * prec_factor());
+
+    /* Find how many buckets this value occupies */
     uint max= min;
-    while (max + 1 < get_width() && get_value(max + 1) == get_value(max))
+    while (max + 1 < get_width() && get_value(max + 1) == pos_value)
       max++;
-    double inv_prec_factor= (double) 1.0 / prec_factor(); 
-    double width= (max + 1 == get_width() ?
-                   1.0 : get_value(max) * inv_prec_factor) -
-	          (min == 0 ?
-                   0.0 : get_value(min-1) * inv_prec_factor); 
-    sel= avg_sel * (bucket_sel * (max + 1 - min)) / width;
+
+    if (max > min)
+    {
+      /*
+        The value occupies multiple buckets. Use start_bucket ... end_bucket as
+        selectivity.
+      */
+      double bucket_sel= 1.0/(get_width() + 1);  
+      sel= bucket_sel * (max - min + 1);
+    }
+    else
+    {
+      /* 
+        The value 'pos' fits within one single histogram bucket.
+
+        Histogram buckets have the same numbers of rows, but they cover
+        different ranges of values.
+
+        We assume that values are uniformly distributed across the [0..1] value
+        range.
+      */
+
+      /* 
+        If all buckets covered value ranges of the same size, the width of
+        value range would be:
+      */
+      double avg_bucket_width= 1.0 / (get_width() + 1);
+      
+      /*
+        Let's see what is the width of value range that our bucket is covering.
+          (min==max currently. they are kept in the formula just in case we 
+           will want to extend it to handle multi-bucket case)
+      */
+      double inv_prec_factor= (double) 1.0 / prec_factor(); 
+      double current_bucket_width= 
+          (max + 1 == get_width() ?  1.0 : (get_value(max) * inv_prec_factor)) -
+          (min == 0 ?  0.0 : (get_value(min-1) * inv_prec_factor));
+
+      /*
+        So:
+        - each bucket has the same #rows 
+        - values are unformly distributed across the [min_value,max_value] domain.
+
+        If a bucket has value range that's N times bigger then average, than
+        each value will have to have N times fewer rows than average.
+      */
+      DBUG_ASSERT(current_bucket_width);
+      sel= avg_sel * avg_bucket_width / current_bucket_width;
+
+      /*
+        (Q: if we just follow this proportion we may end up in a situation
+        where number of different values we expect to find in this bucket
+        exceeds the number of rows that this histogram has in a bucket. Are 
+        we ok with this or we would want to have certain caps?)
+      */
+    }
     return sel;
   }
-             
 };
 
 

From dee11f9633be3091bd7d3c0b868e4ea1efe4ac7f Mon Sep 17 00:00:00 2001
From: Sergey Petrunya <psergey@askmonty.org>
Date: Wed, 26 Mar 2014 21:05:31 +0400
Subject: [PATCH 2/3] MDEV-4362: {division by zero when lookup constant is
 outside the value table} - Fix Histogram::point_selectivity() to work in the
 case where the   passed value_pos=0 (or 1) and the first (or the last) bucket
 in the   histogram has zero value-range (i.e one value).

---
 mysql-test/r/selectivity.result        | 31 ++++++++++++++++++++++++++
 mysql-test/r/selectivity_innodb.result | 31 ++++++++++++++++++++++++++
 mysql-test/t/selectivity.test          | 16 +++++++++++++
 sql/sql_statistics.h                   | 29 ++++++++++++++++--------
 4 files changed, 98 insertions(+), 9 deletions(-)

diff --git a/mysql-test/r/selectivity.result b/mysql-test/r/selectivity.result
index c91e4345f68..27e05e22714 100644
--- a/mysql-test/r/selectivity.result
+++ b/mysql-test/r/selectivity.result
@@ -1356,6 +1356,37 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
 Warnings:
 Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = <cache>(-(1)))
 drop table t0, t1;
+#
+# MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range
+#
+create table t1 (col1 int);
+set @a=-1;
+create table t2 (a int)  select (@a:=@a+1) as a from information_schema.session_variables A limit 100;
+insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100;
+select min(col1), max(col1), count(*) from t1;
+min(col1)	max(col1)	count(*)
+0	99	10000
+set histogram_size=100;
+analyze table t1 persistent for all;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	OK
+explain extended select * from t1 where col1 in (1,2,3);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	3.37	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (1,2,3))
+# Must not cause fp division by zero, or produce nonsense numbers:
+explain extended select * from t1 where col1 in (-1,-2,-3);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	3.00	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (<cache>(-(1)),<cache>(-(2)),<cache>(-(3))))
+explain extended select * from t1 where col1<=-1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	1.00	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` <= <cache>(-(1)))
+drop table t1, t2;
 set histogram_type=@save_histogram_type;
 set histogram_size=@save_histogram_size;
 set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
diff --git a/mysql-test/r/selectivity_innodb.result b/mysql-test/r/selectivity_innodb.result
index a348836783e..104b465b5a4 100644
--- a/mysql-test/r/selectivity_innodb.result
+++ b/mysql-test/r/selectivity_innodb.result
@@ -1366,6 +1366,37 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
 Warnings:
 Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = <cache>(-(1)))
 drop table t0, t1;
+#
+# MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range
+#
+create table t1 (col1 int);
+set @a=-1;
+create table t2 (a int)  select (@a:=@a+1) as a from information_schema.session_variables A limit 100;
+insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100;
+select min(col1), max(col1), count(*) from t1;
+min(col1)	max(col1)	count(*)
+0	99	10000
+set histogram_size=100;
+analyze table t1 persistent for all;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	OK
+explain extended select * from t1 where col1 in (1,2,3);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	3.37	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (1,2,3))
+# Must not cause fp division by zero, or produce nonsense numbers:
+explain extended select * from t1 where col1 in (-1,-2,-3);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	3.00	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (<cache>(-(1)),<cache>(-(2)),<cache>(-(3))))
+explain extended select * from t1 where col1<=-1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	1.00	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` <= <cache>(-(1)))
+drop table t1, t2;
 set histogram_type=@save_histogram_type;
 set histogram_size=@save_histogram_size;
 set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
diff --git a/mysql-test/t/selectivity.test b/mysql-test/t/selectivity.test
index fe35d9652ff..3e4940d53e4 100644
--- a/mysql-test/t/selectivity.test
+++ b/mysql-test/t/selectivity.test
@@ -908,6 +908,22 @@ explain extended select * from t1 where a=-1;
 
 drop table t0, t1;
 
+--echo #
+--echo # MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range
+--echo #
+create table t1 (col1 int);
+set @a=-1;
+create table t2 (a int)  select (@a:=@a+1) as a from information_schema.session_variables A limit 100;
+insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100;
+select min(col1), max(col1), count(*) from t1;
+set histogram_size=100;
+analyze table t1 persistent for all;
+explain extended select * from t1 where col1 in (1,2,3);
+--echo # Must not cause fp division by zero, or produce nonsense numbers:
+explain extended select * from t1 where col1 in (-1,-2,-3);
+explain extended select * from t1 where col1<=-1;
+drop table t1, t2;
+
 set histogram_type=@save_histogram_type;
 set histogram_size=@save_histogram_size;
 set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
index 936f23f1091..da6a9035b44 100644
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -302,16 +302,27 @@ public:
           (max + 1 == get_width() ?  1.0 : (get_value(max) * inv_prec_factor)) -
           (min == 0 ?  0.0 : (get_value(min-1) * inv_prec_factor));
 
-      /*
-        So:
-        - each bucket has the same #rows 
-        - values are unformly distributed across the [min_value,max_value] domain.
+      if (current_bucket_width < 1e-16)
+      {
+        /*
+          A special case: we are at the first (or the last) bucket in the
+          histogram, the bucket's value range is a singlepoint [x,x], and 
+          pos_value=0 (for the first bucket) or pos_value=1 (for the last).
+        */
+        sel= avg_sel;
+      }
+      else
+      {
+        /*
+          So:
+          - each bucket has the same #rows 
+          - values are unformly distributed across the [min_value,max_value] domain.
 
-        If a bucket has value range that's N times bigger then average, than
-        each value will have to have N times fewer rows than average.
-      */
-      DBUG_ASSERT(current_bucket_width);
-      sel= avg_sel * avg_bucket_width / current_bucket_width;
+          If a bucket has value range that's N times bigger then average, than
+          each value will have to have N times fewer rows than average.
+        */
+        sel= avg_sel * avg_bucket_width / current_bucket_width;
+      }
 
       /*
         (Q: if we just follow this proportion we may end up in a situation

From ab061a2bb3723c00eb5c88ecc1cb90ee7f1458e6 Mon Sep 17 00:00:00 2001
From: Sergey Petrunya <psergey@askmonty.org>
Date: Thu, 27 Mar 2014 12:30:49 +0400
Subject: [PATCH 3/3] MDEV-5926, MDEV-4362 post-fixes: -
 Histogram::find_bucket() should not walk off the end of the value range. -
 Address review feedback in Histogram::point_selectivity(): different handling
   for zero-width buckets, and explanations.

---
 mysql-test/r/selectivity.result        |  2 +-
 mysql-test/r/selectivity_innodb.result |  2 +-
 sql/sql_statistics.h                   | 64 +++++++++++++++++---------
 3 files changed, 45 insertions(+), 23 deletions(-)

diff --git a/mysql-test/r/selectivity.result b/mysql-test/r/selectivity.result
index 27e05e22714..7e29b1014fc 100644
--- a/mysql-test/r/selectivity.result
+++ b/mysql-test/r/selectivity.result
@@ -1378,7 +1378,7 @@ Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1
 # Must not cause fp division by zero, or produce nonsense numbers:
 explain extended select * from t1 where col1 in (-1,-2,-3);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	3.00	Using where
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	5.94	Using where
 Warnings:
 Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (<cache>(-(1)),<cache>(-(2)),<cache>(-(3))))
 explain extended select * from t1 where col1<=-1;
diff --git a/mysql-test/r/selectivity_innodb.result b/mysql-test/r/selectivity_innodb.result
index 104b465b5a4..e91cfa41d73 100644
--- a/mysql-test/r/selectivity_innodb.result
+++ b/mysql-test/r/selectivity_innodb.result
@@ -1388,7 +1388,7 @@ Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1
 # Must not cause fp division by zero, or produce nonsense numbers:
 explain extended select * from t1 where col1 in (-1,-2,-3);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	3.00	Using where
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	5.94	Using where
 Warnings:
 Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (<cache>(-(1)),<cache>(-(2)),<cache>(-(3))))
 explain extended select * from t1 where col1<=-1;
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
index da6a9035b44..d0db0a3bf33 100644
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -151,6 +151,7 @@ private:
     }
     return 0;
   }
+
   /* Find the bucket which value 'pos' falls into. */
   uint find_bucket(double pos, bool first)
   {
@@ -171,7 +172,7 @@ private:
         break;
     }
 
-    if (val > get_value(i))
+    if (val > get_value(i) && i < (get_width() - 1))
       i++;
 
     if (val == get_value(i))
@@ -251,6 +252,27 @@ public:
     
     @return
        Expected condition selectivity (a number between 0 and 1)
+
+    @notes 
+       [re_zero_length_buckets] If a bucket with zero value-length is in the
+       middle of the histogram, we will not have min==max. Example: suppose, 
+       pos_value=0x12, and the histogram is:
+
+             #n  #n+1 #n+2                 
+        ... 0x10 0x12 0x12 0x14 ...
+                        |
+                        +------------- bucket with zero value-length
+      
+        Here, we will get min=#n+1, max=#n+2, and use the multi-bucket formula.
+       
+        The problem happens at the histogram ends. If pos_value=0, and the
+        histogram is:
+
+        0x00 0x10 ...
+
+        then min=0, max=0. This means pos_value is contained within bucket #0,
+        but on the other hand, histogram data says that the bucket has only one
+        value.
   */
 
   double point_selectivity(double pos, double avg_sel)
@@ -264,6 +286,16 @@ public:
     uint max= min;
     while (max + 1 < get_width() && get_value(max + 1) == pos_value)
       max++;
+    
+    /*
+      A special case: we're looking at a single bucket, and that bucket has
+      zero value-length. Use the multi-bucket formula (an attempt to use the
+      single-bucket formula would cause division by zero).
+
+      For more details see [re_zero_length_buckets] above.
+    */
+    if (max == min && get_value(max) == ((max==0)? 0 : get_value(max-1)))
+      max++;
 
     if (max > min)
     {
@@ -302,27 +334,17 @@ public:
           (max + 1 == get_width() ?  1.0 : (get_value(max) * inv_prec_factor)) -
           (min == 0 ?  0.0 : (get_value(min-1) * inv_prec_factor));
 
-      if (current_bucket_width < 1e-16)
-      {
-        /*
-          A special case: we are at the first (or the last) bucket in the
-          histogram, the bucket's value range is a singlepoint [x,x], and 
-          pos_value=0 (for the first bucket) or pos_value=1 (for the last).
-        */
-        sel= avg_sel;
-      }
-      else
-      {
-        /*
-          So:
-          - each bucket has the same #rows 
-          - values are unformly distributed across the [min_value,max_value] domain.
+      DBUG_ASSERT(current_bucket_width); /* We shouldn't get a one zero-width bucket */
 
-          If a bucket has value range that's N times bigger then average, than
-          each value will have to have N times fewer rows than average.
-        */
-        sel= avg_sel * avg_bucket_width / current_bucket_width;
-      }
+      /*
+        So:
+        - each bucket has the same #rows 
+        - values are uniformly distributed across the [min_value,max_value] domain.
+
+        If a bucket has a value range that's N times bigger than average, then
+        each value will have to have N times fewer rows than average.
+      */
+      sel= avg_sel * avg_bucket_width / current_bucket_width;
 
       /*
         (Q: if we just follow this proportion we may end up in a situation