From 22bc137c2ea9dbba8f2ce34f71a914534549ec8d Mon Sep 17 00:00:00 2001
From: unknown <konstantin@mysql.com>
Date: Mon, 3 Nov 2003 19:56:01 +0300
Subject: [PATCH] fix for bug #1724 'WHERE ... IN() optimizer behaviour has
 changed since 4.0.14' We need to calculate cost of RANGE scan if it is
 present instead of cost of FULL scan.

mysql-test/r/order_by.result:
  more accurate row estimation
  for RANGE scan
mysql-test/r/range.result:
  added test case for bug #1724
  'WHERE ... IN() optimizer behaviour has changed since 4.0.14'
mysql-test/r/select.result:
  please ignore
mysql-test/t/range.test:
  added test case for bug #1724
  'WHERE ... IN() optimizer behaviour has changed since 4.0.14'
sql/sql_select.cc:
  fix for bug #1724 'WHERE ... IN() optimizer behaviour
  has changed since 4.0.14'
  We need to calculate cost of RANGE
  scan instead of cost of FULL scan
  if RANGE is present
  Few comments cleaned up.
---
 mysql-test/r/order_by.result |  6 ++--
 mysql-test/r/range.result    | 17 ++++++++++
 mysql-test/r/select.result   |  4 +--
 mysql-test/t/range.test      | 23 +++++++++++++
 sql/sql_select.cc            | 65 +++++++++++++++++++++++-------------
 5 files changed, 87 insertions(+), 28 deletions(-)

diff --git a/mysql-test/r/order_by.result b/mysql-test/r/order_by.result
index 58f4972d08f..076c80035b2 100644
--- a/mysql-test/r/order_by.result
+++ b/mysql-test/r/order_by.result
@@ -307,17 +307,17 @@ table	type	possible_keys	key	key_len	ref	rows	Extra
 t1	range	a	a	9	NULL	8	Using where; Using index
 explain select * from t1 where a = 2 and b >0 order by a desc,b desc;
 table	type	possible_keys	key	key_len	ref	rows	Extra
-t1	range	a	a	9	NULL	5	Using where; Using index
+t1	range	a	a	9	NULL	4	Using where; Using index
 explain select * from t1 where a = 2 and b is null order by a desc,b desc;
 table	type	possible_keys	key	key_len	ref	rows	Extra
 t1	ref	a	a	9	const,const	1	Using where; Using index; Using filesort
 explain select * from t1 where a = 2 and (b is null or b > 0) order by a
 desc,b desc;
 table	type	possible_keys	key	key_len	ref	rows	Extra
-t1	range	a	a	9	NULL	6	Using where; Using index
+t1	range	a	a	9	NULL	5	Using where; Using index
 explain select * from t1 where a = 2 and b > 0 order by a desc,b desc;
 table	type	possible_keys	key	key_len	ref	rows	Extra
-t1	range	a	a	9	NULL	5	Using where; Using index
+t1	range	a	a	9	NULL	4	Using where; Using index
 explain select * from t1 where a = 2 and b < 2 order by a desc,b desc;
 table	type	possible_keys	key	key_len	ref	rows	Extra
 t1	range	a	a	9	NULL	2	Using where; Using index
diff --git a/mysql-test/r/range.result b/mysql-test/r/range.result
index 2bd80cbfba3..68987009598 100644
--- a/mysql-test/r/range.result
+++ b/mysql-test/r/range.result
@@ -273,3 +273,20 @@ table	type	possible_keys	key	key_len	ref	rows	Extra
 t2	ref	j1	j1	4	const	1	Using where; Using index
 t1	ALL	i2	NULL	NULL	NULL	4	Range checked for each record (index map: 2)
 DROP TABLE t1,t2;
+CREATE TABLE t1 (
+a int(11) default NULL,
+b int(11) default NULL,
+KEY a (a),
+KEY b (b)
+) TYPE=MyISAM;
+INSERT INTO t1 VALUES
+(1,1),(2,1),(3,1),(4,1),(5,1),(6,1),(7,1),(8,1),(9,1),(10,2),(10,2),
+(13,2),(14,2),(15,2),(16,2),(17,3),(17,3),(16,3),(17,3),(19,3),(20,3),
+(21,4),(22,5),(23,5),(24,5),(25,5),(26,5),(30,5),(31,5),(32,5),(33,5),
+(33,5),(33,5),(33,5),(33,5),(34,5),(35,5);
+EXPLAIN SELECT * FROM t1 WHERE a IN(1,2) AND b=5;
+table	type	possible_keys	key	key_len	ref	rows	Extra
+t1	range	a,b	a	5	NULL	2	Using where
+SELECT * FROM t1 WHERE a IN(1,2) AND b=5;
+a	b
+DROP TABLE t1;
diff --git a/mysql-test/r/select.result b/mysql-test/r/select.result
index 09a4ffdb88b..9cc5ad76ff0 100644
--- a/mysql-test/r/select.result
+++ b/mysql-test/r/select.result
@@ -2136,10 +2136,10 @@ a	a	a
 select * from (t1 as t2 left join t1 as t3 using (a)) inner join t1 on t1.a>1;
 a	a	a
 1	1	2
-2	2	2
-3	3	2
 1	1	3
+2	2	2
 2	2	3
+3	3	2
 3	3	3
 select * from t1 inner join (t1 as t2 left join t1 as t3 using (a)) on t1.a>1;
 a	a	a
diff --git a/mysql-test/t/range.test b/mysql-test/t/range.test
index 75ca0fff5f2..a8b7679457b 100644
--- a/mysql-test/t/range.test
+++ b/mysql-test/t/range.test
@@ -214,3 +214,26 @@ explain select * from t1, t2 where (t1.key1 <t2.keya + 1) and t2.keya=3;
 explain select * from t1 force index(i2), t2 where (t1.key1 <t2.keya + 1) and t2.keya=3;
 DROP TABLE t1,t2;
 
+# bug #1724: use RANGE on more selective column instead of REF on less
+# selective
+
+CREATE TABLE t1 (
+  a int(11) default NULL,
+  b int(11) default NULL,
+  KEY a (a),
+  KEY b (b)
+) TYPE=MyISAM;
+
+
+INSERT INTO t1 VALUES
+(1,1),(2,1),(3,1),(4,1),(5,1),(6,1),(7,1),(8,1),(9,1),(10,2),(10,2),
+(13,2),(14,2),(15,2),(16,2),(17,3),(17,3),(16,3),(17,3),(19,3),(20,3),
+(21,4),(22,5),(23,5),(24,5),(25,5),(26,5),(30,5),(31,5),(32,5),(33,5),
+(33,5),(33,5),(33,5),(33,5),(34,5),(35,5);
+
+EXPLAIN SELECT * FROM t1 WHERE a IN(1,2) AND b=5;
+SELECT * FROM t1 WHERE a IN(1,2) AND b=5;
+
+DROP TABLE t1;
+
+# we expect that optimizer will choose key on A
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 55813378b9c..740b0470fdc 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -2145,8 +2145,6 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
 	  !(s->table->force_index && best_key))
       {						// Check full join
         ha_rows rnd_records= s->found_records;
-        /* Estimate cost of reading table. */
-        tmp= s->table->file->scan_time();
         /*
           If there is a restriction on the table, assume that 25% of the
           rows can be skipped on next part.
@@ -2156,36 +2154,57 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
         if (found_constraint)
           rnd_records-= rnd_records/4;
 
-        if (s->on_expr)                         // Can't use join cache
+        /*
+          Range optimizer never proposes a RANGE if it isn't better
+          than FULL: so if RANGE is present, it's always preferred to FULL.
+          Here we estimate its cost.
+        */
+        if (s->quick)
         {
+          /*
+            For each record we:
+             - read record range through 'quick'
+             - skip rows which does not satisfy WHERE constraints
+           */
           tmp= record_count *
-               /* We have to read the whole table for each record */
-               (tmp +     
-               /*
-                 And we have to skip rows which does not satisfy join
-                 condition for each record.
-               */
-               (s->records - rnd_records)/(double) TIME_FOR_COMPARE);
+               (s->quick->read_time +
+               (s->found_records - rnd_records)/(double) TIME_FOR_COMPARE);
         }
         else
         {
-          /* We read the table as many times as join buffer becomes full. */
-          tmp*= (1.0 + floor((double) cache_record_length(join,idx) *
-                             record_count /
-                             (double) thd->variables.join_buff_size));
-          /* 
-            We don't make full cartesian product between rows in the scanned
-            table and existing records because we skip all rows from the
-            scanned table, which does not satisfy join condition when 
-            we read the table (see flush_cached_records for details). Here we
-            take into account cost to read and skip these records.
-          */
-          tmp+= (s->records - rnd_records)/(double) TIME_FOR_COMPARE;
+          /* Estimate cost of reading table. */
+          tmp= s->table->file->scan_time();
+          if (s->on_expr)                         // Can't use join cache
+          {
+            /*
+              For each record we have to:
+              - read the whole table record 
+              - skip rows which does not satisfy join condition
+            */
+            tmp= record_count *
+                 (tmp +     
+                 (s->records - rnd_records)/(double) TIME_FOR_COMPARE);
+          }
+          else
+          {
+            /* We read the table as many times as join buffer becomes full. */
+            tmp*= (1.0 + floor((double) cache_record_length(join,idx) *
+                               record_count /
+                               (double) thd->variables.join_buff_size));
+            /* 
+              We don't make full cartesian product between rows in the scanned
+              table and existing records because we skip all rows from the
+              scanned table, which does not satisfy join condition when 
+              we read the table (see flush_cached_records for details). Here we
+              take into account cost to read and skip these records.
+            */
+            tmp+= (s->records - rnd_records)/(double) TIME_FOR_COMPARE;
+          }
         }
 
         /*
           We estimate the cost of evaluating WHERE clause for found records
-          as record_count * rnd_records + TIME_FOR_COMPARE. This cost plus
+          as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus
           tmp give us total cost of using TABLE SCAN
         */
 	if (best == DBL_MAX ||