Fix for bug #1724 'WHERE ... IN() optimizer behaviour has changed since 4.0.14'

We need to calculate the cost of a RANGE scan, if one is present, instead of the cost of a FULL scan.
mysql-test/r/order_by.result: more accurate row estimation for RANGE scan.
mysql-test/r/range.result: added test case for bug #1724 'WHERE ... IN() optimizer behaviour has changed since 4.0.14'.
mysql-test/r/select.result: please ignore.
mysql-test/t/range.test: added test case for bug #1724 'WHERE ... IN() optimizer behaviour has changed since 4.0.14'.
sql/sql_select.cc: fix for bug #1724 'WHERE ... IN() optimizer behaviour has changed since 4.0.14'. We need to calculate the cost of a RANGE scan instead of the cost of a FULL scan if a RANGE is present. A few comments were cleaned up.
2025-12-24 11:21:21 +03:00 · 2003-11-03 19:56:01 +03:00
parent 15cc0ad2ec
commit 22bc137c2e
5 changed files with 87 additions and 28 deletions
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -2145,8 +2145,6 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
 	  !(s->table->force_index && best_key))
      {						// Check full join
        ha_rows rnd_records= s->found_records;
-        /* Estimate cost of reading table. */
-        tmp= s->table->file->scan_time();
        /*
          If there is a restriction on the table, assume that 25% of the
          rows can be skipped on next part.
@@ -2156,36 +2154,57 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
        if (found_constraint)
          rnd_records-= rnd_records/4;

-        if (s->on_expr)                         // Can't use join cache
+        /*
+          Range optimizer never proposes a RANGE if it isn't better
+          than FULL: so if RANGE is present, it's always preferred to FULL.
+          Here we estimate its cost.
+        */
+        if (s->quick)
        {
+          /*
+            For each record we:
+             - read the record range through 'quick'
+             - skip rows which do not satisfy the WHERE constraints
+          */
          tmp= record_count *
-               /* We have to read the whole table for each record */
-               (tmp +     
-               /*
-                 And we have to skip rows which does not satisfy join
-                 condition for each record.
-               */
-               (s->records - rnd_records)/(double) TIME_FOR_COMPARE);
+               (s->quick->read_time +
+               (s->found_records - rnd_records)/(double) TIME_FOR_COMPARE);
        }
        else
        {
-          /* We read the table as many times as join buffer becomes full. */
-          tmp*= (1.0 + floor((double) cache_record_length(join,idx) *
-                             record_count /
-                             (double) thd->variables.join_buff_size));
-          /* 
-            We don't make full cartesian product between rows in the scanned
-            table and existing records because we skip all rows from the
-            scanned table, which does not satisfy join condition when 
-            we read the table (see flush_cached_records for details). Here we
-            take into account cost to read and skip these records.
-          */
-          tmp+= (s->records - rnd_records)/(double) TIME_FOR_COMPARE;
+          /* Estimate cost of reading table. */
+          tmp= s->table->file->scan_time();
+          if (s->on_expr)                         // Can't use join cache
+          {
+            /*
+              For each record we have to:
+              - read the whole table
+              - skip rows which do not satisfy the join condition
+            */
+            tmp= record_count *
+                 (tmp +     
+                 (s->records - rnd_records)/(double) TIME_FOR_COMPARE);
+          }
+          else
+          {
+            /* We read the table as many times as join buffer becomes full. */
+            tmp*= (1.0 + floor((double) cache_record_length(join,idx) *
+                               record_count /
+                               (double) thd->variables.join_buff_size));
+            /* 
+              We don't make full cartesian product between rows in the scanned
+              table and existing records because we skip all rows from the
+              scanned table, which does not satisfy join condition when 
+              we read the table (see flush_cached_records for details). Here we
+              take into account cost to read and skip these records.
+            */
+            tmp+= (s->records - rnd_records)/(double) TIME_FOR_COMPARE;
+          }
        }

        /*
          We estimate the cost of evaluating WHERE clause for found records
-          as record_count * rnd_records + TIME_FOR_COMPARE. This cost plus
+          as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus
          tmp give us total cost of using TABLE SCAN
        */
 	if (best == DBL_MAX ||