Change cost for REF to take into account cost for 1 extra key read_next

The main difference in code path between EQ_REF and REF is that for REF we have to do an extra read_next on the index to check that there are no more matching rows. Before this patch we added a preference for EQ_REF by ensuring that REF would always estimate to find at least 2 rows. This patch adds the cost of the extra key read_next to REF access and removes the code that limited REF to at least 2 rows. For some queries this can have a big effect as the total estimated rows will be halved for each REF table with 1 row. multi_range cost calculations are also changed to take into account the difference between EQ_REF and REF. The effect of the patch on the test suite: - About 80 test cases changed - Almost all changes were for EXPLAIN where estimated rows for REF were changed from 2 to 1. - A few test cases using explain extended had a change of 'filtered'. This is because the estimated rows are now closer to the calculated selectivity. - A very few tests had a change of table order. This is because of the change of estimated rows from 2 to 1 or the small cost change for REF (main.subselect_sj_jcl6, main.group_by, main.derived_cond_pushdown, main.distinct, main.join_nested, main.order_by, main.join_cache) - No key statistics and the estimated rows are now smaller which caused estimated filtering to be lower. (main.subselect_sj_mat) - The total number of rows is halved. (main.derived_cond_pushdown) - Plans with 1 row changed to use RANGE instead of REF. (main.group_min_max) - ALL changed to REF (main.key_diff) - Key changed from ref + index_only to PRIMARY key for InnoDB, as OPTIMIZER_ROW_LOOKUP_COST + OPTIMIZER_ROW_NEXT_FIND_COST is smaller than OPTIMIZER_KEY_LOOKUP_COST + OPTIMIZER_KEY_NEXT_FIND_COST. (main.join_outer_innodb) - Cost printout changes (main.opt_trace*) - Result order change (innodb_gis.rtree)
2025-08-07 00:04:31 +03:00 · 2022-12-27 14:49:27 +02:00
parent b5df077e85
commit 3fa99f0c0e
81 changed files with 765 additions and 740 deletions
--- a/sql/multi_range_read.cc
+++ b/sql/multi_range_read.cc
@@ -27,10 +27,25 @@ static void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,



-/* The following calculation is the same as in multi_range_read_info() */
+/*
+  The following calculation is the same as in multi_range_read_info()
+
+  @param cost              Total cost is stored here
+  @param keyno             Key number
+  @param n_ranges          Number of different ranges
+  @param multi_row_ranges  Number of ranges that are not EQ_REF
+  @param flags             Flags. Only HA_MRR_INDEX_ONLY is used.
+  @param total_rows        Number of rows expected to be read.
+  @param io_blocks         Number of blocks we expect to read for
+                           a non-clustered index.
+  @param unassigned_single_point_ranges
+                           Number of blocks we have not yet read for
+                           a clustered index.
+*/

 void handler::calculate_costs(Cost_estimate *cost, uint keyno,
-                              uint n_ranges, uint flags,
+                              uint n_ranges, uint multi_row_ranges,
+                              uint flags,
                              ha_rows total_rows,
                              ulonglong io_blocks,
                              ulonglong unassigned_single_point_ranges)
@@ -39,7 +54,9 @@ void handler::calculate_costs(Cost_estimate *cost, uint keyno,

  if (!is_clustering_key(keyno))
  {
-    cost->index_cost= ha_keyread_time(keyno, n_ranges, total_rows, io_blocks);
+    cost->index_cost= ha_keyread_time(keyno, n_ranges,
+                                      total_rows + multi_row_ranges,
+                                      io_blocks);

    if (!(flags & HA_MRR_INDEX_ONLY))
    {
@@ -58,7 +75,9 @@ void handler::calculate_costs(Cost_estimate *cost, uint keyno,
  {
    /* Clustered index */
    io_blocks= unassigned_single_point_ranges;
-    cost->index_cost= ha_keyread_time(keyno, n_ranges, total_rows, io_blocks);
+    cost->index_cost= ha_keyread_time(keyno, n_ranges,
+                                      total_rows + multi_row_ranges,
+                                      io_blocks);
    cost->copy_cost=  rows2double(total_rows) * ROW_COPY_COST;
  }
  /* Adjust io cost to data size */
@@ -355,7 +374,9 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
  {
    set_if_smaller(total_rows, max_rows);
    *flags |= HA_MRR_USE_DEFAULT_IMPL;
-    calculate_costs(cost, keyno, n_ranges, *flags, total_rows,
+    calculate_costs(cost, keyno, n_ranges,
+                    n_ranges - (uint) single_point_ranges,
+                    *flags, total_rows,
                    io_blocks, unassigned_single_point_ranges);
    if (top_limit < total_rows)
    {
@@ -365,8 +386,10 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
        when we find the 'accepted rows' at once.
      */
      Cost_estimate limit_cost;
-      calculate_costs(&limit_cost, keyno, n_ranges, *flags, top_limit,
-                      io_blocks, unassigned_single_point_ranges);
+      calculate_costs(&limit_cost, keyno, n_ranges,
+                      n_ranges - (uint)single_point_ranges,
+                      *flags, top_limit, io_blocks,
+                      unassigned_single_point_ranges);
      cost->limit_cost= limit_cost.total_cost();
    }
    DBUG_PRINT("statistics",