1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-07 00:04:31 +03:00

Change cost for REF to take into account cost for 1 extra key read_next

The main difference in code path between EQ_REF and REF is that for
REF we have to do an extra read_next on the index to check that there
are no more matching rows.

Before this patch we added a preference for EQ_REF by ensuring that REF
would always be estimated to find at least 2 rows.

This patch adds the cost of the extra key read_next to REF access and
removes the code that limited REF to at least 2 rows. For some queries
this can have a big effect as the total estimated rows will be halved
for each REF table with 1 row.

multi_range cost calculations are also changed to take into account
the difference between EQ_REF and REF.

The effect of the patch to the test suite:
- About 80 test cases changed
- Almost all changes were for EXPLAIN, where estimated rows for REF
  were changed from 2 to 1.
- A few test cases using explain extended had a change of 'filtered'.
  This is because the estimated rows are now closer to the
  calculated selectivity.
- A very few tests had a change of table order.
  This is because of the change of estimated rows from 2 to 1 or the small
  cost change for REF
  (main.subselect_sj_jcl6, main.group_by, main.derived_cond_pushdown,
  main.distinct, main.join_nested, main.order_by, main.join_cache)
- No key statistics and the estimated rows are now smaller, which caused
  estimated filtering to be lower.
  (main.subselect_sj_mat)
- The total number of rows is halved.
  (main.derived_cond_pushdown)
- Plans with 1 row changed to use RANGE instead of REF.
  (main.group_min_max)
- ALL changed to REF
  (main.key_diff)
- Key changed from ref + index_only to PRIMARY key for InnoDB, as
  OPTIMIZER_ROW_LOOKUP_COST + OPTIMIZER_ROW_NEXT_FIND_COST is smaller than
  OPTIMIZER_KEY_LOOKUP_COST + OPTIMIZER_KEY_NEXT_FIND_COST.
  (main.join_outer_innodb)
- Cost printout changes
  (main.opt_trace*)
- Result order change
  (innodb_gis.rtree)
This commit is contained in:
Monty
2022-12-27 14:49:27 +02:00
parent b5df077e85
commit 3fa99f0c0e
81 changed files with 765 additions and 740 deletions

View File

@@ -27,10 +27,25 @@ static void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
/* The following calculation is the same as in multi_range_read_info() */
/*
The following calculation is the same as in multi_range_read_info()
@param cost Total cost is stored here
@param keyno Key number
@param n_ranges Number of different ranges
@param multi_row_ranges Number of ranges that are not EQ_REF
@param flags Flags. Only HA_MRR_INDEX_ONLY is used.
@param total_rows Number of rows expected to be read.
@param io_blocks Number of blocks we expect to read for
a non-clustered index.
@param unassigned_single_point_ranges
Number of blocks we have not yet read for
a clustered index.
*/
void handler::calculate_costs(Cost_estimate *cost, uint keyno,
uint n_ranges, uint flags,
uint n_ranges, uint multi_row_ranges,
uint flags,
ha_rows total_rows,
ulonglong io_blocks,
ulonglong unassigned_single_point_ranges)
@@ -39,7 +54,9 @@ void handler::calculate_costs(Cost_estimate *cost, uint keyno,
if (!is_clustering_key(keyno))
{
cost->index_cost= ha_keyread_time(keyno, n_ranges, total_rows, io_blocks);
cost->index_cost= ha_keyread_time(keyno, n_ranges,
total_rows + multi_row_ranges,
io_blocks);
if (!(flags & HA_MRR_INDEX_ONLY))
{
@@ -58,7 +75,9 @@ void handler::calculate_costs(Cost_estimate *cost, uint keyno,
{
/* Clustered index */
io_blocks= unassigned_single_point_ranges;
cost->index_cost= ha_keyread_time(keyno, n_ranges, total_rows, io_blocks);
cost->index_cost= ha_keyread_time(keyno, n_ranges,
total_rows + multi_row_ranges,
io_blocks);
cost->copy_cost= rows2double(total_rows) * ROW_COPY_COST;
}
/* Adjust io cost to data size */
@@ -355,7 +374,9 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
{
set_if_smaller(total_rows, max_rows);
*flags |= HA_MRR_USE_DEFAULT_IMPL;
calculate_costs(cost, keyno, n_ranges, *flags, total_rows,
calculate_costs(cost, keyno, n_ranges,
n_ranges - (uint) single_point_ranges,
*flags, total_rows,
io_blocks, unassigned_single_point_ranges);
if (top_limit < total_rows)
{
@@ -365,8 +386,10 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
when we find the 'accepted rows' at once.
*/
Cost_estimate limit_cost;
calculate_costs(&limit_cost, keyno, n_ranges, *flags, top_limit,
io_blocks, unassigned_single_point_ranges);
calculate_costs(&limit_cost, keyno, n_ranges,
n_ranges - (uint)single_point_ranges,
*flags, top_limit, io_blocks,
unassigned_single_point_ranges);
cost->limit_cost= limit_cost.total_cost();
}
DBUG_PRINT("statistics",