
MDEV-31356: Range cost calculations does not take into account join_buffer

This patch also fixes
MDEV-31391 Assertion `((best.records_out) == 0.0 ... failed

Cost changes caused by this change:
- range queries with join buffer now have a notably smaller cost (see the
  refill sketch after this list).
- range scans are a bit more expensive as the MULTI_RANGE_COST is now
  properly applied to them in all cases (this extra cost is equal to a
  key lookup).
- table scan cost is slightly smaller as we now assume data is cached in
  the engine after the first scan pass (we already did this for range
  scans and other access methods).
- partitioned tables had wrong values for max_row_blocks and
  max_index_blocks.  Correcting this causes range access on
  partitioned tables to have a slightly higher cost because of the
  increased estimated IO.
- Using first match + join buffer caused 'filtered' to be calculated
  incorrectly (this only affected EXPLAIN, not query costs).
- Added cost_without_join_buffer to optimizer_trace.
- check_quick_select() adjusted the number of rows according to persistent
  statistics, but did not adjust cost. Now fixed.
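
As a rough illustration of the refill-based join buffer costing, here is a
minimal sketch of the refill estimate used in best_access_path() in this
patch (1 + floor(cached record length * record count / join_buff_size)).
estimate_refills() is a hypothetical helper and the input numbers are
invented; this is not server code.

#include <cmath>
#include <cstdio>

/* Hypothetical helper; mirrors the tmp_refills expression in the patch */
static double estimate_refills(double cached_record_length,
                               double record_count,
                               double join_buff_size)
{
  /* One initial fill plus one refill per join buffer overflow */
  return 1.0 + std::floor(cached_record_length *
                          (record_count / join_buff_size));
}

int main()
{
  /* 200 byte cached rows, 1M row combinations, 256KB join buffer */
  std::printf("refills: %g\n", estimate_refills(200.0, 1e6, 262144.0));
  return 0;                                   /* prints: refills: 763 */
}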

The big changes in the patch are:

- In best_access_path(), we now store the cost in 'ALL_READ_COST cost' and
  only convert it to a double at the end. This allows us to calculate the
  effect of the join cache more exactly (see the sketch after this list).
- In JOIN_TAB::estimate_scan_time(), the cost is also stored in an
  ALL_READ_COST object.
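
A minimal sketch of this idea, under the assumption of a much simplified
cost structure: ReadCost and cost_for_multiple_reads() below are
illustrative stand-ins, not the real ALL_READ_COST or
handler::cost_for_reading_multiple_times(). Keeping the IO and CPU
components separate lets the join cache path cap the IO part at the block
counts (data is cached after the first pass) while the CPU part still
scales with the number of scans; a single pre-multiplied double cannot
express that.

#include <algorithm>
#include <cstdio>

struct ReadCost                     /* illustrative, not ALL_READ_COST */
{
  double index_io, row_io;          /* estimated blocks to read per scan */
  double cpu;                       /* copy + comparison cost per scan */
  double max_index_blocks, max_row_blocks;  /* caps: cached after 1st pass */
};

static double cost_for_multiple_reads(double reads, const ReadCost &c)
{
  /* IO is capped at the number of blocks; CPU is paid on every scan */
  double io= std::min(c.index_io * reads, c.max_index_blocks) +
             std::min(c.row_io * reads, c.max_row_blocks);
  return io + c.cpu * reads;
}

int main()
{
  ReadCost scan= {0.0, 100.0, 50.0, 0.0, 100.0};
  std::printf("1 scan:  %g\n", cost_for_multiple_reads(1.0, scan)); /* 150 */
  std::printf("8 scans: %g\n", cost_for_multiple_reads(8.0, scan)); /* 500 */
  return 0;
}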

One effect of this change is that when joining very small tables, a plan like:

t1    some_access_method
t2    range
t3    ALL         Use join buffer

is switched to:

t1      some_access_method
t3      ALL
t2      range      use join buffer

Both plans have the same cost, but as the table scan in this case has a
lower cost than the range scan, the table scan will be considered first and
thus take precedence.

Test case changes:
- optimizer_trace          - Addition of cost_without_join_buffer
- subselect_mat_cost_bugs  - Small tables and scan versus range
- range & range_mrr_icp    - Range + join_cache is faster than ref
- optimizer_trace          - cost_without_join_buffer, smaller scan cost,
                             range setup cost.
- mrr                      - range + join_buffer is used as it has a smaller cost
Author: Monty
Date: 2023-05-26 17:26:42 +03:00
parent 7079386587
commit 07b02ab40e
23 changed files with 444 additions and 195 deletions

@@ -106,6 +106,13 @@
#define double_to_ulonglong(A) ((A) >= ((double)ULONGLONG_MAX) ? ULONGLONG_MAX : (ulonglong) (A))
/* Used to ensure that costs are calculate the same way */
inline bool compare_cost(double a, double b)
{
DBUG_ASSERT(a >= 0.0 && b >= 0.0);
return (a >= b - b/10000000.0 && a <= b+b/10000000.0);
}
inline double safe_filtered(double a, double b)
{
return b != 0 ? a/b*100.0 : 0.0;
@@ -8985,7 +8992,8 @@ best_access_path(JOIN *join,
copy_cost= (record_count * records_after_filter * WHERE_COST_THD(thd) +
startup_cost);
cur_cost= (file->cost_for_reading_multiple_times(prev_record_count, &tmp) +
cur_cost= (file->cost_for_reading_multiple_times(prev_record_count,
&tmp) +
copy_cost);
if (unlikely(trace_access_idx.trace_started()))
@@ -9242,6 +9250,7 @@ best_access_path(JOIN *join,
uint forced_index= MAX_KEY;
bool force_plan= 0, use_join_buffer= 0;
ulonglong refills= 1;
ALL_READ_COST cost;
/*
Range optimizer never proposes a RANGE if it isn't better
@@ -9255,16 +9264,7 @@ best_access_path(JOIN *join,
For each record we:
- read record range through 'quick'
- skip rows which does not satisfy WHERE constraints
Note that s->quick->read_time includes the cost of comparing
the row with the where clause (WHERE_COST)
TODO:
We take into account possible use of join cache for ALL/index
access (see first else-branch below), but we don't take it into
account here for range/index_merge access. Find out why this is so.
*/
cur_cost= COST_MULT(s->quick->read_time, record_count);
/*
Use record count from range optimizer.
@@ -9288,33 +9288,38 @@ best_access_path(JOIN *join,
DBUG_ASSERT(range->rows >= s->found_records);
DBUG_ASSERT((range->cost.total_cost() == 0.0 &&
s->quick->read_time == 0.0) ||
(range->cost.total_cost() / s->quick->read_time <= 1.0000001 &&
range->cost.total_cost() / s->quick->read_time >= 0.9999999));
compare_cost(range->cost.total_cost(),
s->quick->read_time));
DBUG_ASSERT(compare_cost(range->cost.comp_cost,
range->rows * file->WHERE_COST));
/* Get range cost. This does not include cost of the WHERE */
range->get_costs(&cost);
/* Ensure that cost from opt_range are correct */
DBUG_ASSERT(compare_cost(file->cost_no_capping(&cost) +
range->cost.comp_cost +
range->cost.setup_cost,
s->quick->read_time));
range->get_costs(&tmp);
if (table->can_use_rowid_filter(key_no))
{
filter= table->best_range_rowid_filter(key_no,
rows2double(range->rows),
file->cost(&tmp),
file->cost(tmp.index_cost),
file->cost(&cost),
file->cost(cost.index_cost),
record_count,
&records_best_filter);
set_if_smaller(best.records_out, records_best_filter);
if (filter)
{
filter= filter->apply_filter(thd, table, &tmp,
filter= filter->apply_filter(thd, table, &cost,
&records_after_filter,
&startup_cost,
range->ranges,
record_count);
if (filter)
{
tmp.row_cost.cpu+= records_after_filter * WHERE_COST_THD(thd);
cur_cost= file->cost_for_reading_multiple_times(record_count,
&tmp);
cur_cost= COST_ADD(cur_cost, startup_cost);
startup_cost= 0; // Avoid adding it again later
set_if_smaller(best.records_out, records_after_filter);
table->opt_range[key_no].selectivity= filter->selectivity;
}
}
@@ -9331,10 +9336,24 @@ best_access_path(JOIN *join,
force_plan= 1;
}
type= JT_RANGE;
/*
We cannot use range->cost.cmp_cost here as records_after_filter
is be different if filter is used.
*/
cost.copy_cost+= (records_after_filter * file->WHERE_COST +
range->cost.setup_cost);
}
else
{
type= JT_INDEX_MERGE;
/*
We don't know exactly from where the costs comes from.
Let's store it in copy_cost.
Note that s->quick->read_time includes the cost of comparing
the row with the where clause (WHERE_COST)
*/
cost.reset();
cost.copy_cost= s->quick->read_time;
}
loose_scan_opt.check_range_access(join, idx, s->quick);
}
@@ -9360,7 +9379,7 @@ best_access_path(JOIN *join,
if (s->cached_forced_index_type)
{
type= s->cached_forced_index_type;
cur_cost= s->cached_forced_index_cost;
cost= s->cached_forced_index_cost;
forced_index= s->cached_forced_index;
}
else
@@ -9376,7 +9395,7 @@ best_access_path(JOIN *join,
{
/* Use value from estimate_scan_time */
forced_index= s->cached_covering_key;
cur_cost= s->cached_scan_and_compare_time;
cost= s->cached_scan_and_compare_cost;
}
else
{
@@ -9386,93 +9405,93 @@ best_access_path(JOIN *join,
keys.intersect(table->keys_in_use_for_query);
if ((forced_index= find_shortest_key(table, &keys)) < MAX_KEY)
{
ALL_READ_COST cost= cost_for_index_read(thd, table,
forced_index,
s->records, 0);
cur_cost= file->cost(cost);
cost= cost_for_index_read(thd, table,
forced_index,
s->records, 0);
/* Calculate cost of checking the attached WHERE */
cur_cost= COST_ADD(cur_cost,
s->records * WHERE_COST_THD(thd));
cost.copy_cost+= s->records * file->WHERE_COST;
}
else
#endif
{
/* No usable key, use table scan */
cur_cost= s->cached_scan_and_compare_time;
cost= s->cached_scan_and_compare_cost;
type= JT_ALL;
}
}
}
else // table scan
{
cur_cost= s->cached_scan_and_compare_time;
cost= s->cached_scan_and_compare_cost;
type= JT_ALL;
}
/* Cache result for other calls */
s->cached_forced_index_type= type;
s->cached_forced_index_cost= cur_cost;
s->cached_forced_index_cost= cost;
s->cached_forced_index= forced_index;
}
}
if (disable_jbuf || (table->map & join->outer_join))
{
/*
Simple scan
We estimate we have to read org_records rows.
records_after_filter rows will survive the where check of constants.
'best.records_out' rows will survive after the check against columns
from previous tables.
*/
scan_type= "scan";
if (disable_jbuf || (table->map & join->outer_join))
{
/*
Simple scan
We estimate we have to read org_records rows.
records_after_filter rows will survive the where check of constants.
'best.records_out' rows will survive after the check against columns
from previous tables.
*/
scan_type= "scan";
/*
We have to compare each row set against all previous row combinations
*/
cur_cost= COST_MULT(cur_cost, record_count);
}
else
{
/* Scan trough join cache */
double cmp_time, row_copy_cost, tmp_refills;
/*
We have to compare each row set against all previous row combinations
*/
cur_cost= file->cost_for_reading_multiple_times(record_count,
&cost);
}
else
{
/* Scan trough join cache */
double cmp_time, row_copy_cost, tmp_refills;
/*
Note that the cost of checking all rows against the table specific
WHERE is already included in cur_cost.
*/
scan_type= "scan_with_join_cache";
/*
Note that the cost of checking all rows against the table specific
WHERE is already included in cur_cost.
*/
scan_type= "scan_with_join_cache";
/* Calculate cost of refills */
tmp_refills= (1.0 + floor((double) cache_record_length(join,idx) *
(record_count /
(double) thd->variables.join_buff_size)));
cur_cost= COST_MULT(cur_cost, tmp_refills);
refills= double_to_ulonglong(ceil(tmp_refills));
/* Calculate cost of refills */
tmp_refills= (1.0 + floor((double) cache_record_length(join,idx) *
(record_count /
(double) thd->variables.join_buff_size)));
cur_cost= file->cost_for_reading_multiple_times(tmp_refills,
&cost);
refills= double_to_ulonglong(ceil(tmp_refills));
/* We come here only if there are already rows in the join cache */
DBUG_ASSERT(idx != join->const_tables);
/*
records_after_filter is the number of rows that have survived
the table specific WHERE check that only involves constants.
/* We come here only if there are already rows in the join cache */
DBUG_ASSERT(idx != join->const_tables);
/*
records_after_filter is the number of rows that have survived
the table specific WHERE check that only involves constants.
Calculate cost of:
- Copying all previous record combinations to the join cache
- Copying the tables from the join cache to table records
- Checking the WHERE against the final row combination
*/
row_copy_cost= (ROW_COPY_COST_THD(thd) *
JOIN_CACHE_ROW_COPY_COST_FACTOR(thd));
cmp_time= (record_count * row_copy_cost +
records_after_filter * record_count *
((idx - join->const_tables) * row_copy_cost +
WHERE_COST_THD(thd)));
cur_cost= COST_ADD(cur_cost, cmp_time);
use_join_buffer= 1;
}
Calculate cost of:
- Copying all previous record combinations to the join cache
- Copying the tables from the join cache to table records
- Checking the WHERE against the final row combination
*/
row_copy_cost= (ROW_COPY_COST_THD(thd) *
JOIN_CACHE_ROW_COPY_COST_FACTOR(thd));
cmp_time= (record_count * row_copy_cost +
records_after_filter * record_count *
((idx - join->const_tables) * row_copy_cost +
WHERE_COST_THD(thd)));
cur_cost= COST_ADD(cur_cost, cmp_time);
use_join_buffer= 1;
}
/* Splitting technique cannot be used with join cache */
if (table->is_splittable())
startup_cost= table->get_materialization_cost();
startup_cost+= table->get_materialization_cost();
cur_cost+= startup_cost;
if (unlikely(trace_access_scan.trace_started()))
@@ -9488,6 +9507,10 @@ best_access_path(JOIN *join,
add("rows_after_filter", records_after_filter).
add("rows_out", best.records_out).
add("cost", cur_cost);
if (use_join_buffer)
trace_access_scan.
add("cost_without_join_buffer",
file->cost_for_reading_multiple_times(record_count, &cost));
if (type == JT_ALL)
{
trace_access_scan.add("index_only",
@@ -15745,7 +15768,9 @@ void JOIN_TAB::estimate_scan_time()
{
THD *thd= join->thd;
handler *file= table->file;
double copy_cost;
double row_copy_cost, copy_cost;
ALL_READ_COST * const cost= &cached_scan_and_compare_cost;
cost->reset();
cached_covering_key= MAX_KEY;
if (table->is_created())
@@ -15756,7 +15781,8 @@ void JOIN_TAB::estimate_scan_time()
&startup_cost);
table->opt_range_condition_rows= records;
table->used_stat_records= records;
copy_cost= file->ROW_COPY_COST;
cost->row_cost.cpu= read_time;
row_copy_cost= file->ROW_COPY_COST;
}
else
{
@@ -15770,14 +15796,15 @@ void JOIN_TAB::estimate_scan_time()
if (!table->covering_keys.is_clear_all() && ! table->no_keyread)
{
cached_covering_key= find_shortest_key(table, &table->covering_keys);
read_time= file->cost(file->ha_key_scan_time(cached_covering_key,
records));
copy_cost= 0; // included in ha_key_scan_time
cost->index_cost= file->ha_key_scan_time(cached_covering_key, records);
read_time= file->cost(cost->index_cost);
row_copy_cost= 0; // Included in ha_key_scan_time
}
else
{
read_time= file->cost(file->ha_scan_time(records));
copy_cost= 0;
cost->row_cost= file->ha_scan_time(records);
read_time= file->cost(cost->row_cost);
row_copy_cost= 0; // Included in ha_scan_time
}
}
}
@@ -15798,14 +15825,24 @@ void JOIN_TAB::estimate_scan_time()
records= table->stat_records();
DBUG_ASSERT(table->opt_range_condition_rows == records);
// Needs fix..
read_time= file->cost(table->file->ha_scan_time(MY_MAX(records, 1000)));
copy_cost= table->s->optimizer_costs.row_copy_cost;
cost->row_cost= table->file->ha_scan_time(MY_MAX(records, 1000));
read_time= file->cost(cost->row_cost);
row_copy_cost= table->s->optimizer_costs.row_copy_cost;
}
found_records= records;
cached_scan_and_compare_time= (read_time + records *
(copy_cost + WHERE_COST_THD(thd)));
copy_cost= (records * (row_copy_cost + WHERE_COST_THD(thd)));
cached_scan_and_compare_time= read_time + copy_cost;
cost->copy_cost+= copy_cost;
/*
Assume we only need to do physical IO once even if we scan the file
multiple times.
*/
cost->max_index_blocks= (longlong) ceil(cost->index_cost.io);
cost->max_row_blocks= (longlong) ceil(cost->row_cost.io);
DBUG_ASSERT(compare_cost(cached_scan_and_compare_time,
file->cost(cost)));
}
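
As a closing illustration of the consistency check at the end of
JOIN_TAB::estimate_scan_time(), the sketch below keeps the scan cost both
as a plain double and in a small struct and verifies that the two views
agree with a compare_cost()-style relative-tolerance check. ScanCost and
the per-row constants are invented for the example; only the shape of the
check mirrors the patch.

#include <cassert>

struct ScanCost                              /* simplified stand-in */
{
  double row_cost, copy_cost;
  double total() const { return row_cost + copy_cost; }
};

static bool roughly_equal(double a, double b)  /* mirrors compare_cost() */
{
  return a >= b - b/10000000.0 && a <= b + b/10000000.0;
}

int main()
{
  double records= 10000.0, read_time= 120.0;
  double row_copy_cost= 0.06, where_cost= 0.032; /* made-up per-row costs */

  /* Scalar form, as cached_scan_and_compare_time is computed */
  double cached_scan_and_compare_time=
    read_time + records * (row_copy_cost + where_cost);

  /* Structured form, as the patch now also stores the cost */
  ScanCost cost= { read_time, records * (row_copy_cost + where_cost) };

  /* Same kind of check as the closing DBUG_ASSERT in the patch */
  assert(roughly_equal(cached_scan_and_compare_time, cost.total()));
  return 0;
}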