1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-29 05:21:33 +03:00

MDEV-18073: get_range_limit_read_cost() doesn't adjust LIMIT for the range access

The computation of which "fraction" of the range/ref access cost we would
need to pay was incorrect.

Adjusted the computation accordingly.
This commit is contained in:
Sergei Petrunia
2019-01-23 16:26:09 +03:00
parent b7a784ae25
commit 3238f2a6e9
3 changed files with 73 additions and 10 deletions

View File

@ -26685,16 +26685,22 @@ void JOIN::cache_const_exprs()
/*
Get a cost of reading rows_limit rows through index keynr.
Get the cost of using index keynr to read #LIMIT matching rows
@detail
- If there is a quick select, we try to use it.
- if there is a ref(const) access, we try to use it, too.
- quick and ref(const) use different cost formulas, so if both are possible
we should make a cost-based choice.
rows_limit is the number of rows we would need to read when using a full
index scan. This is generally higher than the N from "LIMIT N" clause,
because there's a WHERE condition (a part of which is used to construct a
range access we are considering using here)
@param tab JOIN_TAB with table access (is NULL for single-table
UPDATE/DELETE)
@param rows_limit See explanation above
@param read_time OUT Cost of reading using quick or ref(const) access.
@ -26707,6 +26713,7 @@ void JOIN::cache_const_exprs()
static bool get_range_limit_read_cost(const JOIN_TAB *tab,
const TABLE *table,
ha_rows table_records,
uint keynr,
ha_rows rows_limit,
double *read_time)
@ -26773,8 +26780,32 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab,
}
}
}
/*
Consider an example:
SELECT *
FROM t1
WHERE key1 BETWEEN 10 AND 20 AND col2='foo'
ORDER BY key1 LIMIT 10
If we were using a full index scan on key1, we would need to read this
many rows to get 10 matches:
10 / selectivity(key1 BETWEEN 10 AND 20 AND col2='foo')
This is the number we get in rows_limit.
But we intend to use range access on key1. The rows returned by quick
select will satisfy the range part of the condition,
"key1 BETWEEN 10 and 20". We will still need to filter them with
the remainder condition, (col2='foo').
The selectivity of the range access is (best_rows/table_records). We need
to discount it from the rows_limit:
*/
double rows_limit_for_quick= rows_limit * (best_rows / table_records);
if (best_rows > rows_limit)
if (best_rows > rows_limit_for_quick)
{
/*
LIMIT clause specifies that we will need to read fewer records than
@ -26783,7 +26814,7 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab,
only need 1/3rd of records, it will cost us 1/3rd of quick select's
read time)
*/
best_cost *= rows_limit / best_rows;
best_cost *= rows_limit_for_quick / best_rows;
}
*read_time= best_cost;
res= true;
@ -27076,8 +27107,8 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
index_scan_time= select_limit/rec_per_key *
MY_MIN(rec_per_key, table->file->scan_time());
double range_scan_time;
if (get_range_limit_read_cost(tab, table, nr, select_limit,
&range_scan_time))
if (get_range_limit_read_cost(tab, table, table_records, nr,
select_limit, &range_scan_time))
{
if (range_scan_time < index_scan_time)
index_scan_time= range_scan_time;