mirror of
https://github.com/MariaDB/server.git
synced 2025-07-27 18:02:13 +03:00
Fixed bug#28404.
This patch adds cost estimation for the queries with ORDER BY / GROUP BY and LIMIT. If there was a ref/range access to the table whose rows were required to be ordered in the result set the optimizer always employed this access though a scan by a different index that was compatible with the required order could be cheaper to produce the first L rows of the result set. Now for such queries the optimizer makes a choice between the cheapest ref/range accesses not compatible with the given order and index scans compatible with it.
This commit is contained in:
@ -6450,6 +6450,7 @@ void JOIN_TAB::cleanup()
|
||||
quick= 0;
|
||||
x_free(cache.buff);
|
||||
cache.buff= 0;
|
||||
limit= 0;
|
||||
if (table)
|
||||
{
|
||||
if (table->key_read)
|
||||
@ -12588,9 +12589,12 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
|
||||
{
|
||||
int ref_key;
|
||||
uint ref_key_parts;
|
||||
int order_direction;
|
||||
uint used_key_parts;
|
||||
TABLE *table=tab->table;
|
||||
SQL_SELECT *select=tab->select;
|
||||
key_map usable_keys;
|
||||
QUICK_SELECT_I *save_quick;
|
||||
DBUG_ENTER("test_if_skip_sort_order");
|
||||
LINT_INIT(ref_key_parts);
|
||||
|
||||
@ -12625,6 +12629,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
|
||||
else if (select && select->quick) // Range found by opt_range
|
||||
{
|
||||
int quick_type= select->quick->get_type();
|
||||
save_quick= select->quick;
|
||||
/*
|
||||
assume results are not ordered when index merge is used
|
||||
TODO: sergeyp: Results of all index merge selects actually are ordered
|
||||
@ -12644,8 +12649,6 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
|
||||
/*
|
||||
We come here when there is a REF key.
|
||||
*/
|
||||
int order_direction;
|
||||
uint used_key_parts;
|
||||
if (!usable_keys.is_set(ref_key))
|
||||
{
|
||||
/*
|
||||
@ -12706,63 +12709,30 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
|
||||
}
|
||||
/* Check if we get the rows in requested sorted order by using the key */
|
||||
if (usable_keys.is_set(ref_key) &&
|
||||
(order_direction = test_if_order_by_key(order,table,ref_key,
|
||||
&used_key_parts)))
|
||||
{
|
||||
if (order_direction == -1) // If ORDER BY ... DESC
|
||||
{
|
||||
if (select && select->quick)
|
||||
{
|
||||
/*
|
||||
Don't reverse the sort order, if it's already done.
|
||||
(In some cases test_if_order_by_key() can be called multiple times
|
||||
*/
|
||||
if (!select->quick->reverse_sorted())
|
||||
{
|
||||
QUICK_SELECT_DESC *tmp;
|
||||
int quick_type= select->quick->get_type();
|
||||
if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
|
||||
quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
|
||||
quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
|
||||
quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)
|
||||
DBUG_RETURN(0); // Use filesort
|
||||
|
||||
/* ORDER BY range_key DESC */
|
||||
tmp= new QUICK_SELECT_DESC((QUICK_RANGE_SELECT*)(select->quick),
|
||||
used_key_parts);
|
||||
if (!tmp || tmp->error)
|
||||
{
|
||||
delete tmp;
|
||||
DBUG_RETURN(0); // Reverse sort not supported
|
||||
}
|
||||
select->quick=tmp;
|
||||
}
|
||||
DBUG_RETURN(1);
|
||||
}
|
||||
if (tab->ref.key_parts < used_key_parts)
|
||||
{
|
||||
/*
|
||||
SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC
|
||||
|
||||
Use a traversal function that starts by reading the last row
|
||||
with key part (A) and then traverse the index backwards.
|
||||
*/
|
||||
tab->read_first_record= join_read_last_key;
|
||||
tab->read_record.read_record= join_read_prev_same;
|
||||
/* fall through */
|
||||
}
|
||||
}
|
||||
else if (select && select->quick)
|
||||
select->quick->sorted= 1;
|
||||
DBUG_RETURN(1); /* No need to sort */
|
||||
}
|
||||
(order_direction= test_if_order_by_key(order,table,ref_key,
|
||||
&used_key_parts)))
|
||||
goto check_reverse_order;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* check if we can use a key to resolve the group */
|
||||
/* Tables using JT_NEXT are handled here */
|
||||
/*
|
||||
Check whether there is an index compatible with the given order
|
||||
usage of which is cheaper than usage of the ref_key index (ref_key>=0)
|
||||
or a table scan.
|
||||
It may be the case if ORDER/GROUP BY is used with LIMIT.
|
||||
*/
|
||||
uint nr;
|
||||
key_map keys;
|
||||
uint best_key_parts;
|
||||
int best_key_direction;
|
||||
ha_rows best_records;
|
||||
double read_time;
|
||||
int best_key= -1;
|
||||
bool is_best_covering= FALSE;
|
||||
double fanout= 1;
|
||||
JOIN *join= tab->join;
|
||||
uint tablenr= tab - join->join_tab;
|
||||
ha_rows table_records= table->file->stats.records;
|
||||
bool group= join->group && order == join->group_list;
|
||||
|
||||
/*
|
||||
filesort() and join cache are usually faster than reading in
|
||||
@ -12775,7 +12745,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
|
||||
resolved with a key; This is because filesort() is usually faster than
|
||||
retrieving all rows through an index.
|
||||
*/
|
||||
if (select_limit >= table->file->stats.records)
|
||||
if (select_limit >= table_records)
|
||||
{
|
||||
keys= *table->file->keys_to_use_for_scanning();
|
||||
keys.merge(table->covering_keys);
|
||||
@ -12786,38 +12756,224 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
|
||||
This is to allow users to use index in ORDER BY.
|
||||
*/
|
||||
if (table->force_index)
|
||||
keys.merge(table->keys_in_use_for_query);
|
||||
keys.merge(group ? table->keys_in_use_for_group_by :
|
||||
table->keys_in_use_for_order_by);
|
||||
keys.intersect(usable_keys);
|
||||
}
|
||||
else
|
||||
keys= usable_keys;
|
||||
|
||||
read_time= join->best_positions[tablenr].read_time;
|
||||
for (uint i= tablenr+1; i < join->tables; i++)
|
||||
fanout*= join->best_positions[i].records_read; // fanout is always >= 1
|
||||
|
||||
for (nr=0; nr < table->s->keys ; nr++)
|
||||
{
|
||||
uint not_used;
|
||||
if (keys.is_set(nr))
|
||||
int direction;
|
||||
if (keys.is_set(nr) &&
|
||||
(direction= test_if_order_by_key(order, table, nr, &used_key_parts)))
|
||||
{
|
||||
int flag;
|
||||
if ((flag= test_if_order_by_key(order, table, nr, ¬_used)))
|
||||
{
|
||||
if (!no_changes)
|
||||
{
|
||||
tab->index=nr;
|
||||
tab->read_first_record= (flag > 0 ? join_read_first:
|
||||
join_read_last);
|
||||
tab->type=JT_NEXT; // Read with index_first(), index_next()
|
||||
if (table->covering_keys.is_set(nr))
|
||||
{
|
||||
table->key_read=1;
|
||||
table->file->extra(HA_EXTRA_KEYREAD);
|
||||
}
|
||||
}
|
||||
DBUG_RETURN(1);
|
||||
}
|
||||
bool is_covering= table->covering_keys.is_set(nr) ||
|
||||
nr == table->s->primary_key &&
|
||||
table->file->primary_key_is_clustered();
|
||||
|
||||
/*
|
||||
Don't use an index scan with ORDER BY without limit.
|
||||
For GROUP BY without limit always use index scan
|
||||
if there is a suitable index.
|
||||
Why we hold to this asymmetry hardly can be explained
|
||||
rationally. It's easy to demonstrate that using
|
||||
temporary table + filesort could be cheaper for grouping
|
||||
queries too.
|
||||
*/
|
||||
if (is_covering ||
|
||||
select_limit != HA_POS_ERROR ||
|
||||
ref_key < 0 && (group || table->force_index))
|
||||
{
|
||||
double rec_per_key;
|
||||
double index_scan_time;
|
||||
KEY *keyinfo= tab->table->key_info+nr;
|
||||
if (select_limit == HA_POS_ERROR)
|
||||
select_limit= table_records;
|
||||
if (group)
|
||||
{
|
||||
rec_per_key= keyinfo->rec_per_key[used_key_parts-1];
|
||||
set_if_bigger(rec_per_key, 1);
|
||||
/*
|
||||
With a grouping query each group containing on average
|
||||
rec_per_key records produces only one row that will
|
||||
be included into the result set.
|
||||
*/
|
||||
if (select_limit > table_records/rec_per_key)
|
||||
select_limit= table_records;
|
||||
else
|
||||
select_limit= (ha_rows) (select_limit*rec_per_key);
|
||||
}
|
||||
/*
|
||||
If tab=tk is not the last joined table tn then to get first
|
||||
L records from the result set we can expect to retrieve
|
||||
only L/fanout(tk,tn) where fanout(tk,tn) says how many
|
||||
rows in the record set on average will match each row tk.
|
||||
Usually our estimates for fanouts are too pessimistic.
|
||||
So the estimate for L/fanout(tk,tn) will be too optimistic
|
||||
and as result we'll choose an index scan when using ref/range
|
||||
access + filesort will be cheaper.
|
||||
*/
|
||||
select_limit= (ha_rows) (select_limit < fanout ?
|
||||
1 : select_limit/fanout);
|
||||
/*
|
||||
We assume that each of the tested indexes is not correlated
|
||||
with ref_key. Thus, to select first N records we have to scan
|
||||
N/selectivity(ref_key) index entries.
|
||||
selectivity(ref_key) = #scanned_records/#table_records =
|
||||
table->quick_condition_rows/table_records.
|
||||
In any case we can't select more than #table_records.
|
||||
N/(table->quick_condition_rows/table_records) > table_records
|
||||
<=> N > table->quick_condition_rows.
|
||||
*/
|
||||
if (select_limit > table->quick_condition_rows)
|
||||
select_limit= table_records;
|
||||
else
|
||||
select_limit= (ha_rows) (select_limit *
|
||||
(double) table_records /
|
||||
table->quick_condition_rows);
|
||||
rec_per_key= keyinfo->rec_per_key[keyinfo->key_parts-1];
|
||||
set_if_bigger(rec_per_key, 1);
|
||||
/*
|
||||
Here we take into account the fact that rows are
|
||||
accessed in sequences rec_per_key records in each.
|
||||
Rows in such a sequence are supposed to be ordered
|
||||
by rowid/primary key. When reading the data
|
||||
in a sequence we'll touch not more pages than the
|
||||
table file contains.
|
||||
TODO. Use the formula for a disk sweep sequential access
|
||||
to calculate the cost of accessing data rows for one
|
||||
index entry.
|
||||
*/
|
||||
index_scan_time= select_limit/rec_per_key *
|
||||
min(rec_per_key, table->file->scan_time());
|
||||
if (is_covering ||
|
||||
ref_key < 0 && (group || table->force_index) ||
|
||||
index_scan_time < read_time)
|
||||
{
|
||||
ha_rows quick_records= table_records;
|
||||
if (is_best_covering && !is_covering)
|
||||
continue;
|
||||
if (table->quick_keys.is_set(nr))
|
||||
quick_records= table->quick_rows[nr];
|
||||
if (best_key < 0 ||
|
||||
(select_limit <= min(quick_records,best_records) ?
|
||||
keyinfo->key_parts < best_key_parts :
|
||||
quick_records < best_records))
|
||||
{
|
||||
best_key= nr;
|
||||
best_key_parts= keyinfo->key_parts;
|
||||
best_records= quick_records;
|
||||
is_best_covering= is_covering;
|
||||
best_key_direction= direction;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (best_key >= 0)
|
||||
{
|
||||
bool quick_created= FALSE;
|
||||
if (table->quick_keys.is_set(best_key) && best_key != ref_key)
|
||||
{
|
||||
key_map map;
|
||||
map.clear_all(); // Force the creation of quick select
|
||||
map.set_bit(best_key); // only best_key.
|
||||
quick_created=
|
||||
select->test_quick_select(join->thd, map, 0,
|
||||
join->select_options & OPTION_FOUND_ROWS ?
|
||||
HA_POS_ERROR :
|
||||
join->unit->select_limit_cnt,
|
||||
0) > 0;
|
||||
}
|
||||
if (!no_changes)
|
||||
{
|
||||
if (!quick_created)
|
||||
{
|
||||
tab->index= best_key;
|
||||
tab->read_first_record= best_key_direction > 0 ?
|
||||
join_read_first:join_read_last;
|
||||
tab->type=JT_NEXT; // Read with index_first(), index_next()
|
||||
if (table->covering_keys.is_set(best_key))
|
||||
{
|
||||
table->key_read=1;
|
||||
table->file->extra(HA_EXTRA_KEYREAD);
|
||||
}
|
||||
table->file->ha_index_or_rnd_end();
|
||||
if (join->select_options & SELECT_DESCRIBE)
|
||||
{
|
||||
tab->ref.key= -1;
|
||||
tab->ref.key_parts= 0;
|
||||
if (tab->select)
|
||||
tab->select->quick= 0;
|
||||
if (select_limit < table_records)
|
||||
tab->limit= select_limit;
|
||||
}
|
||||
}
|
||||
}
|
||||
used_key_parts= best_key_parts;
|
||||
order_direction= best_key_direction;
|
||||
}
|
||||
else
|
||||
DBUG_RETURN(0);
|
||||
}
|
||||
|
||||
check_reverse_order:
|
||||
if (order_direction == -1) // If ORDER BY ... DESC
|
||||
{
|
||||
if (select && select->quick)
|
||||
{
|
||||
/*
|
||||
Don't reverse the sort order, if it's already done.
|
||||
(In some cases test_if_order_by_key() can be called multiple times
|
||||
*/
|
||||
if (!select->quick->reverse_sorted())
|
||||
{
|
||||
QUICK_SELECT_DESC *tmp;
|
||||
int quick_type= select->quick->get_type();
|
||||
if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
|
||||
quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
|
||||
quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
|
||||
quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)
|
||||
{
|
||||
tab->limit= 0;
|
||||
select->quick= save_quick;
|
||||
DBUG_RETURN(0); // Use filesort
|
||||
}
|
||||
|
||||
/* ORDER BY range_key DESC */
|
||||
tmp= new QUICK_SELECT_DESC((QUICK_RANGE_SELECT*)(select->quick),
|
||||
used_key_parts);
|
||||
if (!tmp || tmp->error)
|
||||
{
|
||||
delete tmp;
|
||||
select->quick= save_quick;
|
||||
tab->limit= 0;
|
||||
DBUG_RETURN(0); // Reverse sort not supported
|
||||
}
|
||||
select->quick=tmp;
|
||||
}
|
||||
}
|
||||
else if (tab->ref.key >= 0 && tab->ref.key_parts < used_key_parts)
|
||||
{
|
||||
/*
|
||||
SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC
|
||||
|
||||
Use a traversal function that starts by reading the last row
|
||||
with key part (A) and then traverse the index backwards.
|
||||
*/
|
||||
tab->read_first_record= join_read_last_key;
|
||||
tab->read_record.read_record= join_read_prev_same;
|
||||
}
|
||||
}
|
||||
DBUG_RETURN(0); // Can't use index.
|
||||
else if (select && select->quick)
|
||||
select->quick->sorted= 1;
|
||||
DBUG_RETURN(1);
|
||||
}
|
||||
|
||||
|
||||
@ -15524,7 +15680,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
|
||||
if (tab->select && tab->select->quick)
|
||||
examined_rows= tab->select->quick->records;
|
||||
else if (tab->type == JT_NEXT || tab->type == JT_ALL)
|
||||
examined_rows= tab->table->file->records();
|
||||
examined_rows= tab->limit ? tab->limit : tab->table->file->records();
|
||||
else
|
||||
examined_rows=(ha_rows)join->best_positions[i].records_read;
|
||||
|
||||
|
Reference in New Issue
Block a user