mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
MDEV-5992: EITS: Selectivity of non-indexed condition is counted twice in table's fanout
MDEV-5984: EITS: Incorrect filtered% value for single-table select with range access - Fix calculate_cond_selectivity_for_table() to work correctly with range accesses over multi-component keys: = First, take selectivity of all possible range scans into account. Remember which fields were used bt the range scans. = Then, calculate selectivity produced by sargable predicates on fields. If a field was used in a possible range access, assume its selectivity is already taken into account. - Fix table_cond_selectivity(): when quick select is used, selectivity of COND(table) is taken into account in matching_candidates_in_table(). In table_cond_selectivity() we should not apply it for the second time.
This commit is contained in:
@ -118,6 +118,27 @@ Warnings:
|
|||||||
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` <= <cache>(-(1)))
|
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` <= <cache>(-(1)))
|
||||||
drop table t1, t2;
|
drop table t1, t2;
|
||||||
#
|
#
|
||||||
|
# MDEV-5984: EITS: Incorrect filtered% value for single-table select with range access
|
||||||
|
#
|
||||||
|
create table t1(a int);
|
||||||
|
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
||||||
|
create table t2 (a int, b int, col1 varchar(64), col2 varchar(64), key(a,b));
|
||||||
|
insert into t2 select A.a+10*B.a, C.a+10*D.a, 'filler-data1', 'filler-data2' from t1 A, t1 B, t1 C, t1 D;
|
||||||
|
set histogram_size=100;
|
||||||
|
set optimizer_use_condition_selectivity=4;
|
||||||
|
set use_stat_tables='preferably';
|
||||||
|
analyze table t2 persistent for all;
|
||||||
|
Table Op Msg_type Msg_text
|
||||||
|
test.t2 analyze status Engine-independent statistics collected
|
||||||
|
test.t2 analyze status Table is already up to date
|
||||||
|
# This must show filtered=100%:
|
||||||
|
explain extended select * from t2 where a in (1,2,3) and b in (1,2,3);
|
||||||
|
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||||
|
1 SIMPLE t2 range a a 10 NULL 9 100.00 Using index condition
|
||||||
|
Warnings:
|
||||||
|
Note 1003 select `test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t2`.`col1` AS `col1`,`test`.`t2`.`col2` AS `col2` from `test`.`t2` where ((`test`.`t2`.`a` in (1,2,3)) and (`test`.`t2`.`b` in (1,2,3)))
|
||||||
|
drop table t2, t1;
|
||||||
|
#
|
||||||
# End of the test file
|
# End of the test file
|
||||||
#
|
#
|
||||||
set use_stat_tables= @save_use_stat_tables;
|
set use_stat_tables= @save_use_stat_tables;
|
||||||
|
@ -84,7 +84,23 @@ explain extended select * from t1 where col1 in (-1,-2,-3);
|
|||||||
explain extended select * from t1 where col1<=-1;
|
explain extended select * from t1 where col1<=-1;
|
||||||
drop table t1, t2;
|
drop table t1, t2;
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # MDEV-5984: EITS: Incorrect filtered% value for single-table select with range access
|
||||||
|
--echo #
|
||||||
|
create table t1(a int);
|
||||||
|
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
||||||
|
|
||||||
|
create table t2 (a int, b int, col1 varchar(64), col2 varchar(64), key(a,b));
|
||||||
|
insert into t2 select A.a+10*B.a, C.a+10*D.a, 'filler-data1', 'filler-data2' from t1 A, t1 B, t1 C, t1 D;
|
||||||
|
|
||||||
|
set histogram_size=100;
|
||||||
|
set optimizer_use_condition_selectivity=4;
|
||||||
|
set use_stat_tables='preferably';
|
||||||
|
analyze table t2 persistent for all;
|
||||||
|
--echo # This must show filtered=100%:
|
||||||
|
explain extended select * from t2 where a in (1,2,3) and b in (1,2,3);
|
||||||
|
|
||||||
|
drop table t2, t1;
|
||||||
--echo #
|
--echo #
|
||||||
--echo # End of the test file
|
--echo # End of the test file
|
||||||
--echo #
|
--echo #
|
||||||
|
138
sql/opt_range.cc
138
sql/opt_range.cc
@ -3390,6 +3390,17 @@ double records_in_column_ranges(PARAM *param, uint idx,
|
|||||||
on the rows of 'table' in the processed query.
|
on the rows of 'table' in the processed query.
|
||||||
The calculated selectivity is assigned to the field table->cond_selectivity.
|
The calculated selectivity is assigned to the field table->cond_selectivity.
|
||||||
|
|
||||||
|
Selectivity is calculated as a product of selectivities imposed by:
|
||||||
|
|
||||||
|
1. possible range accesses. (if multiple range accesses use the same
|
||||||
|
restrictions on the same field, we make adjustments for that)
|
||||||
|
2. Sargable conditions on fields for which we have column statistics (if
|
||||||
|
a field is used in a possible range access, we assume that selectivity
|
||||||
|
is already provided by the range access' estimates)
|
||||||
|
3. Reading a few records from the table pages and checking the condition
|
||||||
|
selectivity (this is used for conditions like "column LIKE '%val%'"
|
||||||
|
where approaches #1 and #2 do not provide selectivity data).
|
||||||
|
|
||||||
NOTE
|
NOTE
|
||||||
Currently the selectivities of range conditions over different columns are
|
Currently the selectivities of range conditions over different columns are
|
||||||
considered independent.
|
considered independent.
|
||||||
@ -3415,14 +3426,90 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item *cond)
|
|||||||
if (table->pos_in_table_list->schema_table)
|
if (table->pos_in_table_list->schema_table)
|
||||||
DBUG_RETURN(FALSE);
|
DBUG_RETURN(FALSE);
|
||||||
|
|
||||||
|
MY_BITMAP handled_columns;
|
||||||
|
my_bitmap_map* buf;
|
||||||
|
if (!(buf= (my_bitmap_map*)thd->alloc(table->s->column_bitmap_size)))
|
||||||
|
DBUG_RETURN(TRUE);
|
||||||
|
my_bitmap_init(&handled_columns, buf, table->s->fields, FALSE);
|
||||||
|
|
||||||
|
/*
|
||||||
|
First, take into account possible range accesses.
|
||||||
|
range access estimates are the most precise, we prefer them to any other
|
||||||
|
estimate sources.
|
||||||
|
*/
|
||||||
|
|
||||||
|
for (keynr= 0; keynr < table->s->keys; keynr++)
|
||||||
|
{
|
||||||
|
if (table->quick_keys.is_set(keynr))
|
||||||
|
set_if_bigger(max_quick_key_parts, table->quick_key_parts[keynr]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Walk through all indexes, indexes where range access uses more keyparts
|
||||||
|
go first.
|
||||||
|
*/
|
||||||
|
for (uint quick_key_parts= max_quick_key_parts;
|
||||||
|
quick_key_parts; quick_key_parts--)
|
||||||
|
{
|
||||||
|
for (keynr= 0; keynr < table->s->keys; keynr++)
|
||||||
|
{
|
||||||
|
if (table->quick_keys.is_set(keynr) &&
|
||||||
|
table->quick_key_parts[keynr] == quick_key_parts)
|
||||||
|
{
|
||||||
|
uint i;
|
||||||
|
uint used_key_parts= table->quick_key_parts[keynr];
|
||||||
|
double quick_cond_selectivity= table->quick_rows[keynr] /
|
||||||
|
table_records;
|
||||||
|
KEY *key_info= table->key_info + keynr;
|
||||||
|
KEY_PART_INFO* key_part= key_info->key_part;
|
||||||
|
/*
|
||||||
|
Suppose, there are range conditions on two keys
|
||||||
|
KEY1 (col1, col2)
|
||||||
|
KEY2 (col3, col2)
|
||||||
|
|
||||||
|
we don't want to count selectivity of condition on col2 twice.
|
||||||
|
|
||||||
|
First, find the longest key prefix that's made of columns whose
|
||||||
|
selectivity wasn't already accounted for.
|
||||||
|
*/
|
||||||
|
for (i= 0; i < used_key_parts; i++, key_part++)
|
||||||
|
{
|
||||||
|
if (bitmap_is_set(&handled_columns, key_part->fieldnr-1))
|
||||||
|
break;
|
||||||
|
bitmap_set_bit(&handled_columns, key_part->fieldnr-1);
|
||||||
|
}
|
||||||
|
if (i)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
There is at least 1-column prefix of columns whose selectivity has
|
||||||
|
not yet been accounted for.
|
||||||
|
*/
|
||||||
|
table->cond_selectivity*= quick_cond_selectivity;
|
||||||
|
if (i != used_key_parts)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
Range access got us estimate for #used_key_parts.
|
||||||
|
We need estimate for #(i-1) key parts.
|
||||||
|
*/
|
||||||
|
double f1= key_info->actual_rec_per_key(i-1);
|
||||||
|
double f2= key_info->actual_rec_per_key(i);
|
||||||
|
table->cond_selectivity*= f1 / f2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Second step: calculate the selectivity of the range conditions not
|
||||||
|
supported by any index
|
||||||
|
*/
|
||||||
|
bitmap_subtract(used_fields, &handled_columns);
|
||||||
|
/* no need to do: my_bitmap_free(&handled_columns); */
|
||||||
|
|
||||||
if (thd->variables.optimizer_use_condition_selectivity > 2 &&
|
if (thd->variables.optimizer_use_condition_selectivity > 2 &&
|
||||||
!bitmap_is_clear_all(used_fields))
|
!bitmap_is_clear_all(used_fields))
|
||||||
{
|
{
|
||||||
/*
|
|
||||||
Calculate the selectivity of the range conditions not supported
|
|
||||||
by any index
|
|
||||||
*/
|
|
||||||
|
|
||||||
PARAM param;
|
PARAM param;
|
||||||
MEM_ROOT alloc;
|
MEM_ROOT alloc;
|
||||||
SEL_TREE *tree;
|
SEL_TREE *tree;
|
||||||
@ -3509,47 +3596,8 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item *cond)
|
|||||||
|
|
||||||
bitmap_clear_all(used_fields);
|
bitmap_clear_all(used_fields);
|
||||||
|
|
||||||
for (keynr= 0; keynr < table->s->keys; keynr++)
|
|
||||||
{
|
|
||||||
if (table->quick_keys.is_set(keynr))
|
|
||||||
set_if_bigger(max_quick_key_parts, table->quick_key_parts[keynr]);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint quick_key_parts= max_quick_key_parts;
|
/* Check if we can improve selectivity estimates by using sampling */
|
||||||
quick_key_parts; quick_key_parts--)
|
|
||||||
{
|
|
||||||
for (keynr= 0; keynr < table->s->keys; keynr++)
|
|
||||||
{
|
|
||||||
if (table->quick_keys.is_set(keynr) &&
|
|
||||||
table->quick_key_parts[keynr] == quick_key_parts)
|
|
||||||
{
|
|
||||||
uint i;
|
|
||||||
uint used_key_parts= table->quick_key_parts[keynr];
|
|
||||||
double quick_cond_selectivity= table->quick_rows[keynr] /
|
|
||||||
table_records;
|
|
||||||
KEY *key_info= table->key_info + keynr;
|
|
||||||
KEY_PART_INFO* key_part= key_info->key_part;
|
|
||||||
for (i= 0; i < used_key_parts; i++, key_part++)
|
|
||||||
{
|
|
||||||
if (bitmap_is_set(used_fields, key_part->fieldnr-1))
|
|
||||||
break;
|
|
||||||
bitmap_set_bit(used_fields, key_part->fieldnr-1);
|
|
||||||
}
|
|
||||||
if (i)
|
|
||||||
{
|
|
||||||
table->cond_selectivity*= quick_cond_selectivity;
|
|
||||||
if (i != used_key_parts)
|
|
||||||
{
|
|
||||||
double f1= key_info->actual_rec_per_key(i-1);
|
|
||||||
double f2= key_info->actual_rec_per_key(i);
|
|
||||||
table->cond_selectivity*= f1 / f2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Calculate selectivity of probably highly selective predicates */
|
|
||||||
ulong check_rows=
|
ulong check_rows=
|
||||||
MY_MIN(thd->variables.optimizer_selectivity_sampling_limit,
|
MY_MIN(thd->variables.optimizer_selectivity_sampling_limit,
|
||||||
(ulong) (table_records * SELECTIVITY_SAMPLING_SHARE));
|
(ulong) (table_records * SELECTIVITY_SAMPLING_SHARE));
|
||||||
|
@ -5557,7 +5557,20 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Estimate of the number matching candidates in the joined table */
|
/*
|
||||||
|
Estimate how many records we will get if we read just this table and apply
|
||||||
|
a part of WHERE that can be checked for it.
|
||||||
|
|
||||||
|
@detail
|
||||||
|
Estimate how many records we will get if we
|
||||||
|
- read the given table with its "independent" access method (either quick
|
||||||
|
select or full table/index scan),
|
||||||
|
- apply the part of WHERE that refers only to this table.
|
||||||
|
|
||||||
|
@seealso
|
||||||
|
table_cond_selectivity() produces selectivity of condition that is checked
|
||||||
|
after joining rows from this table to rows from preceding tables.
|
||||||
|
*/
|
||||||
|
|
||||||
inline
|
inline
|
||||||
double matching_candidates_in_table(JOIN_TAB *s, bool with_found_constraint,
|
double matching_candidates_in_table(JOIN_TAB *s, bool with_found_constraint,
|
||||||
@ -7236,14 +7249,25 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
@brief
|
@brief
|
||||||
Get the selectivity of conditions when joining a table
|
Get the selectivity of conditions when joining a table
|
||||||
|
|
||||||
@param join The optimized join
|
@param join The optimized join
|
||||||
@param s The table to be joined for evaluation
|
@param s The table to be joined for evaluation
|
||||||
@param rem_tables The bitmap of tables to be joined later
|
@param rem_tables The bitmap of tables to be joined later
|
||||||
|
|
||||||
|
@detail
|
||||||
|
Get selectivity of conditions that can be applied when joining this table
|
||||||
|
with previous tables.
|
||||||
|
|
||||||
|
For quick selects and full table scans, selectivity of COND(this_table)
|
||||||
|
is accounted for in matching_candidates_in_table(). Here, we only count
|
||||||
|
selectivity of COND(this_table, previous_tables).
|
||||||
|
|
||||||
|
For other access methods, we need to calculate selectivity of the whole
|
||||||
|
condition, "COND(this_table) AND COND(this_table, previous_tables)".
|
||||||
|
|
||||||
@retval
|
@retval
|
||||||
selectivity of the conditions imposed on the rows of s
|
selectivity of the conditions imposed on the rows of s
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -7255,22 +7279,21 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
|
|||||||
TABLE *table= s->table;
|
TABLE *table= s->table;
|
||||||
MY_BITMAP *read_set= table->read_set;
|
MY_BITMAP *read_set= table->read_set;
|
||||||
double sel= s->table->cond_selectivity;
|
double sel= s->table->cond_selectivity;
|
||||||
double table_records= table->stat_records();
|
|
||||||
POSITION *pos= &join->positions[idx];
|
POSITION *pos= &join->positions[idx];
|
||||||
uint keyparts= 0;
|
uint keyparts= 0;
|
||||||
uint found_part_ref_or_null= 0;
|
uint found_part_ref_or_null= 0;
|
||||||
|
|
||||||
/* Discount the selectivity of the access method used to join table s */
|
if (pos->key != 0)
|
||||||
if (s->quick && s->quick->index != MAX_KEY)
|
|
||||||
{
|
{
|
||||||
if (pos->key == 0 && table_records > 0)
|
/*
|
||||||
{
|
A ref access or hash join is used for this table.
|
||||||
sel/= table->quick_rows[s->quick->index]/table_records;
|
|
||||||
}
|
It could have some parts with "t.key_part=const". Using ref access
|
||||||
}
|
means that we will only get records where the condition holds, so we
|
||||||
else if (pos->key != 0)
|
should remove its selectivity from the condition selectivity.
|
||||||
{
|
|
||||||
/* A ref/ access or hash join is used to join table */
|
(TODO: more details about the "t.key=othertable.col" case)
|
||||||
|
*/
|
||||||
KEYUSE *keyuse= pos->key;
|
KEYUSE *keyuse= pos->key;
|
||||||
KEYUSE *prev_ref_keyuse= keyuse;
|
KEYUSE *prev_ref_keyuse= keyuse;
|
||||||
uint key= keyuse->key;
|
uint key= keyuse->key;
|
||||||
@ -7317,9 +7340,14 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
The table is accessed with full table scan, or quick select.
|
||||||
|
Selectivity of COND(table) is already accounted for in
|
||||||
|
matching_candidates_in_table().
|
||||||
|
*/
|
||||||
sel= 1;
|
sel= 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
If the field f from the table is equal to a field from one the
|
If the field f from the table is equal to a field from one the
|
||||||
earlier joined tables then the selectivity of the range conditions
|
earlier joined tables then the selectivity of the range conditions
|
||||||
|
Reference in New Issue
Block a user