mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
MDEV-6003: EITS: ref access, keypart2=const vs keypart2=expr - inconsistent filtered% value
- Fix table_cond_selectivity() to work correctly for ref access and "keypart2=const" case.
This commit is contained in:
@ -139,6 +139,46 @@ Warnings:
|
||||
Note 1003 select `test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t2`.`col1` AS `col1`,`test`.`t2`.`col2` AS `col2` from `test`.`t2` where ((`test`.`t2`.`a` in (1,2,3)) and (`test`.`t2`.`b` in (1,2,3)))
|
||||
drop table t2, t1;
|
||||
#
|
||||
# MDEV-6003: EITS: ref access, keypart2=const vs keypart2=expr - inconsistent filtered% value
|
||||
#
|
||||
create table t0(a int);
|
||||
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
||||
create table t1 (
|
||||
kp1 int, kp2 int,
|
||||
filler1 char(100),
|
||||
filler2 char(100),
|
||||
key(kp1, kp2)
|
||||
);
|
||||
insert into t1
|
||||
select
|
||||
A.a,
|
||||
B.a,
|
||||
'filler-data-1',
|
||||
'filler-data-2'
|
||||
from t0 A, t0 B, t0 C;
|
||||
set histogram_size=100;
|
||||
set use_stat_tables='preferably';
|
||||
set optimizer_use_condition_selectivity=4;
|
||||
analyze table t1 persistent for all;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 analyze status Engine-independent statistics collected
|
||||
test.t1 analyze status Table is already up to date
|
||||
# NOTE: 10*100%, 10*100% rows is ok
|
||||
explain extended select * from t0, t1 where t1.kp1=t0.a and t1.kp2=t0.a+1;
|
||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||
1 SIMPLE t0 ALL NULL NULL NULL NULL 10 100.00 Using where
|
||||
1 SIMPLE t1 ref kp1 kp1 10 test.t0.a,func 10 100.00 Using index condition
|
||||
Warnings:
|
||||
Note 1003 select `test`.`t0`.`a` AS `a`,`test`.`t1`.`kp1` AS `kp1`,`test`.`t1`.`kp2` AS `kp2`,`test`.`t1`.`filler1` AS `filler1`,`test`.`t1`.`filler2` AS `filler2` from `test`.`t0` join `test`.`t1` where ((`test`.`t1`.`kp1` = `test`.`t0`.`a`) and (`test`.`t1`.`kp2` = (`test`.`t0`.`a` + 1)))
|
||||
# NOTE: t0: 10*100% is ok, t1: 10*9.90% is bad. t1 should have 10*100%.
|
||||
explain extended select * from t0, t1 where t1.kp1=t0.a and t1.kp2=4;
|
||||
id select_type table type possible_keys key key_len ref rows filtered Extra
|
||||
1 SIMPLE t0 ALL NULL NULL NULL NULL 10 100.00 Using where
|
||||
1 SIMPLE t1 ref kp1 kp1 10 test.t0.a,const 10 100.00
|
||||
Warnings:
|
||||
Note 1003 select `test`.`t0`.`a` AS `a`,`test`.`t1`.`kp1` AS `kp1`,`test`.`t1`.`kp2` AS `kp2`,`test`.`t1`.`filler1` AS `filler1`,`test`.`t1`.`filler2` AS `filler2` from `test`.`t0` join `test`.`t1` where ((`test`.`t1`.`kp1` = `test`.`t0`.`a`) and (`test`.`t1`.`kp2` = 4))
|
||||
drop table t0, t1;
|
||||
#
|
||||
# End of the test file
|
||||
#
|
||||
set use_stat_tables= @save_use_stat_tables;
|
||||
|
@ -101,6 +101,36 @@ analyze table t2 persistent for all;
|
||||
explain extended select * from t2 where a in (1,2,3) and b in (1,2,3);
|
||||
|
||||
drop table t2, t1;
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-6003: EITS: ref access, keypart2=const vs keypart2=expr - inconsistent filtered% value
|
||||
--echo #
|
||||
create table t0(a int);
|
||||
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
||||
create table t1 (
|
||||
kp1 int, kp2 int,
|
||||
filler1 char(100),
|
||||
filler2 char(100),
|
||||
key(kp1, kp2)
|
||||
);
|
||||
|
||||
insert into t1
|
||||
select
|
||||
A.a,
|
||||
B.a,
|
||||
'filler-data-1',
|
||||
'filler-data-2'
|
||||
from t0 A, t0 B, t0 C;
|
||||
set histogram_size=100;
|
||||
set use_stat_tables='preferably';
|
||||
set optimizer_use_condition_selectivity=4;
|
||||
analyze table t1 persistent for all;
|
||||
--echo # NOTE: 10*100%, 10*100% rows is ok
|
||||
explain extended select * from t0, t1 where t1.kp1=t0.a and t1.kp2=t0.a+1;
|
||||
|
||||
--echo # NOTE: t0: 10*100% is ok, t1: 10*9.90% is bad. t1 should have 10*100%.
|
||||
explain extended select * from t0, t1 where t1.kp1=t0.a and t1.kp2=4;
|
||||
drop table t0, t1;
|
||||
--echo #
|
||||
--echo # End of the test file
|
||||
--echo #
|
||||
|
@ -7287,18 +7287,74 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
|
||||
if (pos->key != 0)
|
||||
{
|
||||
/*
|
||||
A ref access or hash join is used for this table.
|
||||
A ref access or hash join is used for this table. ref access is created
|
||||
from
|
||||
|
||||
It could have some parts with "t.key_part=const". Using ref access
|
||||
means that we will only get records where the condition holds, so we
|
||||
should remove its selectivity from the condition selectivity.
|
||||
tbl.keypart1=expr1 AND tbl.keypart2=expr2 AND ...
|
||||
|
||||
and it will only return rows for which this condition is satisified.
|
||||
Suppose, certain expr{i} is a constant. Since ref access only returns
|
||||
rows that satisfy
|
||||
|
||||
tbl.keypart{i}=const (*)
|
||||
|
||||
then selectivity of this equality should not be counted in return value
|
||||
of this function. This function uses the value of
|
||||
|
||||
table->cond_selectivity=selectivity(COND(tbl)) (**)
|
||||
|
||||
as a starting point. This value includes selectivity of equality (*). We
|
||||
should somehow discount it.
|
||||
|
||||
Looking at calculate_cond_selectivity_for_table(), one can see that that
|
||||
the value is not necessarily a direct multiplicand in
|
||||
table->cond_selectivity
|
||||
|
||||
There are three possible ways to discount
|
||||
1. There is a potential range access on t.keypart{i}=const.
|
||||
(an important special case: the used ref access has a const prefix for
|
||||
which a range estimate is available)
|
||||
|
||||
2. The field has a histogram. field[x]->cond_selectivity has the data.
|
||||
|
||||
3. Use index stats on this index:
|
||||
rec_per_key[key_part+1]/rec_per_key[key_part]
|
||||
|
||||
(TODO: more details about the "t.key=othertable.col" case)
|
||||
*/
|
||||
KEYUSE *keyuse= pos->key;
|
||||
KEYUSE *prev_ref_keyuse= keyuse;
|
||||
uint key= keyuse->key;
|
||||
do
|
||||
|
||||
/*
|
||||
Check if we have a prefix of key=const that matches a quick select.
|
||||
*/
|
||||
if (!is_hash_join_key_no(key))
|
||||
{
|
||||
table_map quick_key_map= (table_map(1) << table->quick_key_parts[key]) - 1;
|
||||
if (table->quick_rows[key] &&
|
||||
!(quick_key_map & ~table->const_key_parts[key]))
|
||||
{
|
||||
/*
|
||||
Ok, there is an equality for each of the key parts used by the
|
||||
quick select. This means, quick select's estimate can be reused to
|
||||
discount the selectivity of a prefix of a ref access.
|
||||
*/
|
||||
for (; quick_key_map & 1 ; quick_key_map>>= 1)
|
||||
{
|
||||
while (keyuse->keypart == keyparts)
|
||||
keyuse++;
|
||||
keyparts++;
|
||||
}
|
||||
sel /= table->quick_rows[key] / table->stat_records();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Go through the "keypart{N}=..." equalities and find those that were
|
||||
already taken into account in table->cond_selectivity.
|
||||
*/
|
||||
while (keyuse->table == table && keyuse->key == key)
|
||||
{
|
||||
if (!(keyuse->used_tables & (rem_tables | table->map)))
|
||||
{
|
||||
@ -7312,22 +7368,35 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
|
||||
else
|
||||
{
|
||||
if (keyparts == keyuse->keypart &&
|
||||
!(~(keyuse->val->used_tables()) & pos->ref_depend_map) &&
|
||||
!((keyuse->val->used_tables()) & ~pos->ref_depend_map) &&
|
||||
!(found_part_ref_or_null & keyuse->optimize))
|
||||
{
|
||||
/* Found a KEYUSE object that will be used by ref access */
|
||||
keyparts++;
|
||||
found_part_ref_or_null|= keyuse->optimize & ~KEY_OPTIMIZE_EQ;
|
||||
}
|
||||
}
|
||||
|
||||
if (keyparts > keyuse->keypart)
|
||||
{
|
||||
/* Ok this is the keyuse that will be used for ref access */
|
||||
uint fldno;
|
||||
if (is_hash_join_key_no(key))
|
||||
fldno= keyuse->keypart;
|
||||
else
|
||||
fldno= table->key_info[key].key_part[keyparts-1].fieldnr - 1;
|
||||
if (keyuse->val->const_item())
|
||||
sel*= table->field[fldno]->cond_selectivity;
|
||||
{
|
||||
sel /= table->field[fldno]->cond_selectivity;
|
||||
/*
|
||||
TODO: we could do better here:
|
||||
1. cond_selectivity might be =1 (the default) because quick
|
||||
select on some index prevented us from analyzing
|
||||
histogram for this column.
|
||||
2. we could get an estimate through this?
|
||||
rec_per_key[key_part-1] / rec_per_key[key_part]
|
||||
*/
|
||||
}
|
||||
if (keyparts > 1)
|
||||
{
|
||||
ref_keyuse_steps[keyparts-2]= keyuse - prev_ref_keyuse;
|
||||
@ -7337,7 +7406,7 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
|
||||
}
|
||||
}
|
||||
keyuse++;
|
||||
} while (keyuse->table == table && keyuse->key == key);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
Reference in New Issue
Block a user