diff --git a/mysql-test/r/selectivity_no_engine.result b/mysql-test/r/selectivity_no_engine.result index 6516abbe318..0008e806a75 100644 --- a/mysql-test/r/selectivity_no_engine.result +++ b/mysql-test/r/selectivity_no_engine.result @@ -139,6 +139,46 @@ Warnings: Note 1003 select `test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t2`.`col1` AS `col1`,`test`.`t2`.`col2` AS `col2` from `test`.`t2` where ((`test`.`t2`.`a` in (1,2,3)) and (`test`.`t2`.`b` in (1,2,3))) drop table t2, t1; # +# MDEV-6003: EITS: ref access, keypart2=const vs keypart2=expr - inconsistent filtered% value +# +create table t0(a int); +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t1 ( +kp1 int, kp2 int, +filler1 char(100), +filler2 char(100), +key(kp1, kp2) +); +insert into t1 +select +A.a, +B.a, +'filler-data-1', +'filler-data-2' +from t0 A, t0 B, t0 C; +set histogram_size=100; +set use_stat_tables='preferably'; +set optimizer_use_condition_selectivity=4; +analyze table t1 persistent for all; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status Table is already up to date +# NOTE: 10*100%, 10*100% rows is ok +explain extended select * from t0, t1 where t1.kp1=t0.a and t1.kp2=t0.a+1; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t0 ALL NULL NULL NULL NULL 10 100.00 Using where +1 SIMPLE t1 ref kp1 kp1 10 test.t0.a,func 10 100.00 Using index condition +Warnings: +Note 1003 select `test`.`t0`.`a` AS `a`,`test`.`t1`.`kp1` AS `kp1`,`test`.`t1`.`kp2` AS `kp2`,`test`.`t1`.`filler1` AS `filler1`,`test`.`t1`.`filler2` AS `filler2` from `test`.`t0` join `test`.`t1` where ((`test`.`t1`.`kp1` = `test`.`t0`.`a`) and (`test`.`t1`.`kp2` = (`test`.`t0`.`a` + 1))) +# NOTE: t0: 10*100% is ok, t1: 10*9.90% is bad. t1 should have 10*100%. +explain extended select * from t0, t1 where t1.kp1=t0.a and t1.kp2=4; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t0 ALL NULL NULL NULL NULL 10 100.00 Using where +1 SIMPLE t1 ref kp1 kp1 10 test.t0.a,const 10 100.00 +Warnings: +Note 1003 select `test`.`t0`.`a` AS `a`,`test`.`t1`.`kp1` AS `kp1`,`test`.`t1`.`kp2` AS `kp2`,`test`.`t1`.`filler1` AS `filler1`,`test`.`t1`.`filler2` AS `filler2` from `test`.`t0` join `test`.`t1` where ((`test`.`t1`.`kp1` = `test`.`t0`.`a`) and (`test`.`t1`.`kp2` = 4)) +drop table t0, t1; +# # End of the test file # set use_stat_tables= @save_use_stat_tables; diff --git a/mysql-test/t/selectivity_no_engine.test b/mysql-test/t/selectivity_no_engine.test index eb6642fb5c7..8f91f5d9261 100644 --- a/mysql-test/t/selectivity_no_engine.test +++ b/mysql-test/t/selectivity_no_engine.test @@ -101,6 +101,36 @@ analyze table t2 persistent for all; explain extended select * from t2 where a in (1,2,3) and b in (1,2,3); drop table t2, t1; + +--echo # +--echo # MDEV-6003: EITS: ref access, keypart2=const vs keypart2=expr - inconsistent filtered% value +--echo # +create table t0(a int); +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t1 ( + kp1 int, kp2 int, + filler1 char(100), + filler2 char(100), + key(kp1, kp2) +); + +insert into t1 +select + A.a, + B.a, + 'filler-data-1', + 'filler-data-2' +from t0 A, t0 B, t0 C; +set histogram_size=100; +set use_stat_tables='preferably'; +set optimizer_use_condition_selectivity=4; +analyze table t1 persistent for all; +--echo # NOTE: 10*100%, 10*100% rows is ok +explain extended select * from t0, t1 where t1.kp1=t0.a and t1.kp2=t0.a+1; + +--echo # NOTE: t0: 10*100% is ok, t1: 10*9.90% is bad. t1 should have 10*100%. +explain extended select * from t0, t1 where t1.kp1=t0.a and t1.kp2=4; +drop table t0, t1; --echo # --echo # End of the test file --echo # diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 235de14c466..e4ea70ba47e 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -7287,18 +7287,74 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, if (pos->key != 0) { /* - A ref access or hash join is used for this table. + A ref access or hash join is used for this table. ref access is created + from - It could have some parts with "t.key_part=const". Using ref access - means that we will only get records where the condition holds, so we - should remove its selectivity from the condition selectivity. + tbl.keypart1=expr1 AND tbl.keypart2=expr2 AND ... + and it will only return rows for which this condition is satisified. + Suppose, certain expr{i} is a constant. Since ref access only returns + rows that satisfy + + tbl.keypart{i}=const (*) + + then selectivity of this equality should not be counted in return value + of this function. This function uses the value of + + table->cond_selectivity=selectivity(COND(tbl)) (**) + + as a starting point. This value includes selectivity of equality (*). We + should somehow discount it. + + Looking at calculate_cond_selectivity_for_table(), one can see that that + the value is not necessarily a direct multiplicand in + table->cond_selectivity + + There are three possible ways to discount + 1. There is a potential range access on t.keypart{i}=const. + (an important special case: the used ref access has a const prefix for + which a range estimate is available) + + 2. The field has a histogram. field[x]->cond_selectivity has the data. + + 3. Use index stats on this index: + rec_per_key[key_part+1]/rec_per_key[key_part] + (TODO: more details about the "t.key=othertable.col" case) */ KEYUSE *keyuse= pos->key; KEYUSE *prev_ref_keyuse= keyuse; uint key= keyuse->key; - do + + /* + Check if we have a prefix of key=const that matches a quick select. + */ + if (!is_hash_join_key_no(key)) + { + table_map quick_key_map= (table_map(1) << table->quick_key_parts[key]) - 1; + if (table->quick_rows[key] && + !(quick_key_map & ~table->const_key_parts[key])) + { + /* + Ok, there is an equality for each of the key parts used by the + quick select. This means, quick select's estimate can be reused to + discount the selectivity of a prefix of a ref access. + */ + for (; quick_key_map & 1 ; quick_key_map>>= 1) + { + while (keyuse->keypart == keyparts) + keyuse++; + keyparts++; + } + sel /= table->quick_rows[key] / table->stat_records(); + } + } + + /* + Go through the "keypart{N}=..." equalities and find those that were + already taken into account in table->cond_selectivity. + */ + while (keyuse->table == table && keyuse->key == key) { if (!(keyuse->used_tables & (rem_tables | table->map))) { @@ -7312,22 +7368,35 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, else { if (keyparts == keyuse->keypart && - !(~(keyuse->val->used_tables()) & pos->ref_depend_map) && + !((keyuse->val->used_tables()) & ~pos->ref_depend_map) && !(found_part_ref_or_null & keyuse->optimize)) { + /* Found a KEYUSE object that will be used by ref access */ keyparts++; found_part_ref_or_null|= keyuse->optimize & ~KEY_OPTIMIZE_EQ; } } + if (keyparts > keyuse->keypart) { + /* Ok this is the keyuse that will be used for ref access */ uint fldno; if (is_hash_join_key_no(key)) fldno= keyuse->keypart; else fldno= table->key_info[key].key_part[keyparts-1].fieldnr - 1; if (keyuse->val->const_item()) - sel*= table->field[fldno]->cond_selectivity; + { + sel /= table->field[fldno]->cond_selectivity; + /* + TODO: we could do better here: + 1. cond_selectivity might be =1 (the default) because quick + select on some index prevented us from analyzing + histogram for this column. + 2. we could get an estimate through this? + rec_per_key[key_part-1] / rec_per_key[key_part] + */ + } if (keyparts > 1) { ref_keyuse_steps[keyparts-2]= keyuse - prev_ref_keyuse; @@ -7337,7 +7406,7 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, } } keyuse++; - } while (keyuse->table == table && keyuse->key == key); + } } else {