MDEV-6003: EITS: ref access, keypart2=const vs keypart2=expr - inconsistent filtered% value

- Fix table_cond_selectivity() to work correctly for ref access and "keypart2=const" case.
2025-07-29 05:21:33 +03:00 · 2014-04-25 19:04:54 +04:00
parent 182f7d76ee
commit 2acd81af73
3 changed files with 147 additions and 8 deletions
--- a/mysql-test/r/selectivity_no_engine.result
+++ b/mysql-test/r/selectivity_no_engine.result
@ -139,6 +139,46 @@ Warnings:
 Note	1003	select `test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t2`.`col1` AS `col1`,`test`.`t2`.`col2` AS `col2` from `test`.`t2` where ((`test`.`t2`.`a` in (1,2,3)) and (`test`.`t2`.`b` in (1,2,3)))
 drop table t2, t1;
 # 
 # MDEV-6003: EITS: ref access, keypart2=const vs keypart2=expr - inconsistent filtered% value
 # 
 create table t0(a int);
 insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
 create table t1 (
 kp1 int, kp2 int, 
 filler1 char(100),
 filler2 char(100),
 key(kp1, kp2)
 );
 insert into t1 
 select 
 A.a,
 B.a,
 'filler-data-1',
 'filler-data-2'
 from t0 A, t0 B, t0 C;
 set histogram_size=100;
 set use_stat_tables='preferably';
 set optimizer_use_condition_selectivity=4;
 analyze table t1 persistent for all;
 Table	Op	Msg_type	Msg_text
 test.t1	analyze	status	Engine-independent statistics collected
 test.t1	analyze	status	Table is already up to date
 # NOTE: 10*100%, 10*100% rows is ok
 explain extended select * from t0, t1 where t1.kp1=t0.a and t1.kp2=t0.a+1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
 1	SIMPLE	t0	ALL	NULL	NULL	NULL	NULL	10	100.00	Using where
 1	SIMPLE	t1	ref	kp1	kp1	10	test.t0.a,func	10	100.00	Using index condition
 Warnings:
 Note	1003	select `test`.`t0`.`a` AS `a`,`test`.`t1`.`kp1` AS `kp1`,`test`.`t1`.`kp2` AS `kp2`,`test`.`t1`.`filler1` AS `filler1`,`test`.`t1`.`filler2` AS `filler2` from `test`.`t0` join `test`.`t1` where ((`test`.`t1`.`kp1` = `test`.`t0`.`a`) and (`test`.`t1`.`kp2` = (`test`.`t0`.`a` + 1)))
 # NOTE: t0: 10*100% is ok,  t1: 10*9.90% is bad. t1 should have 10*100%.
 explain extended select * from t0, t1 where t1.kp1=t0.a and t1.kp2=4;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
 1	SIMPLE	t0	ALL	NULL	NULL	NULL	NULL	10	100.00	Using where
 1	SIMPLE	t1	ref	kp1	kp1	10	test.t0.a,const	10	100.00	
 Warnings:
 Note	1003	select `test`.`t0`.`a` AS `a`,`test`.`t1`.`kp1` AS `kp1`,`test`.`t1`.`kp2` AS `kp2`,`test`.`t1`.`filler1` AS `filler1`,`test`.`t1`.`filler2` AS `filler2` from `test`.`t0` join `test`.`t1` where ((`test`.`t1`.`kp1` = `test`.`t0`.`a`) and (`test`.`t1`.`kp2` = 4))
 drop table t0, t1;
 # 
 # End of the test file
 # 
 set use_stat_tables= @save_use_stat_tables;
--- a/mysql-test/t/selectivity_no_engine.test
+++ b/mysql-test/t/selectivity_no_engine.test
@ -101,6 +101,36 @@ analyze table t2 persistent for all;
 explain extended select * from t2 where a in (1,2,3) and b in (1,2,3);
 drop table t2, t1;
 --echo # 
 --echo # MDEV-6003: EITS: ref access, keypart2=const vs keypart2=expr - inconsistent filtered% value
 --echo # 
 create table t0(a int);
 insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
 create table t1 (
  kp1 int, kp2 int, 
  filler1 char(100),
  filler2 char(100),
  key(kp1, kp2)
 );
 insert into t1 
 select 
  A.a,
  B.a,
  'filler-data-1',
  'filler-data-2'
 from t0 A, t0 B, t0 C;
 set histogram_size=100;
 set use_stat_tables='preferably';
 set optimizer_use_condition_selectivity=4;
 analyze table t1 persistent for all;
 --echo # NOTE: 10*100%, 10*100% rows is ok
 explain extended select * from t0, t1 where t1.kp1=t0.a and t1.kp2=t0.a+1;
 --echo # NOTE: t0: 10*100% is ok,  t1: 10*9.90% is bad. t1 should have 10*100%.
 explain extended select * from t0, t1 where t1.kp1=t0.a and t1.kp2=4;
 drop table t0, t1;
 --echo # 
 --echo # End of the test file
 --echo # 
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@ -7287,18 +7287,74 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
  if (pos->key != 0)
  {
    /* 
-      A ref access or hash join is used for this table.
+      A ref access or hash join is used for this table. ref access is created
      from
-      It could have some parts with "t.key_part=const". Using ref access
+        tbl.keypart1=expr1 AND tbl.keypart2=expr2 AND ...
      means that we will only get records where the condition holds, so we
      should remove its selectivity from the condition selectivity.
      and it will only return rows for which this condition is satisified.
      Suppose, certain expr{i} is a constant. Since ref access only returns
      rows that satisfy
         tbl.keypart{i}=const       (*)
      then selectivity of this equality should not be counted in return value 
      of this function. This function uses the value of 
         table->cond_selectivity=selectivity(COND(tbl)) (**)
      as a starting point. This value includes selectivity of equality (*). We
      should somehow discount it. 
      Looking at calculate_cond_selectivity_for_table(), one can see that that
      the value is not necessarily a direct multiplicand in 
      table->cond_selectivity
      There are three possible ways to discount
      1. There is a potential range access on t.keypart{i}=const. 
         (an important special case: the used ref access has a const prefix for
          which a range estimate is available)
      2. The field has a histogram. field[x]->cond_selectivity has the data.
      3. Use index stats on this index:
         rec_per_key[key_part+1]/rec_per_key[key_part]
      (TODO: more details about the "t.key=othertable.col" case)
    */
    KEYUSE *keyuse= pos->key;
    KEYUSE *prev_ref_keyuse= keyuse;
    uint key= keyuse->key;
-    do
+    
    /*
      Check if we have a prefix of key=const that matches a quick select.
    */
    if (!is_hash_join_key_no(key))
    {
      table_map quick_key_map= (table_map(1) << table->quick_key_parts[key]) - 1;
      if (table->quick_rows[key] && 
          !(quick_key_map & ~table->const_key_parts[key]))
      {
        /* 
          Ok, there is an equality for each of the key parts used by the
          quick select. This means, quick select's estimate can be reused to
          discount the selectivity of a prefix of a ref access.
        */
        for (; quick_key_map & 1 ; quick_key_map>>= 1)
        {
          while (keyuse->keypart == keyparts)
            keyuse++;
          keyparts++;
        }
        sel /= table->quick_rows[key] / table->stat_records();
      }
    }
    /*
      Go through the "keypart{N}=..." equalities and find those that were
      already taken into account in table->cond_selectivity.
    */
    while (keyuse->table == table && keyuse->key == key)
    {
      if (!(keyuse->used_tables & (rem_tables | table->map)))
      {
@ -7312,22 +7368,35 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
          else
 	  {
            if (keyparts == keyuse->keypart &&
-                !(~(keyuse->val->used_tables()) & pos->ref_depend_map) &&
+                !((keyuse->val->used_tables()) & ~pos->ref_depend_map) &&
                !(found_part_ref_or_null & keyuse->optimize))
 	    {
              /* Found a KEYUSE object that will be used by ref access */
              keyparts++;
              found_part_ref_or_null|= keyuse->optimize & ~KEY_OPTIMIZE_EQ;
            }
          }
          if (keyparts > keyuse->keypart)
 	  {
            /* Ok this is the keyuse that will be used for ref access */
            uint fldno;
            if (is_hash_join_key_no(key))
 	      fldno= keyuse->keypart;
            else
              fldno= table->key_info[key].key_part[keyparts-1].fieldnr - 1;
            if (keyuse->val->const_item())
-              sel*= table->field[fldno]->cond_selectivity; 
+            {              
              sel /= table->field[fldno]->cond_selectivity;
              /* 
               TODO: we could do better here:
                 1. cond_selectivity might be =1 (the default) because quick 
                    select on some index prevented us from analyzing 
                    histogram for this column.
                 2. we could get an estimate through this?
                     rec_per_key[key_part-1] / rec_per_key[key_part]
              */
            }
            if (keyparts > 1)
 	    {
              ref_keyuse_steps[keyparts-2]= keyuse - prev_ref_keyuse;
@ -7337,7 +7406,7 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
 	}
      }
      keyuse++;
-    } while (keyuse->table == table && keyuse->key == key);
+    }
  }
  else
  {