From 2e2ac428aee28f72eaa47d5eb9b0d27262861340 Mon Sep 17 00:00:00 2001 From: "gkodinov/kgeorge@magare.gmz" <> Date: Wed, 21 Nov 2007 11:40:05 +0200 Subject: [PATCH] Bug #30788: Inconsistent retrieval of char/varchar Index lookup does not always guarantee that we can simply remove the relevant conditions from the WHERE clause. Reasons can be e.g. conversion errors, partial indexes etc. The optimizer was removing these parts of the WHERE condition without any further checking. This leads to "false positives" when using indexes. Fixed by checking the index reference conditions (using WHERE) when using indexes with sub-queries. --- mysql-test/r/subselect.result | 47 ++++++++++++++++++++++++++++------ mysql-test/r/subselect3.result | 2 +- mysql-test/t/subselect.test | 25 ++++++++++++++++++ sql/item.h | 22 +++++++++++++++- sql/sql_select.cc | 26 +++++++++++-------- 5 files changed, 101 insertions(+), 21 deletions(-) diff --git a/mysql-test/r/subselect.result b/mysql-test/r/subselect.result index be99bdb1afc..b4b6506a4de 100644 --- a/mysql-test/r/subselect.result +++ b/mysql-test/r/subselect.result @@ -1307,7 +1307,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 index NULL PRIMARY 4 NULL 4 Using where; Using index 2 DEPENDENT SUBQUERY t1 unique_subquery PRIMARY PRIMARY 4 func 1 Using where Warnings: -Note 1003 select `test`.`t2`.`a` AS `a` from `test`.`t2` where (`test`.`t2`.`a`,(((`test`.`t2`.`a`) in t1 on PRIMARY where (`test`.`t1`.`b` <> 30)))) +Note 1003 select `test`.`t2`.`a` AS `a` from `test`.`t2` where (`test`.`t2`.`a`,(((`test`.`t2`.`a`) in t1 on PRIMARY where ((`test`.`t1`.`b` <> 30) and ((`test`.`t2`.`a`) = `test`.`t1`.`a`))))) select * from t2 where t2.a in (select t1.a from t1,t3 where t1.b=t3.a); a 2 @@ -1315,8 +1315,8 @@ a explain extended select * from t2 where t2.a in (select t1.a from t1,t3 where t1.b=t3.a); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 index NULL PRIMARY 4 NULL 4 Using where; Using index -2 DEPENDENT SUBQUERY t1 eq_ref PRIMARY PRIMARY 4 func 1 Using where -2 DEPENDENT SUBQUERY t3 eq_ref PRIMARY PRIMARY 4 test.t1.b 1 Using where; Using index +2 DEPENDENT SUBQUERY t1 eq_ref PRIMARY PRIMARY 4 func 1 +2 DEPENDENT SUBQUERY t3 eq_ref PRIMARY PRIMARY 4 test.t1.b 1 Using index Warnings: Note 1003 select `test`.`t2`.`a` AS `a` from `test`.`t2` where (`test`.`t2`.`a`,(select 1 AS `Not_used` from `test`.`t1` join `test`.`t3` where ((`test`.`t3`.`a` = `test`.`t1`.`b`) and ((`test`.`t2`.`a`) = `test`.`t1`.`a`)))) drop table t1, t2, t3; @@ -1334,9 +1334,9 @@ a explain extended select * from t2 where t2.a in (select a from t1); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 index NULL a 5 NULL 4 Using where; Using index -2 DEPENDENT SUBQUERY t1 index_subquery a a 5 func 1001 Using index +2 DEPENDENT SUBQUERY t1 index_subquery a a 5 func 1001 Using index; Using where Warnings: -Note 1003 select `test`.`t2`.`a` AS `a` from `test`.`t2` where (`test`.`t2`.`a`,(((`test`.`t2`.`a`) in t1 on a))) +Note 1003 select `test`.`t2`.`a` AS `a` from `test`.`t2` where (`test`.`t2`.`a`,(((`test`.`t2`.`a`) in t1 on a where ((`test`.`t2`.`a`) = `test`.`t1`.`a`)))) select * from t2 where t2.a in (select a from t1 where t1.b <> 30); a 2 @@ -1346,7 +1346,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 index NULL a 5 NULL 4 Using where; Using index 2 DEPENDENT SUBQUERY t1 index_subquery a a 5 func 1001 Using index; Using where Warnings: -Note 1003 select `test`.`t2`.`a` AS `a` from `test`.`t2` where (`test`.`t2`.`a`,(((`test`.`t2`.`a`) in t1 on a where (`test`.`t1`.`b` <> 30)))) +Note 1003 select `test`.`t2`.`a` AS `a` from `test`.`t2` where (`test`.`t2`.`a`,(((`test`.`t2`.`a`) in t1 on a where ((`test`.`t1`.`b` <> 30) and ((`test`.`t2`.`a`) = `test`.`t1`.`a`))))) select * from t2 where t2.a in (select t1.a from t1,t3 where t1.b=t3.a); a 2 @@ -1373,7 +1373,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 index NULL a 5 NULL 4 Using where; Using index 2 DEPENDENT SUBQUERY t1 index_subquery a a 5 func 1001 Using index; Using where Warnings: -Note 1003 select `test`.`t2`.`a` AS `a` from `test`.`t2` where (`test`.`t2`.`a`,(((`test`.`t2`.`a`) in t1 on a where (`test`.`t1`.`b` <> 30)))) +Note 1003 select `test`.`t2`.`a` AS `a` from `test`.`t2` where (`test`.`t2`.`a`,(((`test`.`t2`.`a`) in t1 on a where ((`test`.`t1`.`b` <> 30) and ((`test`.`t2`.`a`) = `test`.`t1`.`a`))))) drop table t1, t2, t3; create table t1 (a int, b int); create table t2 (a int, b int); @@ -1737,7 +1737,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 12 Using where 2 DEPENDENT SUBQUERY t1 unique_subquery PRIMARY PRIMARY 4 func 1 Using index; Using where Warnings: -Note 1003 select `test`.`t1`.`id` AS `id`,`test`.`t1`.`text` AS `text` from `test`.`t1` where (not((`test`.`t1`.`id`,(((`test`.`t1`.`id`) in t1 on PRIMARY where (`test`.`t1`.`id` < 8)))))) +Note 1003 select `test`.`t1`.`id` AS `id`,`test`.`t1`.`text` AS `text` from `test`.`t1` where (not((`test`.`t1`.`id`,(((`test`.`t1`.`id`) in t1 on PRIMARY where ((`test`.`t1`.`id` < 8) and ((`test`.`t1`.`id`) = `test`.`t1`.`id`))))))) explain extended select * from t1 as tt where not exists (select id from t1 where id < 8 and (id = tt.id or id is null) having id is not null); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY tt ALL NULL NULL NULL NULL 12 Using where @@ -4139,4 +4139,35 @@ SELECT (SELECT SUM(t1.a) FROM t2 WHERE a=1) FROM t1; (SELECT SUM(t1.a) FROM t2 WHERE a=1) 3 DROP TABLE t1,t2; +CREATE TABLE t1 (a CHAR(1), b VARCHAR(10)); +INSERT INTO t1 VALUES ('a', 'aa'); +INSERT INTO t1 VALUES ('a', 'aaa'); +SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1); +a b +CREATE INDEX I1 ON t1 (a); +CREATE INDEX I2 ON t1 (b); +EXPLAIN SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 2 Using where +2 DEPENDENT SUBQUERY t1 index_subquery I1 I1 2 func 2 Using index; Using where +SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1); +a b +CREATE TABLE t2 (a VARCHAR(1), b VARCHAR(10)); +INSERT INTO t2 SELECT * FROM t1; +CREATE INDEX I1 ON t2 (a); +CREATE INDEX I2 ON t2 (b); +EXPLAIN SELECT a,b FROM t2 WHERE b IN (SELECT a FROM t2); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t2 ALL NULL NULL NULL NULL 2 Using where +2 DEPENDENT SUBQUERY t2 index_subquery I1 I1 4 func 2 Using index; Using where +SELECT a,b FROM t2 WHERE b IN (SELECT a FROM t2); +a b +EXPLAIN +SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1 WHERE LENGTH(a)<500); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 2 Using where +2 DEPENDENT SUBQUERY t1 index_subquery I1 I1 2 func 2 Using index; Using where +SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1 WHERE LENGTH(a)<500); +a b +DROP TABLE t1,t2; End of 5.0 tests. diff --git a/mysql-test/r/subselect3.result b/mysql-test/r/subselect3.result index 098dacc8189..bdf00e4c307 100644 --- a/mysql-test/r/subselect3.result +++ b/mysql-test/r/subselect3.result @@ -757,6 +757,6 @@ a EXPLAIN SELECT a FROM t1 WHERE a NOT IN (SELECT a FROM t2); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 4 Using where -2 DEPENDENT SUBQUERY t2 unique_subquery PRIMARY PRIMARY 4 func 1 Using index +2 DEPENDENT SUBQUERY t2 unique_subquery PRIMARY PRIMARY 4 func 1 Using index; Using where DROP TABLE t1; End of 5.0 tests diff --git a/mysql-test/t/subselect.test b/mysql-test/t/subselect.test index d076ca6bd33..9154e67c4fb 100644 --- a/mysql-test/t/subselect.test +++ b/mysql-test/t/subselect.test @@ -2987,4 +2987,29 @@ SELECT (SELECT SUM(t1.a) FROM t2 WHERE a!=0) FROM t1; SELECT (SELECT SUM(t1.a) FROM t2 WHERE a=1) FROM t1; DROP TABLE t1,t2; +# +# Bug #30788: Inconsistent retrieval of char/varchar +# + +CREATE TABLE t1 (a CHAR(1), b VARCHAR(10)); +INSERT INTO t1 VALUES ('a', 'aa'); +INSERT INTO t1 VALUES ('a', 'aaa'); +SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1); +CREATE INDEX I1 ON t1 (a); +CREATE INDEX I2 ON t1 (b); +EXPLAIN SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1); +SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1); + +CREATE TABLE t2 (a VARCHAR(1), b VARCHAR(10)); +INSERT INTO t2 SELECT * FROM t1; +CREATE INDEX I1 ON t2 (a); +CREATE INDEX I2 ON t2 (b); +EXPLAIN SELECT a,b FROM t2 WHERE b IN (SELECT a FROM t2); +SELECT a,b FROM t2 WHERE b IN (SELECT a FROM t2); +EXPLAIN +SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1 WHERE LENGTH(a)<500); +SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1 WHERE LENGTH(a)<500); + +DROP TABLE t1,t2; + --echo End of 5.0 tests. diff --git a/sql/item.h b/sql/item.h index b611c59b8f1..ba65014d5e6 100644 --- a/sql/item.h +++ b/sql/item.h @@ -2452,8 +2452,18 @@ class Item_cache: public Item protected: Item *example; table_map used_table_map; + /* + Field that this object will get value from. This is set/used by + index-based subquery engines to detect and remove the equality injected + by IN->EXISTS transformation. + For all other uses of Item_cache, cached_field doesn't matter. + */ + Field *cached_field; public: - Item_cache(): example(0), used_table_map(0) {fixed= 1; null_value= 1;} + Item_cache(): example(0), used_table_map(0), cached_field(0) + { + fixed= 1; null_value= 1; + } void set_used_tables(table_map map) { used_table_map= map; } @@ -2465,6 +2475,8 @@ public: decimals= item->decimals; collation.set(item->collation); unsigned_flag= item->unsigned_flag; + if (item->type() == FIELD_ITEM) + cached_field= ((Item_field *)item)->field; return 0; }; virtual void store(Item *)= 0; @@ -2475,6 +2487,14 @@ public: // to prevent drop fixed flag (no need parent cleanup call) void cleanup() {} void print(String *str); + bool eq_def(Field *field) + { + return cached_field ? cached_field->eq_def (field) : FALSE; + } + bool eq(const Item *item, bool binary_cmp) const + { + return this == item; + } }; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 7af39071561..211ac416c6d 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -217,6 +217,7 @@ static void select_describe(JOIN *join, bool need_tmp_table,bool need_order, bool distinct, const char *message=NullS); static Item *remove_additional_cond(Item* conds); static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab); +static bool test_if_ref(Item_field *left_item,Item *right_item); /* @@ -673,9 +674,6 @@ err: without "checking NULL", remove the predicates that were pushed down into the subquery. - We can remove the equalities that will be guaranteed to be true by the - fact that subquery engine will be using index lookup. - If the subquery compares scalar values, we can remove the condition that was wrapped into trig_cond (it will be checked when needed by the subquery engine) @@ -685,6 +683,12 @@ err: and non-NULL values, we'll do a full table scan and will rely on the equalities corresponding to non-NULL parts of left tuple to filter out non-matching records. + + TODO: We can remove the equalities that will be guaranteed to be true by the + fact that subquery engine will be using index lookup. This must be done only + for cases where there are no conversion errors of significance, e.g. 257 + that is searched in a byte. But this requires homogenization of the return + codes of all Field*::store() methods. */ void JOIN::remove_subq_pushed_predicates(Item **where) @@ -692,17 +696,13 @@ void JOIN::remove_subq_pushed_predicates(Item **where) if (conds->type() == Item::FUNC_ITEM && ((Item_func *)this->conds)->functype() == Item_func::EQ_FUNC && ((Item_func *)conds)->arguments()[0]->type() == Item::REF_ITEM && - ((Item_func *)conds)->arguments()[1]->type() == Item::FIELD_ITEM) + ((Item_func *)conds)->arguments()[1]->type() == Item::FIELD_ITEM && + test_if_ref ((Item_field *)((Item_func *)conds)->arguments()[1], + ((Item_func *)conds)->arguments()[0])) { *where= 0; return; } - if (conds->type() == Item::COND_ITEM && - ((class Item_func *)this->conds)->functype() == - Item_func::COND_AND_FUNC) - { - *where= remove_additional_cond(conds); - } } @@ -1219,7 +1219,7 @@ JOIN::optimize() { if (!having) { - Item *where= 0; + Item *where= conds; if (join_tab[0].type == JT_EQ_REF && join_tab[0].ref.items[0]->name == in_left_expr_name) { @@ -11862,8 +11862,12 @@ static bool test_if_ref(Item_field *left_item,Item *right_item) Item *ref_item=part_of_refkey(field->table,field); if (ref_item && ref_item->eq(right_item,1)) { + right_item= right_item->real_item(); if (right_item->type() == Item::FIELD_ITEM) return (field->eq_def(((Item_field *) right_item)->field)); + /* remove equalities injected by IN->EXISTS transformation */ + else if (right_item->type() == Item::CACHE_ITEM) + return ((Item_cache *)right_item)->eq_def (field); if (right_item->const_item() && !(right_item->is_null())) { /*