1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-29 05:21:33 +03:00

BUG#21282: Incorrect query results for "t.key NOT IN (<big const list>)"

In the fix for BUG#15872, a condition of type "t.key NOT IN (c1, ..., cN)",
where N>1000, was incorrectly converted to
  (-inf < X < c_min) OR (c_max < X)
This conversion is now removed: we don't produce any range lists for such
conditions.
This commit is contained in:
sergefp@mysql.com
2006-08-15 21:08:22 +04:00
parent b5f814abed
commit 1230f3ad38
3 changed files with 95 additions and 63 deletions

View File

@ -838,3 +838,25 @@ select a, hex(filler) from t1 where a not between 'b' and 'b';
a hex(filler)
a 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
drop table t1,t2,t3;
create table t1 (a int);
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t2 (a int, key(a));
insert into t2 select 2*(A.a + 10*(B.a + 10*C.a)) from t1 A, t1 B, t1 C;
set @a="select * from t2 force index (a) where a NOT IN(0";
select count(*) from (select @a:=concat(@a, ',', a) from t2 ) Z;
count(*)
1000
set @a=concat(@a, ')');
insert into t2 values (11),(13),(15);
set @b= concat("explain ", @a);
prepare stmt1 from @b;
execute stmt1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 index a a 5 NULL 1003 Using where; Using index
prepare stmt1 from @a;
execute stmt1;
a
11
13
15
drop table t1, t2;

View File

@ -656,3 +656,28 @@ explain select * from t1 where a not between 'b' and 'b';
select a, hex(filler) from t1 where a not between 'b' and 'b';
drop table t1,t2,t3;
#
# BUG#21282
#
create table t1 (a int);
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t2 (a int, key(a));
insert into t2 select 2*(A.a + 10*(B.a + 10*C.a)) from t1 A, t1 B, t1 C;
set @a="select * from t2 force index (a) where a NOT IN(0";
select count(*) from (select @a:=concat(@a, ',', a) from t2 ) Z;
set @a=concat(@a, ')');
insert into t2 values (11),(13),(15);
set @b= concat("explain ", @a);
prepare stmt1 from @b;
execute stmt1;
prepare stmt1 from @a;
execute stmt1;
drop table t1, t2;
# End of 5.0 tests

View File

@ -3608,41 +3608,33 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
if (func->array && func->cmp_type != ROW_RESULT)
{
/*
We get here for conditions in form "t.key NOT IN (c1, c2, ...)"
(where c{i} are constants).
Our goal is to produce a SEL_ARG graph that represents intervals:
We get here for conditions in form "t.key NOT IN (c1, c2, ...)",
where c{i} are constants. Our goal is to produce a SEL_TREE that
represents intervals:
($MIN<t.key<c1) OR (c1<t.key<c2) OR (c2<t.key<c3) OR ... (*)
where $MIN is either "-inf" or NULL.
The most straightforward way to handle NOT IN would be to convert
it to "(t.key != c1) AND (t.key != c2) AND ..." and let the range
optimizer to build SEL_ARG graph from that. However that will cause
the range optimizer to use O(N^2) memory (it's a bug, not filed),
and people do use big NOT IN lists (see BUG#15872). Also, for big
NOT IN lists constructing/using graph (*) does not make the query
faster.
The most straightforward way to produce it is to convert NOT IN
into "(t.key != c1) AND (t.key != c2) AND ... " and let the range
analyzer build a SEL_TREE from that. The problem is that the
range analyzer will use O(N^2) memory (which is probably a bug),
and since people do use big NOT IN lists (e.g. see BUG#15872,
BUG#21282), it will run out of memory.
So, we will handle NOT IN manually in the following way:
* if the number of entries in the NOT IN list is less then
NOT_IN_IGNORE_THRESHOLD, we will construct SEL_ARG graph (*)
manually.
* Otherwise, we will construct a smaller graph: for
"t.key NOT IN (c1,...cN)" we construct a graph representing
($MIN < t.key) OR (cN < t.key) // here sequence of c_i is
// ordered.
Another problem with big lists like (*) is that a big list is
unlikely to produce a good "range" access, while considering that
range access will require expensive CPU calculations (and for
MyISAM even index accesses). In short, big NOT IN lists are rarely
worth analyzing.
A note about partially-covering indexes: for those (e.g. for
"a CHAR(10), KEY(a(5))") the handling is correct (albeit not very
efficient):
Instead of "t.key < c1" we get "t.key <= prefix-val(c1)".
Combining the intervals in (*) together, we get:
(-inf<=t.key<=c1) OR (c1<=t.key<=c2) OR (c2<=t.key<=c3) OR ...
i.e. actually we get intervals combined into one interval:
(-inf<=t.key<=+inf). This doesn't make much sense but it doesn't
cause any problems.
Considering the above, we'll handle NOT IN as follows:
* if the number of entries in the NOT IN list is less than
NOT_IN_IGNORE_THRESHOLD, construct the SEL_TREE (*) manually.
* Otherwise, don't produce a SEL_TREE.
*/
#define NOT_IN_IGNORE_THRESHOLD 1000
MEM_ROOT *tmp_root= param->mem_root;
param->thd->mem_root= param->old_root;
/*
@ -3656,7 +3648,7 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
Item *value_item= func->array->create_item();
param->thd->mem_root= tmp_root;
if (!value_item)
if (func->array->count > NOT_IN_IGNORE_THRESHOLD || !value_item)
break;
/* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval. */
@ -3677,10 +3669,7 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
tree= NULL;
break;
}
#define NOT_IN_IGNORE_THRESHOLD 1000
SEL_TREE *tree2;
if (func->array->count < NOT_IN_IGNORE_THRESHOLD)
{
for (; i < func->array->count; i++)
{
if (func->array->compare_elems(i, i-1))
@ -3713,9 +3702,6 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
tree= tree_or(param, tree, tree2);
}
}
}
else
func->array->value_to_item(func->array->count - 1, value_item);
if (tree && tree->type != SEL_TREE::IMPOSSIBLE)
{
@ -3780,7 +3766,6 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
}
DBUG_RETURN(tree);
}
/* make a select tree of all keys in condition */