mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
BUG#21282: Incorrect query results for "t.key NOT IN (<big const list>)"
In the fix for BUG#15872, a condition of the form "t.key NOT IN (c1, ..., cN)" with N>1000 was incorrectly converted to "(-inf < X < c_min) OR (c_max < X)". This conversion is now removed; we don't produce any range lists for such conditions.
This commit is contained in:
@ -838,3 +838,25 @@ select a, hex(filler) from t1 where a not between 'b' and 'b';
|
||||
a hex(filler)
|
||||
a 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
|
||||
drop table t1,t2,t3;
|
||||
create table t1 (a int);
|
||||
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
||||
create table t2 (a int, key(a));
|
||||
insert into t2 select 2*(A.a + 10*(B.a + 10*C.a)) from t1 A, t1 B, t1 C;
|
||||
set @a="select * from t2 force index (a) where a NOT IN(0";
|
||||
select count(*) from (select @a:=concat(@a, ',', a) from t2 ) Z;
|
||||
count(*)
|
||||
1000
|
||||
set @a=concat(@a, ')');
|
||||
insert into t2 values (11),(13),(15);
|
||||
set @b= concat("explain ", @a);
|
||||
prepare stmt1 from @b;
|
||||
execute stmt1;
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t2 index a a 5 NULL 1003 Using where; Using index
|
||||
prepare stmt1 from @a;
|
||||
execute stmt1;
|
||||
a
|
||||
11
|
||||
13
|
||||
15
|
||||
drop table t1, t2;
|
||||
|
@ -656,3 +656,28 @@ explain select * from t1 where a not between 'b' and 'b';
|
||||
select a, hex(filler) from t1 where a not between 'b' and 'b';
|
||||
|
||||
drop table t1,t2,t3;
|
||||
|
||||
#
|
||||
# BUG#21282
|
||||
#
|
||||
create table t1 (a int);
|
||||
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
||||
create table t2 (a int, key(a));
|
||||
insert into t2 select 2*(A.a + 10*(B.a + 10*C.a)) from t1 A, t1 B, t1 C;
|
||||
|
||||
set @a="select * from t2 force index (a) where a NOT IN(0";
|
||||
select count(*) from (select @a:=concat(@a, ',', a) from t2 ) Z;
|
||||
set @a=concat(@a, ')');
|
||||
|
||||
insert into t2 values (11),(13),(15);
|
||||
|
||||
set @b= concat("explain ", @a);
|
||||
|
||||
prepare stmt1 from @b;
|
||||
execute stmt1;
|
||||
|
||||
prepare stmt1 from @a;
|
||||
execute stmt1;
|
||||
|
||||
drop table t1, t2;
|
||||
# End of 5.0 tests
|
||||
|
@ -3608,41 +3608,33 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
|
||||
if (func->array && func->cmp_type != ROW_RESULT)
|
||||
{
|
||||
/*
|
||||
We get here for conditions in form "t.key NOT IN (c1, c2, ...)"
|
||||
(where c{i} are constants).
|
||||
Our goal is to produce a SEL_ARG graph that represents intervals:
|
||||
We get here for conditions in form "t.key NOT IN (c1, c2, ...)",
|
||||
where c{i} are constants. Our goal is to produce a SEL_TREE that
|
||||
represents intervals:
|
||||
|
||||
($MIN<t.key<c1) OR (c1<t.key<c2) OR (c2<t.key<c3) OR ... (*)
|
||||
|
||||
where $MIN is either "-inf" or NULL.
|
||||
|
||||
The most straightforward way to handle NOT IN would be to convert
|
||||
it to "(t.key != c1) AND (t.key != c2) AND ..." and let the range
|
||||
optimizer to build SEL_ARG graph from that. However that will cause
|
||||
the range optimizer to use O(N^2) memory (it's a bug, not filed),
|
||||
and people do use big NOT IN lists (see BUG#15872). Also, for big
|
||||
NOT IN lists constructing/using graph (*) does not make the query
|
||||
faster.
|
||||
The most straightforward way to produce it is to convert NOT IN
|
||||
into "(t.key != c1) AND (t.key != c2) AND ... " and let the range
|
||||
analyzer build SEL_TREE from that. The problem is that the
|
||||
range analyzer will use O(N^2) memory (which is probably a bug),
|
||||
and people who use big NOT IN lists (e.g. see BUG#15872, BUG#21282)
|
||||
will run out of memory.
|
||||
|
||||
So, we will handle NOT IN manually in the following way:
|
||||
* if the number of entries in the NOT IN list is less then
|
||||
NOT_IN_IGNORE_THRESHOLD, we will construct SEL_ARG graph (*)
|
||||
manually.
|
||||
* Otherwise, we will construct a smaller graph: for
|
||||
"t.key NOT IN (c1,...cN)" we construct a graph representing
|
||||
($MIN < t.key) OR (cN < t.key) // here sequence of c_i is
|
||||
// ordered.
|
||||
Another problem with big lists like (*) is that a big list is
|
||||
unlikely to produce a good "range" access, while considering that
|
||||
range access will require expensive CPU calculations (and for
|
||||
MyISAM even index accesses). In short, big NOT IN lists are rarely
|
||||
worth analyzing.
|
||||
|
||||
A note about partially-covering indexes: for those (e.g. for
|
||||
"a CHAR(10), KEY(a(5))") the handling is correct (albeit not very
|
||||
efficient):
|
||||
Instead of "t.key < c1" we get "t.key <= prefix-val(c1)".
|
||||
Combining the intervals in (*) together, we get:
|
||||
(-inf<=t.key<=c1) OR (c1<=t.key<=c2) OR (c2<=t.key<=c3) OR ...
|
||||
i.e. actually we get intervals combined into one interval:
|
||||
(-inf<=t.key<=+inf). This doesn't make much sense but it doesn't
|
||||
cause any problems.
|
||||
Considering the above, we'll handle NOT IN as follows:
|
||||
* if the number of entries in the NOT IN list does not exceed
|
||||
NOT_IN_IGNORE_THRESHOLD, construct the SEL_TREE (*) manually.
|
||||
* Otherwise, don't produce a SEL_TREE.
|
||||
*/
|
||||
#define NOT_IN_IGNORE_THRESHOLD 1000
|
||||
MEM_ROOT *tmp_root= param->mem_root;
|
||||
param->thd->mem_root= param->old_root;
|
||||
/*
|
||||
@ -3656,7 +3648,7 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
|
||||
Item *value_item= func->array->create_item();
|
||||
param->thd->mem_root= tmp_root;
|
||||
|
||||
if (!value_item)
|
||||
if (func->array->count > NOT_IN_IGNORE_THRESHOLD || !value_item)
|
||||
break;
|
||||
|
||||
/* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval. */
|
||||
@ -3677,10 +3669,7 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
|
||||
tree= NULL;
|
||||
break;
|
||||
}
|
||||
#define NOT_IN_IGNORE_THRESHOLD 1000
|
||||
SEL_TREE *tree2;
|
||||
if (func->array->count < NOT_IN_IGNORE_THRESHOLD)
|
||||
{
|
||||
for (; i < func->array->count; i++)
|
||||
{
|
||||
if (func->array->compare_elems(i, i-1))
|
||||
@ -3713,9 +3702,6 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
|
||||
tree= tree_or(param, tree, tree2);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
func->array->value_to_item(func->array->count - 1, value_item);
|
||||
|
||||
if (tree && tree->type != SEL_TREE::IMPOSSIBLE)
|
||||
{
|
||||
@ -3780,7 +3766,6 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
|
||||
}
|
||||
|
||||
DBUG_RETURN(tree);
|
||||
|
||||
}
|
||||
|
||||
/* make a select tree of all keys in condition */
|
||||
|
Reference in New Issue
Block a user