1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-06 07:49:08 +03:00

Use a hash table to speed up NOT IN(values)

Similar to 50e17ad28, which allowed hash tables to be used for IN clauses
with a set of constants, here we add the same feature for NOT IN clauses.

NOT IN evaluates the same as: WHERE a <> v1 AND a <> v2 AND a <> v3.
Obviously, if we're using a hash table we must be exactly equivalent to
that and return the same result taking into account that either side of
the condition could contain a NULL.  This requires a little bit of
special handling to make work with the hash table version.

When processing NOT IN, the ScalarArrayOpExpr's operator will be the <>
operator.  To be able to build and lookup a hash table we must use the
<>'s negator operator.  The planner checks if that exists and is hashable
and sets the relevant fields in ScalarArrayOpExpr to instruct the executor
to use hashing.

Author: David Rowley, James Coleman
Reviewed-by: James Coleman, Zhihong Yu
Discussion: https://postgr.es/m/CAApHDvoF1mum_FRk6D621edcB6KSHBi2+GAgWmioj5AhOu2vwQ@mail.gmail.com
This commit is contained in:
David Rowley
2021-07-07 16:29:17 +12:00
parent d854720df6
commit 29f45e299e
16 changed files with 242 additions and 29 deletions

View File

@@ -93,6 +93,15 @@ select return_int_input(1) in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1, null);
select return_int_input(null::int) in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
select return_int_input(null::int) in (10, 9, 2, 8, 3, 7, 4, 6, 5, null);
select return_text_input('a') in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j');
-- NOT IN
select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 0);
select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 2, null);
select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1, null);
select return_int_input(1) not in (null, null, null, null, null, null, null, null, null, null, null);
select return_int_input(null::int) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
select return_int_input(null::int) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, null);
select return_text_input('a') not in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j');
rollback;
@@ -124,6 +133,12 @@ begin
end;
$$ language plpgsql immutable;
create function myintne(myint, myint) returns bool as $$
begin
return not myinteq($1, $2);
end;
$$ language plpgsql immutable;
create operator = (
leftarg = myint,
rightarg = myint,
@@ -135,6 +150,17 @@ create operator = (
merges
);
create operator <> (
leftarg = myint,
rightarg = myint,
commutator = <>,
negator = =,
procedure = myintne,
restrict = eqsel,
join = eqjoinsel,
merges
);
create operator class myint_ops
default for type myint using hash as
operator 1 = (myint, myint),
@@ -145,8 +171,12 @@ insert into inttest values(1::myint),(null);
-- try an array with enough elements to cause hashing
select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null);
select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null);
select * from inttest where a not in (0::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null);
-- ensure the result matched with the non-hashed version. We simply remove
-- some array elements so that we don't reach the hashing threshold.
select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint, null);
select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint, null);
select * from inttest where a not in (0::myint,2::myint,3::myint,4::myint,5::myint, null);
rollback;