mirror of
https://github.com/postgres/postgres.git
synced 2025-07-09 22:41:56 +03:00
Use a hash table to speed up NOT IN(values)
Similar to 50e17ad28
, which allowed hash tables to be used for IN clauses
with a set of constants, here we add the same feature for NOT IN clauses.
NOT IN evaluates the same as: WHERE a <> v1 AND a <> v2 AND a <> v3.
Obviously, if we're using a hash table we must be exactly equivalent to
that and return the same result taking into account that either side of
the condition could contain a NULL. This requires a little bit of
special handling to make work with the hash table version.
When processing NOT IN, the ScalarArrayOpExpr's operator will be the <>
operator. To be able to build and lookup a hash table we must use the
<>'s negator operator. The planner checks if that exists and is hashable
and sets the relevant fields in ScalarArrayOpExpr to instruct the executor
to use hashing.
Author: David Rowley, James Coleman
Reviewed-by: James Coleman, Zhihong Yu
Discussion: https://postgr.es/m/CAApHDvoF1mum_FRk6D621edcB6KSHBi2+GAgWmioj5AhOu2vwQ@mail.gmail.com
This commit is contained in:
@ -1205,19 +1205,34 @@ ExecInitExprRec(Expr *node, ExprState *state,
|
||||
AclResult aclresult;
|
||||
FmgrInfo *hash_finfo;
|
||||
FunctionCallInfo hash_fcinfo;
|
||||
Oid cmpfuncid;
|
||||
|
||||
/*
|
||||
* Select the correct comparison function. When we do hashed
|
||||
* NOT IN clauses, the opfuncid will be the inequality
|
||||
* comparison function and negfuncid will be set to equality.
|
||||
* We need to use the equality function for hash probes.
|
||||
*/
|
||||
if (OidIsValid(opexpr->negfuncid))
|
||||
{
|
||||
Assert(OidIsValid(opexpr->hashfuncid));
|
||||
cmpfuncid = opexpr->negfuncid;
|
||||
}
|
||||
else
|
||||
cmpfuncid = opexpr->opfuncid;
|
||||
|
||||
Assert(list_length(opexpr->args) == 2);
|
||||
scalararg = (Expr *) linitial(opexpr->args);
|
||||
arrayarg = (Expr *) lsecond(opexpr->args);
|
||||
|
||||
/* Check permission to call function */
|
||||
aclresult = pg_proc_aclcheck(opexpr->opfuncid,
|
||||
aclresult = pg_proc_aclcheck(cmpfuncid,
|
||||
GetUserId(),
|
||||
ACL_EXECUTE);
|
||||
if (aclresult != ACLCHECK_OK)
|
||||
aclcheck_error(aclresult, OBJECT_FUNCTION,
|
||||
get_func_name(opexpr->opfuncid));
|
||||
InvokeFunctionExecuteHook(opexpr->opfuncid);
|
||||
get_func_name(cmpfuncid));
|
||||
InvokeFunctionExecuteHook(cmpfuncid);
|
||||
|
||||
if (OidIsValid(opexpr->hashfuncid))
|
||||
{
|
||||
@ -1233,7 +1248,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
|
||||
/* Set up the primary fmgr lookup information */
|
||||
finfo = palloc0(sizeof(FmgrInfo));
|
||||
fcinfo = palloc0(SizeForFunctionCallInfo(2));
|
||||
fmgr_info(opexpr->opfuncid, finfo);
|
||||
fmgr_info(cmpfuncid, finfo);
|
||||
fmgr_info_set_expr((Node *) node, finfo);
|
||||
InitFunctionCallInfoData(*fcinfo, finfo, 2,
|
||||
opexpr->inputcollid, NULL, NULL);
|
||||
@ -1274,6 +1289,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
|
||||
|
||||
/* And perform the operation */
|
||||
scratch.opcode = EEOP_HASHED_SCALARARRAYOP;
|
||||
scratch.d.hashedscalararrayop.inclause = opexpr->useOr;
|
||||
scratch.d.hashedscalararrayop.finfo = finfo;
|
||||
scratch.d.hashedscalararrayop.fcinfo_data = fcinfo;
|
||||
scratch.d.hashedscalararrayop.fn_addr = finfo->fn_addr;
|
||||
|
@ -3493,6 +3493,7 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco
|
||||
{
|
||||
ScalarArrayOpExprHashTable *elements_tab = op->d.hashedscalararrayop.elements_tab;
|
||||
FunctionCallInfo fcinfo = op->d.hashedscalararrayop.fcinfo_data;
|
||||
bool inclause = op->d.hashedscalararrayop.inclause;
|
||||
bool strictfunc = op->d.hashedscalararrayop.finfo->fn_strict;
|
||||
Datum scalar = fcinfo->args[0].value;
|
||||
bool scalar_isnull = fcinfo->args[0].isnull;
|
||||
@ -3596,7 +3597,12 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco
|
||||
/* Check the hash to see if we have a match. */
|
||||
hashfound = NULL != saophash_lookup(elements_tab->hashtab, scalar);
|
||||
|
||||
result = BoolGetDatum(hashfound);
|
||||
/* the result depends on if the clause is an IN or NOT IN clause */
|
||||
if (inclause)
|
||||
result = BoolGetDatum(hashfound); /* IN */
|
||||
else
|
||||
result = BoolGetDatum(!hashfound); /* NOT IN */
|
||||
|
||||
resultnull = false;
|
||||
|
||||
/*
|
||||
@ -3605,7 +3611,7 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco
|
||||
* hashtable, but instead marked if we found any when building the table
|
||||
* in has_nulls.
|
||||
*/
|
||||
if (!DatumGetBool(result) && op->d.hashedscalararrayop.has_nulls)
|
||||
if (!hashfound && op->d.hashedscalararrayop.has_nulls)
|
||||
{
|
||||
if (strictfunc)
|
||||
{
|
||||
@ -3633,6 +3639,13 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco
|
||||
|
||||
result = op->d.hashedscalararrayop.fn_addr(fcinfo);
|
||||
resultnull = fcinfo->isnull;
|
||||
|
||||
/*
|
||||
* Reverse the result for NOT IN clauses since the above function
|
||||
* is the equality function and we need not-equals.
|
||||
*/
|
||||
if (!inclause)
|
||||
result = !result;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1718,6 +1718,7 @@ _copyScalarArrayOpExpr(const ScalarArrayOpExpr *from)
|
||||
COPY_SCALAR_FIELD(opno);
|
||||
COPY_SCALAR_FIELD(opfuncid);
|
||||
COPY_SCALAR_FIELD(hashfuncid);
|
||||
COPY_SCALAR_FIELD(negfuncid);
|
||||
COPY_SCALAR_FIELD(useOr);
|
||||
COPY_SCALAR_FIELD(inputcollid);
|
||||
COPY_NODE_FIELD(args);
|
||||
|
@ -414,6 +414,12 @@ _equalScalarArrayOpExpr(const ScalarArrayOpExpr *a, const ScalarArrayOpExpr *b)
|
||||
b->hashfuncid != 0)
|
||||
return false;
|
||||
|
||||
/* Likewise for the negfuncid */
|
||||
if (a->negfuncid != b->negfuncid &&
|
||||
a->negfuncid != 0 &&
|
||||
b->negfuncid != 0)
|
||||
return false;
|
||||
|
||||
COMPARE_SCALAR_FIELD(useOr);
|
||||
COMPARE_SCALAR_FIELD(inputcollid);
|
||||
COMPARE_NODE_FIELD(args);
|
||||
|
@ -1311,6 +1311,7 @@ _outScalarArrayOpExpr(StringInfo str, const ScalarArrayOpExpr *node)
|
||||
WRITE_OID_FIELD(opno);
|
||||
WRITE_OID_FIELD(opfuncid);
|
||||
WRITE_OID_FIELD(hashfuncid);
|
||||
WRITE_OID_FIELD(negfuncid);
|
||||
WRITE_BOOL_FIELD(useOr);
|
||||
WRITE_OID_FIELD(inputcollid);
|
||||
WRITE_NODE_FIELD(args);
|
||||
|
@ -832,6 +832,7 @@ _readScalarArrayOpExpr(void)
|
||||
READ_OID_FIELD(opno);
|
||||
READ_OID_FIELD(opfuncid);
|
||||
READ_OID_FIELD(hashfuncid);
|
||||
READ_OID_FIELD(negfuncid);
|
||||
READ_BOOL_FIELD(useOr);
|
||||
READ_OID_FIELD(inputcollid);
|
||||
READ_NODE_FIELD(args);
|
||||
|
@ -1687,6 +1687,9 @@ fix_expr_common(PlannerInfo *root, Node *node)
|
||||
|
||||
if (!OidIsValid(saop->hashfuncid))
|
||||
record_plan_function_dependency(root, saop->hashfuncid);
|
||||
|
||||
if (!OidIsValid(saop->negfuncid))
|
||||
record_plan_function_dependency(root, saop->negfuncid);
|
||||
}
|
||||
else if (IsA(node, Const))
|
||||
{
|
||||
|
@ -128,6 +128,7 @@ negate_clause(Node *node)
|
||||
newopexpr->opno = negator;
|
||||
newopexpr->opfuncid = InvalidOid;
|
||||
newopexpr->hashfuncid = InvalidOid;
|
||||
newopexpr->negfuncid = InvalidOid;
|
||||
newopexpr->useOr = !saopexpr->useOr;
|
||||
newopexpr->inputcollid = saopexpr->inputcollid;
|
||||
newopexpr->args = saopexpr->args;
|
||||
|
@ -2140,27 +2140,71 @@ convert_saop_to_hashed_saop_walker(Node *node, void *context)
|
||||
Oid lefthashfunc;
|
||||
Oid righthashfunc;
|
||||
|
||||
if (saop->useOr && arrayarg && IsA(arrayarg, Const) &&
|
||||
!((Const *) arrayarg)->constisnull &&
|
||||
get_op_hash_functions(saop->opno, &lefthashfunc, &righthashfunc) &&
|
||||
lefthashfunc == righthashfunc)
|
||||
if (arrayarg && IsA(arrayarg, Const) &&
|
||||
!((Const *) arrayarg)->constisnull)
|
||||
{
|
||||
Datum arrdatum = ((Const *) arrayarg)->constvalue;
|
||||
ArrayType *arr = (ArrayType *) DatumGetPointer(arrdatum);
|
||||
int nitems;
|
||||
|
||||
/*
|
||||
* Only fill in the hash functions if the array looks large enough
|
||||
* for it to be worth hashing instead of doing a linear search.
|
||||
*/
|
||||
nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
|
||||
|
||||
if (nitems >= MIN_ARRAY_SIZE_FOR_HASHED_SAOP)
|
||||
if (saop->useOr)
|
||||
{
|
||||
/* Looks good. Fill in the hash functions */
|
||||
saop->hashfuncid = lefthashfunc;
|
||||
if (get_op_hash_functions(saop->opno, &lefthashfunc, &righthashfunc) &&
|
||||
lefthashfunc == righthashfunc)
|
||||
{
|
||||
Datum arrdatum = ((Const *) arrayarg)->constvalue;
|
||||
ArrayType *arr = (ArrayType *) DatumGetPointer(arrdatum);
|
||||
int nitems;
|
||||
|
||||
/*
|
||||
* Only fill in the hash functions if the array looks
|
||||
* large enough for it to be worth hashing instead of
|
||||
* doing a linear search.
|
||||
*/
|
||||
nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
|
||||
|
||||
if (nitems >= MIN_ARRAY_SIZE_FOR_HASHED_SAOP)
|
||||
{
|
||||
/* Looks good. Fill in the hash functions */
|
||||
saop->hashfuncid = lefthashfunc;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else /* !saop->useOr */
|
||||
{
|
||||
Oid negator = get_negator(saop->opno);
|
||||
|
||||
/*
|
||||
* Check if this is a NOT IN using an operator whose negator
|
||||
* is hashable. If so we can still build a hash table and
|
||||
* just ensure the lookup items are not in the hash table.
|
||||
*/
|
||||
if (OidIsValid(negator) &&
|
||||
get_op_hash_functions(negator, &lefthashfunc, &righthashfunc) &&
|
||||
lefthashfunc == righthashfunc)
|
||||
{
|
||||
Datum arrdatum = ((Const *) arrayarg)->constvalue;
|
||||
ArrayType *arr = (ArrayType *) DatumGetPointer(arrdatum);
|
||||
int nitems;
|
||||
|
||||
/*
|
||||
* Only fill in the hash functions if the array looks
|
||||
* large enough for it to be worth hashing instead of
|
||||
* doing a linear search.
|
||||
*/
|
||||
nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
|
||||
|
||||
if (nitems >= MIN_ARRAY_SIZE_FOR_HASHED_SAOP)
|
||||
{
|
||||
/* Looks good. Fill in the hash functions */
|
||||
saop->hashfuncid = lefthashfunc;
|
||||
|
||||
/*
|
||||
* Also set the negfuncid. The executor will need
|
||||
* that to perform hashtable lookups.
|
||||
*/
|
||||
saop->negfuncid = get_opcode(negator);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -895,6 +895,7 @@ make_scalar_array_op(ParseState *pstate, List *opname,
|
||||
result->opno = oprid(tup);
|
||||
result->opfuncid = opform->oprcode;
|
||||
result->hashfuncid = InvalidOid;
|
||||
result->negfuncid = InvalidOid;
|
||||
result->useOr = useOr;
|
||||
/* inputcollid will be set by parse_collate.c */
|
||||
result->args = args;
|
||||
|
@ -3878,6 +3878,7 @@ make_partition_op_expr(PartitionKey key, int keynum,
|
||||
saopexpr->opno = operoid;
|
||||
saopexpr->opfuncid = get_opcode(operoid);
|
||||
saopexpr->hashfuncid = InvalidOid;
|
||||
saopexpr->negfuncid = InvalidOid;
|
||||
saopexpr->useOr = true;
|
||||
saopexpr->inputcollid = key->partcollation[keynum];
|
||||
saopexpr->args = list_make2(arg1, arrexpr);
|
||||
|
Reference in New Issue
Block a user