1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-27 23:21:58 +03:00

Speed up ScalarArrayOpExpr evaluation

ScalarArrayOpExprs with "useOr=true" and a set of Consts on the right-hand
side have traditionally been evaluated by using a linear search over the
array.  When these arrays contain a large number of elements, this linear
search can become a significant part of execution time.

Here we add a new method of evaluating ScalarArrayOpExpr expressions to
allow them to be evaluated by first building a hash table containing each
element, then on subsequent evaluations, we just probe that hash table to
determine if there is a match.

The planner is in charge of determining when this optimization is possible
and it enables it by setting hashfuncid in the ScalarArrayOpExpr.  The
executor will only perform the hash table evaluation when the hashfuncid
is set.

This means that not all cases are optimized.  For example, CHECK constraints
containing an IN clause won't go through the planner, so won't get the
hashfuncid set.  We could maybe do something about that at some later
date.  We're not doing it now for fear of slowing down cases where the
expression is evaluated only once.  Those cases can be common; for example,
a single-row INSERT into a table with a CHECK constraint containing an IN
clause.

In the planner, we enable this when there are suitable hash functions for
the ScalarArrayOpExpr's operator and only when there are at least
MIN_ARRAY_SIZE_FOR_HASHED_SAOP elements in the array.  The threshold is
currently set to 9.

Author: James Coleman, David Rowley
Reviewed-by: David Rowley, Tomas Vondra, Heikki Linnakangas
Discussion: https://postgr.es/m/CAAaqYe8x62+=wn0zvNKCj55tPpg-JBHzhZFFc6ANovdqFw7-dA@mail.gmail.com
This commit is contained in:
David Rowley
2021-04-08 23:51:22 +12:00
parent 1d257577e0
commit 50e17ad281
21 changed files with 711 additions and 29 deletions

View File

@ -1149,6 +1149,8 @@ ExecInitExprRec(Expr *node, ExprState *state,
FmgrInfo *finfo;
FunctionCallInfo fcinfo;
AclResult aclresult;
FmgrInfo *hash_finfo;
FunctionCallInfo hash_fcinfo;
Assert(list_length(opexpr->args) == 2);
scalararg = (Expr *) linitial(opexpr->args);
@ -1163,6 +1165,17 @@ ExecInitExprRec(Expr *node, ExprState *state,
get_func_name(opexpr->opfuncid));
InvokeFunctionExecuteHook(opexpr->opfuncid);
if (OidIsValid(opexpr->hashfuncid))
{
aclresult = pg_proc_aclcheck(opexpr->hashfuncid,
GetUserId(),
ACL_EXECUTE);
if (aclresult != ACLCHECK_OK)
aclcheck_error(aclresult, OBJECT_FUNCTION,
get_func_name(opexpr->hashfuncid));
InvokeFunctionExecuteHook(opexpr->hashfuncid);
}
/* Set up the primary fmgr lookup information */
finfo = palloc0(sizeof(FmgrInfo));
fcinfo = palloc0(SizeForFunctionCallInfo(2));
@ -1171,26 +1184,76 @@ ExecInitExprRec(Expr *node, ExprState *state,
InitFunctionCallInfoData(*fcinfo, finfo, 2,
opexpr->inputcollid, NULL, NULL);
/* Evaluate scalar directly into left function argument */
ExecInitExprRec(scalararg, state,
&fcinfo->args[0].value, &fcinfo->args[0].isnull);
/*
* Evaluate array argument into our return value. There's no
* danger in that, because the return value is guaranteed to
* be overwritten by EEOP_SCALARARRAYOP, and will not be
* passed to any other expression.
* If hashfuncid is set, we create a EEOP_HASHED_SCALARARRAYOP
* step instead of a EEOP_SCALARARRAYOP. This provides much
* faster lookup performance than the normal linear search
* when the number of items in the array is anything but very
* small.
*/
ExecInitExprRec(arrayarg, state, resv, resnull);
if (OidIsValid(opexpr->hashfuncid))
{
hash_finfo = palloc0(sizeof(FmgrInfo));
hash_fcinfo = palloc0(SizeForFunctionCallInfo(1));
fmgr_info(opexpr->hashfuncid, hash_finfo);
fmgr_info_set_expr((Node *) node, hash_finfo);
InitFunctionCallInfoData(*hash_fcinfo, hash_finfo,
1, opexpr->inputcollid, NULL,
NULL);
/* And perform the operation */
scratch.opcode = EEOP_SCALARARRAYOP;
scratch.d.scalararrayop.element_type = InvalidOid;
scratch.d.scalararrayop.useOr = opexpr->useOr;
scratch.d.scalararrayop.finfo = finfo;
scratch.d.scalararrayop.fcinfo_data = fcinfo;
scratch.d.scalararrayop.fn_addr = finfo->fn_addr;
ExprEvalPushStep(state, &scratch);
scratch.d.hashedscalararrayop.hash_finfo = hash_finfo;
scratch.d.hashedscalararrayop.hash_fcinfo_data = hash_fcinfo;
scratch.d.hashedscalararrayop.hash_fn_addr = hash_finfo->fn_addr;
/* Evaluate scalar directly into left function argument */
ExecInitExprRec(scalararg, state,
&fcinfo->args[0].value, &fcinfo->args[0].isnull);
/*
* Evaluate array argument into our return value. There's
* no danger in that, because the return value is
* guaranteed to be overwritten by
* EEOP_HASHED_SCALARARRAYOP, and will not be passed to
* any other expression.
*/
ExecInitExprRec(arrayarg, state, resv, resnull);
/* And perform the operation */
scratch.opcode = EEOP_HASHED_SCALARARRAYOP;
scratch.d.hashedscalararrayop.finfo = finfo;
scratch.d.hashedscalararrayop.fcinfo_data = fcinfo;
scratch.d.hashedscalararrayop.fn_addr = finfo->fn_addr;
scratch.d.hashedscalararrayop.hash_finfo = hash_finfo;
scratch.d.hashedscalararrayop.hash_fcinfo_data = hash_fcinfo;
scratch.d.hashedscalararrayop.hash_fn_addr = hash_finfo->fn_addr;
ExprEvalPushStep(state, &scratch);
}
else
{
/* Evaluate scalar directly into left function argument */
ExecInitExprRec(scalararg, state,
&fcinfo->args[0].value,
&fcinfo->args[0].isnull);
/*
* Evaluate array argument into our return value. There's
* no danger in that, because the return value is
* guaranteed to be overwritten by EEOP_SCALARARRAYOP, and
* will not be passed to any other expression.
*/
ExecInitExprRec(arrayarg, state, resv, resnull);
/* And perform the operation */
scratch.opcode = EEOP_SCALARARRAYOP;
scratch.d.scalararrayop.element_type = InvalidOid;
scratch.d.scalararrayop.useOr = opexpr->useOr;
scratch.d.scalararrayop.finfo = finfo;
scratch.d.scalararrayop.fcinfo_data = fcinfo;
scratch.d.scalararrayop.fn_addr = finfo->fn_addr;
ExprEvalPushStep(state, &scratch);
}
break;
}