
Speedup ScalarArrayOpExpr evaluation

ScalarArrayOpExprs with "useOr=true" and a set of Consts on the righthand
side have traditionally been evaluated by using a linear search over the
array.  When these arrays contain large numbers of elements, this linear
search can become a significant part of execution time.

Here we add a new method of evaluating ScalarArrayOpExpr expressions that
allows them to be evaluated by first building a hash table containing each
element; on subsequent evaluations, we just probe that hash table to
determine whether there is a match.
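
As an editor's illustration of the build-once/probe-many idea, here is a toy, self-contained C
sketch.  It is not the executor code added by this commit (which uses the operator's hash support
function and PostgreSQL's internal hash table machinery); the table size, the ad hoc integer hash
and the -1 "empty" sentinel are assumptions made only for this example:

    #include <stdbool.h>
    #include <stdio.h>

    #define TABLE_SIZE 64           /* power of two, comfortably larger than the array */
    #define EMPTY_SLOT (-1)         /* assumed never to appear as an array element */

    static int table[TABLE_SIZE];

    /* cheap integer hash for the example; the real code hashes with the type's hash function */
    static unsigned int
    hash_int(int value)
    {
        unsigned int h = (unsigned int) value;

        h ^= h >> 16;
        h *= 0x45d9f3b;
        h ^= h >> 16;
        return h;
    }

    /* build the hash table once, when the expression is first evaluated */
    static void
    build_table(const int *array, int nitems)
    {
        for (int i = 0; i < TABLE_SIZE; i++)
            table[i] = EMPTY_SLOT;

        for (int i = 0; i < nitems; i++)
        {
            unsigned int slot = hash_int(array[i]) & (TABLE_SIZE - 1);

            while (table[slot] != EMPTY_SLOT && table[slot] != array[i])
                slot = (slot + 1) & (TABLE_SIZE - 1);   /* linear probing on collision */
            table[slot] = array[i];
        }
    }

    /* each later evaluation is a cheap probe instead of a scan over all elements */
    static bool
    probe_table(int value)
    {
        unsigned int slot = hash_int(value) & (TABLE_SIZE - 1);

        while (table[slot] != EMPTY_SLOT)
        {
            if (table[slot] == value)
                return true;
            slot = (slot + 1) & (TABLE_SIZE - 1);
        }
        return false;
    }

    int
    main(void)
    {
        int array[] = {1, 3, 5, 7, 11, 13, 17, 19, 23};
        int probes[] = {4, 5, 23, 42};

        build_table(array, 9);          /* done once */
        for (int i = 0; i < 4; i++)     /* done once per input row */
            printf("%d => %s\n", probes[i], probe_table(probes[i]) ? "t" : "f");
        return 0;
    }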

The planner is in charge of determining when this optimization is possible
and it enables it by setting hashfuncid in the ScalarArrayOpExpr.  The
executor will only perform the hash table evaluation when the hashfuncid
is set.

This means that not all cases are optimized.  For example, CHECK constraints
containing an IN clause won't go through the planner, so they won't get the
hashfuncid set.  We could maybe do something about that at some later
date.  The reason we're not doing it now is the fear that we may slow
down cases where the expression is evaluated only once.  Those cases can
be common; for example, a single-row INSERT into a table with a CHECK
constraint containing an IN clause.

In the planner, we enable this when there are suitable hash functions for
the ScalarArrayOpExpr's operator and only when there are at least
MIN_ARRAY_SIZE_FOR_HASHED_SAOP elements in the array.  The threshold is
currently set to 9.
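
For example (an editor's illustration): an integer qual such as x IN (1,2,3,4,5,6,7,8) keeps the
linear search because its Const array has only 8 elements, while the same list with 9 or more
elements gets its hashfuncid filled in, since the built-in integer "=" operator has hash support.
A NOT IN list is not helped by this commit, because it is evaluated with useOr=false.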

Author: James Coleman, David Rowley
Reviewed-by: David Rowley, Tomas Vondra, Heikki Linnakangas
Discussion: https://postgr.es/m/CAAaqYe8x62+=wn0zvNKCj55tPpg-JBHzhZFFc6ANovdqFw7-dA@mail.gmail.com
Committed by David Rowley on 2021-04-08 23:51:22 +12:00
parent 1d257577e0
commit 50e17ad281
21 changed files with 711 additions and 29 deletions

src/backend/optimizer/path/costsize.c

@@ -4436,21 +4436,50 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
 	}
 	else if (IsA(node, ScalarArrayOpExpr))
 	{
-		/*
-		 * Estimate that the operator will be applied to about half of the
-		 * array elements before the answer is determined.
-		 */
 		ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node;
 		Node       *arraynode = (Node *) lsecond(saop->args);
 		QualCost    sacosts;
+		QualCost    hcosts;
+		int         estarraylen = estimate_array_length(arraynode);

 		set_sa_opfuncid(saop);
 		sacosts.startup = sacosts.per_tuple = 0;
 		add_function_cost(context->root, saop->opfuncid, NULL,
 						  &sacosts);
-		context->total.startup += sacosts.startup;
-		context->total.per_tuple += sacosts.per_tuple *
-			estimate_array_length(arraynode) * 0.5;
+
+		if (OidIsValid(saop->hashfuncid))
+		{
+			/* Handle costs for hashed ScalarArrayOpExpr */
+			hcosts.startup = hcosts.per_tuple = 0;
+			add_function_cost(context->root, saop->hashfuncid, NULL, &hcosts);
+			context->total.startup += sacosts.startup + hcosts.startup;
+
+			/* Estimate the cost of building the hashtable. */
+			context->total.startup += estarraylen * hcosts.per_tuple;
+
+			/*
+			 * XXX should we charge a little bit for sacosts.per_tuple when
+			 * building the table, or is it ok to assume there will be zero
+			 * hash collision?
+			 */
+
+			/*
+			 * Charge for hashtable lookups.  Charge a single hash and a
+			 * single comparison.
+			 */
+			context->total.per_tuple += hcosts.per_tuple + sacosts.per_tuple;
+		}
+		else
+		{
+			/*
+			 * Estimate that the operator will be applied to about half of the
+			 * array elements before the answer is determined.
+			 */
+			context->total.startup += sacosts.startup;
+			context->total.per_tuple += sacosts.per_tuple *
+				estimate_array_length(arraynode) * 0.5;
+		}
 	}
 	else if (IsA(node, Aggref) ||
 			 IsA(node, WindowFunc))
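
To make the new costing concrete (an editor's worked example, assuming the comparison operator and
the hash function each cost the default cpu_operator_cost of 0.0025): for a 100-element array, the
linear-search branch charges 100 * 0.5 * 0.0025 = 0.125 per tuple, whereas the hashed branch
charges 100 * 0.0025 = 0.25 once at startup to build the table plus 0.0025 + 0.0025 = 0.005 per
tuple for one hash and one comparison, so the hashed estimate wins after only a couple of tuples.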

src/backend/optimizer/plan/planner.c

@@ -1110,6 +1110,16 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
 #endif
 	}

+	/*
+	 * Check for ANY ScalarArrayOpExpr with Const arrays and set the
+	 * hashfuncid of any that might execute more quickly by using hash
+	 * lookups instead of a linear search.
+	 */
+	if (kind == EXPRKIND_QUAL || kind == EXPRKIND_TARGET)
+	{
+		convert_saop_to_hashed_saop(expr);
+	}
+
 	/* Expand SubLinks to SubPlans */
 	if (root->parse->hasSubLinks)
 		expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));

src/backend/optimizer/plan/setrefs.c

@@ -1674,9 +1674,13 @@ fix_expr_common(PlannerInfo *root, Node *node)
 	}
 	else if (IsA(node, ScalarArrayOpExpr))
 	{
-		set_sa_opfuncid((ScalarArrayOpExpr *) node);
-		record_plan_function_dependency(root,
-										((ScalarArrayOpExpr *) node)->opfuncid);
+		ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node;
+
+		set_sa_opfuncid(saop);
+		record_plan_function_dependency(root, saop->opfuncid);
+
+		if (OidIsValid(saop->hashfuncid))
+			record_plan_function_dependency(root, saop->hashfuncid);
 	}
 	else if (IsA(node, Const))
 	{
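
(Recording a plan dependency on the hash support function here mirrors the existing handling of
opfuncid: if the function behind hashfuncid is later altered or dropped, cached plans built to use
it are invalidated and replanned.)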

src/backend/optimizer/prep/prepqual.c

@@ -127,6 +127,7 @@ negate_clause(Node *node)
 	newopexpr->opno = negator;
 	newopexpr->opfuncid = InvalidOid;
+	newopexpr->hashfuncid = InvalidOid;
 	newopexpr->useOr = !saopexpr->useOr;
 	newopexpr->inputcollid = saopexpr->inputcollid;
 	newopexpr->args = saopexpr->args;
src/backend/optimizer/util/clauses.c

@@ -106,6 +106,7 @@ static bool contain_leaked_vars_walker(Node *node, void *context);
 static Relids find_nonnullable_rels_walker(Node *node, bool top_level);
 static List *find_nonnullable_vars_walker(Node *node, bool top_level);
 static bool is_strict_saop(ScalarArrayOpExpr *expr, bool falseOK);
+static bool convert_saop_to_hashed_saop_walker(Node *node, void *context);
 static Node *eval_const_expressions_mutator(Node *node,
 											eval_const_expressions_context *context);
 static bool contain_non_const_walker(Node *node, void *context);
@@ -2101,6 +2102,69 @@ eval_const_expressions(PlannerInfo *root, Node *node)
 	return eval_const_expressions_mutator(node, &context);
 }

+#define MIN_ARRAY_SIZE_FOR_HASHED_SAOP 9
+
+/*--------------------
+ * convert_saop_to_hashed_saop
+ *
+ * Recursively search 'node' for ScalarArrayOpExprs and fill in the hash
+ * function for any ScalarArrayOpExpr that looks like it would be useful to
+ * evaluate using a hash table rather than a linear search.
+ *
+ * We'll use a hash table if all of the following conditions are met:
+ * 1. The 2nd argument of the ScalarArrayOpExpr is a non-null Const array.
+ * 2. useOr is true.
+ * 3. There's a valid hash function for both left and righthand operands and
+ *    these hash functions are the same.
+ * 4. The array contains enough elements for us to consider it worthwhile to
+ *    use a hash table rather than a linear search.
+ */
+void
+convert_saop_to_hashed_saop(Node *node)
+{
+	(void) convert_saop_to_hashed_saop_walker(node, NULL);
+}
+
+static bool
+convert_saop_to_hashed_saop_walker(Node *node, void *context)
+{
+	if (node == NULL)
+		return false;
+
+	if (IsA(node, ScalarArrayOpExpr))
+	{
+		ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node;
+		Expr       *arrayarg = (Expr *) lsecond(saop->args);
+		Oid         lefthashfunc;
+		Oid         righthashfunc;
+
+		if (saop->useOr && arrayarg && IsA(arrayarg, Const) &&
+			!((Const *) arrayarg)->constisnull &&
+			get_op_hash_functions(saop->opno, &lefthashfunc, &righthashfunc) &&
+			lefthashfunc == righthashfunc)
+		{
+			Datum       arrdatum = ((Const *) arrayarg)->constvalue;
+			ArrayType  *arr = (ArrayType *) DatumGetPointer(arrdatum);
+			int         nitems;
+
+			/*
+			 * Only fill in the hash functions if the array looks large enough
+			 * for it to be worth hashing instead of doing a linear search.
+			 */
+			nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
+
+			if (nitems >= MIN_ARRAY_SIZE_FOR_HASHED_SAOP)
+			{
+				/* Looks good.  Fill in the hash functions */
+				saop->hashfuncid = lefthashfunc;
+			}
+			return true;
+		}
+	}
+
+	return expression_tree_walker(node, convert_saop_to_hashed_saop_walker, NULL);
+}
+
 /*--------------------
  * estimate_expression_value
  *