From 08a823e53b78507c9fcc8a6d77de1a1d5b7c1b13 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 24 Aug 2016 14:37:51 -0400 Subject: [PATCH] Fix improper repetition of previous results from a hashed aggregate. ExecReScanAgg's check for whether it could re-use a previously calculated hashtable neglected the possibility that the Agg node might reference PARAM_EXEC Params that are not referenced by its input plan node. That's okay if the Params are in upper tlist or qual expressions; but if one appears in aggregate input expressions, then the hashtable contents need to be recomputed when the Param's value changes. To avoid unnecessary performance degradation in the case of a Param that isn't within an aggregate input, add logic to the planner to determine which Params are within aggregate inputs. This requires a new field in struct Agg, but fortunately we never write plans to disk, so this isn't an initdb-forcing change. Per report from Jeevan Chalke. This has been broken since forever, so back-patch to all supported branches. Andrew Gierth, with minor adjustments by me Report: --- src/backend/executor/nodeAgg.c | 15 +++-- src/backend/nodes/copyfuncs.c | 1 + src/backend/nodes/outfuncs.c | 1 + src/backend/optimizer/plan/createplan.c | 1 + src/backend/optimizer/plan/subselect.c | 48 ++++++++++++++- src/include/nodes/plannodes.h | 2 + src/test/regress/expected/aggregates.out | 75 ++++++++++++++++++++++++ src/test/regress/sql/aggregates.sql | 22 +++++++ 8 files changed, 158 insertions(+), 7 deletions(-) diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 510d1c50e9c..52479296679 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -2049,13 +2049,14 @@ void ExecReScanAgg(AggState *node) { ExprContext *econtext = node->ss.ps.ps_ExprContext; + Agg *aggnode = (Agg *) node->ss.ps.plan; int aggno; node->agg_done = false; node->ss.ps.ps_TupFromTlist = false; - if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED) + if (aggnode->aggstrategy == AGG_HASHED) { /* * In the hashed case, if we haven't yet built the hash table then we @@ -2067,11 +2068,13 @@ ExecReScanAgg(AggState *node) return; /* - * If we do have the hash table and the subplan does not have any - * parameter changes, then we can just rescan the existing hash table; - * no need to build it again. + * If we do have the hash table, and the subplan does not have any + * parameter changes, and none of our own parameter changes affect + * input expressions of the aggregated functions, then we can just + * rescan the existing hash table; no need to build it again. */ - if (node->ss.ps.lefttree->chgParam == NULL) + if (node->ss.ps.lefttree->chgParam == NULL && + !bms_overlap(node->ss.ps.chgParam, aggnode->aggParams)) { ResetTupleHashIterator(node->hashtable, &node->hashiter); return; @@ -2110,7 +2113,7 @@ ExecReScanAgg(AggState *node) */ MemoryContextResetAndDeleteChildren(node->aggcontext); - if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED) + if (aggnode->aggstrategy == AGG_HASHED) { /* Rebuild an empty hash table */ build_hash_table(node); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 04311dccfe5..823ca3e2cd9 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -779,6 +779,7 @@ _copyAgg(const Agg *from) COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid)); } COPY_SCALAR_FIELD(numGroups); + COPY_BITMAPSET_FIELD(aggParams); return newnode; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index ec58a198f9b..9c16a1f4ea9 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -643,6 +643,7 @@ _outAgg(StringInfo str, const Agg *node) appendStringInfo(str, " %u", node->grpOperators[i]); WRITE_LONG_FIELD(numGroups); + WRITE_BITMAPSET_FIELD(aggParams); } static void diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 9cf133d8a16..addb030b6b5 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -4299,6 +4299,7 @@ make_agg(PlannerInfo *root, List *tlist, List *qual, node->grpColIdx = grpColIdx; node->grpOperators = grpOperators; node->numGroups = numGroups; + node->aggParams = NULL; /* SS_finalize_plan() will fill this */ copy_plan_costsize(plan, lefttree); /* only care about copying size */ cost_agg(&agg_path, root, diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index be92049ec4d..89bcbfe3a38 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -80,6 +80,7 @@ static Bitmapset *finalize_plan(PlannerInfo *root, Bitmapset *valid_params, Bitmapset *scan_params); static bool finalize_primnode(Node *node, finalize_primnode_context *context); +static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context); /* @@ -2380,6 +2381,29 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, locally_added_param); break; + case T_Agg: + { + Agg *agg = (Agg *) plan; + + /* + * AGG_HASHED plans need to know which Params are referenced + * in aggregate calls. Do a separate scan to identify them. + */ + if (agg->aggstrategy == AGG_HASHED) + { + finalize_primnode_context aggcontext; + + aggcontext.root = root; + aggcontext.paramids = NULL; + finalize_agg_primnode((Node *) agg->plan.targetlist, + &aggcontext); + finalize_agg_primnode((Node *) agg->plan.qual, + &aggcontext); + agg->aggParams = aggcontext.paramids; + } + } + break; + case T_WindowAgg: finalize_primnode(((WindowAgg *) plan)->startOffset, &context); @@ -2388,7 +2412,6 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, break; case T_Hash: - case T_Agg: case T_Material: case T_Sort: case T_Unique: @@ -2532,6 +2555,29 @@ finalize_primnode(Node *node, finalize_primnode_context *context) (void *) context); } +/* + * finalize_agg_primnode: find all Aggref nodes in the given expression tree, + * and add IDs of all PARAM_EXEC params appearing within their aggregated + * arguments to the result set. + */ +static bool +finalize_agg_primnode(Node *node, finalize_primnode_context *context) +{ + if (node == NULL) + return false; + if (IsA(node, Aggref)) + { + Aggref *agg = (Aggref *) node; + + /* we should not consider the direct arguments, if any */ + finalize_primnode((Node *) agg->args, context); + finalize_primnode((Node *) agg->aggfilter, context); + return false; /* there can't be any Aggrefs below here */ + } + return expression_tree_walker(node, finalize_agg_primnode, + (void *) context); +} + /* * SS_make_initplan_from_plan - given a plan tree, make it an InitPlan * diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 3b9c6838295..fab1c3b820c 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -631,6 +631,8 @@ typedef struct Agg AttrNumber *grpColIdx; /* their indexes in the target list */ Oid *grpOperators; /* equality operators to compare with */ long numGroups; /* estimated number of groups in input */ + Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */ + /* Note: planner provides numGroups & aggParams only in AGG_HASHED case */ } Agg; /* ---------------- diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 40f5398b72c..a43f674a9e3 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -366,6 +366,81 @@ from tenk1 o; 9999 (1 row) +-- Test handling of Params within aggregate arguments in hashed aggregation. +-- Per bug report from Jeevan Chalke. +explain (verbose, costs off) +select s1, s2, sm +from generate_series(1, 3) s1, + lateral (select s2, sum(s1 + s2) sm + from generate_series(1, 3) s2 group by s2) ss +order by 1, 2; + QUERY PLAN +------------------------------------------------------------------ + Sort + Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2))) + Sort Key: s1.s1, s2.s2 + -> Nested Loop + Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2))) + -> Function Scan on pg_catalog.generate_series s1 + Output: s1.s1 + Function Call: generate_series(1, 3) + -> HashAggregate + Output: s2.s2, sum((s1.s1 + s2.s2)) + Group Key: s2.s2 + -> Function Scan on pg_catalog.generate_series s2 + Output: s2.s2 + Function Call: generate_series(1, 3) +(14 rows) + +select s1, s2, sm +from generate_series(1, 3) s1, + lateral (select s2, sum(s1 + s2) sm + from generate_series(1, 3) s2 group by s2) ss +order by 1, 2; + s1 | s2 | sm +----+----+---- + 1 | 1 | 2 + 1 | 2 | 3 + 1 | 3 | 4 + 2 | 1 | 3 + 2 | 2 | 4 + 2 | 3 | 5 + 3 | 1 | 4 + 3 | 2 | 5 + 3 | 3 | 6 +(9 rows) + +explain (verbose, costs off) +select array(select sum(x+y) s + from generate_series(1,3) y group by y order by s) + from generate_series(1,3) x; + QUERY PLAN +------------------------------------------------------------------- + Function Scan on pg_catalog.generate_series x + Output: (SubPlan 1) + Function Call: generate_series(1, 3) + SubPlan 1 + -> Sort + Output: (sum((x.x + y.y))), y.y + Sort Key: (sum((x.x + y.y))) + -> HashAggregate + Output: sum((x.x + y.y)), y.y + Group Key: y.y + -> Function Scan on pg_catalog.generate_series y + Output: y.y + Function Call: generate_series(1, 3) +(13 rows) + +select array(select sum(x+y) s + from generate_series(1,3) y group by y order by s) + from generate_series(1,3) x; + array +--------- + {2,3,4} + {3,4,5} + {4,5,6} +(3 rows) + -- -- test for bitwise integer aggregates -- diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index a84327d24cc..c12a2d8b7b6 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -98,6 +98,28 @@ select (select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1))) from tenk1 o; +-- Test handling of Params within aggregate arguments in hashed aggregation. +-- Per bug report from Jeevan Chalke. +explain (verbose, costs off) +select s1, s2, sm +from generate_series(1, 3) s1, + lateral (select s2, sum(s1 + s2) sm + from generate_series(1, 3) s2 group by s2) ss +order by 1, 2; +select s1, s2, sm +from generate_series(1, 3) s1, + lateral (select s2, sum(s1 + s2) sm + from generate_series(1, 3) s2 group by s2) ss +order by 1, 2; + +explain (verbose, costs off) +select array(select sum(x+y) s + from generate_series(1,3) y group by y order by s) + from generate_series(1,3) x; +select array(select sum(x+y) s + from generate_series(1,3) y group by y order by s) + from generate_series(1,3) x; + -- -- test for bitwise integer aggregates --