Move per-agg and per-trans duplicate finding to the planner.
This has the advantage that the cost estimates for aggregates can count
the number of calls to transition and final functions correctly.

Bump catalog version, because views can contain Aggrefs.

Reviewed-by: Andres Freund
Discussion: https://www.postgresql.org/message-id/b2e3536b-1dbc-8303-c97e-89cb0b4a9a48%40iki.fi
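For context, the duplicate detection that now happens in the planner boils down to treating two aggregate calls as the same when they invoke the same aggregate function on equal argument expressions, so that only the distinct calls are charged transition-function work. The following standalone sketch illustrates that idea only; the struct and helper names are invented for illustration and are not PostgreSQL's.

/*
 * Illustration only: count how many aggregate calls are distinct, treating
 * calls with the same function and the same argument expression as
 * duplicates that can share one transition state.
 */
#include <stdio.h>
#include <string.h>

typedef struct AggCall
{
    unsigned int fnoid;     /* stand-in for the aggregate's function OID */
    const char *arg;        /* stand-in for the argument expression */
} AggCall;

static int
count_distinct_aggs(const AggCall *calls, int ncalls)
{
    int         ndistinct = 0;

    for (int i = 0; i < ncalls; i++)
    {
        int         j;

        for (j = 0; j < i; j++)
            if (calls[j].fnoid == calls[i].fnoid &&
                strcmp(calls[j].arg, calls[i].arg) == 0)
                break;          /* found an earlier duplicate */
        if (j == i)
            ndistinct++;
    }
    return ndistinct;
}

int
main(void)
{
    /* e.g. SELECT sum(x), sum(x), avg(x) FROM t: two distinct aggregates */
    AggCall     calls[] = {{1, "x"}, {1, "x"}, {2, "x"}};

    printf("distinct aggregates: %d\n", count_distinct_aggs(calls, 3));
    return 0;
}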
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -47,7 +47,7 @@
 #include "utils/lsyscache.h"
 #include "utils/syscache.h"
 
-static bool find_minmax_aggs_walker(Node *node, List **context);
+static bool can_minmax_aggs(PlannerInfo *root, List **context);
 static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
                               Oid eqop, Oid sortop, bool nulls_first);
 static void minmax_qp_callback(PlannerInfo *root, void *extra);
@@ -66,7 +66,8 @@ static Oid fetch_agg_sort_op(Oid aggfnoid);
  * query_planner(), because we generate indexscan paths by cloning the
  * planner's state and invoking query_planner() on a modified version of
  * the query parsetree.  Thus, all preprocessing needed before query_planner()
- * must already be done.
+ * must already be done.  This relies on the list of aggregates in
+ * root->agginfos, so preprocess_aggrefs() must have been called already, too.
  */
 void
 preprocess_minmax_aggregates(PlannerInfo *root)
@@ -140,9 +141,7 @@ preprocess_minmax_aggregates(PlannerInfo *root)
     * all are MIN/MAX aggregates.  Stop as soon as we find one that isn't.
     */
    aggs_list = NIL;
-   if (find_minmax_aggs_walker((Node *) root->processed_tlist, &aggs_list))
-       return;
-   if (find_minmax_aggs_walker(parse->havingQual, &aggs_list))
+   if (!can_minmax_aggs(root, &aggs_list))
        return;
 
    /*
@@ -227,38 +226,33 @@ preprocess_minmax_aggregates(PlannerInfo *root)
 }
 
 /*
- * find_minmax_aggs_walker
- *     Recursively scan the Aggref nodes in an expression tree, and check
- *     that each one is a MIN/MAX aggregate.  If so, build a list of the
+ * can_minmax_aggs
+ *     Walk through all the aggregates in the query, and check
+ *     if they are all MIN/MAX aggregates.  If so, build a list of the
  *     distinct aggregate calls in the tree.
  *
- * Returns true if a non-MIN/MAX aggregate is found, false otherwise.
- * (This seemingly-backward definition is used because expression_tree_walker
- * aborts the scan on true return, which is what we want.)
- *
- * Found aggregates are added to the list at *context; it's up to the caller
- * to initialize the list to NIL.
+ * Returns false if a non-MIN/MAX aggregate is found, true otherwise.
  *
  * This does not descend into subqueries, and so should be used only after
  * reduction of sublinks to subplans.  There mustn't be outer-aggregate
  * references either.
  */
 static bool
-find_minmax_aggs_walker(Node *node, List **context)
+can_minmax_aggs(PlannerInfo *root, List **context)
 {
-   if (node == NULL)
-       return false;
-   if (IsA(node, Aggref))
+   ListCell   *lc;
+
+   foreach(lc, root->agginfos)
    {
-       Aggref     *aggref = (Aggref *) node;
+       AggInfo    *agginfo = (AggInfo *) lfirst(lc);
+       Aggref     *aggref = agginfo->representative_aggref;
        Oid         aggsortop;
        TargetEntry *curTarget;
        MinMaxAggInfo *mminfo;
-       ListCell   *l;
 
        Assert(aggref->agglevelsup == 0);
        if (list_length(aggref->args) != 1)
-           return true;        /* it couldn't be MIN/MAX */
+           return false;       /* it couldn't be MIN/MAX */
 
        /*
         * ORDER BY is usually irrelevant for MIN/MAX, but it can change the
@@ -274,7 +268,7 @@ find_minmax_aggs_walker(Node *node, List **context)
         * quickly.
         */
        if (aggref->aggorder != NIL)
-           return true;
+           return false;
        /* note: we do not care if DISTINCT is mentioned ... */
 
        /*
@@ -283,30 +277,19 @@ find_minmax_aggs_walker(Node *node, List **context)
         * now, just punt.
         */
        if (aggref->aggfilter != NULL)
-           return true;
+           return false;
 
        aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
        if (!OidIsValid(aggsortop))
-           return true;        /* not a MIN/MAX aggregate */
+           return false;       /* not a MIN/MAX aggregate */
 
        curTarget = (TargetEntry *) linitial(aggref->args);
 
        if (contain_mutable_functions((Node *) curTarget->expr))
-           return true;        /* not potentially indexable */
+           return false;       /* not potentially indexable */
 
        if (type_is_rowtype(exprType((Node *) curTarget->expr)))
-           return true;        /* IS NOT NULL would have weird semantics */
-
-       /*
-        * Check whether it's already in the list, and add it if not.
-        */
-       foreach(l, *context)
-       {
-           mminfo = (MinMaxAggInfo *) lfirst(l);
-           if (mminfo->aggfnoid == aggref->aggfnoid &&
-               equal(mminfo->target, curTarget->expr))
-               return false;
-       }
+           return false;       /* IS NOT NULL would have weird semantics */
 
        mminfo = makeNode(MinMaxAggInfo);
        mminfo->aggfnoid = aggref->aggfnoid;
@@ -318,16 +301,8 @@ find_minmax_aggs_walker(Node *node, List **context)
        mminfo->param = NULL;
 
        *context = lappend(*context, mminfo);
-
-       /*
-        * We need not recurse into the argument, since it can't contain any
-        * aggregates.
-        */
-       return false;
    }
-   Assert(!IsA(node, SubLink));
-   return expression_tree_walker(node, find_minmax_aggs_walker,
-                                 (void *) context);
+   return true;
 }
 
 /*
@@ -368,6 +343,8 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
    subroot->plan_params = NIL;
    subroot->outer_params = NULL;
    subroot->init_plans = NIL;
+   subroot->agginfos = NIL;
+   subroot->aggtransinfos = NIL;
 
    subroot->parse = parse = copyObject(root->parse);
    IncrementVarSublevelsUp((Node *) parse, 1, 1);
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -152,7 +152,6 @@ static RelOptInfo *create_grouping_paths(PlannerInfo *root,
                                          RelOptInfo *input_rel,
                                          PathTarget *target,
                                          bool target_parallel_safe,
-                                         const AggClauseCosts *agg_costs,
                                          grouping_sets_data *gd);
 static bool is_degenerate_grouping(PlannerInfo *root);
 static void create_degenerate_grouping_paths(PlannerInfo *root,
@@ -228,8 +227,7 @@ static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root,
                                                 GroupPathExtraData *extra,
                                                 bool force_rel_creation);
 static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel);
-static bool can_partial_agg(PlannerInfo *root,
-                            const AggClauseCosts *agg_costs);
+static bool can_partial_agg(PlannerInfo *root);
 static void apply_scanjoin_target_to_paths(PlannerInfo *root,
                                            RelOptInfo *rel,
                                            List *scanjoin_targets,
@@ -1944,7 +1942,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
    bool        scanjoin_target_parallel_safe;
    bool        scanjoin_target_same_exprs;
    bool        have_grouping;
-   AggClauseCosts agg_costs;
    WindowFuncLists *wflists = NULL;
    List       *activeWindows = NIL;
    grouping_sets_data *gset_data = NULL;
@@ -1975,25 +1972,16 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
    root->processed_tlist = preprocess_targetlist(root);
 
    /*
-    * Collect statistics about aggregates for estimating costs, and mark
-    * all the aggregates with resolved aggtranstypes.  We must do this
-    * before slicing and dicing the tlist into various pathtargets, else
-    * some copies of the Aggref nodes might escape being marked with the
-    * correct transtypes.
-    *
-    * Note: currently, we do not detect duplicate aggregates here.  This
-    * may result in somewhat-overestimated cost, which is fine for our
-    * purposes since all Paths will get charged the same.  But at some
-    * point we might wish to do that detection in the planner, rather
-    * than during executor startup.
+    * Mark all the aggregates with resolved aggtranstypes, and detect
+    * aggregates that are duplicates or can share transition state.  We
+    * must do this before slicing and dicing the tlist into various
+    * pathtargets, else some copies of the Aggref nodes might escape
+    * being marked.
     */
-   MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
    if (parse->hasAggs)
    {
-       get_agg_clause_costs(root, (Node *) root->processed_tlist,
-                            AGGSPLIT_SIMPLE, &agg_costs);
-       get_agg_clause_costs(root, parse->havingQual, AGGSPLIT_SIMPLE,
-                            &agg_costs);
+       preprocess_aggrefs(root, (Node *) root->processed_tlist);
+       preprocess_aggrefs(root, (Node *) parse->havingQual);
    }
 
    /*
@@ -2198,7 +2186,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
                                               current_rel,
                                               grouping_target,
                                               grouping_target_parallel_safe,
-                                              &agg_costs,
                                               gset_data);
        /* Fix things up if grouping_target contains SRFs */
        if (parse->hasTargetSRFs)
@@ -3790,7 +3777,6 @@ get_number_of_groups(PlannerInfo *root,
 *
 * input_rel: contains the source-data Paths
 * target: the pathtarget for the result Paths to compute
- * agg_costs: cost info about all aggregates in query (in AGGSPLIT_SIMPLE mode)
 * gd: grouping sets data including list of grouping sets and their clauses
 *
 * Note: all Paths in input_rel are expected to return the target computed
@@ -3801,12 +3787,15 @@ create_grouping_paths(PlannerInfo *root,
                      RelOptInfo *input_rel,
                      PathTarget *target,
                      bool target_parallel_safe,
-                     const AggClauseCosts *agg_costs,
                      grouping_sets_data *gd)
 {
    Query      *parse = root->parse;
    RelOptInfo *grouped_rel;
    RelOptInfo *partially_grouped_rel;
+   AggClauseCosts agg_costs;
+
+   MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
+   get_agg_clause_costs(root, AGGSPLIT_SIMPLE, &agg_costs);
 
    /*
     * Create grouping relation to hold fully aggregated grouping and/or
@@ -3862,14 +3851,14 @@ create_grouping_paths(PlannerInfo *root,
     * the other gating conditions, so we want to do it last.
     */
    if ((parse->groupClause != NIL &&
-        agg_costs->numOrderedAggs == 0 &&
+        root->numOrderedAggs == 0 &&
         (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause))))
        flags |= GROUPING_CAN_USE_HASH;
 
    /*
     * Determine whether partial aggregation is possible.
     */
-   if (can_partial_agg(root, agg_costs))
+   if (can_partial_agg(root))
        flags |= GROUPING_CAN_PARTIAL_AGG;
 
    extra.flags = flags;
@@ -3890,7 +3879,7 @@ create_grouping_paths(PlannerInfo *root,
        extra.patype = PARTITIONWISE_AGGREGATE_NONE;
 
        create_ordinary_grouping_paths(root, input_rel, grouped_rel,
-                                      agg_costs, gd, &extra,
+                                      &agg_costs, gd, &extra,
                                       &partially_grouped_rel);
    }
 
@@ -4248,7 +4237,8 @@ consider_groupingsets_paths(PlannerInfo *root,
        l_start = lnext(gd->rollups, l_start);
    }
 
-   hashsize = estimate_hashagg_tablesize(path,
+   hashsize = estimate_hashagg_tablesize(root,
+                                         path,
                                          agg_costs,
                                          dNumGroups - exclude_groups);
 
@@ -4382,7 +4372,8 @@ consider_groupingsets_paths(PlannerInfo *root,
    /*
     * Account first for space needed for groups we can't sort at all.
     */
-   availspace -= estimate_hashagg_tablesize(path,
+   availspace -= estimate_hashagg_tablesize(root,
+                                            path,
                                             agg_costs,
                                             gd->dNumHashGroups);
 
@@ -4433,7 +4424,8 @@ consider_groupingsets_paths(PlannerInfo *root,
 
        if (rollup->hashable)
        {
-           double      sz = estimate_hashagg_tablesize(path,
+           double      sz = estimate_hashagg_tablesize(root,
+                                                       path,
                                                        agg_costs,
                                                        rollup->numGroups);
 
@@ -6926,20 +6918,12 @@ create_partial_grouping_paths(PlannerInfo *root,
    MemSet(agg_final_costs, 0, sizeof(AggClauseCosts));
    if (parse->hasAggs)
    {
-       List       *partial_target_exprs;
-
        /* partial phase */
-       partial_target_exprs = partially_grouped_rel->reltarget->exprs;
-       get_agg_clause_costs(root, (Node *) partial_target_exprs,
-                            AGGSPLIT_INITIAL_SERIAL,
+       get_agg_clause_costs(root, AGGSPLIT_INITIAL_SERIAL,
                             agg_partial_costs);
 
        /* final phase */
-       get_agg_clause_costs(root, (Node *) grouped_rel->reltarget->exprs,
-                            AGGSPLIT_FINAL_DESERIAL,
-                            agg_final_costs);
-       get_agg_clause_costs(root, extra->havingQual,
-                            AGGSPLIT_FINAL_DESERIAL,
+       get_agg_clause_costs(root, AGGSPLIT_FINAL_DESERIAL,
                             agg_final_costs);
    }
 
@@ -7324,7 +7308,7 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
 * Returns true when possible, false otherwise.
 */
 static bool
-can_partial_agg(PlannerInfo *root, const AggClauseCosts *agg_costs)
+can_partial_agg(PlannerInfo *root)
 {
    Query      *parse = root->parse;
 
@@ -7341,7 +7325,7 @@ can_partial_agg(PlannerInfo *root, const AggClauseCosts *agg_costs)
        /* We don't know how to do grouping sets in parallel. */
        return false;
    }
-   else if (agg_costs->hasNonPartial || agg_costs->hasNonSerial)
+   else if (root->hasNonPartialAggs || root->hasNonSerialAggs)
    {
        /* Insufficient support for partial mode. */
        return false;