Support ordered-set (WITHIN GROUP) aggregates.

This patch introduces generic support for ordered-set and hypothetical-set aggregate functions, as well as implementations of the instances defined in SQL:2008 (percentile_cont(), percentile_disc(), rank(), dense_rank(), percent_rank(), cume_dist()). We also added mode() though it is not in the spec, as well as versions of percentile_cont() and percentile_disc() that can compute multiple percentile values in one pass over the data. Unlike the original submission, this patch puts full control of the sorting process in the hands of the aggregate's support functions. To allow the support functions to find out how they're supposed to sort, a new API function AggGetAggref() is added to nodeAgg.c. This allows retrieval of the aggregate call's Aggref node, which may have other uses beyond the immediate need. There is also support for ordered-set aggregates to install cleanup callback functions, so that they can be sure that infrastructure such as tuplesort objects gets cleaned up. In passing, make some fixes in the recently-added support for variadic aggregates, and make some editorial adjustments in the recent FILTER additions for aggregates. Also, simplify use of IsBinaryCoercible() by allowing it to succeed whenever the target type is ANY or ANYELEMENT. It was inconsistent that it dealt with other polymorphic target types but not these. Atri Sharma and Andrew Gierth; reviewed by Pavel Stehule and Vik Fearing, and rather heavily editorialized upon by Tom Lane
2025-07-15 19:21:59 +03:00 · 2013-12-23 16:11:35 -05:00
parent 37484ad2aa
commit 8d65da1f01
64 changed files with 4686 additions and 755 deletions
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@ -328,17 +328,20 @@ find_minmax_aggs_walker(Node *node, List **context)
 		 * that differs for each of those equal values of the argument
 		 * expression makes the result predictable once again.  This is a
 		 * niche requirement, and we do not implement it with subquery paths.
+		 * In any case, this test lets us reject ordered-set aggregates
+		 * quickly.
 		 */
 		if (aggref->aggorder != NIL)
 			return true;
+		/* note: we do not care if DISTINCT is mentioned ... */

 		/*
 		 * We might implement the optimization when a FILTER clause is present
-		 * by adding the filter to the quals of the generated subquery.
+		 * by adding the filter to the quals of the generated subquery.  For
+		 * now, just punt.
 		 */
 		if (aggref->aggfilter != NULL)
 			return true;
-		/* note: we do not care if DISTINCT is mentioned ... */

 		aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
 		if (!OidIsValid(aggsortop))
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@ -2659,7 +2659,9 @@ choose_hashed_grouping(PlannerInfo *root,
 	 * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
 	 * aggregates.	(Doing so would imply storing *all* the input values in
 	 * the hash table, and/or running many sorts in parallel, either of which
-	 * seems like a certain loser.)
+	 * seems like a certain loser.)  We similarly don't support ordered-set
+	 * aggregates in hashed aggregation, but that case is included in the
+	 * numOrderedAggs count.
 	 */
 	can_hash = (agg_costs->numOrderedAggs == 0 &&
 				grouping_is_hashable(parse->groupClause));
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@ -37,6 +37,7 @@
 #include "optimizer/prep.h"
 #include "optimizer/var.h"
 #include "parser/analyze.h"
+#include "parser/parse_agg.h"
 #include "parser/parse_coerce.h"
 #include "parser/parse_func.h"
 #include "rewrite/rewriteManip.h"
@ -463,9 +464,8 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
 		Oid			aggtranstype;
 		int32		aggtransspace;
 		QualCost	argcosts;
-		Oid		   *inputTypes;
+		Oid			inputTypes[FUNC_MAX_ARGS];
 		int			numArguments;
-		ListCell   *l;

 		Assert(aggref->agglevelsup == 0);

@ -482,7 +482,7 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
 		aggtransspace = aggform->aggtransspace;
 		ReleaseSysCache(aggTuple);

-		/* count it */
+		/* count it; note ordered-set aggs always have nonempty aggorder */
 		costs->numAggs++;
 		if (aggref->aggorder != NIL || aggref->aggdistinct != NIL)
 			costs->numOrderedAggs++;
@ -498,42 +498,39 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
 		costs->transCost.per_tuple += argcosts.per_tuple;

 		/*
-		 * Add the filter's cost to per-input-row costs.  XXX We should reduce
-		 * input expression costs according to filter selectivity.
+		 * Add any filter's cost to per-input-row costs.
+		 *
+		 * XXX Ideally we should reduce input expression costs according to
+		 * filter selectivity, but it's not clear it's worth the trouble.
 		 */
-		cost_qual_eval_node(&argcosts, (Node *) aggref->aggfilter,
-							context->root);
-		costs->transCost.startup += argcosts.startup;
-		costs->transCost.per_tuple += argcosts.per_tuple;
+		if (aggref->aggfilter)
+		{
+			cost_qual_eval_node(&argcosts, (Node *) aggref->aggfilter,
+								context->root);
+			costs->transCost.startup += argcosts.startup;
+			costs->transCost.per_tuple += argcosts.per_tuple;
+		}
+
+		/*
+		 * If there are direct arguments, treat their evaluation cost like the
+		 * cost of the finalfn.
+		 */
+		if (aggref->aggdirectargs)
+		{
+			cost_qual_eval_node(&argcosts, (Node *) aggref->aggdirectargs,
+								context->root);
+			costs->transCost.startup += argcosts.startup;
+			costs->finalCost += argcosts.per_tuple;
+		}

 		/* extract argument types (ignoring any ORDER BY expressions) */
-		inputTypes = (Oid *) palloc(sizeof(Oid) * list_length(aggref->args));
-		numArguments = 0;
-		foreach(l, aggref->args)
-		{
-			TargetEntry *tle = (TargetEntry *) lfirst(l);
-
-			if (!tle->resjunk)
-				inputTypes[numArguments++] = exprType((Node *) tle->expr);
-		}
+		numArguments = get_aggregate_argtypes(aggref, inputTypes);

 		/* resolve actual type of transition state, if polymorphic */
-		if (IsPolymorphicType(aggtranstype))
-		{
-			/* have to fetch the agg's declared input types... */
-			Oid		   *declaredArgTypes;
-			int			agg_nargs;
-
-			(void) get_func_signature(aggref->aggfnoid,
-									  &declaredArgTypes, &agg_nargs);
-			Assert(agg_nargs == numArguments);
-			aggtranstype = enforce_generic_type_consistency(inputTypes,
-															declaredArgTypes,
-															agg_nargs,
-															aggtranstype,
-															false);
-			pfree(declaredArgTypes);
-		}
+		aggtranstype = resolve_aggregate_transtype(aggref->aggfnoid,
+												   aggtranstype,
+												   inputTypes,
+												   numArguments);

 		/*
 		 * If the transition type is pass-by-value then it doesn't add
@ -551,14 +548,16 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
 			else
 			{
 				/*
-				 * If transition state is of same type as first input, assume
-				 * it's the same typmod (same width) as well.  This works for
-				 * cases like MAX/MIN and is probably somewhat reasonable
-				 * otherwise.
+				 * If transition state is of same type as first aggregated
+				 * input, assume it's the same typmod (same width) as well.
+				 * This works for cases like MAX/MIN and is probably somewhat
+				 * reasonable otherwise.
 				 */
+				int			numdirectargs = list_length(aggref->aggdirectargs);
 				int32		aggtranstypmod;

-				if (numArguments > 0 && aggtranstype == inputTypes[0])
+				if (numArguments > numdirectargs &&
+					aggtranstype == inputTypes[numdirectargs])
 					aggtranstypmod = exprTypmod((Node *) linitial(aggref->args));
 				else
 					aggtranstypmod = -1;
@ -587,17 +586,11 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
 		}

 		/*
-		 * Complain if the aggregate's arguments contain any aggregates;
-		 * nested agg functions are semantically nonsensical.  Aggregates in
-		 * the FILTER clause are detected in transformAggregateCall().
-		 */
-		if (contain_agg_clause((Node *) aggref->args))
-			ereport(ERROR,
-					(errcode(ERRCODE_GROUPING_ERROR),
-					 errmsg("aggregate function calls cannot be nested")));
-
-		/*
-		 * Having checked that, we need not recurse into the argument.
+		 * We assume that the parser checked that there are no aggregates (of
+		 * this level anyway) in the aggregated arguments, direct arguments,
+		 * or filter clause.  Hence, we need not recurse into any of them. (If
+		 * either the parser or the planner screws up on this point, the
+		 * executor will still catch it; see ExecInitExpr.)
 		 */
 		return false;
 	}
@ -662,17 +655,10 @@ find_window_functions_walker(Node *node, WindowFuncLists *lists)
 		lists->numWindowFuncs++;

 		/*
-		 * Complain if the window function's arguments contain window
-		 * functions.  Window functions in the FILTER clause are detected in
-		 * transformAggregateCall().
-		 */
-		if (contain_window_function((Node *) wfunc->args))
-			ereport(ERROR,
-					(errcode(ERRCODE_WINDOWING_ERROR),
-					 errmsg("window function calls cannot be nested")));
-
-		/*
-		 * Having checked that, we need not recurse into the argument.
+		 * We assume that the parser checked that there are no window
+		 * functions in the arguments or filter clause.  Hence, we need not
+		 * recurse into them.  (If either the parser or the planner screws up
+		 * on this point, the executor will still catch it; see ExecInitExpr.)
 		 */
 		return false;
 	}