1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-15 19:21:59 +03:00

Support ordered-set (WITHIN GROUP) aggregates.

This patch introduces generic support for ordered-set and hypothetical-set
aggregate functions, as well as implementations of the instances defined in
SQL:2008 (percentile_cont(), percentile_disc(), rank(), dense_rank(),
percent_rank(), cume_dist()).  We also added mode() though it is not in the
spec, as well as versions of percentile_cont() and percentile_disc() that
can compute multiple percentile values in one pass over the data.

Unlike the original submission, this patch puts full control of the sorting
process in the hands of the aggregate's support functions.  To allow the
support functions to find out how they're supposed to sort, a new API
function AggGetAggref() is added to nodeAgg.c.  This allows retrieval of
the aggregate call's Aggref node, which may have other uses beyond the
immediate need.  There is also support for ordered-set aggregates to
install cleanup callback functions, so that they can be sure that
infrastructure such as tuplesort objects gets cleaned up.

In passing, make some fixes in the recently-added support for variadic
aggregates, and make some editorial adjustments in the recent FILTER
additions for aggregates.  Also, simplify use of IsBinaryCoercible() by
allowing it to succeed whenever the target type is ANY or ANYELEMENT.
It was inconsistent that it dealt with other polymorphic target types
but not these.

Atri Sharma and Andrew Gierth; reviewed by Pavel Stehule and Vik Fearing,
and rather heavily editorialized upon by Tom Lane
This commit is contained in:
Tom Lane
2013-12-23 16:11:35 -05:00
parent 37484ad2aa
commit 8d65da1f01
64 changed files with 4686 additions and 755 deletions

View File

@ -328,17 +328,20 @@ find_minmax_aggs_walker(Node *node, List **context)
* that differs for each of those equal values of the argument
* expression makes the result predictable once again. This is a
* niche requirement, and we do not implement it with subquery paths.
* In any case, this test lets us reject ordered-set aggregates
* quickly.
*/
if (aggref->aggorder != NIL)
return true;
/* note: we do not care if DISTINCT is mentioned ... */
/*
* We might implement the optimization when a FILTER clause is present
* by adding the filter to the quals of the generated subquery.
* by adding the filter to the quals of the generated subquery. For
* now, just punt.
*/
if (aggref->aggfilter != NULL)
return true;
/* note: we do not care if DISTINCT is mentioned ... */
aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
if (!OidIsValid(aggsortop))

View File

@ -2659,7 +2659,9 @@ choose_hashed_grouping(PlannerInfo *root,
* Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
* aggregates. (Doing so would imply storing *all* the input values in
* the hash table, and/or running many sorts in parallel, either of which
* seems like a certain loser.)
* seems like a certain loser.) We similarly don't support ordered-set
* aggregates in hashed aggregation, but that case is included in the
* numOrderedAggs count.
*/
can_hash = (agg_costs->numOrderedAggs == 0 &&
grouping_is_hashable(parse->groupClause));

View File

@ -37,6 +37,7 @@
#include "optimizer/prep.h"
#include "optimizer/var.h"
#include "parser/analyze.h"
#include "parser/parse_agg.h"
#include "parser/parse_coerce.h"
#include "parser/parse_func.h"
#include "rewrite/rewriteManip.h"
@ -463,9 +464,8 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
Oid aggtranstype;
int32 aggtransspace;
QualCost argcosts;
Oid *inputTypes;
Oid inputTypes[FUNC_MAX_ARGS];
int numArguments;
ListCell *l;
Assert(aggref->agglevelsup == 0);
@ -482,7 +482,7 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
aggtransspace = aggform->aggtransspace;
ReleaseSysCache(aggTuple);
/* count it */
/* count it; note ordered-set aggs always have nonempty aggorder */
costs->numAggs++;
if (aggref->aggorder != NIL || aggref->aggdistinct != NIL)
costs->numOrderedAggs++;
@ -498,42 +498,39 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
costs->transCost.per_tuple += argcosts.per_tuple;
/*
* Add the filter's cost to per-input-row costs. XXX We should reduce
* input expression costs according to filter selectivity.
* Add any filter's cost to per-input-row costs.
*
* XXX Ideally we should reduce input expression costs according to
* filter selectivity, but it's not clear it's worth the trouble.
*/
cost_qual_eval_node(&argcosts, (Node *) aggref->aggfilter,
context->root);
costs->transCost.startup += argcosts.startup;
costs->transCost.per_tuple += argcosts.per_tuple;
if (aggref->aggfilter)
{
cost_qual_eval_node(&argcosts, (Node *) aggref->aggfilter,
context->root);
costs->transCost.startup += argcosts.startup;
costs->transCost.per_tuple += argcosts.per_tuple;
}
/*
* If there are direct arguments, treat their evaluation cost like the
* cost of the finalfn.
*/
if (aggref->aggdirectargs)
{
cost_qual_eval_node(&argcosts, (Node *) aggref->aggdirectargs,
context->root);
costs->transCost.startup += argcosts.startup;
costs->finalCost += argcosts.per_tuple;
}
/* extract argument types (ignoring any ORDER BY expressions) */
inputTypes = (Oid *) palloc(sizeof(Oid) * list_length(aggref->args));
numArguments = 0;
foreach(l, aggref->args)
{
TargetEntry *tle = (TargetEntry *) lfirst(l);
if (!tle->resjunk)
inputTypes[numArguments++] = exprType((Node *) tle->expr);
}
numArguments = get_aggregate_argtypes(aggref, inputTypes);
/* resolve actual type of transition state, if polymorphic */
if (IsPolymorphicType(aggtranstype))
{
/* have to fetch the agg's declared input types... */
Oid *declaredArgTypes;
int agg_nargs;
(void) get_func_signature(aggref->aggfnoid,
&declaredArgTypes, &agg_nargs);
Assert(agg_nargs == numArguments);
aggtranstype = enforce_generic_type_consistency(inputTypes,
declaredArgTypes,
agg_nargs,
aggtranstype,
false);
pfree(declaredArgTypes);
}
aggtranstype = resolve_aggregate_transtype(aggref->aggfnoid,
aggtranstype,
inputTypes,
numArguments);
/*
* If the transition type is pass-by-value then it doesn't add
@ -551,14 +548,16 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
else
{
/*
* If transition state is of same type as first input, assume
* it's the same typmod (same width) as well. This works for
* cases like MAX/MIN and is probably somewhat reasonable
* otherwise.
* If transition state is of same type as first aggregated
* input, assume it's the same typmod (same width) as well.
* This works for cases like MAX/MIN and is probably somewhat
* reasonable otherwise.
*/
int numdirectargs = list_length(aggref->aggdirectargs);
int32 aggtranstypmod;
if (numArguments > 0 && aggtranstype == inputTypes[0])
if (numArguments > numdirectargs &&
aggtranstype == inputTypes[numdirectargs])
aggtranstypmod = exprTypmod((Node *) linitial(aggref->args));
else
aggtranstypmod = -1;
@ -587,17 +586,11 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
}
/*
* Complain if the aggregate's arguments contain any aggregates;
* nested agg functions are semantically nonsensical. Aggregates in
* the FILTER clause are detected in transformAggregateCall().
*/
if (contain_agg_clause((Node *) aggref->args))
ereport(ERROR,
(errcode(ERRCODE_GROUPING_ERROR),
errmsg("aggregate function calls cannot be nested")));
/*
* Having checked that, we need not recurse into the argument.
* We assume that the parser checked that there are no aggregates (of
* this level anyway) in the aggregated arguments, direct arguments,
* or filter clause. Hence, we need not recurse into any of them. (If
* either the parser or the planner screws up on this point, the
* executor will still catch it; see ExecInitExpr.)
*/
return false;
}
@ -662,17 +655,10 @@ find_window_functions_walker(Node *node, WindowFuncLists *lists)
lists->numWindowFuncs++;
/*
* Complain if the window function's arguments contain window
* functions. Window functions in the FILTER clause are detected in
* transformAggregateCall().
*/
if (contain_window_function((Node *) wfunc->args))
ereport(ERROR,
(errcode(ERRCODE_WINDOWING_ERROR),
errmsg("window function calls cannot be nested")));
/*
* Having checked that, we need not recurse into the argument.
* We assume that the parser checked that there are no window
* functions in the arguments or filter clause. Hence, we need not
* recurse into them. (If either the parser or the planner screws up
* on this point, the executor will still catch it; see ExecInitExpr.)
*/
return false;
}