1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-26 01:22:12 +03:00

Support window functions a la SQL:2008.

Hitoshi Harada, with some kibitzing from Heikki and Tom.
This commit is contained in:
Tom Lane
2008-12-28 18:54:01 +00:00
parent 38e9348282
commit 95b07bc7f5
92 changed files with 6720 additions and 321 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.177 2008/11/15 19:43:46 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.178 2008/12/28 18:53:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -929,10 +929,13 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
* 1. If the subquery has a LIMIT clause, we must not push down any quals,
* since that could change the set of rows returned.
*
* 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
* 2. If the subquery contains any window functions, we can't push quals
* into it, because that would change the results.
*
* 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
* quals into it, because that would change the results.
*
* 3. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
* 4. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
* push quals into each component query, but the quals can only reference
* subquery columns that suffer no type coercions in the set operation.
* Otherwise there are possible semantic gotchas. So, we check the
@ -950,6 +953,10 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
return false;
/* Check point 2 */
if (subquery->hasWindowFuncs)
return false;
/* Are we at top level, or looking at a setop component? */
if (subquery == topquery)
{
@ -1092,6 +1099,12 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
if (contain_subplans(qual))
return false;
/*
* It would be unsafe to push down window function calls, but at least
* for the moment we could never see any in a qual anyhow.
*/
Assert(!contain_window_function(qual));
/*
* Examine all Vars used in clause; since it's a restriction clause, all
* such Vars must refer to subselect output columns.

View File

@ -54,7 +54,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.201 2008/11/22 22:47:05 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.202 2008/12/28 18:53:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -1283,6 +1283,40 @@ cost_agg(Path *path, PlannerInfo *root,
path->total_cost = total_cost;
}
/*
* cost_windowagg
* Determines and returns the cost of performing a WindowAgg plan node,
* including the cost of its input.
*
* Input is assumed already properly sorted.
*/
void
cost_windowagg(Path *path, PlannerInfo *root,
int numWindowFuncs, int numPartCols, int numOrderCols,
Cost input_startup_cost, Cost input_total_cost,
double input_tuples)
{
Cost startup_cost;
Cost total_cost;
startup_cost = input_startup_cost;
total_cost = input_total_cost;
/*
* We charge one cpu_operator_cost per window function per tuple (often a
* drastic underestimate, but without a way to gauge how many tuples the
* window function will fetch, it's hard to do better). We also charge
* cpu_operator_cost per grouping column per tuple for grouping
* comparisons, plus cpu_tuple_cost per tuple for general overhead.
*/
total_cost += cpu_operator_cost * input_tuples * numWindowFuncs;
total_cost += cpu_operator_cost * input_tuples * (numPartCols + numOrderCols);
total_cost += cpu_tuple_cost * input_tuples;
path->startup_cost = startup_cost;
path->total_cost = total_cost;
}
/*
* cost_group
* Determines and returns the cost of performing a Group plan node,
@ -2155,6 +2189,11 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
* Vars and Consts are charged zero, and so are boolean operators (AND,
* OR, NOT). Simplistic, but a lot better than no model at all.
*
* Note that Aggref and WindowFunc nodes are (and should be) treated
* like Vars --- whatever execution cost they have is absorbed into
* plan-node-specific costing. As far as expression evaluation is
* concerned they're just like Vars.
*
* Should we try to account for the possibility of short-circuit
* evaluation of AND/OR? Probably *not*, because that would make the
* results depend on the clause ordering, and we are not in any position

View File

@ -10,7 +10,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.14 2008/12/01 21:06:13 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.15 2008/12/28 18:53:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -438,14 +438,16 @@ get_eclass_for_sort_expr(PlannerInfo *root,
/*
* add_eq_member doesn't check for volatile functions, set-returning
* functions, or aggregates, but such could appear in sort expressions; so
* we have to check whether its const-marking was correct.
* functions, aggregates, or window functions, but such could appear
* in sort expressions; so we have to check whether its const-marking
* was correct.
*/
if (newec->ec_has_const)
{
if (newec->ec_has_volatile ||
expression_returns_set((Node *) expr) ||
contain_agg_clause((Node *) expr))
contain_agg_clause((Node *) expr) ||
contain_window_function((Node *) expr))
{
newec->ec_has_const = false;
newem->em_is_const = false;

View File

@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.252 2008/11/20 19:52:54 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.253 2008/12/28 18:53:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -3237,8 +3237,8 @@ make_agg(PlannerInfo *root, List *tlist, List *qual,
* anything for Aggref nodes; this is okay since they are really
* comparable to Vars.
*
* See notes in grouping_planner about why this routine and make_group are
* the only ones in this file that worry about tlist eval cost.
* See notes in grouping_planner about why only make_agg, make_windowagg
* and make_group worry about tlist eval cost.
*/
if (qual)
{
@ -3260,6 +3260,53 @@ make_agg(PlannerInfo *root, List *tlist, List *qual,
return node;
}
WindowAgg *
make_windowagg(PlannerInfo *root, List *tlist, int numWindowFuncs,
int partNumCols, AttrNumber *partColIdx, Oid *partOperators,
int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators,
Plan *lefttree)
{
WindowAgg *node = makeNode(WindowAgg);
Plan *plan = &node->plan;
Path windowagg_path; /* dummy for result of cost_windowagg */
QualCost qual_cost;
node->partNumCols = partNumCols;
node->partColIdx = partColIdx;
node->partOperators = partOperators;
node->ordNumCols = ordNumCols;
node->ordColIdx = ordColIdx;
node->ordOperators = ordOperators;
copy_plan_costsize(plan, lefttree); /* only care about copying size */
cost_windowagg(&windowagg_path, root,
numWindowFuncs, partNumCols, ordNumCols,
lefttree->startup_cost,
lefttree->total_cost,
lefttree->plan_rows);
plan->startup_cost = windowagg_path.startup_cost;
plan->total_cost = windowagg_path.total_cost;
/*
* We also need to account for the cost of evaluation of the tlist.
*
* See notes in grouping_planner about why only make_agg, make_windowagg
* and make_group worry about tlist eval cost.
*/
cost_qual_eval(&qual_cost, tlist, root);
plan->startup_cost += qual_cost.startup;
plan->total_cost += qual_cost.startup;
plan->total_cost += qual_cost.per_tuple * plan->plan_rows;
plan->targetlist = tlist;
plan->lefttree = lefttree;
plan->righttree = NULL;
/* WindowAgg nodes never have a qual clause */
plan->qual = NIL;
return node;
}
Group *
make_group(PlannerInfo *root,
List *tlist,
@ -3300,8 +3347,8 @@ make_group(PlannerInfo *root,
* lower plan level and will only be copied by the Group node. Worth
* fixing?
*
* See notes in grouping_planner about why this routine and make_agg are
* the only ones in this file that worry about tlist eval cost.
* See notes in grouping_planner about why only make_agg, make_windowagg
* and make_group worry about tlist eval cost.
*/
if (qual)
{

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.43 2008/08/25 22:42:33 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.44 2008/12/28 18:53:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -95,11 +95,11 @@ optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path)
/*
* Reject unoptimizable cases.
*
* We don't handle GROUP BY, because our current implementations of
* grouping require looking at all the rows anyway, and so there's not
* much point in optimizing MIN/MAX.
* We don't handle GROUP BY or windowing, because our current
* implementations of grouping require looking at all the rows anyway,
* and so there's not much point in optimizing MIN/MAX.
*/
if (parse->groupClause)
if (parse->groupClause || parse->hasWindowFuncs)
return NULL;
/*

View File

@ -14,7 +14,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.112 2008/10/22 20:17:51 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.113 2008/12/28 18:53:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -67,9 +67,9 @@
* PlannerInfo field and not a passed parameter is that the low-level routines
* in indxpath.c need to see it.)
*
* Note: the PlannerInfo node also includes group_pathkeys, distinct_pathkeys,
* and sort_pathkeys, which like query_pathkeys need to be canonicalized once
* the info is available.
* Note: the PlannerInfo node also includes group_pathkeys, window_pathkeys,
* distinct_pathkeys, and sort_pathkeys, which like query_pathkeys need to be
* canonicalized once the info is available.
*
* tuple_fraction is interpreted as follows:
* 0: expect all tuples to be retrieved (normal case)
@ -121,6 +121,8 @@ query_planner(PlannerInfo *root, List *tlist,
root->query_pathkeys);
root->group_pathkeys = canonicalize_pathkeys(root,
root->group_pathkeys);
root->window_pathkeys = canonicalize_pathkeys(root,
root->window_pathkeys);
root->distinct_pathkeys = canonicalize_pathkeys(root,
root->distinct_pathkeys);
root->sort_pathkeys = canonicalize_pathkeys(root,
@ -228,11 +230,12 @@ query_planner(PlannerInfo *root, List *tlist,
/*
* We have completed merging equivalence sets, so it's now possible to
* convert the requested query_pathkeys to canonical form. Also
* canonicalize the groupClause, distinctClause and sortClause pathkeys
* for use later.
* canonicalize the groupClause, windowClause, distinctClause and
* sortClause pathkeys for use later.
*/
root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys);
root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys);
root->window_pathkeys = canonicalize_pathkeys(root, root->window_pathkeys);
root->distinct_pathkeys = canonicalize_pathkeys(root, root->distinct_pathkeys);
root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys);
@ -287,10 +290,12 @@ query_planner(PlannerInfo *root, List *tlist,
* If both GROUP BY and ORDER BY are specified, we will need two
* levels of sort --- and, therefore, certainly need to read all the
* tuples --- unless ORDER BY is a subset of GROUP BY. Likewise if
* we have both DISTINCT and GROUP BY.
* we have both DISTINCT and GROUP BY, or if we have a window
* specification not compatible with the GROUP BY.
*/
if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys) ||
!pathkeys_contained_in(root->distinct_pathkeys, root->group_pathkeys))
!pathkeys_contained_in(root->distinct_pathkeys, root->group_pathkeys) ||
!pathkeys_contained_in(root->window_pathkeys, root->group_pathkeys))
tuple_fraction = 0.0;
}
else if (parse->hasAggs || root->hasHavingQual)

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.247 2008/12/18 18:20:33 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.248 2008/12/28 18:53:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -82,6 +82,18 @@ static void locate_grouping_columns(PlannerInfo *root,
List *sub_tlist,
AttrNumber *groupColIdx);
static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
List *tlist, bool canonicalize);
static void get_column_info_for_window(PlannerInfo *root, WindowClause *wc,
List *tlist,
int numSortCols, AttrNumber *sortColIdx,
int *partNumCols,
AttrNumber **partColIdx,
Oid **partOperators,
int *ordNumCols,
AttrNumber **ordColIdx,
Oid **ordOperators);
/*****************************************************************************
@ -852,6 +864,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
AggClauseCounts agg_counts;
int numGroupCols;
bool use_hashed_grouping = false;
WindowFuncLists *wflists = NULL;
List *activeWindows = NIL;
MemSet(&agg_counts, 0, sizeof(AggClauseCounts));
@ -866,6 +880,22 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
/* Preprocess targetlist */
tlist = preprocess_targetlist(root, tlist);
/*
* Locate any window functions in the tlist. (We don't need to look
* anywhere else, since expressions used in ORDER BY will be in there
* too.) Note that they could all have been eliminated by constant
* folding, in which case we don't need to do any more work.
*/
if (parse->hasWindowFuncs)
{
wflists = find_window_functions((Node *) tlist,
list_length(parse->windowClause));
if (wflists->numWindowFuncs > 0)
activeWindows = select_active_windows(root, wflists);
else
parse->hasWindowFuncs = false;
}
/*
* Generate appropriate target list for subplan; may be different from
* tlist if grouping or aggregation is needed.
@ -890,6 +920,19 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
else
root->group_pathkeys = NIL;
/* We consider only the first (bottom) window in pathkeys logic */
if (activeWindows != NIL)
{
WindowClause *wc = (WindowClause *) linitial(activeWindows);
root->window_pathkeys = make_pathkeys_for_window(root,
wc,
tlist,
false);
}
else
root->window_pathkeys = NIL;
if (parse->distinctClause &&
grouping_is_sortable(parse->distinctClause))
root->distinct_pathkeys =
@ -927,11 +970,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* Figure out whether we want a sorted result from query_planner.
*
* If we have a sortable GROUP BY clause, then we want a result sorted
* properly for grouping. Otherwise, if there's a sortable DISTINCT
* clause that's more rigorous than the ORDER BY clause, we try to
* produce output that's sufficiently well sorted for the DISTINCT.
* Otherwise, if there is an ORDER BY clause, we want to sort by the
* ORDER BY clause.
* properly for grouping. Otherwise, if we have window functions to
* evaluate, we try to sort for the first window. Otherwise, if
* there's a sortable DISTINCT clause that's more rigorous than the
* ORDER BY clause, we try to produce output that's sufficiently well
* sorted for the DISTINCT. Otherwise, if there is an ORDER BY
* clause, we want to sort by the ORDER BY clause.
*
* Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a
* superset of GROUP BY, it would be tempting to request sort by ORDER
@ -942,6 +986,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
*/
if (root->group_pathkeys)
root->query_pathkeys = root->group_pathkeys;
else if (root->window_pathkeys)
root->query_pathkeys = root->window_pathkeys;
else if (list_length(root->distinct_pathkeys) >
list_length(root->sort_pathkeys))
root->query_pathkeys = root->distinct_pathkeys;
@ -1092,10 +1138,10 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
*
* Below this point, any tlist eval cost for added-on nodes
* should be accounted for as we create those nodes.
* Presently, of the node types we can add on, only Agg and
* Group project new tlists (the rest just copy their input
* tuples) --- so make_agg() and make_group() are responsible
* for computing the added cost.
* Presently, of the node types we can add on, only Agg,
* WindowAgg, and Group project new tlists (the rest just copy
* their input tuples) --- so make_agg(), make_windowagg() and
* make_group() are responsible for computing the added cost.
*/
cost_qual_eval(&tlist_cost, sub_tlist, root);
result_plan->startup_cost += tlist_cost.startup;
@ -1225,6 +1271,142 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
NULL);
}
} /* end of non-minmax-aggregate case */
/*
* Since each window function could require a different sort order,
* we stack up a WindowAgg node for each window, with sort steps
* between them as needed.
*/
if (activeWindows)
{
List *window_tlist;
ListCell *l;
/*
* If the top-level plan node is one that cannot do expression
* evaluation, we must insert a Result node to project the
* desired tlist. (In some cases this might not really be
* required, but it's not worth trying to avoid it.) Note that
* on second and subsequent passes through the following loop,
* the top-level node will be a WindowAgg which we know can
* project; so we only need to check once.
*/
if (!is_projection_capable_plan(result_plan))
{
result_plan = (Plan *) make_result(root,
NIL,
NULL,
result_plan);
}
/*
* The "base" targetlist for all steps of the windowing process
* is a flat tlist of all Vars and Aggs needed in the result.
* (In some cases we wouldn't need to propagate all of these
* all the way to the top, since they might only be needed as
* inputs to WindowFuncs. It's probably not worth trying to
* optimize that though.) As we climb up the stack, we add
* outputs for the WindowFuncs computed at each level. Also,
* each input tlist has to present all the columns needed to
* sort the data for the next WindowAgg step. That's handled
* internally by make_sort_from_pathkeys, but we need the
* copyObject steps here to ensure that each plan node has
* a separately modifiable tlist.
*/
window_tlist = flatten_tlist(tlist);
if (parse->hasAggs)
window_tlist = add_to_flat_tlist(window_tlist,
pull_agg_clause((Node *) tlist));
result_plan->targetlist = (List *) copyObject(window_tlist);
foreach(l, activeWindows)
{
WindowClause *wc = (WindowClause *) lfirst(l);
List *window_pathkeys;
int partNumCols;
AttrNumber *partColIdx;
Oid *partOperators;
int ordNumCols;
AttrNumber *ordColIdx;
Oid *ordOperators;
window_pathkeys = make_pathkeys_for_window(root,
wc,
tlist,
true);
/*
* This is a bit tricky: we build a sort node even if we don't
* really have to sort. Even when no explicit sort is needed,
* we need to have suitable resjunk items added to the input
* plan's tlist for any partitioning or ordering columns that
* aren't plain Vars. Furthermore, this way we can use
* existing infrastructure to identify which input columns are
* the interesting ones.
*/
if (window_pathkeys)
{
Sort *sort_plan;
sort_plan = make_sort_from_pathkeys(root,
result_plan,
window_pathkeys,
-1.0);
if (!pathkeys_contained_in(window_pathkeys,
current_pathkeys))
{
/* we do indeed need to sort */
result_plan = (Plan *) sort_plan;
current_pathkeys = window_pathkeys;
}
/* In either case, extract the per-column information */
get_column_info_for_window(root, wc, tlist,
sort_plan->numCols,
sort_plan->sortColIdx,
&partNumCols,
&partColIdx,
&partOperators,
&ordNumCols,
&ordColIdx,
&ordOperators);
}
else
{
/* empty window specification, nothing to sort */
partNumCols = 0;
partColIdx = NULL;
partOperators = NULL;
ordNumCols = 0;
ordColIdx = NULL;
ordOperators = NULL;
}
if (lnext(l))
{
/* Add the current WindowFuncs to the running tlist */
window_tlist = add_to_flat_tlist(window_tlist,
wflists->windowFuncs[wc->winref]);
}
else
{
/* Install the original tlist in the topmost WindowAgg */
window_tlist = tlist;
}
/* ... and make the WindowAgg plan node */
result_plan = (Plan *)
make_windowagg(root,
(List *) copyObject(window_tlist),
list_length(wflists->windowFuncs[wc->winref]),
partNumCols,
partColIdx,
partOperators,
ordNumCols,
ordColIdx,
ordOperators,
result_plan);
}
}
} /* end of if (setOperations) */
/*
@ -2030,7 +2212,8 @@ make_subplanTargetList(PlannerInfo *root,
* If we're not grouping or aggregating, there's nothing to do here;
* query_planner should receive the unmodified target list.
*/
if (!parse->hasAggs && !parse->groupClause && !root->hasHavingQual)
if (!parse->hasAggs && !parse->groupClause && !root->hasHavingQual &&
!parse->hasWindowFuncs)
{
*need_tlist_eval = true;
return tlist;
@ -2039,7 +2222,9 @@ make_subplanTargetList(PlannerInfo *root,
/*
* Otherwise, start with a "flattened" tlist (having just the vars
* mentioned in the targetlist and HAVING qual --- but not upper-level
* Vars; they will be replaced by Params later on).
* Vars; they will be replaced by Params later on). Note this includes
* vars used in resjunk items, so we are covering the needs of ORDER BY
* and window specifications.
*/
sub_tlist = flatten_tlist(tlist);
extravars = pull_var_clause(parse->havingQual, true);
@ -2066,7 +2251,7 @@ make_subplanTargetList(PlannerInfo *root,
{
SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
TargetEntry *te = NULL;
TargetEntry *te;
/*
* Find or make a matching sub_tlist entry. If the groupexpr
@ -2074,20 +2259,10 @@ make_subplanTargetList(PlannerInfo *root,
* won't make multiple groupClause entries for the same TLE.)
*/
if (groupexpr && IsA(groupexpr, Var))
{
ListCell *sl;
te = tlist_member(groupexpr, sub_tlist);
else
te = NULL;
foreach(sl, sub_tlist)
{
TargetEntry *lte = (TargetEntry *) lfirst(sl);
if (equal(groupexpr, lte->expr))
{
te = lte;
break;
}
}
}
if (!te)
{
te = makeTargetEntry((Expr *) groupexpr,
@ -2112,7 +2287,7 @@ make_subplanTargetList(PlannerInfo *root,
*
* This is only needed if we don't use the sub_tlist chosen by
* make_subplanTargetList. We have to forget the column indexes found
* by that routine and re-locate the grouping vars in the real sub_tlist.
* by that routine and re-locate the grouping exprs in the real sub_tlist.
*/
static void
locate_grouping_columns(PlannerInfo *root,
@ -2137,18 +2312,10 @@ locate_grouping_columns(PlannerInfo *root,
{
SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
TargetEntry *te = NULL;
ListCell *sl;
TargetEntry *te = tlist_member(groupexpr, sub_tlist);
foreach(sl, sub_tlist)
{
te = (TargetEntry *) lfirst(sl);
if (equal(groupexpr, te->expr))
break;
}
if (!sl)
if (!te)
elog(ERROR, "failed to locate grouping columns");
groupColIdx[keyno++] = te->resno;
}
}
@ -2190,3 +2357,219 @@ postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
elog(ERROR, "resjunk output columns are not implemented");
return new_tlist;
}
/*
* select_active_windows
* Create a list of the "active" window clauses (ie, those referenced
* by non-deleted WindowFuncs) in the order they are to be executed.
*/
static List *
select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
{
List *result;
List *actives;
ListCell *lc;
/* First, make a list of the active windows */
actives = NIL;
foreach(lc, root->parse->windowClause)
{
WindowClause *wc = (WindowClause *) lfirst(lc);
/* It's only active if wflists shows some related WindowFuncs */
Assert(wc->winref <= wflists->maxWinRef);
if (wflists->windowFuncs[wc->winref] != NIL)
actives = lappend(actives, wc);
}
/*
* Now, ensure that windows with identical partitioning/ordering clauses
* are adjacent in the list. This is required by the SQL standard, which
* says that only one sort is to be used for such windows, even if they
* are otherwise distinct (eg, different names or framing clauses).
*
* There is room to be much smarter here, for example detecting whether
* one window's sort keys are a prefix of another's (so that sorting
* for the latter would do for the former), or putting windows first
* that match a sort order available for the underlying query. For the
* moment we are content with meeting the spec.
*/
result = NIL;
while (actives != NIL)
{
WindowClause *wc = (WindowClause *) linitial(actives);
ListCell *prev;
ListCell *next;
/* Move wc from actives to result */
actives = list_delete_first(actives);
result = lappend(result, wc);
/* Now move any matching windows from actives to result */
prev = NULL;
for (lc = list_head(actives); lc; lc = next)
{
WindowClause *wc2 = (WindowClause *) lfirst(lc);
next = lnext(lc);
if (equal(wc->partitionClause, wc2->partitionClause) &&
equal(wc->orderClause, wc2->orderClause))
{
actives = list_delete_cell(actives, lc, prev);
result = lappend(result, wc2);
}
else
prev = lc;
}
}
return result;
}
/*
* make_pathkeys_for_window
* Create a pathkeys list describing the required input ordering
* for the given WindowClause.
*
* The required ordering is first the PARTITION keys, then the ORDER keys.
* In the future we might try to implement windowing using hashing, in which
* case the ordering could be relaxed, but for now we always sort.
*/
static List *
make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
List *tlist, bool canonicalize)
{
List *window_pathkeys;
List *window_sortclauses;
/* Throw error if can't sort */
if (!grouping_is_sortable(wc->partitionClause))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("could not implement window PARTITION BY"),
errdetail("Window partitioning columns must be of sortable datatypes.")));
if (!grouping_is_sortable(wc->orderClause))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("could not implement window ORDER BY"),
errdetail("Window ordering columns must be of sortable datatypes.")));
/* Okay, make the combined pathkeys */
window_sortclauses = list_concat(list_copy(wc->partitionClause),
list_copy(wc->orderClause));
window_pathkeys = make_pathkeys_for_sortclauses(root,
window_sortclauses,
tlist,
canonicalize);
list_free(window_sortclauses);
return window_pathkeys;
}
/*----------
* get_column_info_for_window
* Get the partitioning/ordering column numbers and equality operators
* for a WindowAgg node.
*
* This depends on the behavior of make_pathkeys_for_window()!
*
* We are given the target WindowClause and an array of the input column
* numbers associated with the resulting pathkeys. In the easy case, there
* are the same number of pathkey columns as partitioning + ordering columns
* and we just have to copy some data around. However, it's possible that
* some of the original partitioning + ordering columns were eliminated as
* redundant during the transformation to pathkeys. (This can happen even
* though the parser gets rid of obvious duplicates. A typical scenario is a
* window specification "PARTITION BY x ORDER BY y" coupled with a clause
* "WHERE x = y" that causes the two sort columns to be recognized as
* redundant.) In that unusual case, we have to work a lot harder to
* determine which keys are significant.
*
* The method used here is a bit brute-force: add the sort columns to a list
* one at a time and note when the resulting pathkey list gets longer. But
* it's a sufficiently uncommon case that a faster way doesn't seem worth
* the amount of code refactoring that'd be needed.
*----------
*/
static void
get_column_info_for_window(PlannerInfo *root, WindowClause *wc, List *tlist,
int numSortCols, AttrNumber *sortColIdx,
int *partNumCols,
AttrNumber **partColIdx,
Oid **partOperators,
int *ordNumCols,
AttrNumber **ordColIdx,
Oid **ordOperators)
{
int numPart = list_length(wc->partitionClause);
int numOrder = list_length(wc->orderClause);
if (numSortCols == numPart + numOrder)
{
/* easy case */
*partNumCols = numPart;
*partColIdx = sortColIdx;
*partOperators = extract_grouping_ops(wc->partitionClause);
*ordNumCols = numOrder;
*ordColIdx = sortColIdx + numPart;
*ordOperators = extract_grouping_ops(wc->orderClause);
}
else
{
List *sortclauses;
List *pathkeys;
int scidx;
ListCell *lc;
/* first, allocate what's certainly enough space for the arrays */
*partNumCols = 0;
*partColIdx = (AttrNumber *) palloc(numPart * sizeof(AttrNumber));
*partOperators = (Oid *) palloc(numPart * sizeof(Oid));
*ordNumCols = 0;
*ordColIdx = (AttrNumber *) palloc(numOrder * sizeof(AttrNumber));
*ordOperators = (Oid *) palloc(numOrder * sizeof(Oid));
sortclauses = NIL;
pathkeys = NIL;
scidx = 0;
foreach(lc, wc->partitionClause)
{
SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
List *new_pathkeys;
sortclauses = lappend(sortclauses, sgc);
new_pathkeys = make_pathkeys_for_sortclauses(root,
sortclauses,
tlist,
true);
if (list_length(new_pathkeys) > list_length(pathkeys))
{
/* this sort clause is actually significant */
*partColIdx[*partNumCols] = sortColIdx[scidx++];
*partOperators[*partNumCols] = sgc->eqop;
(*partNumCols)++;
pathkeys = new_pathkeys;
}
}
foreach(lc, wc->orderClause)
{
SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
List *new_pathkeys;
sortclauses = lappend(sortclauses, sgc);
new_pathkeys = make_pathkeys_for_sortclauses(root,
sortclauses,
tlist,
true);
if (list_length(new_pathkeys) > list_length(pathkeys))
{
/* this sort clause is actually significant */
*ordColIdx[*ordNumCols] = sortColIdx[scidx++];
*ordOperators[*ordNumCols] = sgc->eqop;
(*ordNumCols)++;
pathkeys = new_pathkeys;
}
}
/* complain if we didn't eat exactly the right number of sort cols */
if (scidx != numSortCols)
elog(ERROR, "failed to deconstruct sort operators into partitioning/ordering operators");
}
}

View File

@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/setrefs.c,v 1.146 2008/10/21 20:42:53 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/setrefs.c,v 1.147 2008/12/28 18:53:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -415,6 +415,7 @@ set_plan_refs(PlannerGlobal *glob, Plan *plan, int rtoffset)
}
break;
case T_Agg:
case T_WindowAgg:
case T_Group:
set_upper_references(glob, plan, rtoffset);
break;
@ -679,6 +680,11 @@ fix_expr_common(PlannerGlobal *glob, Node *node)
record_plan_function_dependency(glob,
((Aggref *) node)->aggfnoid);
}
else if (IsA(node, WindowFunc))
{
record_plan_function_dependency(glob,
((WindowFunc *) node)->winfnoid);
}
else if (IsA(node, FuncExpr))
{
record_plan_function_dependency(glob,

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.143 2008/12/08 00:16:09 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.144 2008/12/28 18:53:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -1243,6 +1243,7 @@ simplify_EXISTS_query(Query *query)
query->intoClause ||
query->setOperations ||
query->hasAggs ||
query->hasWindowFuncs ||
query->havingQual ||
query->limitOffset ||
query->limitCount ||
@ -1258,13 +1259,14 @@ simplify_EXISTS_query(Query *query)
/*
* Otherwise, we can throw away the targetlist, as well as any GROUP,
* DISTINCT, and ORDER BY clauses; none of those clauses will change
* a nonzero-rows result to zero rows or vice versa. (Furthermore,
* WINDOW, DISTINCT, and ORDER BY clauses; none of those clauses will
* change a nonzero-rows result to zero rows or vice versa. (Furthermore,
* since our parsetree representation of these clauses depends on the
* targetlist, we'd better throw them away if we drop the targetlist.)
*/
query->targetList = NIL;
query->groupClause = NIL;
query->windowClause = NIL;
query->distinctClause = NIL;
query->sortClause = NIL;
query->hasDistinctOn = false;
@ -1321,8 +1323,8 @@ convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
* The rest of the sub-select must not refer to any Vars of the parent
* query. (Vars of higher levels should be okay, though.)
*
* Note: we need not check for Aggs separately because we know the
* sub-select is as yet unoptimized; any uplevel Agg must therefore
* Note: we need not check for Aggrefs separately because we know the
* sub-select is as yet unoptimized; any uplevel Aggref must therefore
* contain an uplevel Var reference. This is not the case below ...
*/
if (contain_vars_of_level((Node *) subselect, 1))
@ -1432,7 +1434,8 @@ convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
/*
* And there can't be any child Vars in the stuff we intend to pull up.
* (Note: we'd need to check for child Aggs too, except we know the
* child has no aggs at all because of simplify_EXISTS_query's check.)
* child has no aggs at all because of simplify_EXISTS_query's check.
* The same goes for window functions.)
*/
if (contain_vars_of_level((Node *) leftargs, 0))
return NULL;
@ -1955,6 +1958,7 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params)
case T_RecursiveUnion:
case T_Hash:
case T_Agg:
case T_WindowAgg:
case T_SeqScan:
case T_Material:
case T_Sort:

View File

@ -16,7 +16,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/prep/prepjointree.c,v 1.60 2008/11/11 19:05:21 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/prep/prepjointree.c,v 1.61 2008/12/28 18:53:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -742,7 +742,10 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
* Miscellaneous housekeeping.
*/
parse->hasSubLinks |= subquery->hasSubLinks;
/* subquery won't be pulled up if it hasAggs, so no work there */
/*
* subquery won't be pulled up if it hasAggs or hasWindowFuncs, so no
* work needed on those flags
*/
/*
* Return the adjusted subquery jointree to replace the RangeTblRef entry
@ -931,6 +934,7 @@ is_simple_subquery(Query *subquery)
* limiting, or WITH. (XXX WITH could possibly be allowed later)
*/
if (subquery->hasAggs ||
subquery->hasWindowFuncs ||
subquery->groupClause ||
subquery->havingQual ||
subquery->sortClause ||

View File

@ -22,7 +22,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.162 2008/11/15 19:43:46 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.163 2008/12/28 18:53:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -136,6 +136,7 @@ plan_set_operations(PlannerInfo *root, double tuple_fraction,
Assert(parse->jointree->quals == NULL);
Assert(parse->groupClause == NIL);
Assert(parse->havingQual == NULL);
Assert(parse->windowClause == NIL);
Assert(parse->distinctClause == NIL);
/*

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.271 2008/12/18 18:20:34 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.272 2008/12/28 18:53:57 tgl Exp $
*
* HISTORY
* AUTHOR DATE MAJOR EVENT
@ -72,7 +72,9 @@ typedef struct
} substitute_actual_srf_parameters_context;
static bool contain_agg_clause_walker(Node *node, void *context);
static bool pull_agg_clause_walker(Node *node, List **context);
static bool count_agg_clauses_walker(Node *node, AggClauseCounts *counts);
static bool find_window_functions_walker(Node *node, WindowFuncLists *lists);
static bool expression_returns_set_rows_walker(Node *node, double *count);
static bool contain_subplans_walker(Node *node, void *context);
static bool contain_mutable_functions_walker(Node *node, void *context);
@ -388,6 +390,41 @@ contain_agg_clause_walker(Node *node, void *context)
return expression_tree_walker(node, contain_agg_clause_walker, context);
}
/*
* pull_agg_clause
* Recursively search for Aggref nodes within a clause.
*
* Returns a List of all Aggrefs found.
*
* This does not descend into subqueries, and so should be used only after
* reduction of sublinks to subplans, or in contexts where it's known there
* are no subqueries. There mustn't be outer-aggregate references either.
*/
List *
pull_agg_clause(Node *clause)
{
List *result = NIL;
(void) pull_agg_clause_walker(clause, &result);
return result;
}
static bool
pull_agg_clause_walker(Node *node, List **context)
{
if (node == NULL)
return false;
if (IsA(node, Aggref))
{
Assert(((Aggref *) node)->agglevelsup == 0);
*context = lappend(*context, node);
return false; /* no need to descend into arguments */
}
Assert(!IsA(node, SubLink));
return expression_tree_walker(node, pull_agg_clause_walker,
(void *) context);
}
/*
* count_agg_clauses
* Recursively count the Aggref nodes in an expression tree.
@ -519,6 +556,79 @@ count_agg_clauses_walker(Node *node, AggClauseCounts *counts)
}
/*****************************************************************************
* Window-function clause manipulation
*****************************************************************************/
/*
* contain_window_function
* Recursively search for WindowFunc nodes within a clause.
*
* Since window functions don't have level fields, but are hard-wired to
* be associated with the current query level, this is just the same as
* rewriteManip.c's function.
*/
bool
contain_window_function(Node *clause)
{
return checkExprHasWindowFuncs(clause);
}
/*
* find_window_functions
* Locate all the WindowFunc nodes in an expression tree, and organize
* them by winref ID number.
*
* Caller must provide an upper bound on the winref IDs expected in the tree.
*/
WindowFuncLists *
find_window_functions(Node *clause, Index maxWinRef)
{
WindowFuncLists *lists = palloc(sizeof(WindowFuncLists));
lists->numWindowFuncs = 0;
lists->maxWinRef = maxWinRef;
lists->windowFuncs = (List **) palloc0((maxWinRef + 1) * sizeof(List *));
(void) find_window_functions_walker(clause, lists);
return lists;
}
static bool
find_window_functions_walker(Node *node, WindowFuncLists *lists)
{
if (node == NULL)
return false;
if (IsA(node, WindowFunc))
{
WindowFunc *wfunc = (WindowFunc *) node;
/* winref is unsigned, so one-sided test is OK */
if (wfunc->winref > lists->maxWinRef)
elog(ERROR, "WindowFunc contains out-of-range winref %u",
wfunc->winref);
lists->windowFuncs[wfunc->winref] =
lappend(lists->windowFuncs[wfunc->winref], wfunc);
lists->numWindowFuncs++;
/*
* Complain if the window function's arguments contain window functions
*/
if (contain_window_function((Node *) wfunc->args))
ereport(ERROR,
(errcode(ERRCODE_WINDOWING_ERROR),
errmsg("window function calls cannot be nested")));
/*
* Having checked that, we need not recurse into the argument.
*/
return false;
}
Assert(!IsA(node, SubLink));
return expression_tree_walker(node, find_window_functions_walker,
(void *) lists);
}
/*****************************************************************************
* Support for expressions returning sets
*****************************************************************************/
@ -567,6 +677,8 @@ expression_returns_set_rows_walker(Node *node, double *count)
/* Avoid recursion for some cases that can't return a set */
if (IsA(node, Aggref))
return false;
if (IsA(node, WindowFunc))
return false;
if (IsA(node, DistinctExpr))
return false;
if (IsA(node, ScalarArrayOpExpr))
@ -897,6 +1009,11 @@ contain_nonstrict_functions_walker(Node *node, void *context)
/* an aggregate could return non-null with null input */
return true;
}
if (IsA(node, WindowFunc))
{
/* a window function could return non-null with null input */
return true;
}
if (IsA(node, ArrayRef))
{
/* array assignment is nonstrict, but subscripting is strict */
@ -1589,7 +1706,8 @@ is_strict_saop(ScalarArrayOpExpr *expr, bool falseOK)
* not-constant expressions, namely aggregates (Aggrefs). In current usage
* this is only applied to WHERE clauses and so a check for Aggrefs would be
* a waste of cycles; but be sure to also check contain_agg_clause() if you
* want to know about pseudo-constness in other contexts.
* want to know about pseudo-constness in other contexts. The same goes
* for window functions (WindowFuncs).
*/
bool
is_pseudo_constant_clause(Node *clause)
@ -3472,6 +3590,7 @@ inline_function(Oid funcid, Oid result_type, List *args,
querytree->utilityStmt ||
querytree->intoClause ||
querytree->hasAggs ||
querytree->hasWindowFuncs ||
querytree->hasSubLinks ||
querytree->cteList ||
querytree->rtable ||
@ -3479,6 +3598,7 @@ inline_function(Oid funcid, Oid result_type, List *args,
querytree->jointree->quals ||
querytree->groupClause ||
querytree->havingQual ||
querytree->windowClause ||
querytree->distinctClause ||
querytree->sortClause ||
querytree->limitOffset ||

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.83 2008/10/21 20:42:53 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.84 2008/12/28 18:53:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -101,28 +101,28 @@ flatten_tlist(List *tlist)
/*
* add_to_flat_tlist
* Add more vars to a flattened tlist (if they're not already in it)
* Add more items to a flattened tlist (if they're not already in it)
*
* 'tlist' is the flattened tlist
* 'vars' is a list of Var and/or PlaceHolderVar nodes
* 'exprs' is a list of expressions (usually, but not necessarily, Vars)
*
* Returns the extended tlist.
*/
List *
add_to_flat_tlist(List *tlist, List *vars)
add_to_flat_tlist(List *tlist, List *exprs)
{
int next_resno = list_length(tlist) + 1;
ListCell *v;
ListCell *lc;
foreach(v, vars)
foreach(lc, exprs)
{
Node *var = (Node *) lfirst(v);
Node *expr = (Node *) lfirst(lc);
if (!tlist_member(var, tlist))
if (!tlist_member(expr, tlist))
{
TargetEntry *tle;
tle = makeTargetEntry(copyObject(var), /* copy needed?? */
tle = makeTargetEntry(copyObject(expr), /* copy needed?? */
next_resno++,
NULL,
false);