1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-03 20:02:46 +03:00

Make the upper part of the planner work by generating and comparing Paths.

I've been saying we needed to do this for more than five years, and here it
finally is.  This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() previously used to try to identify the best plan for
post-scan/join query steps.  Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step.  We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.

In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan.  It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation.  (A couple of regression test outputs change in consequence of
that.  However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)

There is a great deal left to do here.  This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations.  (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.)  I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.

Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
This commit is contained in:
Tom Lane
2016-03-07 15:58:22 -05:00
parent b642e50aea
commit 3fc6e2d7f5
35 changed files with 5591 additions and 3126 deletions

View File

@ -37,6 +37,7 @@
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/tlist.h"
#include "optimizer/var.h"
#include "parser/parse_clause.h"
#include "parser/parsetree.h"
@ -97,7 +98,6 @@ static Path *get_cheapest_parameterized_child_path(PlannerInfo *root,
RelOptInfo *rel,
Relids required_outer);
static List *accumulate_append_subpath(List *subpaths, Path *path);
static void set_dummy_rel_pathlist(RelOptInfo *rel);
static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
@ -1507,8 +1507,10 @@ accumulate_append_subpath(List *subpaths, Path *path)
*
* Rather than inventing a special "dummy" path type, we represent this as an
* AppendPath with no members (see also IS_DUMMY_PATH/IS_DUMMY_REL macros).
*
* This is exported because inheritance_planner() has need for it.
*/
static void
void
set_dummy_rel_pathlist(RelOptInfo *rel)
{
/* Set dummy size estimates --- we leave attr_widths[] as zeroes */
@ -1554,15 +1556,15 @@ has_multiple_baserels(PlannerInfo *root)
/*
* set_subquery_pathlist
* Build the (single) access path for a subquery RTE
* Generate SubqueryScan access paths for a subquery RTE
*
* We don't currently support generating parameterized paths for subqueries
* by pushing join clauses down into them; it seems too expensive to re-plan
* the subquery multiple times to consider different alternatives. So the
* subquery will have exactly one path. (The path will be parameterized
* if the subquery contains LATERAL references, otherwise not.) Since there's
* no freedom of action here, there's no need for a separate set_subquery_size
* phase: we just make the path right away.
* the subquery multiple times to consider different alternatives.
* (XXX that could stand to be reconsidered, now that we use Paths.)
* So the paths made here will be parameterized if the subquery contains
* LATERAL references, otherwise not. As long as that's true, there's no need
* for a separate set_subquery_size phase: just make the paths right away.
*/
static void
set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
@ -1573,8 +1575,8 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
Relids required_outer;
pushdown_safety_info safetyInfo;
double tuple_fraction;
PlannerInfo *subroot;
List *pathkeys;
RelOptInfo *sub_final_rel;
ListCell *lc;
/*
* Must copy the Query so that planning doesn't mess up the RTE contents
@ -1685,12 +1687,10 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
/* plan_params should not be in use in current query level */
Assert(root->plan_params == NIL);
/* Generate the plan for the subquery */
rel->subplan = subquery_planner(root->glob, subquery,
/* Generate a subroot and Paths for the subquery */
rel->subroot = subquery_planner(root->glob, subquery,
root,
false, tuple_fraction,
&subroot);
rel->subroot = subroot;
false, tuple_fraction);
/* Isolate the params needed by this specific subplan */
rel->subplan_params = root->plan_params;
@ -1698,23 +1698,44 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
/*
* It's possible that constraint exclusion proved the subquery empty. If
* so, it's convenient to turn it back into a dummy path so that we will
* so, it's desirable to produce an unadorned dummy path so that we will
* recognize appropriate optimizations at this query level.
*/
if (is_dummy_plan(rel->subplan))
sub_final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
if (IS_DUMMY_REL(sub_final_rel))
{
set_dummy_rel_pathlist(rel);
return;
}
/* Mark rel with estimated output rows, width, etc */
/*
* Mark rel with estimated output rows, width, etc. Note that we have to
* do this before generating outer-query paths, else cost_subqueryscan is
* not happy.
*/
set_subquery_size_estimates(root, rel);
/* Convert subquery pathkeys to outer representation */
pathkeys = convert_subquery_pathkeys(root, rel, subroot->query_pathkeys);
/*
* For each Path that subquery_planner produced, make a SubqueryScanPath
* in the outer query.
*/
foreach(lc, sub_final_rel->pathlist)
{
Path *subpath = (Path *) lfirst(lc);
List *pathkeys;
/* Generate appropriate path */
add_path(rel, create_subqueryscan_path(root, rel, pathkeys, required_outer));
/* Convert subpath's pathkeys to outer representation */
pathkeys = convert_subquery_pathkeys(root,
rel,
subpath->pathkeys,
make_tlist_from_pathtarget(subpath->pathtarget));
/* Generate outer path using this subpath */
add_path(rel, (Path *)
create_subqueryscan_path(root, rel, subpath,
pathkeys, required_outer));
}
}
/*
@ -1858,7 +1879,7 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1);
/* Mark rel with estimated output rows, width, etc */
set_cte_size_estimates(root, rel, cteplan);
set_cte_size_estimates(root, rel, cteplan->plan_rows);
/*
* We don't support pushing join clauses into the quals of a CTE scan, but
@ -1881,13 +1902,13 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
static void
set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
Plan *cteplan;
Path *ctepath;
PlannerInfo *cteroot;
Index levelsup;
Relids required_outer;
/*
* We need to find the non-recursive term's plan, which is in the plan
* We need to find the non-recursive term's path, which is in the plan
* level that's processing the recursive UNION, which is one level *below*
* where the CTE comes from.
*/
@ -1902,12 +1923,12 @@ set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
if (!cteroot) /* shouldn't happen */
elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
}
cteplan = cteroot->non_recursive_plan;
if (!cteplan) /* shouldn't happen */
elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
ctepath = cteroot->non_recursive_path;
if (!ctepath) /* shouldn't happen */
elog(ERROR, "could not find path for CTE \"%s\"", rte->ctename);
/* Mark rel with estimated output rows, width, etc */
set_cte_size_estimates(root, rel, cteplan);
set_cte_size_estimates(root, rel, ctepath->rows);
/*
* We don't support pushing join clauses into the quals of a worktable
@ -2859,6 +2880,9 @@ print_path(PlannerInfo *root, Path *path, int indent)
case T_TidPath:
ptype = "TidScan";
break;
case T_SubqueryScanPath:
ptype = "SubqueryScanScan";
break;
case T_ForeignPath:
ptype = "ForeignScan";
break;
@ -2883,6 +2907,55 @@ print_path(PlannerInfo *root, Path *path, int indent)
ptype = "Gather";
subpath = ((GatherPath *) path)->subpath;
break;
case T_ProjectionPath:
ptype = "Projection";
subpath = ((ProjectionPath *) path)->subpath;
break;
case T_SortPath:
ptype = "Sort";
subpath = ((SortPath *) path)->subpath;
break;
case T_GroupPath:
ptype = "Group";
subpath = ((GroupPath *) path)->subpath;
break;
case T_UpperUniquePath:
ptype = "UpperUnique";
subpath = ((UpperUniquePath *) path)->subpath;
break;
case T_AggPath:
ptype = "Agg";
subpath = ((AggPath *) path)->subpath;
break;
case T_GroupingSetsPath:
ptype = "GroupingSets";
subpath = ((GroupingSetsPath *) path)->subpath;
break;
case T_MinMaxAggPath:
ptype = "MinMaxAgg";
break;
case T_WindowAggPath:
ptype = "WindowAgg";
subpath = ((WindowAggPath *) path)->subpath;
break;
case T_SetOpPath:
ptype = "SetOp";
subpath = ((SetOpPath *) path)->subpath;
break;
case T_RecursiveUnionPath:
ptype = "RecursiveUnion";
break;
case T_LockRowsPath:
ptype = "LockRows";
subpath = ((LockRowsPath *) path)->subpath;
break;
case T_ModifyTablePath:
ptype = "ModifyTable";
break;
case T_LimitPath:
ptype = "Limit";
subpath = ((LimitPath *) path)->subpath;
break;
case T_NestPath:
ptype = "NestLoop";
join = true;