1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-22 12:22:45 +03:00

Add an explicit representation of the output targetlist to Paths.

Up to now, there's been an assumption that all Paths for a given relation
compute the same output column set (targetlist).  However, there are good
reasons to remove that assumption.  For example, an indexscan on an
expression index might be able to return the value of an expensive function
"for free".  While we have the ability to generate such a plan today in
simple cases, we don't have a way to model that it's cheaper than a plan
that computes the function from scratch, nor a way to create such a plan
in join cases (where the function computation would normally happen at
the topmost join node).  Also, we need this so that we can have Paths
representing post-scan/join steps, where the targetlist may well change
from one step to the next.  Therefore, invent a "struct PathTarget"
representing the columns we expect a plan step to emit.  It's convenient
to include the output tuple width and tlist evaluation cost in this struct,
and there will likely be additional fields in future.

While Path nodes that actually do have custom outputs will need their own
PathTargets, it will still be true that most Paths for a given relation
will compute the same tlist.  To reduce the overhead added by this patch,
keep a "default PathTarget" in RelOptInfo, and allow Paths that compute
that column set to just point to their parent RelOptInfo's reltarget.
(In the patch as committed, actually every Path is like that, since we
do not yet have any cases of custom PathTargets.)

I took this opportunity to provide some more-honest costing of
PlaceHolderVar evaluation.  Up to now, the assumption that "scan/join
reltargetlists have cost zero" was applied not only to Vars, where it's
reasonable, but also to PlaceHolderVars, where it isn't.  Now, we add the eval
cost of a PlaceHolderVar's expression to the first plan level where it can
be computed, by including it in the PathTarget cost field and adding that
to the cost estimates for Paths.  This isn't perfect yet but it's much
better than before, and there is a way forward to improve it more.  This
costing change affects the join order chosen for a couple of the regression
tests, changing expected row ordering.
This commit is contained in:
Tom Lane
2016-02-18 20:01:49 -05:00
parent 3386f34cdc
commit 19a541143a
18 changed files with 337 additions and 154 deletions

View File

@@ -3489,7 +3489,7 @@ eval_const_expressions_mutator(Node *node,
* can optimize field selection from a RowExpr construct.
*
* However, replacing a whole-row Var in this way has a
- * pitfall: if we've already built the reltargetlist for the
+ * pitfall: if we've already built the rel targetlist for the
* source relation, then the whole-row Var is scheduled to be
* produced by the relation scan, but the simple Var probably
* isn't, which will lead to a failure in setrefs.c. This is

View File

@@ -929,6 +929,7 @@ create_seqscan_path(PlannerInfo *root, RelOptInfo *rel,
pathnode->pathtype = T_SeqScan;
pathnode->parent = rel;
pathnode->pathtarget = &(rel->reltarget);
pathnode->param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->parallel_aware = parallel_degree > 0 ? true : false;
@@ -952,6 +953,7 @@ create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer
pathnode->pathtype = T_SampleScan;
pathnode->parent = rel;
pathnode->pathtarget = &(rel->reltarget);
pathnode->param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->parallel_aware = false;
@@ -1008,6 +1010,7 @@ create_index_path(PlannerInfo *root,
pathnode->path.pathtype = indexonly ? T_IndexOnlyScan : T_IndexScan;
pathnode->path.parent = rel;
pathnode->path.pathtarget = &(rel->reltarget);
pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->path.parallel_aware = false;
@@ -1056,6 +1059,7 @@ create_bitmap_heap_path(PlannerInfo *root,
pathnode->path.pathtype = T_BitmapHeapScan;
pathnode->path.parent = rel;
pathnode->path.pathtarget = &(rel->reltarget);
pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->path.parallel_aware = false;
@@ -1085,6 +1089,7 @@ create_bitmap_and_path(PlannerInfo *root,
pathnode->path.pathtype = T_BitmapAnd;
pathnode->path.parent = rel;
pathnode->path.pathtarget = &(rel->reltarget);
pathnode->path.param_info = NULL; /* not used in bitmap trees */
/*
@@ -1120,6 +1125,7 @@ create_bitmap_or_path(PlannerInfo *root,
pathnode->path.pathtype = T_BitmapOr;
pathnode->path.parent = rel;
pathnode->path.pathtarget = &(rel->reltarget);
pathnode->path.param_info = NULL; /* not used in bitmap trees */
/*
@@ -1154,6 +1160,7 @@ create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals,
pathnode->path.pathtype = T_TidScan;
pathnode->path.parent = rel;
pathnode->path.pathtarget = &(rel->reltarget);
pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->path.parallel_aware = false;
@@ -1185,6 +1192,7 @@ create_append_path(RelOptInfo *rel, List *subpaths, Relids required_outer,
pathnode->path.pathtype = T_Append;
pathnode->path.parent = rel;
pathnode->path.pathtarget = &(rel->reltarget);
pathnode->path.param_info = get_appendrel_parampathinfo(rel,
required_outer);
pathnode->path.parallel_aware = false;
@@ -1243,6 +1251,7 @@ create_merge_append_path(PlannerInfo *root,
pathnode->path.pathtype = T_MergeAppend;
pathnode->path.parent = rel;
pathnode->path.pathtarget = &(rel->reltarget);
pathnode->path.param_info = get_appendrel_parampathinfo(rel,
required_outer);
pathnode->path.parallel_aware = false;
@@ -1290,7 +1299,7 @@ create_merge_append_path(PlannerInfo *root,
pathkeys,
subpath->total_cost,
subpath->parent->tuples,
subpath->parent->width,
subpath->pathtarget->width,
0.0,
work_mem,
pathnode->limit_tuples);
@@ -1322,7 +1331,8 @@ create_result_path(RelOptInfo *rel, List *quals)
ResultPath *pathnode = makeNode(ResultPath);
pathnode->path.pathtype = T_Result;
pathnode->path.parent = NULL;
pathnode->path.parent = rel;
pathnode->path.pathtarget = &(rel->reltarget);
pathnode->path.param_info = NULL; /* there are no other rels... */
pathnode->path.parallel_aware = false;
pathnode->path.parallel_safe = rel->consider_parallel;
@@ -1339,7 +1349,10 @@ create_result_path(RelOptInfo *rel, List *quals)
* In theory we should include the qual eval cost as well, but at present
* that doesn't accomplish much except duplicate work that will be done
* again in make_result; since this is only used for degenerate cases,
- * nothing interesting will be done with the path cost values...
+ * nothing interesting will be done with the path cost values.
+ *
+ * (Likewise, we don't worry about pathtarget->cost since that tlist will
+ * be empty at this point.)
*/
return pathnode;
@@ -1359,6 +1372,7 @@ create_material_path(RelOptInfo *rel, Path *subpath)
pathnode->path.pathtype = T_Material;
pathnode->path.parent = rel;
pathnode->path.pathtarget = &(rel->reltarget);
pathnode->path.param_info = subpath->param_info;
pathnode->path.parallel_aware = false;
pathnode->path.parallel_safe = subpath->parallel_safe;
@@ -1371,7 +1385,7 @@ create_material_path(RelOptInfo *rel, Path *subpath)
subpath->startup_cost,
subpath->total_cost,
subpath->rows,
rel->width);
subpath->pathtarget->width);
return pathnode;
}
@@ -1422,6 +1436,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
pathnode->path.pathtype = T_Unique;
pathnode->path.parent = rel;
pathnode->path.pathtarget = &(rel->reltarget);
pathnode->path.param_info = subpath->param_info;
pathnode->path.parallel_aware = false;
pathnode->path.parallel_safe = subpath->parallel_safe;
@@ -1516,7 +1531,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
cost_sort(&sort_path, root, NIL,
subpath->total_cost,
rel->rows,
rel->width,
subpath->pathtarget->width,
0.0,
work_mem,
-1.0);
@@ -1536,7 +1551,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
* Estimate the overhead per hashtable entry at 64 bytes (same as in
* planner.c).
*/
int hashentrysize = rel->width + 64;
int hashentrysize = subpath->pathtarget->width + 64;
if (hashentrysize * pathnode->path.rows > work_mem * 1024L)
{
@@ -1607,6 +1622,7 @@ create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
pathnode->path.pathtype = T_Gather;
pathnode->path.parent = rel;
pathnode->path.pathtarget = &(rel->reltarget);
pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->path.parallel_aware = false;
@@ -1672,6 +1688,7 @@ create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel,
pathnode->pathtype = T_SubqueryScan;
pathnode->parent = rel;
pathnode->pathtarget = &(rel->reltarget);
pathnode->param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->parallel_aware = false;
@@ -1697,6 +1714,7 @@ create_functionscan_path(PlannerInfo *root, RelOptInfo *rel,
pathnode->pathtype = T_FunctionScan;
pathnode->parent = rel;
pathnode->pathtarget = &(rel->reltarget);
pathnode->param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->parallel_aware = false;
@@ -1722,6 +1740,7 @@ create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel,
pathnode->pathtype = T_ValuesScan;
pathnode->parent = rel;
pathnode->pathtarget = &(rel->reltarget);
pathnode->param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->parallel_aware = false;
@@ -1746,6 +1765,7 @@ create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
pathnode->pathtype = T_CteScan;
pathnode->parent = rel;
pathnode->pathtarget = &(rel->reltarget);
pathnode->param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->parallel_aware = false;
@@ -1771,6 +1791,7 @@ create_worktablescan_path(PlannerInfo *root, RelOptInfo *rel,
pathnode->pathtype = T_WorkTableScan;
pathnode->parent = rel;
pathnode->pathtarget = &(rel->reltarget);
pathnode->param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->parallel_aware = false;
@@ -1806,6 +1827,7 @@ create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel,
pathnode->path.pathtype = T_ForeignScan;
pathnode->path.parent = rel;
pathnode->path.pathtarget = &(rel->reltarget);
pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->path.parallel_aware = false;
@@ -1938,6 +1960,7 @@ create_nestloop_path(PlannerInfo *root,
pathnode->path.pathtype = T_NestLoop;
pathnode->path.parent = joinrel;
pathnode->path.pathtarget = &(joinrel->reltarget);
pathnode->path.param_info =
get_joinrel_parampathinfo(root,
joinrel,
@@ -2000,6 +2023,7 @@ create_mergejoin_path(PlannerInfo *root,
pathnode->jpath.path.pathtype = T_MergeJoin;
pathnode->jpath.path.parent = joinrel;
pathnode->jpath.path.pathtarget = &(joinrel->reltarget);
pathnode->jpath.path.param_info =
get_joinrel_parampathinfo(root,
joinrel,
@@ -2060,6 +2084,7 @@ create_hashjoin_path(PlannerInfo *root,
pathnode->jpath.path.pathtype = T_HashJoin;
pathnode->jpath.path.parent = joinrel;
pathnode->jpath.path.pathtarget = &(joinrel->reltarget);
pathnode->jpath.path.param_info =
get_joinrel_parampathinfo(root,
joinrel,

View File

@@ -16,6 +16,7 @@
#include "postgres.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/placeholder.h"
#include "optimizer/planmain.h"
@@ -388,8 +389,9 @@ add_placeholders_to_base_rels(PlannerInfo *root)
{
RelOptInfo *rel = find_base_rel(root, varno);
rel->reltargetlist = lappend(rel->reltargetlist,
copyObject(phinfo->ph_var));
rel->reltarget.exprs = lappend(rel->reltarget.exprs,
copyObject(phinfo->ph_var));
/* reltarget's cost and width fields will be updated later */
}
}
}
@@ -402,11 +404,10 @@ add_placeholders_to_base_rels(PlannerInfo *root)
*
* A join rel should emit a PlaceHolderVar if (a) the PHV is needed above
* this join level and (b) the PHV can be computed at or below this level.
- * At this time we do not need to distinguish whether the PHV will be
- * computed here or copied up from below.
*/
void
add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel)
add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outer_rel, RelOptInfo *inner_rel)
{
Relids relids = joinrel->relids;
ListCell *lc;
@@ -422,9 +423,32 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel)
if (bms_is_subset(phinfo->ph_eval_at, relids))
{
/* Yup, add it to the output */
joinrel->reltargetlist = lappend(joinrel->reltargetlist,
phinfo->ph_var);
joinrel->width += phinfo->ph_width;
joinrel->reltarget.exprs = lappend(joinrel->reltarget.exprs,
phinfo->ph_var);
joinrel->reltarget.width += phinfo->ph_width;
/*
* Charge the cost of evaluating the contained expression if
* the PHV can be computed here but not in either input. This
* is a bit bogus because we make the decision based on the
* first pair of possible input relations considered for the
* joinrel. With other pairs, it might be possible to compute
* the PHV in one input or the other, and then we'd be double
* charging the PHV's cost for some join paths. For now, live
* with that; but we might want to improve it later by
* refiguring the reltarget costs for each pair of inputs.
*/
if (!bms_is_subset(phinfo->ph_eval_at, outer_rel->relids) &&
!bms_is_subset(phinfo->ph_eval_at, inner_rel->relids))
{
QualCost cost;
cost_qual_eval_node(&cost, (Node *) phinfo->ph_var->phexpr,
root);
joinrel->reltarget.cost.startup += cost.startup;
joinrel->reltarget.cost.per_tuple += cost.per_tuple;
}
/* Adjust joinrel's direct_lateral_relids as needed */
joinrel->direct_lateral_relids =
bms_add_members(joinrel->direct_lateral_relids,

View File

@@ -102,12 +102,14 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
rel->reloptkind = reloptkind;
rel->relids = bms_make_singleton(relid);
rel->rows = 0;
rel->width = 0;
/* cheap startup cost is interesting iff not all tuples to be retrieved */
rel->consider_startup = (root->tuple_fraction > 0);
rel->consider_param_startup = false; /* might get changed later */
rel->consider_parallel = false; /* might get changed later */
rel->reltargetlist = NIL;
rel->reltarget.exprs = NIL;
rel->reltarget.cost.startup = 0;
rel->reltarget.cost.per_tuple = 0;
rel->reltarget.width = 0;
rel->pathlist = NIL;
rel->ppilist = NIL;
rel->partial_pathlist = NIL;
@@ -387,12 +389,14 @@ build_join_rel(PlannerInfo *root,
joinrel->reloptkind = RELOPT_JOINREL;
joinrel->relids = bms_copy(joinrelids);
joinrel->rows = 0;
joinrel->width = 0;
/* cheap startup cost is interesting iff not all tuples to be retrieved */
joinrel->consider_startup = (root->tuple_fraction > 0);
joinrel->consider_param_startup = false;
joinrel->consider_parallel = false;
joinrel->reltargetlist = NIL;
joinrel->reltarget.exprs = NIL;
joinrel->reltarget.cost.startup = 0;
joinrel->reltarget.cost.per_tuple = 0;
joinrel->reltarget.width = 0;
joinrel->pathlist = NIL;
joinrel->ppilist = NIL;
joinrel->partial_pathlist = NIL;
@@ -459,7 +463,7 @@ build_join_rel(PlannerInfo *root,
*/
build_joinrel_tlist(root, joinrel, outer_rel);
build_joinrel_tlist(root, joinrel, inner_rel);
add_placeholders_to_joinrel(root, joinrel);
add_placeholders_to_joinrel(root, joinrel, outer_rel, inner_rel);
/*
* add_placeholders_to_joinrel also took care of adding the ph_lateral
@@ -609,7 +613,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
Relids relids = joinrel->relids;
ListCell *vars;
foreach(vars, input_rel->reltargetlist)
foreach(vars, input_rel->reltarget.exprs)
{
Var *var = (Var *) lfirst(vars);
RelOptInfo *baserel;
@@ -628,7 +632,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
* rels, which will never be seen here.)
*/
if (!IsA(var, Var))
elog(ERROR, "unexpected node type in reltargetlist: %d",
elog(ERROR, "unexpected node type in rel targetlist: %d",
(int) nodeTag(var));
/* Get the Var's original base rel */
@@ -639,8 +643,9 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
if (bms_nonempty_difference(baserel->attr_needed[ndx], relids))
{
/* Yup, add it to the output */
joinrel->reltargetlist = lappend(joinrel->reltargetlist, var);
joinrel->width += baserel->attr_widths[ndx];
joinrel->reltarget.exprs = lappend(joinrel->reltarget.exprs, var);
/* Vars have cost zero, so no need to adjust reltarget.cost */
joinrel->reltarget.width += baserel->attr_widths[ndx];
}
}
}
@@ -826,7 +831,6 @@ build_empty_join_rel(PlannerInfo *root)
joinrel->reloptkind = RELOPT_JOINREL;
joinrel->relids = NULL; /* empty set */
joinrel->rows = 1; /* we produce one row for such cases */
joinrel->width = 0; /* it contains no Vars */
joinrel->rtekind = RTE_JOIN;
root->join_rel_list = lappend(root->join_rel_list, joinrel);