mirror of
https://github.com/postgres/postgres.git
synced 2025-09-03 15:22:11 +03:00
Estimate cost of elided SubqueryScan, Append, MergeAppend nodes better.
setrefs.c contains logic to discard no-op SubqueryScan nodes, that is, ones that have no qual to check and copy the input targetlist unchanged. (Formally it's not very nice to be applying such optimizations so late in the planner, but there are practical reasons for it; mostly that we can't unify relids between the subquery and the parent query until we flatten the rangetable during setrefs.c.) This behavior falsifies our previous cost estimates, since we would've charged cpu_tuple_cost per row just to pass data through the node. Most of the time that's little enough to not matter, but there are cases where this effect visibly changes the plan compared to what you would've gotten with no sub-select.

To improve the situation, make the callers of cost_subqueryscan tell it whether they think the targetlist is trivial. cost_subqueryscan already has the qual list, so it can check the other half of the condition easily. It could make its own determination of tlist triviality too, but doing so would be repetitive (for callers that may call it several times) or unnecessarily expensive (for callers that can determine this more cheaply than a general test would do).

This isn't a 100% solution, because createplan.c also does things that can falsify any earlier estimate of whether the tlist is trivial. However, it fixes nearly all cases in practice, if results for the regression tests are anything to go by.

setrefs.c also contains logic to discard no-op Append and MergeAppend nodes. We did have knowledge of that behavior at costing time, but somebody failed to update it when a check on parallel-awareness was added to the setrefs.c logic. Fix that while we're here.

These changes result in two minor changes in query plans shown in our regression tests. Neither is relevant to the purposes of its test case AFAICT.

Patch by me; thanks to Richard Guo for review.

Discussion: https://postgr.es/m/2581077.1651703520@sss.pgh.pa.us
This commit is contained in:
@@ -2451,6 +2451,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
|
||||
{
|
||||
Query *parse = root->parse;
|
||||
Query *subquery = rte->subquery;
|
||||
bool trivial_pathtarget;
|
||||
Relids required_outer;
|
||||
pushdown_safety_info safetyInfo;
|
||||
double tuple_fraction;
|
||||
@@ -2613,6 +2614,36 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
|
||||
*/
|
||||
set_subquery_size_estimates(root, rel);
|
||||
|
||||
/*
|
||||
* Also detect whether the reltarget is trivial, so that we can pass that
|
||||
* info to cost_subqueryscan (rather than re-deriving it multiple times).
|
||||
* It's trivial if it fetches all the subplan output columns in order.
|
||||
*/
|
||||
if (list_length(rel->reltarget->exprs) != list_length(subquery->targetList))
|
||||
trivial_pathtarget = false;
|
||||
else
|
||||
{
|
||||
trivial_pathtarget = true;
|
||||
foreach(lc, rel->reltarget->exprs)
|
||||
{
|
||||
Node *node = (Node *) lfirst(lc);
|
||||
Var *var;
|
||||
|
||||
if (!IsA(node, Var))
|
||||
{
|
||||
trivial_pathtarget = false;
|
||||
break;
|
||||
}
|
||||
var = (Var *) node;
|
||||
if (var->varno != rti ||
|
||||
var->varattno != foreach_current_index(lc) + 1)
|
||||
{
|
||||
trivial_pathtarget = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* For each Path that subquery_planner produced, make a SubqueryScanPath
|
||||
* in the outer query.
|
||||
@@ -2631,6 +2662,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
|
||||
/* Generate outer path using this subpath */
|
||||
add_path(rel, (Path *)
|
||||
create_subqueryscan_path(root, rel, subpath,
|
||||
trivial_pathtarget,
|
||||
pathkeys, required_outer));
|
||||
}
|
||||
|
||||
@@ -2656,6 +2688,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
|
||||
/* Generate outer path using this subpath */
|
||||
add_partial_path(rel, (Path *)
|
||||
create_subqueryscan_path(root, rel, subpath,
|
||||
trivial_pathtarget,
|
||||
pathkeys,
|
||||
required_outer));
|
||||
}
|
||||
|
@@ -1415,10 +1415,12 @@ cost_tidrangescan(Path *path, PlannerInfo *root,
|
||||
*
|
||||
* 'baserel' is the relation to be scanned
|
||||
* 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
|
||||
* 'trivial_pathtarget' is true if the pathtarget is believed to be trivial.
|
||||
*/
|
||||
void
|
||||
cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root,
|
||||
RelOptInfo *baserel, ParamPathInfo *param_info)
|
||||
RelOptInfo *baserel, ParamPathInfo *param_info,
|
||||
bool trivial_pathtarget)
|
||||
{
|
||||
Cost startup_cost;
|
||||
Cost run_cost;
|
||||
@@ -1458,6 +1460,22 @@ cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root,
|
||||
path->path.startup_cost = path->subpath->startup_cost;
|
||||
path->path.total_cost = path->subpath->total_cost;
|
||||
|
||||
/*
|
||||
* However, if there are no relevant restriction clauses and the
|
||||
* pathtarget is trivial, then we expect that setrefs.c will optimize away
|
||||
* the SubqueryScan plan node altogether, so we should just make its cost
|
||||
* and rowcount equal to the input path's.
|
||||
*
|
||||
* Note: there are some edge cases where createplan.c will apply a
|
||||
* different targetlist to the SubqueryScan node, thus falsifying our
|
||||
* current estimate of whether the target is trivial, and making the cost
|
||||
* estimate (though not the rowcount) wrong. It does not seem worth the
|
||||
* extra complication to try to account for that exactly, especially since
|
||||
* that behavior falsifies other cost estimates as well.
|
||||
*/
|
||||
if (qpquals == NIL && trivial_pathtarget)
|
||||
return;
|
||||
|
||||
get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
|
||||
|
||||
startup_cost = qpqual_cost.startup;
|
||||
|
Reference in New Issue
Block a user