1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-03 20:02:46 +03:00

Fix some new issues with planning of PlaceHolderVars.

In the wake of commit a16ef313f, we need to deal with more cases
involving PlaceHolderVars in NestLoopParams than we did before.

For one thing, a16ef313f was incorrect to suppose that we could
rely on the required-outer relids of the lefthand path to decide
placement of nestloop-parameter PHVs.  As Richard Guo argued at
the time, we must look at the required-outer relids of the join
path itself.

For another, we have to apply replace_nestloop_params() to such
a PHV's expression, in case it contains references to values that
will be supplied from NestLoopParams of higher-level nestloops.

For another, we need to be more careful about the phnullingrels
of the PHV than we were being.  identify_current_nestloop_params
only bothered to ensure that the phnullingrels didn't contain
"too many" relids, but now it has to be exact, because setrefs.c
will apply both NRM_SUBSET and NRM_SUPERSET checks in different
places.  We can compute the correct relids by determining the
set of outer joins that should be able to null the PHV and then
subtracting whatever's been applied at or below this join.
Do the same for plain Vars, too.  (This should make it possible
to use NRM_EQUAL to process nestloop params in setrefs.c, but
I won't risk making such a change in v18 now.)

Lastly, if a nestloop parameter PHV was pulled up out of a subquery
and it contains a subquery that was originally pushed down from this
query level, then that will still be represented as a SubLink, because
SS_process_sublinks won't recurse into outer PHVs, so it didn't get
transformed during expression preprocessing in the subquery.  We can
substitute the version of the PHV's expression appearing in its
PlaceHolderInfo to ensure that that preprocessing has happened.
(Seems like this processing sequence could stand to be redesigned,
but again, late in v18 development is not the time for that.)

It's not very clear to me why the old have_dangerous_phv join-order
restriction prevented us from seeing the last three of these problems.
But given the lack of field complaints, it must have done so.

Reported-by: Alexander Lakhin <exclusion@gmail.com>
Author: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/18953-1c9883a9d4afeb30@postgresql.org
This commit is contained in:
Tom Lane
2025-06-29 15:04:32 -04:00
parent 8319e5cb54
commit 66e9df9f6e
7 changed files with 330 additions and 37 deletions

View File

@ -4344,6 +4344,7 @@ create_nestloop_plan(PlannerInfo *root,
NestLoop *join_plan; NestLoop *join_plan;
Plan *outer_plan; Plan *outer_plan;
Plan *inner_plan; Plan *inner_plan;
Relids outerrelids;
List *tlist = build_path_tlist(root, &best_path->jpath.path); List *tlist = build_path_tlist(root, &best_path->jpath.path);
List *joinrestrictclauses = best_path->jpath.joinrestrictinfo; List *joinrestrictclauses = best_path->jpath.joinrestrictinfo;
List *joinclauses; List *joinclauses;
@ -4374,8 +4375,8 @@ create_nestloop_plan(PlannerInfo *root,
outer_plan = create_plan_recurse(root, best_path->jpath.outerjoinpath, 0); outer_plan = create_plan_recurse(root, best_path->jpath.outerjoinpath, 0);
/* For a nestloop, include outer relids in curOuterRels for inner side */ /* For a nestloop, include outer relids in curOuterRels for inner side */
root->curOuterRels = bms_union(root->curOuterRels, outerrelids = best_path->jpath.outerjoinpath->parent->relids;
best_path->jpath.outerjoinpath->parent->relids); root->curOuterRels = bms_union(root->curOuterRels, outerrelids);
inner_plan = create_plan_recurse(root, best_path->jpath.innerjoinpath, 0); inner_plan = create_plan_recurse(root, best_path->jpath.innerjoinpath, 0);
@ -4415,7 +4416,8 @@ create_nestloop_plan(PlannerInfo *root,
* node, and remove them from root->curOuterParams. * node, and remove them from root->curOuterParams.
*/ */
nestParams = identify_current_nestloop_params(root, nestParams = identify_current_nestloop_params(root,
best_path->jpath.outerjoinpath); outerrelids,
PATH_REQ_OUTER((Path *) best_path));
/* /*
* While nestloop parameters that are Vars had better be available from * While nestloop parameters that are Vars had better be available from
@ -4423,32 +4425,50 @@ create_nestloop_plan(PlannerInfo *root,
* that are PHVs won't be. In such cases we must add them to the * that are PHVs won't be. In such cases we must add them to the
* outer_plan's tlist, since the executor's NestLoopParam machinery * outer_plan's tlist, since the executor's NestLoopParam machinery
* requires the params to be simple outer-Var references to that tlist. * requires the params to be simple outer-Var references to that tlist.
* (This is cheating a little bit, because the outer path's required-outer
* relids might not be enough to allow evaluating such a PHV. But in
* practice, if we could have evaluated the PHV at the nestloop node, we
* can do so in the outer plan too.)
*/ */
outer_tlist = outer_plan->targetlist; outer_tlist = outer_plan->targetlist;
outer_parallel_safe = outer_plan->parallel_safe; outer_parallel_safe = outer_plan->parallel_safe;
foreach(lc, nestParams) foreach(lc, nestParams)
{ {
NestLoopParam *nlp = (NestLoopParam *) lfirst(lc); NestLoopParam *nlp = (NestLoopParam *) lfirst(lc);
PlaceHolderVar *phv;
TargetEntry *tle; TargetEntry *tle;
if (IsA(nlp->paramval, Var)) if (IsA(nlp->paramval, Var))
continue; /* nothing to do for simple Vars */ continue; /* nothing to do for simple Vars */
if (tlist_member((Expr *) nlp->paramval, outer_tlist)) /* Otherwise it must be a PHV */
phv = castNode(PlaceHolderVar, nlp->paramval);
if (tlist_member((Expr *) phv, outer_tlist))
continue; /* already available */ continue; /* already available */
/*
* It's possible that nestloop parameter PHVs selected to evaluate
* here contain references to surviving root->curOuterParams items
* (that is, they reference values that will be supplied by some
* higher-level nestloop). Those need to be converted to Params now.
* Note: it's safe to do this after the tlist_member() check, because
* equal() won't pay attention to phv->phexpr.
*/
phv->phexpr = (Expr *) replace_nestloop_params(root,
(Node *) phv->phexpr);
/* Make a shallow copy of outer_tlist, if we didn't already */ /* Make a shallow copy of outer_tlist, if we didn't already */
if (outer_tlist == outer_plan->targetlist) if (outer_tlist == outer_plan->targetlist)
outer_tlist = list_copy(outer_tlist); outer_tlist = list_copy(outer_tlist);
/* ... and add the needed expression */ /* ... and add the needed expression */
tle = makeTargetEntry((Expr *) copyObject(nlp->paramval), tle = makeTargetEntry((Expr *) copyObject(phv),
list_length(outer_tlist) + 1, list_length(outer_tlist) + 1,
NULL, NULL,
true); true);
outer_tlist = lappend(outer_tlist, tle); outer_tlist = lappend(outer_tlist, tle);
/* ... and track whether tlist is (still) parallel-safe */ /* ... and track whether tlist is (still) parallel-safe */
if (outer_parallel_safe) if (outer_parallel_safe)
outer_parallel_safe = is_parallel_safe(root, outer_parallel_safe = is_parallel_safe(root, (Node *) phv);
(Node *) nlp->paramval);
} }
if (outer_tlist != outer_plan->targetlist) if (outer_tlist != outer_plan->targetlist)
outer_plan = change_plan_targetlist(outer_plan, outer_tlist, outer_plan = change_plan_targetlist(outer_plan, outer_tlist,

View File

@ -599,38 +599,31 @@ process_subquery_nestloop_params(PlannerInfo *root, List *subplan_params)
} }
/* /*
* Identify any NestLoopParams that should be supplied by a NestLoop plan * Identify any NestLoopParams that should be supplied by a NestLoop
* node with the specified lefthand input path. Remove them from the active * plan node with the specified lefthand rels and required-outer rels.
* root->curOuterParams list and return them as the result list. * Remove them from the active root->curOuterParams list and return
* them as the result list.
* *
* XXX Here we also hack up the returned Vars and PHVs so that they do not * Vars and PHVs appearing in the result list must have nullingrel sets
* contain nullingrel sets exceeding what is available from the outer side. * that could validly appear in the lefthand rel's output. Ordinarily that
* This is needed if we have applied outer join identity 3, * would be true already, but if we have applied outer join identity 3,
* (A leftjoin B on (Pab)) leftjoin C on (Pb*c) * there could be more or fewer nullingrel bits in the nodes appearing in
* = A leftjoin (B leftjoin C on (Pbc)) on (Pab) * curOuterParams than are in the nominal leftrelids. We deal with that by
* and C contains lateral references to B. It's still safe to apply the * forcing their nullingrel sets to include exactly the outer-join relids
* identity, but the parser will have created those references in the form * that appear in leftrelids and can null the respective Var or PHV.
* "b*" (i.e., with varnullingrels listing the A/B join), while what we will * This fix is a bit ad-hoc and intellectually unsatisfactory, because it's
* have available from the nestloop's outer side is just "b". We deal with * essentially jumping to the conclusion that we've placed evaluation of
* that here by stripping the nullingrels down to what is available from the * the nestloop parameters correctly, and thus it defeats the intent of the
* outer side according to leftrelids. * subsequent nullingrel cross-checks in setrefs.c. But the alternative
*
* That fixes matters for the case of forward application of identity 3.
* If the identity was applied in the reverse direction, we will have
* parameter Vars containing too few nullingrel bits rather than too many.
* Currently, that causes no problems because setrefs.c applies only a
* subset check to nullingrels in NestLoopParams, but we'd have to work
* harder if we ever want to tighten that check. This is all pretty annoying
* because it greatly weakens setrefs.c's cross-check, but the alternative
* seems to be to generate multiple versions of each laterally-parameterized * seems to be to generate multiple versions of each laterally-parameterized
* subquery, which'd be unduly expensive. * subquery, which'd be unduly expensive.
*/ */
List * List *
identify_current_nestloop_params(PlannerInfo *root, Path *leftpath) identify_current_nestloop_params(PlannerInfo *root,
Relids leftrelids,
Relids outerrelids)
{ {
List *result; List *result;
Relids leftrelids = leftpath->parent->relids;
Relids outerrelids = PATH_REQ_OUTER(leftpath);
Relids allleftrelids; Relids allleftrelids;
ListCell *cell; ListCell *cell;
@ -661,25 +654,58 @@ identify_current_nestloop_params(PlannerInfo *root, Path *leftpath)
bms_is_member(nlp->paramval->varno, leftrelids)) bms_is_member(nlp->paramval->varno, leftrelids))
{ {
Var *var = (Var *) nlp->paramval; Var *var = (Var *) nlp->paramval;
RelOptInfo *rel = root->simple_rel_array[var->varno];
root->curOuterParams = foreach_delete_current(root->curOuterParams, root->curOuterParams = foreach_delete_current(root->curOuterParams,
cell); cell);
var->varnullingrels = bms_intersect(var->varnullingrels, var->varnullingrels = bms_intersect(rel->nulling_relids,
leftrelids); leftrelids);
result = lappend(result, nlp); result = lappend(result, nlp);
} }
else if (IsA(nlp->paramval, PlaceHolderVar)) else if (IsA(nlp->paramval, PlaceHolderVar))
{ {
PlaceHolderVar *phv = (PlaceHolderVar *) nlp->paramval; PlaceHolderVar *phv = (PlaceHolderVar *) nlp->paramval;
Relids eval_at = find_placeholder_info(root, phv)->ph_eval_at; PlaceHolderInfo *phinfo = find_placeholder_info(root, phv);
Relids eval_at = phinfo->ph_eval_at;
if (bms_is_subset(eval_at, allleftrelids) && if (bms_is_subset(eval_at, allleftrelids) &&
bms_overlap(eval_at, leftrelids)) bms_overlap(eval_at, leftrelids))
{ {
root->curOuterParams = foreach_delete_current(root->curOuterParams, root->curOuterParams = foreach_delete_current(root->curOuterParams,
cell); cell);
phv->phnullingrels = bms_intersect(phv->phnullingrels,
/*
* Deal with an edge case: if the PHV was pulled up out of a
* subquery and it contains a subquery that was originally
* pushed down from this query level, then that will still be
* represented as a SubLink, because SS_process_sublinks won't
* recurse into outer PHVs, so it didn't get transformed
* during expression preprocessing in the subquery. We need a
* version of the PHV that has a SubPlan, which we can get
* from the current query level's placeholder_list. This is
* quite grotty of course, but dealing with it earlier in the
* handling of subplan params would be just as grotty, and it
* might end up being a waste of cycles if we don't decide to
* treat the PHV as a NestLoopParam. (Perhaps that whole
* mechanism should be redesigned someday, but today is not
* that day.)
*/
if (root->parse->hasSubLinks)
{
phv = copyObject(phinfo->ph_var);
/*
* The ph_var will have empty nullingrels, but that
* doesn't matter since we're about to overwrite
* phv->phnullingrels. Other fields should be OK already.
*/
nlp->paramval = (Var *) phv;
}
phv->phnullingrels =
bms_intersect(get_placeholder_nulling_relids(root, phinfo),
leftrelids); leftrelids);
result = lappend(result, nlp); result = lappend(result, nlp);
} }
} }

View File

@ -545,3 +545,43 @@ contain_placeholder_references_walker(Node *node,
return expression_tree_walker(node, contain_placeholder_references_walker, return expression_tree_walker(node, contain_placeholder_references_walker,
context); context);
} }
/*
* Compute the set of outer-join relids that can null a placeholder.
*
* This is analogous to RelOptInfo.nulling_relids for Vars, but we compute it
* on-the-fly rather than saving it somewhere. Currently the value is needed
* at most once per query, so there's little value in doing otherwise. If it
* ever gains more widespread use, perhaps we should cache the result in
* PlaceHolderInfo.
*/
Relids
get_placeholder_nulling_relids(PlannerInfo *root, PlaceHolderInfo *phinfo)
{
Relids result = NULL;
int relid = -1;
/*
* Form the union of all potential nulling OJs for each baserel included
* in ph_eval_at.
*/
while ((relid = bms_next_member(phinfo->ph_eval_at, relid)) > 0)
{
RelOptInfo *rel = root->simple_rel_array[relid];
/* ignore the RTE_GROUP RTE */
if (relid == root->group_rtindex)
continue;
if (rel == NULL) /* must be an outer join */
{
Assert(bms_is_member(relid, root->outer_join_rels));
continue;
}
result = bms_add_members(result, rel->nulling_relids);
}
/* Now remove any OJs already included in ph_eval_at, and we're done. */
result = bms_del_members(result, phinfo->ph_eval_at);
return result;
}

View File

@ -30,7 +30,8 @@ extern Param *replace_nestloop_param_placeholdervar(PlannerInfo *root,
extern void process_subquery_nestloop_params(PlannerInfo *root, extern void process_subquery_nestloop_params(PlannerInfo *root,
List *subplan_params); List *subplan_params);
extern List *identify_current_nestloop_params(PlannerInfo *root, extern List *identify_current_nestloop_params(PlannerInfo *root,
Path *leftpath); Relids leftrelids,
Relids outerrelids);
extern Param *generate_new_exec_param(PlannerInfo *root, Oid paramtype, extern Param *generate_new_exec_param(PlannerInfo *root, Oid paramtype,
int32 paramtypmod, Oid paramcollation); int32 paramtypmod, Oid paramcollation);
extern int assign_special_exec_param(PlannerInfo *root); extern int assign_special_exec_param(PlannerInfo *root);

View File

@ -30,5 +30,7 @@ extern void add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
SpecialJoinInfo *sjinfo); SpecialJoinInfo *sjinfo);
extern bool contain_placeholder_references_to(PlannerInfo *root, Node *clause, extern bool contain_placeholder_references_to(PlannerInfo *root, Node *clause,
int relid); int relid);
extern Relids get_placeholder_nulling_relids(PlannerInfo *root,
PlaceHolderInfo *phinfo);
#endif /* PLACEHOLDER_H */ #endif /* PLACEHOLDER_H */

View File

@ -4119,6 +4119,164 @@ select * from int8_tbl t1
(16 rows) (16 rows)
rollback; rollback;
-- ... not that the initial replacement didn't have some bugs too
begin;
create temp table t(i int primary key);
explain (verbose, costs off)
select * from t t1
left join (select 1 as x, * from t t2(i2)) t2ss on t1.i = t2ss.i2
left join t t3(i3) on false
left join t t4(i4) on t4.i4 > t2ss.x;
QUERY PLAN
----------------------------------------------------------
Nested Loop Left Join
Output: t1.i, (1), t2.i2, i3, t4.i4
-> Nested Loop Left Join
Output: t1.i, t2.i2, (1), i3
Join Filter: false
-> Hash Left Join
Output: t1.i, t2.i2, (1)
Inner Unique: true
Hash Cond: (t1.i = t2.i2)
-> Seq Scan on pg_temp.t t1
Output: t1.i
-> Hash
Output: t2.i2, (1)
-> Seq Scan on pg_temp.t t2
Output: t2.i2, 1
-> Result
Output: i3
One-Time Filter: false
-> Memoize
Output: t4.i4
Cache Key: (1)
Cache Mode: binary
-> Index Only Scan using t_pkey on pg_temp.t t4
Output: t4.i4
Index Cond: (t4.i4 > (1))
(25 rows)
explain (verbose, costs off)
select * from
(select k from
(select i, coalesce(i, j) as k from
(select i from t union all select 0)
join (select 1 as j limit 1) on i = j)
right join (select 2 as x) on true
join (select 3 as y) on i is not null
),
lateral (select k as kl limit 1);
QUERY PLAN
-------------------------------------------------------------------
Nested Loop
Output: COALESCE(t.i, (1)), ((COALESCE(t.i, (1))))
-> Limit
Output: 1
-> Result
Output: 1
-> Nested Loop
Output: t.i, ((COALESCE(t.i, (1))))
-> Result
Output: t.i, COALESCE(t.i, (1))
-> Append
-> Index Only Scan using t_pkey on pg_temp.t
Output: t.i
Index Cond: (t.i = (1))
-> Result
Output: 0
One-Time Filter: ((1) = 0)
-> Limit
Output: ((COALESCE(t.i, (1))))
-> Result
Output: (COALESCE(t.i, (1)))
(21 rows)
rollback;
-- PHVs containing SubLinks are quite tricky to get right
explain (verbose, costs off)
select *
from int8_tbl i8
inner join
(select (select true) as x
from int4_tbl i4, lateral (select i4.f1 as y limit 1) ss1
where i4.f1 = 0) ss2 on true
right join (select false as z) ss3 on true,
lateral (select i8.q2 as q2l where x limit 1) ss4
where i8.q2 = 123;
QUERY PLAN
----------------------------------------------------------------
Nested Loop
Output: i8.q1, i8.q2, (InitPlan 1).col1, false, (i8.q2)
InitPlan 1
-> Result
Output: true
InitPlan 2
-> Result
Output: true
-> Seq Scan on public.int4_tbl i4
Output: i4.f1
Filter: (i4.f1 = 0)
-> Nested Loop
Output: i8.q1, i8.q2, (i8.q2)
-> Subquery Scan on ss1
Output: ss1.y, (InitPlan 1).col1
-> Limit
Output: NULL::integer
-> Result
Output: NULL::integer
-> Nested Loop
Output: i8.q1, i8.q2, (i8.q2)
-> Seq Scan on public.int8_tbl i8
Output: i8.q1, i8.q2
Filter: (i8.q2 = 123)
-> Limit
Output: (i8.q2)
-> Result
Output: i8.q2
One-Time Filter: ((InitPlan 1).col1)
(29 rows)
explain (verbose, costs off)
select *
from int8_tbl i8
inner join
(select (select true) as x
from int4_tbl i4, lateral (select 1 as y limit 1) ss1
where i4.f1 = 0) ss2 on true
right join (select false as z) ss3 on true,
lateral (select i8.q2 as q2l where x limit 1) ss4
where i8.q2 = 123;
QUERY PLAN
----------------------------------------------------------------
Nested Loop
Output: i8.q1, i8.q2, (InitPlan 1).col1, false, (i8.q2)
InitPlan 1
-> Result
Output: true
InitPlan 2
-> Result
Output: true
-> Limit
Output: NULL::integer
-> Result
Output: NULL::integer
-> Nested Loop
Output: i8.q1, i8.q2, (i8.q2)
-> Seq Scan on public.int4_tbl i4
Output: i4.f1, (InitPlan 1).col1
Filter: (i4.f1 = 0)
-> Nested Loop
Output: i8.q1, i8.q2, (i8.q2)
-> Seq Scan on public.int8_tbl i8
Output: i8.q1, i8.q2
Filter: (i8.q2 = 123)
-> Limit
Output: (i8.q2)
-> Result
Output: i8.q2
One-Time Filter: ((InitPlan 1).col1)
(27 rows)
-- Test proper handling of appendrel PHVs during useless-RTE removal -- Test proper handling of appendrel PHVs during useless-RTE removal
explain (costs off) explain (costs off)
select * from select * from

View File

@ -1361,6 +1361,52 @@ select * from int8_tbl t1
on true; on true;
rollback; rollback;
-- ... not that the initial replacement didn't have some bugs too
begin;
create temp table t(i int primary key);
explain (verbose, costs off)
select * from t t1
left join (select 1 as x, * from t t2(i2)) t2ss on t1.i = t2ss.i2
left join t t3(i3) on false
left join t t4(i4) on t4.i4 > t2ss.x;
explain (verbose, costs off)
select * from
(select k from
(select i, coalesce(i, j) as k from
(select i from t union all select 0)
join (select 1 as j limit 1) on i = j)
right join (select 2 as x) on true
join (select 3 as y) on i is not null
),
lateral (select k as kl limit 1);
rollback;
-- PHVs containing SubLinks are quite tricky to get right
explain (verbose, costs off)
select *
from int8_tbl i8
inner join
(select (select true) as x
from int4_tbl i4, lateral (select i4.f1 as y limit 1) ss1
where i4.f1 = 0) ss2 on true
right join (select false as z) ss3 on true,
lateral (select i8.q2 as q2l where x limit 1) ss4
where i8.q2 = 123;
explain (verbose, costs off)
select *
from int8_tbl i8
inner join
(select (select true) as x
from int4_tbl i4, lateral (select 1 as y limit 1) ss1
where i4.f1 = 0) ss2 on true
right join (select false as z) ss3 on true,
lateral (select i8.q2 as q2l where x limit 1) ss4
where i8.q2 = 123;
-- Test proper handling of appendrel PHVs during useless-RTE removal -- Test proper handling of appendrel PHVs during useless-RTE removal
explain (costs off) explain (costs off)
select * from select * from