1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-17 17:02:08 +03:00

Avoid unnecessary wrapping for Vars and PHVs

When pulling up a lateral subquery that is under an outer join, the
current code always wraps a Var or PHV in the subquery's targetlist
into a new PlaceHolderVar if it is a lateral reference to something
outside the subquery.  This is necessary when the Var/PHV references
the non-nullable side of the outer join from the nullable side: we
need to ensure that it is evaluated at the right place and hence is
forced to null when the outer join should do so.  However, if the
referenced rel is under the same lowest nulling outer join, we can
actually omit the wrapping.  That's safe because if the subquery
variable is forced to NULL by the outer join, the lateral reference
variable will come out as NULL too.  It could be beneficial to get rid
of such PHVs because they imply lateral dependencies, which force us
to resort to nestloop joins.

This patch leverages the newly introduced nullingrel_info to check if
the nullingrels of the subquery RTE are a subset of those of the
laterally referenced rel, in order to determine if the referenced rel
is under the same lowest nulling outer join.

No backpatch as this could result in plan changes.

Author: Richard Guo
Reviewed-by: James Coleman, Dmitry Dolgov, Andrei Lepikhov
Discussion: https://postgr.es/m/CAMbWs48uk6C7Z9m_FNT8_21CMCk68hrgAsz=z6zpP1PNZMkeoQ@mail.gmail.com
This commit is contained in:
Richard Guo
2024-12-09 20:38:22 +09:00
parent 5668a857de
commit f64ec81a81
3 changed files with 375 additions and 9 deletions

View File

@ -2598,26 +2598,47 @@ pullup_replace_vars_callback(Var *var,
/* /*
* Simple Vars always escape being wrapped, unless they are * Simple Vars always escape being wrapped, unless they are
* lateral references to something outside the subquery being * lateral references to something outside the subquery being
* pulled up. (Even then, we could omit the PlaceHolderVar if * pulled up and the referenced rel is not under the same
* the referenced rel is under the same lowest outer join, but * lowest nulling outer join.
* it doesn't seem worth the trouble to check that.)
*/ */
wrap = false;
if (rcon->target_rte->lateral && if (rcon->target_rte->lateral &&
!bms_is_member(((Var *) newnode)->varno, rcon->relids)) !bms_is_member(((Var *) newnode)->varno, rcon->relids))
wrap = true; {
else nullingrel_info *nullinfo = rcon->nullinfo;
wrap = false; int lvarno = ((Var *) newnode)->varno;
Assert(lvarno > 0 && lvarno <= nullinfo->rtlength);
if (!bms_is_subset(nullinfo->nullingrels[rcon->varno],
nullinfo->nullingrels[lvarno]))
wrap = true;
}
} }
else if (newnode && IsA(newnode, PlaceHolderVar) && else if (newnode && IsA(newnode, PlaceHolderVar) &&
((PlaceHolderVar *) newnode)->phlevelsup == 0) ((PlaceHolderVar *) newnode)->phlevelsup == 0)
{ {
/* The same rules apply for a PlaceHolderVar */ /* The same rules apply for a PlaceHolderVar */
wrap = false;
if (rcon->target_rte->lateral && if (rcon->target_rte->lateral &&
!bms_is_subset(((PlaceHolderVar *) newnode)->phrels, !bms_is_subset(((PlaceHolderVar *) newnode)->phrels,
rcon->relids)) rcon->relids))
wrap = true; {
else nullingrel_info *nullinfo = rcon->nullinfo;
wrap = false; Relids lvarnos = ((PlaceHolderVar *) newnode)->phrels;
int lvarno;
lvarno = -1;
while ((lvarno = bms_next_member(lvarnos, lvarno)) >= 0)
{
Assert(lvarno > 0 && lvarno <= nullinfo->rtlength);
if (!bms_is_subset(nullinfo->nullingrels[rcon->varno],
nullinfo->nullingrels[lvarno]))
{
wrap = true;
break;
}
}
}
} }
else if (rcon->wrap_non_vars) else if (rcon->wrap_non_vars)
{ {

View File

@ -1848,6 +1848,281 @@ order by 1, 2;
4567890123456789 | 9135780246913578 4567890123456789 | 9135780246913578
(11 rows) (11 rows)
-- lateral references for simple Vars can escape being wrapped if the
-- referenced rel is under the same lowest nulling outer join
explain (verbose, costs off)
select t1.q1, x from
int8_tbl t1 left join
(int8_tbl t2 inner join
lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1)
on t1.q1 = t2.q1
order by 1, 2;
QUERY PLAN
--------------------------------------------------------
Sort
Output: t1.q1, t2.q2
Sort Key: t1.q1, t2.q2
-> Hash Right Join
Output: t1.q1, t2.q2
Hash Cond: (t2.q1 = t1.q1)
-> Hash Join
Output: t2.q2, t2.q1
Hash Cond: (t2.q2 = t3.q1)
-> Seq Scan on public.int8_tbl t2
Output: t2.q1, t2.q2
-> Hash
Output: t3.q1
-> Seq Scan on public.int8_tbl t3
Output: t3.q1
-> Hash
Output: t1.q1
-> Seq Scan on public.int8_tbl t1
Output: t1.q1
(19 rows)
select t1.q1, x from
int8_tbl t1 left join
(int8_tbl t2 inner join
lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1)
on t1.q1 = t2.q1
order by 1, 2;
q1 | x
------------------+------------------
123 | 4567890123456789
123 | 4567890123456789
123 | 4567890123456789
123 | 4567890123456789
123 | 4567890123456789
123 | 4567890123456789
4567890123456789 | 123
4567890123456789 | 123
4567890123456789 | 123
4567890123456789 | 123
4567890123456789 | 123
4567890123456789 | 123
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
(21 rows)
-- otherwise we need to wrap the Vars
explain (verbose, costs off)
select t1.q1, x from
int8_tbl t1 left join
(int8_tbl t2 left join
lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1)
on t1.q1 = t2.q1
order by 1, 2;
QUERY PLAN
--------------------------------------------------
Sort
Output: t1.q1, (t2.q2)
Sort Key: t1.q1, (t2.q2)
-> Hash Right Join
Output: t1.q1, (t2.q2)
Hash Cond: (t2.q1 = t1.q1)
-> Nested Loop Left Join
Output: t2.q1, (t2.q2)
-> Seq Scan on public.int8_tbl t2
Output: t2.q1, t2.q2
-> Seq Scan on public.int8_tbl t3
Output: t3.q1, t2.q2
Filter: (t2.q2 = t3.q1)
-> Hash
Output: t1.q1
-> Seq Scan on public.int8_tbl t1
Output: t1.q1
(17 rows)
select t1.q1, x from
int8_tbl t1 left join
(int8_tbl t2 left join
lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1)
on t1.q1 = t2.q1
order by 1, 2;
q1 | x
------------------+------------------
123 | 4567890123456789
123 | 4567890123456789
123 | 4567890123456789
123 | 4567890123456789
123 | 4567890123456789
123 | 4567890123456789
123 |
123 |
4567890123456789 | 123
4567890123456789 | 123
4567890123456789 | 123
4567890123456789 | 123
4567890123456789 | 123
4567890123456789 | 123
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 |
4567890123456789 |
4567890123456789 |
(26 rows)
-- lateral references for PHVs can also escape being wrapped if the
-- referenced rel is under the same lowest nulling outer join
explain (verbose, costs off)
select ss2.* from
int8_tbl t1 left join
(int8_tbl t2 left join
(select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join
lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
on t1.q2 = ss2.q1
order by 1, 2, 3;
QUERY PLAN
----------------------------------------------------------------
Sort
Output: (COALESCE(t3.q1)), t4.q1, t4.q2
Sort Key: (COALESCE(t3.q1)), t4.q1, t4.q2
-> Hash Right Join
Output: (COALESCE(t3.q1)), t4.q1, t4.q2
Hash Cond: (t4.q1 = t1.q2)
-> Hash Join
Output: (COALESCE(t3.q1)), t4.q1, t4.q2
Hash Cond: (t2.q2 = t4.q1)
-> Hash Left Join
Output: t2.q2, (COALESCE(t3.q1))
Hash Cond: (t2.q1 = t3.q2)
-> Seq Scan on public.int8_tbl t2
Output: t2.q1, t2.q2
-> Hash
Output: t3.q2, (COALESCE(t3.q1))
-> Seq Scan on public.int8_tbl t3
Output: t3.q2, COALESCE(t3.q1)
-> Hash
Output: t4.q1, t4.q2
-> Seq Scan on public.int8_tbl t4
Output: t4.q1, t4.q2
-> Hash
Output: t1.q2
-> Seq Scan on public.int8_tbl t1
Output: t1.q2
(26 rows)
select ss2.* from
int8_tbl t1 left join
(int8_tbl t2 left join
(select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join
lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
on t1.q2 = ss2.q1
order by 1, 2, 3;
y | q1 | q2
------------------+------------------+-------------------
123 | 123 | 456
123 | 123 | 4567890123456789
123 | 4567890123456789 | -4567890123456789
123 | 4567890123456789 | -4567890123456789
123 | 4567890123456789 | 123
123 | 4567890123456789 | 123
123 | 4567890123456789 | 4567890123456789
123 | 4567890123456789 | 4567890123456789
4567890123456789 | 123 | 456
4567890123456789 | 123 | 4567890123456789
4567890123456789 | 4567890123456789 | -4567890123456789
4567890123456789 | 4567890123456789 | -4567890123456789
4567890123456789 | 4567890123456789 | -4567890123456789
4567890123456789 | 4567890123456789 | -4567890123456789
4567890123456789 | 4567890123456789 | 123
4567890123456789 | 4567890123456789 | 123
4567890123456789 | 4567890123456789 | 123
4567890123456789 | 4567890123456789 | 123
4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789
| |
| |
(24 rows)
-- otherwise we need to wrap the PHVs
explain (verbose, costs off)
select ss2.* from
int8_tbl t1 left join
(int8_tbl t2 left join
(select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join
lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
on t1.q2 = ss2.q1
order by 1, 2, 3;
QUERY PLAN
----------------------------------------------------------------
Sort
Output: ((COALESCE(t3.q1))), t4.q1, t4.q2
Sort Key: ((COALESCE(t3.q1))), t4.q1, t4.q2
-> Hash Right Join
Output: ((COALESCE(t3.q1))), t4.q1, t4.q2
Hash Cond: (t4.q1 = t1.q2)
-> Nested Loop
Output: t4.q1, t4.q2, ((COALESCE(t3.q1)))
Join Filter: (t2.q2 = t4.q1)
-> Hash Left Join
Output: t2.q2, (COALESCE(t3.q1))
Hash Cond: (t2.q1 = t3.q2)
-> Seq Scan on public.int8_tbl t2
Output: t2.q1, t2.q2
-> Hash
Output: t3.q2, (COALESCE(t3.q1))
-> Seq Scan on public.int8_tbl t3
Output: t3.q2, COALESCE(t3.q1)
-> Seq Scan on public.int8_tbl t4
Output: t4.q1, t4.q2, (COALESCE(t3.q1))
-> Hash
Output: t1.q2
-> Seq Scan on public.int8_tbl t1
Output: t1.q2
(24 rows)
select ss2.* from
int8_tbl t1 left join
(int8_tbl t2 left join
(select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join
lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
on t1.q2 = ss2.q1
order by 1, 2, 3;
y | q1 | q2
------------------+------------------+-------------------
123 | 123 | 456
123 | 123 | 4567890123456789
123 | 4567890123456789 | -4567890123456789
123 | 4567890123456789 | -4567890123456789
123 | 4567890123456789 | 123
123 | 4567890123456789 | 123
123 | 4567890123456789 | 4567890123456789
123 | 4567890123456789 | 4567890123456789
4567890123456789 | 123 | 456
4567890123456789 | 123 | 4567890123456789
4567890123456789 | 4567890123456789 | -4567890123456789
4567890123456789 | 4567890123456789 | -4567890123456789
4567890123456789 | 4567890123456789 | -4567890123456789
4567890123456789 | 4567890123456789 | -4567890123456789
4567890123456789 | 4567890123456789 | 123
4567890123456789 | 4567890123456789 | 123
4567890123456789 | 4567890123456789 | 123
4567890123456789 | 4567890123456789 | 123
4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789
| |
| |
(24 rows)
-- --
-- Tests for CTE inlining behavior -- Tests for CTE inlining behavior
-- --

View File

@ -939,6 +939,76 @@ select t1.q1, x from
on t1.q2 = t2.q2 on t1.q2 = t2.q2
order by 1, 2; order by 1, 2;
-- lateral references for simple Vars can escape being wrapped if the
-- referenced rel is under the same lowest nulling outer join
explain (verbose, costs off)
select t1.q1, x from
int8_tbl t1 left join
(int8_tbl t2 inner join
lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1)
on t1.q1 = t2.q1
order by 1, 2;
select t1.q1, x from
int8_tbl t1 left join
(int8_tbl t2 inner join
lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1)
on t1.q1 = t2.q1
order by 1, 2;
-- otherwise we need to wrap the Vars
explain (verbose, costs off)
select t1.q1, x from
int8_tbl t1 left join
(int8_tbl t2 left join
lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1)
on t1.q1 = t2.q1
order by 1, 2;
select t1.q1, x from
int8_tbl t1 left join
(int8_tbl t2 left join
lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1)
on t1.q1 = t2.q1
order by 1, 2;
-- lateral references for PHVs can also escape being wrapped if the
-- referenced rel is under the same lowest nulling outer join
explain (verbose, costs off)
select ss2.* from
int8_tbl t1 left join
(int8_tbl t2 left join
(select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join
lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
on t1.q2 = ss2.q1
order by 1, 2, 3;
select ss2.* from
int8_tbl t1 left join
(int8_tbl t2 left join
(select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join
lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
on t1.q2 = ss2.q1
order by 1, 2, 3;
-- otherwise we need to wrap the PHVs
explain (verbose, costs off)
select ss2.* from
int8_tbl t1 left join
(int8_tbl t2 left join
(select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join
lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
on t1.q2 = ss2.q1
order by 1, 2, 3;
select ss2.* from
int8_tbl t1 left join
(int8_tbl t2 left join
(select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join
lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
on t1.q2 = ss2.q1
order by 1, 2, 3;
-- --
-- Tests for CTE inlining behavior -- Tests for CTE inlining behavior
-- --