1
0
mirror of https://github.com/postgres/postgres.git synced 2025-05-31 03:21:24 +03:00

Avoid unnecessary wrapping for more complex expressions

When pulling up a subquery that is under an outer join, if the
subquery's target list contains a strict expression that uses a
subquery variable, it's okay to pull up the expression without
wrapping it in a PlaceHolderVar: if the subquery variable is forced to
NULL by the outer join, the expression result will come out as NULL
too.

If the strict expression does not contain any subquery variables, the
current code always wraps it in a PlaceHolderVar.  While this is not
incorrect, the analysis could be tighter: if the strict expression
contains any variables of rels that are under the same lowest nulling
outer join as the subquery, we can also avoid wrapping it.  This is
safe because whenever the outer join forces the subquery's output to
NULL, the variables of rels under that same lowest nulling outer join
are forced to NULL as well, so the strict expression evaluates to NULL
too.  Therefore, it's not necessary to force the expression to be
evaluated below the outer join.  It could be
beneficial to get rid of such PHVs because they could imply lateral
dependencies, which force us to resort to nestloop joins.

This patch checks if the lateral references in the strict expression
contain any variables of rels under the same lowest nulling outer join
as the subquery, and avoids wrapping the expression in that case.

This is fundamentally a generalization of the optimizations for bare
Vars and PHVs introduced in commit f64ec81a8.

No backpatch as this could result in plan changes.

Author: Richard Guo
Discussion: https://postgr.es/m/CAMbWs4_ENtfRdLaM_bXAxiKRYO7DmwDBDG4_2=VTDi0mJP-jAw@mail.gmail.com
This commit is contained in:
Richard Guo 2024-12-17 19:53:01 +09:00
parent 2364f61488
commit 60be3f9f0a
3 changed files with 186 additions and 17 deletions

View File

@@ -2649,11 +2649,12 @@ pullup_replace_vars_callback(Var *var,
 {
 /*
 * If the node contains Var(s) or PlaceHolderVar(s) of the
-* subquery being pulled up, and does not contain any
-* non-strict constructs, then instead of adding a PHV on top
-* we can add the required nullingrels to those Vars/PHVs.
-* (This is fundamentally a generalization of the above cases
-* for bare Vars and PHVs.)
+* subquery being pulled up, or of rels that are under the
+* same lowest nulling outer join as the subquery, and does
+* not contain any non-strict constructs, then instead of
+* adding a PHV on top we can add the required nullingrels to
+* those Vars/PHVs. (This is fundamentally a generalization
+* of the above cases for bare Vars and PHVs.)
 *
 * This test is somewhat expensive, but it avoids pessimizing
 * the plan in cases where the nullingrels get removed again
@@ -2661,14 +2662,16 @@ pullup_replace_vars_callback(Var *var,
 *
 * Note that we don't force wrapping of expressions containing
 * lateral references, so long as they also contain Vars/PHVs
-* of the subquery. This is okay because of the restriction
-* to strict constructs: if the subquery's Vars/PHVs have been
-* forced to NULL by an outer join then the end result of the
-* expression will be NULL too, regardless of the lateral
-* references. So it's not necessary to force the expression
-* to be evaluated below the outer join. This can be a very
-* valuable optimization, because it may allow us to avoid
-* using a nested loop to pass the lateral reference down.
+* of the subquery, or of rels that are under the same lowest
+* nulling outer join as the subquery. This is okay because
+* of the restriction to strict constructs: if those Vars/PHVs
+* have been forced to NULL by an outer join then the end
+* result of the expression will be NULL too, regardless of
+* the lateral references. So it's not necessary to force the
+* expression to be evaluated below the outer join. This can
+* be a very valuable optimization, because it may allow us to
+* avoid using a nested loop to pass the lateral reference
+* down.
 *
 * This analysis could be tighter: in particular, a non-strict
 * construct hidden within a lower-level PlaceHolderVar is not
@@ -2679,10 +2682,40 @@ pullup_replace_vars_callback(Var *var,
* membership of the node, but if it's non-lateral then any
* level-zero var must belong to the subquery.
*/
if ((rcon->target_rte->lateral ?
bms_overlap(pull_varnos(rcon->root, newnode),
rcon->relids) :
contain_vars_of_level(newnode, 0)) &&
bool contain_nullable_vars = false;
if (!rcon->target_rte->lateral)
{
if (contain_vars_of_level(newnode, 0))
contain_nullable_vars = true;
}
else
{
Relids all_varnos;
all_varnos = pull_varnos(rcon->root, newnode);
if (bms_overlap(all_varnos, rcon->relids))
contain_nullable_vars = true;
else
{
nullingrel_info *nullinfo = rcon->nullinfo;
int varno;
varno = -1;
while ((varno = bms_next_member(all_varnos, varno)) >= 0)
{
Assert(varno > 0 && varno <= nullinfo->rtlength);
if (bms_is_subset(nullinfo->nullingrels[rcon->varno],
nullinfo->nullingrels[varno]))
{
contain_nullable_vars = true;
break;
}
}
}
}
if (contain_nullable_vars &&
!contain_nonstrict_functions(newnode))
{
/* No wrap needed */

View File

@@ -1848,6 +1848,109 @@ order by 1, 2;
4567890123456789 | 9135780246913578
(11 rows)
-- strict expressions containing variables of rels under the same lowest
-- nulling outer join can escape being wrapped
explain (verbose, costs off)
select t1.q1, x from
int8_tbl t1 left join
(int8_tbl t2 inner join
lateral (select t2.q1+1 as x, * from int8_tbl t3) t3 on t2.q2 = t3.q2)
on t1.q2 = t2.q2
order by 1, 2;
QUERY PLAN
--------------------------------------------------------
Sort
Output: t1.q1, ((t2.q1 + 1))
Sort Key: t1.q1, ((t2.q1 + 1))
-> Hash Right Join
Output: t1.q1, (t2.q1 + 1)
Hash Cond: (t2.q2 = t1.q2)
-> Hash Join
Output: t2.q1, t2.q2
Hash Cond: (t2.q2 = t3.q2)
-> Seq Scan on public.int8_tbl t2
Output: t2.q1, t2.q2
-> Hash
Output: t3.q2
-> Seq Scan on public.int8_tbl t3
Output: t3.q2
-> Hash
Output: t1.q1, t1.q2
-> Seq Scan on public.int8_tbl t1
Output: t1.q1, t1.q2
(19 rows)
select t1.q1, x from
int8_tbl t1 left join
(int8_tbl t2 inner join
lateral (select t2.q1+1 as x, * from int8_tbl t3) t3 on t2.q2 = t3.q2)
on t1.q2 = t2.q2
order by 1, 2;
q1 | x
------------------+------------------
123 | 124
123 | 124
123 | 124
123 | 4567890123456790
123 | 4567890123456790
4567890123456789 | 124
4567890123456789 | 124
4567890123456789 | 4567890123456790
4567890123456789 | 4567890123456790
4567890123456789 | 4567890123456790
4567890123456789 | 4567890123456790
(11 rows)
-- otherwise we need to wrap the strict expressions
explain (verbose, costs off)
select t1.q1, x from
int8_tbl t1 left join
(int8_tbl t2 left join
lateral (select t2.q1+1 as x, * from int8_tbl t3) t3 on t2.q2 = t3.q2)
on t1.q2 = t2.q2
order by 1, 2;
QUERY PLAN
--------------------------------------------------
Sort
Output: t1.q1, ((t2.q1 + 1))
Sort Key: t1.q1, ((t2.q1 + 1))
-> Hash Right Join
Output: t1.q1, ((t2.q1 + 1))
Hash Cond: (t2.q2 = t1.q2)
-> Nested Loop Left Join
Output: t2.q2, ((t2.q1 + 1))
-> Seq Scan on public.int8_tbl t2
Output: t2.q1, t2.q2
-> Seq Scan on public.int8_tbl t3
Output: t3.q2, (t2.q1 + 1)
Filter: (t2.q2 = t3.q2)
-> Hash
Output: t1.q1, t1.q2
-> Seq Scan on public.int8_tbl t1
Output: t1.q1, t1.q2
(17 rows)
select t1.q1, x from
int8_tbl t1 left join
(int8_tbl t2 left join
lateral (select t2.q1+1 as x, * from int8_tbl t3) t3 on t2.q2 = t3.q2)
on t1.q2 = t2.q2
order by 1, 2;
q1 | x
------------------+------------------
123 | 124
123 | 124
123 | 124
123 | 4567890123456790
123 | 4567890123456790
4567890123456789 | 124
4567890123456789 | 124
4567890123456789 | 4567890123456790
4567890123456789 | 4567890123456790
4567890123456789 | 4567890123456790
4567890123456789 | 4567890123456790
(11 rows)
-- lateral references for simple Vars can escape being wrapped if the
-- referenced rel is under the same lowest nulling outer join
explain (verbose, costs off)

View File

@@ -939,6 +939,39 @@ select t1.q1, x from
on t1.q2 = t2.q2
order by 1, 2;
-- strict expressions containing variables of rels under the same lowest
-- nulling outer join can escape being wrapped
explain (verbose, costs off)
select t1.q1, x from
int8_tbl t1 left join
(int8_tbl t2 inner join
lateral (select t2.q1+1 as x, * from int8_tbl t3) t3 on t2.q2 = t3.q2)
on t1.q2 = t2.q2
order by 1, 2;
select t1.q1, x from
int8_tbl t1 left join
(int8_tbl t2 inner join
lateral (select t2.q1+1 as x, * from int8_tbl t3) t3 on t2.q2 = t3.q2)
on t1.q2 = t2.q2
order by 1, 2;
-- otherwise we need to wrap the strict expressions
explain (verbose, costs off)
select t1.q1, x from
int8_tbl t1 left join
(int8_tbl t2 left join
lateral (select t2.q1+1 as x, * from int8_tbl t3) t3 on t2.q2 = t3.q2)
on t1.q2 = t2.q2
order by 1, 2;
select t1.q1, x from
int8_tbl t1 left join
(int8_tbl t2 left join
lateral (select t2.q1+1 as x, * from int8_tbl t3) t3 on t2.q2 = t3.q2)
on t1.q2 = t2.q2
order by 1, 2;
-- lateral references for simple Vars can escape being wrapped if the
-- referenced rel is under the same lowest nulling outer join
explain (verbose, costs off)