From 8538519db107777a6b06b7277185e6605caf8d4c Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 4 Feb 2023 15:19:54 -0500 Subject: [PATCH] Fix thinko in outer-join removal. If we have a RestrictInfo that mentions both the removal-candidate relation and the outer join's relid, then that is a pushed-down condition not a join condition, so it should be grounds for deciding that we can't remove the outer join. In commit 2489d76c4, I'd blindly included the OJ's relid into "joinrelids" as per the new standard convention, but the checks of attr_needed and ph_needed should only allow the join's input rels to be mentioned. Having done that, the check for references in pushed-down quals a few lines further down should be redundant. I left it in place as an Assert, though. While researching this I happened across a couple of comments that worried about the effects of update_placeholder_eval_levels. That's gone as of b448f1c8d, so we can remove some worry. Per bug #17769 from Robins Tharakan. The submitted test case triggers this more or less accidentally because we flatten out a LATERAL sub-select after we've done join strength reduction; if we did that in the other order, this problem would be masked because the outer join would get simplified to an inner join. To ensure that the committed test case will continue to test what it means to even if we make that happen someday, use a test clause involving COALESCE(), which will prevent us from using it to do join strength reduction. Patch by me, but thanks to Richard Guo for initial investigation. Discussion: https://postgr.es/m/17769-e4f7a5c9d84a80a7@postgresql.org --- src/backend/optimizer/plan/analyzejoins.c | 30 +++++++++++------------ src/backend/optimizer/plan/initsplan.c | 6 ++--- src/backend/optimizer/util/var.c | 10 -------- src/test/regress/expected/join.out | 15 ++++++++++++ src/test/regress/sql/join.sql | 7 ++++++ 5 files changed, 39 insertions(+), 29 deletions(-) diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 42da62c1c6b..1ca554a6430 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -164,6 +164,7 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) { int innerrelid; RelOptInfo *innerrel; + Relids inputrelids; Relids joinrelids; List *clause_list = NIL; ListCell *l; @@ -190,17 +191,16 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) return false; /* Compute the relid set for the join we are considering */ - joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); - if (sjinfo->ojrelid != 0) - joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid); + inputrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); + Assert(sjinfo->ojrelid != 0); + joinrelids = bms_copy(inputrelids); + joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid); /* * We can't remove the join if any inner-rel attributes are used above the - * join. - * - * Note that this test only detects use of inner-rel attributes in higher - * join conditions and the target list. There might be such attributes in - * pushed-down conditions at this join, too. We check that case below. + * join. Here, "above" the join includes pushed-down conditions, so we + * should reject if attr_needed includes the OJ's own relid; therefore, + * compare to inputrelids not joinrelids. * * As a micro-optimization, it seems better to start with max_attr and * count down rather than starting with min_attr and counting up, on the @@ -211,7 +211,7 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) attroff >= 0; attroff--) { - if (!bms_is_subset(innerrel->attr_needed[attroff], joinrelids)) + if (!bms_is_subset(innerrel->attr_needed[attroff], inputrelids)) return false; } @@ -230,7 +230,7 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) if (bms_overlap(phinfo->ph_lateral, innerrel->relids)) return false; /* it references innerrel laterally */ - if (bms_is_subset(phinfo->ph_needed, joinrelids)) + if (bms_is_subset(phinfo->ph_needed, inputrelids)) continue; /* PHV is not used above the join */ if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids)) continue; /* it definitely doesn't reference innerrel */ @@ -273,13 +273,11 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) { /* * If such a clause actually references the inner rel then join - * removal has to be disallowed. We have to check this despite - * the previous attr_needed checks because of the possibility of - * pushed-down clauses referencing the rel. + * removal has to be disallowed. The previous attr_needed checks + * should have caught this, though. */ - if (bms_is_member(innerrelid, restrictinfo->clause_relids)) - return false; - continue; /* else, ignore; not useful here */ + Assert(!bms_is_member(innerrelid, restrictinfo->clause_relids)); + continue; /* ignore; not useful here */ } /* Ignore if it's not a mergejoinable clause */ diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 7fa6f17382e..a19260f9bc4 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -494,9 +494,6 @@ extract_lateral_references(PlannerInfo *root, RelOptInfo *brel, Index rtindex) * create_lateral_join_info * Fill in the per-base-relation direct_lateral_relids, lateral_relids * and lateral_referencers sets. - * - * This has to run after deconstruct_jointree, because we need to know the - * final ph_eval_at values for PlaceHolderVars. */ void create_lateral_join_info(PlannerInfo *root) @@ -509,6 +506,9 @@ create_lateral_join_info(PlannerInfo *root) if (!root->hasLateralRTEs) return; + /* We'll need to have the ph_eval_at values for PlaceHolderVars */ + Assert(root->placeholdersFrozen); + /* * Examine all baserels (the rel array has been set up by now). */ diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c index 8c9824e54d6..c55c5f805b3 100644 --- a/src/backend/optimizer/util/var.c +++ b/src/backend/optimizer/util/var.c @@ -198,16 +198,6 @@ pull_varnos_walker(Node *node, pull_varnos_context *context) * fall back to the conservative assumption that the PHV will be * evaluated at its syntactic level (phv->phrels). * - * There is a second hazard: this code is also used to examine - * qual clauses during deconstruct_jointree, when we may have a - * PlaceHolderInfo but its ph_eval_at value is not yet final, so - * that theoretically we could obtain a relid set that's smaller - * than we'd see later on. That should never happen though, - * because we deconstruct the jointree working upwards. Any outer - * join that forces delay of evaluation of a given qual clause - * will be processed before we examine that clause here, so the - * ph_eval_at value should have been updated to include it. - * * Another problem is that a PlaceHolderVar can appear in quals or * tlists that have been translated for use in a child appendrel. * Typically such a PHV is a parameter expression sourced by some diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 8d7bd937a77..c520839bf7d 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -5150,6 +5150,21 @@ SELECT * FROM 1 | 4567890123456789 | -4567890123456789 | 4567890123456789 (5 rows) +-- join removal bug #17769: can't remove if there's a pushed-down reference +EXPLAIN (COSTS OFF) +SELECT q2 FROM + (SELECT * + FROM int8_tbl LEFT JOIN innertab ON q2 = id) ss + WHERE COALESCE(dat1, 0) = q1; + QUERY PLAN +---------------------------------------------------------------- + Nested Loop Left Join + Filter: (COALESCE(innertab.dat1, '0'::bigint) = int8_tbl.q1) + -> Seq Scan on int8_tbl + -> Index Scan using innertab_pkey on innertab + Index Cond: (id = int8_tbl.q2) +(5 rows) + rollback; -- another join removal bug: we must clean up correctly when removing a PHV begin; diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 9f55147be78..b0e8d559cdb 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1860,6 +1860,13 @@ SELECT * FROM FROM int8_tbl LEFT JOIN innertab ON q2 = id) ss2 ON true; +-- join removal bug #17769: can't remove if there's a pushed-down reference +EXPLAIN (COSTS OFF) +SELECT q2 FROM + (SELECT * + FROM int8_tbl LEFT JOIN innertab ON q2 = id) ss + WHERE COALESCE(dat1, 0) = q1; + rollback; -- another join removal bug: we must clean up correctly when removing a PHV