From 739f1d6218f5ce1e0243127ab23f431a7d07977c Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 23 Feb 2023 11:05:58 -0500 Subject: [PATCH] Fix mis-handling of outer join quals generated by EquivalenceClasses. It's possible, in admittedly-rather-contrived cases, for an eclass to generate a derived "join" qual that constrains the post-outer-join value(s) of some RHS variable(s) without mentioning the LHS at all. While the mechanisms were set up to work for this, we fell foul of the "get_common_eclass_indexes" filter installed by commit 3373c7155: it could decide that such an eclass wasn't relevant to the join, so that the required qual clause wouldn't get emitted there or anywhere else. To fix, apply get_common_eclass_indexes only at inner joins, where its rule is still valid. At an outer join, fall back to examining all eclasses that mention either input (or the OJ relid, though it should be impossible for an eclass to mention that without mentioning either input). Perhaps we can improve on that later, but the cost/benefit of adding more complexity to skip some irrelevant eclasses is dubious. To allow cheaply distinguishing outer from inner joins, pass the ojrelid to generate_join_implied_equalities as a separate argument. This also allows cleaning up some sloppiness that had crept into the definition of its join_relids argument, and it allows accurate calculation of nominal_join_relids for a child outer join. (The latter oversight seems not to have been a live bug, but it certainly could have caused problems in future.) Also fix what might be a live bug in check_index_predicates: it was being sloppy about what it passed to generate_join_implied_equalities. Per report from Richard Guo. Discussion: https://postgr.es/m/CAMbWs4-DsTBfOvXuw64GdFss2=M5cwtEhY=0DCS7t2gT7P6hSA@mail.gmail.com --- src/backend/optimizer/path/equivclass.c | 50 +++++++++++++++++++---- src/backend/optimizer/path/indxpath.c | 6 ++- src/backend/optimizer/plan/analyzejoins.c | 3 +- src/backend/optimizer/util/relnode.c | 22 ++++++---- src/include/optimizer/paths.h | 3 +- src/test/regress/expected/join.out | 32 +++++++++++++++ src/test/regress/sql/join.sql | 9 ++++ 7 files changed, 107 insertions(+), 18 deletions(-) diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index faabe4d0652..9949d5b1d3b 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -1366,15 +1366,19 @@ generate_base_implied_equalities_broken(PlannerInfo *root, * commutative duplicates, i.e. if the algorithm selects "a.x = b.y" but * we already have "b.y = a.x", we return the existing clause. * - * join_relids should always equal bms_union(outer_relids, inner_rel->relids). - * We could simplify this function's API by computing it internally, but in - * most current uses, the caller has the value at hand anyway. + * If we are considering an outer join, ojrelid is the associated OJ relid, + * otherwise it's zero. + * + * join_relids should always equal bms_union(outer_relids, inner_rel->relids) + * plus ojrelid if that's not zero. We could simplify this function's API by + * computing it internally, but most callers have the value at hand anyway. */ List * generate_join_implied_equalities(PlannerInfo *root, Relids join_relids, Relids outer_relids, - RelOptInfo *inner_rel) + RelOptInfo *inner_rel, + Index ojrelid) { List *result = NIL; Relids inner_relids = inner_rel->relids; @@ -1392,6 +1396,8 @@ generate_join_implied_equalities(PlannerInfo *root, nominal_inner_relids = inner_rel->top_parent_relids; /* ECs will be marked with the parent's relid, not the child's */ nominal_join_relids = bms_union(outer_relids, nominal_inner_relids); + if (ojrelid != 0) + nominal_join_relids = bms_add_member(nominal_join_relids, ojrelid); } else { @@ -1400,10 +1406,23 @@ generate_join_implied_equalities(PlannerInfo *root, } /* - * Get all eclasses that mention both inner and outer sides of the join + * Examine all potentially-relevant eclasses. + * + * If we are considering an outer join, we must include "join" clauses + * that mention either input rel plus the outer join's relid; these + * represent post-join filter clauses that have to be applied at this + * join. We don't have infrastructure that would let us identify such + * eclasses cheaply, so just fall back to considering all eclasses + * mentioning anything in nominal_join_relids. + * + * At inner joins, we can be smarter: only consider eclasses mentioning + * both input rels. */ - matching_ecs = get_common_eclass_indexes(root, nominal_inner_relids, - outer_relids); + if (ojrelid != 0) + matching_ecs = get_eclass_indexes_for_relids(root, nominal_join_relids); + else + matching_ecs = get_common_eclass_indexes(root, nominal_inner_relids, + outer_relids); i = -1; while ((i = bms_next_member(matching_ecs, i)) >= 0) @@ -1447,6 +1466,10 @@ generate_join_implied_equalities(PlannerInfo *root, /* * generate_join_implied_equalities_for_ecs * As above, but consider only the listed ECs. + * + * For the sole current caller, we can assume ojrelid == 0, that is we are + * not interested in outer-join filter clauses. This might need to change + * in future. */ List * generate_join_implied_equalities_for_ecs(PlannerInfo *root, @@ -2970,6 +2993,8 @@ generate_implied_equalities_for_column(PlannerInfo *root, * generate_join_implied_equalities(). Note it's OK to occasionally say "yes" * incorrectly. Hence we don't bother with details like whether the lack of a * cross-type operator might prevent the clause from actually being generated. + * False negatives are not always fatal either: they will discourage, but not + * completely prevent, investigation of particular join pathways. */ bool have_relevant_eclass_joinclause(PlannerInfo *root, @@ -2978,7 +3003,16 @@ have_relevant_eclass_joinclause(PlannerInfo *root, Bitmapset *matching_ecs; int i; - /* Examine only eclasses mentioning both rel1 and rel2 */ + /* + * Examine only eclasses mentioning both rel1 and rel2. + * + * Note that we do not consider the possibility of an eclass generating + * "join" clauses that mention just one of the rels plus an outer join + * that could be formed from them. Although such clauses must be + * correctly enforced when we form the outer join, they don't seem like + * sufficient reason to prioritize this join over other ones. The join + * ordering rules will force the join to be made when necessary. + */ matching_ecs = get_common_eclass_indexes(root, rel1->relids, rel2->relids); diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 721a0752018..011a0337dad 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -3349,12 +3349,15 @@ check_index_predicates(PlannerInfo *root, RelOptInfo *rel) * relid sets for generate_join_implied_equalities is slightly tricky * because the rel could be a child rel rather than a true baserel, and in * that case we must subtract its parents' relid(s) from all_query_rels. + * Additionally, we mustn't consider clauses that are only computable + * after outer joins that can null the rel. */ if (rel->reloptkind == RELOPT_OTHER_MEMBER_REL) otherrels = bms_difference(root->all_query_rels, find_childrel_parents(root, rel)); else otherrels = bms_difference(root->all_query_rels, rel->relids); + otherrels = bms_del_members(otherrels, rel->nulling_relids); if (!bms_is_empty(otherrels)) clauselist = @@ -3363,7 +3366,8 @@ check_index_predicates(PlannerInfo *root, RelOptInfo *rel) bms_union(rel->relids, otherrels), otherrels, - rel)); + rel, + 0)); /* * Normally we remove quals that are implied by a partial index's diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index f79bc4430c1..98cf3494e6f 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -669,7 +669,8 @@ reduce_unique_semijoins(PlannerInfo *root) list_concat(generate_join_implied_equalities(root, joinrelids, sjinfo->min_lefthand, - innerrel), + innerrel, + 0), innerrel->joininfo); /* Test whether the innerrel is unique for those clauses. */ diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index d29a25f8aa6..9ad44a0508c 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -47,7 +47,8 @@ static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, static List *build_joinrel_restrictlist(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outer_rel, - RelOptInfo *inner_rel); + RelOptInfo *inner_rel, + SpecialJoinInfo *sjinfo); static void build_joinrel_joinlist(RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel); @@ -667,7 +668,8 @@ build_join_rel(PlannerInfo *root, *restrictlist_ptr = build_joinrel_restrictlist(root, joinrel, outer_rel, - inner_rel); + inner_rel, + sjinfo); return joinrel; } @@ -779,7 +781,8 @@ build_join_rel(PlannerInfo *root, * for set_joinrel_size_estimates().) */ restrictlist = build_joinrel_restrictlist(root, joinrel, - outer_rel, inner_rel); + outer_rel, inner_rel, + sjinfo); if (restrictlist_ptr) *restrictlist_ptr = restrictlist; build_joinrel_joinlist(joinrel, outer_rel, inner_rel); @@ -1220,6 +1223,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, * 'joinrel' is a join relation node * 'outer_rel' and 'inner_rel' are a pair of relations that can be joined * to form joinrel. + * 'sjinfo': join context info * * build_joinrel_restrictlist() returns a list of relevant restrictinfos, * whereas build_joinrel_joinlist() stores its results in the joinrel's @@ -1234,7 +1238,8 @@ static List * build_joinrel_restrictlist(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outer_rel, - RelOptInfo *inner_rel) + RelOptInfo *inner_rel, + SpecialJoinInfo *sjinfo) { List *result; Relids both_input_relids; @@ -1260,7 +1265,8 @@ build_joinrel_restrictlist(PlannerInfo *root, generate_join_implied_equalities(root, joinrel->relids, outer_rel->relids, - inner_rel)); + inner_rel, + sjinfo->ojrelid)); return result; } @@ -1543,7 +1549,8 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, generate_join_implied_equalities(root, joinrelids, required_outer, - baserel)); + baserel, + 0)); /* Compute set of serial numbers of the enforced clauses */ pserials = NULL; @@ -1665,7 +1672,8 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, eclauses = generate_join_implied_equalities(root, join_and_req, required_outer, - joinrel); + joinrel, + 0); /* We only want ones that aren't movable to lower levels */ dropped_ecs = NIL; foreach(lc, eclauses) diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 736d78ea4c3..5b9db7733d2 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -149,7 +149,8 @@ extern void generate_base_implied_equalities(PlannerInfo *root); extern List *generate_join_implied_equalities(PlannerInfo *root, Relids join_relids, Relids outer_relids, - RelOptInfo *inner_rel); + RelOptInfo *inner_rel, + Index ojrelid); extern List *generate_join_implied_equalities_for_ecs(PlannerInfo *root, List *eclasses, Relids join_relids, diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 5a2756b333c..e293de03c07 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -4100,6 +4100,38 @@ select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand) ---------+---------+---------+---------- (0 rows) +-- related case +explain (costs off) +select * from int8_tbl t1 left join int8_tbl t2 on t1.q2 = t2.q1, + lateral (select * from int8_tbl t3 where t2.q1 = t2.q2) ss; + QUERY PLAN +------------------------------------------- + Nested Loop + -> Hash Left Join + Hash Cond: (t1.q2 = t2.q1) + Filter: (t2.q1 = t2.q2) + -> Seq Scan on int8_tbl t1 + -> Hash + -> Seq Scan on int8_tbl t2 + -> Seq Scan on int8_tbl t3 +(8 rows) + +select * from int8_tbl t1 left join int8_tbl t2 on t1.q2 = t2.q1, + lateral (select * from int8_tbl t3 where t2.q1 = t2.q2) ss; + q1 | q2 | q1 | q2 | q1 | q2 +------------------+------------------+------------------+------------------+------------------+------------------- + 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 456 + 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 + 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 + 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 456 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789 +(10 rows) + -- -- check handling of join aliases when flattening multiple levels of subquery -- diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index a38034bce74..5c0328ed764 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1386,6 +1386,15 @@ select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand) from tenk1 a left join tenk1 b on b.thousand = a.unique1 left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand) where a.unique2 < 10 and coalesce(b.twothousand, a.twothousand) = 44; +-- related case + +explain (costs off) +select * from int8_tbl t1 left join int8_tbl t2 on t1.q2 = t2.q1, + lateral (select * from int8_tbl t3 where t2.q1 = t2.q2) ss; + +select * from int8_tbl t1 left join int8_tbl t2 on t1.q2 = t2.q1, + lateral (select * from int8_tbl t3 where t2.q1 = t2.q2) ss; + -- -- check handling of join aliases when flattening multiple levels of subquery --