diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index faabe4d0652..9949d5b1d3b 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -1366,15 +1366,19 @@ generate_base_implied_equalities_broken(PlannerInfo *root, * commutative duplicates, i.e. if the algorithm selects "a.x = b.y" but * we already have "b.y = a.x", we return the existing clause. * - * join_relids should always equal bms_union(outer_relids, inner_rel->relids). - * We could simplify this function's API by computing it internally, but in - * most current uses, the caller has the value at hand anyway. + * If we are considering an outer join, ojrelid is the associated OJ relid, + * otherwise it's zero. + * + * join_relids should always equal bms_union(outer_relids, inner_rel->relids) + * plus ojrelid if that's not zero. We could simplify this function's API by + * computing it internally, but most callers have the value at hand anyway. */ List * generate_join_implied_equalities(PlannerInfo *root, Relids join_relids, Relids outer_relids, - RelOptInfo *inner_rel) + RelOptInfo *inner_rel, + Index ojrelid) { List *result = NIL; Relids inner_relids = inner_rel->relids; @@ -1392,6 +1396,8 @@ generate_join_implied_equalities(PlannerInfo *root, nominal_inner_relids = inner_rel->top_parent_relids; /* ECs will be marked with the parent's relid, not the child's */ nominal_join_relids = bms_union(outer_relids, nominal_inner_relids); + if (ojrelid != 0) + nominal_join_relids = bms_add_member(nominal_join_relids, ojrelid); } else { @@ -1400,10 +1406,23 @@ generate_join_implied_equalities(PlannerInfo *root, } /* - * Get all eclasses that mention both inner and outer sides of the join + * Examine all potentially-relevant eclasses. + * + * If we are considering an outer join, we must include "join" clauses + * that mention either input rel plus the outer join's relid; these + * represent post-join filter clauses that have to be applied at this + * join. We don't have infrastructure that would let us identify such + * eclasses cheaply, so just fall back to considering all eclasses + * mentioning anything in nominal_join_relids. + * + * At inner joins, we can be smarter: only consider eclasses mentioning + * both input rels. */ - matching_ecs = get_common_eclass_indexes(root, nominal_inner_relids, - outer_relids); + if (ojrelid != 0) + matching_ecs = get_eclass_indexes_for_relids(root, nominal_join_relids); + else + matching_ecs = get_common_eclass_indexes(root, nominal_inner_relids, + outer_relids); i = -1; while ((i = bms_next_member(matching_ecs, i)) >= 0) @@ -1447,6 +1466,10 @@ generate_join_implied_equalities(PlannerInfo *root, /* * generate_join_implied_equalities_for_ecs * As above, but consider only the listed ECs. + * + * For the sole current caller, we can assume ojrelid == 0, that is we are + * not interested in outer-join filter clauses. This might need to change + * in future. */ List * generate_join_implied_equalities_for_ecs(PlannerInfo *root, @@ -2970,6 +2993,8 @@ generate_implied_equalities_for_column(PlannerInfo *root, * generate_join_implied_equalities(). Note it's OK to occasionally say "yes" * incorrectly. Hence we don't bother with details like whether the lack of a * cross-type operator might prevent the clause from actually being generated. + * False negatives are not always fatal either: they will discourage, but not + * completely prevent, investigation of particular join pathways. */ bool have_relevant_eclass_joinclause(PlannerInfo *root, @@ -2978,7 +3003,16 @@ have_relevant_eclass_joinclause(PlannerInfo *root, Bitmapset *matching_ecs; int i; - /* Examine only eclasses mentioning both rel1 and rel2 */ + /* + * Examine only eclasses mentioning both rel1 and rel2. + * + * Note that we do not consider the possibility of an eclass generating + * "join" clauses that mention just one of the rels plus an outer join + * that could be formed from them. Although such clauses must be + * correctly enforced when we form the outer join, they don't seem like + * sufficient reason to prioritize this join over other ones. The join + * ordering rules will force the join to be made when necessary. + */ matching_ecs = get_common_eclass_indexes(root, rel1->relids, rel2->relids); diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 721a0752018..011a0337dad 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -3349,12 +3349,15 @@ check_index_predicates(PlannerInfo *root, RelOptInfo *rel) * relid sets for generate_join_implied_equalities is slightly tricky * because the rel could be a child rel rather than a true baserel, and in * that case we must subtract its parents' relid(s) from all_query_rels. + * Additionally, we mustn't consider clauses that are only computable + * after outer joins that can null the rel. */ if (rel->reloptkind == RELOPT_OTHER_MEMBER_REL) otherrels = bms_difference(root->all_query_rels, find_childrel_parents(root, rel)); else otherrels = bms_difference(root->all_query_rels, rel->relids); + otherrels = bms_del_members(otherrels, rel->nulling_relids); if (!bms_is_empty(otherrels)) clauselist = @@ -3363,7 +3366,8 @@ check_index_predicates(PlannerInfo *root, RelOptInfo *rel) bms_union(rel->relids, otherrels), otherrels, - rel)); + rel, + 0)); /* * Normally we remove quals that are implied by a partial index's diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index f79bc4430c1..98cf3494e6f 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -669,7 +669,8 @@ reduce_unique_semijoins(PlannerInfo *root) list_concat(generate_join_implied_equalities(root, joinrelids, sjinfo->min_lefthand, - innerrel), + innerrel, + 0), innerrel->joininfo); /* Test whether the innerrel is unique for those clauses. */ diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index d29a25f8aa6..9ad44a0508c 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -47,7 +47,8 @@ static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, static List *build_joinrel_restrictlist(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outer_rel, - RelOptInfo *inner_rel); + RelOptInfo *inner_rel, + SpecialJoinInfo *sjinfo); static void build_joinrel_joinlist(RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel); @@ -667,7 +668,8 @@ build_join_rel(PlannerInfo *root, *restrictlist_ptr = build_joinrel_restrictlist(root, joinrel, outer_rel, - inner_rel); + inner_rel, + sjinfo); return joinrel; } @@ -779,7 +781,8 @@ build_join_rel(PlannerInfo *root, * for set_joinrel_size_estimates().) */ restrictlist = build_joinrel_restrictlist(root, joinrel, - outer_rel, inner_rel); + outer_rel, inner_rel, + sjinfo); if (restrictlist_ptr) *restrictlist_ptr = restrictlist; build_joinrel_joinlist(joinrel, outer_rel, inner_rel); @@ -1220,6 +1223,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, * 'joinrel' is a join relation node * 'outer_rel' and 'inner_rel' are a pair of relations that can be joined * to form joinrel. + * 'sjinfo': join context info * * build_joinrel_restrictlist() returns a list of relevant restrictinfos, * whereas build_joinrel_joinlist() stores its results in the joinrel's @@ -1234,7 +1238,8 @@ static List * build_joinrel_restrictlist(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outer_rel, - RelOptInfo *inner_rel) + RelOptInfo *inner_rel, + SpecialJoinInfo *sjinfo) { List *result; Relids both_input_relids; @@ -1260,7 +1265,8 @@ build_joinrel_restrictlist(PlannerInfo *root, generate_join_implied_equalities(root, joinrel->relids, outer_rel->relids, - inner_rel)); + inner_rel, + sjinfo->ojrelid)); return result; } @@ -1543,7 +1549,8 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, generate_join_implied_equalities(root, joinrelids, required_outer, - baserel)); + baserel, + 0)); /* Compute set of serial numbers of the enforced clauses */ pserials = NULL; @@ -1665,7 +1672,8 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, eclauses = generate_join_implied_equalities(root, join_and_req, required_outer, - joinrel); + joinrel, + 0); /* We only want ones that aren't movable to lower levels */ dropped_ecs = NIL; foreach(lc, eclauses) diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 736d78ea4c3..5b9db7733d2 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -149,7 +149,8 @@ extern void generate_base_implied_equalities(PlannerInfo *root); extern List *generate_join_implied_equalities(PlannerInfo *root, Relids join_relids, Relids outer_relids, - RelOptInfo *inner_rel); + RelOptInfo *inner_rel, + Index ojrelid); extern List *generate_join_implied_equalities_for_ecs(PlannerInfo *root, List *eclasses, Relids join_relids, diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 5a2756b333c..e293de03c07 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -4100,6 +4100,38 @@ select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand) ---------+---------+---------+---------- (0 rows) +-- related case +explain (costs off) +select * from int8_tbl t1 left join int8_tbl t2 on t1.q2 = t2.q1, + lateral (select * from int8_tbl t3 where t2.q1 = t2.q2) ss; + QUERY PLAN +------------------------------------------- + Nested Loop + -> Hash Left Join + Hash Cond: (t1.q2 = t2.q1) + Filter: (t2.q1 = t2.q2) + -> Seq Scan on int8_tbl t1 + -> Hash + -> Seq Scan on int8_tbl t2 + -> Seq Scan on int8_tbl t3 +(8 rows) + +select * from int8_tbl t1 left join int8_tbl t2 on t1.q2 = t2.q1, + lateral (select * from int8_tbl t3 where t2.q1 = t2.q2) ss; + q1 | q2 | q1 | q2 | q1 | q2 +------------------+------------------+------------------+------------------+------------------+------------------- + 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 456 + 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 + 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 + 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 456 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789 +(10 rows) + -- -- check handling of join aliases when flattening multiple levels of subquery -- diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index a38034bce74..5c0328ed764 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1386,6 +1386,15 @@ select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand) from tenk1 a left join tenk1 b on b.thousand = a.unique1 left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand) where a.unique2 < 10 and coalesce(b.twothousand, a.twothousand) = 44; +-- related case + +explain (costs off) +select * from int8_tbl t1 left join int8_tbl t2 on t1.q2 = t2.q1, + lateral (select * from int8_tbl t3 where t2.q1 = t2.q2) ss; + +select * from int8_tbl t1 left join int8_tbl t2 on t1.q2 = t2.q1, + lateral (select * from int8_tbl t3 where t2.q1 = t2.q2) ss; + -- -- check handling of join aliases when flattening multiple levels of subquery --