diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index a9b43769a04..1df71f64794 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1690,6 +1690,7 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node) WRITE_UINT_FIELD(query_level); WRITE_NODE_FIELD(plan_params); WRITE_BITMAPSET_FIELD(all_baserels); + WRITE_BITMAPSET_FIELD(nullable_baserels); WRITE_NODE_FIELD(join_rel_list); WRITE_INT_FIELD(join_cur_level); WRITE_NODE_FIELD(init_plans); diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 2c2f5ca8144..643f249bd28 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -510,6 +510,13 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, * equivalence class it is a member of; if none, optionally build a new * single-member EquivalenceClass for it. * + * expr is the expression, and nullable_relids is the set of base relids + * that are potentially nullable below it. We actually only care about + * the set of such relids that are used in the expression; but for caller + * convenience, we perform that intersection step here. The caller need + * only be sure that nullable_relids doesn't omit any nullable rels that + * might appear in the expr. + * * sortref is the SortGroupRef of the originating SortGroupClause, if any, * or zero if not. (It should never be zero if the expression is volatile!) * @@ -538,6 +545,7 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, EquivalenceClass * get_eclass_for_sort_expr(PlannerInfo *root, Expr *expr, + Relids nullable_relids, List *opfamilies, Oid opcintype, Oid collation, @@ -545,6 +553,7 @@ get_eclass_for_sort_expr(PlannerInfo *root, Relids rel, bool create_it) { + Relids expr_relids; EquivalenceClass *newec; EquivalenceMember *newem; ListCell *lc1; @@ -555,6 +564,12 @@ get_eclass_for_sort_expr(PlannerInfo *root, */ expr = canonicalize_ec_expression(expr, opcintype, collation); + /* + * Get the precise set of nullable relids appearing in the expression. + */ + expr_relids = pull_varnos((Node *) expr); + nullable_relids = bms_intersect(nullable_relids, expr_relids); + /* * Scan through the existing EquivalenceClasses for a match */ @@ -629,8 +644,8 @@ get_eclass_for_sort_expr(PlannerInfo *root, if (newec->ec_has_volatile && sortref == 0) /* should not happen */ elog(ERROR, "volatile EquivalenceClass has no sortref"); - newem = add_eq_member(newec, copyObject(expr), pull_varnos((Node *) expr), - NULL, false, opcintype); + newem = add_eq_member(newec, copyObject(expr), expr_relids, + nullable_relids, false, opcintype); /* * add_eq_member doesn't check for volatile functions, set-returning diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 8561e5ffe26..7cd90701d3f 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -170,6 +170,9 @@ canonicalize_pathkeys(PlannerInfo *root, List *pathkeys) * Given an expression and sort-order information, create a PathKey. * The result is always a "canonical" PathKey, but it might be redundant. * + * expr is the expression, and nullable_relids is the set of base relids + * that are potentially nullable below it. + * * If the PathKey is being generated from a SortGroupClause, sortref should be * the SortGroupClause's SortGroupRef; otherwise zero. * @@ -185,6 +188,7 @@ canonicalize_pathkeys(PlannerInfo *root, List *pathkeys) static PathKey * make_pathkey_from_sortinfo(PlannerInfo *root, Expr *expr, + Relids nullable_relids, Oid opfamily, Oid opcintype, Oid collation, @@ -223,8 +227,8 @@ make_pathkey_from_sortinfo(PlannerInfo *root, equality_op); /* Now find or (optionally) create a matching EquivalenceClass */ - eclass = get_eclass_for_sort_expr(root, expr, opfamilies, - opcintype, collation, + eclass = get_eclass_for_sort_expr(root, expr, nullable_relids, + opfamilies, opcintype, collation, sortref, rel, create_it); /* Fail if no EC and !create_it */ @@ -246,6 +250,7 @@ make_pathkey_from_sortinfo(PlannerInfo *root, static PathKey * make_pathkey_from_sortop(PlannerInfo *root, Expr *expr, + Relids nullable_relids, Oid ordering_op, bool nulls_first, Index sortref, @@ -268,6 +273,7 @@ make_pathkey_from_sortop(PlannerInfo *root, return make_pathkey_from_sortinfo(root, expr, + nullable_relids, opfamily, opcintype, collation, @@ -482,9 +488,13 @@ build_index_pathkeys(PlannerInfo *root, nulls_first = index->nulls_first[i]; } - /* OK, try to make a canonical pathkey for this sort key */ + /* + * OK, try to make a canonical pathkey for this sort key. Note we're + * underneath any outer joins, so nullable_relids should be NULL. + */ cpathkey = make_pathkey_from_sortinfo(root, indexkey, + NULL, index->sortopfamily[i], index->opcintype[i], index->indexcollations[i], @@ -573,11 +583,14 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, * expression is *not* volatile in the outer query: it's just * a Var referencing whatever the subquery emitted. (IOW, the * outer query isn't going to re-execute the volatile - * expression itself.) So this is okay. + * expression itself.) So this is okay. Likewise, it's + * correct to pass nullable_relids = NULL, because we're + * underneath any outer joins appearing in the outer query. */ outer_ec = get_eclass_for_sort_expr(root, outer_expr, + NULL, sub_eclass->ec_opfamilies, sub_member->em_datatype, sub_eclass->ec_collation, @@ -665,6 +678,7 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, /* See if we have a matching EC for that */ outer_ec = get_eclass_for_sort_expr(root, outer_expr, + NULL, sub_eclass->ec_opfamilies, sub_expr_type, sub_expr_coll, @@ -770,6 +784,13 @@ build_join_pathkeys(PlannerInfo *root, * The resulting PathKeys are always in canonical form. (Actually, there * is no longer any code anywhere that creates non-canonical PathKeys.) * + * We assume that root->nullable_baserels is the set of base relids that could + * have gone to NULL below the SortGroupClause expressions. This is okay if + * the expressions came from the query's top level (ORDER BY, DISTINCT, etc) + * and if this function is only invoked after deconstruct_jointree. In the + * future we might have to make callers pass in the appropriate + * nullable-relids set, but for now it seems unnecessary. + * * 'sortclauses' is a list of SortGroupClause nodes * 'tlist' is the targetlist to find the referenced tlist entries in */ @@ -794,6 +815,7 @@ make_pathkeys_for_sortclauses(PlannerInfo *root, Assert(OidIsValid(sortcl->sortop)); pathkey = make_pathkey_from_sortop(root, sortkey, + root->nullable_baserels, sortcl->sortop, sortcl->nulls_first, sortcl->tleSortGroupRef, @@ -850,6 +872,7 @@ initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo) restrictinfo->left_ec = get_eclass_for_sort_expr(root, (Expr *) get_leftop(clause), + restrictinfo->nullable_relids, restrictinfo->mergeopfamilies, lefttype, ((OpExpr *) clause)->inputcollid, @@ -859,6 +882,7 @@ initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo) restrictinfo->right_ec = get_eclass_for_sort_expr(root, (Expr *) get_rightop(clause), + restrictinfo->nullable_relids, restrictinfo->mergeopfamilies, righttype, ((OpExpr *) clause)->inputcollid, diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 66b14ca7ddf..41fe303d91f 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -250,6 +250,9 @@ deconstruct_jointree(PlannerInfo *root) Assert(root->parse->jointree != NULL && IsA(root->parse->jointree, FromExpr)); + /* this is filled as we scan the jointree */ + root->nullable_baserels = NULL; + return deconstruct_recurse(root, (Node *) root->parse->jointree, false, &qualscope, &inner_join_rels); } @@ -361,6 +364,7 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, left_inners, right_inners, nonnullable_rels, + nullable_rels, ojscope; List *leftjoinlist, *rightjoinlist; @@ -392,6 +396,8 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, *inner_join_rels = *qualscope; /* Inner join adds no restrictions for quals */ nonnullable_rels = NULL; + /* and it doesn't force anything to null, either */ + nullable_rels = NULL; break; case JOIN_LEFT: case JOIN_ANTI: @@ -404,6 +410,7 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, *qualscope = bms_union(leftids, rightids); *inner_join_rels = bms_union(left_inners, right_inners); nonnullable_rels = leftids; + nullable_rels = rightids; break; case JOIN_SEMI: leftjoinlist = deconstruct_recurse(root, j->larg, @@ -416,6 +423,13 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, *inner_join_rels = bms_union(left_inners, right_inners); /* Semi join adds no restrictions for quals */ nonnullable_rels = NULL; + + /* + * Theoretically, a semijoin would null the RHS; but since the + * RHS can't be accessed above the join, this is immaterial + * and we needn't account for it. + */ + nullable_rels = NULL; break; case JOIN_FULL: leftjoinlist = deconstruct_recurse(root, j->larg, @@ -428,16 +442,22 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, *inner_join_rels = bms_union(left_inners, right_inners); /* each side is both outer and inner */ nonnullable_rels = *qualscope; + nullable_rels = *qualscope; break; default: /* JOIN_RIGHT was eliminated during reduce_outer_joins() */ elog(ERROR, "unrecognized join type: %d", (int) j->jointype); nonnullable_rels = NULL; /* keep compiler quiet */ + nullable_rels = NULL; leftjoinlist = rightjoinlist = NIL; break; } + /* Report all rels that will be nulled anywhere in the jointree */ + root->nullable_baserels = bms_add_members(root->nullable_baserels, + nullable_rels); + /* * For an OJ, form the SpecialJoinInfo now, because we need the OJ's * semantic scope (ojscope) to pass to distribute_qual_to_rels. But diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index ea04809099d..8b997f05eb2 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -151,7 +151,8 @@ typedef struct PlannerInfo /* * all_baserels is a Relids set of all base relids (but not "other" * relids) in the query; that is, the Relids identifier of the final join - * we need to form. + * we need to form. This is computed in make_one_rel, just before we + * start making Paths. */ Relids all_baserels; @@ -244,6 +245,9 @@ typedef struct PlannerInfo /* Added post-release, will be in a saner place in 9.3: */ List *plan_params; /* list of PlannerParamItems, see below */ + + /* This will be in a saner place in 9.4: */ + Relids nullable_baserels; } PlannerInfo; diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index ab04d5e6047..c787ebb341a 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -106,6 +106,7 @@ extern Expr *canonicalize_ec_expression(Expr *expr, extern void reconsider_outer_join_clauses(PlannerInfo *root); extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root, Expr *expr, + Relids nullable_relids, List *opfamilies, Oid opcintype, Oid collation, diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index dff8f7f8895..9a60ccf4ed4 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2905,6 +2905,35 @@ select f1, unique2, case when unique2 is null then f1 else 0 end 0 | 0 | 0 (1 row) +-- +-- another case with equivalence clauses above outer joins (bug #8591) +-- +explain (costs off) +select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand) + from tenk1 a left join tenk1 b on b.thousand = a.unique1 left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand) + where a.unique2 = 5530 and coalesce(b.twothousand, a.twothousand) = 44; + QUERY PLAN +--------------------------------------------------------------------------------------------- + Nested Loop Left Join + -> Nested Loop Left Join + Filter: (COALESCE(b.twothousand, a.twothousand) = 44) + -> Index Scan using tenk1_unique2 on tenk1 a + Index Cond: (unique2 = 5530) + -> Bitmap Heap Scan on tenk1 b + Recheck Cond: (thousand = a.unique1) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = a.unique1) + -> Index Scan using tenk1_unique2 on tenk1 c + Index Cond: ((unique2 = COALESCE(b.twothousand, a.twothousand)) AND (unique2 = 44)) +(11 rows) + +select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand) + from tenk1 a left join tenk1 b on b.thousand = a.unique1 left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand) + where a.unique2 = 5530 and coalesce(b.twothousand, a.twothousand) = 44; + unique1 | unique1 | unique1 | coalesce +---------+---------+---------+---------- +(0 rows) + -- -- test ability to push constants through outer join clauses -- diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index eef2beb5afb..eb1a2cd228d 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -788,6 +788,19 @@ select f1, unique2, case when unique2 is null then f1 else 0 end from int4_tbl a left join tenk1 b on f1 = unique2 where (case when unique2 is null then f1 else 0 end) = 0; +-- +-- another case with equivalence clauses above outer joins (bug #8591) +-- + +explain (costs off) +select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand) + from tenk1 a left join tenk1 b on b.thousand = a.unique1 left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand) + where a.unique2 = 5530 and coalesce(b.twothousand, a.twothousand) = 44; + +select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand) + from tenk1 a left join tenk1 b on b.thousand = a.unique1 left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand) + where a.unique2 = 5530 and coalesce(b.twothousand, a.twothousand) = 44; + -- -- test ability to push constants through outer join clauses --