diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 2fe05fe26a5..f3a8e76e476 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1587,6 +1587,7 @@ _outEquivalenceMember(StringInfo str, EquivalenceMember *node) WRITE_NODE_FIELD(em_expr); WRITE_BITMAPSET_FIELD(em_relids); + WRITE_BITMAPSET_FIELD(em_nullable_relids); WRITE_BOOL_FIELD(em_is_const); WRITE_BOOL_FIELD(em_is_child); WRITE_OID_FIELD(em_datatype); diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 12669d1b090..c911bcda242 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -28,7 +28,7 @@ static EquivalenceMember *add_eq_member(EquivalenceClass *ec, - Expr *expr, Relids relids, + Expr *expr, Relids relids, Relids nullable_relids, bool is_child, Oid datatype); static void generate_base_implied_equalities_const(PlannerInfo *root, EquivalenceClass *ec); @@ -98,7 +98,9 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, Expr *item1; Expr *item2; Relids item1_relids, - item2_relids; + item2_relids, + item1_nullable_relids, + item2_nullable_relids; List *opfamilies; EquivalenceClass *ec1, *ec2; @@ -140,6 +142,12 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, return false; /* RHS is non-strict but not constant */ } + /* Calculate nullable-relid sets for each side of the clause */ + item1_nullable_relids = bms_intersect(item1_relids, + restrictinfo->nullable_relids); + item2_nullable_relids = bms_intersect(item2_relids, + restrictinfo->nullable_relids); + /* * We use the declared input types of the operator, not exprType() of the * inputs, as the nominal datatypes for opfamily lookup. This presumes @@ -274,7 +282,8 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, else if (ec1) { /* Case 3: add item2 to ec1 */ - em2 = add_eq_member(ec1, item2, item2_relids, false, item2_type); + em2 = add_eq_member(ec1, item2, item2_relids, item2_nullable_relids, + false, item2_type); ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); ec1->ec_below_outer_join |= below_outer_join; /* mark the RI as usable with this pair of EMs */ @@ -284,7 +293,8 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, else if (ec2) { /* Case 3: add item1 to ec2 */ - em1 = add_eq_member(ec2, item1, item1_relids, false, item1_type); + em1 = add_eq_member(ec2, item1, item1_relids, item1_nullable_relids, + false, item1_type); ec2->ec_sources = lappend(ec2->ec_sources, restrictinfo); ec2->ec_below_outer_join |= below_outer_join; /* mark the RI as usable with this pair of EMs */ @@ -307,8 +317,10 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, ec->ec_broken = false; ec->ec_sortref = 0; ec->ec_merged = NULL; - em1 = add_eq_member(ec, item1, item1_relids, false, item1_type); - em2 = add_eq_member(ec, item2, item2_relids, false, item2_type); + em1 = add_eq_member(ec, item1, item1_relids, item1_nullable_relids, + false, item1_type); + em2 = add_eq_member(ec, item2, item2_relids, item2_nullable_relids, + false, item2_type); root->eq_classes = lappend(root->eq_classes, ec); @@ -325,12 +337,13 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, */ static EquivalenceMember * add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, - bool is_child, Oid datatype) + Relids nullable_relids, bool is_child, Oid datatype) { EquivalenceMember *em = makeNode(EquivalenceMember); em->em_expr = expr; em->em_relids = relids; + em->em_nullable_relids = nullable_relids; em->em_is_const = false; em->em_is_child = is_child; em->em_datatype = datatype; @@ -452,7 +465,7 @@ get_eclass_for_sort_expr(PlannerInfo *root, elog(ERROR, "volatile EquivalenceClass has no sortref"); newem = add_eq_member(newec, expr, pull_varnos((Node *) expr), - false, expr_datatype); + NULL, false, expr_datatype); /* * add_eq_member doesn't check for volatile functions, set-returning @@ -632,7 +645,9 @@ generate_base_implied_equalities_const(PlannerInfo *root, } process_implied_equality(root, eq_op, cur_em->em_expr, const_em->em_expr, - ec->ec_relids, + bms_copy(ec->ec_relids), + bms_union(cur_em->em_nullable_relids, + const_em->em_nullable_relids), ec->ec_below_outer_join, cur_em->em_is_const); } @@ -687,7 +702,9 @@ generate_base_implied_equalities_no_const(PlannerInfo *root, } process_implied_equality(root, eq_op, prev_em->em_expr, cur_em->em_expr, - ec->ec_relids, + bms_copy(ec->ec_relids), + bms_union(prev_em->em_nullable_relids, + cur_em->em_nullable_relids), ec->ec_below_outer_join, false); } @@ -1089,7 +1106,9 @@ create_join_clause(PlannerInfo *root, leftem->em_expr, rightem->em_expr, bms_union(leftem->em_relids, - rightem->em_relids)); + rightem->em_relids), + bms_union(leftem->em_nullable_relids, + rightem->em_nullable_relids)); /* Mark the clause as redundant, or not */ rinfo->parent_ec = parent_ec; @@ -1310,7 +1329,8 @@ reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo, left_type, right_type, inner_datatype; - Relids inner_relids; + Relids inner_relids, + inner_nullable_relids; ListCell *lc1; Assert(is_opclause(rinfo->clause)); @@ -1336,6 +1356,8 @@ reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo, inner_datatype = left_type; inner_relids = rinfo->left_relids; } + inner_nullable_relids = bms_intersect(inner_relids, + rinfo->nullable_relids); /* Scan EquivalenceClasses for a match to outervar */ foreach(lc1, root->eq_classes) @@ -1390,7 +1412,8 @@ reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo, newrinfo = build_implied_join_equality(eq_op, innervar, cur_em->em_expr, - inner_relids); + bms_copy(inner_relids), + bms_copy(inner_nullable_relids)); if (process_equivalence(root, newrinfo, true)) match = true; } @@ -1423,7 +1446,9 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) left_type, right_type; Relids left_relids, - right_relids; + right_relids, + left_nullable_relids, + right_nullable_relids; ListCell *lc1; /* Can't use an outerjoin_delayed clause here */ @@ -1438,6 +1463,10 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) rightvar = (Expr *) get_rightop(rinfo->clause); left_relids = rinfo->left_relids; right_relids = rinfo->right_relids; + left_nullable_relids = bms_intersect(left_relids, + rinfo->nullable_relids); + right_nullable_relids = bms_intersect(right_relids, + rinfo->nullable_relids); foreach(lc1, root->eq_classes) { @@ -1519,7 +1548,8 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) newrinfo = build_implied_join_equality(eq_op, leftvar, cur_em->em_expr, - left_relids); + bms_copy(left_relids), + bms_copy(left_nullable_relids)); if (process_equivalence(root, newrinfo, true)) matchleft = true; } @@ -1531,7 +1561,8 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) newrinfo = build_implied_join_equality(eq_op, rightvar, cur_em->em_expr, - right_relids); + bms_copy(right_relids), + bms_copy(right_nullable_relids)); if (process_equivalence(root, newrinfo, true)) matchright = true; } @@ -1651,11 +1682,27 @@ add_child_rel_equivalences(PlannerInfo *root, { /* Yes, generate transformed child version */ Expr *child_expr; + Relids new_nullable_relids; child_expr = (Expr *) adjust_appendrel_attrs((Node *) cur_em->em_expr, appinfo); - (void) add_eq_member(cur_ec, child_expr, child_rel->relids, + + /* + * Must translate nullable_relids. Note this code assumes + * parent and child relids are singletons. + */ + new_nullable_relids = cur_em->em_nullable_relids; + if (bms_overlap(new_nullable_relids, parent_rel->relids)) + { + new_nullable_relids = bms_difference(new_nullable_relids, + parent_rel->relids); + new_nullable_relids = bms_add_members(new_nullable_relids, + child_rel->relids); + } + + (void) add_eq_member(cur_ec, child_expr, + child_rel->relids, new_nullable_relids, true, cur_em->em_datatype); } } diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 870dfe25ac2..be0ace6c0ce 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -51,9 +51,12 @@ static void distribute_qual_to_rels(PlannerInfo *root, Node *clause, JoinType jointype, Relids qualscope, Relids ojscope, - Relids outerjoin_nonnullable); + Relids outerjoin_nonnullable, + Relids deduced_nullable_relids); static bool check_outerjoin_delay(PlannerInfo *root, Relids *relids_p, Relids *nullable_relids_p, bool is_pushed_down); +static bool check_equivalence_delay(PlannerInfo *root, + RestrictInfo *restrictinfo); static bool check_redundant_nullability_qual(PlannerInfo *root, Node *clause); static void check_mergejoinable(RestrictInfo *restrictinfo); static void check_hashjoinable(RestrictInfo *restrictinfo); @@ -350,7 +353,7 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, distribute_qual_to_rels(root, qual, false, below_outer_join, JOIN_INNER, - *qualscope, NULL, NULL); + *qualscope, NULL, NULL, NULL); } } else if (IsA(jtnode, JoinExpr)) @@ -474,7 +477,7 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, distribute_qual_to_rels(root, qual, false, below_outer_join, j->jointype, *qualscope, - ojscope, nonnullable_rels); + ojscope, nonnullable_rels, NULL); } /* Now we can add the SpecialJoinInfo to join_info_list */ @@ -780,13 +783,19 @@ make_outerjoininfo(PlannerInfo *root, * baserels appearing on the outer (nonnullable) side of the join * (for FULL JOIN this includes both sides of the join, and must in fact * equal qualscope) + * 'deduced_nullable_relids': if is_deduced is TRUE, the nullable relids to + * impute to the clause; otherwise NULL * * 'qualscope' identifies what level of JOIN the qual came from syntactically. * 'ojscope' is needed if we decide to force the qual up to the outer-join * level, which will be ojscope not necessarily qualscope. * - * At the time this is called, root->join_info_list must contain entries for - * all and only those special joins that are syntactically below this qual. + * In normal use (when is_deduced is FALSE), at the time this is called, + * root->join_info_list must contain entries for all and only those special + * joins that are syntactically below this qual. But when is_deduced is TRUE, + * we are adding new deduced clauses after completion of deconstruct_jointree, + * so it cannot be assumed that root->join_info_list has anything to do with + * qual placement. */ static void distribute_qual_to_rels(PlannerInfo *root, Node *clause, @@ -795,7 +804,8 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, JoinType jointype, Relids qualscope, Relids ojscope, - Relids outerjoin_nonnullable) + Relids outerjoin_nonnullable, + Relids deduced_nullable_relids) { Relids relids; bool is_pushed_down; @@ -908,12 +918,13 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, * If the qual came from implied-equality deduction, it should not be * outerjoin-delayed, else deducer blew it. But we can't check this * because the join_info_list may now contain OJs above where the qual - * belongs. + * belongs. For the same reason, we must rely on caller to supply the + * correct nullable_relids set. */ Assert(!ojscope); is_pushed_down = true; outerjoin_delayed = false; - nullable_relids = NULL; + nullable_relids = deduced_nullable_relids; /* Don't feed it back for more deductions */ maybe_equivalence = false; maybe_outer_join = false; @@ -1075,7 +1086,8 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, { if (maybe_equivalence) { - if (process_equivalence(root, restrictinfo, below_outer_join)) + if (check_equivalence_delay(root, restrictinfo) && + process_equivalence(root, restrictinfo, below_outer_join)) return; /* EC rejected it, so pass to distribute_restrictinfo_to_rels */ } @@ -1236,6 +1248,44 @@ check_outerjoin_delay(PlannerInfo *root, return outerjoin_delayed; } +/* + * check_equivalence_delay + * Detect whether a potential equivalence clause is rendered unsafe + * by outer-join-delay considerations. Return TRUE if it's safe. + * + * The initial tests in distribute_qual_to_rels will consider a mergejoinable + * clause to be a potential equivalence clause if it is not outerjoin_delayed. + * But since the point of equivalence processing is that we will recombine the + * two sides of the clause with others, we have to check that each side + * satisfies the not-outerjoin_delayed condition on its own; otherwise it might + * not be safe to evaluate everywhere we could place a derived equivalence + * condition. + */ +static bool +check_equivalence_delay(PlannerInfo *root, + RestrictInfo *restrictinfo) +{ + Relids relids; + Relids nullable_relids; + + /* fast path if no special joins */ + if (root->join_info_list == NIL) + return true; + + /* must copy restrictinfo's relids to avoid changing it */ + relids = bms_copy(restrictinfo->left_relids); + /* check left side does not need delay */ + if (check_outerjoin_delay(root, &relids, &nullable_relids, true)) + return false; + + /* and similarly for the right side */ + relids = bms_copy(restrictinfo->right_relids); + if (check_outerjoin_delay(root, &relids, &nullable_relids, true)) + return false; + + return true; +} + /* * check_redundant_nullability_qual * Check to see if the qual is an IS NULL qual that is redundant with @@ -1347,11 +1397,20 @@ distribute_restrictinfo_to_rels(PlannerInfo *root, * variable-free. Otherwise the qual is applied at the lowest join level * that provides all its variables. * + * "nullable_relids" is the set of relids used in the expressions that are + * potentially nullable below the expressions. (This has to be supplied by + * caller because this function is used after deconstruct_jointree, so we + * don't have knowledge of where the clause items came from.) + * * "both_const" indicates whether both items are known pseudo-constant; * in this case it is worth applying eval_const_expressions() in case we * can produce constant TRUE or constant FALSE. (Otherwise it's not, * because the expressions went through eval_const_expressions already.) * + * Note: this function will copy item1 and item2, but it is caller's + * responsibility to make sure that the Relids parameters are fresh copies + * not shared with other uses. + * * This is currently used only when an EquivalenceClass is found to * contain pseudoconstants. See path/pathkeys.c for more details. */ @@ -1361,6 +1420,7 @@ process_implied_equality(PlannerInfo *root, Expr *item1, Expr *item2, Relids qualscope, + Relids nullable_relids, bool below_outer_join, bool both_const) { @@ -1392,15 +1452,12 @@ process_implied_equality(PlannerInfo *root, } } - /* Make a copy of qualscope to avoid problems if source EC changes */ - qualscope = bms_copy(qualscope); - /* * Push the new clause into all the appropriate restrictinfo lists. */ distribute_qual_to_rels(root, (Node *) clause, true, below_outer_join, JOIN_INNER, - qualscope, NULL, NULL); + qualscope, NULL, NULL, nullable_relids); } /* @@ -1408,12 +1465,17 @@ process_implied_equality(PlannerInfo *root, * * This overlaps the functionality of process_implied_equality(), but we * must return the RestrictInfo, not push it into the joininfo tree. + * + * Note: this function will copy item1 and item2, but it is caller's + * responsibility to make sure that the Relids parameters are fresh copies + * not shared with other uses. */ RestrictInfo * build_implied_join_equality(Oid opno, Expr *item1, Expr *item2, - Relids qualscope) + Relids qualscope, + Relids nullable_relids) { RestrictInfo *restrictinfo; Expr *clause; @@ -1428,9 +1490,6 @@ build_implied_join_equality(Oid opno, (Expr *) copyObject(item1), (Expr *) copyObject(item2)); - /* Make a copy of qualscope to avoid problems if source EC changes */ - qualscope = bms_copy(qualscope); - /* * Build the RestrictInfo node itself. */ @@ -1439,7 +1498,7 @@ build_implied_join_equality(Oid opno, false, /* outerjoin_delayed */ false, /* pseudoconstant */ qualscope, /* required_relids */ - NULL); /* nullable_relids */ + nullable_relids); /* nullable_relids */ /* Set mergejoinability info always, and hashjoinability if enabled */ check_mergejoinable(restrictinfo); diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 3726d608dbe..278cfa3c39b 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -539,6 +539,7 @@ typedef struct EquivalenceMember Expr *em_expr; /* the expression represented */ Relids em_relids; /* all relids appearing in em_expr */ + Relids em_nullable_relids; /* nullable by lower outer joins */ bool em_is_const; /* expression is pseudoconstant? */ bool em_is_child; /* derived version for a child relation? */ Oid em_datatype; /* the "nominal type" used by the opfamily */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 65233bdc023..3328b8d4596 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -94,12 +94,14 @@ extern void process_implied_equality(PlannerInfo *root, Expr *item1, Expr *item2, Relids qualscope, + Relids nullable_relids, bool below_outer_join, bool both_const); extern RestrictInfo *build_implied_join_equality(Oid opno, Expr *item1, Expr *item2, - Relids qualscope); + Relids qualscope, + Relids nullable_relids); /* * prototypes for plan/setrefs.c diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index fd752e69a0a..dabb1857e30 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2129,6 +2129,7 @@ on (x1 = xx1) where (xx2 is not null); -- regression test: check for bug with propagation of implied equality -- to outside an IN -- +analyze tenk1; -- ensure we get consistent plans here select count(*) from tenk1 a where unique1 in (select unique1 from tenk1 b join tenk1 c using (unique1) where b.unique2 = 42); @@ -2576,3 +2577,21 @@ select * from int4_tbl a full join int4_tbl b on false; -2147483647 | (10 rows) +-- +-- test handling of potential equivalence clauses above outer joins +-- +select q1, unique2, thousand, hundred + from int8_tbl a left join tenk1 b on q1 = unique2 + where coalesce(thousand,123) = q1 and q1 = coalesce(hundred,123); + q1 | unique2 | thousand | hundred +----+---------+----------+--------- +(0 rows) + +select f1, unique2, case when unique2 is null then f1 else 0 end + from int4_tbl a left join tenk1 b on f1 = unique2 + where (case when unique2 is null then f1 else 0 end) = 0; + f1 | unique2 | case +----+---------+------ + 0 | 0 | 0 +(1 row) + diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 9b969f4e101..1d7396dbaf7 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -330,6 +330,8 @@ on (x1 = xx1) where (xx2 is not null); -- regression test: check for bug with propagation of implied equality -- to outside an IN -- +analyze tenk1; -- ensure we get consistent plans here + select count(*) from tenk1 a where unique1 in (select unique1 from tenk1 b join tenk1 c using (unique1) where b.unique2 = 42); @@ -637,3 +639,15 @@ order by 1,2; -- select * from int4_tbl a full join int4_tbl b on true; select * from int4_tbl a full join int4_tbl b on false; + +-- +-- test handling of potential equivalence clauses above outer joins +-- + +select q1, unique2, thousand, hundred + from int8_tbl a left join tenk1 b on q1 = unique2 + where coalesce(thousand,123) = q1 and q1 = coalesce(hundred,123); + +select f1, unique2, case when unique2 is null then f1 else 0 end + from int4_tbl a left join tenk1 b on f1 = unique2 + where (case when unique2 is null then f1 else 0 end) = 0;