diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index bd302bc215c..60505814fd4 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -105,12 +105,16 @@ that are either base rels or joinrels constructed per sub-join-lists. We can join these rels together in any order the planner sees fit. The standard (non-GEQO) planner does this as follows: -Consider joining each RelOptInfo to each other RelOptInfo specified in its -RelOptInfo.joininfo, and generate a Path for each possible join method for -each such pair. (If we have a RelOptInfo with no join clauses, we have no -choice but to generate a clauseless Cartesian-product join; so we consider -joining that rel to each other available rel. But in the presence of join -clauses we will only consider joins that use available join clauses.) +Consider joining each RelOptInfo to each other RelOptInfo for which there +is a usable joinclause, and generate a Path for each possible join method +for each such pair. (If we have a RelOptInfo with no join clauses, we have +no choice but to generate a clauseless Cartesian-product join; so we +consider joining that rel to each other available rel. But in the presence +of join clauses we will only consider joins that use available join +clauses. Note that join-order restrictions induced by outer joins and +IN clauses are treated as if they were real join clauses, to ensure that +we find a workable join order in cases where those restrictions force a +clauseless join to be done.) If we only had two relations in the list, we are done: we just pick the cheapest path for the join RelOptInfo. If we had more than two, we now diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c index 847afd465dd..d528720c3e2 100644 --- a/src/backend/optimizer/geqo/geqo_eval.c +++ b/src/backend/optimizer/geqo/geqo_eval.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.84 2007/02/13 02:31:02 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.85 2007/02/16 00:14:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -253,52 +253,14 @@ static bool desirable_join(PlannerInfo *root, RelOptInfo *outer_rel, RelOptInfo *inner_rel) { - ListCell *l; - /* - * Join if there is an applicable join clause. + * Join if there is an applicable join clause, or if there is a join + * order restriction forcing these rels to be joined. */ - if (have_relevant_joinclause(root, outer_rel, inner_rel)) + if (have_relevant_joinclause(root, outer_rel, inner_rel) || + have_join_order_restriction(root, outer_rel, inner_rel)) return true; - /* - * Join if the rels overlap the same outer-join side and don't already - * implement the outer join. This is needed to ensure that we can find a - * valid solution in a case where an OJ contains a clauseless join. - */ - foreach(l, root->oj_info_list) - { - OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l); - - /* ignore full joins --- other mechanisms preserve their ordering */ - if (ojinfo->is_full_join) - continue; - if (bms_overlap(outer_rel->relids, ojinfo->min_righthand) && - bms_overlap(inner_rel->relids, ojinfo->min_righthand) && - !bms_overlap(outer_rel->relids, ojinfo->min_lefthand) && - !bms_overlap(inner_rel->relids, ojinfo->min_lefthand)) - return true; - if (bms_overlap(outer_rel->relids, ojinfo->min_lefthand) && - bms_overlap(inner_rel->relids, ojinfo->min_lefthand) && - !bms_overlap(outer_rel->relids, ojinfo->min_righthand) && - !bms_overlap(inner_rel->relids, ojinfo->min_righthand)) - return true; - } - - /* - * Join if the rels are members of the same IN sub-select. This is needed - * to ensure that we can find a valid solution in a case where an IN - * sub-select has a clauseless join. - */ - foreach(l, root->in_info_list) - { - InClauseInfo *ininfo = (InClauseInfo *) lfirst(l); - - if (bms_is_subset(outer_rel->relids, ininfo->righthand) && - bms_is_subset(inner_rel->relids, ininfo->righthand)) - return true; - } - /* Otherwise postpone the join till later. */ return false; } diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index e138f9b0aa2..d7f9d802bb3 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.85 2007/02/13 02:31:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.86 2007/02/16 00:14:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -72,7 +72,8 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels) other_rels = list_head(joinrels[1]); /* consider all initial * rels */ - if (old_rel->joininfo != NIL || old_rel->has_eclass_joins) + if (old_rel->joininfo != NIL || old_rel->has_eclass_joins || + has_join_restriction(root, old_rel)) { /* * Note that if all available join clauses for this rel require @@ -80,31 +81,19 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels) * it here. In most cases that's OK; it'll be considered by * "bushy plan" join code in a higher-level pass where we have * those other rels collected into a join rel. + * + * See also the last-ditch case below. */ new_rels = make_rels_by_clause_joins(root, old_rel, other_rels); - - /* - * An exception occurs when there is a clauseless join inside a - * construct that restricts join order, i.e., an outer join or - * an IN (sub-SELECT) construct. Here, the rel may well have join - * clauses against stuff outside its OJ side or IN sub-SELECT, but - * the clauseless join *must* be done before we can make use of - * those join clauses. So do the clauseless join bit. - * - * See also the last-ditch case below. - */ - if (new_rels == NIL && has_join_restriction(root, old_rel)) - new_rels = make_rels_by_clauseless_joins(root, - old_rel, - other_rels); } else { /* * Oops, we have a relation that is not joined to any other - * relation. Cartesian product time. + * relation, either directly or by join-order restrictions. + * Cartesian product time. */ new_rels = make_rels_by_clauseless_joins(root, old_rel, @@ -127,8 +116,8 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels) * joined to relations of level-k initial rels, for 2 <= k <= level-2. * * We only consider bushy-plan joins for pairs of rels where there is a - * suitable join clause, in order to avoid unreasonable growth of planning - * time. + * suitable join clause (or join order restriction), in order to avoid + * unreasonable growth of planning time. */ for (k = 2;; k++) { @@ -148,12 +137,12 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels) ListCell *r2; /* - * We can ignore clauseless joins here, *except* when there are - * outer joins --- then we might have to force a bushy outer - * join. See have_relevant_joinclause(). + * We can ignore clauseless joins here, *except* when they + * participate in join-order restrictions --- then we might have + * to force a bushy join plan. */ if (old_rel->joininfo == NIL && !old_rel->has_eclass_joins && - root->oj_info_list == NIL) + !has_join_restriction(root, old_rel)) continue; if (k == other_level) @@ -170,9 +159,10 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels) /* * OK, we can build a rel of the right level from this * pair of rels. Do so if there is at least one usable - * join clause. + * join clause or a relevant join restriction. */ - if (have_relevant_joinclause(root, old_rel, new_rel)) + if (have_relevant_joinclause(root, old_rel, new_rel) || + have_join_order_restriction(root, old_rel, new_rel)) { RelOptInfo *jrel; @@ -252,7 +242,8 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels) /* * make_rels_by_clause_joins * Build joins between the given relation 'old_rel' and other relations - * that participate in join clauses that 'old_rel' also participates in. + * that participate in join clauses that 'old_rel' also participates in + * (or participate in join-order restrictions with it). * The join rel nodes are returned in a list. * * 'old_rel' is the relation entry for the relation to be joined @@ -275,7 +266,8 @@ make_rels_by_clause_joins(PlannerInfo *root, RelOptInfo *other_rel = (RelOptInfo *) lfirst(l); if (!bms_overlap(old_rel->relids, other_rel->relids) && - have_relevant_joinclause(root, old_rel, other_rel)) + (have_relevant_joinclause(root, old_rel, other_rel) || + have_join_order_restriction(root, old_rel, other_rel))) { RelOptInfo *jrel; @@ -333,44 +325,6 @@ make_rels_by_clauseless_joins(PlannerInfo *root, } -/* - * has_join_restriction - * Detect whether the specified relation has join-order restrictions - * due to being inside an outer join or an IN (sub-SELECT). - */ -static bool -has_join_restriction(PlannerInfo *root, RelOptInfo *rel) -{ - ListCell *l; - - foreach(l, root->oj_info_list) - { - OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l); - - /* ignore full joins --- other mechanisms preserve their ordering */ - if (ojinfo->is_full_join) - continue; - /* if it overlaps RHS and isn't yet joined to LHS, it's restricted */ - if (bms_overlap(rel->relids, ojinfo->min_righthand) && - !bms_overlap(rel->relids, ojinfo->min_lefthand)) - return true; - /* if it's a proper subset of the LHS, it's also restricted */ - if (bms_is_subset(rel->relids, ojinfo->min_lefthand) && - !bms_equal(rel->relids, ojinfo->min_lefthand)) - return true; - } - - foreach(l, root->in_info_list) - { - InClauseInfo *ininfo = (InClauseInfo *) lfirst(l); - - if (bms_is_subset(rel->relids, ininfo->righthand)) - return true; - } - return false; -} - - /* * make_join_rel * Find or create a join RelOptInfo that represents the join of @@ -669,3 +623,152 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) return joinrel; } + + +/* + * have_join_order_restriction + * Detect whether the two relations should be joined to satisfy + * a join-order restriction arising from outer joins or IN clauses. + * + * In practice this is always used with have_relevant_joinclause(), and so + * could be merged with that function, but it seems clearer to separate the + * two concerns. We need these tests because there are degenerate cases where + * a clauseless join must be performed to satisfy join-order restrictions. + * + * Note: this is only a problem if one side of a degenerate outer join + * contains multiple rels, or a clauseless join is required within an IN's + * RHS; else we will find a join path via the "last ditch" case in + * make_rels_by_joins(). We could dispense with this test if we were willing + * to try bushy plans in the "last ditch" case, but that seems much less + * efficient. + */ +bool +have_join_order_restriction(PlannerInfo *root, + RelOptInfo *rel1, RelOptInfo *rel2) +{ + ListCell *l; + + /* + * It's possible that the rels correspond to the left and right sides + * of a degenerate outer join, that is, one with no joinclause mentioning + * the non-nullable side; in which case we should force the join to occur. + * + * Also, the two rels could represent a clauseless join that has to be + * completed to build up the LHS or RHS of an outer join. + */ + foreach(l, root->oj_info_list) + { + OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l); + + /* ignore full joins --- other mechanisms handle them */ + if (ojinfo->is_full_join) + continue; + + /* Can we perform the OJ with these rels? */ + if (bms_is_subset(ojinfo->min_lefthand, rel1->relids) && + bms_is_subset(ojinfo->min_righthand, rel2->relids)) + return true; + if (bms_is_subset(ojinfo->min_lefthand, rel2->relids) && + bms_is_subset(ojinfo->min_righthand, rel1->relids)) + return true; + + /* + * Might we need to join these rels to complete the RHS? We have + * to use "overlap" tests since either rel might include a lower OJ + * that has been proven to commute with this one. + */ + if (bms_overlap(ojinfo->min_righthand, rel1->relids) && + bms_overlap(ojinfo->min_righthand, rel2->relids)) + return true; + + /* Likewise for the LHS. */ + if (bms_overlap(ojinfo->min_lefthand, rel1->relids) && + bms_overlap(ojinfo->min_lefthand, rel2->relids)) + return true; + } + + /* + * Similarly, we need to allow a join that completes a degenerate + * IN-clause, or one that builds up its LHS or RHS. + */ + foreach(l, root->in_info_list) + { + InClauseInfo *ininfo = (InClauseInfo *) lfirst(l); + + /* Can we perform the IN with these rels? */ + if (bms_is_subset(ininfo->lefthand, rel1->relids) && + bms_is_subset(ininfo->righthand, rel2->relids)) + return true; + if (bms_is_subset(ininfo->lefthand, rel2->relids) && + bms_is_subset(ininfo->righthand, rel1->relids)) + return true; + + /* + * Might we need to join these rels to complete the RHS? It's + * probably overkill to test "overlap", since we never join part of an + * IN's RHS to anything else, but may as well keep the coding similar + * to the OJ case. + */ + if (bms_overlap(ininfo->righthand, rel1->relids) && + bms_overlap(ininfo->righthand, rel2->relids)) + return true; + + /* Likewise for the LHS. */ + if (bms_overlap(ininfo->lefthand, rel1->relids) && + bms_overlap(ininfo->lefthand, rel2->relids)) + return true; + } + + return false; +} + + +/* + * has_join_restriction + * Detect whether the specified relation has join-order restrictions + * due to being inside an outer join or an IN (sub-SELECT). + * + * Essentially, this tests whether have_join_order_restriction() could + * succeed with this rel and some other one. + */ +static bool +has_join_restriction(PlannerInfo *root, RelOptInfo *rel) +{ + ListCell *l; + + foreach(l, root->oj_info_list) + { + OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l); + + /* ignore full joins --- other mechanisms preserve their ordering */ + if (ojinfo->is_full_join) + continue; + + /* ignore if OJ is already contained in rel */ + if (bms_is_subset(ojinfo->min_lefthand, rel->relids) && + bms_is_subset(ojinfo->min_righthand, rel->relids)) + continue; + + /* restricted if it overlaps LHS or RHS, but doesn't contain OJ */ + if (bms_overlap(ojinfo->min_lefthand, rel->relids) || + bms_overlap(ojinfo->min_righthand, rel->relids)) + return true; + } + + foreach(l, root->in_info_list) + { + InClauseInfo *ininfo = (InClauseInfo *) lfirst(l); + + /* ignore if IN is already contained in rel */ + if (bms_is_subset(ininfo->lefthand, rel->relids) && + bms_is_subset(ininfo->righthand, rel->relids)) + continue; + + /* restricted if it overlaps LHS or RHS, but doesn't contain IN */ + if (bms_overlap(ininfo->lefthand, rel->relids) || + bms_overlap(ininfo->righthand, rel->relids)) + return true; + } + + return false; +} diff --git a/src/backend/optimizer/util/joininfo.c b/src/backend/optimizer/util/joininfo.c index e58903c5d0a..6a31a02835c 100644 --- a/src/backend/optimizer/util/joininfo.c +++ b/src/backend/optimizer/util/joininfo.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/joininfo.c,v 1.47 2007/01/20 20:45:39 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/joininfo.c,v 1.48 2007/02/16 00:14:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -62,40 +62,6 @@ have_relevant_joinclause(PlannerInfo *root, if (!result && rel1->has_eclass_joins && rel2->has_eclass_joins) result = have_relevant_eclass_joinclause(root, rel1, rel2); - /* - * It's possible that the rels correspond to the left and right sides - * of a degenerate outer join, that is, one with no joinclause mentioning - * the non-nullable side. The above scan will then have failed to locate - * any joinclause indicating we should join, but nonetheless we must - * allow the join to occur. - * - * Note: we need no comparable check for IN-joins because we can handle - * sequential buildup of an IN-join to multiple outer-side rels; therefore - * the "last ditch" case in make_rels_by_joins() always succeeds. We - * could dispense with this hack if we were willing to try bushy plans - * in the "last ditch" case, but that seems too expensive. - */ - if (!result) - { - foreach(l, root->oj_info_list) - { - OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l); - - /* ignore full joins --- other mechanisms handle them */ - if (ojinfo->is_full_join) - continue; - - if ((bms_is_subset(ojinfo->min_lefthand, rel1->relids) && - bms_is_subset(ojinfo->min_righthand, rel2->relids)) || - (bms_is_subset(ojinfo->min_lefthand, rel2->relids) && - bms_is_subset(ojinfo->min_righthand, rel1->relids))) - { - result = true; - break; - } - } - } - bms_free(join_relids); return result; diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 7e7eb15469e..6b985e24cd7 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.95 2007/01/20 20:45:40 tgl Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.96 2007/02/16 00:14:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -91,6 +91,8 @@ extern void add_paths_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, extern List *make_rels_by_joins(PlannerInfo *root, int level, List **joinrels); extern RelOptInfo *make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2); +extern bool have_join_order_restriction(PlannerInfo *root, + RelOptInfo *rel1, RelOptInfo *rel2); /* * equivclass.c diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index bd92397b3af..245e22dcd05 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2137,6 +2137,19 @@ select count(*) from tenk1 a where unique1 in 1 (1 row) +-- +-- regression test: check for failure to generate a plan with multiple +-- degenerate IN clauses +-- +select count(*) from tenk1 x where + x.unique1 in (select a.f1 from int4_tbl a,float8_tbl b where a.f1=b.f1) and + x.unique1 = 0 and + x.unique1 in (select aa.f1 from int4_tbl aa,float8_tbl bb where aa.f1=bb.f1); + count +------- + 1 +(1 row) + -- -- Clean up -- diff --git a/src/test/regress/expected/join_1.out b/src/test/regress/expected/join_1.out index 07291225b76..9677408707f 100644 --- a/src/test/regress/expected/join_1.out +++ b/src/test/regress/expected/join_1.out @@ -2137,6 +2137,19 @@ select count(*) from tenk1 a where unique1 in 1 (1 row) +-- +-- regression test: check for failure to generate a plan with multiple +-- degenerate IN clauses +-- +select count(*) from tenk1 x where + x.unique1 in (select a.f1 from int4_tbl a,float8_tbl b where a.f1=b.f1) and + x.unique1 = 0 and + x.unique1 in (select aa.f1 from int4_tbl aa,float8_tbl bb where aa.f1=bb.f1); + count +------- + 1 +(1 row) + -- -- Clean up -- diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 149cdadc190..71208d2d2d8 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -334,6 +334,15 @@ select count(*) from tenk1 a where unique1 in (select unique1 from tenk1 b join tenk1 c using (unique1) where b.unique2 = 42); +-- +-- regression test: check for failure to generate a plan with multiple +-- degenerate IN clauses +-- +select count(*) from tenk1 x where + x.unique1 in (select a.f1 from int4_tbl a,float8_tbl b where a.f1=b.f1) and + x.unique1 = 0 and + x.unique1 in (select aa.f1 from int4_tbl aa,float8_tbl bb where aa.f1=bb.f1); + -- -- Clean up