1
0
mirror of https://github.com/postgres/postgres.git synced 2025-05-02 11:44:50 +03:00

Restructure code that is responsible for ensuring that clauseless joins are

considered when it is necessary to do so because of a join-order restriction
(that is, an outer-join or IN-subselect construct).  The former coding was a
bit ad-hoc and inconsistent, and it missed some cases, as exposed by Mario
Weilguni's recent bug report.  His specific problem was that an IN could be
turned into a "clauseless" join due to constant-propagation removing the IN's
joinclause, and if the IN's subselect involved more than one relation and
there was more than one such IN linking to the same upper relation, then the
only valid join orders involve "bushy" plans but we would fail to consider the
specific paths needed to get there.  (See the example case added to the join
regression test.)  On examining the code I wonder if there weren't some other
problem cases too; in particular it seems that GEQO was defending against a
different set of corner cases than the main planner was.  There was also an
efficiency problem, in that when we did realize we needed a clauseless join
because of an IN, we'd consider clauseless joins against every other relation
whether this was sensible or not.  It seems a better design is to use the
outer-join and in-clause lists as a backup heuristic, just as the rule of
joining only where there are joinclauses is a heuristic: we'll join two
relations if they have a usable joinclause *or* this might be necessary to
satisfy an outer-join or IN-clause join order restriction.  I refactored the
code to have just one place considering this instead of three, and made sure
that it covered all the cases that any of them had been considering.

Backpatch as far as 8.1 (which has only the IN-clause form of the disease).
By rights 8.0 and 7.4 should have the bug too, but they accidentally fail
to fail, because the joininfo structure used in those releases preserves some
memory of there having once been a joinclause between the inner and outer
sides of an IN, and so it leads the code in the right direction anyway.
I'll be conservative and not touch them.
This commit is contained in:
Tom Lane 2007-02-16 00:14:16 +00:00
parent e6aa62ec14
commit 0990afb8e2
6 changed files with 155 additions and 70 deletions

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.77.2.1 2005/11/22 18:23:10 momjian Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.77.2.2 2007/02/16 00:14:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -254,28 +254,14 @@ static bool
desirable_join(PlannerInfo *root,
RelOptInfo *outer_rel, RelOptInfo *inner_rel)
{
ListCell *l;
/*
* Join if there is an applicable join clause.
* Join if there is an applicable join clause, or if there is a join
* order restriction forcing these rels to be joined.
*/
if (have_relevant_joinclause(outer_rel, inner_rel))
if (have_relevant_joinclause(outer_rel, inner_rel) ||
have_join_order_restriction(root, outer_rel, inner_rel))
return true;
/*
* Join if the rels are members of the same IN sub-select. This is needed
* to improve the odds that we will find a valid solution in a case where
* an IN sub-select has a clauseless join.
*/
foreach(l, root->in_info_list)
{
InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
if (bms_is_subset(outer_rel->relids, ininfo->righthand) &&
bms_is_subset(inner_rel->relids, ininfo->righthand))
return true;
}
/* Otherwise postpone the join till later. */
return false;
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.76.2.1 2005/11/22 18:23:10 momjian Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.76.2.2 2007/02/16 00:14:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -25,7 +25,7 @@ static List *make_rels_by_clause_joins(PlannerInfo *root,
static List *make_rels_by_clauseless_joins(PlannerInfo *root,
RelOptInfo *old_rel,
ListCell *other_rels);
static bool is_inside_IN(PlannerInfo *root, RelOptInfo *rel);
static bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel);
/*
@ -72,7 +72,7 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels)
other_rels = list_head(joinrels[1]); /* consider all initial
* rels */
if (old_rel->joininfo != NIL)
if (old_rel->joininfo != NIL || has_join_restriction(root, old_rel))
{
/*
* Note that if all available join clauses for this rel require
@ -80,30 +80,19 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels)
* it here. In most cases that's OK; it'll be considered by
* "bushy plan" join code in a higher-level pass where we have
* those other rels collected into a join rel.
*
* See also the last-ditch case below.
*/
new_rels = make_rels_by_clause_joins(root,
old_rel,
other_rels);
/*
* An exception occurs when there is a clauseless join inside an
* IN (sub-SELECT) construct. Here, the members of the subselect
* all have join clauses (against the stuff outside the IN), but
* they *must* be joined to each other before we can make use of
* those join clauses. So do the clauseless join bit.
*
* See also the last-ditch case below.
*/
if (new_rels == NIL && is_inside_IN(root, old_rel))
new_rels = make_rels_by_clauseless_joins(root,
old_rel,
other_rels);
}
else
{
/*
* Oops, we have a relation that is not joined to any other
* relation. Cartesian product time.
* relation, either directly or by join-order restrictions.
* Cartesian product time.
*/
new_rels = make_rels_by_clauseless_joins(root,
old_rel,
@ -126,8 +115,8 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels)
* joined to relations of level-k initial rels, for 2 <= k <= level-2.
*
* We only consider bushy-plan joins for pairs of rels where there is a
* suitable join clause, in order to avoid unreasonable growth of planning
* time.
* suitable join clause (or join order restriction), in order to avoid
* unreasonable growth of planning time.
*/
for (k = 2;; k++)
{
@ -146,8 +135,14 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels)
ListCell *other_rels;
ListCell *r2;
if (old_rel->joininfo == NIL)
continue; /* we ignore clauseless joins here */
/*
* We can ignore clauseless joins here, *except* when they
* participate in join-order restrictions --- then we might have
* to force a bushy join plan.
*/
if (old_rel->joininfo == NIL &&
!has_join_restriction(root, old_rel))
continue;
if (k == other_level)
other_rels = lnext(r); /* only consider remaining rels */
@ -163,9 +158,10 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels)
/*
* OK, we can build a rel of the right level from this
* pair of rels. Do so if there is at least one usable
* join clause.
* join clause or a relevant join restriction.
*/
if (have_relevant_joinclause(old_rel, new_rel))
if (have_relevant_joinclause(old_rel, new_rel) ||
have_join_order_restriction(root, old_rel, new_rel))
{
RelOptInfo *jrel;
@ -245,8 +241,8 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels)
/*
* make_rels_by_clause_joins
* Build joins between the given relation 'old_rel' and other relations
* that are mentioned within old_rel's joininfo list (i.e., relations
* that participate in join clauses that 'old_rel' also participates in).
* that participate in join clauses that 'old_rel' also participates in
* (or participate in join-order restrictions with it).
* The join rel nodes are returned in a list.
*
* 'old_rel' is the relation entry for the relation to be joined
@ -269,7 +265,8 @@ make_rels_by_clause_joins(PlannerInfo *root,
RelOptInfo *other_rel = (RelOptInfo *) lfirst(l);
if (!bms_overlap(old_rel->relids, other_rel->relids) &&
have_relevant_joinclause(old_rel, other_rel))
(have_relevant_joinclause(old_rel, other_rel) ||
have_join_order_restriction(root, old_rel, other_rel)))
{
RelOptInfo *jrel;
@ -327,29 +324,6 @@ make_rels_by_clauseless_joins(PlannerInfo *root,
}
/*
 * is_inside_IN
 *    Report whether 'rel' lies within the righthand side of some IN
 *    (sub-SELECT) recorded in root->in_info_list.
 *
 * The name is slightly misleading: what matters here is only rels that were
 * pulled up out of an IN, i.e. rels whose relids are a subset of an IN's
 * righthand set.
 *
 * Returns true on the first matching IN entry, false if there is none.
 */
static bool
is_inside_IN(PlannerInfo *root, RelOptInfo *rel)
{
    ListCell   *cell;

    foreach(cell, root->in_info_list)
    {
        InClauseInfo *in_clause = (InClauseInfo *) lfirst(cell);

        /* Skip IN entries whose righthand side does not cover this rel. */
        if (!bms_is_subset(rel->relids, in_clause->righthand))
            continue;

        return true;
    }

    return false;
}
/*
* make_jointree_rel
* Find or build a RelOptInfo join rel representing a specific
@ -603,3 +577,91 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
return joinrel;
}
/*
 * have_join_order_restriction
 *    Decide whether rel1 and rel2 ought to be joined because an IN clause
 *    imposes a join-order restriction on them.
 *
 * Returns true as soon as any entry of root->in_info_list matches one of
 * the following patterns, and false if no entry does:
 *    - one rel covers the IN's lefthand set while the other covers its
 *      righthand set (checked in both orientations), so the IN itself
 *      could be performed by joining them;
 *    - both rels overlap the IN's righthand set, so joining them may be
 *      needed to build up the RHS;
 *    - both rels overlap the IN's lefthand set, likewise for the LHS.
 *
 * This is always used together with have_relevant_joinclause() and could be
 * folded into it, but the two concerns are kept apart for clarity.  The
 * checks are needed because there are degenerate cases in which a clauseless
 * join must be performed purely to satisfy join-order restrictions.
 */
bool
have_join_order_restriction(PlannerInfo *root,
                            RelOptInfo *rel1, RelOptInfo *rel2)
{
    ListCell   *cell;

    foreach(cell, root->in_info_list)
    {
        InClauseInfo *in_clause = (InClauseInfo *) lfirst(cell);

        /*
         * All four patterns are tested in a single short-circuit expression,
         * in this order: IN performable as rel1-LHS/rel2-RHS, IN performable
         * as rel2-LHS/rel1-RHS, both rels touching the RHS, both rels
         * touching the LHS.
         *
         * Using "overlap" for the RHS test is probably more than is needed,
         * since part of an IN's RHS is never joined to anything else; the
         * same form is used for both sides to keep the coding uniform.
         */
        if ((bms_is_subset(in_clause->lefthand, rel1->relids) &&
             bms_is_subset(in_clause->righthand, rel2->relids)) ||
            (bms_is_subset(in_clause->lefthand, rel2->relids) &&
             bms_is_subset(in_clause->righthand, rel1->relids)) ||
            (bms_overlap(in_clause->righthand, rel1->relids) &&
             bms_overlap(in_clause->righthand, rel2->relids)) ||
            (bms_overlap(in_clause->lefthand, rel1->relids) &&
             bms_overlap(in_clause->lefthand, rel2->relids)))
            return true;
    }

    /* No IN clause constrains this pair of rels. */
    return false;
}
/*
 * has_join_restriction
 *    Report whether 'rel' is subject to a join-order restriction because it
 *    is involved in an IN (sub-SELECT) that it does not yet fully contain.
 *
 * In effect this asks whether have_join_order_restriction() could return
 * true for this rel paired with some other rel.
 *
 * Returns true on the first IN entry in root->in_info_list that restricts
 * the rel, false if none does.
 */
static bool
has_join_restriction(PlannerInfo *root, RelOptInfo *rel)
{
    ListCell   *cell;

    foreach(cell, root->in_info_list)
    {
        InClauseInfo *in_clause = (InClauseInfo *) lfirst(cell);

        /*
         * An IN whose lefthand and righthand sets both lie inside the rel
         * imposes nothing further.  Otherwise the rel is restricted if it
         * overlaps either side of the IN.
         */
        if (!(bms_is_subset(in_clause->lefthand, rel->relids) &&
              bms_is_subset(in_clause->righthand, rel->relids)) &&
            (bms_overlap(in_clause->lefthand, rel->relids) ||
             bms_overlap(in_clause->righthand, rel->relids)))
            return true;
    }

    return false;
}

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.88.2.1 2006/01/29 17:27:50 tgl Exp $
* $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.88.2.2 2007/02/16 00:14:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -83,6 +83,8 @@ extern RelOptInfo *make_jointree_rel(PlannerInfo *root, Node *jtnode);
extern RelOptInfo *make_join_rel(PlannerInfo *root,
RelOptInfo *rel1, RelOptInfo *rel2,
JoinType jointype);
extern bool have_join_order_restriction(PlannerInfo *root,
RelOptInfo *rel1, RelOptInfo *rel2);
/*
* pathkeys.c

View File

@ -2139,6 +2139,19 @@ select count(*) from tenk1 a where unique1 in
1
(1 row)
--
-- regression test: check for failure to generate a plan with multiple
-- degenerate IN clauses
--
select count(*) from tenk1 x where
x.unique1 in (select a.f1 from int4_tbl a,float8_tbl b where a.f1=b.f1) and
x.unique1 = 0 and
x.unique1 in (select aa.f1 from int4_tbl aa,float8_tbl bb where aa.f1=bb.f1);
count
-------
1
(1 row)
--
-- Clean up
--

View File

@ -2139,6 +2139,19 @@ select count(*) from tenk1 a where unique1 in
1
(1 row)
--
-- regression test: check for failure to generate a plan with multiple
-- degenerate IN clauses
--
select count(*) from tenk1 x where
x.unique1 in (select a.f1 from int4_tbl a,float8_tbl b where a.f1=b.f1) and
x.unique1 = 0 and
x.unique1 in (select aa.f1 from int4_tbl aa,float8_tbl bb where aa.f1=bb.f1);
count
-------
1
(1 row)
--
-- Clean up
--

View File

@ -338,6 +338,15 @@ select count(*) from tenk1 a where unique1 in
(select unique1 from tenk1 b join tenk1 c using (unique1)
where b.unique2 = 42);
--
-- regression test: check for failure to generate a plan with multiple
-- degenerate IN clauses
--
select count(*) from tenk1 x where
x.unique1 in (select a.f1 from int4_tbl a,float8_tbl b where a.f1=b.f1) and
x.unique1 = 0 and
x.unique1 in (select aa.f1 from int4_tbl aa,float8_tbl bb where aa.f1=bb.f1);
--
-- Clean up