1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-27 12:41:57 +03:00

Teach planner how to rearrange join order for some classes of OUTER JOIN.

Per my recent proposal.  I ended up basing the implementation on the
existing mechanism for enforcing valid join orders of IN joins --- the
rules for valid outer-join orders are somewhat similar.
This commit is contained in:
Tom Lane
2005-12-20 02:30:36 +00:00
parent 1a6aaaa6c4
commit e3b9852728
23 changed files with 969 additions and 722 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.138 2005/11/22 18:17:12 momjian Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.139 2005/12/20 02:30:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -51,6 +51,7 @@ static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
static RelOptInfo *make_one_rel_by_joins(PlannerInfo *root, int levels_needed,
List *initial_rels);
static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
@ -73,7 +74,7 @@ static void recurse_push_qual(Node *setOp, Query *topquery,
* single rel that represents the join of all base rels in the query.
*/
RelOptInfo *
make_one_rel(PlannerInfo *root)
make_one_rel(PlannerInfo *root, List *joinlist)
{
RelOptInfo *rel;
@ -85,10 +86,7 @@ make_one_rel(PlannerInfo *root)
/*
* Generate access paths for the entire join tree.
*/
Assert(root->parse->jointree != NULL &&
IsA(root->parse->jointree, FromExpr));
rel = make_fromexpr_rel(root, root->parse->jointree);
rel = make_rel_from_joinlist(root, joinlist);
/*
* The result should join all and only the query's base rels.
@ -528,43 +526,65 @@ set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
}
/*
* make_fromexpr_rel
* Build access paths for a FromExpr jointree node.
* make_rel_from_joinlist
* Build access paths using a "joinlist" to guide the join path search.
*
* See comments for deconstruct_jointree() for definition of the joinlist
* data structure.
*/
RelOptInfo *
make_fromexpr_rel(PlannerInfo *root, FromExpr *from)
static RelOptInfo *
make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
{
int levels_needed;
List *initial_rels = NIL;
ListCell *jt;
List *initial_rels;
ListCell *jl;
/*
* Count the number of child jointree nodes. This is the depth of the
* Count the number of child joinlist nodes. This is the depth of the
* dynamic-programming algorithm we must employ to consider all ways of
* joining the child nodes.
*/
levels_needed = list_length(from->fromlist);
levels_needed = list_length(joinlist);
if (levels_needed <= 0)
return NULL; /* nothing to do? */
/*
* Construct a list of rels corresponding to the child jointree nodes.
* Construct a list of rels corresponding to the child joinlist nodes.
* This may contain both base rels and rels constructed according to
* explicit JOIN directives.
* sub-joinlists.
*/
foreach(jt, from->fromlist)
initial_rels = NIL;
foreach(jl, joinlist)
{
Node *jtnode = (Node *) lfirst(jt);
Node *jlnode = (Node *) lfirst(jl);
RelOptInfo *thisrel;
initial_rels = lappend(initial_rels,
make_jointree_rel(root, jtnode));
if (IsA(jlnode, RangeTblRef))
{
int varno = ((RangeTblRef *) jlnode)->rtindex;
thisrel = find_base_rel(root, varno);
}
else if (IsA(jlnode, List))
{
/* Recurse to handle subproblem */
thisrel = make_rel_from_joinlist(root, (List *) jlnode);
}
else
{
elog(ERROR, "unrecognized joinlist node type: %d",
(int) nodeTag(jlnode));
thisrel = NULL; /* keep compiler quiet */
}
initial_rels = lappend(initial_rels, thisrel);
}
if (levels_needed == 1)
{
/*
* Single jointree node, so we're done.
* Single joinlist node, so we're done.
*/
return (RelOptInfo *) linitial(initial_rels);
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.77 2005/11/22 18:17:12 momjian Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.78 2005/12/20 02:30:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -25,7 +25,7 @@ static List *make_rels_by_clause_joins(PlannerInfo *root,
static List *make_rels_by_clauseless_joins(PlannerInfo *root,
RelOptInfo *old_rel,
ListCell *other_rels);
static bool is_inside_IN(PlannerInfo *root, RelOptInfo *rel);
static bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel);
/*
@ -86,15 +86,16 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels)
other_rels);
/*
* An exception occurs when there is a clauseless join inside an
* IN (sub-SELECT) construct. Here, the members of the subselect
* all have join clauses (against the stuff outside the IN), but
* they *must* be joined to each other before we can make use of
* those join clauses. So do the clauseless join bit.
* An exception occurs when there is a clauseless join inside a
* construct that restricts join order, i.e., an outer join RHS
* or an IN (sub-SELECT) construct. Here, the rel may well have
* join clauses against stuff outside the OJ RHS or IN sub-SELECT,
* but the clauseless join *must* be done before we can make use
* of those join clauses. So do the clauseless join bit.
*
* See also the last-ditch case below.
*/
if (new_rels == NIL && is_inside_IN(root, old_rel))
if (new_rels == NIL && has_join_restriction(root, old_rel))
new_rels = make_rels_by_clauseless_joins(root,
old_rel,
other_rels);
@ -169,8 +170,7 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels)
{
RelOptInfo *jrel;
jrel = make_join_rel(root, old_rel, new_rel,
JOIN_INNER);
jrel = make_join_rel(root, old_rel, new_rel);
/* Avoid making duplicate entries ... */
if (jrel)
result_rels = list_append_unique_ptr(result_rels,
@ -219,8 +219,8 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels)
}
/*----------
* When IN clauses are involved, there may be no legal way to make
* an N-way join for some values of N. For example consider
* When OJs or IN clauses are involved, there may be no legal way
* to make an N-way join for some values of N. For example consider
*
* SELECT ... FROM t1 WHERE
* x IN (SELECT ... FROM t2,t3 WHERE ...) AND
@ -231,11 +231,12 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels)
* to accept failure at level 4 and go on to discover a workable
* bushy plan at level 5.
*
* However, if there are no IN clauses then make_join_rel() should
* However, if there are no such clauses then make_join_rel() should
* never fail, and so the following sanity check is useful.
*----------
*/
if (result_rels == NIL && root->in_info_list == NIL)
if (result_rels == NIL &&
root->oj_info_list == NIL && root->in_info_list == NIL)
elog(ERROR, "failed to build any %d-way joins", level);
}
@ -273,7 +274,7 @@ make_rels_by_clause_joins(PlannerInfo *root,
{
RelOptInfo *jrel;
jrel = make_join_rel(root, old_rel, other_rel, JOIN_INNER);
jrel = make_join_rel(root, old_rel, other_rel);
if (jrel)
result = lcons(jrel, result);
}
@ -312,7 +313,7 @@ make_rels_by_clauseless_joins(PlannerInfo *root,
{
RelOptInfo *jrel;
jrel = make_join_rel(root, old_rel, other_rel, JOIN_INNER);
jrel = make_join_rel(root, old_rel, other_rel);
/*
* As long as given other_rels are distinct, don't need to test to
@ -328,17 +329,23 @@ make_rels_by_clauseless_joins(PlannerInfo *root,
/*
* is_inside_IN
* Detect whether the specified relation is inside an IN (sub-SELECT).
*
* Note that we are actually only interested in rels that have been pulled up
* out of an IN, so the routine name is a slight misnomer.
* has_join_restriction
* Detect whether the specified relation has join-order restrictions
* due to being inside an OJ RHS or an IN (sub-SELECT).
*/
static bool
is_inside_IN(PlannerInfo *root, RelOptInfo *rel)
has_join_restriction(PlannerInfo *root, RelOptInfo *rel)
{
ListCell *l;
foreach(l, root->oj_info_list)
{
OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l);
if (bms_is_subset(rel->relids, ojinfo->min_righthand))
return true;
}
foreach(l, root->in_info_list)
{
InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
@ -350,66 +357,6 @@ is_inside_IN(PlannerInfo *root, RelOptInfo *rel)
}
/*
* make_jointree_rel
* Find or build a RelOptInfo join rel representing a specific
* jointree item. For JoinExprs, we only consider the construction
* path that corresponds exactly to what the user wrote.
*/
RelOptInfo *
make_jointree_rel(PlannerInfo *root, Node *jtnode)
{
if (IsA(jtnode, RangeTblRef))
{
int varno = ((RangeTblRef *) jtnode)->rtindex;
return find_base_rel(root, varno);
}
else if (IsA(jtnode, FromExpr))
{
FromExpr *f = (FromExpr *) jtnode;
/* Recurse back to multi-way-join planner */
return make_fromexpr_rel(root, f);
}
else if (IsA(jtnode, JoinExpr))
{
JoinExpr *j = (JoinExpr *) jtnode;
RelOptInfo *rel,
*lrel,
*rrel;
/* Recurse */
lrel = make_jointree_rel(root, j->larg);
rrel = make_jointree_rel(root, j->rarg);
/* Make this join rel */
rel = make_join_rel(root, lrel, rrel, j->jointype);
if (rel == NULL) /* oops */
elog(ERROR, "invalid join order");
/*
* Since we are only going to consider this one way to do it, we're
* done generating Paths for this joinrel and can now select the
* cheapest. In fact we *must* do so now, since next level up will
* need it!
*/
set_cheapest(rel);
#ifdef OPTIMIZER_DEBUG
debug_print_rel(root, rel);
#endif
return rel;
}
else
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(jtnode));
return NULL; /* keep compiler quiet */
}
/*
* make_join_rel
* Find or create a join RelOptInfo that represents the join of
@ -418,16 +365,19 @@ make_jointree_rel(PlannerInfo *root, Node *jtnode)
* (The join rel may already contain paths generated from other
* pairs of rels that add up to the same set of base rels.)
*
* NB: will return NULL if attempted join is not valid. This can only
* happen when working with IN clauses that have been turned into joins.
* NB: will return NULL if attempted join is not valid. This can happen
* when working with outer joins, or with IN clauses that have been turned
* into joins.
*/
RelOptInfo *
make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
JoinType jointype)
make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
{
Relids joinrelids;
JoinType jointype;
bool is_valid_inner;
RelOptInfo *joinrel;
List *restrictlist;
ListCell *l;
/* We should never try to join two overlapping sets of rels. */
Assert(!bms_overlap(rel1->relids, rel2->relids));
@ -436,94 +386,176 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
joinrelids = bms_union(rel1->relids, rel2->relids);
/*
* If we are implementing IN clauses as joins, there are some joins that
* are illegal. Check to see if the proposed join is trouble. We can skip
* the work if looking at an outer join, however, because only top-level
* joins might be affected.
* If we have any outer joins, the proposed join might be illegal; and
* in any case we have to determine its join type. Scan the OJ list
* for conflicts.
*/
if (jointype == JOIN_INNER)
jointype = JOIN_INNER; /* default if no match to an OJ */
is_valid_inner = true;
foreach(l, root->oj_info_list)
{
ListCell *l;
OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l);
foreach(l, root->in_info_list)
/*
* This OJ is not relevant unless its RHS overlaps the proposed join.
* (Check this first as a fast path for dismissing most irrelevant OJs
* quickly.)
*/
if (!bms_overlap(ojinfo->min_righthand, joinrelids))
continue;
/*
* Also, not relevant if proposed join is fully contained within RHS
* (ie, we're still building up the RHS).
*/
if (bms_is_subset(joinrelids, ojinfo->min_righthand))
continue;
/*
* Also, not relevant if OJ is already done within either input.
*/
if (bms_is_subset(ojinfo->min_lefthand, rel1->relids) &&
bms_is_subset(ojinfo->min_righthand, rel1->relids))
continue;
if (bms_is_subset(ojinfo->min_lefthand, rel2->relids) &&
bms_is_subset(ojinfo->min_righthand, rel2->relids))
continue;
/*
* If one input contains min_lefthand and the other contains
* min_righthand, then we can perform the OJ at this join.
*
* Barf if we get matches to more than one OJ (is that possible?)
*/
if (bms_is_subset(ojinfo->min_lefthand, rel1->relids) &&
bms_is_subset(ojinfo->min_righthand, rel2->relids))
{
InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
/*
* This IN clause is not relevant unless its RHS overlaps the
* proposed join. (Check this first as a fast path for dismissing
* most irrelevant INs quickly.)
*/
if (!bms_overlap(ininfo->righthand, joinrelids))
continue;
/*
* If we are still building the IN clause's RHS, then this IN
* clause isn't relevant yet.
*/
if (bms_is_subset(joinrelids, ininfo->righthand))
continue;
/*
* Cannot join if proposed join contains rels not in the RHS *and*
* contains only part of the RHS. We must build the complete RHS
* (subselect's join) before it can be joined to rels outside the
* subselect.
*/
if (!bms_is_subset(ininfo->righthand, joinrelids))
{
bms_free(joinrelids);
return NULL;
}
/*
* At this point we are considering a join of the IN's RHS to some
* other rel(s).
*
* If we already joined IN's RHS to any other rels in either input
* path, then this join is not constrained (the necessary work was
* done at the lower level where that join occurred).
*/
if (bms_is_subset(ininfo->righthand, rel1->relids) &&
!bms_equal(ininfo->righthand, rel1->relids))
continue;
if (bms_is_subset(ininfo->righthand, rel2->relids) &&
!bms_equal(ininfo->righthand, rel2->relids))
continue;
/*
* JOIN_IN technique will work if outerrel includes LHS and
* innerrel is exactly RHS; conversely JOIN_REVERSE_IN handles
* RHS/LHS.
*
* JOIN_UNIQUE_OUTER will work if outerrel is exactly RHS;
* conversely JOIN_UNIQUE_INNER will work if innerrel is exactly
* RHS.
*
* But none of these will work if we already found another IN that
* needs to trigger here.
*/
if (jointype != JOIN_INNER)
{
bms_free(joinrelids);
return NULL;
}
if (bms_is_subset(ininfo->lefthand, rel1->relids) &&
bms_equal(ininfo->righthand, rel2->relids))
jointype = JOIN_IN;
else if (bms_is_subset(ininfo->lefthand, rel2->relids) &&
bms_equal(ininfo->righthand, rel1->relids))
jointype = JOIN_REVERSE_IN;
else if (bms_equal(ininfo->righthand, rel1->relids))
jointype = JOIN_UNIQUE_OUTER;
else if (bms_equal(ininfo->righthand, rel2->relids))
jointype = JOIN_UNIQUE_INNER;
else
{
/* invalid join path */
bms_free(joinrelids);
return NULL;
}
jointype = ojinfo->is_full_join ? JOIN_FULL : JOIN_LEFT;
}
else if (bms_is_subset(ojinfo->min_lefthand, rel2->relids) &&
bms_is_subset(ojinfo->min_righthand, rel1->relids))
{
if (jointype != JOIN_INNER)
{
/* invalid join path */
bms_free(joinrelids);
return NULL;
}
jointype = ojinfo->is_full_join ? JOIN_FULL : JOIN_RIGHT;
}
else
{
/*----------
* Otherwise, the proposed join overlaps the RHS but isn't
* a valid implementation of this OJ. It might still be
* a valid implementation of some other OJ, however. We have
* to allow this to support the associative identity
* (a LJ b on Pab) LJ c ON Pbc = a LJ (b LJ c ON Pbc) on Pab
* since joining B directly to C violates the lower OJ's RHS.
* We assume that make_outerjoininfo() set things up correctly
* so that we'll only match to the upper OJ if the transformation
* is valid. Set flag here to check at bottom of loop.
*----------
*/
is_valid_inner = false;
}
}
/* Fail if violated some OJ's RHS and didn't match to another OJ */
if (jointype == JOIN_INNER && !is_valid_inner)
{
/* invalid join path */
bms_free(joinrelids);
return NULL;
}
/*
* Similarly, if we are implementing IN clauses as joins, check for
* illegal join path and detect whether we need a non-default join type.
*/
foreach(l, root->in_info_list)
{
InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
/*
* This IN clause is not relevant unless its RHS overlaps the
* proposed join. (Check this first as a fast path for dismissing
* most irrelevant INs quickly.)
*/
if (!bms_overlap(ininfo->righthand, joinrelids))
continue;
/*
* If we are still building the IN clause's RHS, then this IN
* clause isn't relevant yet.
*/
if (bms_is_subset(joinrelids, ininfo->righthand))
continue;
/*
* Cannot join if proposed join contains rels not in the RHS *and*
* contains only part of the RHS. We must build the complete RHS
* (subselect's join) before it can be joined to rels outside the
* subselect.
*/
if (!bms_is_subset(ininfo->righthand, joinrelids))
{
bms_free(joinrelids);
return NULL;
}
/*
* At this point we are considering a join of the IN's RHS to some
* other rel(s).
*
* If we already joined IN's RHS to any other rels in either input
* path, then this join is not constrained (the necessary work was
* done at the lower level where that join occurred).
*/
if (bms_is_subset(ininfo->righthand, rel1->relids) &&
!bms_equal(ininfo->righthand, rel1->relids))
continue;
if (bms_is_subset(ininfo->righthand, rel2->relids) &&
!bms_equal(ininfo->righthand, rel2->relids))
continue;
/*
* JOIN_IN technique will work if outerrel includes LHS and innerrel
* is exactly RHS; conversely JOIN_REVERSE_IN handles RHS/LHS.
*
* JOIN_UNIQUE_OUTER will work if outerrel is exactly RHS; conversely
* JOIN_UNIQUE_INNER will work if innerrel is exactly RHS.
*
* But none of these will work if we already found an OJ or another IN
* that needs to trigger here.
*/
if (jointype != JOIN_INNER)
{
bms_free(joinrelids);
return NULL;
}
if (bms_is_subset(ininfo->lefthand, rel1->relids) &&
bms_equal(ininfo->righthand, rel2->relids))
jointype = JOIN_IN;
else if (bms_is_subset(ininfo->lefthand, rel2->relids) &&
bms_equal(ininfo->righthand, rel1->relids))
jointype = JOIN_REVERSE_IN;
else if (bms_equal(ininfo->righthand, rel1->relids))
jointype = JOIN_UNIQUE_OUTER;
else if (bms_equal(ininfo->righthand, rel2->relids))
jointype = JOIN_UNIQUE_INNER;
else
{
/* invalid join path */
bms_free(joinrelids);
return NULL;
}
}