1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Implement SEMI and ANTI joins in the planner and executor.  (Semijoins replace
the old JOIN_IN code, but antijoins are new functionality.)  Teach the planner
to convert appropriate EXISTS and NOT EXISTS subqueries into semi and anti
joins respectively.  Also, LEFT JOINs with suitable upper-level IS NULL
filters are recognized as being anti joins.  Unify the InClauseInfo and
OuterJoinInfo infrastructure into "SpecialJoinInfo".  With that change,
it becomes possible to associate a SpecialJoinInfo with every join attempt,
which permits some cleanup of join selectivity estimation.  That needs to be
taken much further than this patch does, but the next step is to change the
API for oprjoin selectivity functions, which seems like material for a
separate patch.  So for the moment the output size estimates for semi and
especially anti joins are quite bogus.
This commit is contained in:
Tom Lane
2008-08-14 18:48:00 +00:00
parent ef1c807c25
commit e006a24ad1
40 changed files with 2129 additions and 1204 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.90 2008/01/11 17:00:45 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.91 2008/08/14 18:47:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -94,7 +94,8 @@ Selectivity
clauselist_selectivity(PlannerInfo *root,
List *clauses,
int varRelid,
JoinType jointype)
JoinType jointype,
SpecialJoinInfo *sjinfo)
{
Selectivity s1 = 1.0;
RangeQueryClause *rqlist = NULL;
@ -106,7 +107,7 @@ clauselist_selectivity(PlannerInfo *root,
*/
if (list_length(clauses) == 1)
return clause_selectivity(root, (Node *) linitial(clauses),
varRelid, jointype);
varRelid, jointype, sjinfo);
/*
* Initial scan over clauses. Anything that doesn't look like a potential
@ -120,7 +121,7 @@ clauselist_selectivity(PlannerInfo *root,
Selectivity s2;
/* Always compute the selectivity using clause_selectivity */
s2 = clause_selectivity(root, clause, varRelid, jointype);
s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo);
/*
* Check for being passed a RestrictInfo.
@ -227,9 +228,8 @@ clauselist_selectivity(PlannerInfo *root,
s2 = rqlist->hibound + rqlist->lobound - 1.0;
/* Adjust for double-exclusion of NULLs */
/* HACK: disable nulltestsel's special outer-join logic */
s2 += nulltestsel(root, IS_NULL, rqlist->var,
varRelid, JOIN_INNER);
varRelid, jointype, sjinfo);
/*
* A zero or slightly negative s2 should be converted into a
@ -420,13 +420,32 @@ bms_is_subset_singleton(const Bitmapset *s, int x)
* is appropriate for ordinary join clauses and restriction clauses.
*
* jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
* if the clause isn't a join clause or the context is uncertain.
* if the clause isn't a join clause.
*
* sjinfo is NULL for a non-join clause, otherwise it provides additional
* context information about the join being performed. There are some
* special cases:
* 1. For a special (not INNER) join, sjinfo is always a member of
* root->join_info_list.
* 2. For an INNER join, sjinfo is just a transient struct, and only the
* relids and jointype fields in it can be trusted.
* 3. XXX sjinfo might be NULL even though it really is a join. This case
* will go away soon, but fixing it requires API changes for oprjoin and
* amcostestimate functions.
* It is possible for jointype to be different from sjinfo->jointype.
* This indicates we are considering a variant join: either with
* the LHS and RHS switched, or with one input unique-ified.
*
* Note: when passing nonzero varRelid, it's normally appropriate to set
* jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
* join clause; because we aren't treating it as a join clause.
*/
Selectivity
clause_selectivity(PlannerInfo *root,
Node *clause,
int varRelid,
JoinType jointype)
JoinType jointype,
SpecialJoinInfo *sjinfo)
{
Selectivity s1 = 0.5; /* default for any unhandled clause type */
RestrictInfo *rinfo = NULL;
@ -457,36 +476,15 @@ clause_selectivity(PlannerInfo *root,
* If possible, cache the result of the selectivity calculation for
* the clause. We can cache if varRelid is zero or the clause
* contains only vars of that relid --- otherwise varRelid will affect
* the result, so mustn't cache. We also have to be careful about the
* jointype. It's OK to cache when jointype is JOIN_INNER or one of
* the outer join types (any given outer-join clause should always be
* examined with the same jointype, so result won't change). It's not
* OK to cache when jointype is one of the special types associated
* with IN processing, because the same clause may be examined with
* different jointypes and the result should vary.
* the result, so mustn't cache.
*/
if (varRelid == 0 ||
bms_is_subset_singleton(rinfo->clause_relids, varRelid))
{
switch (jointype)
{
case JOIN_INNER:
case JOIN_LEFT:
case JOIN_FULL:
case JOIN_RIGHT:
/* Cacheable --- do we already have the result? */
if (rinfo->this_selec >= 0)
return rinfo->this_selec;
cacheable = true;
break;
case JOIN_IN:
case JOIN_REVERSE_IN:
case JOIN_UNIQUE_OUTER:
case JOIN_UNIQUE_INNER:
/* unsafe to cache */
break;
}
/* Cacheable --- do we already have the result? */
if (rinfo->this_selec >= 0)
return rinfo->this_selec;
cacheable = true;
}
/*
@ -568,7 +566,8 @@ clause_selectivity(PlannerInfo *root,
s1 = 1.0 - clause_selectivity(root,
(Node *) get_notclausearg((Expr *) clause),
varRelid,
jointype);
jointype,
sjinfo);
}
else if (and_clause(clause))
{
@ -576,7 +575,8 @@ clause_selectivity(PlannerInfo *root,
s1 = clauselist_selectivity(root,
((BoolExpr *) clause)->args,
varRelid,
jointype);
jointype,
sjinfo);
}
else if (or_clause(clause))
{
@ -594,7 +594,8 @@ clause_selectivity(PlannerInfo *root,
Selectivity s2 = clause_selectivity(root,
(Node *) lfirst(arg),
varRelid,
jointype);
jointype,
sjinfo);
s1 = s1 + s2 - s1 * s2;
}
@ -700,7 +701,8 @@ clause_selectivity(PlannerInfo *root,
(ScalarArrayOpExpr *) clause,
is_join_clause,
varRelid,
jointype);
jointype,
sjinfo);
}
else if (IsA(clause, RowCompareExpr))
{
@ -708,7 +710,8 @@ clause_selectivity(PlannerInfo *root,
s1 = rowcomparesel(root,
(RowCompareExpr *) clause,
varRelid,
jointype);
jointype,
sjinfo);
}
else if (IsA(clause, NullTest))
{
@ -717,7 +720,8 @@ clause_selectivity(PlannerInfo *root,
((NullTest *) clause)->nulltesttype,
(Node *) ((NullTest *) clause)->arg,
varRelid,
jointype);
jointype,
sjinfo);
}
else if (IsA(clause, BooleanTest))
{
@ -726,7 +730,8 @@ clause_selectivity(PlannerInfo *root,
((BooleanTest *) clause)->booltesttype,
(Node *) ((BooleanTest *) clause)->arg,
varRelid,
jointype);
jointype,
sjinfo);
}
else if (IsA(clause, CurrentOfExpr))
{
@ -743,7 +748,8 @@ clause_selectivity(PlannerInfo *root,
s1 = clause_selectivity(root,
(Node *) ((RelabelType *) clause)->arg,
varRelid,
jointype);
jointype,
sjinfo);
}
else if (IsA(clause, CoerceToDomain))
{
@ -751,7 +757,8 @@ clause_selectivity(PlannerInfo *root,
s1 = clause_selectivity(root,
(Node *) ((CoerceToDomain *) clause)->arg,
varRelid,
jointype);
jointype,
sjinfo);
}
/* Cache the result if possible */

View File

@ -54,7 +54,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.192 2008/03/24 21:53:03 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.193 2008/08/14 18:47:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -119,8 +119,9 @@ static MergeScanSelCache *cached_scansel(PlannerInfo *root,
PathKey *pathkey);
static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
static Selectivity approx_selectivity(PlannerInfo *root, List *quals,
JoinType jointype);
static Selectivity join_in_selectivity(JoinPath *path, PlannerInfo *root);
SpecialJoinInfo *sjinfo);
static Selectivity join_in_selectivity(JoinPath *path, PlannerInfo *root,
SpecialJoinInfo *sjinfo);
static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
static double relation_byte_size(double tuples, int width);
static double page_size(double tuples, int width);
@ -1273,9 +1274,10 @@ nestloop_inner_path_rows(Path *path)
* nested loop algorithm.
*
* 'path' is already filled in except for the cost fields
* 'sjinfo' is extra info about the join for selectivity estimation
*/
void
cost_nestloop(NestPath *path, PlannerInfo *root)
cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
{
Path *outer_path = path->outerjoinpath;
Path *inner_path = path->innerjoinpath;
@ -1298,7 +1300,7 @@ cost_nestloop(NestPath *path, PlannerInfo *root)
* selectivity. (This assumes that all the quals attached to the join are
* IN quals, which should be true.)
*/
joininfactor = join_in_selectivity(path, root);
joininfactor = join_in_selectivity(path, root, sjinfo);
/* cost of source data */
@ -1349,6 +1351,7 @@ cost_nestloop(NestPath *path, PlannerInfo *root)
* merge join algorithm.
*
* 'path' is already filled in except for the cost fields
* 'sjinfo' is extra info about the join for selectivity estimation
*
* Notes: path's mergeclauses should be a subset of the joinrestrictinfo list;
* outersortkeys and innersortkeys are lists of the keys to be used
@ -1356,7 +1359,7 @@ cost_nestloop(NestPath *path, PlannerInfo *root)
* sort is needed because the source path is already ordered.
*/
void
cost_mergejoin(MergePath *path, PlannerInfo *root)
cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
{
Path *outer_path = path->jpath.outerjoinpath;
Path *inner_path = path->jpath.innerjoinpath;
@ -1402,8 +1405,7 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
* Note: it's probably bogus to use the normal selectivity calculation
* here when either the outer or inner path is a UniquePath.
*/
merge_selec = approx_selectivity(root, mergeclauses,
path->jpath.jointype);
merge_selec = approx_selectivity(root, mergeclauses, sjinfo);
cost_qual_eval(&merge_qual_cost, mergeclauses, root);
cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
qp_qual_cost.startup -= merge_qual_cost.startup;
@ -1605,7 +1607,7 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
* output size. (This assumes that all the quals attached to the join are
* IN quals, which should be true.)
*/
joininfactor = join_in_selectivity(&path->jpath, root);
joininfactor = join_in_selectivity(&path->jpath, root, sjinfo);
/*
* The number of tuple comparisons needed is approximately number of outer
@ -1696,11 +1698,12 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
* hash join algorithm.
*
* 'path' is already filled in except for the cost fields
* 'sjinfo' is extra info about the join for selectivity estimation
*
* Note: path's hashclauses should be a subset of the joinrestrictinfo list
*/
void
cost_hashjoin(HashPath *path, PlannerInfo *root)
cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
{
Path *outer_path = path->jpath.outerjoinpath;
Path *inner_path = path->jpath.innerjoinpath;
@ -1733,8 +1736,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
* Note: it's probably bogus to use the normal selectivity calculation
* here when either the outer or inner path is a UniquePath.
*/
hash_selec = approx_selectivity(root, hashclauses,
path->jpath.jointype);
hash_selec = approx_selectivity(root, hashclauses, sjinfo);
cost_qual_eval(&hash_qual_cost, hashclauses, root);
cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
qp_qual_cost.startup -= hash_qual_cost.startup;
@ -1863,7 +1865,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
* output size. (This assumes that all the quals attached to the join are
* IN quals, which should be true.)
*/
joininfactor = join_in_selectivity(&path->jpath, root);
joininfactor = join_in_selectivity(&path->jpath, root, sjinfo);
/*
* The number of tuple comparisons needed is the number of outer tuples
@ -2216,6 +2218,9 @@ get_initplan_cost(PlannerInfo *root, SubPlan *subplan)
* The input can be either an implicitly-ANDed list of boolean
* expressions, or a list of RestrictInfo nodes (typically the latter).
*
* Currently this is only used in join estimation, so sjinfo should never
* be NULL.
*
* This is quick-and-dirty because we bypass clauselist_selectivity, and
* simply multiply the independent clause selectivities together. Now
* clauselist_selectivity often can't do any better than that anyhow, but
@ -2228,7 +2233,7 @@ get_initplan_cost(PlannerInfo *root, SubPlan *subplan)
* seems OK to live with the approximation.
*/
static Selectivity
approx_selectivity(PlannerInfo *root, List *quals, JoinType jointype)
approx_selectivity(PlannerInfo *root, List *quals, SpecialJoinInfo *sjinfo)
{
Selectivity total = 1.0;
ListCell *l;
@ -2238,7 +2243,7 @@ approx_selectivity(PlannerInfo *root, List *quals, JoinType jointype)
Node *qual = (Node *) lfirst(l);
/* Note that clause_selectivity will be able to cache its result */
total *= clause_selectivity(root, qual, 0, jointype);
total *= clause_selectivity(root, qual, 0, sjinfo->jointype, sjinfo);
}
return total;
}
@ -2269,7 +2274,8 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel)
clauselist_selectivity(root,
rel->baserestrictinfo,
0,
JOIN_INNER);
JOIN_INNER,
NULL);
rel->rows = clamp_row_est(nrows);
@ -2295,11 +2301,6 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel)
* calculations for each pair of input rels that's encountered, and somehow
* average the results? Probably way more trouble than it's worth.)
*
* It's important that the results for symmetric JoinTypes be symmetric,
* eg, (rel1, rel2, JOIN_LEFT) should produce the same result as (rel2,
* rel1, JOIN_RIGHT). Also, JOIN_IN should produce the same result as
* JOIN_UNIQUE_INNER, likewise JOIN_REVERSE_IN == JOIN_UNIQUE_OUTER.
*
* We set only the rows field here. The width field was already set by
* build_joinrel_tlist, and baserestrictcost is not used for join rels.
*/
@ -2307,9 +2308,10 @@ void
set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
RelOptInfo *outer_rel,
RelOptInfo *inner_rel,
JoinType jointype,
SpecialJoinInfo *sjinfo,
List *restrictlist)
{
JoinType jointype = sjinfo->jointype;
Selectivity jselec;
Selectivity pselec;
double nrows;
@ -2347,11 +2349,13 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
jselec = clauselist_selectivity(root,
joinquals,
0,
jointype);
jointype,
sjinfo);
pselec = clauselist_selectivity(root,
pushedquals,
0,
jointype);
jointype,
sjinfo);
/* Avoid leaking a lot of ListCells */
list_free(joinquals);
@ -2362,7 +2366,8 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
jselec = clauselist_selectivity(root,
restrictlist,
0,
jointype);
jointype,
sjinfo);
pselec = 0.0; /* not used, keep compiler quiet */
}
@ -2390,12 +2395,6 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
nrows = outer_rel->rows;
nrows *= pselec;
break;
case JOIN_RIGHT:
nrows = outer_rel->rows * inner_rel->rows * jselec;
if (nrows < inner_rel->rows)
nrows = inner_rel->rows;
nrows *= pselec;
break;
case JOIN_FULL:
nrows = outer_rel->rows * inner_rel->rows * jselec;
if (nrows < outer_rel->rows)
@ -2404,23 +2403,27 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
nrows = inner_rel->rows;
nrows *= pselec;
break;
case JOIN_IN:
case JOIN_UNIQUE_INNER:
case JOIN_SEMI:
/* XXX this is unsafe, could Assert? */
upath = create_unique_path(root, inner_rel,
inner_rel->cheapest_total_path);
nrows = outer_rel->rows * upath->rows * jselec;
inner_rel->cheapest_total_path,
sjinfo);
if (upath)
nrows = outer_rel->rows * upath->rows * jselec;
else
nrows = outer_rel->rows * inner_rel->rows * jselec;
if (nrows > outer_rel->rows)
nrows = outer_rel->rows;
break;
case JOIN_REVERSE_IN:
case JOIN_UNIQUE_OUTER:
upath = create_unique_path(root, outer_rel,
outer_rel->cheapest_total_path);
nrows = upath->rows * inner_rel->rows * jselec;
if (nrows > inner_rel->rows)
nrows = inner_rel->rows;
case JOIN_ANTI:
/* XXX this is utterly wrong */
nrows = outer_rel->rows * inner_rel->rows * jselec;
if (nrows < outer_rel->rows)
nrows = outer_rel->rows;
nrows *= pselec;
break;
default:
/* other values not expected here */
elog(ERROR, "unrecognized join type: %d", (int) jointype);
nrows = 0; /* keep compiler quiet */
break;
@ -2435,9 +2438,10 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
* to be smaller than an ordinary inner join.
*
* 'path' is already filled in except for the cost fields
* 'sjinfo' must be the JOIN_SEMI SpecialJoinInfo for the join
*/
static Selectivity
join_in_selectivity(JoinPath *path, PlannerInfo *root)
join_in_selectivity(JoinPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
{
RelOptInfo *innerrel;
UniquePath *innerunique;
@ -2445,8 +2449,9 @@ join_in_selectivity(JoinPath *path, PlannerInfo *root)
double nrows;
/* Return 1.0 whenever it's not JOIN_IN */
if (path->jointype != JOIN_IN)
if (path->jointype != JOIN_SEMI)
return 1.0;
Assert(sjinfo && sjinfo->jointype == JOIN_SEMI);
/*
* Return 1.0 if the inner side is already known unique. The case where
@ -2458,10 +2463,12 @@ join_in_selectivity(JoinPath *path, PlannerInfo *root)
if (IsA(path->innerjoinpath, UniquePath))
return 1.0;
innerrel = path->innerjoinpath->parent;
/* XXX might assert if sjinfo doesn't exactly match innerrel? */
innerunique = create_unique_path(root,
innerrel,
innerrel->cheapest_total_path);
if (innerunique->rows >= innerrel->rows)
innerrel->cheapest_total_path,
sjinfo);
if (innerunique && innerunique->rows >= innerrel->rows)
return 1.0;
/*
@ -2473,7 +2480,8 @@ join_in_selectivity(JoinPath *path, PlannerInfo *root)
selec = clauselist_selectivity(root,
path->joinrestrictinfo,
0,
JOIN_INNER);
JOIN_INNER,
NULL);
nrows = path->outerjoinpath->parent->rows * innerrel->rows * selec;
nrows = clamp_row_est(nrows);

View File

@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.231 2008/05/27 00:13:09 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.232 2008/08/14 18:47:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -1631,16 +1631,16 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
*cheapest_startup = *cheapest_total = NULL;
/*
* Nestloop only supports inner, left, and IN joins.
* Nestloop only supports inner, left, semi, and anti joins.
*/
switch (jointype)
{
case JOIN_INNER:
case JOIN_IN:
case JOIN_UNIQUE_OUTER:
isouterjoin = false;
break;
case JOIN_LEFT:
case JOIN_SEMI:
case JOIN_ANTI:
isouterjoin = true;
break;
default:

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.116 2008/03/24 21:53:03 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.117 2008/08/14 18:47:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -24,14 +24,15 @@
static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist, List *mergeclause_list,
JoinType jointype);
JoinType jointype, SpecialJoinInfo *sjinfo);
static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist, List *mergeclause_list,
JoinType jointype);
JoinType jointype, SpecialJoinInfo *sjinfo);
static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist, JoinType jointype);
List *restrictlist,
JoinType jointype, SpecialJoinInfo *sjinfo);
static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
RelOptInfo *outer_rel, JoinType jointype);
static List *select_mergejoin_clauses(PlannerInfo *root,
@ -52,6 +53,18 @@ static List *select_mergejoin_clauses(PlannerInfo *root,
*
* Modifies the pathlist field of the joinrel node to contain the best
* paths found so far.
*
* jointype is not necessarily the same as sjinfo->jointype; it might be
* "flipped around" if we are considering joining the rels in the opposite
* direction from what's indicated in sjinfo.
*
* Also, this routine and others in this module accept the special JoinTypes
* JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
* unique-ify the outer or inner relation and then apply a regular inner
* join. These values are not allowed to propagate outside this module,
* however. Path cost estimation code may need to recognize that it's
* dealing with such a case --- the combination of nominal jointype INNER
* with sjinfo->jointype == JOIN_SEMI indicates that.
*/
void
add_paths_to_joinrel(PlannerInfo *root,
@ -59,6 +72,7 @@ add_paths_to_joinrel(PlannerInfo *root,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
JoinType jointype,
SpecialJoinInfo *sjinfo,
List *restrictlist)
{
List *mergeclause_list = NIL;
@ -82,7 +96,7 @@ add_paths_to_joinrel(PlannerInfo *root,
* sorted.
*/
sort_inner_and_outer(root, joinrel, outerrel, innerrel,
restrictlist, mergeclause_list, jointype);
restrictlist, mergeclause_list, jointype, sjinfo);
/*
* 2. Consider paths where the outer relation need not be explicitly
@ -90,7 +104,7 @@ add_paths_to_joinrel(PlannerInfo *root,
* path is already ordered.
*/
match_unsorted_outer(root, joinrel, outerrel, innerrel,
restrictlist, mergeclause_list, jointype);
restrictlist, mergeclause_list, jointype, sjinfo);
#ifdef NOT_USED
@ -106,7 +120,7 @@ add_paths_to_joinrel(PlannerInfo *root,
* invoked with the two rels given in the other order.
*/
match_unsorted_inner(root, joinrel, outerrel, innerrel,
restrictlist, mergeclause_list, jointype);
restrictlist, mergeclause_list, jointype, sjinfo);
#endif
/*
@ -115,7 +129,7 @@ add_paths_to_joinrel(PlannerInfo *root,
*/
if (enable_hashjoin)
hash_inner_and_outer(root, joinrel, outerrel, innerrel,
restrictlist, jointype);
restrictlist, jointype, sjinfo);
}
/*
@ -131,6 +145,7 @@ add_paths_to_joinrel(PlannerInfo *root,
* 'mergeclause_list' is a list of RestrictInfo nodes for available
* mergejoin clauses in this join
* 'jointype' is the type of join to do
* 'sjinfo' is extra info about the join for selectivity estimation
*/
static void
sort_inner_and_outer(PlannerInfo *root,
@ -139,7 +154,8 @@ sort_inner_and_outer(PlannerInfo *root,
RelOptInfo *innerrel,
List *restrictlist,
List *mergeclause_list,
JoinType jointype)
JoinType jointype,
SpecialJoinInfo *sjinfo)
{
bool useallclauses;
Path *outer_path;
@ -155,7 +171,8 @@ sort_inner_and_outer(PlannerInfo *root,
{
case JOIN_INNER:
case JOIN_LEFT:
case JOIN_IN:
case JOIN_SEMI:
case JOIN_ANTI:
case JOIN_UNIQUE_OUTER:
case JOIN_UNIQUE_INNER:
useallclauses = false;
@ -184,12 +201,16 @@ sort_inner_and_outer(PlannerInfo *root,
inner_path = innerrel->cheapest_total_path;
if (jointype == JOIN_UNIQUE_OUTER)
{
outer_path = (Path *) create_unique_path(root, outerrel, outer_path);
outer_path = (Path *) create_unique_path(root, outerrel,
outer_path, sjinfo);
Assert(outer_path);
jointype = JOIN_INNER;
}
else if (jointype == JOIN_UNIQUE_INNER)
{
inner_path = (Path *) create_unique_path(root, innerrel, inner_path);
inner_path = (Path *) create_unique_path(root, innerrel,
inner_path, sjinfo);
Assert(inner_path);
jointype = JOIN_INNER;
}
@ -270,6 +291,7 @@ sort_inner_and_outer(PlannerInfo *root,
create_mergejoin_path(root,
joinrel,
jointype,
sjinfo,
outer_path,
inner_path,
restrictlist,
@ -312,6 +334,7 @@ sort_inner_and_outer(PlannerInfo *root,
* 'mergeclause_list' is a list of RestrictInfo nodes for available
* mergejoin clauses in this join
* 'jointype' is the type of join to do
* 'sjinfo' is extra info about the join for selectivity estimation
*/
static void
match_unsorted_outer(PlannerInfo *root,
@ -320,7 +343,8 @@ match_unsorted_outer(PlannerInfo *root,
RelOptInfo *innerrel,
List *restrictlist,
List *mergeclause_list,
JoinType jointype)
JoinType jointype,
SpecialJoinInfo *sjinfo)
{
JoinType save_jointype = jointype;
bool nestjoinOK;
@ -333,19 +357,18 @@ match_unsorted_outer(PlannerInfo *root,
ListCell *l;
/*
* Nestloop only supports inner, left, and IN joins. Also, if we are
* doing a right or full join, we must use *all* the mergeclauses as join
* clauses, else we will not have a valid plan. (Although these two flags
* are currently inverses, keep them separate for clarity and possible
* future changes.)
* Nestloop only supports inner, left, semi, and anti joins. Also, if we
* are doing a right or full join, we must use *all* the mergeclauses as
* join clauses, else we will not have a valid plan. (Although these two
* flags are currently inverses, keep them separate for clarity and
* possible future changes.)
*/
switch (jointype)
{
case JOIN_INNER:
case JOIN_LEFT:
case JOIN_IN:
case JOIN_UNIQUE_OUTER:
case JOIN_UNIQUE_INNER:
case JOIN_SEMI:
case JOIN_ANTI:
nestjoinOK = true;
useallclauses = false;
break;
@ -354,6 +377,12 @@ match_unsorted_outer(PlannerInfo *root,
nestjoinOK = false;
useallclauses = true;
break;
case JOIN_UNIQUE_OUTER:
case JOIN_UNIQUE_INNER:
jointype = JOIN_INNER;
nestjoinOK = true;
useallclauses = false;
break;
default:
elog(ERROR, "unrecognized join type: %d",
(int) jointype);
@ -366,12 +395,12 @@ match_unsorted_outer(PlannerInfo *root,
* If we need to unique-ify the inner path, we will consider only the
* cheapest inner.
*/
if (jointype == JOIN_UNIQUE_INNER)
if (save_jointype == JOIN_UNIQUE_INNER)
{
inner_cheapest_total = (Path *)
create_unique_path(root, innerrel, inner_cheapest_total);
create_unique_path(root, innerrel, inner_cheapest_total, sjinfo);
Assert(inner_cheapest_total);
inner_cheapest_startup = inner_cheapest_total;
jointype = JOIN_INNER;
}
else if (nestjoinOK)
{
@ -424,8 +453,9 @@ match_unsorted_outer(PlannerInfo *root,
{
if (outerpath != outerrel->cheapest_total_path)
continue;
outerpath = (Path *) create_unique_path(root, outerrel, outerpath);
jointype = JOIN_INNER;
outerpath = (Path *) create_unique_path(root, outerrel,
outerpath, sjinfo);
Assert(outerpath);
}
/*
@ -449,6 +479,7 @@ match_unsorted_outer(PlannerInfo *root,
create_nestloop_path(root,
joinrel,
jointype,
sjinfo,
outerpath,
inner_cheapest_total,
restrictlist,
@ -458,6 +489,7 @@ match_unsorted_outer(PlannerInfo *root,
create_nestloop_path(root,
joinrel,
jointype,
sjinfo,
outerpath,
matpath,
restrictlist,
@ -467,6 +499,7 @@ match_unsorted_outer(PlannerInfo *root,
create_nestloop_path(root,
joinrel,
jointype,
sjinfo,
outerpath,
inner_cheapest_startup,
restrictlist,
@ -476,6 +509,7 @@ match_unsorted_outer(PlannerInfo *root,
create_nestloop_path(root,
joinrel,
jointype,
sjinfo,
outerpath,
index_cheapest_total,
restrictlist,
@ -486,6 +520,7 @@ match_unsorted_outer(PlannerInfo *root,
create_nestloop_path(root,
joinrel,
jointype,
sjinfo,
outerpath,
index_cheapest_startup,
restrictlist,
@ -536,6 +571,7 @@ match_unsorted_outer(PlannerInfo *root,
create_mergejoin_path(root,
joinrel,
jointype,
sjinfo,
outerpath,
inner_cheapest_total,
restrictlist,
@ -604,6 +640,7 @@ match_unsorted_outer(PlannerInfo *root,
create_mergejoin_path(root,
joinrel,
jointype,
sjinfo,
outerpath,
innerpath,
restrictlist,
@ -649,6 +686,7 @@ match_unsorted_outer(PlannerInfo *root,
create_mergejoin_path(root,
joinrel,
jointype,
sjinfo,
outerpath,
innerpath,
restrictlist,
@ -680,6 +718,7 @@ match_unsorted_outer(PlannerInfo *root,
* 'restrictlist' contains all of the RestrictInfo nodes for restriction
* clauses that apply to this join
* 'jointype' is the type of join to do
* 'sjinfo' is extra info about the join for selectivity estimation
*/
static void
hash_inner_and_outer(PlannerInfo *root,
@ -687,24 +726,26 @@ hash_inner_and_outer(PlannerInfo *root,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
List *restrictlist,
JoinType jointype)
JoinType jointype,
SpecialJoinInfo *sjinfo)
{
bool isouterjoin;
List *hashclauses;
ListCell *l;
/*
* Hashjoin only supports inner, left, and IN joins.
* Hashjoin only supports inner, left, semi, and anti joins.
*/
switch (jointype)
{
case JOIN_INNER:
case JOIN_IN:
case JOIN_UNIQUE_OUTER:
case JOIN_UNIQUE_INNER:
isouterjoin = false;
break;
case JOIN_LEFT:
case JOIN_SEMI:
case JOIN_ANTI:
isouterjoin = true;
break;
default:
@ -769,14 +810,18 @@ hash_inner_and_outer(PlannerInfo *root,
if (jointype == JOIN_UNIQUE_OUTER)
{
cheapest_total_outer = (Path *)
create_unique_path(root, outerrel, cheapest_total_outer);
create_unique_path(root, outerrel,
cheapest_total_outer, sjinfo);
Assert(cheapest_total_outer);
cheapest_startup_outer = cheapest_total_outer;
jointype = JOIN_INNER;
}
else if (jointype == JOIN_UNIQUE_INNER)
{
cheapest_total_inner = (Path *)
create_unique_path(root, innerrel, cheapest_total_inner);
create_unique_path(root, innerrel,
cheapest_total_inner, sjinfo);
Assert(cheapest_total_inner);
jointype = JOIN_INNER;
}
@ -784,6 +829,7 @@ hash_inner_and_outer(PlannerInfo *root,
create_hashjoin_path(root,
joinrel,
jointype,
sjinfo,
cheapest_total_outer,
cheapest_total_inner,
restrictlist,
@ -793,6 +839,7 @@ hash_inner_and_outer(PlannerInfo *root,
create_hashjoin_path(root,
joinrel,
jointype,
sjinfo,
cheapest_startup_outer,
cheapest_total_inner,
restrictlist,

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.92 2008/03/24 21:53:03 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.93 2008/08/14 18:47:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -218,7 +218,7 @@ join_search_one_level(PlannerInfo *root, int level, List **joinrels)
}
/*----------
* When OJs or IN clauses are involved, there may be no legal way
* When special joins are involved, there may be no legal way
* to make an N-way join for some values of N. For example consider
*
* SELECT ... FROM t1 WHERE
@ -230,12 +230,11 @@ join_search_one_level(PlannerInfo *root, int level, List **joinrels)
* to accept failure at level 4 and go on to discover a workable
* bushy plan at level 5.
*
* However, if there are no such clauses then join_is_legal() should
* However, if there are no special joins then join_is_legal() should
* never fail, and so the following sanity check is useful.
*----------
*/
if (result_rels == NIL &&
root->oj_info_list == NIL && root->in_info_list == NIL)
if (result_rels == NIL && root->join_info_list == NIL)
elog(ERROR, "failed to build any %d-way joins", level);
}
@ -337,89 +336,98 @@ make_rels_by_clauseless_joins(PlannerInfo *root,
* (We could simplify the API by computing joinrelids locally, but this
* would be redundant work in the normal path through make_join_rel.)
*
* On success, *jointype_p is set to the required join type.
* On success, *sjinfo_p is set to NULL if this is to be a plain inner join,
* else it's set to point to the associated SpecialJoinInfo node. Also,
* *reversed_p is set TRUE if the given relations need to be swapped to
* match the SpecialJoinInfo node.
*/
static bool
join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
Relids joinrelids, JoinType *jointype_p)
Relids joinrelids,
SpecialJoinInfo **sjinfo_p, bool *reversed_p)
{
JoinType jointype;
SpecialJoinInfo *match_sjinfo;
bool reversed;
bool is_valid_inner;
ListCell *l;
/*
* Ensure *jointype_p is set on failure return. This is just to suppress
* uninitialized-variable warnings from overly anal compilers.
* Ensure output params are set on failure return. This is just to
* suppress uninitialized-variable warnings from overly anal compilers.
*/
*jointype_p = JOIN_INNER;
*sjinfo_p = NULL;
*reversed_p = false;
/*
* If we have any outer joins, the proposed join might be illegal; and in
* any case we have to determine its join type. Scan the OJ list for
* conflicts.
* If we have any special joins, the proposed join might be illegal; and
* in any case we have to determine its join type. Scan the join info
* list for conflicts.
*/
jointype = JOIN_INNER; /* default if no match to an OJ */
match_sjinfo = NULL;
reversed = false;
is_valid_inner = true;
foreach(l, root->oj_info_list)
foreach(l, root->join_info_list)
{
OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l);
SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
/*
* This OJ is not relevant unless its RHS overlaps the proposed join.
* (Check this first as a fast path for dismissing most irrelevant OJs
* quickly.)
* This special join is not relevant unless its RHS overlaps the
* proposed join. (Check this first as a fast path for dismissing
* most irrelevant SJs quickly.)
*/
if (!bms_overlap(ojinfo->min_righthand, joinrelids))
if (!bms_overlap(sjinfo->min_righthand, joinrelids))
continue;
/*
* Also, not relevant if proposed join is fully contained within RHS
* (ie, we're still building up the RHS).
*/
if (bms_is_subset(joinrelids, ojinfo->min_righthand))
if (bms_is_subset(joinrelids, sjinfo->min_righthand))
continue;
/*
* Also, not relevant if OJ is already done within either input.
* Also, not relevant if SJ is already done within either input.
*/
if (bms_is_subset(ojinfo->min_lefthand, rel1->relids) &&
bms_is_subset(ojinfo->min_righthand, rel1->relids))
if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
bms_is_subset(sjinfo->min_righthand, rel1->relids))
continue;
if (bms_is_subset(ojinfo->min_lefthand, rel2->relids) &&
bms_is_subset(ojinfo->min_righthand, rel2->relids))
if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
bms_is_subset(sjinfo->min_righthand, rel2->relids))
continue;
/*
* If one input contains min_lefthand and the other contains
* min_righthand, then we can perform the OJ at this join.
* min_righthand, then we can perform the SJ at this join.
*
* Barf if we get matches to more than one OJ (is that possible?)
* Barf if we get matches to more than one SJ (is that possible?)
*/
if (bms_is_subset(ojinfo->min_lefthand, rel1->relids) &&
bms_is_subset(ojinfo->min_righthand, rel2->relids))
if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
bms_is_subset(sjinfo->min_righthand, rel2->relids))
{
if (jointype != JOIN_INNER)
if (match_sjinfo)
return false; /* invalid join path */
jointype = ojinfo->is_full_join ? JOIN_FULL : JOIN_LEFT;
match_sjinfo = sjinfo;
reversed = false;
}
else if (bms_is_subset(ojinfo->min_lefthand, rel2->relids) &&
bms_is_subset(ojinfo->min_righthand, rel1->relids))
else if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
bms_is_subset(sjinfo->min_righthand, rel1->relids))
{
if (jointype != JOIN_INNER)
if (match_sjinfo)
return false; /* invalid join path */
jointype = ojinfo->is_full_join ? JOIN_FULL : JOIN_RIGHT;
match_sjinfo = sjinfo;
reversed = true;
}
else
{
/*----------
* Otherwise, the proposed join overlaps the RHS but isn't
* a valid implementation of this OJ. It might still be
* a valid implementation of this SJ. It might still be
* a legal join, however. If both inputs overlap the RHS,
* assume that it's OK. Since the inputs presumably got past
* this function's checks previously, they can't overlap the
* LHS and their violations of the RHS boundary must represent
* OJs that have been determined to commute with this one.
* SJs that have been determined to commute with this one.
* We have to allow this to work correctly in cases like
* (a LEFT JOIN (b JOIN (c LEFT JOIN d)))
* when the c/d join has been determined to commute with the join
@ -428,105 +436,33 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
* as a violation of the upper join's RHS.
* Furthermore, if one input overlaps the RHS and the other does
* not, we should still allow the join if it is a valid
* implementation of some other OJ. We have to allow this to
* implementation of some other SJ. We have to allow this to
* support the associative identity
* (a LJ b on Pab) LJ c ON Pbc = a LJ (b LJ c ON Pbc) on Pab
* since joining B directly to C violates the lower OJ's RHS.
* since joining B directly to C violates the lower SJ's RHS.
* We assume that make_outerjoininfo() set things up correctly
* so that we'll only match to some OJ if the join is valid.
* so that we'll only match to some SJ if the join is valid.
* Set flag here to check at bottom of loop.
*----------
*/
if (bms_overlap(rel1->relids, ojinfo->min_righthand) &&
bms_overlap(rel2->relids, ojinfo->min_righthand))
if (bms_overlap(rel1->relids, sjinfo->min_righthand) &&
bms_overlap(rel2->relids, sjinfo->min_righthand))
{
/* seems OK */
Assert(!bms_overlap(joinrelids, ojinfo->min_lefthand));
Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand));
}
else
is_valid_inner = false;
}
}
/* Fail if violated some OJ's RHS and didn't match to another OJ */
if (jointype == JOIN_INNER && !is_valid_inner)
/* Fail if violated some SJ's RHS and didn't match to another SJ */
if (match_sjinfo == NULL && !is_valid_inner)
return false; /* invalid join path */
/*
* Similarly, if we are implementing IN clauses as joins, check for
* illegal join path and detect whether we need a non-default join type.
*/
foreach(l, root->in_info_list)
{
InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
/*
* This IN clause is not relevant unless its RHS overlaps the proposed
* join. (Check this first as a fast path for dismissing most
* irrelevant INs quickly.)
*/
if (!bms_overlap(ininfo->righthand, joinrelids))
continue;
/*
* If we are still building the IN clause's RHS, then this IN clause
* isn't relevant yet.
*/
if (bms_is_subset(joinrelids, ininfo->righthand))
continue;
/*
* Cannot join if proposed join contains rels not in the RHS *and*
* contains only part of the RHS. We must build the complete RHS
* (subselect's join) before it can be joined to rels outside the
* subselect.
*/
if (!bms_is_subset(ininfo->righthand, joinrelids))
return false;
/*
* At this point we are considering a join of the IN's RHS to some
* other rel(s).
*
* If we already joined IN's RHS to any other rels in either input
* path, then this join is not constrained (the necessary work was
* done at the lower level where that join occurred).
*/
if (bms_is_subset(ininfo->righthand, rel1->relids) &&
!bms_equal(ininfo->righthand, rel1->relids))
continue;
if (bms_is_subset(ininfo->righthand, rel2->relids) &&
!bms_equal(ininfo->righthand, rel2->relids))
continue;
/*
* JOIN_IN technique will work if outerrel includes LHS and innerrel
* is exactly RHS; conversely JOIN_REVERSE_IN handles RHS/LHS.
*
* JOIN_UNIQUE_OUTER will work if outerrel is exactly RHS; conversely
* JOIN_UNIQUE_INNER will work if innerrel is exactly RHS.
*
* But none of these will work if we already found an OJ or another IN
* that needs to trigger here.
*/
if (jointype != JOIN_INNER)
return false;
if (bms_is_subset(ininfo->lefthand, rel1->relids) &&
bms_equal(ininfo->righthand, rel2->relids))
jointype = JOIN_IN;
else if (bms_is_subset(ininfo->lefthand, rel2->relids) &&
bms_equal(ininfo->righthand, rel1->relids))
jointype = JOIN_REVERSE_IN;
else if (bms_equal(ininfo->righthand, rel1->relids))
jointype = JOIN_UNIQUE_OUTER;
else if (bms_equal(ininfo->righthand, rel2->relids))
jointype = JOIN_UNIQUE_INNER;
else
return false; /* invalid join path */
}
/* Join is valid */
*jointype_p = jointype;
/* Otherwise, it's a valid join */
*sjinfo_p = match_sjinfo;
*reversed_p = reversed;
return true;
}
@ -540,14 +476,16 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
* pairs of rels that add up to the same set of base rels.)
*
* NB: will return NULL if attempted join is not valid. This can happen
* when working with outer joins, or with IN clauses that have been turned
* into joins.
* when working with outer joins, or with IN or EXISTS clauses that have been
* turned into joins.
*/
RelOptInfo *
make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
{
Relids joinrelids;
JoinType jointype;
SpecialJoinInfo *sjinfo;
bool reversed;
SpecialJoinInfo sjinfo_data;
RelOptInfo *joinrel;
List *restrictlist;
@ -558,18 +496,48 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
joinrelids = bms_union(rel1->relids, rel2->relids);
/* Check validity and determine join type. */
if (!join_is_legal(root, rel1, rel2, joinrelids, &jointype))
if (!join_is_legal(root, rel1, rel2, joinrelids,
&sjinfo, &reversed))
{
/* invalid join path */
bms_free(joinrelids);
return NULL;
}
/* Swap rels if needed to match the join info. */
if (reversed)
{
RelOptInfo *trel = rel1;
rel1 = rel2;
rel2 = trel;
}
/*
* If it's a plain inner join, then we won't have found anything in
* join_info_list. Make up a SpecialJoinInfo so that selectivity
* estimation functions will know what's being joined.
*/
if (sjinfo == NULL)
{
sjinfo = &sjinfo_data;
sjinfo->type = T_SpecialJoinInfo;
sjinfo->min_lefthand = rel1->relids;
sjinfo->min_righthand = rel2->relids;
sjinfo->syn_lefthand = rel1->relids;
sjinfo->syn_righthand = rel2->relids;
sjinfo->jointype = JOIN_INNER;
/* we don't bother trying to make the remaining fields valid */
sjinfo->lhs_strict = false;
sjinfo->delay_upper_joins = false;
sjinfo->join_quals = NIL;
}
/*
* Find or build the join RelOptInfo, and compute the restrictlist that
* goes with this particular joining.
*/
joinrel = build_join_rel(root, joinrelids, rel1, rel2, jointype,
joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo,
&restrictlist);
/*
@ -589,8 +557,11 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
* previously computed paths and mark the join as dummy. (We do it
* this way since it's conceivable that dummy-ness of a multi-element
* join might only be noticeable for certain construction paths.)
*
* We need only consider the jointypes that appear in join_info_list,
* plus JOIN_INNER.
*/
switch (jointype)
switch (sjinfo->jointype)
{
case JOIN_INNER:
if (is_dummy_rel(rel1) || is_dummy_rel(rel2))
@ -598,9 +569,11 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
mark_dummy_join(joinrel);
break;
}
add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_INNER,
add_paths_to_joinrel(root, joinrel, rel1, rel2,
JOIN_INNER, sjinfo,
restrictlist);
add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_INNER,
add_paths_to_joinrel(root, joinrel, rel2, rel1,
JOIN_INNER, sjinfo,
restrictlist);
break;
case JOIN_LEFT:
@ -609,9 +582,11 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
mark_dummy_join(joinrel);
break;
}
add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_LEFT,
add_paths_to_joinrel(root, joinrel, rel1, rel2,
JOIN_LEFT, sjinfo,
restrictlist);
add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_RIGHT,
add_paths_to_joinrel(root, joinrel, rel2, rel1,
JOIN_RIGHT, sjinfo,
restrictlist);
break;
case JOIN_FULL:
@ -620,75 +595,53 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
mark_dummy_join(joinrel);
break;
}
add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_FULL,
add_paths_to_joinrel(root, joinrel, rel1, rel2,
JOIN_FULL, sjinfo,
restrictlist);
add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_FULL,
add_paths_to_joinrel(root, joinrel, rel2, rel1,
JOIN_FULL, sjinfo,
restrictlist);
break;
case JOIN_RIGHT:
if (is_dummy_rel(rel2))
{
mark_dummy_join(joinrel);
break;
}
add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_RIGHT,
restrictlist);
add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_LEFT,
restrictlist);
break;
case JOIN_IN:
case JOIN_SEMI:
if (is_dummy_rel(rel1) || is_dummy_rel(rel2))
{
mark_dummy_join(joinrel);
break;
}
add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_IN,
restrictlist);
/* REVERSE_IN isn't supported by joinpath.c */
add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_INNER,
restrictlist);
add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_OUTER,
add_paths_to_joinrel(root, joinrel, rel1, rel2,
JOIN_SEMI, sjinfo,
restrictlist);
/*
* If we know how to unique-ify the RHS and one input rel is
* exactly the RHS (not a superset) we can consider unique-ifying
* it and then doing a regular join.
*/
if (bms_equal(sjinfo->syn_righthand, rel2->relids) &&
create_unique_path(root, rel2, rel2->cheapest_total_path,
sjinfo) != NULL)
{
add_paths_to_joinrel(root, joinrel, rel1, rel2,
JOIN_UNIQUE_INNER, sjinfo,
restrictlist);
add_paths_to_joinrel(root, joinrel, rel2, rel1,
JOIN_UNIQUE_OUTER, sjinfo,
restrictlist);
}
break;
case JOIN_REVERSE_IN:
if (is_dummy_rel(rel1) || is_dummy_rel(rel2))
case JOIN_ANTI:
if (is_dummy_rel(rel1))
{
mark_dummy_join(joinrel);
break;
}
/* REVERSE_IN isn't supported by joinpath.c */
add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_IN,
restrictlist);
add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_OUTER,
restrictlist);
add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_INNER,
restrictlist);
break;
case JOIN_UNIQUE_OUTER:
if (is_dummy_rel(rel1) || is_dummy_rel(rel2))
{
mark_dummy_join(joinrel);
break;
}
add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_OUTER,
restrictlist);
add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_INNER,
restrictlist);
break;
case JOIN_UNIQUE_INNER:
if (is_dummy_rel(rel1) || is_dummy_rel(rel2))
{
mark_dummy_join(joinrel);
break;
}
add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_INNER,
restrictlist);
add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_OUTER,
add_paths_to_joinrel(root, joinrel, rel1, rel2,
JOIN_ANTI, sjinfo,
restrictlist);
break;
default:
elog(ERROR, "unrecognized join type: %d",
(int) jointype);
/* other values not expected here */
elog(ERROR, "unrecognized join type: %d", (int) sjinfo->jointype);
break;
}
@ -701,7 +654,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
/*
* have_join_order_restriction
* Detect whether the two relations should be joined to satisfy
* a join-order restriction arising from outer joins or IN clauses.
* a join-order restriction arising from special joins.
*
* In practice this is always used with have_relevant_joinclause(), and so
* could be merged with that function, but it seems clearer to separate the
@ -709,8 +662,8 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
* a clauseless join must be performed to satisfy join-order restrictions.
*
* Note: this is only a problem if one side of a degenerate outer join
* contains multiple rels, or a clauseless join is required within an IN's
* RHS; else we will find a join path via the "last ditch" case in
* contains multiple rels, or a clauseless join is required within an
* IN/EXISTS RHS; else we will find a join path via the "last ditch" case in
* join_search_one_level(). We could dispense with this test if we were
* willing to try bushy plans in the "last ditch" case, but that seems much
* less efficient.
@ -730,23 +683,23 @@ have_join_order_restriction(PlannerInfo *root,
* Also, the two rels could represent a clauseless join that has to be
* completed to build up the LHS or RHS of an outer join.
*/
foreach(l, root->oj_info_list)
foreach(l, root->join_info_list)
{
OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l);
SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
/* ignore full joins --- other mechanisms handle them */
if (ojinfo->is_full_join)
if (sjinfo->jointype == JOIN_FULL)
continue;
/* Can we perform the OJ with these rels? */
if (bms_is_subset(ojinfo->min_lefthand, rel1->relids) &&
bms_is_subset(ojinfo->min_righthand, rel2->relids))
/* Can we perform the SJ with these rels? */
if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
bms_is_subset(sjinfo->min_righthand, rel2->relids))
{
result = true;
break;
}
if (bms_is_subset(ojinfo->min_lefthand, rel2->relids) &&
bms_is_subset(ojinfo->min_righthand, rel1->relids))
if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
bms_is_subset(sjinfo->min_righthand, rel1->relids))
{
result = true;
break;
@ -754,63 +707,19 @@ have_join_order_restriction(PlannerInfo *root,
/*
* Might we need to join these rels to complete the RHS? We have to
* use "overlap" tests since either rel might include a lower OJ that
* use "overlap" tests since either rel might include a lower SJ that
* has been proven to commute with this one.
*/
if (bms_overlap(ojinfo->min_righthand, rel1->relids) &&
bms_overlap(ojinfo->min_righthand, rel2->relids))
if (bms_overlap(sjinfo->min_righthand, rel1->relids) &&
bms_overlap(sjinfo->min_righthand, rel2->relids))
{
result = true;
break;
}
/* Likewise for the LHS. */
if (bms_overlap(ojinfo->min_lefthand, rel1->relids) &&
bms_overlap(ojinfo->min_lefthand, rel2->relids))
{
result = true;
break;
}
}
/*
* Similarly, we need to allow a join that completes a degenerate
* IN-clause, or one that builds up its LHS or RHS.
*/
foreach(l, root->in_info_list)
{
InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
/* Can we perform the IN with these rels? */
if (bms_is_subset(ininfo->lefthand, rel1->relids) &&
bms_is_subset(ininfo->righthand, rel2->relids))
{
result = true;
break;
}
if (bms_is_subset(ininfo->lefthand, rel2->relids) &&
bms_is_subset(ininfo->righthand, rel1->relids))
{
result = true;
break;
}
/*
* Might we need to join these rels to complete the RHS? It's
* probably overkill to test "overlap", since we never join part of an
* IN's RHS to anything else, but may as well keep the coding similar
* to the OJ case.
*/
if (bms_overlap(ininfo->righthand, rel1->relids) &&
bms_overlap(ininfo->righthand, rel2->relids))
{
result = true;
break;
}
/* Likewise for the LHS. */
if (bms_overlap(ininfo->lefthand, rel1->relids) &&
bms_overlap(ininfo->lefthand, rel2->relids))
if (bms_overlap(sjinfo->min_lefthand, rel1->relids) &&
bms_overlap(sjinfo->min_lefthand, rel2->relids))
{
result = true;
break;
@ -852,37 +761,22 @@ has_join_restriction(PlannerInfo *root, RelOptInfo *rel)
{
ListCell *l;
foreach(l, root->oj_info_list)
foreach(l, root->join_info_list)
{
OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l);
SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
/* ignore full joins --- other mechanisms preserve their ordering */
if (ojinfo->is_full_join)
if (sjinfo->jointype == JOIN_FULL)
continue;
/* ignore if OJ is already contained in rel */
if (bms_is_subset(ojinfo->min_lefthand, rel->relids) &&
bms_is_subset(ojinfo->min_righthand, rel->relids))
/* ignore if SJ is already contained in rel */
if (bms_is_subset(sjinfo->min_lefthand, rel->relids) &&
bms_is_subset(sjinfo->min_righthand, rel->relids))
continue;
/* restricted if it overlaps LHS or RHS, but doesn't contain OJ */
if (bms_overlap(ojinfo->min_lefthand, rel->relids) ||
bms_overlap(ojinfo->min_righthand, rel->relids))
return true;
}
foreach(l, root->in_info_list)
{
InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
/* ignore if IN is already contained in rel */
if (bms_is_subset(ininfo->lefthand, rel->relids) &&
bms_is_subset(ininfo->righthand, rel->relids))
continue;
/* restricted if it overlaps LHS or RHS, but doesn't contain IN */
if (bms_overlap(ininfo->lefthand, rel->relids) ||
bms_overlap(ininfo->righthand, rel->relids))
/* restricted if it overlaps LHS or RHS, but doesn't contain SJ */
if (bms_overlap(sjinfo->min_lefthand, rel->relids) ||
bms_overlap(sjinfo->min_righthand, rel->relids))
return true;
}
@ -922,12 +816,14 @@ has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel)
if (have_relevant_joinclause(root, rel, rel2))
{
Relids joinrelids;
JoinType jointype;
SpecialJoinInfo *sjinfo;
bool reversed;
/* join_is_legal needs relids of the union */
joinrelids = bms_union(rel->relids, rel2->relids);
if (join_is_legal(root, rel, rel2, joinrelids, &jointype))
if (join_is_legal(root, rel, rel2, joinrelids,
&sjinfo, &reversed))
{
/* Yes, this will work */
bms_free(joinrelids);

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.84 2008/01/09 20:42:27 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.85 2008/08/14 18:47:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -169,11 +169,11 @@ create_or_index_quals(PlannerInfo *root, RelOptInfo *rel)
* selectivity will stay cached ...)
*/
or_selec = clause_selectivity(root, (Node *) or_rinfo,
0, JOIN_INNER);
0, JOIN_INNER, NULL);
if (or_selec > 0 && or_selec < 1)
{
orig_selec = clause_selectivity(root, (Node *) bestrinfo,
0, JOIN_INNER);
0, JOIN_INNER, NULL);
bestrinfo->this_selec = orig_selec / or_selec;
/* clamp result to sane range */
if (bestrinfo->this_selec > 1)