1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-21 00:42:43 +03:00

Implement SEMI and ANTI joins in the planner and executor. (Semijoins replace

the old JOIN_IN code, but antijoins are new functionality.)  Teach the planner
to convert appropriate EXISTS and NOT EXISTS subqueries into semi and anti
joins respectively.  Also, LEFT JOINs with suitable upper-level IS NULL
filters are recognized as being anti joins.  Unify the InClauseInfo and
OuterJoinInfo infrastructure into "SpecialJoinInfo".  With that change,
it becomes possible to associate a SpecialJoinInfo with every join attempt,
which permits some cleanup of join selectivity estimation.  That needs to be
taken much further than this patch does, but the next step is to change the
API for oprjoin selectivity functions, which seems like material for a
separate patch.  So for the moment the output size estimates for semi and
especially anti joins are quite bogus.
This commit is contained in:
Tom Lane
2008-08-14 18:48:00 +00:00
parent ef1c807c25
commit e006a24ad1
40 changed files with 2129 additions and 1204 deletions

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.90 2008/01/11 17:00:45 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.91 2008/08/14 18:47:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -94,7 +94,8 @@ Selectivity
clauselist_selectivity(PlannerInfo *root,
List *clauses,
int varRelid,
JoinType jointype)
JoinType jointype,
SpecialJoinInfo *sjinfo)
{
Selectivity s1 = 1.0;
RangeQueryClause *rqlist = NULL;
@@ -106,7 +107,7 @@ clauselist_selectivity(PlannerInfo *root,
*/
if (list_length(clauses) == 1)
return clause_selectivity(root, (Node *) linitial(clauses),
varRelid, jointype);
varRelid, jointype, sjinfo);
/*
* Initial scan over clauses. Anything that doesn't look like a potential
@@ -120,7 +121,7 @@ clauselist_selectivity(PlannerInfo *root,
Selectivity s2;
/* Always compute the selectivity using clause_selectivity */
s2 = clause_selectivity(root, clause, varRelid, jointype);
s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo);
/*
* Check for being passed a RestrictInfo.
@@ -227,9 +228,8 @@ clauselist_selectivity(PlannerInfo *root,
s2 = rqlist->hibound + rqlist->lobound - 1.0;
/* Adjust for double-exclusion of NULLs */
/* HACK: disable nulltestsel's special outer-join logic */
s2 += nulltestsel(root, IS_NULL, rqlist->var,
varRelid, JOIN_INNER);
varRelid, jointype, sjinfo);
/*
* A zero or slightly negative s2 should be converted into a
@@ -420,13 +420,32 @@ bms_is_subset_singleton(const Bitmapset *s, int x)
* is appropriate for ordinary join clauses and restriction clauses.
*
* jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
* if the clause isn't a join clause or the context is uncertain.
* if the clause isn't a join clause.
*
* sjinfo is NULL for a non-join clause, otherwise it provides additional
* context information about the join being performed. There are some
* special cases:
* 1. For a special (not INNER) join, sjinfo is always a member of
* root->join_info_list.
* 2. For an INNER join, sjinfo is just a transient struct, and only the
* relids and jointype fields in it can be trusted.
* 3. XXX sjinfo might be NULL even though it really is a join. This case
* will go away soon, but fixing it requires API changes for oprjoin and
* amcostestimate functions.
* It is possible for jointype to be different from sjinfo->jointype.
* This indicates we are considering a variant join: either with
* the LHS and RHS switched, or with one input unique-ified.
*
* Note: when passing nonzero varRelid, it's normally appropriate to set
* jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
* join clause; because we aren't treating it as a join clause.
*/
Selectivity
clause_selectivity(PlannerInfo *root,
Node *clause,
int varRelid,
JoinType jointype)
JoinType jointype,
SpecialJoinInfo *sjinfo)
{
Selectivity s1 = 0.5; /* default for any unhandled clause type */
RestrictInfo *rinfo = NULL;
@@ -457,36 +476,15 @@ clause_selectivity(PlannerInfo *root,
* If possible, cache the result of the selectivity calculation for
* the clause. We can cache if varRelid is zero or the clause
* contains only vars of that relid --- otherwise varRelid will affect
* the result, so mustn't cache. We also have to be careful about the
* jointype. It's OK to cache when jointype is JOIN_INNER or one of
* the outer join types (any given outer-join clause should always be
* examined with the same jointype, so result won't change). It's not
* OK to cache when jointype is one of the special types associated
* with IN processing, because the same clause may be examined with
* different jointypes and the result should vary.
* the result, so mustn't cache.
*/
if (varRelid == 0 ||
bms_is_subset_singleton(rinfo->clause_relids, varRelid))
{
switch (jointype)
{
case JOIN_INNER:
case JOIN_LEFT:
case JOIN_FULL:
case JOIN_RIGHT:
/* Cacheable --- do we already have the result? */
if (rinfo->this_selec >= 0)
return rinfo->this_selec;
cacheable = true;
break;
case JOIN_IN:
case JOIN_REVERSE_IN:
case JOIN_UNIQUE_OUTER:
case JOIN_UNIQUE_INNER:
/* unsafe to cache */
break;
}
/* Cacheable --- do we already have the result? */
if (rinfo->this_selec >= 0)
return rinfo->this_selec;
cacheable = true;
}
/*
@@ -568,7 +566,8 @@ clause_selectivity(PlannerInfo *root,
s1 = 1.0 - clause_selectivity(root,
(Node *) get_notclausearg((Expr *) clause),
varRelid,
jointype);
jointype,
sjinfo);
}
else if (and_clause(clause))
{
@@ -576,7 +575,8 @@ clause_selectivity(PlannerInfo *root,
s1 = clauselist_selectivity(root,
((BoolExpr *) clause)->args,
varRelid,
jointype);
jointype,
sjinfo);
}
else if (or_clause(clause))
{
@@ -594,7 +594,8 @@ clause_selectivity(PlannerInfo *root,
Selectivity s2 = clause_selectivity(root,
(Node *) lfirst(arg),
varRelid,
jointype);
jointype,
sjinfo);
s1 = s1 + s2 - s1 * s2;
}
@@ -700,7 +701,8 @@ clause_selectivity(PlannerInfo *root,
(ScalarArrayOpExpr *) clause,
is_join_clause,
varRelid,
jointype);
jointype,
sjinfo);
}
else if (IsA(clause, RowCompareExpr))
{
@@ -708,7 +710,8 @@ clause_selectivity(PlannerInfo *root,
s1 = rowcomparesel(root,
(RowCompareExpr *) clause,
varRelid,
jointype);
jointype,
sjinfo);
}
else if (IsA(clause, NullTest))
{
@@ -717,7 +720,8 @@ clause_selectivity(PlannerInfo *root,
((NullTest *) clause)->nulltesttype,
(Node *) ((NullTest *) clause)->arg,
varRelid,
jointype);
jointype,
sjinfo);
}
else if (IsA(clause, BooleanTest))
{
@@ -726,7 +730,8 @@ clause_selectivity(PlannerInfo *root,
((BooleanTest *) clause)->booltesttype,
(Node *) ((BooleanTest *) clause)->arg,
varRelid,
jointype);
jointype,
sjinfo);
}
else if (IsA(clause, CurrentOfExpr))
{
@@ -743,7 +748,8 @@ clause_selectivity(PlannerInfo *root,
s1 = clause_selectivity(root,
(Node *) ((RelabelType *) clause)->arg,
varRelid,
jointype);
jointype,
sjinfo);
}
else if (IsA(clause, CoerceToDomain))
{
@@ -751,7 +757,8 @@ clause_selectivity(PlannerInfo *root,
s1 = clause_selectivity(root,
(Node *) ((CoerceToDomain *) clause)->arg,
varRelid,
jointype);
jointype,
sjinfo);
}
/* Cache the result if possible */