1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-21 00:42:43 +03:00

Clean up the loose ends in selectivity estimation left by my patch for semi

and anti joins.  To do this, pass the SpecialJoinInfo struct for the current
join as an additional optional argument to operator join selectivity
estimation functions.  This allows the estimator to tell not only what kind
of join is being formed, but which variable is on which side of the join;
a requirement long recognized but not dealt with till now.  This also leaves
the door open for future improvements in the estimators, such as accounting
for the null-insertion effects of lower outer joins.  I didn't do anything
about that in the current patch but the information is in principle deducible
from what's passed.

The patch also clarifies the definition of join selectivity for semi/anti
joins: it's the fraction of the left input that has (at least one) match
in the right input.  This allows getting rid of some very fuzzy thinking
that I had committed in the original 7.4-era IN-optimization patch.
There's probably room to estimate this better than the present patch does,
but at least we know what to estimate.

Since I had to touch CREATE OPERATOR anyway to allow a variant signature
for join estimator functions, I took the opportunity to add a couple of
additional checks that were missing, per my recent message to -hackers:
* Check that estimator functions return float8;
* Require execute permission at the time of CREATE OPERATOR on the
operator's function as well as the estimator functions;
* Require ownership of any pre-existing operator that's modified by
the command.
I also moved the lookup of the functions out of OperatorCreate() and
into operatorcmds.c, since that seemed more consistent with most of
the other catalog object creation processes, eg CREATE TYPE.
This commit is contained in:
Tom Lane
2008-08-16 00:01:38 +00:00
parent 118461114e
commit d4af2a6481
13 changed files with 704 additions and 483 deletions

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.91 2008/08/14 18:47:59 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.92 2008/08/16 00:01:36 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -398,6 +398,50 @@ bms_is_subset_singleton(const Bitmapset *s, int x)
return false;
}
/*
 * treat_as_join_clause -
 *	  Report whether an operator clause should be estimated with the join
 *	  selectivity estimator (true) or the restriction estimator (false).
 *	  Helper for clause_selectivity().
 *
 * clause is the expression being examined; rinfo, when non-NULL, supplies
 * the clause's precomputed relid set; varRelid and sjinfo are the values
 * that were handed to clause_selectivity().
 */
static inline bool
treat_as_join_clause(Node *clause, RestrictInfo *rinfo,
					 int varRelid, SpecialJoinInfo *sjinfo)
{
	/*
	 * A nonzero varRelid means the caller is forcing restriction mode (eg,
	 * because we are examining an inner indexscan qual).
	 */
	if (varRelid != 0)
		return false;

	/*
	 * Without join info the clause is being evaluated at a scan node, so
	 * it must be a restriction clause.
	 */
	if (sjinfo == NULL)
		return false;

	/*
	 * We are at a join: the clause is a join clause exactly when it uses
	 * more than one relation.  Use the relid set cached in rinfo when we
	 * have one; it is cheaper than walking the clause.
	 *
	 * XXX Since we know the clause is being evaluated at a join, the only
	 * way it could be single-relation is if it was delayed by outer joins.
	 * Although we can make use of the restriction qual estimators anyway,
	 * it seems likely that we ought to account for the probability of
	 * injected nulls somehow.
	 */
	if (rinfo != NULL)
		return bms_membership(rinfo->clause_relids) == BMS_MULTIPLE;

	return NumRelids(clause) > 1;
}
/*
* clause_selectivity -
@@ -429,9 +473,6 @@ bms_is_subset_singleton(const Bitmapset *s, int x)
* root->join_info_list.
* 2. For an INNER join, sjinfo is just a transient struct, and only the
* relids and jointype fields in it can be trusted.
* 3. XXX sjinfo might be NULL even though it really is a join. This case
* will go away soon, but fixing it requires API changes for oprjoin and
* amcostestimate functions.
* It is possible for jointype to be different from sjinfo->jointype.
* This indicates we are considering a variant join: either with
* the LHS and RHS switched, or with one input unique-ified.
@@ -603,36 +644,14 @@ clause_selectivity(PlannerInfo *root,
else if (is_opclause(clause) || IsA(clause, DistinctExpr))
{
Oid opno = ((OpExpr *) clause)->opno;
bool is_join_clause;
if (varRelid != 0)
{
/*
* If we are considering a nestloop join then all clauses are
* restriction clauses, since we are only interested in the one
* relation.
*/
is_join_clause = false;
}
else
{
/*
* Otherwise, it's a join if there's more than one relation used.
* We can optimize this calculation if an rinfo was passed.
*/
if (rinfo)
is_join_clause = (bms_membership(rinfo->clause_relids) ==
BMS_MULTIPLE);
else
is_join_clause = (NumRelids(clause) > 1);
}
if (is_join_clause)
if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo))
{
/* Estimate selectivity for a join clause. */
s1 = join_selectivity(root, opno,
((OpExpr *) clause)->args,
jointype);
jointype,
sjinfo);
}
else
{
@@ -671,35 +690,11 @@ clause_selectivity(PlannerInfo *root,
#endif
else if (IsA(clause, ScalarArrayOpExpr))
{
/* First, decide if it's a join clause, same as for OpExpr */
bool is_join_clause;
if (varRelid != 0)
{
/*
* If we are considering a nestloop join then all clauses are
* restriction clauses, since we are only interested in the one
* relation.
*/
is_join_clause = false;
}
else
{
/*
* Otherwise, it's a join if there's more than one relation used.
* We can optimize this calculation if an rinfo was passed.
*/
if (rinfo)
is_join_clause = (bms_membership(rinfo->clause_relids) ==
BMS_MULTIPLE);
else
is_join_clause = (NumRelids(clause) > 1);
}
/* Use node specific selectivity calculation function */
s1 = scalararraysel(root,
(ScalarArrayOpExpr *) clause,
is_join_clause,
treat_as_join_clause(clause, rinfo,
varRelid, sjinfo),
varRelid,
jointype,
sjinfo);