mirror of
https://github.com/postgres/postgres.git
synced 2025-11-21 00:42:43 +03:00
Clean up the loose ends in selectivity estimation left by my patch for semi
and anti joins. To do this, pass the SpecialJoinInfo struct for the current join as an additional optional argument to operator join selectivity estimation functions. This allows the estimator to tell not only what kind of join is being formed, but which variable is on which side of the join; a requirement long recognized but not dealt with till now. This also leaves the door open for future improvements in the estimators, such as accounting for the null-insertion effects of lower outer joins. I didn't do anything about that in the current patch but the information is in principle deducible from what's passed.

The patch also clarifies the definition of join selectivity for semi/anti joins: it's the fraction of the left input that has (at least one) match in the right input. This allows getting rid of some very fuzzy thinking that I had committed in the original 7.4-era IN-optimization patch. There's probably room to estimate this better than the present patch does, but at least we know what to estimate.

Since I had to touch CREATE OPERATOR anyway to allow a variant signature for join estimator functions, I took the opportunity to add a couple of additional checks that were missing, per my recent message to -hackers:
* Check that estimator functions return float8;
* Require execute permission at the time of CREATE OPERATOR on the operator's function as well as the estimator functions;
* Require ownership of any pre-existing operator that's modified by the command.

I also moved the lookup of the functions out of OperatorCreate() and into operatorcmds.c, since that seemed more consistent with most of the other catalog object creation processes, eg CREATE TYPE.
This commit is contained in:
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.91 2008/08/14 18:47:59 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.92 2008/08/16 00:01:36 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -398,6 +398,50 @@ bms_is_subset_singleton(const Bitmapset *s, int x)
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* treat_as_join_clause -
|
||||
* Decide whether an operator clause is to be handled by the
|
||||
* restriction or join estimator. Subroutine for clause_selectivity().
|
||||
*/
|
||||
static inline bool
|
||||
treat_as_join_clause(Node *clause, RestrictInfo *rinfo,
|
||||
int varRelid, SpecialJoinInfo *sjinfo)
|
||||
{
|
||||
if (varRelid != 0)
|
||||
{
|
||||
/*
|
||||
* Caller is forcing restriction mode (eg, because we are examining
|
||||
* an inner indexscan qual).
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
else if (sjinfo == NULL)
|
||||
{
|
||||
/*
|
||||
* It must be a restriction clause, since it's being evaluated at
|
||||
* a scan node.
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Otherwise, it's a join if there's more than one relation used.
|
||||
* We can optimize this calculation if an rinfo was passed.
|
||||
*
|
||||
* XXX Since we know the clause is being evaluated at a join,
|
||||
* the only way it could be single-relation is if it was delayed
|
||||
* by outer joins. Although we can make use of the restriction
|
||||
* qual estimators anyway, it seems likely that we ought to account
|
||||
* for the probability of injected nulls somehow.
|
||||
*/
|
||||
if (rinfo)
|
||||
return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
|
||||
else
|
||||
return (NumRelids(clause) > 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* clause_selectivity -
|
||||
@@ -429,9 +473,6 @@ bms_is_subset_singleton(const Bitmapset *s, int x)
|
||||
* root->join_info_list.
|
||||
* 2. For an INNER join, sjinfo is just a transient struct, and only the
|
||||
* relids and jointype fields in it can be trusted.
|
||||
* 3. XXX sjinfo might be NULL even though it really is a join. This case
|
||||
* will go away soon, but fixing it requires API changes for oprjoin and
|
||||
* amcostestimate functions.
|
||||
* It is possible for jointype to be different from sjinfo->jointype.
|
||||
* This indicates we are considering a variant join: either with
|
||||
* the LHS and RHS switched, or with one input unique-ified.
|
||||
@@ -603,36 +644,14 @@ clause_selectivity(PlannerInfo *root,
|
||||
else if (is_opclause(clause) || IsA(clause, DistinctExpr))
|
||||
{
|
||||
Oid opno = ((OpExpr *) clause)->opno;
|
||||
bool is_join_clause;
|
||||
|
||||
if (varRelid != 0)
|
||||
{
|
||||
/*
|
||||
* If we are considering a nestloop join then all clauses are
|
||||
* restriction clauses, since we are only interested in the one
|
||||
* relation.
|
||||
*/
|
||||
is_join_clause = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Otherwise, it's a join if there's more than one relation used.
|
||||
* We can optimize this calculation if an rinfo was passed.
|
||||
*/
|
||||
if (rinfo)
|
||||
is_join_clause = (bms_membership(rinfo->clause_relids) ==
|
||||
BMS_MULTIPLE);
|
||||
else
|
||||
is_join_clause = (NumRelids(clause) > 1);
|
||||
}
|
||||
|
||||
if (is_join_clause)
|
||||
if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo))
|
||||
{
|
||||
/* Estimate selectivity for a join clause. */
|
||||
s1 = join_selectivity(root, opno,
|
||||
((OpExpr *) clause)->args,
|
||||
jointype);
|
||||
jointype,
|
||||
sjinfo);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -671,35 +690,11 @@ clause_selectivity(PlannerInfo *root,
|
||||
#endif
|
||||
else if (IsA(clause, ScalarArrayOpExpr))
|
||||
{
|
||||
/* First, decide if it's a join clause, same as for OpExpr */
|
||||
bool is_join_clause;
|
||||
|
||||
if (varRelid != 0)
|
||||
{
|
||||
/*
|
||||
* If we are considering a nestloop join then all clauses are
|
||||
* restriction clauses, since we are only interested in the one
|
||||
* relation.
|
||||
*/
|
||||
is_join_clause = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Otherwise, it's a join if there's more than one relation used.
|
||||
* We can optimize this calculation if an rinfo was passed.
|
||||
*/
|
||||
if (rinfo)
|
||||
is_join_clause = (bms_membership(rinfo->clause_relids) ==
|
||||
BMS_MULTIPLE);
|
||||
else
|
||||
is_join_clause = (NumRelids(clause) > 1);
|
||||
}
|
||||
|
||||
/* Use node specific selectivity calculation function */
|
||||
s1 = scalararraysel(root,
|
||||
(ScalarArrayOpExpr *) clause,
|
||||
is_join_clause,
|
||||
treat_as_join_clause(clause, rinfo,
|
||||
varRelid, sjinfo),
|
||||
varRelid,
|
||||
jointype,
|
||||
sjinfo);
|
||||
|
||||
Reference in New Issue
Block a user