Clean up the loose ends in selectivity estimation left by my patch for semi

and anti joins. To do this, pass the SpecialJoinInfo struct for the current join as an additional optional argument to operator join selectivity estimation functions. This allows the estimator to tell not only what kind of join is being formed, but which variable is on which side of the join; a requirement long recognized but not dealt with till now. This also leaves the door open for future improvements in the estimators, such as accounting for the null-insertion effects of lower outer joins. I didn't do anything about that in the current patch but the information is in principle deducible from what's passed. The patch also clarifies the definition of join selectivity for semi/anti joins: it's the fraction of the left input that has (at least one) match in the right input. This allows getting rid of some very fuzzy thinking that I had committed in the original 7.4-era IN-optimization patch. There's probably room to estimate this better than the present patch does, but at least we know what to estimate. Since I had to touch CREATE OPERATOR anyway to allow a variant signature for join estimator functions, I took the opportunity to add a couple of additional checks that were missing, per my recent message to -hackers: * Check that estimator functions return float8; * Require execute permission at the time of CREATE OPERATOR on the operator's function as well as the estimator functions; * Require ownership of any pre-existing operator that's modified by the command. I also moved the lookup of the functions out of OperatorCreate() and into operatorcmds.c, since that seemed more consistent with most of the other catalog object creation processes, eg CREATE TYPE.
2025-07-02 09:02:37 +03:00 · 2008-08-16 00:01:38 +00:00
parent 118461114e
commit d4af2a6481
13 changed files with 704 additions and 483 deletions
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.91 2008/08/14 18:47:59 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.92 2008/08/16 00:01:36 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -398,6 +398,50 @@ bms_is_subset_singleton(const Bitmapset *s, int x)
 	return false;
 }

+/*
+ * treat_as_join_clause -
+ *	  Decide whether an operator clause is to be handled by the
+ *	  restriction or join estimator.  Subroutine for clause_selectivity().
+ */
+static inline bool
+treat_as_join_clause(Node *clause, RestrictInfo *rinfo,
+					 int varRelid, SpecialJoinInfo *sjinfo)
+{
+	if (varRelid != 0)
+	{
+		/*
+		 * Caller is forcing restriction mode (eg, because we are examining
+		 * an inner indexscan qual).
+		 */
+		return false;
+	}
+	else if (sjinfo == NULL)
+	{
+		/*
+		 * It must be a restriction clause, since it's being evaluated at
+		 * a scan node.
+		 */
+		return false;
+	}
+	else
+	{
+		/*
+		 * Otherwise, it's a join if there's more than one relation used.
+		 * We can optimize this calculation if an rinfo was passed.
+		 *
+		 * XXX  Since we know the clause is being evaluated at a join,
+		 * the only way it could be single-relation is if it was delayed
+		 * by outer joins.  Although we can make use of the restriction
+		 * qual estimators anyway, it seems likely that we ought to account
+		 * for the probability of injected nulls somehow.
+		 */
+		if (rinfo)
+			return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
+		else
+			return (NumRelids(clause) > 1);
+	}
+}
+

 /*
 * clause_selectivity -
@ -429,9 +473,6 @@ bms_is_subset_singleton(const Bitmapset *s, int x)
 *	   root->join_info_list.
 *	2. For an INNER join, sjinfo is just a transient struct, and only the
 *	   relids and jointype fields in it can be trusted.
- *	3. XXX sjinfo might be NULL even though it really is a join.  This case
- *	   will go away soon, but fixing it requires API changes for oprjoin and
- *	   amcostestimate functions.
 * It is possible for jointype to be different from sjinfo->jointype.
 * This indicates we are considering a variant join: either with
 * the LHS and RHS switched, or with one input unique-ified.
@ -603,36 +644,14 @@ clause_selectivity(PlannerInfo *root,
 	else if (is_opclause(clause) || IsA(clause, DistinctExpr))
 	{
 		Oid			opno = ((OpExpr *) clause)->opno;
-		bool		is_join_clause;

-		if (varRelid != 0)
-		{
-			/*
-			 * If we are considering a nestloop join then all clauses are
-			 * restriction clauses, since we are only interested in the one
-			 * relation.
-			 */
-			is_join_clause = false;
-		}
-		else
-		{
-			/*
-			 * Otherwise, it's a join if there's more than one relation used.
-			 * We can optimize this calculation if an rinfo was passed.
-			 */
-			if (rinfo)
-				is_join_clause = (bms_membership(rinfo->clause_relids) ==
-								  BMS_MULTIPLE);
-			else
-				is_join_clause = (NumRelids(clause) > 1);
-		}
-
-		if (is_join_clause)
+		if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo))
 		{
 			/* Estimate selectivity for a join clause. */
 			s1 = join_selectivity(root, opno,
 								  ((OpExpr *) clause)->args,
-								  jointype);
+								  jointype,
+								  sjinfo);
 		}
 		else
 		{
@ -671,35 +690,11 @@ clause_selectivity(PlannerInfo *root,
 #endif
 	else if (IsA(clause, ScalarArrayOpExpr))
 	{
-		/* First, decide if it's a join clause, same as for OpExpr */
-		bool		is_join_clause;
-
-		if (varRelid != 0)
-		{
-			/*
-			 * If we are considering a nestloop join then all clauses are
-			 * restriction clauses, since we are only interested in the one
-			 * relation.
-			 */
-			is_join_clause = false;
-		}
-		else
-		{
-			/*
-			 * Otherwise, it's a join if there's more than one relation used.
-			 * We can optimize this calculation if an rinfo was passed.
-			 */
-			if (rinfo)
-				is_join_clause = (bms_membership(rinfo->clause_relids) ==
-								  BMS_MULTIPLE);
-			else
-				is_join_clause = (NumRelids(clause) > 1);
-		}
-
 		/* Use node specific selectivity calculation function */
 		s1 = scalararraysel(root,
 							(ScalarArrayOpExpr *) clause,
-							is_join_clause,
+							treat_as_join_clause(clause, rinfo,
+												 varRelid, sjinfo),
 							varRelid,
 							jointype,
 							sjinfo);
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@ -54,7 +54,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.193 2008/08/14 18:47:59 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.194 2008/08/16 00:01:36 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -118,10 +118,8 @@ static MergeScanSelCache *cached_scansel(PlannerInfo *root,
 			   RestrictInfo *rinfo,
 			   PathKey *pathkey);
 static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
-static Selectivity approx_selectivity(PlannerInfo *root, List *quals,
-				   SpecialJoinInfo *sjinfo);
-static Selectivity join_in_selectivity(JoinPath *path, PlannerInfo *root,
-									   SpecialJoinInfo *sjinfo);
+static double approx_tuple_count(PlannerInfo *root, JoinPath *path,
+								 List *quals, SpecialJoinInfo *sjinfo);
 static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
 static double relation_byte_size(double tuples, int width);
 static double page_size(double tuples, int width);
@ -1288,20 +1286,10 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	double		outer_path_rows = PATH_ROWS(outer_path);
 	double		inner_path_rows = nestloop_inner_path_rows(inner_path);
 	double		ntuples;
-	Selectivity joininfactor;

 	if (!enable_nestloop)
 		startup_cost += disable_cost;

-	/*
-	 * If we're doing JOIN_IN then we will stop scanning inner tuples for an
-	 * outer tuple as soon as we have one match.  Account for the effects of
-	 * this by scaling down the cost estimates in proportion to the JOIN_IN
-	 * selectivity.  (This assumes that all the quals attached to the join are
-	 * IN quals, which should be true.)
-	 */
-	joininfactor = join_in_selectivity(path, root, sjinfo);
-
 	/* cost of source data */

 	/*
@ -1328,12 +1316,12 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 		run_cost += (outer_path_rows - 1) * inner_path->startup_cost;
 	}
 	run_cost += outer_path_rows *
-		(inner_path->total_cost - inner_path->startup_cost) * joininfactor;
+		(inner_path->total_cost - inner_path->startup_cost);

 	/*
 	 * Compute number of tuples processed (not number emitted!)
 	 */
-	ntuples = outer_path_rows * inner_path_rows * joininfactor;
+	ntuples = outer_path_rows * inner_path_rows;

 	/* CPU costs */
 	cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo, root);
@ -1369,7 +1357,6 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	Cost		startup_cost = 0;
 	Cost		run_cost = 0;
 	Cost		cpu_per_tuple;
-	Selectivity merge_selec;
 	QualCost	merge_qual_cost;
 	QualCost	qp_qual_cost;
 	double		outer_path_rows = PATH_ROWS(outer_path);
@ -1385,7 +1372,6 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 				outerendsel,
 				innerstartsel,
 				innerendsel;
-	Selectivity joininfactor;
 	Path		sort_path;		/* dummy for result of cost_sort */

 	/* Protect some assumptions below that rowcounts aren't zero */
@ -1398,21 +1384,21 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 		startup_cost += disable_cost;

 	/*
-	 * Compute cost and selectivity of the mergequals and qpquals (other
-	 * restriction clauses) separately.  We use approx_selectivity here for
-	 * speed --- in most cases, any errors won't affect the result much.
-	 *
-	 * Note: it's probably bogus to use the normal selectivity calculation
-	 * here when either the outer or inner path is a UniquePath.
+	 * Compute cost of the mergequals and qpquals (other restriction clauses)
+	 * separately.
 	 */
-	merge_selec = approx_selectivity(root, mergeclauses, sjinfo);
 	cost_qual_eval(&merge_qual_cost, mergeclauses, root);
 	cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
 	qp_qual_cost.startup -= merge_qual_cost.startup;
 	qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple;

-	/* approx # tuples passing the merge quals */
-	mergejointuples = clamp_row_est(merge_selec * outer_path_rows * inner_path_rows);
+	/*
+	 * Get approx # tuples passing the mergequals.  We use approx_tuple_count
+	 * here for speed --- in most cases, any errors won't affect the result
+	 * much.
+	 */
+	mergejointuples = approx_tuple_count(root, &path->jpath,
+										 mergeclauses, sjinfo);

 	/*
 	 * When there are equal merge keys in the outer relation, the mergejoin
@ -1422,11 +1408,11 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	 * but on the other hand we ignore the bookkeeping costs of mark/restore.
 	 * Not clear if it's worth developing a more refined model.
 	 *
-	 * The number of re-fetches can be estimated approximately as size of
-	 * merge join output minus size of inner relation.	Assume that the
-	 * distinct key values are 1, 2, ..., and denote the number of values of
-	 * each key in the outer relation as m1, m2, ...; in the inner relation,
-	 * n1, n2, ... Then we have
+	 * For regular inner and outer joins, the number of re-fetches can be
+	 * estimated approximately as size of merge join output minus size of
+	 * inner relation. Assume that the distinct key values are 1, 2, ..., and
+	 * denote the number of values of each key in the outer relation as m1,
+	 * m2, ...; in the inner relation, n1, n2, ...  Then we have
 	 *
 	 * size of join = m1 * n1 + m2 * n2 + ...
 	 *
@ -1439,8 +1425,17 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	 * are effectively subtracting those from the number of rescanned tuples,
 	 * when we should not.	Can we do better without expensive selectivity
 	 * computations?
+	 *
+	 * For SEMI and ANTI joins, only one inner tuple need be rescanned for
+	 * each group of same-keyed outer tuples (assuming that all joinquals
+	 * are merge quals).  This makes the effect small enough to ignore,
+	 * so we just set rescannedtuples = 0.  Likewise, the whole issue is
+	 * moot if we are working from a unique-ified outer input.
 	 */
-	if (IsA(outer_path, UniquePath))
+	if (sjinfo->jointype == JOIN_SEMI ||
+		sjinfo->jointype == JOIN_ANTI)
+		rescannedtuples = 0;
+	else if (IsA(outer_path, UniquePath))
 		rescannedtuples = 0;
 	else
 	{
@ -1505,7 +1500,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 			innerstartsel = cache->leftstartsel;
 			innerendsel = cache->leftendsel;
 		}
-		if (path->jpath.jointype == JOIN_LEFT)
+		if (path->jpath.jointype == JOIN_LEFT ||
+			path->jpath.jointype == JOIN_ANTI)
 		{
 			outerstartsel = 0.0;
 			outerendsel = 1.0;
@ -1600,21 +1596,10 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)

 	/* CPU costs */

-	/*
-	 * If we're doing JOIN_IN then we will stop outputting inner tuples for an
-	 * outer tuple as soon as we have one match.  Account for the effects of
-	 * this by scaling down the cost estimates in proportion to the expected
-	 * output size.  (This assumes that all the quals attached to the join are
-	 * IN quals, which should be true.)
-	 */
-	joininfactor = join_in_selectivity(&path->jpath, root, sjinfo);
-
 	/*
 	 * The number of tuple comparisons needed is approximately number of outer
 	 * rows plus number of inner rows plus number of rescanned tuples (can we
 	 * refine this?).  At each one, we need to evaluate the mergejoin quals.
-	 * NOTE: JOIN_IN mode does not save any work here, so do NOT include
-	 * joininfactor.
 	 */
 	startup_cost += merge_qual_cost.startup;
 	startup_cost += merge_qual_cost.per_tuple *
@ -1627,12 +1612,11 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	 * For each tuple that gets through the mergejoin proper, we charge
 	 * cpu_tuple_cost plus the cost of evaluating additional restriction
 	 * clauses that are to be applied at the join.	(This is pessimistic since
-	 * not all of the quals may get evaluated at each tuple.)  This work is
-	 * skipped in JOIN_IN mode, so apply the factor.
+	 * not all of the quals may get evaluated at each tuple.)
 	 */
 	startup_cost += qp_qual_cost.startup;
 	cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
-	run_cost += cpu_per_tuple * mergejointuples * joininfactor;
+	run_cost += cpu_per_tuple * mergejointuples;

 	path->jpath.path.startup_cost = startup_cost;
 	path->jpath.path.total_cost = startup_cost + run_cost;
@ -1711,7 +1695,6 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	Cost		startup_cost = 0;
 	Cost		run_cost = 0;
 	Cost		cpu_per_tuple;
-	Selectivity hash_selec;
 	QualCost	hash_qual_cost;
 	QualCost	qp_qual_cost;
 	double		hashjointuples;
@ -1722,28 +1705,27 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	int			numbatches;
 	double		virtualbuckets;
 	Selectivity innerbucketsize;
-	Selectivity joininfactor;
 	ListCell   *hcl;

 	if (!enable_hashjoin)
 		startup_cost += disable_cost;

 	/*
-	 * Compute cost and selectivity of the hashquals and qpquals (other
-	 * restriction clauses) separately.  We use approx_selectivity here for
-	 * speed --- in most cases, any errors won't affect the result much.
-	 *
-	 * Note: it's probably bogus to use the normal selectivity calculation
-	 * here when either the outer or inner path is a UniquePath.
+	 * Compute cost of the hashquals and qpquals (other restriction clauses)
+	 * separately.
 	 */
-	hash_selec = approx_selectivity(root, hashclauses, sjinfo);
 	cost_qual_eval(&hash_qual_cost, hashclauses, root);
 	cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
 	qp_qual_cost.startup -= hash_qual_cost.startup;
 	qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple;

-	/* approx # tuples passing the hash quals */
-	hashjointuples = clamp_row_est(hash_selec * outer_path_rows * inner_path_rows);
+	/*
+	 * Get approx # tuples passing the hashquals.  We use approx_tuple_count
+	 * here for speed --- in most cases, any errors won't affect the result
+	 * much.
+	 */
+	hashjointuples = approx_tuple_count(root, &path->jpath,
+										hashclauses, sjinfo);

 	/* cost of source data */
 	startup_cost += outer_path->startup_cost;
@ -1858,15 +1840,6 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)

 	/* CPU costs */

-	/*
-	 * If we're doing JOIN_IN then we will stop comparing inner tuples to an
-	 * outer tuple as soon as we have one match.  Account for the effects of
-	 * this by scaling down the cost estimates in proportion to the expected
-	 * output size.  (This assumes that all the quals attached to the join are
-	 * IN quals, which should be true.)
-	 */
-	joininfactor = join_in_selectivity(&path->jpath, root, sjinfo);
-
 	/*
 	 * The number of tuple comparisons needed is the number of outer tuples
 	 * times the typical number of tuples in a hash bucket, which is the inner
@ -1878,8 +1851,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	 */
 	startup_cost += hash_qual_cost.startup;
 	run_cost += hash_qual_cost.per_tuple *
-		outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) *
-		joininfactor * 0.5;
+		outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) * 0.5;

 	/*
 	 * For each tuple that gets through the hashjoin proper, we charge
@ -1889,7 +1861,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	 */
 	startup_cost += qp_qual_cost.startup;
 	cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
-	run_cost += cpu_per_tuple * hashjointuples * joininfactor;
+	run_cost += cpu_per_tuple * hashjointuples;

 	path->jpath.path.startup_cost = startup_cost;
 	path->jpath.path.total_cost = startup_cost + run_cost;
@ -2213,13 +2185,12 @@ get_initplan_cost(PlannerInfo *root, SubPlan *subplan)


 /*
- * approx_selectivity
- *		Quick-and-dirty estimation of clause selectivities.
- *		The input can be either an implicitly-ANDed list of boolean
- *		expressions, or a list of RestrictInfo nodes (typically the latter).
+ * approx_tuple_count
+ *		Quick-and-dirty estimation of the number of join rows passing
+ *		a set of qual conditions.
 *
- * Currently this is only used in join estimation, so sjinfo should never
- * be NULL.
+ * The quals can be either an implicitly-ANDed list of boolean expressions,
+ * or a list of RestrictInfo nodes (typically the latter).
 *
 * This is quick-and-dirty because we bypass clauselist_selectivity, and
 * simply multiply the independent clause selectivities together.  Now
@ -2232,20 +2203,34 @@ get_initplan_cost(PlannerInfo *root, SubPlan *subplan)
 * output tuples are generated and passed through qpqual checking, it
 * seems OK to live with the approximation.
 */
-static Selectivity
-approx_selectivity(PlannerInfo *root, List *quals, SpecialJoinInfo *sjinfo)
+static double
+approx_tuple_count(PlannerInfo *root, JoinPath *path,
+				   List *quals, SpecialJoinInfo *sjinfo)
 {
-	Selectivity total = 1.0;
+	double		tuples;
+	double		outer_tuples = path->outerjoinpath->parent->rows;
+	double		inner_tuples = path->innerjoinpath->parent->rows;
+	Selectivity selec = 1.0;
 	ListCell   *l;

+	/* Get the approximate selectivity */
 	foreach(l, quals)
 	{
 		Node	   *qual = (Node *) lfirst(l);

 		/* Note that clause_selectivity will be able to cache its result */
-		total *= clause_selectivity(root, qual, 0, sjinfo->jointype, sjinfo);
+		selec *= clause_selectivity(root, qual, 0, sjinfo->jointype, sjinfo);
 	}
-	return total;
+
+	/* Apply it correctly using the input relation sizes */
+	if (sjinfo->jointype == JOIN_SEMI)
+		tuples = selec * outer_tuples;
+	else if (sjinfo->jointype == JOIN_ANTI)
+		tuples = (1.0 - selec) * outer_tuples;
+	else
+		tuples = selec * outer_tuples * inner_tuples;
+
+	return clamp_row_est(tuples);
 }


@ -2315,7 +2300,6 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
 	Selectivity jselec;
 	Selectivity pselec;
 	double		nrows;
-	UniquePath *upath;

 	/*
 	 * Compute joinclause selectivity.	Note that we are only considering
@ -2380,9 +2364,8 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
 	 * pushed-down quals are applied after the outer join, so their
 	 * selectivity applies fully.
 	 *
-	 * For JOIN_IN and variants, the Cartesian product is figured with respect
-	 * to a unique-ified input, and then we can clamp to the size of the other
-	 * input.
+	 * For JOIN_SEMI and JOIN_ANTI, the selectivity is defined as the fraction
+	 * of LHS rows that have matches, and we apply that straightforwardly.
 	 */
 	switch (jointype)
 	{
@ -2404,22 +2387,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
 			nrows *= pselec;
 			break;
 		case JOIN_SEMI:
-			/* XXX this is unsafe, could Assert? */
-			upath = create_unique_path(root, inner_rel,
-									   inner_rel->cheapest_total_path,
-									   sjinfo);
-			if (upath)
-				nrows = outer_rel->rows * upath->rows * jselec;
-			else
-				nrows = outer_rel->rows * inner_rel->rows * jselec;
-			if (nrows > outer_rel->rows)
-				nrows = outer_rel->rows;
+			nrows = outer_rel->rows * jselec;
+			nrows *= pselec;
 			break;
 		case JOIN_ANTI:
-			/* XXX this is utterly wrong */
-			nrows = outer_rel->rows * inner_rel->rows * jselec;
-			if (nrows < outer_rel->rows)
-				nrows = outer_rel->rows;
+			nrows = outer_rel->rows * (1.0 - jselec);
 			nrows *= pselec;
 			break;
 		default:
@ -2432,67 +2404,6 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
 	rel->rows = clamp_row_est(nrows);
 }

-/*
- * join_in_selectivity
- *	  Determines the factor by which a JOIN_IN join's result is expected
- *	  to be smaller than an ordinary inner join.
- *
- * 'path' is already filled in except for the cost fields
- * 'sjinfo' must be the JOIN_SEMI SpecialJoinInfo for the join
- */
-static Selectivity
-join_in_selectivity(JoinPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
-{
-	RelOptInfo *innerrel;
-	UniquePath *innerunique;
-	Selectivity selec;
-	double		nrows;
-
-	/* Return 1.0 whenever it's not JOIN_IN */
-	if (path->jointype != JOIN_SEMI)
-		return 1.0;
-	Assert(sjinfo && sjinfo->jointype == JOIN_SEMI);
-
-	/*
-	 * Return 1.0 if the inner side is already known unique.  The case where
-	 * the inner path is already a UniquePath probably cannot happen in
-	 * current usage, but check it anyway for completeness.  The interesting
-	 * case is where we've determined the inner relation itself is unique,
-	 * which we can check by looking at the rows estimate for its UniquePath.
-	 */
-	if (IsA(path->innerjoinpath, UniquePath))
-		return 1.0;
-	innerrel = path->innerjoinpath->parent;
-	/* XXX might assert if sjinfo doesn't exactly match innerrel? */
-	innerunique = create_unique_path(root,
-									 innerrel,
-									 innerrel->cheapest_total_path,
-									 sjinfo);
-	if (innerunique && innerunique->rows >= innerrel->rows)
-		return 1.0;
-
-	/*
-	 * Compute same result set_joinrel_size_estimates would compute for
-	 * JOIN_INNER.	Note that we use the input rels' absolute size estimates,
-	 * not PATH_ROWS() which might be less; if we used PATH_ROWS() we'd be
-	 * double-counting the effects of any join clauses used in input scans.
-	 */
-	selec = clauselist_selectivity(root,
-								   path->joinrestrictinfo,
-								   0,
-								   JOIN_INNER,
-								   NULL);
-	nrows = path->outerjoinpath->parent->rows * innerrel->rows * selec;
-
-	nrows = clamp_row_est(nrows);
-
-	/* See if it's larger than the actual JOIN_IN size estimate */
-	if (nrows > path->path.parent->rows)
-		return path->path.parent->rows / nrows;
-	else
-		return 1.0;
-}
-
 /*
 * set_function_size_estimates
 *		Set the size estimates for a base relation that is a function call.
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.148 2008/07/13 20:45:47 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.149 2008/08/16 00:01:36 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -830,7 +830,8 @@ Selectivity
 join_selectivity(PlannerInfo *root,
 				 Oid operator,
 				 List *args,
-				 JoinType jointype)
+				 JoinType jointype,
+				 SpecialJoinInfo *sjinfo)
 {
 	RegProcedure oprjoin = get_oprjoin(operator);
 	float8		result;
@ -842,11 +843,12 @@ join_selectivity(PlannerInfo *root,
 	if (!oprjoin)
 		return (Selectivity) 0.5;

-	result = DatumGetFloat8(OidFunctionCall4(oprjoin,
+	result = DatumGetFloat8(OidFunctionCall5(oprjoin,
 											 PointerGetDatum(root),
 											 ObjectIdGetDatum(operator),
 											 PointerGetDatum(args),
-											 Int16GetDatum(jointype)));
+											 Int16GetDatum(jointype),
+											 PointerGetDatum(sjinfo)));

 	if (result < 0.0 || result > 1.0)
 		elog(ERROR, "invalid join selectivity: %f", result);