IN clauses appearing at top level of WHERE can now be handled as joins.

There are two implementation techniques: either the executor uses a new JOIN_IN jointype, which emits at most one matching row per left-hand row, or the result of the IN's sub-select is fed through a DISTINCT filter and then joined as an ordinary relation. Along the way, do some minor code cleanup in the optimizer; notably, break out most of the jointree-rearrangement preprocessing from planner.c and put it in a new file, prep/prepjointree.c.
2025-11-13 16:22:44 +03:00 · 2003-01-20 18:55:07 +00:00
parent be2b660ecd
commit bdfbfde1b1
47 changed files with 2075 additions and 875 deletions
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.93 2002/11/30 05:21:02 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.94 2003/01/20 18:54:49 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -750,6 +750,10 @@ print_path(Query *root, Path *path, int indent)
 			ptype = "Material";
 			subpath = ((MaterialPath *) path)->subpath;
 			break;
+		case T_UniquePath:
+			ptype = "Unique";
+			subpath = ((UniquePath *) path)->subpath;
+			break;
 		case T_NestPath:
 			ptype = "NestLoop";
 			join = true;
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -42,7 +42,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.100 2003/01/15 19:35:39 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.101 2003/01/20 18:54:49 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -1024,12 +1024,17 @@ cost_hashjoin(Path *path, Query *root,
 	 * Bias against putting larger relation on inside.	We don't want an
 	 * absolute prohibition, though, since larger relation might have
 	 * better bucketsize --- and we can't trust the size estimates
-	 * unreservedly, anyway.  Instead, inflate the startup cost by the
+	 * unreservedly, anyway.  Instead, inflate the run cost by the
 	 * square root of the size ratio.  (Why square root?  No real good
 	 * reason, but it seems reasonable...)
+	 *
+	 * Note: before 7.4 we implemented this by inflating startup cost;
+	 * but if there's a disable_cost component in the input paths'
+	 * startup cost, that unfairly penalizes the hash.  Probably it'd
+	 * be better to keep track of disable penalty separately from cost.
 	 */
 	if (innerbytes > outerbytes && outerbytes > 0)
-		startup_cost *= sqrt(innerbytes / outerbytes);
+		run_cost *= sqrt(innerbytes / outerbytes);

 	path->startup_cost = startup_cost;
 	path->total_cost = startup_cost + run_cost;
@@ -1492,22 +1497,26 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
 						   JoinType jointype,
 						   List *restrictlist)
 {
+	Selectivity selec;
 	double		temp;
-
-	/* Start with the Cartesian product */
-	temp = outer_rel->rows * inner_rel->rows;
+	UniquePath *upath;

 	/*
-	 * Apply join restrictivity.  Note that we are only considering
+	 * Compute joinclause selectivity.  Note that we are only considering
 	 * clauses that become restriction clauses at this join level; we are
 	 * not double-counting them because they were not considered in
 	 * estimating the sizes of the component rels.
 	 */
-	temp *= restrictlist_selectivity(root,
+	selec = restrictlist_selectivity(root,
 									 restrictlist,
 									 0);

 	/*
+	 * Normally, we multiply size of Cartesian product by selectivity.
+	 * But for JOIN_IN, we just multiply the lefthand size by the selectivity
+	 * (is that really right?).  For UNIQUE_OUTER or UNIQUE_INNER, use
+	 * the estimated number of distinct rows (again, is that right?)
+	 *
 	 * If we are doing an outer join, take that into account: the output
 	 * must be at least as large as the non-nullable input.  (Is there any
 	 * chance of being even smarter?)
@@ -1515,24 +1524,45 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
 	switch (jointype)
 	{
 		case JOIN_INNER:
+			temp = outer_rel->rows * inner_rel->rows * selec;
 			break;
 		case JOIN_LEFT:
+			temp = outer_rel->rows * inner_rel->rows * selec;
 			if (temp < outer_rel->rows)
 				temp = outer_rel->rows;
 			break;
 		case JOIN_RIGHT:
+			temp = outer_rel->rows * inner_rel->rows * selec;
 			if (temp < inner_rel->rows)
 				temp = inner_rel->rows;
 			break;
 		case JOIN_FULL:
+			temp = outer_rel->rows * inner_rel->rows * selec;
 			if (temp < outer_rel->rows)
 				temp = outer_rel->rows;
 			if (temp < inner_rel->rows)
 				temp = inner_rel->rows;
 			break;
+		case JOIN_IN:
+			temp = outer_rel->rows * selec;
+			break;
+		case JOIN_REVERSE_IN:
+			temp = inner_rel->rows * selec;
+			break;
+		case JOIN_UNIQUE_OUTER:
+			upath = create_unique_path(root, outer_rel,
+									   outer_rel->cheapest_total_path);
+			temp = upath->rows * inner_rel->rows * selec;
+			break;
+		case JOIN_UNIQUE_INNER:
+			upath = create_unique_path(root, inner_rel,
+									   inner_rel->cheapest_total_path);
+			temp = outer_rel->rows * upath->rows * selec;
+			break;
 		default:
 			elog(ERROR, "set_joinrel_size_estimates: unsupported join type %d",
 				 (int) jointype);
+			temp = 0;			/* keep compiler quiet */
 			break;
 	}

--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.131 2003/01/15 19:35:39 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.132 2003/01/20 18:54:49 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -1401,11 +1401,13 @@ best_inner_indexscan(Query *root, RelOptInfo *rel,
 	MemoryContext oldcontext;

 	/*
-	 * Nestloop only supports inner and left joins.
+	 * Nestloop only supports inner, left, and IN joins.
 	 */
 	switch (jointype)
 	{
 		case JOIN_INNER:
+		case JOIN_IN:
+		case JOIN_UNIQUE_OUTER:
 			isouterjoin = false;
 			break;
 		case JOIN_LEFT:
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.75 2003/01/15 19:35:40 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.76 2003/01/20 18:54:50 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -32,13 +32,6 @@ static void match_unsorted_outer(Query *root, RelOptInfo *joinrel,
 					 RelOptInfo *outerrel, RelOptInfo *innerrel,
 					 List *restrictlist, List *mergeclause_list,
 					 JoinType jointype);
-
-#ifdef NOT_USED
-static void match_unsorted_inner(Query *root, RelOptInfo *joinrel,
-					 RelOptInfo *outerrel, RelOptInfo *innerrel,
-					 List *restrictlist, List *mergeclause_list,
-					 JoinType jointype);
-#endif
 static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel,
 					 RelOptInfo *outerrel, RelOptInfo *innerrel,
 					 List *restrictlist, JoinType jointype);
@@ -149,6 +142,8 @@ sort_inner_and_outer(Query *root,
 					 JoinType jointype)
 {
 	bool		useallclauses;
+	Path	   *outer_path;
+	Path	   *inner_path;
 	List	   *all_pathkeys;
 	List	   *i;

@@ -160,6 +155,9 @@ sort_inner_and_outer(Query *root,
 	{
 		case JOIN_INNER:
 		case JOIN_LEFT:
+		case JOIN_IN:
+		case JOIN_UNIQUE_OUTER:
+		case JOIN_UNIQUE_INNER:
 			useallclauses = false;
 			break;
 		case JOIN_RIGHT:
@@ -173,6 +171,28 @@ sort_inner_and_outer(Query *root,
 			break;
 	}

+	/*
+	 * We only consider the cheapest-total-cost input paths, since we are
+	 * assuming here that a sort is required.  We will consider
+	 * cheapest-startup-cost input paths later, and only if they don't
+	 * need a sort.
+	 *
+	 * If unique-ification is requested, do it and then handle as a plain
+	 * inner join.
+	 */
+	outer_path = outerrel->cheapest_total_path;
+	inner_path = innerrel->cheapest_total_path;
+	if (jointype == JOIN_UNIQUE_OUTER)
+	{
+		outer_path = (Path *) create_unique_path(root, outerrel, outer_path);
+		jointype = JOIN_INNER;
+	}
+	else if (jointype == JOIN_UNIQUE_INNER)
+	{
+		inner_path = (Path *) create_unique_path(root, innerrel, inner_path);
+		jointype = JOIN_INNER;
+	}
+
 	/*
 	 * Each possible ordering of the available mergejoin clauses will
 	 * generate a differently-sorted result path at essentially the same
@@ -254,17 +274,14 @@ sort_inner_and_outer(Query *root,
 		merge_pathkeys = build_join_pathkeys(root, joinrel, outerkeys);

 		/*
-		 * And now we can make the path.  We only consider the cheapest-
-		 * total-cost input paths, since we are assuming here that a sort
-		 * is required.  We will consider cheapest-startup-cost input
-		 * paths later, and only if they don't need a sort.
+		 * And now we can make the path.
 		 */
 		add_path(joinrel, (Path *)
 				 create_mergejoin_path(root,
 									   joinrel,
 									   jointype,
-									   outerrel->cheapest_total_path,
-									   innerrel->cheapest_total_path,
+									   outer_path,
+									   inner_path,
 									   restrictlist,
 									   merge_pathkeys,
 									   cur_mergeclauses,
@@ -314,15 +331,18 @@ match_unsorted_outer(Query *root,
 					 List *mergeclause_list,
 					 JoinType jointype)
 {
+	JoinType	save_jointype = jointype;
 	bool		nestjoinOK;
 	bool		useallclauses;
+	Path	   *inner_cheapest_startup = innerrel->cheapest_startup_path;
+	Path	   *inner_cheapest_total = innerrel->cheapest_total_path;
 	Path	   *matpath = NULL;
 	Path	   *bestinnerjoin = NULL;
 	List	   *i;

 	/*
-	 * Nestloop only supports inner and left joins.  Also, if we are doing
-	 * a right or full join, we must use *all* the mergeclauses as join
+	 * Nestloop only supports inner, left, and IN joins.  Also, if we are
+	 * doing a right or full join, we must use *all* the mergeclauses as join
 	 * clauses, else we will not have a valid plan.  (Although these two
 	 * flags are currently inverses, keep them separate for clarity and
 	 * possible future changes.)
@@ -331,6 +351,9 @@ match_unsorted_outer(Query *root,
 	{
 		case JOIN_INNER:
 		case JOIN_LEFT:
+		case JOIN_IN:
+		case JOIN_UNIQUE_OUTER:
+		case JOIN_UNIQUE_INNER:
 			nestjoinOK = true;
 			useallclauses = false;
 			break;
@@ -347,18 +370,28 @@ match_unsorted_outer(Query *root,
 			break;
 	}

-	if (nestjoinOK)
+	/*
+	 * If we need to unique-ify the inner path, we will consider only
+	 * the cheapest inner.
+	 */
+	if (jointype == JOIN_UNIQUE_INNER)
+	{
+		inner_cheapest_total = (Path *)
+			create_unique_path(root, innerrel, inner_cheapest_total);
+		inner_cheapest_startup = inner_cheapest_total;
+		jointype = JOIN_INNER;
+	}
+	else if (nestjoinOK)
 	{
 		/*
 		 * If the cheapest inner path is a join or seqscan, we should consider
 		 * materializing it.  (This is a heuristic: we could consider it
 		 * always, but for inner indexscans it's probably a waste of time.)
 		 */
-		if (!(IsA(innerrel->cheapest_total_path, IndexPath) ||
-			  IsA(innerrel->cheapest_total_path, TidPath)))
+		if (!(IsA(inner_cheapest_total, IndexPath) ||
+			  IsA(inner_cheapest_total, TidPath)))
 			matpath = (Path *)
-				create_material_path(innerrel, 
-									 innerrel->cheapest_total_path);
+				create_material_path(innerrel, inner_cheapest_total);

 		/*
 		 * Get the best innerjoin indexpath (if any) for this outer rel. It's
@@ -380,6 +413,18 @@ match_unsorted_outer(Query *root,
 		int			num_sortkeys;
 		int			sortkeycnt;

+		/*
+		 * If we need to unique-ify the outer path, it's pointless to consider
+		 * any but the cheapest outer.
+		 */
+		if (save_jointype == JOIN_UNIQUE_OUTER)
+		{
+			if (outerpath != outerrel->cheapest_total_path)
+				continue;
+			outerpath = (Path *) create_unique_path(root, outerrel, outerpath);
+			jointype = JOIN_INNER;
+		}
+
 		/*
 		 * The result will have this sort order (even if it is implemented
 		 * as a nestloop, and even if some of the mergeclauses are
@@ -402,7 +447,7 @@ match_unsorted_outer(Query *root,
 										  joinrel,
 										  jointype,
 										  outerpath,
-										  innerrel->cheapest_total_path,
+										  inner_cheapest_total,
 										  restrictlist,
 										  merge_pathkeys));
 			if (matpath != NULL)
@@ -414,14 +459,13 @@ match_unsorted_outer(Query *root,
 											  matpath,
 											  restrictlist,
 											  merge_pathkeys));
-			if (innerrel->cheapest_startup_path !=
-				innerrel->cheapest_total_path)
+			if (inner_cheapest_startup != inner_cheapest_total)
 				add_path(joinrel, (Path *)
 						 create_nestloop_path(root,
 											  joinrel,
 											  jointype,
 											  outerpath,
-										 innerrel->cheapest_startup_path,
+											  inner_cheapest_startup,
 											  restrictlist,
 											  merge_pathkeys));
 			if (bestinnerjoin != NULL)
@@ -435,6 +479,10 @@ match_unsorted_outer(Query *root,
 											  merge_pathkeys));
 		}

+		/* Can't do anything else if outer path needs to be unique'd */
+		if (save_jointype == JOIN_UNIQUE_OUTER)
+			continue;
+
 		/* Look for useful mergeclauses (if any) */
 		mergeclauses = find_mergeclauses_for_pathkeys(root,
 													  outerpath->pathkeys,
@@ -455,27 +503,30 @@ match_unsorted_outer(Query *root,
 		 * Generate a mergejoin on the basis of sorting the cheapest
 		 * inner. Since a sort will be needed, only cheapest total cost
 		 * matters.  (But create_mergejoin_path will do the right thing if
-		 * innerrel->cheapest_total_path is already correctly sorted.)
+		 * inner_cheapest_total is already correctly sorted.)
 		 */
 		add_path(joinrel, (Path *)
 				 create_mergejoin_path(root,
 									   joinrel,
 									   jointype,
 									   outerpath,
-									   innerrel->cheapest_total_path,
+									   inner_cheapest_total,
 									   restrictlist,
 									   merge_pathkeys,
 									   mergeclauses,
 									   NIL,
 									   innersortkeys));

+		/* Can't do anything else if inner path needs to be unique'd */
+		if (save_jointype == JOIN_UNIQUE_INNER)
+			continue;
+
 		/*
 		 * Look for presorted inner paths that satisfy the innersortkey
 		 * list --- or any truncation thereof, if we are allowed to build
 		 * a mergejoin using a subset of the merge clauses.  Here, we
 		 * consider both cheap startup cost and cheap total cost.  Ignore
-		 * innerrel->cheapest_total_path, since we already made a path
-		 * with it.
+		 * inner_cheapest_total, since we already made a path with it.
 		 */
 		num_sortkeys = length(innersortkeys);
 		if (num_sortkeys > 1 && !useallclauses)
@@ -500,7 +551,7 @@ match_unsorted_outer(Query *root,
 													   trialsortkeys,
 													   TOTAL_COST);
 			if (innerpath != NULL &&
-				innerpath != innerrel->cheapest_total_path &&
+				innerpath != inner_cheapest_total &&
 				(cheapest_total_inner == NULL ||
 				 compare_path_costs(innerpath, cheapest_total_inner,
 									TOTAL_COST) < 0))
@@ -535,7 +586,7 @@ match_unsorted_outer(Query *root,
 													   trialsortkeys,
 													   STARTUP_COST);
 			if (innerpath != NULL &&
-				innerpath != innerrel->cheapest_total_path &&
+				innerpath != inner_cheapest_total &&
 				(cheapest_startup_inner == NULL ||
 				 compare_path_costs(innerpath, cheapest_startup_inner,
 									STARTUP_COST) < 0))
@@ -584,146 +635,6 @@ match_unsorted_outer(Query *root,
 	}
 }

-#ifdef NOT_USED
-
-/*
- * match_unsorted_inner
- *	  Generate mergejoin paths that use an explicit sort of the outer path
- *	  with an already-ordered inner path.
- *
- * 'joinrel' is the join result relation
- * 'outerrel' is the outer join relation
- * 'innerrel' is the inner join relation
- * 'restrictlist' contains all of the RestrictInfo nodes for restriction
- *		clauses that apply to this join
- * 'mergeclause_list' is a list of RestrictInfo nodes for available
- *		mergejoin clauses in this join
- * 'jointype' is the type of join to do
- */
-static void
-match_unsorted_inner(Query *root,
-					 RelOptInfo *joinrel,
-					 RelOptInfo *outerrel,
-					 RelOptInfo *innerrel,
-					 List *restrictlist,
-					 List *mergeclause_list,
-					 JoinType jointype)
-{
-	bool		useallclauses;
-	List	   *i;
-
-	switch (jointype)
-	{
-		case JOIN_INNER:
-		case JOIN_LEFT:
-			useallclauses = false;
-			break;
-		case JOIN_RIGHT:
-		case JOIN_FULL:
-			useallclauses = true;
-			break;
-		default:
-			elog(ERROR, "match_unsorted_inner: unexpected join type %d",
-				 (int) jointype);
-			useallclauses = false;		/* keep compiler quiet */
-			break;
-	}
-
-	foreach(i, innerrel->pathlist)
-	{
-		Path	   *innerpath = (Path *) lfirst(i);
-		List	   *mergeclauses;
-		List	   *outersortkeys;
-		List	   *merge_pathkeys;
-		Path	   *totalouterpath;
-		Path	   *startupouterpath;
-
-		/* Look for useful mergeclauses (if any) */
-		mergeclauses = find_mergeclauses_for_pathkeys(root,
-													  innerpath->pathkeys,
-													  mergeclause_list);
-
-		/* Done with this inner path if no chance for a mergejoin */
-		if (mergeclauses == NIL)
-			continue;
-		if (useallclauses && length(mergeclauses) != length(mergeclause_list))
-			continue;
-
-		/* Compute the required ordering of the outer path */
-		outersortkeys = make_pathkeys_for_mergeclauses(root,
-													   mergeclauses,
-													   outerrel);
-
-		/*
-		 * Generate a mergejoin on the basis of sorting the cheapest
-		 * outer. Since a sort will be needed, only cheapest total cost
-		 * matters.
-		 */
-		merge_pathkeys = build_join_pathkeys(root, joinrel, outersortkeys);
-		add_path(joinrel, (Path *)
-				 create_mergejoin_path(root,
-									   joinrel,
-									   jointype,
-									   outerrel->cheapest_total_path,
-									   innerpath,
-									   restrictlist,
-									   merge_pathkeys,
-									   mergeclauses,
-									   outersortkeys,
-									   NIL));
-
-		/*
-		 * Now generate mergejoins based on already-sufficiently-ordered
-		 * outer paths.  There's likely to be some redundancy here with
-		 * paths already generated by merge_unsorted_outer ... but since
-		 * merge_unsorted_outer doesn't consider all permutations of the
-		 * mergeclause list, it may fail to notice that this particular
-		 * innerpath could have been used with this outerpath.
-		 */
-		totalouterpath = get_cheapest_path_for_pathkeys(outerrel->pathlist,
-														outersortkeys,
-														TOTAL_COST);
-		if (totalouterpath == NULL)
-			continue;			/* there won't be a startup-cost path
-								 * either */
-
-		merge_pathkeys = build_join_pathkeys(root, joinrel,
-											 totalouterpath->pathkeys);
-		add_path(joinrel, (Path *)
-				 create_mergejoin_path(root,
-									   joinrel,
-									   jointype,
-									   totalouterpath,
-									   innerpath,
-									   restrictlist,
-									   merge_pathkeys,
-									   mergeclauses,
-									   NIL,
-									   NIL));
-
-		startupouterpath = get_cheapest_path_for_pathkeys(outerrel->pathlist,
-														  outersortkeys,
-														  STARTUP_COST);
-		if (startupouterpath != NULL && startupouterpath != totalouterpath)
-		{
-			merge_pathkeys = build_join_pathkeys(root, joinrel,
-											 startupouterpath->pathkeys);
-			add_path(joinrel, (Path *)
-					 create_mergejoin_path(root,
-										   joinrel,
-										   jointype,
-										   startupouterpath,
-										   innerpath,
-										   restrictlist,
-										   merge_pathkeys,
-										   mergeclauses,
-										   NIL,
-										   NIL));
-		}
-	}
-}
-#endif
-
 /*
 * hash_inner_and_outer
 *	  Create hashjoin join paths by explicitly hashing both the outer and
@@ -749,11 +660,14 @@ hash_inner_and_outer(Query *root,
 	List	   *i;

 	/*
-	 * Hashjoin only supports inner and left joins.
+	 * Hashjoin only supports inner, left, and IN joins.
 	 */
 	switch (jointype)
 	{
 		case JOIN_INNER:
+		case JOIN_IN:
+		case JOIN_UNIQUE_OUTER:
+		case JOIN_UNIQUE_INNER:
 			isouterjoin = false;
 			break;
 		case JOIN_LEFT:
@@ -813,21 +727,40 @@ hash_inner_and_outer(Query *root,
 		 * cheapest-startup-cost outer paths.  There's no need to consider
 		 * any but the cheapest-total-cost inner path, however.
 		 */
+		Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
+		Path *cheapest_total_outer = outerrel->cheapest_total_path;
+		Path *cheapest_total_inner = innerrel->cheapest_total_path;
+
+		/* Unique-ify if need be */
+		if (jointype == JOIN_UNIQUE_OUTER)
+		{
+			cheapest_total_outer = (Path *)
+				create_unique_path(root, outerrel, cheapest_total_outer);
+			cheapest_startup_outer = cheapest_total_outer;
+			jointype = JOIN_INNER;
+		}
+		else if (jointype == JOIN_UNIQUE_INNER)
+		{
+			cheapest_total_inner = (Path *)
+				create_unique_path(root, innerrel, cheapest_total_inner);
+			jointype = JOIN_INNER;
+		}
+
 		add_path(joinrel, (Path *)
 				 create_hashjoin_path(root,
 									  joinrel,
 									  jointype,
-									  outerrel->cheapest_total_path,
-									  innerrel->cheapest_total_path,
+									  cheapest_total_outer,
+									  cheapest_total_inner,
 									  restrictlist,
 									  hashclauses));
-		if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path)
+		if (cheapest_startup_outer != cheapest_total_outer)
 			add_path(joinrel, (Path *)
 					 create_hashjoin_path(root,
 										  joinrel,
 										  jointype,
-										  outerrel->cheapest_startup_path,
-										  innerrel->cheapest_total_path,
+										  cheapest_startup_outer,
+										  cheapest_total_inner,
 										  restrictlist,
 										  hashclauses));
 	}
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.58 2002/12/16 21:30:30 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.59 2003/01/20 18:54:51 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -172,7 +172,7 @@ make_rels_by_joins(Query *root, int level, List **joinrels)
 							jrel = make_join_rel(root, old_rel, new_rel,
 												 JOIN_INNER);
 							/* Avoid making duplicate entries ... */
-							if (!ptrMember(jrel, result_rels))
+							if (jrel && !ptrMember(jrel, result_rels))
 								result_rels = lcons(jrel, result_rels);
 							break;		/* need not consider more
 										 * joininfos */
@@ -276,10 +276,9 @@ make_rels_by_clause_joins(Query *root,

 				/*
 				 * Avoid entering same joinrel into our output list more
-				 * than once.  (make_rels_by_joins doesn't really care,
-				 * but GEQO does.)
+				 * than once.
 				 */
-				if (!ptrMember(jrel, result))
+				if (jrel && !ptrMember(jrel, result))
 					result = lcons(jrel, result);
 			}
 		}
@@ -323,7 +322,8 @@ make_rels_by_clauseless_joins(Query *root,
 			 * As long as given other_rels are distinct, don't need to
 			 * test to see if jrel is already part of output list.
 			 */
-			result = lcons(jrel, result);
+			if (jrel)
+				result = lcons(jrel, result);
 		}
 	}

@@ -367,6 +367,9 @@ make_jointree_rel(Query *root, Node *jtnode)
 		/* Make this join rel */
 		rel = make_join_rel(root, lrel, rrel, j->jointype);

+		if (rel == NULL)
+			elog(ERROR, "make_jointree_rel: invalid join order!?");
+
 		/*
 		 * Since we are only going to consider this one way to do it,
 		 * we're done generating Paths for this joinrel and can now select
@@ -395,19 +398,121 @@ make_jointree_rel(Query *root, Node *jtnode)
 *	   created with the two rels as outer and inner rel.
 *	   (The join rel may already contain paths generated from other
 *	   pairs of rels that add up to the same set of base rels.)
+ *
+ * NB: will return NULL if attempted join is not valid.  This can only
+ * happen when working with IN clauses that have been turned into joins.
 */
 RelOptInfo *
 make_join_rel(Query *root, RelOptInfo *rel1, RelOptInfo *rel2,
 			  JoinType jointype)
 {
+	List	   *joinrelids;
 	RelOptInfo *joinrel;
 	List	   *restrictlist;

+	/* We should never try to join two overlapping sets of rels. */
+	Assert(nonoverlap_setsi(rel1->relids, rel2->relids));
+
+	/* Construct Relids set that identifies the joinrel. */
+	joinrelids = nconc(listCopy(rel1->relids), listCopy(rel2->relids));
+
+	/*
+	 * If we are implementing IN clauses as joins, there are some joins
+	 * that are illegal.  Check to see if the proposed join is trouble.
+	 * We can skip the work if looking at an outer join, however, because
+	 * only top-level joins might be affected.
+	 */
+	if (jointype == JOIN_INNER)
+	{
+		List	   *l;
+
+		foreach(l, root->in_info_list)
+		{
+			InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
+
+			/*
+			 * Cannot join if proposed join contains part, but only
+			 * part, of the RHS, *and* it contains rels not in the RHS.
+			 *
+			 * Singleton RHS cannot be a problem, so skip expensive tests.
+			 */
+			if (length(ininfo->righthand) > 1 &&
+				overlap_setsi(ininfo->righthand, joinrelids) &&
+				!is_subseti(ininfo->righthand, joinrelids) &&
+				!is_subseti(joinrelids, ininfo->righthand))
+			{
+				freeList(joinrelids);
+				return NULL;
+			}
+
+			/*
+			 * No issue unless we are looking at a join of the IN's RHS
+			 * to other stuff.
+			 */
+			if (! (length(ininfo->righthand) < length(joinrelids) &&
+				   is_subseti(ininfo->righthand, joinrelids)))
+				continue;
+			/*
+			 * If we already joined IN's RHS to any part of its LHS in either
+			 * input path, then this join is not constrained (the necessary
+			 * work was done at a lower level).
+			 */
+			if (overlap_setsi(ininfo->lefthand, rel1->relids) &&
+				is_subseti(ininfo->righthand, rel1->relids))
+				continue;
+			if (overlap_setsi(ininfo->lefthand, rel2->relids) &&
+				is_subseti(ininfo->righthand, rel2->relids))
+				continue;
+			/*
+			 * JOIN_IN technique will work if outerrel includes LHS and
+			 * innerrel is exactly RHS; conversely JOIN_REVERSE_IN handles
+			 * RHS/LHS.
+			 *
+			 * JOIN_UNIQUE_OUTER will work if outerrel is exactly RHS;
+			 * conversely JOIN_UNIQUE_INNER will work if innerrel is
+			 * exactly RHS.
+			 *
+			 * But none of these will work if we already found another IN
+			 * that needs to trigger here.
+			 */
+			if (jointype != JOIN_INNER)
+			{
+				freeList(joinrelids);
+				return NULL;
+			}
+			if (is_subseti(ininfo->lefthand, rel1->relids) &&
+				sameseti(ininfo->righthand, rel2->relids))
+			{
+				jointype = JOIN_IN;
+			}
+			else if (is_subseti(ininfo->lefthand, rel2->relids) &&
+					 sameseti(ininfo->righthand, rel1->relids))
+			{
+				jointype = JOIN_REVERSE_IN;
+			}
+			else if (sameseti(ininfo->righthand, rel1->relids))
+			{
+				jointype = JOIN_UNIQUE_OUTER;
+			}
+			else if (sameseti(ininfo->righthand, rel2->relids))
+			{
+				jointype = JOIN_UNIQUE_INNER;
+			}
+			else
+			{
+				/* invalid join path */
+				freeList(joinrelids);
+				return NULL;
+			}
+		}
+	}
+
 	/*
 	 * Find or build the join RelOptInfo, and compute the restrictlist
 	 * that goes with this particular joining.
 	 */
-	joinrel = build_join_rel(root, rel1, rel2, jointype, &restrictlist);
+	joinrel = build_join_rel(root, joinrelids, rel1, rel2, jointype,
+							 &restrictlist);

 	/*
 	 * Consider paths using each rel as both outer and inner.
@@ -438,11 +543,43 @@ make_join_rel(Query *root, RelOptInfo *rel1, RelOptInfo *rel2,
 			add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_LEFT,
 								 restrictlist);
 			break;
+		case JOIN_IN:
+			add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_IN,
+								 restrictlist);
+			/* REVERSE_IN isn't supported by joinpath.c */
+			add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_INNER,
+								 restrictlist);
+			add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_OUTER,
+								 restrictlist);
+			break;
+		case JOIN_REVERSE_IN:
+			/* REVERSE_IN isn't supported by joinpath.c */
+			add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_IN,
+								 restrictlist);
+			add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_OUTER,
+								 restrictlist);
+			add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_INNER,
+								 restrictlist);
+			break;
+		case JOIN_UNIQUE_OUTER:
+			add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_OUTER,
+								 restrictlist);
+			add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_INNER,
+								 restrictlist);
+			break;
+		case JOIN_UNIQUE_INNER:
+			add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_INNER,
+								 restrictlist);
+			add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_OUTER,
+								 restrictlist);
+			break;
 		default:
 			elog(ERROR, "make_join_rel: unsupported join type %d",
 				 (int) jointype);
 			break;
 	}

+	freeList(joinrelids);
+
 	return joinrel;
 }