Major planner/optimizer revision: get rid of PathOrder node type,

store all ordering information in pathkeys lists (which are now lists of lists of PathKeyItem nodes, not just lists of lists of vars). This was a big win --- the code is smaller and IMHO more understandable than it was, even though it handles more cases. I believe the node changes will not force an initdb for anyone; planner nodes don't show up in stored rules.
2025-07-17 06:41:09 +03:00 · 1999-08-16 02:17:58 +00:00
parent 08320bfb22
commit e6381966c1
43 changed files with 1937 additions and 3270 deletions
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@ -1,70 +1,84 @@
 /*-------------------------------------------------------------------------
 *
- * joinutils.c
- *	  Utilities for matching and building join and path keys
+ * pathkeys.c
+ *	  Utilities for matching and building path keys
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.13 1999/08/13 01:17:16 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.14 1999/08/16 02:17:52 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
 #include "postgres.h"

-
+#include "nodes/makefuncs.h"
+#include "optimizer/clauses.h"
 #include "optimizer/joininfo.h"
-#include "optimizer/keys.h"
-#include "optimizer/ordering.h"
 #include "optimizer/paths.h"
 #include "optimizer/tlist.h"
+#include "optimizer/var.h"
+#include "parser/parsetree.h"
+#include "utils/lsyscache.h"

-static int match_pathkey_joinkeys(List *pathkey, List *joinkeys,
-					   int outer_or_inner);
-static List *new_join_pathkey(List *pathkeys, List *join_rel_tlist,
-				 List *joinclauses);
+static PathKeyItem *makePathKeyItem(Node *key, Oid sortop);
+static bool pathkeyitem_equal(PathKeyItem *a, PathKeyItem *b);
+static bool pathkeyitem_member(PathKeyItem *a, List *l);
+static Var *find_indexkey_var(int indexkey, List *tlist);
+static List *build_join_pathkey(List *pathkeys, List *join_rel_tlist,
+								List *joinclauses);


 /*--------------------
 *	Explanation of Path.pathkeys
 *
- *	Path.pathkeys is a List of List of Var nodes that represent the sort
- *	order of the result generated by the Path.
+ *	Path.pathkeys is a List of Lists of PathKeyItem nodes that represent
+ *	the sort order of the result generated by the Path.  The n'th sublist
+ *	represents the n'th sort key of the result.
 *
- *	In single/base relation RelOptInfo's, the Path's represent various ways
+ *	In single/base relation RelOptInfo's, the Paths represent various ways
 *	of scanning the relation and the resulting ordering of the tuples.
 *	Sequential scan Paths have NIL pathkeys, indicating no known ordering.
 *	Index scans have Path.pathkeys that represent the chosen index's ordering,
 *  if any.  A single-key index would create a pathkey with a single sublist,
- *	e.g. ( (tab1_indexkey1) ).  A multi-key index generates a sublist per key,
- *	e.g. ( (tab1_indexkey1) (tab1_indexkey2) ) which shows major sort by
- *	indexkey1 and minor sort by indexkey2.
+ *	e.g. ( (tab1.indexkey1/sortop1) ).  A multi-key index generates a sublist
+ *	per key, e.g. ( (tab1.indexkey1/sortop1) (tab1.indexkey2/sortop2) ) which
+ *	shows major sort by indexkey1 (ordering by sortop1) and minor sort by
+ *	indexkey2 with sortop2.
 *
 *	Note that a multi-pass indexscan (OR clause scan) has NIL pathkeys since
- *	we can say nothing about the overall order of its result.  Also, an index
- *	scan on an unordered type of index generates no useful pathkeys.  However,
+ *	we can say nothing about the overall order of its result.  Also, an
+ *	indexscan on an unordered type of index generates NIL pathkeys.  However,
 *	we can always create a pathkey by doing an explicit sort.
 *
 *	Multi-relation RelOptInfo Path's are more complicated.  Mergejoins are
- *	only performed with equijoins ("=").  Because of this, the multi-relation
- *	path actually has more than one primary Var key.  For example, a
- *	mergejoin Path of "tab1.col1 = tab2.col1" would generate pathkeys of
- *	( (tab1.col1 tab2.col1) ), indicating that the major sort order of the
- *	Path can be taken to be *either* tab1.col1 or tab2.col1.
+ *	only performed with equijoins ("=").  Because of this, the resulting
+ *	multi-relation path actually has more than one primary key.  For example,
+ *	a mergejoin using a clause "tab1.col1 = tab2.col1" would generate pathkeys
+ *	of ( (tab1.col1/sortop1 tab2.col1/sortop2) ), indicating that the major
+ *	sort order of the Path can be taken to be *either* tab1.col1 or tab2.col1.
 *	They are equal, so they are both primary sort keys.  This allows future
- *	joins to use either Var as a pre-sorted key to prevent upper Mergejoins
+ *	joins to use either var as a pre-sorted key to prevent upper Mergejoins
 *	from having to re-sort the Path.  This is why pathkeys is a List of Lists.
 *
 *	Note that while the order of the top list is meaningful (primary vs.
- *	secondary sort key), the order of each sublist is arbitrary.
+ *	secondary sort key), the order of each sublist is arbitrary.  No code
+ *	working with pathkeys should generate a result that depends on the order
+ *	of a pathkey sublist.
 *
- *	We can actually keep all of the keys of the outer path of a merge or
- *	nestloop join, since the ordering of the outer path will be reflected
- *	in the result.  We add to each pathkey sublist any inner vars that are
- *	equijoined to any of the outer vars in the sublist.  In the nestloop
- *	case we have to be careful to consider only equijoin operators; the
- *	nestloop's join clauses might include non-equijoin operators.
+ *	We keep a sortop associated with each PathKeyItem because cross-data-type
+ *	mergejoins are possible; for example int4=int8 is mergejoinable.  In this
+ *	case we need to remember that the left var is ordered by int4lt while
+ *	the right var is ordered by int8lt.  So the different members of each
+ *	sublist could have different sortops.
+ *
+ *	When producing the pathkeys for a merge or nestloop join, we can keep
+ *	all of the keys of the outer path, since the ordering of the outer path
+ *	will be preserved in the result.  We add to each pathkey sublist any inner
+ *	vars that are equijoined to any of the outer vars in the sublist.  In the
+ *	nestloop case we have to be careful to consider only equijoin operators;
+ *	the nestloop's join clauses might include non-equijoin operators.
 *	(Currently, we do this by considering only mergejoinable operators while
 *	making the pathkeys, since we have no separate marking for operators that
 *	are equijoins but aren't mergejoinable.)
@ -75,180 +89,174 @@ static List *new_join_pathkey(List *pathkeys, List *join_rel_tlist,
 *	executor might have to split the join into multiple batches.  Therefore
 *	a Hashjoin is always given NIL pathkeys.
 *
- *	Notice that pathkeys only say *what* is being ordered, and not *how*
- *	it is ordered.  The actual sort ordering is indicated by a separate
- *	data structure, the PathOrder.  The PathOrder provides a sort operator
- *	OID for each of the sublists of the path key.  This is fairly bogus,
- *	since in cross-datatype cases we really want to keep track of more than
- *	one sort operator...
- *
 *	-- bjm & tgl
 *--------------------
 */

+
+/*
+ * makePathKeyItem
+ *		create a PathKeyItem node
+ */
+static PathKeyItem *
+makePathKeyItem(Node *key, Oid sortop)
+{
+	PathKeyItem	   *item = makeNode(PathKeyItem);
+
+	item->key = key;
+	item->sortop = sortop;
+	return item;
+}
+
 /****************************************************************************
- *		KEY COMPARISONS
+ *		PATHKEY COMPARISONS
 ****************************************************************************/

 /*
- * order_joinkeys_by_pathkeys
- *	  Attempts to match the keys of a path against the keys of join clauses.
- *	  This is done by looking for a matching join key in 'joinkeys' for
- *	  every path key in the list 'path.keys'. If there is a matching join key
- *	  (not necessarily unique) for every path key, then the list of
- *	  corresponding join keys and join clauses are returned in the order in
- *	  which the keys matched the path keys.
+ * Compare two pathkey items for equality.
 *
- * 'pathkeys' is a list of path keys:
- *		( ( (var) (var) ... ) ( (var) ... ) )
- * 'joinkeys' is a list of join keys:
- *		( (outer inner) (outer inner) ... )
- * 'joinclauses' is a list of clauses corresponding to the join keys in
- *		'joinkeys'
- * 'outer_or_inner' is a flag that selects the desired pathkey of a join key
- *		in 'joinkeys'
+ * This is unlike straight equal() because when the two keys are both Vars,
+ * we want to apply the weaker var_equal() condition (doesn't check varnoold
+ * or varoattno).  But if that fails, try equal() so that we recognize
+ * functional-index keys.
+ */
+static bool
+pathkeyitem_equal (PathKeyItem *a, PathKeyItem *b)
+{
+	Assert(a && IsA(a, PathKeyItem));
+	Assert(b && IsA(b, PathKeyItem));
+
+	if (a->sortop != b->sortop)
+		return false;
+	if (var_equal((Var *) a->key, (Var *) b->key))
+		return true;
+	return equal(a->key, b->key);
+}
+
+/*
+ * member() test using pathkeyitem_equal
+ */
+static bool
+pathkeyitem_member (PathKeyItem *a, List *l)
+{
+	List	   *i;
+
+	Assert(a && IsA(a, PathKeyItem));
+
+	foreach(i, l)
+	{
+		if (pathkeyitem_equal(a, (PathKeyItem *) lfirst(i)))
+			return true;
+	}
+	return false;
+}
+
+/*
+ * compare_pathkeys
+ *	  Compare two pathkeys to see if they are equivalent, and if not whether
+ *	  one is "better" than the other.
 *
- * Returns the join keys and corresponding join clauses in a list if all
- * of the path keys were matched:
- *		(
- *		 ( (outerkey0 innerkey0) ... (outerkeyN or innerkeyN) )
- *		 ( clause0 ... clauseN )
- *		)
- * and nil otherwise.
+ *	  A pathkey can be considered better than another if it is a superset:
+ *	  it contains all the keys of the other plus more.  For example, either
+ *	  ((A) (B)) or ((A B)) is better than ((A)).
 *
- * Returns a list of matched join keys and a list of matched join clauses
- * in pointers if valid order can be found.
+ *	This gets called a lot, so it is optimized.
+ */
+PathKeysComparison
+compare_pathkeys(List *keys1, List *keys2)
+{
+	List	   *key1,
+			   *key2;
+	bool		key1_subsetof_key2 = true,
+				key2_subsetof_key1 = true;
+
+	for (key1 = keys1, key2 = keys2;
+		 key1 != NIL && key2 != NIL;
+		 key1 = lnext(key1), key2 = lnext(key2))
+	{
+		List	   *subkey1 = lfirst(key1);
+		List	   *subkey2 = lfirst(key2);
+		List	   *i;
+
+		/* We have to do this the hard way since the ordering of the subkey
+		 * lists is arbitrary.
+		 */
+		if (key1_subsetof_key2)
+		{
+			foreach(i, subkey1)
+			{
+				if (! pathkeyitem_member((PathKeyItem *) lfirst(i), subkey2))
+				{
+					key1_subsetof_key2 = false;
+					break;
+				}
+			}
+		}
+
+		if (key2_subsetof_key1)
+		{
+			foreach(i, subkey2)
+			{
+				if (! pathkeyitem_member((PathKeyItem *) lfirst(i), subkey1))
+				{
+					key2_subsetof_key1 = false;
+					break;
+				}
+			}
+		}
+
+		if (!key1_subsetof_key2 && !key2_subsetof_key1)
+			return PATHKEYS_DIFFERENT; /* no need to keep looking */
+	}
+
+	/* If we reached the end of only one list, the other is longer and
+	 * therefore not a subset.  (We assume the additional sublist(s)
+	 * of the other list are not NIL --- no pathkey list should ever have
+	 * a NIL sublist.)
+	 */
+	if (key1 != NIL)
+		key1_subsetof_key2 = false;
+	if (key2 != NIL)
+		key2_subsetof_key1 = false;
+
+	if (key1_subsetof_key2 && key2_subsetof_key1)
+		return PATHKEYS_EQUAL;
+	if (key1_subsetof_key2)
+		return PATHKEYS_BETTER2;
+	if (key2_subsetof_key1)
+		return PATHKEYS_BETTER1;
+	return PATHKEYS_DIFFERENT;
+}
+
+/*
+ * pathkeys_contained_in
+ *	  Common special case of compare_pathkeys: we just want to know
+ *	  if keys2 are at least as well sorted as keys1.
 */
 bool
-order_joinkeys_by_pathkeys(List *pathkeys,
-						   List *joinkeys,
-						   List *joinclauses,
-						   int outer_or_inner,
-						   List **matchedJoinKeysPtr,
-						   List **matchedJoinClausesPtr)
+pathkeys_contained_in(List *keys1, List *keys2)
 {
-	List	   *matched_joinkeys = NIL;
-	List	   *matched_joinclauses = NIL;
-	List	   *pathkey = NIL;
-	List	   *i = NIL;
-	int			matched_joinkey_index = -1;
-	int			matched_keys = 0;
-
-	/*
-	 * Reorder the joinkeys by picking out one that matches each pathkey,
-	 * and create a new joinkey/joinclause list in pathkey order
-	 */
-	foreach(i, pathkeys)
+	switch (compare_pathkeys(keys1, keys2))
 	{
-		pathkey = lfirst(i);
-		matched_joinkey_index = match_pathkey_joinkeys(pathkey, joinkeys,
-													   outer_or_inner);
-
-		if (matched_joinkey_index != -1)
-		{
-			matched_keys++;
-			if (matchedJoinKeysPtr)
-			{
-				JoinKey    *joinkey = nth(matched_joinkey_index, joinkeys);
-
-				matched_joinkeys = lappend(matched_joinkeys, joinkey);
-			}
-
-			if (matchedJoinClausesPtr)
-			{
-				Expr	   *joinclause = nth(matched_joinkey_index,
-											 joinclauses);
-
-				Assert(joinclauses);
-				matched_joinclauses = lappend(matched_joinclauses, joinclause);
-			}
-		}
-		else
-			/* A pathkey could not be matched. */
+		case PATHKEYS_EQUAL:
+		case PATHKEYS_BETTER2:
+			return true;
+		default:
 			break;
 	}
-
-	/*
-	 * Did we fail to match all the joinkeys? Extra pathkeys are no
-	 * problem.
-	 */
-	if (matched_keys != length(joinkeys))
-	{
-		if (matchedJoinKeysPtr)
-			*matchedJoinKeysPtr = NIL;
-		if (matchedJoinClausesPtr)
-			*matchedJoinClausesPtr = NIL;
-		return false;
-	}
-
-	if (matchedJoinKeysPtr)
-		*matchedJoinKeysPtr = matched_joinkeys;
-	if (matchedJoinClausesPtr)
-		*matchedJoinClausesPtr = matched_joinclauses;
-	return true;
+	return false;
 }

-
 /*
- * match_pathkey_joinkeys
- *	  Returns the 0-based index into 'joinkeys' of the first joinkey whose
- *	  outer or inner pathkey matches any subkey of 'pathkey'.
+ * get_cheapest_path_for_pathkeys
+ *	  Find the cheapest path in 'paths' that satisfies the given pathkeys.
+ *	  Return NULL if no such path.
 *
- *	All these keys are equivalent, so any of them can match.  See above.
- */
-static int
-match_pathkey_joinkeys(List *pathkey,
-					   List *joinkeys,
-					   int outer_or_inner)
-{
-	Var		   *key;
-	int			pos;
-	List	   *i,
-			   *x;
-	JoinKey    *jk;
-
-	foreach(i, pathkey)
-	{
-		key = (Var *) lfirst(i);
-		pos = 0;
-		foreach(x, joinkeys)
-		{
-			jk = (JoinKey *) lfirst(x);
-			if (equal(key, extract_join_key(jk, outer_or_inner)))
-				return pos;
-			pos++;
-		}
-	}
-	return -1;					/* no index found	*/
-}
-
-
-/*
- * get_cheapest_path_for_joinkeys
- *	  Attempts to find a path in 'paths' whose keys match a set of join
- *	  keys 'joinkeys'.	To match,
- *	  1. the path node ordering must equal 'ordering'.
- *	  2. each pathkey of a given path must match(i.e., be(equal) to) the
- *		 appropriate pathkey of the corresponding join key in 'joinkeys',
- *		 i.e., the Nth path key must match its pathkeys against the pathkey of
- *		 the Nth join key in 'joinkeys'.
- *
- * 'joinkeys' is the list of key pairs to which the path keys must be
- *		matched
- * 'ordering' is the ordering of the(outer) path to which 'joinkeys'
- *		must correspond
- * 'paths' is a list of(inner) paths which are to be matched against
- *		each join key in 'joinkeys'
- * 'outer_or_inner' is a flag that selects the desired pathkey of a join key
- *		in 'joinkeys'
- *
- *	Find the cheapest path that matches the join keys
+ * 'paths' is a list of possible paths (either inner or outer)
+ * 'pathkeys' represents a required ordering
 */
 Path *
-get_cheapest_path_for_joinkeys(List *joinkeys,
-							   PathOrder *ordering,
-							   List *paths,
-							   int outer_or_inner)
+get_cheapest_path_for_pathkeys(List *paths, List *pathkeys)
 {
 	Path	   *matched_path = NULL;
 	List	   *i;
@ -256,12 +264,8 @@ get_cheapest_path_for_joinkeys(List *joinkeys,
 	foreach(i, paths)
 	{
 		Path	   *path = (Path *) lfirst(i);
-		int			better_sort;

-		if (order_joinkeys_by_pathkeys(path->pathkeys, joinkeys, NIL,
-									   outer_or_inner, NULL, NULL) &&
-			pathorder_match(ordering, path->pathorder, &better_sort) &&
-			better_sort == 0)
+		if (pathkeys_contained_in(pathkeys, path->pathkeys))
 		{
 			if (matched_path == NULL ||
 				path->path_cost < matched_path->path_cost)
@ -271,78 +275,116 @@ get_cheapest_path_for_joinkeys(List *joinkeys,
 	return matched_path;
 }

-
-/*
- * make_pathkeys_from_joinkeys
- *	  Builds a pathkey list for a path by pulling one of the pathkeys from
- *	  a list of join keys 'joinkeys' and then finding the var node in the
- *	  target list 'tlist' that corresponds to that pathkey.
- *
- * 'joinkeys' is a list of join key pairs
- * 'tlist' is a relation target list
- * 'outer_or_inner' is a flag that selects the desired pathkey of a join key
- *	in 'joinkeys'
- *
- * Returns a list of pathkeys: ((tlvar1)(tlvar2)...(tlvarN)).
- * It is a list of lists because of multi-key indexes.
- */
-List *
-make_pathkeys_from_joinkeys(List *joinkeys,
-							List *tlist,
-							int outer_or_inner)
-{
-	List	   *pathkeys = NIL;
-	List	   *jk;
-
-	foreach(jk, joinkeys)
-	{
-		JoinKey    *jkey = (JoinKey *) lfirst(jk);
-		Var		   *key;
-		List	   *p,
-				   *p2;
-		bool		found = false;
-
-		key = (Var *) extract_join_key(jkey, outer_or_inner);
-
-		/* check to see if it is in the target list */
-		if (matching_tlist_var(key, tlist))
-		{
-
-			/*
-			 * Include it in the pathkeys list if we haven't already done
-			 * so
-			 */
-			foreach(p, pathkeys)
-			{
-				List	   *pathkey = lfirst(p);
-
-				foreach(p2, pathkey)
-				{
-					Var		   *pkey = lfirst(p2);
-
-					if (equal(key, pkey))
-					{
-						found = true;
-						break;
-					}
-				}
-				if (found)
-					break;
-			}
-			if (!found)
-				pathkeys = lappend(pathkeys, lcons(key, NIL));
-		}
-	}
-	return pathkeys;
-}
-
-
 /****************************************************************************
 *		NEW PATHKEY FORMATION
 ****************************************************************************/

 /*
- * new_join_pathkeys
+ * build_index_pathkeys
+ *	  Build a pathkeys list that describes the ordering induced by an index
+ *	  scan using the given index.  (Note that an unordered index doesn't
+ *	  induce any ordering; such an index will have no sortop OIDS in
+ *	  its "ordering" field.)
+ *
+ * Vars in the resulting pathkeys list are taken from the rel's targetlist.
+ * If we can't find the indexkey in the targetlist, we assume that the
+ * ordering of that key is not interesting.
+ */
+List *
+build_index_pathkeys(Query *root, RelOptInfo *rel, RelOptInfo *index)
+{
+	List	   *retval = NIL;
+	int		   *indexkeys = index->indexkeys;
+	Oid		   *ordering = index->ordering;
+
+	if (!indexkeys || indexkeys[0] == 0 ||
+		!ordering || ordering[0] == InvalidOid)
+		return NIL;				/* unordered index? */
+
+	if (index->indproc)
+	{
+		/* Functional index: build a representation of the function call */
+		int			relid = lfirsti(rel->relids);
+		Oid			reloid = getrelid(relid, root->rtable);
+		Func	   *funcnode = makeNode(Func);
+		List	   *funcargs = NIL;
+
+		funcnode->funcid = index->indproc;
+		funcnode->functype = get_func_rettype(index->indproc);
+		funcnode->funcisindex = false;
+		funcnode->funcsize = 0;
+		funcnode->func_fcache = NULL;
+		funcnode->func_tlist = NIL;
+		funcnode->func_planlist = NIL;
+
+		while (*indexkeys != 0)
+		{
+			int			varattno = *indexkeys;
+			Oid			vartypeid = get_atttype(reloid, varattno);
+			int32		type_mod = get_atttypmod(reloid, varattno);
+
+			funcargs = lappend(funcargs,
+							   makeVar(relid, varattno, vartypeid, type_mod,
+									   0, relid, varattno));
+			indexkeys++;
+		}
+
+		/* Make a one-sublist pathkeys list for the function expression */
+		retval = lcons(lcons(
+			makePathKeyItem((Node *) make_funcclause(funcnode, funcargs),
+							*ordering),
+			NIL), NIL);
+	}
+	else
+	{
+		/* Normal non-functional index */
+		List	   *rel_tlist = rel->targetlist;
+
+		while (*indexkeys != 0 && *ordering != InvalidOid)
+		{
+			Var		*relvar = find_indexkey_var(*indexkeys, rel_tlist);
+
+			/* If we can find no tlist entry for the n'th sort key,
+			 * then we're done generating pathkeys; any subsequent sort keys
+			 * no longer apply, since we can't represent the ordering properly
+			 * even if there are tlist entries for them.
+			 */
+			if (!relvar)
+				break;
+			/* OK, make a one-element sublist for this sort key */
+			retval = lappend(retval,
+							 lcons(makePathKeyItem((Node *) relvar,
+												   *ordering),
+								   NIL));
+
+			indexkeys++;
+			ordering++;
+		}
+	}
+
+	return retval;
+}
+
+/*
+ * Find a var in a relation's targetlist that matches an indexkey attrnum.
+ */
+static Var *
+find_indexkey_var(int indexkey, List *tlist)
+{
+	List	   *temp;
+
+	foreach(temp, tlist)
+	{
+		Var	   *tle_var = get_expr(lfirst(temp));
+
+		if (IsA(tle_var, Var) && tle_var->varattno == indexkey)
+			return tle_var;
+	}
+	return NULL;
+}
+
+/*
+ * build_join_pathkeys
 *	  Build the path keys for a join relation constructed by mergejoin or
 *	  nestloop join.  These keys should include all the path key vars of the
 *	  outer path (since the join will retain the ordering of the outer path)
@ -362,21 +404,26 @@ make_pathkeys_from_joinkeys(List *joinkeys,
 *	  the inner var will acquire the outer's ordering no matter which join
 *	  method is actually used.
 *
- * All vars in the result are copied from the join relation's tlist, not from
- * the given pathkeys or the join clauses.  (Is that necessary?  I suspect
- * not --- tgl)
+ *	  We drop pathkeys that are not vars of the join relation's tlist,
+ *	  on the assumption that they are not interesting to higher levels.
+ *	  (Is this correct??  To support expression pathkeys we might want to
+ *	  check that all vars mentioned in the key are in the tlist, instead.)
+ *
+ * All vars in the result are taken from the join relation's tlist,
+ * not from the given pathkeys or joinclauses.
 *
 * 'outer_pathkeys' is the list of the outer path's path keys
 * 'join_rel_tlist' is the target list of the join relation
- * 'joinclauses' is the list of mergejoinable join clauses
+ * 'joinclauses' is the list of mergejoinable clauses to consider (note this
+ *		is a list of RestrictInfos, not just bare qual clauses); can be NIL
 *
 * Returns the list of new path keys.
 *
 */
 List *
-new_join_pathkeys(List *outer_pathkeys,
-				  List *join_rel_tlist,
-				  List *joinclauses)
+build_join_pathkeys(List *outer_pathkeys,
+					List *join_rel_tlist,
+					List *joinclauses)
 {
 	List	   *final_pathkeys = NIL;
 	List	   *i;
@ -386,11 +433,11 @@ new_join_pathkeys(List *outer_pathkeys,
 		List	   *outer_pathkey = lfirst(i);
 		List	   *new_pathkey;

-		new_pathkey = new_join_pathkey(outer_pathkey, join_rel_tlist,
-									   joinclauses);
+		new_pathkey = build_join_pathkey(outer_pathkey, join_rel_tlist,
+										 joinclauses);
 		/* if we can find no sortable vars for the n'th sort key,
-		 * then we're done generating pathkeys; can't expect to order
-		 * subsequent vars.  Not clear that this can really happen.
+		 * then we're done generating pathkeys; any subsequent sort keys
+		 * no longer apply, since we can't represent the ordering properly.
 		 */
 		if (new_pathkey == NIL)
 			break;
@ -400,25 +447,22 @@ new_join_pathkeys(List *outer_pathkeys,
 }

 /*
- * new_join_pathkey
+ * build_join_pathkey
 *	  Generate an individual pathkey sublist, consisting of the outer vars
 *	  already mentioned in 'pathkey' plus any inner vars that are joined to
 *	  them (and thus can now also be considered path keys, per discussion
 *	  at the top of this file).
 *
- *	  Note that each returned pathkey is the var node found in
+ *	  Note that each returned pathkey uses the var node found in
 *	  'join_rel_tlist' rather than the input pathkey or joinclause var node.
- *	  (Is this important?)	Also, we return a fully copied list
- *	  that does not share any subnodes with existing data structures.
- *	  (Is that important, either?)
- *
- * Returns a new pathkey (list of pathkey variables).
+ *	  (Is this important?)
 *
+ * Returns a new pathkey (list of PathKeyItems).
 */
 static List *
-new_join_pathkey(List *pathkey,
-				 List *join_rel_tlist,
-				 List *joinclauses)
+build_join_pathkey(List *pathkey,
+				   List *join_rel_tlist,
+				   List *joinclauses)
 {
 	List	   *new_pathkey = NIL;
 	List	   *i,
@ -426,27 +470,193 @@ new_join_pathkey(List *pathkey,

 	foreach(i, pathkey)
 	{
-		Var		   *key = (Var *) lfirst(i);
+		PathKeyItem *key = (PathKeyItem *) lfirst(i);
 		Expr	   *tlist_key;

-		Assert(key);
+		Assert(key && IsA(key, PathKeyItem));

-		tlist_key = matching_tlist_var(key, join_rel_tlist);
-		if (tlist_key && !member(tlist_key, new_pathkey))
-			new_pathkey = lcons(copyObject(tlist_key), new_pathkey);
+		tlist_key = matching_tlist_var((Var *) key->key, join_rel_tlist);
+		if (tlist_key)
+			new_pathkey = lcons(makePathKeyItem((Node *) tlist_key,
+												key->sortop),
+								new_pathkey);

 		foreach(j, joinclauses)
 		{
-			Expr	   *joinclause = lfirst(j);
-			Expr	   *tlist_other_var;
+			RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
+			Expr	   *joinclause = restrictinfo->clause;
+			/* We assume the clause is a binary opclause... */
+			Var		   *l = get_leftop(joinclause);
+			Var		   *r = get_rightop(joinclause);
+			Var		   *other_var = NULL;
+			Oid			other_sortop = InvalidOid;

-			tlist_other_var = matching_tlist_var(
-								  other_join_clause_var(key, joinclause),
-												 join_rel_tlist);
-			if (tlist_other_var && !member(tlist_other_var, new_pathkey))
-				new_pathkey = lcons(copyObject(tlist_other_var), new_pathkey);
+			if (var_equal((Var *) key->key, l))
+			{
+				other_var = r;
+				other_sortop = restrictinfo->right_sortop;
+			}
+			else if (var_equal((Var *) key->key, r))
+			{
+				other_var = l;
+				other_sortop = restrictinfo->left_sortop;
+			}
+
+			if (other_var && other_sortop)
+			{
+				tlist_key = matching_tlist_var(other_var, join_rel_tlist);
+				if (tlist_key)
+					new_pathkey = lcons(makePathKeyItem((Node *) tlist_key,
+														other_sortop),
+										new_pathkey);
+			}
 		}
 	}

 	return new_pathkey;
 }
+
+/****************************************************************************
+ *		PATHKEYS AND MERGECLAUSES
+ ****************************************************************************/
+
+/*
+ * find_mergeclauses_for_pathkeys
+ *	  This routine attempts to find a set of mergeclauses that can be
+ *	  used with a specified ordering for one of the input relations.
+ *	  If successful, it returns a list of mergeclauses.
+ *
+ * 'pathkeys' is a pathkeys list showing the ordering of an input path.
+ *			It doesn't matter whether it is for the inner or outer path.
+ * 'restrictinfos' is a list of mergejoinable restriction clauses for the
+ *			join relation being formed.
+ *
+ * The result is NIL if no merge can be done, else a maximal list of
+ * usable mergeclauses (represented as a list of their restrictinfo nodes).
+ *
+ * XXX Ideally we ought to be considering context, ie what path orderings
+ * are available on the other side of the join, rather than just making
+ * an arbitrary choice among the mergeclause orders that will work for
+ * this side of the join.
+ */
+List *
+find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos)
+{
+	List	   *mergeclauses = NIL;
+	List	   *i;
+
+	foreach(i, pathkeys)
+	{
+		List		   *pathkey = lfirst(i);
+		RestrictInfo   *matched_restrictinfo = NULL;
+		List		   *j;
+
+		/*
+		 * We can match any of the keys in this pathkey sublist,
+		 * since they're all equivalent.  And we can match against
+		 * either left or right side of any mergejoin clause we haven't
+		 * used yet.  For the moment we use a dumb "greedy" algorithm
+		 * with no backtracking.  Is it worth being any smarter to
+		 * make a longer list of usable mergeclauses?  Probably not.
+		 */
+		foreach(j, pathkey)
+		{
+			PathKeyItem	   *keyitem = lfirst(j);
+			Var			   *keyvar = (Var *) keyitem->key;
+			List		   *k;
+
+			if (! IsA(keyvar, Var))
+				continue;		/* for now, only Vars can be mergejoined */
+
+			foreach(k, restrictinfos)
+			{
+				RestrictInfo   *restrictinfo = lfirst(k);
+
+				Assert(restrictinfo->mergejoinoperator != InvalidOid);
+
+				if ((var_equal(keyvar, get_leftop(restrictinfo->clause)) ||
+					 var_equal(keyvar, get_rightop(restrictinfo->clause))) &&
+					! member(restrictinfo, mergeclauses))
+				{
+					matched_restrictinfo = restrictinfo;
+					break;
+				}
+			}
+			if (matched_restrictinfo)
+				break;
+		}
+
+		/*
+		 * If we didn't find a mergeclause, we're done --- any additional
+		 * sort-key positions in the pathkeys are useless.  (But we can
+		 * still mergejoin if we found at least one mergeclause.)
+		 */
+		if (! matched_restrictinfo)
+			break;
+		/*
+		 * If we did find a usable mergeclause for this sort-key position,
+		 * add it to result list.
+		 */
+		mergeclauses = lappend(mergeclauses, matched_restrictinfo);
+	}
+
+	return mergeclauses;
+}
+
+/*
+ * make_pathkeys_for_mergeclauses
+ *	  Builds a pathkey list representing the explicit sort order that
+ *	  must be applied to a path in order to make it usable for the
+ *	  given mergeclauses.
+ *
+ * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
+ *			that will be used in a merge join.
+ * 'tlist' is a relation target list for either the inner or outer
+ *			side of the proposed join rel.
+ *
+ * Returns a pathkeys list that can be applied to the indicated relation.
+ *
+ * Note that it is not this routine's job to decide whether sorting is
+ * actually needed for a particular input path.  Assume a sort is necessary;
+ * just make the keys, eh?
+ */
+List *
+make_pathkeys_for_mergeclauses(List *mergeclauses, List *tlist)
+{
+	List	   *pathkeys = NIL;
+	List	   *i;
+
+	foreach(i, mergeclauses)
+	{
+		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i);
+		Var		   *key;
+		Oid			sortop;
+
+		/*
+		 * Find the key and sortop needed for this mergeclause.
+		 *
+		 * We can use either side of the mergeclause, since we haven't yet
+		 * committed to which side will be inner.
+		 */
+		Assert(restrictinfo->mergejoinoperator != InvalidOid);
+		key = (Var *) matching_tlist_var(get_leftop(restrictinfo->clause),
+										 tlist);
+		sortop = restrictinfo->left_sortop;
+		if (! key)
+		{
+			key = (Var *) matching_tlist_var(get_rightop(restrictinfo->clause),
+											 tlist);
+			sortop = restrictinfo->right_sortop;
+		}
+		if (! key)
+			elog(ERROR, "make_pathkeys_for_mergeclauses: can't find key");
+		/*
+		 * Add a pathkey sublist for this sort item
+		 */
+		pathkeys = lappend(pathkeys,
+						   lcons(makePathKeyItem((Node *) key, sortop),
+								 NIL));
+	}
+
+	return pathkeys;
+}