Rearrange the querytree representation of ORDER BY/GROUP BY/DISTINCT items

as per my recent proposal: 1. Fold SortClause and GroupClause into a single node type SortGroupClause. We were already relying on them to be struct-equivalent, so using two node tags wasn't accomplishing much except to get in the way of comparing items with equal(). 2. Add an "eqop" field to SortGroupClause to carry the associated equality operator. This is cheap for the parser to get at the same time it's looking up the sort operator, and storing it eliminates the need for repeated not-so-cheap lookups during planning. In the future this will also let us represent GROUP/DISTINCT operations on datatypes that have hash opclasses but no btree opclasses (i.e., they have equality but no natural sort order). The previous representation simply didn't work for that, since its only indicator of comparison semantics was a sort operator. 3. Add a hasDistinctOn boolean to struct Query to explicitly record whether the distinctClause came from DISTINCT or DISTINCT ON. This allows removing some complicated and not 100% bulletproof code that attempted to figure that out from the distinctClause alone. This patch doesn't in itself create any new capability, but it's necessary infrastructure for future attempts to use hash-based grouping for DISTINCT and UNION/INTERSECT/EXCEPT.
2025-07-02 09:02:37 +03:00 · 2008-08-02 21:32:01 +00:00
parent 49f001d81e
commit 9511304752
33 changed files with 764 additions and 857 deletions
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@ -1,4 +1,4 @@
-$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.46 2008/04/09 01:00:46 momjian Exp $
+$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.47 2008/08/02 21:31:59 tgl Exp $

 Optimizer
 =========
@ -563,8 +563,8 @@ competing Paths are equivalently sorted.

 Pathkeys are also useful to represent an ordering that we wish to achieve,
 since they are easily compared to the pathkeys of a potential candidate
-path.  So, SortClause lists are turned into pathkeys lists for use inside
-the optimizer.
+path.  So, SortGroupClause lists are turned into pathkeys lists for use
+inside the optimizer.

 Because we have to generate pathkeys lists from the sort clauses before
 we've finished EquivalenceClass merging, we cannot use the pointer-equality
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.171 2008/06/27 03:56:55 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.172 2008/08/02 21:31:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -963,11 +963,12 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
 *
 * 4. If the subquery uses DISTINCT ON, we must not push down any quals that
 * refer to non-DISTINCT output columns, because that could change the set
- * of rows returned.  This condition is vacuous for DISTINCT, because then
- * there are no non-DISTINCT output columns, but unfortunately it's fairly
- * expensive to tell the difference between DISTINCT and DISTINCT ON in the
- * parsetree representation.  It's cheaper to just make sure all the Vars
- * in the qual refer to DISTINCT columns.
+ * of rows returned.  (This condition is vacuous for DISTINCT, because then
+ * there are no non-DISTINCT output columns, so we needn't check.  But note
+ * we are assuming that the qual can't distinguish values that the DISTINCT
+ * operator sees as equal.  This is a bit shaky but we have no way to test
+ * for the case, and it's unlikely enough that we shouldn't refuse the
+ * optimization just because it could theoretically happen.)
 *
 * 5. We must not push down any quals that refer to subselect outputs that
 * return sets, else we'd introduce functions-returning-sets into the
@ -1030,8 +1031,8 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
 		Assert(tle != NULL);
 		Assert(!tle->resjunk);

-		/* If subquery uses DISTINCT or DISTINCT ON, check point 4 */
-		if (subquery->distinctClause != NIL &&
+		/* If subquery uses DISTINCT ON, check point 4 */
+		if (subquery->hasDistinctOn &&
 			!targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
 		{
 			/* non-DISTINCT column, so fail */
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@ -10,7 +10,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.10 2008/03/31 16:59:26 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.11 2008/08/02 21:31:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -355,7 +355,7 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
 *	  class it is a member of; if none, build a new single-member
 *	  EquivalenceClass for it.
 *
- * sortref is the SortGroupRef of the originating SortClause, if any,
+ * sortref is the SortGroupRef of the originating SortGroupClause, if any,
 * or zero if not.
 *
 * This can be used safely both before and after EquivalenceClass merging;
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@ -11,7 +11,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.93 2008/01/09 20:42:28 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.94 2008/08/02 21:31:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -227,8 +227,8 @@ canonicalize_pathkeys(PlannerInfo *root, List *pathkeys)
 *	  a PathKey.  If canonicalize = true, the result is a "canonical"
 *	  PathKey, otherwise not.  (But note it might be redundant anyway.)
 *
- * If the PathKey is being generated from a SortClause, sortref should be
- * the SortClause's SortGroupRef; otherwise zero.
+ * If the PathKey is being generated from a SortGroupClause, sortref should be
+ * the SortGroupClause's SortGroupRef; otherwise zero.
 *
 * canonicalize should always be TRUE after EquivalenceClass merging has
 * been performed, but FALSE if we haven't done EquivalenceClass merging yet.
@ -823,7 +823,7 @@ build_join_pathkeys(PlannerInfo *root,
 /*
 * make_pathkeys_for_sortclauses
 *		Generate a pathkeys list that represents the sort order specified
- *		by a list of SortClauses (GroupClauses will work too!)
+ *		by a list of SortGroupClauses
 *
 * If canonicalize is TRUE, the resulting PathKeys are all in canonical form;
 * otherwise not.  canonicalize should always be TRUE after EquivalenceClass
@ -832,7 +832,7 @@ build_join_pathkeys(PlannerInfo *root,
 * be able to represent requested pathkeys before the equivalence classes have
 * been created for the query.)
 *
- * 'sortclauses' is a list of SortClause or GroupClause nodes
+ * 'sortclauses' is a list of SortGroupClause nodes
 * 'tlist' is the targetlist to find the referenced tlist entries in
 */
 List *
@ -846,11 +846,12 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,

 	foreach(l, sortclauses)
 	{
-		SortClause *sortcl = (SortClause *) lfirst(l);
+		SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
 		Expr	   *sortkey;
 		PathKey    *pathkey;

 		sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
+		Assert(OidIsValid(sortcl->sortop));
 		pathkey = make_pathkey_from_sortinfo(root,
 											 sortkey,
 											 sortcl->sortop,
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@ -10,7 +10,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.241 2008/06/27 03:56:55 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.242 2008/08/02 21:32:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -760,7 +760,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
 			Oid			in_oper = lfirst_oid(l);
 			Oid			sortop;
 			TargetEntry *tle;
-			SortClause *sortcl;
+			SortGroupClause *sortcl;

 			sortop = get_ordering_op_for_equality_op(in_oper, false);
 			if (!OidIsValid(sortop))	/* shouldn't happen */
@ -769,9 +769,10 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
 			tle = get_tle_by_resno(subplan->targetlist,
 								   groupColIdx[groupColPos]);
 			Assert(tle != NULL);
-			sortcl = makeNode(SortClause);
+			sortcl = makeNode(SortGroupClause);
 			sortcl->tleSortGroupRef = assignSortGroupRef(tle,
 														 subplan->targetlist);
+			sortcl->eqop = in_oper;
 			sortcl->sortop = sortop;
 			sortcl->nulls_first = false;
 			sortList = lappend(sortList, sortcl);
@ -2531,6 +2532,8 @@ add_sort_column(AttrNumber colIdx, Oid sortOp, bool nulls_first,
 {
 	int			i;

+	Assert(OidIsValid(sortOp));
+
 	for (i = 0; i < numCols; i++)
 	{
 		/*
@ -2753,7 +2756,7 @@ make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys,
 * make_sort_from_sortclauses
 *	  Create sort plan to sort according to given sortclauses
 *
- *	  'sortcls' is a list of SortClauses
+ *	  'sortcls' is a list of SortGroupClauses
 *	  'lefttree' is the node which yields input tuples
 */
 Sort *
@ -2778,7 +2781,7 @@ make_sort_from_sortclauses(PlannerInfo *root, List *sortcls, Plan *lefttree)

 	foreach(l, sortcls)
 	{
-		SortClause *sortcl = (SortClause *) lfirst(l);
+		SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
 		TargetEntry *tle = get_sortgroupclause_tle(sortcl, sub_tlist);

 		/*
@ -2802,14 +2805,14 @@ make_sort_from_sortclauses(PlannerInfo *root, List *sortcls, Plan *lefttree)
 * make_sort_from_groupcols
 *	  Create sort plan to sort based on grouping columns
 *
- * 'groupcls' is the list of GroupClauses
+ * 'groupcls' is the list of SortGroupClauses
 * 'grpColIdx' gives the column numbers to use
 *
 * This might look like it could be merged with make_sort_from_sortclauses,
 * but presently we *must* use the grpColIdx[] array to locate sort columns,
 * because the child plan's tlist is not marked with ressortgroupref info
 * appropriate to the grouping node.  So, only the sort ordering info
- * is used from the GroupClause entries.
+ * is used from the SortGroupClause entries.
 */
 Sort *
 make_sort_from_groupcols(PlannerInfo *root,
@ -2837,7 +2840,7 @@ make_sort_from_groupcols(PlannerInfo *root,

 	foreach(l, groupcls)
 	{
-		GroupClause *grpcl = (GroupClause *) lfirst(l);
+		SortGroupClause *grpcl = (SortGroupClause *) lfirst(l);
 		TargetEntry *tle = get_tle_by_resno(sub_tlist, grpColIdx[grpno]);

 		/*
@ -3038,7 +3041,7 @@ make_group(PlannerInfo *root,
 }

 /*
- * distinctList is a list of SortClauses, identifying the targetlist items
+ * distinctList is a list of SortGroupClauses, identifying the targetlist items
 * that should be considered by the Unique filter.	The input path must
 * already be sorted accordingly.
 */
@ -3074,7 +3077,7 @@ make_unique(Plan *lefttree, List *distinctList)
 	plan->righttree = NULL;

 	/*
-	 * convert SortClause list into arrays of attr indexes and equality
+	 * convert SortGroupClause list into arrays of attr indexes and equality
 	 * operators, as wanted by executor
 	 */
 	Assert(numCols > 0);
@ -3083,14 +3086,12 @@ make_unique(Plan *lefttree, List *distinctList)

 	foreach(slitem, distinctList)
 	{
-		SortClause *sortcl = (SortClause *) lfirst(slitem);
+		SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem);
 		TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist);

 		uniqColIdx[keyno] = tle->resno;
-		uniqOperators[keyno] = get_equality_op_for_ordering_op(sortcl->sortop);
-		if (!OidIsValid(uniqOperators[keyno]))	/* shouldn't happen */
-			elog(ERROR, "could not find equality operator for ordering operator %u",
-				 sortcl->sortop);
+		uniqOperators[keyno] = sortcl->eqop;
+		Assert(OidIsValid(uniqOperators[keyno]));
 		keyno++;
 	}

@ -3102,8 +3103,8 @@ make_unique(Plan *lefttree, List *distinctList)
 }

 /*
- * distinctList is a list of SortClauses, identifying the targetlist items
- * that should be considered by the SetOp filter.  The input path must
+ * distinctList is a list of SortGroupClauses, identifying the targetlist
+ * items that should be considered by the SetOp filter.  The input path must
 * already be sorted accordingly.
 */
 SetOp *
@ -3140,7 +3141,7 @@ make_setop(SetOpCmd cmd, Plan *lefttree,
 	plan->righttree = NULL;

 	/*
-	 * convert SortClause list into arrays of attr indexes and equality
+	 * convert SortGroupClause list into arrays of attr indexes and equality
 	 * operators, as wanted by executor
 	 */
 	Assert(numCols > 0);
@ -3149,14 +3150,12 @@ make_setop(SetOpCmd cmd, Plan *lefttree,

 	foreach(slitem, distinctList)
 	{
-		SortClause *sortcl = (SortClause *) lfirst(slitem);
+		SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem);
 		TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist);

 		dupColIdx[keyno] = tle->resno;
-		dupOperators[keyno] = get_equality_op_for_ordering_op(sortcl->sortop);
-		if (!OidIsValid(dupOperators[keyno]))	/* shouldn't happen */
-			elog(ERROR, "could not find equality operator for ordering operator %u",
-				 sortcl->sortop);
+		dupOperators[keyno] = sortcl->eqop;
+		Assert(OidIsValid(dupOperators[keyno]));
 		keyno++;
 	}

--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.41 2008/07/10 02:14:03 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.42 2008/08/02 21:32:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -477,7 +477,7 @@ make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info)
 	Plan	   *plan;
 	Plan	   *iplan;
 	TargetEntry *tle;
-	SortClause *sortcl;
+	SortGroupClause *sortcl;

 	/*
 	 * Generate a suitably modified query.	Much of the work here is probably
@ -492,6 +492,7 @@ make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info)
 	subparse->utilityStmt = NULL;
 	subparse->intoClause = NULL;
 	subparse->hasAggs = false;
+	subparse->hasDistinctOn = false;
 	subparse->groupClause = NIL;
 	subparse->havingQual = NULL;
 	subparse->distinctClause = NIL;
@ -505,8 +506,12 @@ make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info)
 	subparse->targetList = list_make1(tle);

 	/* set up the appropriate ORDER BY entry */
-	sortcl = makeNode(SortClause);
+	sortcl = makeNode(SortGroupClause);
 	sortcl->tleSortGroupRef = assignSortGroupRef(tle, subparse->targetList);
+	sortcl->eqop = get_equality_op_for_ordering_op(info->aggsortop, NULL);
+	if (!OidIsValid(sortcl->eqop))		/* shouldn't happen */
+		elog(ERROR, "could not find equality operator for ordering operator %u",
+			 info->aggsortop);
 	sortcl->sortop = info->aggsortop;
 	sortcl->nulls_first = info->nulls_first;
 	subparse->sortClause = list_make1(sortcl);
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.235 2008/07/31 22:47:56 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.236 2008/08/02 21:32:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -67,6 +67,7 @@ static bool is_dummy_plan(Plan *plan);
 static double preprocess_limit(PlannerInfo *root,
 				 double tuple_fraction,
 				 int64 *offset_est, int64 *count_est);
+static void preprocess_groupclause(PlannerInfo *root);
 static Oid *extract_grouping_ops(List *groupClause);
 static bool choose_hashed_grouping(PlannerInfo *root,
 					   double tuple_fraction, double limit_tuples,
@ -846,11 +847,16 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		Path	   *best_path;
 		long		numGroups = 0;
 		AggClauseCounts agg_counts;
-		int			numGroupCols = list_length(parse->groupClause);
+		int			numGroupCols;
 		bool		use_hashed_grouping = false;

 		MemSet(&agg_counts, 0, sizeof(AggClauseCounts));

+		/* Preprocess GROUP BY clause, if any */
+		if (parse->groupClause)
+			preprocess_groupclause(root);
+		numGroupCols = list_length(parse->groupClause);
+
 		/* Preprocess targetlist */
 		tlist = preprocess_targetlist(root, tlist);

@ -1476,6 +1482,88 @@ preprocess_limit(PlannerInfo *root, double tuple_fraction,
 	return tuple_fraction;
 }

+
+/*
+ * preprocess_groupclause - do preparatory work on GROUP BY clause
+ *
+ * The idea here is to adjust the ordering of the GROUP BY elements
+ * (which in itself is semantically insignificant) to match ORDER BY,
+ * thereby allowing a single sort operation to both implement the ORDER BY
+ * requirement and set up for a Unique step that implements GROUP BY.
+ *
+ * In principle it might be interesting to consider other orderings of the
+ * GROUP BY elements, which could match the sort ordering of other
+ * possible plans (eg an indexscan) and thereby reduce cost.  We don't
+ * bother with that, though.  Hashed grouping will frequently win anyway.
+ */
+static void
+preprocess_groupclause(PlannerInfo *root)
+{
+	Query	   *parse = root->parse;
+	List	   *new_groupclause;
+	bool		partial_match;
+	ListCell   *sl;
+	ListCell   *gl;
+
+	/* If no ORDER BY, nothing useful to do here anyway */
+	if (parse->sortClause == NIL)
+		return;
+
+	/*
+	 * Scan the ORDER BY clause and construct a list of matching GROUP BY
+	 * items, but only as far as we can make a matching prefix.
+	 *
+	 * This code assumes that the sortClause contains no duplicate items.
+	 */
+	new_groupclause = NIL;
+	foreach(sl, parse->sortClause)
+	{
+		SortGroupClause *sc = (SortGroupClause *) lfirst(sl);
+
+		foreach(gl, parse->groupClause)
+		{
+			SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
+
+			if (equal(gc, sc))
+			{
+				new_groupclause = lappend(new_groupclause, gc);
+				break;
+			}
+		}
+		if (gl == NULL)
+			break;				/* no match, so stop scanning */
+	}
+
+	/* Did we match all of the ORDER BY list, or just some of it? */
+	partial_match = (sl != NULL);
+
+	/* If no match at all, no point in reordering GROUP BY */
+	if (new_groupclause == NIL)
+		return;
+
+	/*
+	 * Add any remaining GROUP BY items to the new list, but only if we
+	 * were able to make a complete match.  In other words, we only
+	 * rearrange the GROUP BY list if the result is that one list is a
+	 * prefix of the other --- otherwise there's no possibility of a
+	 * common sort.
+	 */
+	foreach(gl, parse->groupClause)
+	{
+		SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
+
+		if (list_member_ptr(new_groupclause, gc))
+			continue;			/* it matched an ORDER BY item */
+		if (partial_match)
+			return;				/* give up, no common sort possible */
+		new_groupclause = lappend(new_groupclause, gc);
+	}
+
+	/* Success --- install the rearranged GROUP BY list */
+	Assert(list_length(parse->groupClause) == list_length(new_groupclause));
+	parse->groupClause = new_groupclause;
+}
+
 /*
 * extract_grouping_ops - make an array of the equality operator OIDs
 *		for the GROUP BY clause
@ -1492,12 +1580,10 @@ extract_grouping_ops(List *groupClause)

 	foreach(glitem, groupClause)
 	{
-		GroupClause *groupcl = (GroupClause *) lfirst(glitem);
+		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);

-		groupOperators[colno] = get_equality_op_for_ordering_op(groupcl->sortop);
-		if (!OidIsValid(groupOperators[colno])) /* shouldn't happen */
-			elog(ERROR, "could not find equality operator for ordering operator %u",
-				 groupcl->sortop);
+		groupOperators[colno] = groupcl->eqop;
+		Assert(OidIsValid(groupOperators[colno]));
 		colno++;
 	}

@ -1738,7 +1824,7 @@ make_subplanTargetList(PlannerInfo *root,

 		foreach(gl, parse->groupClause)
 		{
-			GroupClause *grpcl = (GroupClause *) lfirst(gl);
+			SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
 			Node	   *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
 			TargetEntry *te = NULL;
 			ListCell   *sl;
@ -1797,7 +1883,7 @@ locate_grouping_columns(PlannerInfo *root,

 	foreach(gl, root->parse->groupClause)
 	{
-		GroupClause *grpcl = (GroupClause *) lfirst(gl);
+		SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
 		Node	   *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
 		TargetEntry *te = NULL;
 		ListCell   *sl;
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@ -22,7 +22,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.148 2008/07/31 22:47:56 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.149 2008/08/02 21:32:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -98,7 +98,7 @@ static List *adjust_inherited_tlist(List *tlist,
 * zero means "all the tuples will be fetched".  Any LIMIT present at the
 * top level has already been factored into tuple_fraction.
 *
- * *sortClauses is an output argument: it is set to a list of SortClauses
+ * *sortClauses is an output argument: it is set to a list of SortGroupClauses
 * representing the result ordering of the topmost set operation.
 */
 Plan *
@ -152,7 +152,7 @@ plan_set_operations(PlannerInfo *root, double tuple_fraction,
 * junkOK: if true, child resjunk columns may be left in the result
 * flag: if >= 0, add a resjunk output column indicating value of flag
 * refnames_tlist: targetlist to take column names from
- * *sortClauses: receives list of SortClauses for result plan, if any
+ * *sortClauses: receives list of SortGroupClauses for result plan, if any
 *
 * We don't have to care about typmods here: the only allowed difference
 * between set-op input and output typmods is input is a specific typmod
@ -678,8 +678,11 @@ generate_append_tlist(List *colTypes, bool flag,

 /*
 * generate_setop_sortlist
- *		Build a SortClause list enumerating all the non-resjunk tlist entries,
- *		using default ordering properties.
+ *		Build a SortGroupClause list enumerating all the non-resjunk
+ *		tlist entries, using default ordering properties.
+ *
+ * For now, we require all the items to be sortable.  Eventually we
+ * should implement hashing setops and allow hash-only datatypes.
 */
 static List *
 generate_setop_sortlist(List *targetlist)
@ -692,11 +695,10 @@ generate_setop_sortlist(List *targetlist)
 		TargetEntry *tle = (TargetEntry *) lfirst(l);

 		if (!tle->resjunk)
-			sortlist = addTargetToSortList(NULL, tle,
-										   sortlist, targetlist,
-										   SORTBY_DEFAULT,
-										   SORTBY_NULLS_DEFAULT,
-										   NIL, false);
+			sortlist = addTargetToGroupList(NULL, tle,
+											sortlist, targetlist,
+											true, /* XXX fixme someday */
+											false);
 	}
 	return sortlist;
 }
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.259 2008/05/15 17:37:49 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.260 2008/08/02 21:32:00 tgl Exp $
 *
 * HISTORY
 *	  AUTHOR			DATE			MAJOR EVENT
@ -1334,85 +1334,6 @@ is_pseudo_constant_clause_relids(Node *clause, Relids relids)
 }


-/*****************************************************************************
- *		Tests on clauses of queries
- *
- * Possibly this code should go someplace else, since this isn't quite the
- * same meaning of "clause" as is used elsewhere in this module.  But I can't
- * think of a better place for it...
- *****************************************************************************/
-
-/*
- * Test whether a query uses DISTINCT ON, ie, has a distinct-list that is
- * not the same as the set of output columns.
- */
-bool
-has_distinct_on_clause(Query *query)
-{
-	ListCell   *l;
-
-	/* Is there a DISTINCT clause at all? */
-	if (query->distinctClause == NIL)
-		return false;
-
-	/*
-	 * If the DISTINCT list contains all the nonjunk targetlist items, and
-	 * nothing else (ie, no junk tlist items), then it's a simple DISTINCT,
-	 * else it's DISTINCT ON.  We do not require the lists to be in the same
-	 * order (since the parser may have adjusted the DISTINCT clause ordering
-	 * to agree with ORDER BY).  Furthermore, a non-DISTINCT junk tlist item
-	 * that is in the sortClause is also evidence of DISTINCT ON, since we
-	 * don't allow ORDER BY on junk tlist items when plain DISTINCT is used.
-	 *
-	 * This code assumes that the DISTINCT list is valid, ie, all its entries
-	 * match some entry of the tlist.
-	 */
-	foreach(l, query->targetList)
-	{
-		TargetEntry *tle = (TargetEntry *) lfirst(l);
-
-		if (tle->ressortgroupref == 0)
-		{
-			if (tle->resjunk)
-				continue;		/* we can ignore unsorted junk cols */
-			return true;		/* definitely not in DISTINCT list */
-		}
-		if (targetIsInSortList(tle, InvalidOid, query->distinctClause))
-		{
-			if (tle->resjunk)
-				return true;	/* junk TLE in DISTINCT means DISTINCT ON */
-			/* else this TLE is okay, keep looking */
-		}
-		else
-		{
-			/* This TLE is not in DISTINCT list */
-			if (!tle->resjunk)
-				return true;	/* non-junk, non-DISTINCT, so DISTINCT ON */
-			if (targetIsInSortList(tle, InvalidOid, query->sortClause))
-				return true;	/* sorted, non-distinct junk */
-			/* unsorted junk is okay, keep looking */
-		}
-	}
-	/* It's a simple DISTINCT */
-	return false;
-}
-
-/*
- * Test whether a query uses simple DISTINCT, ie, has a distinct-list that
- * is the same as the set of output columns.
- */
-bool
-has_distinct_clause(Query *query)
-{
-	/* Is there a DISTINCT clause at all? */
-	if (query->distinctClause == NIL)
-		return false;
-
-	/* It's DISTINCT if it's not DISTINCT ON */
-	return !has_distinct_on_clause(query);
-}
-
-
 /*****************************************************************************
 *																			 *
 *		General clause-manipulating routines								 *
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.143 2008/04/21 20:54:15 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.144 2008/08/02 21:32:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -935,8 +935,10 @@ translate_sub_tlist(List *tlist, int relid)
 * corresponding upper-level equality operators listed in opids would think
 * the values are distinct.  (Note: the opids entries could be cross-type
 * operators, and thus not exactly the equality operators that the subquery
- * would use itself.  We assume that the subquery is compatible if these
- * operators appear in the same btree opfamily as the ones the subquery uses.)
+ * would use itself.  We use equality_ops_are_compatible() to check
+ * compatibility.  That looks at btree or hash opfamily membership, and so
+ * should give trustworthy answers for all operators that we might need
+ * to deal with here.)
 */
 static bool
 query_is_distinct_for(Query *query, List *colnos, List *opids)
@ -955,13 +957,13 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
 	{
 		foreach(l, query->distinctClause)
 		{
-			SortClause *scl = (SortClause *) lfirst(l);
-			TargetEntry *tle = get_sortgroupclause_tle(scl,
+			SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
+			TargetEntry *tle = get_sortgroupclause_tle(sgc,
 													   query->targetList);

 			opid = distinct_col_search(tle->resno, colnos, opids);
 			if (!OidIsValid(opid) ||
-				!ops_in_same_btree_opfamily(opid, scl->sortop))
+				!equality_ops_are_compatible(opid, sgc->eqop))
 				break;			/* exit early if no match */
 		}
 		if (l == NULL)			/* had matches for all? */
@ -976,13 +978,13 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
 	{
 		foreach(l, query->groupClause)
 		{
-			GroupClause *grpcl = (GroupClause *) lfirst(l);
-			TargetEntry *tle = get_sortgroupclause_tle(grpcl,
+			SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
+			TargetEntry *tle = get_sortgroupclause_tle(sgc,
 													   query->targetList);

 			opid = distinct_col_search(tle->resno, colnos, opids);
 			if (!OidIsValid(opid) ||
-				!ops_in_same_btree_opfamily(opid, grpcl->sortop))
+				!equality_ops_are_compatible(opid, sgc->eqop))
 				break;			/* exit early if no match */
 		}
 		if (l == NULL)			/* had matches for all? */
@ -1002,10 +1004,11 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
 	 * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
 	 * except with ALL.
 	 *
-	 * XXX this code knows that prepunion.c will adopt the default ordering
-	 * operator for each column datatype as the sortop.  It'd probably be
-	 * better if these operators were chosen at parse time and stored into the
-	 * parsetree, instead of leaving bits of the planner to decide semantics.
+	 * XXX this code knows that prepunion.c will adopt the default sort/group
+	 * operators for each column datatype to determine uniqueness.  It'd
+	 * probably be better if these operators were chosen at parse time and
+	 * stored into the parsetree, instead of leaving bits of the planner to
+	 * decide semantics.
 	 */
 	if (query->setOperations)
 	{
@ -1020,14 +1023,20 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
 			foreach(l, query->targetList)
 			{
 				TargetEntry *tle = (TargetEntry *) lfirst(l);
+				Oid		tle_eq_opr;

 				if (tle->resjunk)
 					continue;	/* ignore resjunk columns */

 				opid = distinct_col_search(tle->resno, colnos, opids);
-				if (!OidIsValid(opid) ||
-					!ops_in_same_btree_opfamily(opid,
-						   ordering_oper_opid(exprType((Node *) tle->expr))))
+				if (!OidIsValid(opid))
+					break;		/* exit early if no match */
+				/* check for compatible semantics */
+				get_sort_group_operators(exprType((Node *) tle->expr),
+										 false, false, false,
+										 NULL, &tle_eq_opr, NULL);
+				if (!OidIsValid(tle_eq_opr) ||
+					!equality_ops_are_compatible(opid, tle_eq_opr))
 					break;		/* exit early if no match */
 			}
 			if (l == NULL)		/* had matches for all? */
--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.78 2008/01/01 19:45:50 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.79 2008/08/02 21:32:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -156,49 +156,43 @@ get_sortgroupref_tle(Index sortref, List *targetList)

 /*
 * get_sortgroupclause_tle
- *		Find the targetlist entry matching the given SortClause
- *		(or GroupClause) by ressortgroupref, and return it.
- *
- * Because GroupClause is typedef'd as SortClause, either kind of
- * node can be passed without casting.
+ *		Find the targetlist entry matching the given SortGroupClause
+ *		by ressortgroupref, and return it.
 */
 TargetEntry *
-get_sortgroupclause_tle(SortClause *sortClause,
+get_sortgroupclause_tle(SortGroupClause *sgClause,
 						List *targetList)
 {
-	return get_sortgroupref_tle(sortClause->tleSortGroupRef, targetList);
+	return get_sortgroupref_tle(sgClause->tleSortGroupRef, targetList);
 }

 /*
 * get_sortgroupclause_expr
- *		Find the targetlist entry matching the given SortClause
- *		(or GroupClause) by ressortgroupref, and return its expression.
- *
- * Because GroupClause is typedef'd as SortClause, either kind of
- * node can be passed without casting.
+ *		Find the targetlist entry matching the given SortGroupClause
+ *		by ressortgroupref, and return its expression.
 */
 Node *
-get_sortgroupclause_expr(SortClause *sortClause, List *targetList)
+get_sortgroupclause_expr(SortGroupClause *sgClause, List *targetList)
 {
-	TargetEntry *tle = get_sortgroupclause_tle(sortClause, targetList);
+	TargetEntry *tle = get_sortgroupclause_tle(sgClause, targetList);

 	return (Node *) tle->expr;
 }

 /*
 * get_sortgrouplist_exprs
- *		Given a list of SortClauses (or GroupClauses), build a list
+ *		Given a list of SortGroupClauses, build a list
 *		of the referenced targetlist expressions.
 */
 List *
-get_sortgrouplist_exprs(List *sortClauses, List *targetList)
+get_sortgrouplist_exprs(List *sgClauses, List *targetList)
 {
 	List	   *result = NIL;
 	ListCell   *l;

-	foreach(l, sortClauses)
+	foreach(l, sgClauses)
 	{
-		SortClause *sortcl = (SortClause *) lfirst(l);
+		SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
 		Node	   *sortexpr;

 		sortexpr = get_sortgroupclause_expr(sortcl, targetList);