Allow planner to use Merge Append to efficiently implement UNION

Until now, UNION queries have often been suboptimal as the planner has only ever considered using an Append node and making the results unique by either using a Hash Aggregate, or by Sorting the entire Append result and running it through the Unique operator. Both of these methods always require reading all rows from the union subqueries. Here we adjust the union planner so that it can request that each subquery produce results in target list order so that these can be Merge Appended together and made unique with a Unique node. This can improve performance significantly as the union child can make use of the likes of btree indexes and/or Merge Joins to provide the top-level UNION with presorted input. This is especially good if the top-level UNION contains a LIMIT node that limits the output rows to a small subset of the unioned rows as cheap startup plans can be used. Author: David Rowley Reviewed-by: Richard Guo, Andy Fan Discussion: https://postgr.es/m/CAApHDvpb_63XQodmxKUF8vb9M7CxyUyT4sWvEgqeQU-GB7QFoQ@mail.gmail.com
2025-12-21 05:21:08 +03:00 · 2024-03-25 14:31:14 +13:00
parent 47f99a407d
commit 66c0185a3d
15 changed files with 707 additions and 253 deletions
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -2867,6 +2867,67 @@ add_child_join_rel_equivalences(PlannerInfo *root,
 	MemoryContextSwitchTo(oldcontext);
 }

+/*
+ * add_setop_child_rel_equivalences
+ *		Add equivalence members for each non-resjunk target in 'child_tlist'
+ *		to the EquivalenceClass in the corresponding setop_pathkey's pk_class.
+ *
+ * 'root' is the PlannerInfo belonging to the top-level set operation.
+ * 'child_rel' is the RelOptInfo of the child relation we're adding
+ * EquivalenceMembers for.
+ * 'child_tlist' is the target list for the setop child relation.  The target
+ * list expressions are what we add as EquivalenceMembers.
+ * 'setop_pathkeys' is a list of PathKeys which must contain an entry for each
+ * non-resjunk target in 'child_tlist'.
+ */
+void
+add_setop_child_rel_equivalences(PlannerInfo *root, RelOptInfo *child_rel,
+								 List *child_tlist, List *setop_pathkeys)
+{
+	ListCell   *lc;
+	ListCell   *lc2 = list_head(setop_pathkeys);
+
+	foreach(lc, child_tlist)
+	{
+		TargetEntry *tle = lfirst_node(TargetEntry, lc);
+		EquivalenceMember *parent_em;
+		PathKey    *pk;
+
+		if (tle->resjunk)
+			continue;
+
+		if (lc2 == NULL)
+			elog(ERROR, "too few pathkeys for set operation");
+
+		pk = lfirst_node(PathKey, lc2);
+		parent_em = linitial(pk->pk_eclass->ec_members);
+
+		/*
+		 * We can safely pass the parent member as the first member in the
+		 * ec_members list as this is added first in generate_union_paths,
+		 * likewise, the JoinDomain can be that of the initial member of the
+		 * Pathkey's EquivalenceClass.
+		 */
+		add_eq_member(pk->pk_eclass,
+					  tle->expr,
+					  child_rel->relids,
+					  parent_em->em_jdomain,
+					  parent_em,
+					  exprType((Node *) tle->expr));
+
+		lc2 = lnext(setop_pathkeys, lc2);
+	}
+
+	/*
+	 * transformSetOperationStmt() ensures that the targetlist never contains
+	 * any resjunk columns, so all eclasses that exist in 'root' must have
+	 * received a new member in the loop above.  Add them to the child_rel's
+	 * eclass_indexes.
+	 */
+	child_rel->eclass_indexes = bms_add_range(child_rel->eclass_indexes, 0,
+											  list_length(root->eq_classes) - 1);
+}
+

 /*
 * generate_implied_equalities_for_column
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -2191,6 +2191,22 @@ pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
 	return n;
 }

+/*
+ * pathkeys_useful_for_setop
+ *		Count the number of leading common pathkeys root's 'setop_pathkeys' in
+ *		'pathkeys'.
+ */
+static int
+pathkeys_useful_for_setop(PlannerInfo *root, List *pathkeys)
+{
+	int			n_common_pathkeys;
+
+	(void) pathkeys_count_contained_in(root->setop_pathkeys, pathkeys,
+									   &n_common_pathkeys);
+
+	return n_common_pathkeys;
+}
+
 /*
 * truncate_useless_pathkeys
 *		Shorten the given pathkey list to just the useful pathkeys.
@@ -2208,6 +2224,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_setop(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;

--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -54,6 +54,7 @@
 #include "optimizer/tlist.h"
 #include "parser/analyze.h"
 #include "parser/parse_agg.h"
+#include "parser/parse_clause.h"
 #include "parser/parse_relation.h"
 #include "parser/parsetree.h"
 #include "partitioning/partdesc.h"
@@ -119,6 +120,8 @@ typedef struct
 {
 	List	   *activeWindows;	/* active windows, if any */
 	grouping_sets_data *gset_data;	/* grouping sets data, if any */
+	SetOperationStmt *setop;	/* parent set operation or NULL if not a
+								 * subquery belonging to a set operation */
 } standard_qp_extra;

 /* Local functions */
@@ -249,6 +252,8 @@ static bool group_by_has_partkey(RelOptInfo *input_rel,
 								 List *targetList,
 								 List *groupClause);
 static int	common_prefix_cmp(const void *a, const void *b);
+static List *generate_setop_child_grouplist(SetOperationStmt *op,
+											List *targetlist);


 /*****************************************************************************
@@ -1500,6 +1505,18 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		qp_extra.activeWindows = activeWindows;
 		qp_extra.gset_data = gset_data;

+		/*
+		 * Check if we're a subquery for a set operation.  If we are, store
+		 * the SetOperationStmt in qp_extra.
+		 */
+		if (root->parent_root != NULL &&
+			root->parent_root->parse->setOperations != NULL &&
+			IsA(root->parent_root->parse->setOperations, SetOperationStmt))
+			qp_extra.setop =
+				(SetOperationStmt *) root->parent_root->parse->setOperations;
+		else
+			qp_extra.setop = NULL;
+
 		/*
 		 * Generate the best unsorted and presorted paths for the scan/join
 		 * portion of this Query, ie the processing represented by the
@@ -3433,6 +3450,27 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 									  parse->sortClause,
 									  tlist);

+	/* setting setop_pathkeys might be useful to the union planner */
+	if (qp_extra->setop != NULL &&
+		set_operation_ordered_results_useful(qp_extra->setop))
+	{
+		List	   *groupClauses;
+		bool		sortable;
+
+		groupClauses = generate_setop_child_grouplist(qp_extra->setop, tlist);
+
+		root->setop_pathkeys =
+			make_pathkeys_for_sortclauses_extended(root,
+												   &groupClauses,
+												   tlist,
+												   false,
+												   &sortable);
+		if (!sortable)
+			root->setop_pathkeys = NIL;
+	}
+	else
+		root->setop_pathkeys = NIL;
+
 	/*
 	 * Figure out whether we want a sorted result from query_planner.
 	 *
@@ -3442,7 +3480,9 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 	 * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
 	 * we try to produce output that's sufficiently well sorted for the
 	 * DISTINCT.  Otherwise, if there is an ORDER BY clause, we want to sort
-	 * by the ORDER BY clause.
+	 * by the ORDER BY clause.  Otherwise, if we're a subquery being planned
+	 * for a set operation which can benefit from presorted results and have a
+	 * sortable targetlist, we want to sort by the target list.
 	 *
 	 * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
 	 * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
@@ -3460,6 +3500,8 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 		root->query_pathkeys = root->distinct_pathkeys;
 	else if (root->sort_pathkeys)
 		root->query_pathkeys = root->sort_pathkeys;
+	else if (root->setop_pathkeys != NIL)
+		root->query_pathkeys = root->setop_pathkeys;
 	else
 		root->query_pathkeys = NIL;
 }
@@ -7866,3 +7908,44 @@ group_by_has_partkey(RelOptInfo *input_rel,

 	return true;
 }
+
+/*
+ * generate_setop_child_grouplist
+ *		Build a SortGroupClause list defining the sort/grouping properties
+ *		of the child of a set operation.
+ *
+ * This is similar to generate_setop_grouplist() but differs as the setop
+ * child query's targetlist entries may already have a tleSortGroupRef
+ * assigned for other purposes, such as GROUP BYs.  Here we keep the
+ * SortGroupClause list in the same order as 'op' groupClauses and just adjust
+ * the tleSortGroupRef to reference the TargetEntry's 'ressortgroupref'.
+ */
+static List *
+generate_setop_child_grouplist(SetOperationStmt *op, List *targetlist)
+{
+	List	   *grouplist = copyObject(op->groupClauses);
+	ListCell   *lg;
+	ListCell   *lt;
+
+	lg = list_head(grouplist);
+	foreach(lt, targetlist)
+	{
+		TargetEntry *tle = (TargetEntry *) lfirst(lt);
+		SortGroupClause *sgc;
+
+		/* resjunk columns could have sortgrouprefs.  Leave these alone */
+		if (tle->resjunk)
+			continue;
+
+		/* we expect every non-resjunk target to have a SortGroupClause */
+		Assert(lg != NULL);
+		sgc = (SortGroupClause *) lfirst(lg);
+		lg = lnext(grouplist, lg);
+
+		/* assign a tleSortGroupRef, or reuse the existing one */
+		sgc->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
+		tle->ressortgroupref = sgc->tleSortGroupRef;
+	}
+	Assert(lg == NULL);
+	return grouplist;
+}
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -43,11 +43,15 @@ static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
 										  bool junkOK,
 										  int flag, List *refnames_tlist,
 										  List **pTargetList,
-										  double *pNumGroups);
+										  bool *istrivial_tlist);
 static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
 										   PlannerInfo *root,
 										   List *refnames_tlist,
 										   List **pTargetList);
+static void build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
+									bool trivial_tlist, List *child_tlist,
+									List *interesting_pathkeys,
+									double *pNumGroups);
 static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 										List *refnames_tlist,
 										List **pTargetList);
@@ -57,9 +61,8 @@ static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *ro
 static List *plan_union_children(PlannerInfo *root,
 								 SetOperationStmt *top_union,
 								 List *refnames_tlist,
-								 List **tlist_list);
-static Path *make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
-							   PlannerInfo *root);
+								 List **tlist_list,
+								 List **istrivial_tlist);
 static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
 static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
 								Path *input_path,
@@ -114,10 +117,10 @@ plan_set_operations(PlannerInfo *root)
 	Assert(parse->distinctClause == NIL);

 	/*
-	 * In the outer query level, we won't have any true equivalences to deal
-	 * with; but we do want to be able to make pathkeys, which will require
-	 * single-member EquivalenceClasses.  Indicate that EC merging is complete
-	 * so that pathkeys.c won't complain.
+	 * In the outer query level, equivalence classes are limited to classes
+	 * which define that the top-level target entry is equivalent to the
+	 * corresponding child target entry.  There won't be any equivalence class
+	 * merging.  Mark that merging is complete to allow us to make pathkeys.
 	 */
 	Assert(root->eq_classes == NIL);
 	root->ec_merging_done = true;
@@ -152,6 +155,8 @@ plan_set_operations(PlannerInfo *root)
 	}
 	else
 	{
+		bool		trivial_tlist;
+
 		/*
 		 * Recurse on setOperations tree to generate paths for set ops. The
 		 * final output paths should have just the column types shown as the
@@ -163,7 +168,7 @@ plan_set_operations(PlannerInfo *root)
 										   true, -1,
 										   leftmostQuery->targetList,
 										   &top_tlist,
-										   NULL);
+										   &trivial_tlist);
 	}

 	/* Must return the built tlist into root->processed_tlist. */
@@ -172,6 +177,31 @@ plan_set_operations(PlannerInfo *root)
 	return setop_rel;
 }

+/*
+ * set_operation_ordered_results_useful
+ *		Return true if the given SetOperationStmt can be executed by utilizing
+ *		paths that provide sorted input according to the setop's targetlist.
+ *		Returns false when sorted paths are not any more useful then unsorted
+ *		ones.
+ */
+bool
+set_operation_ordered_results_useful(SetOperationStmt *setop)
+{
+	/*
+	 * Paths sorted by the targetlist are useful for UNION as we can opt to
+	 * MergeAppend the sorted paths then Unique them.  Ordered paths are no
+	 * more useful than unordered ones for UNION ALL.
+	 */
+	if (!setop->all && setop->op == SETOP_UNION)
+		return true;
+
+	/*
+	 * EXCEPT / EXCEPT ALL / INTERSECT / INTERSECT ALL cannot yet utilize
+	 * correctly sorted input paths.
+	 */
+	return false;
+}
+
 /*
 * recurse_set_operations
 *	  Recursively handle one step in a tree of set operations
@@ -184,8 +214,7 @@ plan_set_operations(PlannerInfo *root)
 *
 * Returns a RelOptInfo for the subtree, as well as these output parameters:
 * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
- * *pNumGroups: if not NULL, we estimate the number of distinct groups
- *		in the result, and store it there
+ * *istrivial_tlist: true iif datatypes between parent and child match.
 *
 * The pTargetList output parameter is mostly redundant with the pathtarget
 * of the returned RelOptInfo, but for the moment we need it because much of
@@ -202,9 +231,11 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
 					   bool junkOK,
 					   int flag, List *refnames_tlist,
 					   List **pTargetList,
-					   double *pNumGroups)
+					   bool *istrivial_tlist)
 {
-	RelOptInfo *rel = NULL;		/* keep compiler quiet */
+	RelOptInfo *rel;
+
+	*istrivial_tlist = true;	/* for now */

 	/* Guard against stack overflow due to overly complex setop nests */
 	check_stack_depth();
@@ -215,9 +246,6 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
 		RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
 		Query	   *subquery = rte->subquery;
 		PlannerInfo *subroot;
-		RelOptInfo *final_rel;
-		Path	   *subpath;
-		Path	   *path;
 		List	   *tlist;
 		bool		trivial_tlist;

@@ -254,93 +282,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,

 		/* Return the fully-fledged tlist to caller, too */
 		*pTargetList = tlist;
-
-		/*
-		 * Mark rel with estimated output rows, width, etc.  Note that we have
-		 * to do this before generating outer-query paths, else
-		 * cost_subqueryscan is not happy.
-		 */
-		set_subquery_size_estimates(root, rel);
-
-		/*
-		 * Since we may want to add a partial path to this relation, we must
-		 * set its consider_parallel flag correctly.
-		 */
-		final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
-		rel->consider_parallel = final_rel->consider_parallel;
-
-		/*
-		 * For the moment, we consider only a single Path for the subquery.
-		 * This should change soon (make it look more like
-		 * set_subquery_pathlist).
-		 */
-		subpath = get_cheapest_fractional_path(final_rel,
-											   root->tuple_fraction);
-
-		/*
-		 * Stick a SubqueryScanPath atop that.
-		 *
-		 * We don't bother to determine the subquery's output ordering since
-		 * it won't be reflected in the set-op result anyhow; so just label
-		 * the SubqueryScanPath with nil pathkeys.  (XXX that should change
-		 * soon too, likely.)
-		 */
-		path = (Path *) create_subqueryscan_path(root, rel, subpath,
-												 trivial_tlist,
-												 NIL, NULL);
-
-		add_path(rel, path);
-
-		/*
-		 * If we have a partial path for the child relation, we can use that
-		 * to build a partial path for this relation.  But there's no point in
-		 * considering any path but the cheapest.
-		 */
-		if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
-			final_rel->partial_pathlist != NIL)
-		{
-			Path	   *partial_subpath;
-			Path	   *partial_path;
-
-			partial_subpath = linitial(final_rel->partial_pathlist);
-			partial_path = (Path *)
-				create_subqueryscan_path(root, rel, partial_subpath,
-										 trivial_tlist,
-										 NIL, NULL);
-			add_partial_path(rel, partial_path);
-		}
-
-		/*
-		 * Estimate number of groups if caller wants it.  If the subquery used
-		 * grouping or aggregation, its output is probably mostly unique
-		 * anyway; otherwise do statistical estimation.
-		 *
-		 * XXX you don't really want to know about this: we do the estimation
-		 * using the subroot->parse's original targetlist expressions, not the
-		 * subroot->processed_tlist which might seem more appropriate.  The
-		 * reason is that if the subquery is itself a setop, it may return a
-		 * processed_tlist containing "varno 0" Vars generated by
-		 * generate_append_tlist, and those would confuse estimate_num_groups
-		 * mightily.  We ought to get rid of the "varno 0" hack, but that
-		 * requires a redesign of the parsetree representation of setops, so
-		 * that there can be an RTE corresponding to each setop's output.
-		 * Note, we use this not subquery's targetlist but subroot->parse's
-		 * targetlist, because it was revised by self-join removal. subquery's
-		 * targetlist might contain the references to the removed relids.
-		 */
-		if (pNumGroups)
-		{
-			if (subquery->groupClause || subquery->groupingSets ||
-				subquery->distinctClause ||
-				subroot->hasHavingQual || subquery->hasAggs)
-				*pNumGroups = subpath->rows;
-			else
-				*pNumGroups = estimate_num_groups(subroot,
-												  get_tlist_exprs(subroot->parse->targetList, false),
-												  subpath->rows,
-												  NULL,
-												  NULL);
-		}
+		*istrivial_tlist = trivial_tlist;
 	}
 	else if (IsA(setOp, SetOperationStmt))
 	{
@@ -355,8 +297,6 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
 			rel = generate_nonunion_paths(op, root,
 										  refnames_tlist,
 										  pTargetList);
-		if (pNumGroups)
-			*pNumGroups = rel->rows;

 		/*
 		 * If necessary, add a Result node to project the caller-requested
@@ -386,6 +326,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
 												*pTargetList,
 												refnames_tlist,
 												&trivial_tlist);
+			*istrivial_tlist = trivial_tlist;
 			target = create_pathtarget(root, *pTargetList);

 			/* Apply projection to each path */
@@ -416,16 +357,16 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
 				lfirst(lc) = path;
 			}
 		}
+		postprocess_setop_rel(root, rel);
 	}
 	else
 	{
 		elog(ERROR, "unrecognized node type: %d",
 			 (int) nodeTag(setOp));
 		*pTargetList = NIL;
+		rel = NULL;				/* keep compiler quiet */
 	}

-	postprocess_setop_rel(root, rel);
-
 	return rel;
 }

@@ -444,7 +385,9 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
 	Path	   *lpath;
 	Path	   *rpath;
 	List	   *lpath_tlist;
+	bool		lpath_trivial_tlist;
 	List	   *rpath_tlist;
+	bool		rpath_trivial_tlist;
 	List	   *tlist;
 	List	   *groupList;
 	double		dNumGroups;
@@ -464,7 +407,10 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
 								  false, -1,
 								  refnames_tlist,
 								  &lpath_tlist,
-								  NULL);
+								  &lpath_trivial_tlist);
+	if (lrel->rtekind == RTE_SUBQUERY)
+		build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
+								NIL, NULL);
 	lpath = lrel->cheapest_total_path;
 	/* The right path will want to look at the left one ... */
 	root->non_recursive_path = lpath;
@@ -473,7 +419,10 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
 								  false, -1,
 								  refnames_tlist,
 								  &rpath_tlist,
-								  NULL);
+								  &rpath_trivial_tlist);
+	if (rrel->rtekind == RTE_SUBQUERY)
+		build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
+								NIL, NULL);
 	rpath = rrel->cheapest_total_path;
 	root->non_recursive_path = NULL;

@@ -535,6 +484,207 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
 	return result_rel;
 }

+/*
+ * build_setop_child_paths
+ *		Build paths for the set op child relation denoted by 'rel'.
+ *
+ * interesting_pathkeys: if not NIL, also include paths that suit these
+ * pathkeys, sorting any unsorted paths as required.
+ * *pNumGroups: if not NULL, we estimate the number of distinct groups
+ *		in the result, and store it there
+ */
+static void
+build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
+						bool trivial_tlist, List *child_tlist,
+						List *interesting_pathkeys, double *pNumGroups)
+{
+	RelOptInfo *final_rel;
+	List	   *setop_pathkeys = rel->subroot->setop_pathkeys;
+	ListCell   *lc;
+
+	/* it can't be a set op child rel if it's not a subquery */
+	Assert(rel->rtekind == RTE_SUBQUERY);
+
+	/* when sorting is needed, add child rel equivalences */
+	if (interesting_pathkeys != NIL)
+		add_setop_child_rel_equivalences(root,
+										 rel,
+										 child_tlist,
+										 interesting_pathkeys);
+
+	/*
+	 * Mark rel with estimated output rows, width, etc.  Note that we have to
+	 * do this before generating outer-query paths, else cost_subqueryscan is
+	 * not happy.
+	 */
+	set_subquery_size_estimates(root, rel);
+
+	/*
+	 * Since we may want to add a partial path to this relation, we must set
+	 * its consider_parallel flag correctly.
+	 */
+	final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
+	rel->consider_parallel = final_rel->consider_parallel;
+
+	/* Generate subquery scan paths for any interesting path in final_rel */
+	foreach(lc, final_rel->pathlist)
+	{
+		Path	   *subpath = (Path *) lfirst(lc);
+		List	   *pathkeys;
+		Path	   *cheapest_input_path = final_rel->cheapest_total_path;
+		bool		is_sorted;
+		int			presorted_keys;
+
+		/*
+		 * Include the cheapest path as-is so that the set operation can be
+		 * cheaply implemented using a method which does not require the input
+		 * to be sorted.
+		 */
+		if (subpath == cheapest_input_path)
+		{
+			/* Convert subpath's pathkeys to outer representation */
+			pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
+												 make_tlist_from_pathtarget(subpath->pathtarget));
+
+			/* Generate outer path using this subpath */
+			add_path(rel, (Path *) create_subqueryscan_path(root,
+															rel,
+															subpath,
+															trivial_tlist,
+															pathkeys,
+															NULL));
+		}
+
+		/* skip dealing with sorted paths if the setop doesn't need them */
+		if (interesting_pathkeys == NIL)
+			continue;
+
+		/*
+		 * Create paths to suit final sort order required for setop_pathkeys.
+		 * Here we'll sort the cheapest input path (if not sorted already) and
+		 * incremental sort any paths which are partially sorted.
+		 */
+		is_sorted = pathkeys_count_contained_in(setop_pathkeys,
+												subpath->pathkeys,
+												&presorted_keys);
+
+		if (!is_sorted)
+		{
+			double		limittuples = rel->subroot->limit_tuples;
+
+			/*
+			 * Try at least sorting the cheapest path and also try
+			 * incrementally sorting any path which is partially sorted
+			 * already (no need to deal with paths which have presorted keys
+			 * when incremental sort is disabled unless it's the cheapest
+			 * input path).
+			 */
+			if (subpath != cheapest_input_path &&
+				(presorted_keys == 0 || !enable_incremental_sort))
+				continue;
+
+			/*
+			 * We've no need to consider both a sort and incremental sort.
+			 * We'll just do a sort if there are no presorted keys and an
+			 * incremental sort when there are presorted keys.
+			 */
+			if (presorted_keys == 0 || !enable_incremental_sort)
+				subpath = (Path *) create_sort_path(rel->subroot,
+													final_rel,
+													subpath,
+													setop_pathkeys,
+													limittuples);
+			else
+				subpath = (Path *) create_incremental_sort_path(rel->subroot,
+																final_rel,
+																subpath,
+																setop_pathkeys,
+																presorted_keys,
+																limittuples);
+		}
+
+		/*
+		 * subpath is now sorted, so add it to the pathlist.  We already added
+		 * the cheapest_input_path above, so don't add it again unless we just
+		 * sorted it.
+		 */
+		if (subpath != cheapest_input_path)
+		{
+			/* Convert subpath's pathkeys to outer representation */
+			pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
+												 make_tlist_from_pathtarget(subpath->pathtarget));
+
+			/* Generate outer path using this subpath */
+			add_path(rel, (Path *) create_subqueryscan_path(root,
+															rel,
+															subpath,
+															trivial_tlist,
+															pathkeys,
+															NULL));
+		}
+	}
+
+	/* if consider_parallel is false, there should be no partial paths */
+	Assert(final_rel->consider_parallel ||
+		   final_rel->partial_pathlist == NIL);
+
+	/*
+	 * If we have a partial path for the child relation, we can use that to
+	 * build a partial path for this relation.  But there's no point in
+	 * considering any path but the cheapest.
+	 */
+	if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
+		final_rel->partial_pathlist != NIL)
+	{
+		Path	   *partial_subpath;
+		Path	   *partial_path;
+
+		partial_subpath = linitial(final_rel->partial_pathlist);
+		partial_path = (Path *)
+			create_subqueryscan_path(root, rel, partial_subpath,
+									 trivial_tlist,
+									 NIL, NULL);
+		add_partial_path(rel, partial_path);
+	}
+
+	postprocess_setop_rel(root, rel);
+
+	/*
+	 * Estimate number of groups if caller wants it.  If the subquery used
+	 * grouping or aggregation, its output is probably mostly unique anyway;
+	 * otherwise do statistical estimation.
+	 *
+	 * XXX you don't really want to know about this: we do the estimation
+	 * using the subroot->parse's original targetlist expressions, not the
+	 * subroot->processed_tlist which might seem more appropriate.  The reason
+	 * is that if the subquery is itself a setop, it may return a
+	 * processed_tlist containing "varno 0" Vars generated by
+	 * generate_append_tlist, and those would confuse estimate_num_groups
+	 * mightily.  We ought to get rid of the "varno 0" hack, but that requires
+	 * a redesign of the parsetree representation of setops, so that there can
+	 * be an RTE corresponding to each setop's output.  Note, we use this not
+	 * subquery's targetlist but subroot->parse's targetlist, because it was
+	 * revised by self-join removal. subquery's targetlist might contain the
+	 * references to the removed relids.
+	 */
+	if (pNumGroups)
+	{
+		PlannerInfo *subroot = rel->subroot;
+		Query	   *subquery = subroot->parse;
+
+		if (subquery->groupClause || subquery->groupingSets ||
+			subquery->distinctClause || subroot->hasHavingQual ||
+			subquery->hasAggs)
+			*pNumGroups = rel->cheapest_total_path->rows;
+		else
+			*pNumGroups = estimate_num_groups(subroot,
+											  get_tlist_exprs(subroot->parse->targetList, false),
+											  rel->cheapest_total_path->rows,
+											  NULL,
+											  NULL);
+	}
+}
+
 /*
 * Generate paths for a UNION or UNION ALL node
 */
@@ -545,41 +695,38 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 {
 	Relids		relids = NULL;
 	RelOptInfo *result_rel;
-	double		save_fraction = root->tuple_fraction;
 	ListCell   *lc;
-	List	   *pathlist = NIL;
+	ListCell   *lc2;
+	ListCell   *lc3;
+	List	   *cheapest_pathlist = NIL;
+	List	   *ordered_pathlist = NIL;
 	List	   *partial_pathlist = NIL;
 	bool		partial_paths_valid = true;
 	bool		consider_parallel = true;
 	List	   *rellist;
 	List	   *tlist_list;
+	List	   *trivial_tlist_list;
 	List	   *tlist;
-	Path	   *path;
-
-	/*
-	 * If plain UNION, tell children to fetch all tuples.
-	 *
-	 * Note: in UNION ALL, we pass the top-level tuple_fraction unmodified to
-	 * each arm of the UNION ALL.  One could make a case for reducing the
-	 * tuple fraction for later arms (discounting by the expected size of the
-	 * earlier arms' results) but it seems not worth the trouble. The normal
-	 * case where tuple_fraction isn't already zero is a LIMIT at top level,
-	 * and passing it down as-is is usually enough to get the desired result
-	 * of preferring fast-start plans.
-	 */
-	if (!op->all)
-		root->tuple_fraction = 0.0;
+	List	   *groupList = NIL;
+	Path	   *apath;
+	Path	   *gpath = NULL;
+	bool		try_sorted;
+	List	   *union_pathkeys = NIL;

 	/*
 	 * If any of my children are identical UNION nodes (same op, all-flag, and
 	 * colTypes) then they can be merged into this node so that we generate
-	 * only one Append and unique-ification for the lot.  Recurse to find such
-	 * nodes and compute their children's paths.
+	 * only one Append/MergeAppend and unique-ification for the lot.  Recurse
+	 * to find such nodes.
 	 */
-	rellist = plan_union_children(root, op, refnames_tlist, &tlist_list);
+	rellist = plan_union_children(root,
+								  op,
+								  refnames_tlist,
+								  &tlist_list,
+								  &trivial_tlist_list);

 	/*
-	 * Generate tlist for Append plan node.
+	 * Generate tlist for Append/MergeAppend plan node.
 	 *
 	 * The tlist for an Append plan isn't important as far as the Append is
 	 * concerned, but we must make it look real anyway for the benefit of the
@@ -587,15 +734,68 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 	 */
 	tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
 								  tlist_list, refnames_tlist);
-
 	*pTargetList = tlist;

+	/* For for UNIONs (not UNION ALL), try sorting, if sorting is possible */
+	try_sorted = !op->all && grouping_is_sortable(op->groupClauses);
+
+	if (try_sorted)
+	{
+		/* Identify the grouping semantics */
+		groupList = generate_setop_grouplist(op, tlist);
+
+		/* Determine the pathkeys for sorting by the whole target list */
+		union_pathkeys = make_pathkeys_for_sortclauses(root, groupList, tlist);
+
+		root->query_pathkeys = union_pathkeys;
+	}
+
+	/*
+	 * Now that we've got the append target list, we can build the union child
+	 * paths.
+	 */
+	forthree(lc, rellist, lc2, trivial_tlist_list, lc3, tlist_list)
+	{
+		RelOptInfo *rel = lfirst(lc);
+		bool		trivial_tlist = lfirst_int(lc2);
+		List	   *child_tlist = lfirst_node(List, lc3);
+
+		/* only build paths for the union children */
+		if (rel->rtekind == RTE_SUBQUERY)
+			build_setop_child_paths(root, rel, trivial_tlist, child_tlist,
+									union_pathkeys, NULL);
+	}
+
 	/* Build path lists and relid set. */
 	foreach(lc, rellist)
 	{
 		RelOptInfo *rel = lfirst(lc);
+		Path	   *ordered_path;

-		pathlist = lappend(pathlist, rel->cheapest_total_path);
+		cheapest_pathlist = lappend(cheapest_pathlist,
+									rel->cheapest_total_path);
+
+		if (try_sorted)
+		{
+			ordered_path = get_cheapest_path_for_pathkeys(rel->pathlist,
+														  union_pathkeys,
+														  NULL,
+														  TOTAL_COST,
+														  false);
+
+			if (ordered_path != NULL)
+				ordered_pathlist = lappend(ordered_pathlist, ordered_path);
+			else
+			{
+				/*
+				 * If we can't find a sorted path, just give up trying to
+				 * generate a list of correctly sorted child paths.  This can
+				 * happen when type coercion was added to the targetlist due
+				 * to mismatching types from the union children.
+				 */
+				try_sorted = false;
+			}
+		}

 		if (consider_parallel)
 		{
@@ -618,28 +818,21 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 	result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
 	result_rel->reltarget = create_pathtarget(root, tlist);
 	result_rel->consider_parallel = consider_parallel;
+	result_rel->consider_startup = (root->tuple_fraction > 0);

 	/*
-	 * Append the child results together.
+	 * Append the child results together using the cheapest paths from each
+	 * union child.
 	 */
-	path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
-									   NIL, NULL, 0, false, -1);
-
-	/*
-	 * For UNION ALL, we just need the Append path.  For UNION, need to add
-	 * node(s) to remove duplicates.
-	 */
-	if (!op->all)
-		path = make_union_unique(op, path, tlist, root);
-
-	add_path(result_rel, path);
+	apath = (Path *) create_append_path(root, result_rel, cheapest_pathlist,
+										NIL, NIL, NULL, 0, false, -1);

 	/*
 	 * Estimate number of groups.  For now we just assume the output is unique
 	 * --- this is certainly true for the UNION case, and we want worst-case
 	 * estimates anyway.
 	 */
-	result_rel->rows = path->rows;
+	result_rel->rows = apath->rows;

 	/*
 	 * Now consider doing the same thing using the partial paths plus Append
@@ -647,7 +840,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 	 */
 	if (partial_paths_valid)
 	{
-		Path	   *ppath;
+		Path	   *papath;
 		int			parallel_workers = 0;

 		/* Find the highest number of workers requested for any subpath. */
@@ -676,21 +869,137 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 		}
 		Assert(parallel_workers > 0);

-		ppath = (Path *)
+		papath = (Path *)
 			create_append_path(root, result_rel, NIL, partial_pathlist,
-							   NIL, NULL,
-							   parallel_workers, enable_parallel_append,
-							   -1);
-		ppath = (Path *)
-			create_gather_path(root, result_rel, ppath,
+							   NIL, NULL, parallel_workers,
+							   enable_parallel_append, -1);
+		gpath = (Path *)
+			create_gather_path(root, result_rel, papath,
 							   result_rel->reltarget, NULL, NULL);
-		if (!op->all)
-			ppath = make_union_unique(op, ppath, tlist, root);
-		add_path(result_rel, ppath);
 	}

-	/* Undo effects of possibly forcing tuple_fraction to 0 */
-	root->tuple_fraction = save_fraction;
+	if (!op->all)
+	{
+		double		dNumGroups;
+		bool		can_sort = grouping_is_sortable(groupList);
+		bool		can_hash = grouping_is_hashable(groupList);
+
+		/*
+		 * XXX for the moment, take the number of distinct groups as equal to
+		 * the total input size, i.e., the worst case.  This is too
+		 * conservative, but it's not clear how to get a decent estimate of
+		 * the true size.  One should note as well the propensity of novices
+		 * to write UNION rather than UNION ALL even when they don't expect
+		 * any duplicates...
+		 */
+		dNumGroups = apath->rows;
+
+		if (can_hash)
+		{
+			Path	   *path;
+
+			/*
+			 * Try a hash aggregate plan on 'apath'.  This is the cheapest
+			 * available path containing each append child.
+			 */
+			path = (Path *) create_agg_path(root,
+											result_rel,
+											apath,
+											create_pathtarget(root, tlist),
+											AGG_HASHED,
+											AGGSPLIT_SIMPLE,
+											groupList,
+											NIL,
+											NULL,
+											dNumGroups);
+			add_path(result_rel, path);
+
+			/* Try hash aggregate on the Gather path, if valid */
+			if (gpath != NULL)
+			{
+				/* Hashed aggregate plan --- no sort needed */
+				path = (Path *) create_agg_path(root,
+												result_rel,
+												gpath,
+												create_pathtarget(root, tlist),
+												AGG_HASHED,
+												AGGSPLIT_SIMPLE,
+												groupList,
+												NIL,
+												NULL,
+												dNumGroups);
+				add_path(result_rel, path);
+			}
+		}
+
+		if (can_sort)
+		{
+			Path	   *path = apath;
+
+			/* Try Sort -> Unique on the Append path */
+			if (groupList != NIL)
+				path = (Path *) create_sort_path(root, result_rel, path,
+												 make_pathkeys_for_sortclauses(root, groupList, tlist),
+												 -1.0);
+
+			path = (Path *) create_upper_unique_path(root,
+													 result_rel,
+													 path,
+													 list_length(path->pathkeys),
+													 dNumGroups);
+
+			add_path(result_rel, path);
+
+			/* Try Sort -> Unique on the Gather path, if set */
+			if (gpath != NULL)
+			{
+				path = gpath;
+
+				path = (Path *) create_sort_path(root, result_rel, path,
+												 make_pathkeys_for_sortclauses(root, groupList, tlist),
+												 -1.0);
+
+				path = (Path *) create_upper_unique_path(root,
+														 result_rel,
+														 path,
+														 list_length(path->pathkeys),
+														 dNumGroups);
+				add_path(result_rel, path);
+			}
+		}
+
+		/*
+		 * Try making a MergeAppend path if we managed to find a path with the
+		 * correct pathkeys in each union child query.
+		 */
+		if (try_sorted && groupList != NIL)
+		{
+			Path	   *path;
+
+			path = (Path *) create_merge_append_path(root,
+													 result_rel,
+													 ordered_pathlist,
+													 union_pathkeys,
+													 NULL);
+
+			/* and make the MergeAppend unique */
+			path = (Path *) create_upper_unique_path(root,
+													 result_rel,
+													 path,
+													 list_length(tlist),
+													 dNumGroups);
+
+			add_path(result_rel, path);
+		}
+	}
+	else
+	{
+		/* UNION ALL */
+		add_path(result_rel, apath);
+
+		if (gpath != NULL)
+			add_path(result_rel, gpath);
+	}

 	return result_rel;
 }
@@ -716,6 +1025,8 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
 			   *tlist,
 			   *groupList,
 			   *pathlist;
+	bool		lpath_trivial_tlist,
+				rpath_trivial_tlist;
 	double		dLeftGroups,
 				dRightGroups,
 				dNumGroups,
@@ -735,14 +1046,26 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
 								  false, 0,
 								  refnames_tlist,
 								  &lpath_tlist,
-								  &dLeftGroups);
+								  &lpath_trivial_tlist);
+	if (lrel->rtekind == RTE_SUBQUERY)
+		build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
+								NIL, &dLeftGroups);
+	else
+		dLeftGroups = lrel->rows;
+
 	lpath = lrel->cheapest_total_path;
 	rrel = recurse_set_operations(op->rarg, root,
 								  op->colTypes, op->colCollations,
 								  false, 1,
 								  refnames_tlist,
 								  &rpath_tlist,
-								  &dRightGroups);
+								  &rpath_trivial_tlist);
+	if (rrel->rtekind == RTE_SUBQUERY)
+		build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
+								NIL, &dRightGroups);
+	else
+		dRightGroups = rrel->rows;
+
 	rpath = rrel->cheapest_total_path;

 	/* Undo effects of forcing tuple_fraction to 0 */
@@ -879,13 +1202,16 @@ static List *
 plan_union_children(PlannerInfo *root,
 					SetOperationStmt *top_union,
 					List *refnames_tlist,
-					List **tlist_list)
+					List **tlist_list,
+					List **istrivial_tlist)
 {
 	List	   *pending_rels = list_make1(top_union);
 	List	   *result = NIL;
 	List	   *child_tlist;
+	bool		trivial_tlist;

 	*tlist_list = NIL;
+	*istrivial_tlist = NIL;

 	while (pending_rels != NIL)
 	{
@@ -924,75 +1250,14 @@ plan_union_children(PlannerInfo *root,
 														false, -1,
 														refnames_tlist,
 														&child_tlist,
-														NULL));
+														&trivial_tlist));
 		*tlist_list = lappend(*tlist_list, child_tlist);
+		*istrivial_tlist = lappend_int(*istrivial_tlist, trivial_tlist);
 	}

 	return result;
 }

-/*
- * Add nodes to the given path tree to unique-ify the result of a UNION.
- */
-static Path *
-make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
-				  PlannerInfo *root)
-{
-	RelOptInfo *result_rel = fetch_upper_rel(root, UPPERREL_SETOP, NULL);
-	List	   *groupList;
-	double		dNumGroups;
-
-	/* Identify the grouping semantics */
-	groupList = generate_setop_grouplist(op, tlist);
-
-	/*
-	 * XXX for the moment, take the number of distinct groups as equal to the
-	 * total input size, ie, the worst case.  This is too conservative, but
-	 * it's not clear how to get a decent estimate of the true size.  One
-	 * should note as well the propensity of novices to write UNION rather
-	 * than UNION ALL even when they don't expect any duplicates...
-	 */
-	dNumGroups = path->rows;
-
-	/* Decide whether to hash or sort */
-	if (choose_hashed_setop(root, groupList, path,
-							dNumGroups, dNumGroups,
-							"UNION"))
-	{
-		/* Hashed aggregate plan --- no sort needed */
-		path = (Path *) create_agg_path(root,
-										result_rel,
-										path,
-										create_pathtarget(root, tlist),
-										AGG_HASHED,
-										AGGSPLIT_SIMPLE,
-										groupList,
-										NIL,
-										NULL,
-										dNumGroups);
-	}
-	else
-	{
-		/* Sort and Unique */
-		if (groupList)
-			path = (Path *)
-				create_sort_path(root,
-								 result_rel,
-								 path,
-								 make_pathkeys_for_sortclauses(root,
-															   groupList,
-															   tlist),
-								 -1.0);
-		path = (Path *) create_upper_unique_path(root,
-												 result_rel,
-												 path,
-												 list_length(path->pathkeys),
-												 dNumGroups);
-	}
-
-	return path;
-}
-
 /*
 * postprocess_setop_rel - perform steps required after adding paths
 */
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -1890,7 +1890,8 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt)
 	 * For now, we don't support resjunk sort clauses on the output of a
 	 * setOperation tree --- you can only use the SQL92-spec options of
 	 * selecting an output column by name or number.  Enforce by checking that
-	 * transformSortClause doesn't add any items to tlist.
+	 * transformSortClause doesn't add any items to tlist.  Note, if changing
+	 * this, add_setop_child_rel_equivalences() will need to be updated.
 	 */
 	tllen = list_length(qry->targetList);