Support MergeAppend plans, to allow sorted output from append relations.

This patch eliminates the former need to sort the output of an Append scan when an ordered scan of an inheritance tree is wanted. This should be particularly useful for fast-start cases such as queries with LIMIT. Original patch by Greg Stark, with further hacking by Hans-Jurgen Schonig, Robert Haas, and Tom Lane.
2025-07-14 08:21:07 +03:00 · 2010-10-14 16:56:39 -04:00
parent 30e749dece
commit 11cad29c91
26 changed files with 1316 additions and 68 deletions
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@ -51,6 +51,7 @@ static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 					   RangeTblEntry *rte);
 static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 						Index rti, RangeTblEntry *rte);
+static List *accumulate_append_subpath(List *subpaths, Path *path);
 static void set_dummy_rel_pathlist(RelOptInfo *rel);
 static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
 					  Index rti, RangeTblEntry *rte);
@ -283,7 +284,9 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 						Index rti, RangeTblEntry *rte)
 {
 	int			parentRTindex = rti;
+	List	   *live_childrels = NIL;
 	List	   *subpaths = NIL;
+	List	   *all_child_pathkeys = NIL;
 	double		parent_rows;
 	double		parent_size;
 	double	   *parent_attrsizes;
@ -321,7 +324,7 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 		RelOptInfo *childrel;
 		List	   *childquals;
 		Node	   *childqual;
-		Path	   *childpath;
+		ListCell   *lcp;
 		ListCell   *parentvars;
 		ListCell   *childvars;

@ -395,13 +398,15 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,

 		/*
 		 * We have to make child entries in the EquivalenceClass data
-		 * structures as well.
+		 * structures as well.  This is needed either if the parent
+		 * participates in some eclass joins (because we will want to
+		 * consider inner-indexscan joins on the individual children)
+		 * or if the parent has useful pathkeys (because we should try
+		 * to build MergeAppend paths that produce those sort orderings).
 		 */
-		if (rel->has_eclass_joins)
-		{
+		if (rel->has_eclass_joins || has_useful_pathkeys(root, rel))
 			add_child_rel_equivalences(root, appinfo, rel, childrel);
-			childrel->has_eclass_joins = true;
-		}
+		childrel->has_eclass_joins = rel->has_eclass_joins;

 		/*
 		 * Note: we could compute appropriate attr_needed data for the child's
@ -411,23 +416,52 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 		 * otherrels.  So we just leave the child's attr_needed empty.
 		 */

+		/* Remember which childrels are live, for MergeAppend logic below */
+		live_childrels = lappend(live_childrels, childrel);
+
 		/*
 		 * Compute the child's access paths, and add the cheapest one to the
 		 * Append path we are constructing for the parent.
-		 *
-		 * It's possible that the child is itself an appendrel, in which case
-		 * we can "cut out the middleman" and just add its child paths to our
-		 * own list.  (We don't try to do this earlier because we need to
-		 * apply both levels of transformation to the quals.)
 		 */
 		set_rel_pathlist(root, childrel, childRTindex, childRTE);

-		childpath = childrel->cheapest_total_path;
-		if (IsA(childpath, AppendPath))
-			subpaths = list_concat(subpaths,
-							list_copy(((AppendPath *) childpath)->subpaths));
-		else
-			subpaths = lappend(subpaths, childpath);
+		subpaths = accumulate_append_subpath(subpaths,
+											 childrel->cheapest_total_path);
+
+		/*
+		 * Collect a list of all the available path orderings for all the
+		 * children.  We use this as a heuristic to indicate which sort
+		 * orderings we should build MergeAppend paths for.
+		 */
+		foreach(lcp, childrel->pathlist)
+		{
+			Path	   *childpath = (Path *) lfirst(lcp);
+			List	   *childkeys = childpath->pathkeys;
+			ListCell   *lpk;
+			bool		found = false;
+
+			/* Ignore unsorted paths */
+			if (childkeys == NIL)
+				continue;
+
+			/* Have we already seen this ordering? */
+			foreach(lpk, all_child_pathkeys)
+			{
+				List   *existing_pathkeys = (List *) lfirst(lpk);
+
+				if (compare_pathkeys(existing_pathkeys,
+									 childkeys) == PATHKEYS_EQUAL)
+				{
+					found = true;
+					break;
+				}
+			}
+			if (!found)
+			{
+				/* No, so add it to all_child_pathkeys */
+				all_child_pathkeys = lappend(all_child_pathkeys, childkeys);
+			}
+		}

 		/*
 		 * Accumulate size information from each child.
@ -483,16 +517,106 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 	pfree(parent_attrsizes);

 	/*
-	 * Finally, build Append path and install it as the only access path for
-	 * the parent rel.	(Note: this is correct even if we have zero or one
-	 * live subpath due to constraint exclusion.)
+	 * Next, build an unordered Append path for the rel.  (Note: this is
+	 * correct even if we have zero or one live subpath due to constraint
+	 * exclusion.)
 	 */
 	add_path(rel, (Path *) create_append_path(rel, subpaths));

-	/* Select cheapest path (pretty easy in this case...) */
+	/*
+	 * Next, build MergeAppend paths based on the collected list of child
+	 * pathkeys.  We consider both cheapest-startup and cheapest-total
+	 * cases, ie, for each interesting ordering, collect all the cheapest
+	 * startup subpaths and all the cheapest total paths, and build a
+	 * MergeAppend path for each list.
+	 */
+	foreach(l, all_child_pathkeys)
+	{
+		List   *pathkeys = (List *) lfirst(l);
+		List   *startup_subpaths = NIL;
+		List   *total_subpaths = NIL;
+		bool	startup_neq_total = false;
+		ListCell *lcr;
+
+		/* Select the child paths for this ordering... */
+		foreach(lcr, live_childrels)
+		{
+			RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
+			Path	   *cheapest_startup,
+					   *cheapest_total;
+
+			/* Locate the right paths, if they are available. */
+			cheapest_startup =
+				get_cheapest_path_for_pathkeys(childrel->pathlist,
+											   pathkeys,
+											   STARTUP_COST);
+			cheapest_total =
+				get_cheapest_path_for_pathkeys(childrel->pathlist,
+											   pathkeys,
+											   TOTAL_COST);
+
+			/*
+			 * If we can't find any paths with the right order just add the
+			 * cheapest-total path; we'll have to sort it.
+			 */
+			if (cheapest_startup == NULL)
+				cheapest_startup = childrel->cheapest_total_path;
+			if (cheapest_total == NULL)
+				cheapest_total = childrel->cheapest_total_path;
+
+			/*
+			 * Notice whether we actually have different paths for the
+			 * "cheapest" and "total" cases; frequently there will be no
+			 * point in two create_merge_append_path() calls.
+			 */
+			if (cheapest_startup != cheapest_total)
+				startup_neq_total = true;
+
+			startup_subpaths =
+				accumulate_append_subpath(startup_subpaths, cheapest_startup);
+			total_subpaths =
+				accumulate_append_subpath(total_subpaths, cheapest_total);
+		}
+
+		/* ... and build the MergeAppend paths */
+		add_path(rel, (Path *) create_merge_append_path(root,
+														rel,
+														startup_subpaths,
+														pathkeys));
+		if (startup_neq_total)
+			add_path(rel, (Path *) create_merge_append_path(root,
+															rel,
+															total_subpaths,
+															pathkeys));
+	}
+
+	/* Select cheapest path */
 	set_cheapest(rel);
 }

+/*
+ * accumulate_append_subpath
+ *		Add a subpath to the list being built for an Append or MergeAppend
+ *
+ * It's possible that the child is itself an Append path, in which case
+ * we can "cut out the middleman" and just add its child paths to our
+ * own list.  (We don't try to do this earlier because we need to
+ * apply both levels of transformation to the quals.)
+ */
+static List *
+accumulate_append_subpath(List *subpaths, Path *path)
+{
+	if (IsA(path, AppendPath))
+	{
+		AppendPath	*apath = (AppendPath *) path;
+
+		/* list_copy is important here to avoid sharing list substructure */
+		return list_concat(subpaths, list_copy(apath->subpaths));
+	}
+	else
+		return lappend(subpaths, path);
+}
+
 /*
 * set_dummy_rel_pathlist
 *	  Build a dummy path for a relation that's been excluded by constraints
@ -1385,6 +1509,9 @@ print_path(PlannerInfo *root, Path *path, int indent)
 		case T_AppendPath:
 			ptype = "Append";
 			break;
+		case T_MergeAppendPath:
+			ptype = "MergeAppend";
+			break;
 		case T_ResultPath:
 			ptype = "Result";
 			break;
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@ -1209,6 +1209,70 @@ cost_sort(Path *path, PlannerInfo *root,
 	path->total_cost = startup_cost + run_cost;
 }

+/*
+ * cost_merge_append
+ *	  Determines and returns the cost of a MergeAppend node.
+ *
+ * MergeAppend merges several pre-sorted input streams, using a heap that
+ * at any given instant holds the next tuple from each stream.  If there
+ * are N streams, we need about N*log2(N) tuple comparisons to construct
+ * the heap at startup, and then for each output tuple, about log2(N)
+ * comparisons to delete the top heap entry and another log2(N) comparisons
+ * to insert its successor from the same stream.
+ *
+ * (The effective value of N will drop once some of the input streams are
+ * exhausted, but it seems unlikely to be worth trying to account for that.)
+ *
+ * The heap is never spilled to disk, since we assume N is not very large.
+ * So this is much simpler than cost_sort.
+ *
+ * As in cost_sort, we charge two operator evals per tuple comparison.
+ *
+ * 'pathkeys' is a list of sort keys
+ * 'n_streams' is the number of input streams
+ * 'input_startup_cost' is the sum of the input streams' startup costs
+ * 'input_total_cost' is the sum of the input streams' total costs
+ * 'tuples' is the number of tuples in all the streams
+ */
+void
+cost_merge_append(Path *path, PlannerInfo *root,
+				  List *pathkeys, int n_streams,
+				  Cost input_startup_cost, Cost input_total_cost,
+				  double tuples)
+{
+	Cost		startup_cost = 0;
+	Cost		run_cost = 0;
+	Cost		comparison_cost;
+	double		N;
+	double		logN;
+
+	/*
+	 * Avoid log(0)...
+	 */
+	N = (n_streams < 2) ? 2.0 : (double) n_streams;
+	logN = LOG2(N);
+
+	/* Assumed cost per tuple comparison */
+	comparison_cost = 2.0 * cpu_operator_cost;
+
+	/* Heap creation cost */
+	startup_cost += comparison_cost * N * logN;
+
+	/* Per-tuple heap maintenance cost */
+	run_cost += tuples * comparison_cost * 2.0 * logN;
+
+	/*
+	 * Also charge a small amount (arbitrarily set equal to operator cost) per
+	 * extracted tuple.  We don't charge cpu_tuple_cost because a MergeAppend
+	 * node doesn't do qual-checking or projection, so it has less overhead
+	 * than most plan nodes.
+	 */
+	run_cost += cpu_operator_cost * tuples;
+
+	path->startup_cost = startup_cost + input_startup_cost;
+	path->total_cost = startup_cost + run_cost + input_total_cost;
+}
+
 /*
 * cost_material
 *	  Determines and returns the cost of materializing a relation, including
@ -1405,7 +1469,9 @@ cost_group(Path *path, PlannerInfo *root,
 * output row count, which may be lower than the restriction-clause-only row
 * count of its parent.  (We don't include this case in the PATH_ROWS macro
 * because it applies *only* to a nestloop's inner relation.)  We have to
- * be prepared to recurse through Append nodes in case of an appendrel.
+ * be prepared to recurse through Append or MergeAppend nodes in case of an
+ * appendrel.  (It's not clear MergeAppend can be seen here, but we may as
+ * well handle it if so.)
 */
 static double
 nestloop_inner_path_rows(Path *path)
@ -1426,6 +1492,16 @@ nestloop_inner_path_rows(Path *path)
 			result += nestloop_inner_path_rows((Path *) lfirst(l));
 		}
 	}
+	else if (IsA(path, MergeAppendPath))
+	{
+		ListCell   *l;
+
+		result = 0;
+		foreach(l, ((MergeAppendPath *) path)->subpaths)
+		{
+			result += nestloop_inner_path_rows((Path *) lfirst(l));
+		}
+	}
 	else
 		result = PATH_ROWS(path);

--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@ -1611,8 +1611,8 @@ exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2)
 *	  Search for EC members that reference (only) the parent_rel, and
 *	  add transformed members referencing the child_rel.
 *
- * We only need to do this for ECs that could generate join conditions,
- * since the child members are only used for creating inner-indexscan paths.
+ * Note that this function won't be called at all unless we have at least some
+ * reason to believe that the EC members it generates will be useful.
 *
 * parent_rel and child_rel could be derived from appinfo, but since the
 * caller has already computed them, we might as well just pass them in.
@ -1631,10 +1631,14 @@ add_child_rel_equivalences(PlannerInfo *root,
 		ListCell   *lc2;

 		/*
-		 * Won't generate joinclauses if const or single-member (the latter
-		 * test covers the volatile case too)
+		 * If this EC contains a constant, then it's not useful for sorting
+		 * or driving an inner index-scan, so we skip generating child EMs.
+		 *
+		 * If this EC contains a volatile expression, then generating child
+		 * EMs would be downright dangerous.  We rely on a volatile EC having
+		 * only one EM.
 		 */
-		if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1)
+		if (cur_ec->ec_has_const || cur_ec->ec_has_volatile)
 			continue;

 		/* No point in searching if parent rel not mentioned in eclass */