Simplify partial path generation in GROUP BY/ORDER BY

Here we consolidate the generation of partial sort and partial incremental sort paths in a similar way to what was done in 4a29eabd1. Since the cost penalty for incremental sort was removed by that commit, there's no point in creating a sort path on the cheapest partial path if an incremental sort could be done instead. This has the added benefit of reducing the amount of code required to build these paths. Author: Richard Guo Reviewed-by: Etsuro Fujita, Shubham Khanna, David Rowley Discussion: https://postgr.es/m/CAMbWs49PaKxBZU9cN7k3DKB7id+YfGfOfS9H_Fo5tkqPMt=fDg@mail.gmail.com
2025-07-05 07:21:24 +03:00 · 2024-01-31 10:10:59 +13:00
parent 7b745d85b8
commit 8ee9c25087
3 changed files with 154 additions and 122 deletions
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@ -5102,8 +5102,9 @@ create_ordered_paths(PlannerInfo *root,
 	 * have generated order-preserving Gather Merge plans which can be used
 	 * without sorting if they happen to match the sort_pathkeys, and the loop
 	 * above will have handled those as well.  However, there's one more
-	 * possibility: it may make sense to sort the cheapest partial path
+	 * possibility: it may make sense to sort the cheapest partial path or
-	 * according to the required output order and then use Gather Merge.
+	 * incrementally sort any partial path that is partially sorted according
 	 * to the required output order and then use Gather Merge.
 	 */
 	if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL &&
 		input_rel->partial_pathlist != NIL)
@ -5112,97 +5113,65 @@ create_ordered_paths(PlannerInfo *root,
 		cheapest_partial_path = linitial(input_rel->partial_pathlist);
-		/*
+		foreach(lc, input_rel->partial_pathlist)
 		 * If cheapest partial path doesn't need a sort, this is redundant
 		 * with what's already been tried.
 		 */
 		if (!pathkeys_contained_in(root->sort_pathkeys,
 								   cheapest_partial_path->pathkeys))
 		{
-			Path	   *path;
+			Path	   *input_path = (Path *) lfirst(lc);
 			Path	   *sorted_path;
 			bool		is_sorted;
 			int			presorted_keys;
 			double		total_groups;
-			path = (Path *) create_sort_path(root,
+			is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
-											 ordered_rel,
+													input_path->pathkeys,
-											 cheapest_partial_path,
+													&presorted_keys);
 											 root->sort_pathkeys,
 											 limit_tuples);
-			total_groups = cheapest_partial_path->rows *
+			if (is_sorted)
-				cheapest_partial_path->parallel_workers;
+				continue;
 			path = (Path *)
 				create_gather_merge_path(root, ordered_rel,
 										 path,
 										 path->pathtarget,
 										 root->sort_pathkeys, NULL,
 										 &total_groups);
-			/* Add projection step if needed */
+			/*
-			if (path->pathtarget != target)
+			 * Try at least sorting the cheapest path and also try
-				path = apply_projection_to_path(root, ordered_rel,
+			 * incrementally sorting any path which is partially sorted
-												path, target);
+			 * already (no need to deal with paths which have presorted keys
 			 * when incremental sort is disabled unless it's the cheapest
 			 * partial path).
 			 */
 			if (input_path != cheapest_partial_path &&
 				(presorted_keys == 0 || !enable_incremental_sort))
 				continue;
-			add_path(ordered_rel, path);
+			/*
-		}
+			 * We've no need to consider both a sort and incremental sort.
-
+			 * We'll just do a sort if there are no presorted keys and an
-		/*
+			 * incremental sort when there are presorted keys.
-		 * Consider incremental sort with a gather merge on partial paths.
+			 */
-		 *
+			if (presorted_keys == 0 || !enable_incremental_sort)
-		 * We can also skip the entire loop when we only have a single-item
+				sorted_path = (Path *) create_sort_path(root,
-		 * sort_pathkeys because then we can't possibly have a presorted
+														ordered_rel,
-		 * prefix of the list without having the list be fully sorted.
+														input_path,
-		 */
+														root->sort_pathkeys,
-		if (enable_incremental_sort && list_length(root->sort_pathkeys) > 1)
+														limit_tuples);
-		{
+			else
 			foreach(lc, input_rel->partial_pathlist)
 			{
 				Path	   *input_path = (Path *) lfirst(lc);
 				Path	   *sorted_path;
 				bool		is_sorted;
 				int			presorted_keys;
 				double		total_groups;
 				/*
 				 * We don't care if this is the cheapest partial path - we
 				 * can't simply skip it, because it may be partially sorted in
 				 * which case we want to consider adding incremental sort
 				 * (instead of full sort, which is what happens above).
 				 */
 				is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
 														input_path->pathkeys,
 														&presorted_keys);
 				/* No point in adding incremental sort on fully sorted paths. */
 				if (is_sorted)
 					continue;
 				if (presorted_keys == 0)
 					continue;
 				/* Since we have presorted keys, consider incremental sort. */
 				sorted_path = (Path *) create_incremental_sort_path(root,
 																	ordered_rel,
 																	input_path,
 																	root->sort_pathkeys,
 																	presorted_keys,
 																	limit_tuples);
-				total_groups = input_path->rows *
+			total_groups = input_path->rows *
-					input_path->parallel_workers;
+				input_path->parallel_workers;
-				sorted_path = (Path *)
+			sorted_path = (Path *)
-					create_gather_merge_path(root, ordered_rel,
+				create_gather_merge_path(root, ordered_rel,
-											 sorted_path,
+										 sorted_path,
-											 sorted_path->pathtarget,
+										 sorted_path->pathtarget,
-											 root->sort_pathkeys, NULL,
+										 root->sort_pathkeys, NULL,
-											 &total_groups);
+										 &total_groups);
-				/* Add projection step if needed */
+			/* Add projection step if needed */
-				if (sorted_path->pathtarget != target)
+			if (sorted_path->pathtarget != target)
-					sorted_path = apply_projection_to_path(root, ordered_rel,
+				sorted_path = apply_projection_to_path(root, ordered_rel,
-														   sorted_path, target);
+													   sorted_path, target);
-				add_path(ordered_rel, sorted_path);
+			add_path(ordered_rel, sorted_path);
 			}
 		}
 	}
@ -7322,44 +7291,9 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
 	/* Try Gather for unordered paths and Gather Merge for ordered ones. */
 	generate_useful_gather_paths(root, rel, true);
 	/* Try cheapest partial path + explicit Sort + Gather Merge. */
 	cheapest_partial_path = linitial(rel->partial_pathlist);
 	if (!pathkeys_contained_in(root->group_pathkeys,
 							   cheapest_partial_path->pathkeys))
 	{
 		Path	   *path;
 		double		total_groups;
-		total_groups =
+	/* XXX Shouldn't this also consider the group-key-reordering? */
 			cheapest_partial_path->rows * cheapest_partial_path->parallel_workers;
 		path = (Path *) create_sort_path(root, rel, cheapest_partial_path,
 										 root->group_pathkeys,
 										 -1.0);
 		path = (Path *)
 			create_gather_merge_path(root,
 									 rel,
 									 path,
 									 rel->reltarget,
 									 root->group_pathkeys,
 									 NULL,
 									 &total_groups);
 		add_path(rel, path);
 	}
 	/*
 	 * Consider incremental sort on all partial paths, if enabled.
 	 *
 	 * We can also skip the entire loop when we only have a single-item
 	 * group_pathkeys because then we can't possibly have a presorted prefix
 	 * of the list without having the list be fully sorted.
 	 *
 	 * XXX Shouldn't this also consider the group-key-reordering?
 	 */
 	if (!enable_incremental_sort || list_length(root->group_pathkeys) == 1)
 		return;
 	/* also consider incremental sort on partial paths, if enabled */
 	foreach(lc, rel->partial_pathlist)
 	{
 		Path	   *path = (Path *) lfirst(lc);
@ -7374,15 +7308,34 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
 		if (is_sorted)
 			continue;
-		if (presorted_keys == 0)
+		/*
 		 * Try at least sorting the cheapest path and also try incrementally
 		 * sorting any path which is partially sorted already (no need to deal
 		 * with paths which have presorted keys when incremental sort is
 		 * disabled unless it's the cheapest input path).
 		 */
 		if (path != cheapest_partial_path &&
 			(presorted_keys == 0 || !enable_incremental_sort))
 			continue;
-		path = (Path *) create_incremental_sort_path(root,
+		total_groups = path->rows * path->parallel_workers;
-													 rel,
+
-													 path,
+		/*
-													 root->group_pathkeys,
+		 * We've no need to consider both a sort and incremental sort. We'll
-													 presorted_keys,
+		 * just do a sort if there are no presorted keys and an incremental
-													 -1.0);
+		 * sort when there are presorted keys.
 		 */
 		if (presorted_keys == 0 || !enable_incremental_sort)
 			path = (Path *) create_sort_path(root, rel, path,
 											 root->group_pathkeys,
 											 -1.0);
 		else
 			path = (Path *) create_incremental_sort_path(root,
 														 rel,
 														 path,
 														 root->group_pathkeys,
 														 presorted_keys,
 														 -1.0);
 		path = (Path *)
 			create_gather_merge_path(root,
--- a/src/test/regress/expected/select_parallel.out
+++ b/src/test/regress/expected/select_parallel.out
@ -937,6 +937,59 @@ select string4 from tenk1 order by string4 limit 5;
 reset parallel_leader_participation;
 reset max_parallel_workers;
 create function parallel_safe_volatile(a int) returns int as
  $$ begin return a; end; $$ parallel safe volatile language plpgsql;
 -- Test gather merge atop of a sort of a partial path
 explain (costs off)
 select * from tenk1 where four = 2
 order by four, hundred, parallel_safe_volatile(thousand);
                          QUERY PLAN                           
 ---------------------------------------------------------------
 Gather Merge
   Workers Planned: 4
   ->  Sort
         Sort Key: hundred, (parallel_safe_volatile(thousand))
         ->  Parallel Seq Scan on tenk1
               Filter: (four = 2)
 (6 rows)
 -- Test gather merge atop of an incremental sort a of partial path
 set min_parallel_index_scan_size = 0;
 set enable_seqscan = off;
 explain (costs off)
 select * from tenk1 where four = 2
 order by four, hundred, parallel_safe_volatile(thousand);
                          QUERY PLAN                           
 ---------------------------------------------------------------
 Gather Merge
   Workers Planned: 4
   ->  Incremental Sort
         Sort Key: hundred, (parallel_safe_volatile(thousand))
         Presorted Key: hundred
         ->  Parallel Index Scan using tenk1_hundred on tenk1
               Filter: (four = 2)
 (7 rows)
 reset min_parallel_index_scan_size;
 reset enable_seqscan;
 -- Test GROUP BY with a gather merge path atop of a sort of a partial path
 explain (costs off)
 select count(*) from tenk1
 group by twenty, parallel_safe_volatile(two);
                             QUERY PLAN                             
 --------------------------------------------------------------------
 Finalize GroupAggregate
   Group Key: twenty, (parallel_safe_volatile(two))
   ->  Gather Merge
         Workers Planned: 4
         ->  Sort
               Sort Key: twenty, (parallel_safe_volatile(two))
               ->  Partial HashAggregate
                     Group Key: twenty, parallel_safe_volatile(two)
                     ->  Parallel Seq Scan on tenk1
 (9 rows)
 drop function parallel_safe_volatile(int);
 SAVEPOINT settings;
 SET LOCAL debug_parallel_query = 1;
 explain (costs off)
--- a/src/test/regress/sql/select_parallel.sql
+++ b/src/test/regress/sql/select_parallel.sql
@ -343,6 +343,32 @@ select string4 from tenk1 order by string4 limit 5;
 reset parallel_leader_participation;
 reset max_parallel_workers;
 create function parallel_safe_volatile(a int) returns int as
  $$ begin return a; end; $$ parallel safe volatile language plpgsql;
 -- Test gather merge atop of a sort of a partial path
 explain (costs off)
 select * from tenk1 where four = 2
 order by four, hundred, parallel_safe_volatile(thousand);
 -- Test gather merge atop of an incremental sort a of partial path
 set min_parallel_index_scan_size = 0;
 set enable_seqscan = off;
 explain (costs off)
 select * from tenk1 where four = 2
 order by four, hundred, parallel_safe_volatile(thousand);
 reset min_parallel_index_scan_size;
 reset enable_seqscan;
 -- Test GROUP BY with a gather merge path atop of a sort of a partial path
 explain (costs off)
 select count(*) from tenk1
 group by twenty, parallel_safe_volatile(two);
 drop function parallel_safe_volatile(int);
 SAVEPOINT settings;
 SET LOCAL debug_parallel_query = 1;
 explain (costs off)