From 8ee9c250875157d42f7ed53508e239d23ce514fb Mon Sep 17 00:00:00 2001 From: David Rowley Date: Wed, 31 Jan 2024 10:10:59 +1300 Subject: [PATCH] Simplify partial path generation in GROUP BY/ORDER BY Here we consolidate the generation of partial sort and partial incremental sort paths in a similar way to what was done in 4a29eabd1. Since the cost penalty for incremental sort was removed by that commit, there's no point in creating a sort path on the cheapest partial path if an incremental sort could be done instead. This has the added benefit of reducing the amount of code required to build these paths. Author: Richard Guo Reviewed-by: Etsuro Fujita, Shubham Khanna, David Rowley Discussion: https://postgr.es/m/CAMbWs49PaKxBZU9cN7k3DKB7id+YfGfOfS9H_Fo5tkqPMt=fDg@mail.gmail.com --- src/backend/optimizer/plan/planner.c | 197 +++++++----------- src/test/regress/expected/select_parallel.out | 53 +++++ src/test/regress/sql/select_parallel.sql | 26 +++ 3 files changed, 154 insertions(+), 122 deletions(-) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 2e2458b1284..01fa45b9255 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -5102,8 +5102,9 @@ create_ordered_paths(PlannerInfo *root, * have generated order-preserving Gather Merge plans which can be used * without sorting if they happen to match the sort_pathkeys, and the loop * above will have handled those as well. However, there's one more - * possibility: it may make sense to sort the cheapest partial path - * according to the required output order and then use Gather Merge. + * possibility: it may make sense to sort the cheapest partial path or + * incrementally sort any partial path that is partially sorted according + * to the required output order and then use Gather Merge. */ if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL && input_rel->partial_pathlist != NIL) @@ -5112,97 +5113,65 @@ create_ordered_paths(PlannerInfo *root, cheapest_partial_path = linitial(input_rel->partial_pathlist); - /* - * If cheapest partial path doesn't need a sort, this is redundant - * with what's already been tried. - */ - if (!pathkeys_contained_in(root->sort_pathkeys, - cheapest_partial_path->pathkeys)) + foreach(lc, input_rel->partial_pathlist) { - Path *path; + Path *input_path = (Path *) lfirst(lc); + Path *sorted_path; + bool is_sorted; + int presorted_keys; double total_groups; - path = (Path *) create_sort_path(root, - ordered_rel, - cheapest_partial_path, - root->sort_pathkeys, - limit_tuples); + is_sorted = pathkeys_count_contained_in(root->sort_pathkeys, + input_path->pathkeys, + &presorted_keys); - total_groups = cheapest_partial_path->rows * - cheapest_partial_path->parallel_workers; - path = (Path *) - create_gather_merge_path(root, ordered_rel, - path, - path->pathtarget, - root->sort_pathkeys, NULL, - &total_groups); + if (is_sorted) + continue; - /* Add projection step if needed */ - if (path->pathtarget != target) - path = apply_projection_to_path(root, ordered_rel, - path, target); + /* + * Try at least sorting the cheapest path and also try + * incrementally sorting any path which is partially sorted + * already (no need to deal with paths which have presorted keys + * when incremental sort is disabled unless it's the cheapest + * partial path). + */ + if (input_path != cheapest_partial_path && + (presorted_keys == 0 || !enable_incremental_sort)) + continue; - add_path(ordered_rel, path); - } - - /* - * Consider incremental sort with a gather merge on partial paths. - * - * We can also skip the entire loop when we only have a single-item - * sort_pathkeys because then we can't possibly have a presorted - * prefix of the list without having the list be fully sorted. - */ - if (enable_incremental_sort && list_length(root->sort_pathkeys) > 1) - { - foreach(lc, input_rel->partial_pathlist) - { - Path *input_path = (Path *) lfirst(lc); - Path *sorted_path; - bool is_sorted; - int presorted_keys; - double total_groups; - - /* - * We don't care if this is the cheapest partial path - we - * can't simply skip it, because it may be partially sorted in - * which case we want to consider adding incremental sort - * (instead of full sort, which is what happens above). - */ - - is_sorted = pathkeys_count_contained_in(root->sort_pathkeys, - input_path->pathkeys, - &presorted_keys); - - /* No point in adding incremental sort on fully sorted paths. */ - if (is_sorted) - continue; - - if (presorted_keys == 0) - continue; - - /* Since we have presorted keys, consider incremental sort. */ + /* + * We've no need to consider both a sort and incremental sort. + * We'll just do a sort if there are no presorted keys and an + * incremental sort when there are presorted keys. + */ + if (presorted_keys == 0 || !enable_incremental_sort) + sorted_path = (Path *) create_sort_path(root, + ordered_rel, + input_path, + root->sort_pathkeys, + limit_tuples); + else sorted_path = (Path *) create_incremental_sort_path(root, ordered_rel, input_path, root->sort_pathkeys, presorted_keys, limit_tuples); - total_groups = input_path->rows * - input_path->parallel_workers; - sorted_path = (Path *) - create_gather_merge_path(root, ordered_rel, - sorted_path, - sorted_path->pathtarget, - root->sort_pathkeys, NULL, - &total_groups); + total_groups = input_path->rows * + input_path->parallel_workers; + sorted_path = (Path *) + create_gather_merge_path(root, ordered_rel, + sorted_path, + sorted_path->pathtarget, + root->sort_pathkeys, NULL, + &total_groups); - /* Add projection step if needed */ - if (sorted_path->pathtarget != target) - sorted_path = apply_projection_to_path(root, ordered_rel, - sorted_path, target); + /* Add projection step if needed */ + if (sorted_path->pathtarget != target) + sorted_path = apply_projection_to_path(root, ordered_rel, + sorted_path, target); - add_path(ordered_rel, sorted_path); - } + add_path(ordered_rel, sorted_path); } } @@ -7322,44 +7291,9 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel) /* Try Gather for unordered paths and Gather Merge for ordered ones. */ generate_useful_gather_paths(root, rel, true); - /* Try cheapest partial path + explicit Sort + Gather Merge. */ cheapest_partial_path = linitial(rel->partial_pathlist); - if (!pathkeys_contained_in(root->group_pathkeys, - cheapest_partial_path->pathkeys)) - { - Path *path; - double total_groups; - total_groups = - cheapest_partial_path->rows * cheapest_partial_path->parallel_workers; - path = (Path *) create_sort_path(root, rel, cheapest_partial_path, - root->group_pathkeys, - -1.0); - path = (Path *) - create_gather_merge_path(root, - rel, - path, - rel->reltarget, - root->group_pathkeys, - NULL, - &total_groups); - - add_path(rel, path); - } - - /* - * Consider incremental sort on all partial paths, if enabled. - * - * We can also skip the entire loop when we only have a single-item - * group_pathkeys because then we can't possibly have a presorted prefix - * of the list without having the list be fully sorted. - * - * XXX Shouldn't this also consider the group-key-reordering? - */ - if (!enable_incremental_sort || list_length(root->group_pathkeys) == 1) - return; - - /* also consider incremental sort on partial paths, if enabled */ + /* XXX Shouldn't this also consider the group-key-reordering? */ foreach(lc, rel->partial_pathlist) { Path *path = (Path *) lfirst(lc); @@ -7374,15 +7308,34 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel) if (is_sorted) continue; - if (presorted_keys == 0) + /* + * Try at least sorting the cheapest path and also try incrementally + * sorting any path which is partially sorted already (no need to deal + * with paths which have presorted keys when incremental sort is + * disabled unless it's the cheapest input path). + */ + if (path != cheapest_partial_path && + (presorted_keys == 0 || !enable_incremental_sort)) continue; - path = (Path *) create_incremental_sort_path(root, - rel, - path, - root->group_pathkeys, - presorted_keys, - -1.0); + total_groups = path->rows * path->parallel_workers; + + /* + * We've no need to consider both a sort and incremental sort. We'll + * just do a sort if there are no presorted keys and an incremental + * sort when there are presorted keys. + */ + if (presorted_keys == 0 || !enable_incremental_sort) + path = (Path *) create_sort_path(root, rel, path, + root->group_pathkeys, + -1.0); + else + path = (Path *) create_incremental_sort_path(root, + rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); path = (Path *) create_gather_merge_path(root, diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out index d88353d496c..7a0d78dfe30 100644 --- a/src/test/regress/expected/select_parallel.out +++ b/src/test/regress/expected/select_parallel.out @@ -937,6 +937,59 @@ select string4 from tenk1 order by string4 limit 5; reset parallel_leader_participation; reset max_parallel_workers; +create function parallel_safe_volatile(a int) returns int as + $$ begin return a; end; $$ parallel safe volatile language plpgsql; +-- Test gather merge atop of a sort of a partial path +explain (costs off) +select * from tenk1 where four = 2 +order by four, hundred, parallel_safe_volatile(thousand); + QUERY PLAN +--------------------------------------------------------------- + Gather Merge + Workers Planned: 4 + -> Sort + Sort Key: hundred, (parallel_safe_volatile(thousand)) + -> Parallel Seq Scan on tenk1 + Filter: (four = 2) +(6 rows) + +-- Test gather merge atop of an incremental sort a of partial path +set min_parallel_index_scan_size = 0; +set enable_seqscan = off; +explain (costs off) +select * from tenk1 where four = 2 +order by four, hundred, parallel_safe_volatile(thousand); + QUERY PLAN +--------------------------------------------------------------- + Gather Merge + Workers Planned: 4 + -> Incremental Sort + Sort Key: hundred, (parallel_safe_volatile(thousand)) + Presorted Key: hundred + -> Parallel Index Scan using tenk1_hundred on tenk1 + Filter: (four = 2) +(7 rows) + +reset min_parallel_index_scan_size; +reset enable_seqscan; +-- Test GROUP BY with a gather merge path atop of a sort of a partial path +explain (costs off) +select count(*) from tenk1 +group by twenty, parallel_safe_volatile(two); + QUERY PLAN +-------------------------------------------------------------------- + Finalize GroupAggregate + Group Key: twenty, (parallel_safe_volatile(two)) + -> Gather Merge + Workers Planned: 4 + -> Sort + Sort Key: twenty, (parallel_safe_volatile(two)) + -> Partial HashAggregate + Group Key: twenty, parallel_safe_volatile(two) + -> Parallel Seq Scan on tenk1 +(9 rows) + +drop function parallel_safe_volatile(int); SAVEPOINT settings; SET LOCAL debug_parallel_query = 1; explain (costs off) diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql index 80c914dc02b..c43a5b21191 100644 --- a/src/test/regress/sql/select_parallel.sql +++ b/src/test/regress/sql/select_parallel.sql @@ -343,6 +343,32 @@ select string4 from tenk1 order by string4 limit 5; reset parallel_leader_participation; reset max_parallel_workers; +create function parallel_safe_volatile(a int) returns int as + $$ begin return a; end; $$ parallel safe volatile language plpgsql; + +-- Test gather merge atop of a sort of a partial path +explain (costs off) +select * from tenk1 where four = 2 +order by four, hundred, parallel_safe_volatile(thousand); + +-- Test gather merge atop of an incremental sort a of partial path +set min_parallel_index_scan_size = 0; +set enable_seqscan = off; + +explain (costs off) +select * from tenk1 where four = 2 +order by four, hundred, parallel_safe_volatile(thousand); + +reset min_parallel_index_scan_size; +reset enable_seqscan; + +-- Test GROUP BY with a gather merge path atop of a sort of a partial path +explain (costs off) +select count(*) from tenk1 +group by twenty, parallel_safe_volatile(two); + +drop function parallel_safe_volatile(int); + SAVEPOINT settings; SET LOCAL debug_parallel_query = 1; explain (costs off)