1
0
mirror of https://github.com/postgres/postgres.git synced 2025-04-22 23:02:54 +03:00

Simplify partial path generation in GROUP BY/ORDER BY

Here we consolidate the generation of partial sort and partial incremental
sort paths in a similar way to what was done in 4a29eabd1.  Since the cost
penalty for incremental sort was removed by that commit, there's no
point in creating a sort path on the cheapest partial path if an
incremental sort could be done instead.

This has the added benefit of reducing the amount of code required to
build these paths.

Author: Richard Guo
Reviewed-by: Etsuro Fujita, Shubham Khanna, David Rowley
Discussion: https://postgr.es/m/CAMbWs49PaKxBZU9cN7k3DKB7id+YfGfOfS9H_Fo5tkqPMt=fDg@mail.gmail.com
This commit is contained in:
David Rowley 2024-01-31 10:10:59 +13:00
parent 7b745d85b8
commit 8ee9c25087
3 changed files with 154 additions and 122 deletions

View File

@ -5102,8 +5102,9 @@ create_ordered_paths(PlannerInfo *root,
* have generated order-preserving Gather Merge plans which can be used * have generated order-preserving Gather Merge plans which can be used
* without sorting if they happen to match the sort_pathkeys, and the loop * without sorting if they happen to match the sort_pathkeys, and the loop
* above will have handled those as well. However, there's one more * above will have handled those as well. However, there's one more
* possibility: it may make sense to sort the cheapest partial path * possibility: it may make sense to sort the cheapest partial path or
* according to the required output order and then use Gather Merge. * incrementally sort any partial path that is partially sorted according
* to the required output order and then use Gather Merge.
*/ */
if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL && if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL &&
input_rel->partial_pathlist != NIL) input_rel->partial_pathlist != NIL)
@ -5112,97 +5113,65 @@ create_ordered_paths(PlannerInfo *root,
cheapest_partial_path = linitial(input_rel->partial_pathlist); cheapest_partial_path = linitial(input_rel->partial_pathlist);
/* foreach(lc, input_rel->partial_pathlist)
* If cheapest partial path doesn't need a sort, this is redundant
* with what's already been tried.
*/
if (!pathkeys_contained_in(root->sort_pathkeys,
cheapest_partial_path->pathkeys))
{ {
Path *path; Path *input_path = (Path *) lfirst(lc);
Path *sorted_path;
bool is_sorted;
int presorted_keys;
double total_groups; double total_groups;
path = (Path *) create_sort_path(root, is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
ordered_rel, input_path->pathkeys,
cheapest_partial_path, &presorted_keys);
root->sort_pathkeys,
limit_tuples);
total_groups = cheapest_partial_path->rows * if (is_sorted)
cheapest_partial_path->parallel_workers; continue;
path = (Path *)
create_gather_merge_path(root, ordered_rel,
path,
path->pathtarget,
root->sort_pathkeys, NULL,
&total_groups);
/* Add projection step if needed */ /*
if (path->pathtarget != target) * Try at least sorting the cheapest path and also try
path = apply_projection_to_path(root, ordered_rel, * incrementally sorting any path which is partially sorted
path, target); * already (no need to deal with paths which have presorted keys
* when incremental sort is disabled unless it's the cheapest
* partial path).
*/
if (input_path != cheapest_partial_path &&
(presorted_keys == 0 || !enable_incremental_sort))
continue;
add_path(ordered_rel, path); /*
} * We've no need to consider both a sort and incremental sort.
* We'll just do a sort if there are no presorted keys and an
/* * incremental sort when there are presorted keys.
* Consider incremental sort with a gather merge on partial paths. */
* if (presorted_keys == 0 || !enable_incremental_sort)
* We can also skip the entire loop when we only have a single-item sorted_path = (Path *) create_sort_path(root,
* sort_pathkeys because then we can't possibly have a presorted ordered_rel,
* prefix of the list without having the list be fully sorted. input_path,
*/ root->sort_pathkeys,
if (enable_incremental_sort && list_length(root->sort_pathkeys) > 1) limit_tuples);
{ else
foreach(lc, input_rel->partial_pathlist)
{
Path *input_path = (Path *) lfirst(lc);
Path *sorted_path;
bool is_sorted;
int presorted_keys;
double total_groups;
/*
* We don't care if this is the cheapest partial path - we
* can't simply skip it, because it may be partially sorted in
* which case we want to consider adding incremental sort
* (instead of full sort, which is what happens above).
*/
is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
input_path->pathkeys,
&presorted_keys);
/* No point in adding incremental sort on fully sorted paths. */
if (is_sorted)
continue;
if (presorted_keys == 0)
continue;
/* Since we have presorted keys, consider incremental sort. */
sorted_path = (Path *) create_incremental_sort_path(root, sorted_path = (Path *) create_incremental_sort_path(root,
ordered_rel, ordered_rel,
input_path, input_path,
root->sort_pathkeys, root->sort_pathkeys,
presorted_keys, presorted_keys,
limit_tuples); limit_tuples);
total_groups = input_path->rows * total_groups = input_path->rows *
input_path->parallel_workers; input_path->parallel_workers;
sorted_path = (Path *) sorted_path = (Path *)
create_gather_merge_path(root, ordered_rel, create_gather_merge_path(root, ordered_rel,
sorted_path, sorted_path,
sorted_path->pathtarget, sorted_path->pathtarget,
root->sort_pathkeys, NULL, root->sort_pathkeys, NULL,
&total_groups); &total_groups);
/* Add projection step if needed */ /* Add projection step if needed */
if (sorted_path->pathtarget != target) if (sorted_path->pathtarget != target)
sorted_path = apply_projection_to_path(root, ordered_rel, sorted_path = apply_projection_to_path(root, ordered_rel,
sorted_path, target); sorted_path, target);
add_path(ordered_rel, sorted_path); add_path(ordered_rel, sorted_path);
}
} }
} }
@ -7322,44 +7291,9 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
/* Try Gather for unordered paths and Gather Merge for ordered ones. */ /* Try Gather for unordered paths and Gather Merge for ordered ones. */
generate_useful_gather_paths(root, rel, true); generate_useful_gather_paths(root, rel, true);
/* Try cheapest partial path + explicit Sort + Gather Merge. */
cheapest_partial_path = linitial(rel->partial_pathlist); cheapest_partial_path = linitial(rel->partial_pathlist);
if (!pathkeys_contained_in(root->group_pathkeys,
cheapest_partial_path->pathkeys))
{
Path *path;
double total_groups;
total_groups = /* XXX Shouldn't this also consider the group-key-reordering? */
cheapest_partial_path->rows * cheapest_partial_path->parallel_workers;
path = (Path *) create_sort_path(root, rel, cheapest_partial_path,
root->group_pathkeys,
-1.0);
path = (Path *)
create_gather_merge_path(root,
rel,
path,
rel->reltarget,
root->group_pathkeys,
NULL,
&total_groups);
add_path(rel, path);
}
/*
* Consider incremental sort on all partial paths, if enabled.
*
* We can also skip the entire loop when we only have a single-item
* group_pathkeys because then we can't possibly have a presorted prefix
* of the list without having the list be fully sorted.
*
* XXX Shouldn't this also consider the group-key-reordering?
*/
if (!enable_incremental_sort || list_length(root->group_pathkeys) == 1)
return;
/* also consider incremental sort on partial paths, if enabled */
foreach(lc, rel->partial_pathlist) foreach(lc, rel->partial_pathlist)
{ {
Path *path = (Path *) lfirst(lc); Path *path = (Path *) lfirst(lc);
@ -7374,15 +7308,34 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
if (is_sorted) if (is_sorted)
continue; continue;
if (presorted_keys == 0) /*
* Try at least sorting the cheapest path and also try incrementally
* sorting any path which is partially sorted already (no need to deal
* with paths which have presorted keys when incremental sort is
* disabled unless it's the cheapest input path).
*/
if (path != cheapest_partial_path &&
(presorted_keys == 0 || !enable_incremental_sort))
continue; continue;
path = (Path *) create_incremental_sort_path(root, total_groups = path->rows * path->parallel_workers;
rel,
path, /*
root->group_pathkeys, * We've no need to consider both a sort and incremental sort. We'll
presorted_keys, * just do a sort if there are no presorted keys and an incremental
-1.0); * sort when there are presorted keys.
*/
if (presorted_keys == 0 || !enable_incremental_sort)
path = (Path *) create_sort_path(root, rel, path,
root->group_pathkeys,
-1.0);
else
path = (Path *) create_incremental_sort_path(root,
rel,
path,
root->group_pathkeys,
presorted_keys,
-1.0);
path = (Path *) path = (Path *)
create_gather_merge_path(root, create_gather_merge_path(root,

View File

@ -937,6 +937,59 @@ select string4 from tenk1 order by string4 limit 5;
reset parallel_leader_participation; reset parallel_leader_participation;
reset max_parallel_workers; reset max_parallel_workers;
create function parallel_safe_volatile(a int) returns int as
$$ begin return a; end; $$ parallel safe volatile language plpgsql;
-- Test gather merge atop of a sort of a partial path
explain (costs off)
select * from tenk1 where four = 2
order by four, hundred, parallel_safe_volatile(thousand);
QUERY PLAN
---------------------------------------------------------------
Gather Merge
Workers Planned: 4
-> Sort
Sort Key: hundred, (parallel_safe_volatile(thousand))
-> Parallel Seq Scan on tenk1
Filter: (four = 2)
(6 rows)
-- Test gather merge atop of an incremental sort of a partial path
set min_parallel_index_scan_size = 0;
set enable_seqscan = off;
explain (costs off)
select * from tenk1 where four = 2
order by four, hundred, parallel_safe_volatile(thousand);
QUERY PLAN
---------------------------------------------------------------
Gather Merge
Workers Planned: 4
-> Incremental Sort
Sort Key: hundred, (parallel_safe_volatile(thousand))
Presorted Key: hundred
-> Parallel Index Scan using tenk1_hundred on tenk1
Filter: (four = 2)
(7 rows)
reset min_parallel_index_scan_size;
reset enable_seqscan;
-- Test GROUP BY with a gather merge path atop of a sort of a partial path
explain (costs off)
select count(*) from tenk1
group by twenty, parallel_safe_volatile(two);
QUERY PLAN
--------------------------------------------------------------------
Finalize GroupAggregate
Group Key: twenty, (parallel_safe_volatile(two))
-> Gather Merge
Workers Planned: 4
-> Sort
Sort Key: twenty, (parallel_safe_volatile(two))
-> Partial HashAggregate
Group Key: twenty, parallel_safe_volatile(two)
-> Parallel Seq Scan on tenk1
(9 rows)
drop function parallel_safe_volatile(int);
SAVEPOINT settings; SAVEPOINT settings;
SET LOCAL debug_parallel_query = 1; SET LOCAL debug_parallel_query = 1;
explain (costs off) explain (costs off)

View File

@ -343,6 +343,32 @@ select string4 from tenk1 order by string4 limit 5;
reset parallel_leader_participation; reset parallel_leader_participation;
reset max_parallel_workers; reset max_parallel_workers;
create function parallel_safe_volatile(a int) returns int as
$$ begin return a; end; $$ parallel safe volatile language plpgsql;
-- Test gather merge atop of a sort of a partial path
explain (costs off)
select * from tenk1 where four = 2
order by four, hundred, parallel_safe_volatile(thousand);
-- Test gather merge atop of an incremental sort of a partial path
set min_parallel_index_scan_size = 0;
set enable_seqscan = off;
explain (costs off)
select * from tenk1 where four = 2
order by four, hundred, parallel_safe_volatile(thousand);
reset min_parallel_index_scan_size;
reset enable_seqscan;
-- Test GROUP BY with a gather merge path atop of a sort of a partial path
explain (costs off)
select count(*) from tenk1
group by twenty, parallel_safe_volatile(two);
drop function parallel_safe_volatile(int);
SAVEPOINT settings; SAVEPOINT settings;
SET LOCAL debug_parallel_query = 1; SET LOCAL debug_parallel_query = 1;
explain (costs off) explain (costs off)