Fix rowcount estimate for gather (merge) paths

In the case of a parallel plan, when computing the number of tuples
processed per worker, we divide the total number of tuples by the
parallel_divisor obtained from get_parallel_divisor(), which accounts
for the leader's contribution in addition to the number of workers.

Accordingly, when estimating the number of tuples for gather (merge)
nodes, we should multiply the number of tuples per worker by the same
parallel_divisor to reverse the division.  However, currently we use
parallel_workers rather than parallel_divisor for the multiplication.
This could result in an underestimation of the number of tuples for
gather (merge) nodes, especially when there are fewer than four
workers.
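
To see the size of the error, here is a standalone sketch of the arithmetic (not taken from the commit; the 10000-row figure and the helper below are illustrative, and it assumes parallel_leader_participation is enabled so that get_parallel_divisor() adds a leader contribution of 1.0 - 0.3 * parallel_workers when that value is positive):

#include <stdio.h>

/* Illustrative stand-in for get_parallel_divisor(): workers plus leader share. */
static double
parallel_divisor(int parallel_workers)
{
	double		divisor = parallel_workers;
	double		leader_contribution = 1.0 - (0.3 * parallel_workers);

	if (leader_contribution > 0)
		divisor += leader_contribution;
	return divisor;
}

int
main(void)
{
	const double total_rows = 10000.0;	/* hypothetical rowcount before the division */

	for (int workers = 1; workers <= 4; workers++)
	{
		double		divisor = parallel_divisor(workers);
		double		per_worker = total_rows / divisor;	/* partial path estimate */
		double		old_gather = per_worker * workers;	/* old: multiply by parallel_workers */
		double		new_gather = per_worker * divisor;	/* new: multiply by parallel_divisor */

		printf("%d worker(s): divisor=%.1f  old=%.0f  new=%.0f\n",
			   workers, divisor, old_gather, new_gather);
	}
	return 0;
}

With two workers the divisor is 2.4, so the old estimate recovers only
2/2.4 (about 83%) of the rows, roughly 8333 instead of 10000; at four or
more workers the leader contribution drops to zero and the two estimates
agree, which is why the underestimation is most visible with fewer than
four workers.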

This patch fixes this issue by using the same parallel_divisor for the
multiplication.  There is one ensuing plan change in the regression
tests, but it looks reasonable and does not compromise the test's
original purpose of exercising parallel-aware hash join.

In passing, this patch removes an unnecessary assignment for path.rows
in create_gather_merge_path, and fixes an uninitialized-variable issue
in generate_useful_gather_paths.

No backpatch as this could result in plan changes.

Author: Anthonin Bonnefoy
Reviewed-by: Rafia Sabih, Richard Guo
Discussion: https://postgr.es/m/CAO6_Xqr9+51NxgO=XospEkUeAg-p=EjAWmtpdcZwjRgGKJ53iA@mail.gmail.com
Committed by Richard Guo on 2024-07-23 10:33:26 +09:00
parent d2cba4f2cb
commit 581df21487
6 changed files with 33 additions and 20 deletions

src/backend/optimizer/path/allpaths.c

@@ -3079,8 +3079,7 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
 	 * of partial_pathlist because of the way add_partial_path works.
 	 */
 	cheapest_partial_path = linitial(rel->partial_pathlist);
-	rows =
-		cheapest_partial_path->rows * cheapest_partial_path->parallel_workers;
+	rows = compute_gather_rows(cheapest_partial_path);
 	simple_gather_path = (Path *)
 		create_gather_path(root, rel, cheapest_partial_path, rel->reltarget,
 						   NULL, rowsp);
@@ -3098,7 +3097,7 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
 		if (subpath->pathkeys == NIL)
 			continue;
 
-		rows = subpath->rows * subpath->parallel_workers;
+		rows = compute_gather_rows(subpath);
 		path = create_gather_merge_path(root, rel, subpath, rel->reltarget,
 										subpath->pathkeys, NULL, rowsp);
 		add_path(rel, &path->path);
@@ -3282,7 +3281,6 @@ generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
 													subpath,
 													useful_pathkeys,
 													-1.0);
-				rows = subpath->rows * subpath->parallel_workers;
 			}
 			else
 				subpath = (Path *) create_incremental_sort_path(root,
@@ -3291,6 +3289,7 @@ generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
 																useful_pathkeys,
 																presorted_keys,
 																-1);
+			rows = compute_gather_rows(subpath);
 			path = create_gather_merge_path(root, rel,
 											subpath,
 											rel->reltarget,

src/backend/optimizer/path/costsize.c

@@ -6473,3 +6473,21 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel,
 
 	return pages_fetched;
 }
+
+/*
+ * compute_gather_rows
+ *	  Estimate number of rows for gather (merge) nodes.
+ *
+ * In a parallel plan, each worker's row estimate is determined by dividing the
+ * total number of rows by parallel_divisor, which accounts for the leader's
+ * contribution in addition to the number of workers.  Accordingly, when
+ * estimating the number of rows for gather (merge) nodes, we multiply the rows
+ * per worker by the same parallel_divisor to undo the division.
+ */
+double
+compute_gather_rows(Path *path)
+{
+	Assert(path->parallel_workers > 0);
+
+	return clamp_row_est(path->rows * get_parallel_divisor(path));
+}

src/backend/optimizer/plan/planner.c

@@ -5370,8 +5370,7 @@ create_ordered_paths(PlannerInfo *root,
 																	root->sort_pathkeys,
 																	presorted_keys,
 																	limit_tuples);
-			total_groups = input_path->rows *
-				input_path->parallel_workers;
+			total_groups = compute_gather_rows(sorted_path);
 			sorted_path = (Path *)
 				create_gather_merge_path(root, ordered_rel,
 										 sorted_path,
@@ -7543,8 +7542,6 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
 			(presorted_keys == 0 || !enable_incremental_sort))
 			continue;
 
-		total_groups = path->rows * path->parallel_workers;
-
 		/*
 		 * We've no need to consider both a sort and incremental sort. We'll
 		 * just do a sort if there are no presorted keys and an incremental
@@ -7561,7 +7558,7 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
 														 groupby_pathkeys,
 														 presorted_keys,
 														 -1.0);
-
+		total_groups = compute_gather_rows(path);
 		path = (Path *)
 			create_gather_merge_path(root,
 									 rel,

src/backend/optimizer/util/pathnode.c

@@ -1899,7 +1899,6 @@ create_gather_merge_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	pathnode->num_workers = subpath->parallel_workers;
 	pathnode->path.pathkeys = pathkeys;
 	pathnode->path.pathtarget = target ? target : rel->reltarget;
-	pathnode->path.rows += subpath->rows;
 
 	if (pathkeys_contained_in(pathkeys, subpath->pathkeys))
 	{