diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index aa78c0af0cd..057b4b79ebb 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -3079,8 +3079,7 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
 	 * of partial_pathlist because of the way add_partial_path works.
 	 */
 	cheapest_partial_path = linitial(rel->partial_pathlist);
-	rows =
-		cheapest_partial_path->rows * cheapest_partial_path->parallel_workers;
+	rows = compute_gather_rows(cheapest_partial_path);
 	simple_gather_path = (Path *)
 		create_gather_path(root, rel, cheapest_partial_path, rel->reltarget,
 						   NULL, rowsp);
@@ -3098,7 +3097,7 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
 		if (subpath->pathkeys == NIL)
 			continue;
 
-		rows = subpath->rows * subpath->parallel_workers;
+		rows = compute_gather_rows(subpath);
 		path = create_gather_merge_path(root, rel, subpath, rel->reltarget,
 										subpath->pathkeys, NULL, rowsp);
 		add_path(rel, &path->path);
@@ -3282,7 +3281,6 @@ generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_r
 															 subpath,
 															 useful_pathkeys,
 															 -1.0);
-				rows = subpath->rows * subpath->parallel_workers;
 			}
 			else
 				subpath = (Path *) create_incremental_sort_path(root,
@@ -3291,6 +3289,7 @@ generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_r
 																useful_pathkeys,
 																presorted_keys,
 																-1);
+			rows = compute_gather_rows(subpath);
 			path = create_gather_merge_path(root, rel,
 											subpath,
 											rel->reltarget,
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 2021c481b46..79991b19807 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -6473,3 +6473,21 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel,
 
 	return pages_fetched;
 }
+
+/*
+ * compute_gather_rows
+ *	  Estimate number of rows for gather (merge) nodes.
+ *
+ * In a parallel plan, each worker's row estimate is determined by dividing the
+ * total number of rows by parallel_divisor, which accounts for the leader's
+ * contribution in addition to the number of workers.  Accordingly, when
+ * estimating the number of rows for gather (merge) nodes, we multiply the rows
+ * per worker by the same parallel_divisor to undo the division.
+ */
+double
+compute_gather_rows(Path *path)
+{
+	Assert(path->parallel_workers > 0);
+
+	return clamp_row_est(path->rows * get_parallel_divisor(path));
+}
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 4711f912390..948afd90948 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -5370,8 +5370,7 @@ create_ordered_paths(PlannerInfo *root,
 																	root->sort_pathkeys,
 																	presorted_keys,
 																	limit_tuples);
-			total_groups = input_path->rows *
-				input_path->parallel_workers;
+			total_groups = compute_gather_rows(sorted_path);
 			sorted_path = (Path *)
 				create_gather_merge_path(root, ordered_rel,
 										 sorted_path,
@@ -7543,8 +7542,6 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
 			(presorted_keys == 0 || !enable_incremental_sort))
 			continue;
 
-		total_groups = path->rows * path->parallel_workers;
-
 		/*
 		 * We've no need to consider both a sort and incremental sort.  We'll
 		 * just do a sort if there are no presorted keys and an incremental
@@ -7561,7 +7558,7 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
 														 groupby_pathkeys,
 														 presorted_keys,
 														 -1.0);
-
+		total_groups = compute_gather_rows(path);
 		path = (Path *)
 			create_gather_merge_path(root,
 									 rel,
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index c42742d2c7b..d1c4e1a6aa7 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1899,7 +1899,6 @@ create_gather_merge_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	pathnode->num_workers = subpath->parallel_workers;
 	pathnode->path.pathkeys = pathkeys;
 	pathnode->path.pathtarget = target ? target : rel->reltarget;
-	pathnode->path.rows += subpath->rows;
 
 	if (pathkeys_contained_in(pathkeys, subpath->pathkeys))
 	{
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index b1c51a4e70f..57861bfb446 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -212,5 +212,6 @@ extern PathTarget *set_pathtarget_cost_width(PlannerInfo *root, PathTarget *targ
 extern double compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel,
 								   Path *bitmapqual, double loop_count,
 								   Cost *cost_p, double *tuples_p);
+extern double compute_gather_rows(Path *path);
 
 #endif							/* COST_H */
diff --git a/src/test/regress/expected/join_hash.out b/src/test/regress/expected/join_hash.out
index 262fa71ed8d..4fc34a0e72a 100644
--- a/src/test/regress/expected/join_hash.out
+++ b/src/test/regress/expected/join_hash.out
@@ -508,18 +508,17 @@ set local hash_mem_multiplier = 1.0;
 set local enable_parallel_hash = on;
 explain (costs off)
   select count(*) from simple r join extremely_skewed s using (id);
-                               QUERY PLAN                               
-------------------------------------------------------------------------
- Finalize Aggregate
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Aggregate
    ->  Gather
          Workers Planned: 1
-         ->  Partial Aggregate
-               ->  Parallel Hash Join
-                     Hash Cond: (r.id = s.id)
-                     ->  Parallel Seq Scan on simple r
-                     ->  Parallel Hash
-                           ->  Parallel Seq Scan on extremely_skewed s
-(9 rows)
+         ->  Parallel Hash Join
+               Hash Cond: (r.id = s.id)
+               ->  Parallel Seq Scan on simple r
+               ->  Parallel Hash
+                     ->  Parallel Seq Scan on extremely_skewed s
+(8 rows)
 
 select count(*) from simple r join extremely_skewed s using (id);
  count