From c40ba5f318f96a6a5a29729b987ead11c5dc65c1 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Wed, 4 May 2022 14:44:40 -0400
Subject: [PATCH] Fix rowcount estimate for SubqueryScan that's under a Gather.

SubqueryScan was always getting labeled with a rowcount estimate
appropriate for non-parallel cases. However, nodes that are
underneath a Gather should be treated as processing only one
worker's share of the rows, whether the particular node is explicitly
parallel-aware or not. Most non-scan-level node types get this
right automatically because they base their rowcount estimate on
that of their input sub-Path(s). But SubqueryScan didn't do that,
instead using the whole-relation rowcount estimate as if it were
a non-parallel-aware scan node. If there is a parallel-aware node
below the SubqueryScan, this is wrong, and it results in inflating
the cost estimates for nodes above the SubqueryScan, which can cause
us to not choose a parallel plan, or choose a silly one --- as indeed
is visible in the one regression test whose results change with this
patch. (Although that plan tree appears to contain no SubqueryScans,
there were some in it before setrefs.c deleted them.)

To fix, use path->subpath->rows not baserel->tuples as the number
of input tuples we'll process. This requires estimating the quals'
selectivity afresh, which is slightly annoying; but it shouldn't
really add much cost thanks to the caching done in RestrictInfo.

This is pretty clearly a bug fix, but I'll refrain from back-patching
as people might not appreciate plan choices changing in stable branches.
The fact that it took us this long to identify the bug suggests that
it's not a major problem.

Per report from bucoo, though this is not his proposed patch.
Discussion: https://postgr.es/m/202204121457159307248@sohu.com --- src/backend/optimizer/path/costsize.c | 22 +++++++++++++++---- .../regress/expected/incremental_sort.out | 10 ++++----- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index b787c6f81a8..6673d271c26 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1395,6 +1395,7 @@ cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root, { Cost startup_cost; Cost run_cost; + List *qpquals; QualCost qpqual_cost; Cost cpu_per_tuple; @@ -1402,11 +1403,24 @@ cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root, Assert(baserel->relid > 0); Assert(baserel->rtekind == RTE_SUBQUERY); - /* Mark the path with the correct row estimate */ + /* + * We compute the rowcount estimate as the subplan's estimate times the + * selectivity of relevant restriction clauses. In simple cases this will + * come out the same as baserel->rows; but when dealing with parallelized + * paths we must do it like this to get the right answer. 
+ */ if (param_info) - path->path.rows = param_info->ppi_rows; + qpquals = list_concat_copy(param_info->ppi_clauses, + baserel->baserestrictinfo); else - path->path.rows = baserel->rows; + qpquals = baserel->baserestrictinfo; + + path->path.rows = clamp_row_est(path->subpath->rows * + clauselist_selectivity(root, + qpquals, + 0, + JOIN_INNER, + NULL)); /* * Cost of path is cost of evaluating the subplan, plus cost of evaluating @@ -1421,7 +1435,7 @@ cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root, startup_cost = qpqual_cost.startup; cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple; - run_cost = cpu_per_tuple * baserel->tuples; + run_cost = cpu_per_tuple * path->subpath->rows; /* tlist eval costs are paid per output row, not per tuple scanned */ startup_cost += path->path.pathtarget->cost.startup; diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out index 21c429226f7..0d8d77140a4 100644 --- a/src/test/regress/expected/incremental_sort.out +++ b/src/test/regress/expected/incremental_sort.out @@ -1487,14 +1487,12 @@ explain (costs off) select * from t union select * from t order by 1,3; -> Unique -> Sort Sort Key: t.a, t.b, t.c - -> Append - -> Gather - Workers Planned: 2 + -> Gather + Workers Planned: 2 + -> Parallel Append -> Parallel Seq Scan on t - -> Gather - Workers Planned: 2 -> Parallel Seq Scan on t t_1 -(13 rows) +(11 rows) -- Full sort, not just incremental sort can be pushed below a gather merge path -- by generate_useful_gather_paths.