diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 01fa45b9255..342f5ad8d0a 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -4737,11 +4737,45 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
 													-1.0);
 		}
 
-		add_partial_path(partial_distinct_rel, (Path *)
-						 create_upper_unique_path(root, partial_distinct_rel,
-												  sorted_path,
-												  list_length(root->distinct_pathkeys),
-												  numDistinctRows));
+		/*
+		 * An empty distinct_pathkeys means all tuples have the same value
+		 * for the DISTINCT clause.  See create_final_distinct_paths()
+		 */
+		if (root->distinct_pathkeys == NIL)
+		{
+			Node	   *limitCount;
+
+			limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
+											sizeof(int64),
+											Int64GetDatum(1), false,
+											FLOAT8PASSBYVAL);
+
+			/*
+			 * Apply a LimitPath onto the partial path to restrict the
+			 * tuples from each worker to 1.  create_final_distinct_paths
+			 * will need to apply an additional LimitPath to restrict this
+			 * to a single row after the Gather node.  If the query
+			 * already has a LIMIT clause, then we could end up with three
+			 * Limit nodes in the final plan.  Consolidating the top two
+			 * of these could be done, but does not seem worth troubling
+			 * over.
+			 */
+			add_partial_path(partial_distinct_rel, (Path *)
+							 create_limit_path(root, partial_distinct_rel,
+											   sorted_path,
+											   NULL,
+											   limitCount,
+											   LIMIT_OPTION_COUNT,
+											   0, 1));
+		}
+		else
+		{
+			add_partial_path(partial_distinct_rel, (Path *)
+							 create_upper_unique_path(root, partial_distinct_rel,
+													  sorted_path,
+													  list_length(root->distinct_pathkeys),
+													  numDistinctRows));
+		}
 	}
 }
 
diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out
index 9d44ea8056d..1f72756ccb4 100644
--- a/src/test/regress/expected/select_distinct.out
+++ b/src/test/regress/expected/select_distinct.out
@@ -348,6 +348,26 @@ SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
     0 |        1 |        2 |        3
 (1 row)
 
+SET parallel_setup_cost=0;
+SET min_parallel_table_scan_size=0;
+SET max_parallel_workers_per_gather=2;
+-- Ensure we get a plan with a Limit 1 in both partial distinct and final
+-- distinct
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT four FROM tenk1 WHERE four = 10;
+                  QUERY PLAN                  
+----------------------------------------------
+ Limit
+   ->  Gather
+         Workers Planned: 2
+         ->  Limit
+               ->  Parallel Seq Scan on tenk1
+                     Filter: (four = 10)
+(6 rows)
+
+RESET max_parallel_workers_per_gather;
+RESET min_parallel_table_scan_size;
+RESET parallel_setup_cost;
 --
 -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
 -- very own regression file.
diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql
index 1643526d991..da92c197aba 100644
--- a/src/test/regress/sql/select_distinct.sql
+++ b/src/test/regress/sql/select_distinct.sql
@@ -180,6 +180,19 @@ SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
 -- Ensure we only get 1 row
 SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
 
+SET parallel_setup_cost=0;
+SET min_parallel_table_scan_size=0;
+SET max_parallel_workers_per_gather=2;
+
+-- Ensure we get a plan with a Limit 1 in both partial distinct and final
+-- distinct
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT four FROM tenk1 WHERE four = 10;
+
+RESET max_parallel_workers_per_gather;
+RESET min_parallel_table_scan_size;
+RESET parallel_setup_cost;
+
 --
 -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
 -- very own regression file.
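
Not part of the patch: the new planner comment notes that a query with its own LIMIT clause could end up with three Limit nodes (one per worker below the Gather, one added by create_final_distinct_paths(), and the query's own). The sketch below is one way to observe that case interactively, reusing the same tenk1 table and GUC settings as the regression test above; the resulting plan shape is an expectation based on that comment, not verified output from this patch.

SET parallel_setup_cost = 0;
SET min_parallel_table_scan_size = 0;
SET max_parallel_workers_per_gather = 2;

-- A user-supplied LIMIT on top of the two planner-added Limit nodes
EXPLAIN (COSTS OFF)
SELECT DISTINCT four FROM tenk1 WHERE four = 10 LIMIT 1;

RESET max_parallel_workers_per_gather;
RESET min_parallel_table_scan_size;
RESET parallel_setup_cost;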