1
0
mirror of https://github.com/postgres/postgres.git synced 2025-04-22 23:02:54 +03:00

Consider the "LIMIT 1" optimization with parallel DISTINCT

Similar to what was done in 5543677ec for non-parallel DISTINCT, apply
the same optimization when the distinct_pathkeys are empty for the
partial paths too.

This can be faster than the non-parallel version when the first row
matching the WHERE clause of the query takes a while to find.  Parallel
workers could speed that process up considerably.

Author: Richard Guo
Reviewed-by: David Rowley
Discussion: https://postgr.es/m/CAMbWs49JC0qvfUbzs-TVzgMpSSBiMJ_6sN=BaA9iohBgYkr=LA@mail.gmail.com
This commit is contained in:
David Rowley 2024-01-31 17:22:02 +13:00
parent 3e91dba8b0
commit b588cad688
3 changed files with 72 additions and 5 deletions

View File

@ -4737,6 +4737,39 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
-1.0);
}
/*
* An empty distinct_pathkeys means all tuples have the same value
* for the DISTINCT clause. See create_final_distinct_paths()
*/
if (root->distinct_pathkeys == NIL)
{
Node *limitCount;
limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
sizeof(int64),
Int64GetDatum(1), false,
FLOAT8PASSBYVAL);
/*
* Apply a LimitPath onto the partial path to restrict the
* tuples from each worker to 1. create_final_distinct_paths
* will need to apply an additional LimitPath to restrict this
* to a single row after the Gather node. If the query
* already has a LIMIT clause, then we could end up with three
* Limit nodes in the final plan. Consolidating the top two
* of these could be done, but does not seem worth troubling
* over.
*/
add_partial_path(partial_distinct_rel, (Path *)
create_limit_path(root, partial_distinct_rel,
sorted_path,
NULL,
limitCount,
LIMIT_OPTION_COUNT,
0, 1));
}
else
{
add_partial_path(partial_distinct_rel, (Path *)
create_upper_unique_path(root, partial_distinct_rel,
sorted_path,
@ -4744,6 +4777,7 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
numDistinctRows));
}
}
}
/*
* Now try hash aggregate paths, if enabled and hashing is possible. Since

View File

@ -348,6 +348,26 @@ SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
0 | 1 | 2 | 3
(1 row)
SET parallel_setup_cost=0;
SET min_parallel_table_scan_size=0;
SET max_parallel_workers_per_gather=2;
-- Ensure we get a plan with a Limit 1 in both partial distinct and final
-- distinct
EXPLAIN (COSTS OFF)
SELECT DISTINCT four FROM tenk1 WHERE four = 10;
QUERY PLAN
----------------------------------------------
Limit
-> Gather
Workers Planned: 2
-> Limit
-> Parallel Seq Scan on tenk1
Filter: (four = 10)
(6 rows)
RESET max_parallel_workers_per_gather;
RESET min_parallel_table_scan_size;
RESET parallel_setup_cost;
--
-- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
-- very own regression file.

View File

@ -180,6 +180,19 @@ SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
-- Ensure we only get 1 row
SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
SET parallel_setup_cost=0;
SET min_parallel_table_scan_size=0;
SET max_parallel_workers_per_gather=2;
-- Ensure we get a plan with a Limit 1 in both partial distinct and final
-- distinct
EXPLAIN (COSTS OFF)
SELECT DISTINCT four FROM tenk1 WHERE four = 10;
RESET max_parallel_workers_per_gather;
RESET min_parallel_table_scan_size;
RESET parallel_setup_cost;
--
-- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
-- very own regression file.