mirror of
https://github.com/postgres/postgres.git
synced 2025-04-22 23:02:54 +03:00
Consider the "LIMIT 1" optimization with parallel DISTINCT
Similar to what was done in 5543677ec for non-parallel DISTINCT, apply the same optimization when the distinct_pathkeys are empty for the partial paths too.

This can be faster than the non-parallel version when the first row matching the WHERE clause of the query takes a while to find. Parallel workers could speed that process up considerably.

Author: Richard Guo
Reviewed-by: David Rowley
Discussion: https://postgr.es/m/CAMbWs49JC0qvfUbzs-TVzgMpSSBiMJ_6sN=BaA9iohBgYkr=LA@mail.gmail.com
This commit is contained in:
parent
3e91dba8b0
commit
b588cad688
@ -4737,6 +4737,39 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
|
|||||||
-1.0);
|
-1.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* An empty distinct_pathkeys means all tuples have the same value
|
||||||
|
* for the DISTINCT clause. See create_final_distinct_paths()
|
||||||
|
*/
|
||||||
|
if (root->distinct_pathkeys == NIL)
|
||||||
|
{
|
||||||
|
Node *limitCount;
|
||||||
|
|
||||||
|
limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
|
||||||
|
sizeof(int64),
|
||||||
|
Int64GetDatum(1), false,
|
||||||
|
FLOAT8PASSBYVAL);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Apply a LimitPath onto the partial path to restrict the
|
||||||
|
* tuples from each worker to 1. create_final_distinct_paths
|
||||||
|
* will need to apply an additional LimitPath to restrict this
|
||||||
|
* to a single row after the Gather node. If the query
|
||||||
|
* already has a LIMIT clause, then we could end up with three
|
||||||
|
* Limit nodes in the final plan. Consolidating the top two
|
||||||
|
* of these could be done, but does not seem worth troubling
|
||||||
|
* over.
|
||||||
|
*/
|
||||||
|
add_partial_path(partial_distinct_rel, (Path *)
|
||||||
|
create_limit_path(root, partial_distinct_rel,
|
||||||
|
sorted_path,
|
||||||
|
NULL,
|
||||||
|
limitCount,
|
||||||
|
LIMIT_OPTION_COUNT,
|
||||||
|
0, 1));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
add_partial_path(partial_distinct_rel, (Path *)
|
add_partial_path(partial_distinct_rel, (Path *)
|
||||||
create_upper_unique_path(root, partial_distinct_rel,
|
create_upper_unique_path(root, partial_distinct_rel,
|
||||||
sorted_path,
|
sorted_path,
|
||||||
@ -4744,6 +4777,7 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
|
|||||||
numDistinctRows));
|
numDistinctRows));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now try hash aggregate paths, if enabled and hashing is possible. Since
|
* Now try hash aggregate paths, if enabled and hashing is possible. Since
|
||||||
|
@ -348,6 +348,26 @@ SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
|
|||||||
0 | 1 | 2 | 3
|
0 | 1 | 2 | 3
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
SET parallel_setup_cost=0;
|
||||||
|
SET min_parallel_table_scan_size=0;
|
||||||
|
SET max_parallel_workers_per_gather=2;
|
||||||
|
-- Ensure we get a plan with a Limit 1 in both partial distinct and final
|
||||||
|
-- distinct
|
||||||
|
EXPLAIN (COSTS OFF)
|
||||||
|
SELECT DISTINCT four FROM tenk1 WHERE four = 10;
|
||||||
|
QUERY PLAN
|
||||||
|
----------------------------------------------
|
||||||
|
Limit
|
||||||
|
-> Gather
|
||||||
|
Workers Planned: 2
|
||||||
|
-> Limit
|
||||||
|
-> Parallel Seq Scan on tenk1
|
||||||
|
Filter: (four = 10)
|
||||||
|
(6 rows)
|
||||||
|
|
||||||
|
RESET max_parallel_workers_per_gather;
|
||||||
|
RESET min_parallel_table_scan_size;
|
||||||
|
RESET parallel_setup_cost;
|
||||||
--
|
--
|
||||||
-- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
|
-- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
|
||||||
-- very own regression file.
|
-- very own regression file.
|
||||||
|
@ -180,6 +180,19 @@ SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
|
|||||||
-- Ensure we only get 1 row
|
-- Ensure we only get 1 row
|
||||||
SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
|
SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
|
||||||
|
|
||||||
|
SET parallel_setup_cost=0;
|
||||||
|
SET min_parallel_table_scan_size=0;
|
||||||
|
SET max_parallel_workers_per_gather=2;
|
||||||
|
|
||||||
|
-- Ensure we get a plan with a Limit 1 in both partial distinct and final
|
||||||
|
-- distinct
|
||||||
|
EXPLAIN (COSTS OFF)
|
||||||
|
SELECT DISTINCT four FROM tenk1 WHERE four = 10;
|
||||||
|
|
||||||
|
RESET max_parallel_workers_per_gather;
|
||||||
|
RESET min_parallel_table_scan_size;
|
||||||
|
RESET parallel_setup_cost;
|
||||||
|
|
||||||
--
|
--
|
||||||
-- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
|
-- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
|
||||||
-- very own regression file.
|
-- very own regression file.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user