mirror of
https://github.com/postgres/postgres.git
synced 2025-06-11 20:28:21 +03:00
Improve performance of ORDER BY / DISTINCT aggregates
ORDER BY / DISTINCT aggregates have, ever since they were implemented in Postgres, been executed by always performing a sort in nodeAgg.c to sort the tuples in the current group into the correct order before calling the transition function on the sorted tuples. This was not great as often there might be an index that could have provided pre-sorted input and allowed the transition functions to be called as the rows come in, rather than having to store them in a tuplestore in order to sort them once all the tuples for the group have arrived. Here we change the planner so it requests a path with a sort order which supports the greatest number of ORDER BY / DISTINCT aggregate functions and add new code to the executor to allow it to support the processing of ORDER BY / DISTINCT aggregates where the tuples are already sorted in the correct order. Since there can be many ORDER BY / DISTINCT aggregates in any given query level, it's very possible that we can't find an order that suits all of these aggregates. The sort order that the planner chooses is simply the one that suits the most aggregate functions. We take the most strictly sorted variation of each order and see how many aggregate functions can use that, then we try again with the order of the remaining aggregates to see if another order would suit more aggregate functions. For example: SELECT agg(a ORDER BY a),agg2(a ORDER BY a,b) ... would request the sort order to be {a, b} because {a} is a subset of the sort order of {a,b}, but: SELECT agg(a ORDER BY a),agg2(a ORDER BY c) ... would just pick a plan ordered by {a} (we give precedence to aggregates which are earlier in the targetlist). SELECT agg(a ORDER BY a),agg2(a ORDER BY b),agg3(a ORDER BY b) ... would choose to order by {b} since two aggregates suit that vs just one that requires input ordered by {a}. 
Author: David Rowley Reviewed-by: Ronan Dunklau, James Coleman, Ranier Vilela, Richard Guo, Tom Lane Discussion: https://postgr.es/m/CAApHDvpHzfo92%3DR4W0%2BxVua3BUYCKMckWAmo-2t_KiXN-wYH%3Dw%40mail.gmail.com
This commit is contained in:
@ -3295,15 +3295,18 @@ create operator class my_op_class for type int using btree family my_op_family a
|
||||
-- extension yet.
|
||||
explain (verbose, costs off)
|
||||
select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------------------------------------------
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------------------------------------------------
|
||||
GroupAggregate
|
||||
Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2
|
||||
Group Key: ft2.c2
|
||||
-> Foreign Scan on public.ft2
|
||||
Output: c1, c2
|
||||
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
|
||||
(6 rows)
|
||||
-> Sort
|
||||
Output: c2, c1
|
||||
Sort Key: ft2.c1 USING <^
|
||||
-> Foreign Scan on public.ft2
|
||||
Output: c2, c1
|
||||
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
|
||||
(9 rows)
|
||||
|
||||
-- This should not be pushed either.
|
||||
explain (verbose, costs off)
|
||||
@ -3329,6 +3332,7 @@ alter extension postgres_fdw add operator public.=^(int, int);
|
||||
alter extension postgres_fdw add operator public.>^(int, int);
|
||||
alter server loopback options (set extensions 'postgres_fdw');
|
||||
-- Now this will be pushed as sort operator is part of the extension.
|
||||
alter server loopback options (add fdw_tuple_cost '0.5');
|
||||
explain (verbose, costs off)
|
||||
select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
|
||||
QUERY PLAN
|
||||
@ -3345,6 +3349,7 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6
|
||||
{6,16,26,36,46,56,66,76,86,96}
|
||||
(1 row)
|
||||
|
||||
alter server loopback options (drop fdw_tuple_cost);
|
||||
-- This should be pushed too.
|
||||
explain (verbose, costs off)
|
||||
select * from ft2 order by c1 using operator(public.<^);
|
||||
@ -3366,15 +3371,18 @@ alter server loopback options (set extensions 'postgres_fdw');
|
||||
-- This will not be pushed as sort operator is now removed from the extension.
|
||||
explain (verbose, costs off)
|
||||
select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------------------------------------------
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------------------------------------------------
|
||||
GroupAggregate
|
||||
Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2
|
||||
Group Key: ft2.c2
|
||||
-> Foreign Scan on public.ft2
|
||||
Output: c1, c2
|
||||
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
|
||||
(6 rows)
|
||||
-> Sort
|
||||
Output: c2, c1
|
||||
Sort Key: ft2.c1 USING <^
|
||||
-> Foreign Scan on public.ft2
|
||||
Output: c2, c1
|
||||
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
|
||||
(9 rows)
|
||||
|
||||
-- Cleanup
|
||||
drop operator class my_op_class using btree;
|
||||
|
Reference in New Issue
Block a user