1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-18 17:42:25 +03:00

Avoid generating bogus paths with partitionwise aggregate.

Previously, if some or all partitions had no partially aggregated path,
we would still try to generate a partially aggregated path for the
parent, leading to assertion failures or wrong answers.

Report by Rajkumar Raghuwanshi.  Patch by Jeevan Chalke, reviewed
by Ashutosh Bapat.  A few changes by me.

Discussion: http://postgr.es/m/CAKcux6=q4+Mw8gOOX16ef6ZMFp9Cve7KWFstUsrDa4GiFaXGUQ@mail.gmail.com
This commit is contained in:
Robert Haas
2018-06-22 09:14:34 -04:00
parent 2448adf29c
commit c6f28af5d7
3 changed files with 142 additions and 8 deletions

View File

@ -1394,3 +1394,108 @@ SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) <
11 | 16500 | 11.0000000000000000 | 1500
(4 rows)
-- Test when parent can produce parallel paths but not any (or some) of its children
ALTER TABLE pagg_tab_para_p1 SET (parallel_workers = 0);
ALTER TABLE pagg_tab_para_p3 SET (parallel_workers = 0);
ANALYZE pagg_tab_para;
EXPLAIN (COSTS OFF)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
QUERY PLAN
--------------------------------------------------------------------------------------
Sort
Sort Key: pagg_tab_para_p1.x, (sum(pagg_tab_para_p1.y)), (avg(pagg_tab_para_p1.y))
-> Finalize GroupAggregate
Group Key: pagg_tab_para_p1.x
Filter: (avg(pagg_tab_para_p1.y) < '7'::numeric)
-> Gather Merge
Workers Planned: 2
-> Sort
Sort Key: pagg_tab_para_p1.x
-> Partial HashAggregate
Group Key: pagg_tab_para_p1.x
-> Parallel Append
-> Seq Scan on pagg_tab_para_p1
-> Seq Scan on pagg_tab_para_p3
-> Parallel Seq Scan on pagg_tab_para_p2
(15 rows)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
x | sum | avg | count
----+------+--------------------+-------
0 | 5000 | 5.0000000000000000 | 1000
1 | 6000 | 6.0000000000000000 | 1000
10 | 5000 | 5.0000000000000000 | 1000
11 | 6000 | 6.0000000000000000 | 1000
20 | 5000 | 5.0000000000000000 | 1000
21 | 6000 | 6.0000000000000000 | 1000
(6 rows)
ALTER TABLE pagg_tab_para_p2 SET (parallel_workers = 0);
ANALYZE pagg_tab_para;
EXPLAIN (COSTS OFF)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
QUERY PLAN
--------------------------------------------------------------------------------------
Sort
Sort Key: pagg_tab_para_p1.x, (sum(pagg_tab_para_p1.y)), (avg(pagg_tab_para_p1.y))
-> Finalize GroupAggregate
Group Key: pagg_tab_para_p1.x
Filter: (avg(pagg_tab_para_p1.y) < '7'::numeric)
-> Gather Merge
Workers Planned: 2
-> Sort
Sort Key: pagg_tab_para_p1.x
-> Partial HashAggregate
Group Key: pagg_tab_para_p1.x
-> Parallel Append
-> Seq Scan on pagg_tab_para_p1
-> Seq Scan on pagg_tab_para_p2
-> Seq Scan on pagg_tab_para_p3
(15 rows)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
x | sum | avg | count
----+------+--------------------+-------
0 | 5000 | 5.0000000000000000 | 1000
1 | 6000 | 6.0000000000000000 | 1000
10 | 5000 | 5.0000000000000000 | 1000
11 | 6000 | 6.0000000000000000 | 1000
20 | 5000 | 5.0000000000000000 | 1000
21 | 6000 | 6.0000000000000000 | 1000
(6 rows)
-- Reset parallelism parameters to get partitionwise aggregation plan.
RESET min_parallel_table_scan_size;
RESET parallel_setup_cost;
EXPLAIN (COSTS OFF)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
QUERY PLAN
--------------------------------------------------------------------------------------
Sort
Sort Key: pagg_tab_para_p1.x, (sum(pagg_tab_para_p1.y)), (avg(pagg_tab_para_p1.y))
-> Append
-> HashAggregate
Group Key: pagg_tab_para_p1.x
Filter: (avg(pagg_tab_para_p1.y) < '7'::numeric)
-> Seq Scan on pagg_tab_para_p1
-> HashAggregate
Group Key: pagg_tab_para_p2.x
Filter: (avg(pagg_tab_para_p2.y) < '7'::numeric)
-> Seq Scan on pagg_tab_para_p2
-> HashAggregate
Group Key: pagg_tab_para_p3.x
Filter: (avg(pagg_tab_para_p3.y) < '7'::numeric)
-> Seq Scan on pagg_tab_para_p3
(15 rows)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
x | sum | avg | count
----+------+--------------------+-------
0 | 5000 | 5.0000000000000000 | 1000
1 | 6000 | 6.0000000000000000 | 1000
10 | 5000 | 5.0000000000000000 | 1000
11 | 6000 | 6.0000000000000000 | 1000
20 | 5000 | 5.0000000000000000 | 1000
21 | 6000 | 6.0000000000000000 | 1000
(6 rows)

View File

@ -294,3 +294,27 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) <
EXPLAIN (COSTS OFF)
SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
-- Test when parent can produce parallel paths but not any (or some) of its children
-- First case ("some"): parallel_workers = 0 is set on p1 and p3 only; p2 is
-- not altered here.  The expected plan keeps a Parallel Seq Scan on p2 and
-- uses plain Seq Scans on p1 and p3, all under one Parallel Append.
ALTER TABLE pagg_tab_para_p1 SET (parallel_workers = 0);
ALTER TABLE pagg_tab_para_p3 SET (parallel_workers = 0);
-- Recollect statistics before planning (presumably to keep the plan stable).
ANALYZE pagg_tab_para;
-- Show the plan first, then run the same query to check the result rows.
EXPLAIN (COSTS OFF)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
-- Second case ("not any"): p2 is set to parallel_workers = 0 as well (p1 and
-- p3 were set above).  The expected plan now has plain Seq Scans on all three
-- partitions, still under a Parallel Append feeding a single Partial
-- HashAggregate.
ALTER TABLE pagg_tab_para_p2 SET (parallel_workers = 0);
-- Recollect statistics before planning (presumably to keep the plan stable).
ANALYZE pagg_tab_para;
-- Same query as the previous case; the expected result rows are identical.
EXPLAIN (COSTS OFF)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
-- Reset parallelism parameters to get partitionwise aggregation plan.
-- RESET returns both settings to their defaults (they are presumably
-- overridden earlier in this file; that part is not visible here).  The
-- expected plan is then a non-parallel Append of one HashAggregate per
-- partition, each applying the HAVING filter itself, with no
-- Partial/Finalize split.
RESET min_parallel_table_scan_size;
RESET parallel_setup_cost;
-- Same query again; only the plan shape is expected to differ, not the rows.
EXPLAIN (COSTS OFF)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;