diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index aca8f73a50d..c6294df9362 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4546,23 +4546,6 @@ ANY num_sync (
- enable_groupingsets_hash_disk (boolean)
-
- enable_groupingsets_hash_disk configuration parameter
-
-
-
-
- Enables or disables the query planner's use of hashed aggregation plan
- types for grouping sets when the total size of the hash tables is
- expected to exceed work_mem. See . The default is
- off.
-
-
-
-
enable_hashagg (boolean)
@@ -4577,17 +4560,19 @@ ANY num_sync (
- enable_hashagg_disk (boolean)
+
+ hashagg_avoid_disk_plan (boolean)
- enable_hashagg_disk configuration parameter
+ hashagg_avoid_disk_plan configuration parameter
- Enables or disables the query planner's use of hashed aggregation plan
- types when the memory usage is expected to exceed
- work_mem. The default is on.
+ If set to on, causes the planner to avoid choosing
+ hashed aggregation plans that are expected to use the disk. If hashed
+ aggregation is chosen, it may still require the use of disk at
+ execution time, even if this parameter is enabled. The default is
+ off.
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index b976afb69df..4ff3c7a2fd3 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -130,8 +130,7 @@ bool enable_tidscan = true;
bool enable_sort = true;
bool enable_incrementalsort = true;
bool enable_hashagg = true;
-bool enable_hashagg_disk = true;
-bool enable_groupingsets_hash_disk = false;
+bool		hashagg_avoid_disk_plan = false;
bool enable_nestloop = true;
bool enable_material = true;
bool enable_mergejoin = true;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 5ac7fed6a4d..4131019fc98 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -4256,12 +4256,11 @@ consider_groupingsets_paths(PlannerInfo *root,
dNumGroups - exclude_groups);
/*
- * If we have sortable columns to work with (gd->rollups is non-empty)
- * and enable_groupingsets_hash_disk is disabled, don't generate
- * hash-based paths that will exceed work_mem.
+ * gd->rollups is empty if we have only unsortable columns to work
+ * with. Override work_mem in that case; otherwise, we'll rely on the
+ * sorted-input case to generate usable mixed paths.
*/
- if (!enable_groupingsets_hash_disk &&
- hashsize > work_mem * 1024L && gd->rollups)
+ if (hashsize > work_mem * 1024L && gd->rollups)
return; /* nope, won't fit */
/*
@@ -4868,7 +4867,7 @@ create_distinct_paths(PlannerInfo *root,
{
Size hashentrysize = hash_agg_entry_size(0, cheapest_input_path->pathtarget->width, 0);
- allow_hash = enable_hashagg_disk ||
+ allow_hash = !hashagg_avoid_disk_plan ||
(hashentrysize * numDistinctRows <= work_mem * 1024L);
}
@@ -6773,7 +6772,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
* were unable to sort above, then we'd better generate a Path, so
* that we at least have one.
*/
- if (enable_hashagg_disk ||
+ if (!hashagg_avoid_disk_plan ||
hashaggtablesize < work_mem * 1024L ||
grouped_rel->pathlist == NIL)
{
@@ -6807,7 +6806,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
agg_final_costs,
dNumGroups);
- if (enable_hashagg_disk ||
+ if (!hashagg_avoid_disk_plan ||
hashaggtablesize < work_mem * 1024L)
add_path(grouped_rel, (Path *)
create_agg_path(root,
@@ -7188,7 +7187,7 @@ create_partial_grouping_paths(PlannerInfo *root,
* Tentatively produce a partial HashAgg Path, depending on if it
* looks as if the hash table will fit in work_mem.
*/
- if ((enable_hashagg_disk || hashaggtablesize < work_mem * 1024L) &&
+ if ((!hashagg_avoid_disk_plan || hashaggtablesize < work_mem * 1024L) &&
cheapest_total_path != NULL)
{
add_path(partially_grouped_rel, (Path *)
@@ -7215,7 +7214,8 @@ create_partial_grouping_paths(PlannerInfo *root,
dNumPartialPartialGroups);
/* Do the same for partial paths. */
- if ((enable_hashagg_disk || hashaggtablesize < work_mem * 1024L) &&
+ if ((!hashagg_avoid_disk_plan ||
+ hashaggtablesize < work_mem * 1024L) &&
cheapest_partial_path != NULL)
{
add_partial_path(partially_grouped_rel, (Path *)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 17c15348efc..fe7e2f8b913 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1011,22 +1011,12 @@ static struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},
{
- {"enable_hashagg_disk", PGC_USERSET, QUERY_TUNING_METHOD,
- gettext_noop("Enables the planner's use of hashed aggregation plans that are expected to exceed work_mem."),
+ {"hashagg_avoid_disk_plan", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("Causes the planner to avoid hashed aggregation plans that are expected to use the disk."),
NULL,
GUC_EXPLAIN
},
- &enable_hashagg_disk,
- true,
- NULL, NULL, NULL
- },
- {
- {"enable_groupingsets_hash_disk", PGC_USERSET, QUERY_TUNING_METHOD,
- gettext_noop("Enables the planner's use of hashed aggregation plans for groupingsets when the total size of the hash tables is expected to exceed work_mem."),
- NULL,
- GUC_EXPLAIN
- },
- &enable_groupingsets_hash_disk,
+ &hashagg_avoid_disk_plan,
false,
NULL, NULL, NULL
},
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 9710e5c0a45..92e70ec0d9b 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -55,8 +55,7 @@ extern PGDLLIMPORT bool enable_tidscan;
extern PGDLLIMPORT bool enable_sort;
extern PGDLLIMPORT bool enable_incrementalsort;
extern PGDLLIMPORT bool enable_hashagg;
-extern PGDLLIMPORT bool enable_hashagg_disk;
-extern PGDLLIMPORT bool enable_groupingsets_hash_disk;
+extern PGDLLIMPORT bool hashagg_avoid_disk_plan;
extern PGDLLIMPORT bool enable_nestloop;
extern PGDLLIMPORT bool enable_material;
extern PGDLLIMPORT bool enable_mergejoin;
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 14cdcfcca6e..1e7eb8da491 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2449,26 +2449,30 @@ set enable_sort to default;
-- aggregation. Force spilling in both cases by setting work_mem low.
--
set work_mem='64kB';
+create table agg_data_2k as
+select g from generate_series(0, 1999) g;
+analyze agg_data_2k;
+create table agg_data_20k as
+select g from generate_series(0, 19999) g;
+analyze agg_data_20k;
-- Produce results with sorting.
set enable_hashagg = false;
set jit_above_cost = 0;
explain (costs off)
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
- from generate_series(0, 19999) g
- group by g%10000;
- QUERY PLAN
-------------------------------------------------
+ from agg_data_20k group by g%10000;
+ QUERY PLAN
+--------------------------------------
GroupAggregate
Group Key: ((g % 10000))
-> Sort
Sort Key: ((g % 10000))
- -> Function Scan on generate_series g
+ -> Seq Scan on agg_data_20k
(5 rows)
create table agg_group_1 as
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
- from generate_series(0, 19999) g
- group by g%10000;
+ from agg_data_20k group by g%10000;
create table agg_group_2 as
select * from
(values (100), (300), (500)) as r(a),
@@ -2476,37 +2480,33 @@ select * from
select (g/2)::numeric as c1,
array_agg(g::numeric) as c2,
count(*) as c3
- from generate_series(0, 1999) g
+ from agg_data_2k
where g < r.a
group by g/2) as s;
set jit_above_cost to default;
create table agg_group_3 as
select (g/2)::numeric as c1, sum(7::int4) as c2, count(*) as c3
- from generate_series(0, 1999) g
- group by g/2;
+ from agg_data_2k group by g/2;
create table agg_group_4 as
select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3
- from generate_series(0, 1999) g
- group by g/2;
+ from agg_data_2k group by g/2;
-- Produce results with hash aggregation
set enable_hashagg = true;
set enable_sort = false;
set jit_above_cost = 0;
explain (costs off)
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
- from generate_series(0, 19999) g
- group by g%10000;
- QUERY PLAN
-------------------------------------------
+ from agg_data_20k group by g%10000;
+ QUERY PLAN
+--------------------------------
HashAggregate
Group Key: (g % 10000)
- -> Function Scan on generate_series g
+ -> Seq Scan on agg_data_20k
(3 rows)
create table agg_hash_1 as
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
- from generate_series(0, 19999) g
- group by g%10000;
+ from agg_data_20k group by g%10000;
create table agg_hash_2 as
select * from
(values (100), (300), (500)) as r(a),
@@ -2514,18 +2514,16 @@ select * from
select (g/2)::numeric as c1,
array_agg(g::numeric) as c2,
count(*) as c3
- from generate_series(0, 1999) g
+ from agg_data_2k
where g < r.a
group by g/2) as s;
set jit_above_cost to default;
create table agg_hash_3 as
select (g/2)::numeric as c1, sum(7::int4) as c2, count(*) as c3
- from generate_series(0, 1999) g
- group by g/2;
+ from agg_data_2k group by g/2;
create table agg_hash_4 as
select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3
- from generate_series(0, 1999) g
- group by g/2;
+ from agg_data_2k group by g/2;
set enable_sort = true;
set work_mem to default;
-- Compare group aggregation results to hash aggregation results
diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out
index 05ff204f02c..03ada654bb5 100644
--- a/src/test/regress/expected/groupingsets.out
+++ b/src/test/regress/expected/groupingsets.out
@@ -1636,68 +1636,65 @@ select v||'a', case when grouping(v||'a') = 1 then 1 else 0 end, count(*)
--
-- Compare results between plans using sorting and plans using hash
-- aggregation. Force spilling in both cases by setting work_mem low
--- and turning on enable_groupingsets_hash_disk.
+-- and altering the statistics.
--
-SET enable_groupingsets_hash_disk = true;
+create table gs_data_1 as
+select g%1000 as g1000, g%100 as g100, g%10 as g10, g
+ from generate_series(0,1999) g;
+analyze gs_data_1;
+alter table gs_data_1 set (autovacuum_enabled = 'false');
+update pg_class set reltuples = 10 where relname='gs_data_1';
SET work_mem='64kB';
-- Produce results with sorting.
set enable_hashagg = false;
set jit_above_cost = 0;
explain (costs off)
-select g100, g10, sum(g::numeric), count(*), max(g::text) from
- (select g%1000 as g1000, g%100 as g100, g%10 as g10, g
- from generate_series(0,1999) g) s
-group by cube (g1000, g100,g10);
- QUERY PLAN
----------------------------------------------------------------
+select g100, g10, sum(g::numeric), count(*), max(g::text)
+from gs_data_1 group by cube (g1000, g100,g10);
+ QUERY PLAN
+------------------------------------
GroupAggregate
- Group Key: ((g.g % 1000)), ((g.g % 100)), ((g.g % 10))
- Group Key: ((g.g % 1000)), ((g.g % 100))
- Group Key: ((g.g % 1000))
+ Group Key: g1000, g100, g10
+ Group Key: g1000, g100
+ Group Key: g1000
Group Key: ()
- Sort Key: ((g.g % 100)), ((g.g % 10))
- Group Key: ((g.g % 100)), ((g.g % 10))
- Group Key: ((g.g % 100))
- Sort Key: ((g.g % 10)), ((g.g % 1000))
- Group Key: ((g.g % 10)), ((g.g % 1000))
- Group Key: ((g.g % 10))
+ Sort Key: g100, g10
+ Group Key: g100, g10
+ Group Key: g100
+ Sort Key: g10, g1000
+ Group Key: g10, g1000
+ Group Key: g10
-> Sort
- Sort Key: ((g.g % 1000)), ((g.g % 100)), ((g.g % 10))
- -> Function Scan on generate_series g
+ Sort Key: g1000, g100, g10
+ -> Seq Scan on gs_data_1
(14 rows)
create table gs_group_1 as
-select g100, g10, sum(g::numeric), count(*), max(g::text) from
- (select g%1000 as g1000, g%100 as g100, g%10 as g10, g
- from generate_series(0,1999) g) s
-group by cube (g1000, g100,g10);
+select g100, g10, sum(g::numeric), count(*), max(g::text)
+from gs_data_1 group by cube (g1000, g100,g10);
-- Produce results with hash aggregation.
set enable_hashagg = true;
set enable_sort = false;
explain (costs off)
-select g100, g10, sum(g::numeric), count(*), max(g::text) from
- (select g%1000 as g1000, g%100 as g100, g%10 as g10, g
- from generate_series(0,1999) g) s
-group by cube (g1000, g100,g10);
- QUERY PLAN
----------------------------------------------------
+select g100, g10, sum(g::numeric), count(*), max(g::text)
+from gs_data_1 group by cube (g1000, g100,g10);
+ QUERY PLAN
+------------------------------
MixedAggregate
- Hash Key: (g.g % 1000), (g.g % 100), (g.g % 10)
- Hash Key: (g.g % 1000), (g.g % 100)
- Hash Key: (g.g % 1000)
- Hash Key: (g.g % 100), (g.g % 10)
- Hash Key: (g.g % 100)
- Hash Key: (g.g % 10), (g.g % 1000)
- Hash Key: (g.g % 10)
+ Hash Key: g1000, g100, g10
+ Hash Key: g1000, g100
+ Hash Key: g1000
+ Hash Key: g100, g10
+ Hash Key: g100
+ Hash Key: g10, g1000
+ Hash Key: g10
Group Key: ()
- -> Function Scan on generate_series g
+ -> Seq Scan on gs_data_1
(10 rows)
create table gs_hash_1 as
-select g100, g10, sum(g::numeric), count(*), max(g::text) from
- (select g%1000 as g1000, g%100 as g100, g%10 as g10, g
- from generate_series(0,1999) g) s
-group by cube (g1000, g100,g10);
+select g100, g10, sum(g::numeric), count(*), max(g::text)
+from gs_data_1 group by cube (g1000, g100,g10);
set enable_sort = true;
set work_mem to default;
-- Compare results
@@ -1710,5 +1707,4 @@ set work_mem to default;
drop table gs_group_1;
drop table gs_hash_1;
-SET enable_groupingsets_hash_disk TO DEFAULT;
-- end
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index a126f0ad613..01b7786f014 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -74,9 +74,7 @@ select name, setting from pg_settings where name like 'enable%';
--------------------------------+---------
enable_bitmapscan | on
enable_gathermerge | on
- enable_groupingsets_hash_disk | off
enable_hashagg | on
- enable_hashagg_disk | on
enable_hashjoin | on
enable_incrementalsort | on
enable_indexonlyscan | on
@@ -92,7 +90,7 @@ select name, setting from pg_settings where name like 'enable%';
enable_seqscan | on
enable_sort | on
enable_tidscan | on
-(20 rows)
+(18 rows)
-- Test that the pg_timezone_names and pg_timezone_abbrevs views are
-- more-or-less working. We can't test their contents in any great detail
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 9480abd577e..e89e884230b 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1059,6 +1059,14 @@ set enable_sort to default;
set work_mem='64kB';
+create table agg_data_2k as
+select g from generate_series(0, 1999) g;
+analyze agg_data_2k;
+
+create table agg_data_20k as
+select g from generate_series(0, 19999) g;
+analyze agg_data_20k;
+
-- Produce results with sorting.
set enable_hashagg = false;
@@ -1067,13 +1075,11 @@ set jit_above_cost = 0;
explain (costs off)
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
- from generate_series(0, 19999) g
- group by g%10000;
+ from agg_data_20k group by g%10000;
create table agg_group_1 as
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
- from generate_series(0, 19999) g
- group by g%10000;
+ from agg_data_20k group by g%10000;
create table agg_group_2 as
select * from
@@ -1082,7 +1088,7 @@ select * from
select (g/2)::numeric as c1,
array_agg(g::numeric) as c2,
count(*) as c3
- from generate_series(0, 1999) g
+ from agg_data_2k
where g < r.a
group by g/2) as s;
@@ -1090,13 +1096,11 @@ set jit_above_cost to default;
create table agg_group_3 as
select (g/2)::numeric as c1, sum(7::int4) as c2, count(*) as c3
- from generate_series(0, 1999) g
- group by g/2;
+ from agg_data_2k group by g/2;
create table agg_group_4 as
select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3
- from generate_series(0, 1999) g
- group by g/2;
+ from agg_data_2k group by g/2;
-- Produce results with hash aggregation
@@ -1107,13 +1111,11 @@ set jit_above_cost = 0;
explain (costs off)
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
- from generate_series(0, 19999) g
- group by g%10000;
+ from agg_data_20k group by g%10000;
create table agg_hash_1 as
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
- from generate_series(0, 19999) g
- group by g%10000;
+ from agg_data_20k group by g%10000;
create table agg_hash_2 as
select * from
@@ -1122,7 +1124,7 @@ select * from
select (g/2)::numeric as c1,
array_agg(g::numeric) as c2,
count(*) as c3
- from generate_series(0, 1999) g
+ from agg_data_2k
where g < r.a
group by g/2) as s;
@@ -1130,13 +1132,11 @@ set jit_above_cost to default;
create table agg_hash_3 as
select (g/2)::numeric as c1, sum(7::int4) as c2, count(*) as c3
- from generate_series(0, 1999) g
- group by g/2;
+ from agg_data_2k group by g/2;
create table agg_hash_4 as
select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3
- from generate_series(0, 1999) g
- group by g/2;
+ from agg_data_2k group by g/2;
set enable_sort = true;
set work_mem to default;
diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql
index 77e196798ac..e6c28743a44 100644
--- a/src/test/regress/sql/groupingsets.sql
+++ b/src/test/regress/sql/groupingsets.sql
@@ -444,10 +444,17 @@ select v||'a', case when grouping(v||'a') = 1 then 1 else 0 end, count(*)
--
-- Compare results between plans using sorting and plans using hash
-- aggregation. Force spilling in both cases by setting work_mem low
--- and turning on enable_groupingsets_hash_disk.
+-- and altering the statistics.
--
-SET enable_groupingsets_hash_disk = true;
+create table gs_data_1 as
+select g%1000 as g1000, g%100 as g100, g%10 as g10, g
+ from generate_series(0,1999) g;
+
+analyze gs_data_1;
+alter table gs_data_1 set (autovacuum_enabled = 'false');
+update pg_class set reltuples = 10 where relname='gs_data_1';
+
SET work_mem='64kB';
-- Produce results with sorting.
@@ -456,16 +463,12 @@ set enable_hashagg = false;
set jit_above_cost = 0;
explain (costs off)
-select g100, g10, sum(g::numeric), count(*), max(g::text) from
- (select g%1000 as g1000, g%100 as g100, g%10 as g10, g
- from generate_series(0,1999) g) s
-group by cube (g1000, g100,g10);
+select g100, g10, sum(g::numeric), count(*), max(g::text)
+from gs_data_1 group by cube (g1000, g100,g10);
create table gs_group_1 as
-select g100, g10, sum(g::numeric), count(*), max(g::text) from
- (select g%1000 as g1000, g%100 as g100, g%10 as g10, g
- from generate_series(0,1999) g) s
-group by cube (g1000, g100,g10);
+select g100, g10, sum(g::numeric), count(*), max(g::text)
+from gs_data_1 group by cube (g1000, g100,g10);
-- Produce results with hash aggregation.
@@ -473,17 +476,12 @@ set enable_hashagg = true;
set enable_sort = false;
explain (costs off)
-select g100, g10, sum(g::numeric), count(*), max(g::text) from
- (select g%1000 as g1000, g%100 as g100, g%10 as g10, g
- from generate_series(0,1999) g) s
-group by cube (g1000, g100,g10);
+select g100, g10, sum(g::numeric), count(*), max(g::text)
+from gs_data_1 group by cube (g1000, g100,g10);
create table gs_hash_1 as
-select g100, g10, sum(g::numeric), count(*), max(g::text) from
- (select g%1000 as g1000, g%100 as g100, g%10 as g10, g
- from generate_series(0,1999) g) s
-group by cube (g1000, g100,g10);
-
+select g100, g10, sum(g::numeric), count(*), max(g::text)
+from gs_data_1 group by cube (g1000, g100,g10);
set enable_sort = true;
set work_mem to default;
@@ -497,6 +495,4 @@ set work_mem to default;
drop table gs_group_1;
drop table gs_hash_1;
-SET enable_groupingsets_hash_disk TO DEFAULT;
-
-- end