Have the planner replace COUNT(ANY) with COUNT(*), when possible

This adds SupportRequestSimplifyAggref to allow pg_proc.prosupport functions to receive an Aggref and allow them to determine if there is a way that the Aggref call can be optimized. Also added is a support function to allow transformation of COUNT(ANY) into COUNT(*). This is possible to do when the given "ANY" cannot be NULL and also that there are no ORDER BY / DISTINCT clauses within the Aggref. This is a useful transformation to do as it is common that people write COUNT(1), which until now has added unneeded overhead. When counting a NOT NULL column. The overheads can be worse as that might mean deforming more of the tuple, which for large fact tables may be many columns in. It may be possible to add prosupport functions for other aggregates. We could consider if ORDER BY could be dropped for some calls, e.g. the ORDER BY is quite useless in MAX(c ORDER BY c). There is a little bit of passing fallout from adjusting expr_is_nonnullable() to handle Const which results in a plan change in the aggregates.out regression test. Previously, nothing was able to determine that "One-Time Filter: (100 IS NOT NULL)" was always true, therefore useless to include in the plan. Author: David Rowley <dgrowleyml@gmail.com> Reviewed-by: Corey Huinker <corey.huinker@gmail.com> Reviewed-by: Matheus Alcantara <matheusssilv97@gmail.com> Discussion: https://postgr.es/m/CAApHDvqGcPTagXpKfH=CrmHBqALpziThJEDs_MrPqjKVeDF9wA@mail.gmail.com
2025-12-19 17:02:53 +03:00 · 2025-11-27 10:43:28 +13:00
parent dbdc717ac6
commit 42473b3b31
9 changed files with 325 additions and 37 deletions
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1219,19 +1219,18 @@ select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
 9999 | 1
 (3 rows)

-- interesting corner case: constant gets optimized into a seqscan
+-- two interesting corner cases: both non-null and null constant gets
+-- optimized into a seqscan
 explain (costs off)
  select max(100) from tenk1;
-                     QUERY PLAN                     
----------------------------------------------------
+           QUERY PLAN            
+---------------------------------
 Result
   Replaces: MinMaxAggregate
   InitPlan minmax_1
     ->  Limit
-           ->  Result
-                 One-Time Filter: (100 IS NOT NULL)
-                 ->  Seq Scan on tenk1
-(7 rows)
+           ->  Seq Scan on tenk1
+(5 rows)

 select max(100) from tenk1;
 max 
@@ -1239,6 +1238,25 @@ select max(100) from tenk1;
 100
 (1 row)

+explain (costs off)
+  select max(null) from tenk1;
+                        QUERY PLAN                         
+-----------------------------------------------------------
+ Result
+   Replaces: MinMaxAggregate
+   InitPlan minmax_1
+     ->  Limit
+           ->  Result
+                 One-Time Filter: (NULL::text IS NOT NULL)
+                 ->  Seq Scan on tenk1
+(7 rows)
+
+select max(null) from tenk1;
+ max 
+-----
+ 
+(1 row)
+
 -- try it on an inheritance tree
 create table minmaxtest(f1 int);
 create table minmaxtest1() inherits (minmaxtest);
@@ -2821,6 +2839,101 @@ select pg_typeof(cleast_agg(variadic array[4.5,f1])) from int4_tbl;
 numeric
 (1 row)

+--
+-- Test SupportRequestSimplifyAggref code
+--
+begin;
+create table agg_simplify (a int, not_null_col int not null, nullable_col int);
+-- Ensure count(not_null_col) uses count(*)
+explain (costs off, verbose)
+select count(not_null_col) from agg_simplify;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Aggregate
+   Output: count(*)
+   ->  Seq Scan on public.agg_simplify
+         Output: a, not_null_col, nullable_col
+(4 rows)
+
+-- Ensure count(<not null const>) uses count(*)
+explain (costs off, verbose)
+select count('bananas') from agg_simplify;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Aggregate
+   Output: count(*)
+   ->  Seq Scan on public.agg_simplify
+         Output: a, not_null_col, nullable_col
+(4 rows)
+
+-- Ensure count(null) isn't optimized
+explain (costs off, verbose)
+select count(null) from agg_simplify;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Aggregate
+   Output: count(NULL::unknown)
+   ->  Seq Scan on public.agg_simplify
+         Output: a, not_null_col, nullable_col
+(4 rows)
+
+-- Ensure count(nullable_col) does not use count(*)
+explain (costs off, verbose)
+select count(nullable_col) from agg_simplify;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Aggregate
+   Output: count(nullable_col)
+   ->  Seq Scan on public.agg_simplify
+         Output: a, not_null_col, nullable_col
+(4 rows)
+
+-- Ensure there's no optimization with DISTINCT aggs
+explain (costs off, verbose)
+select count(distinct not_null_col) from agg_simplify;
+                 QUERY PLAN                  
+---------------------------------------------
+ Aggregate
+   Output: count(DISTINCT not_null_col)
+   ->  Sort
+         Output: not_null_col
+         Sort Key: agg_simplify.not_null_col
+         ->  Seq Scan on public.agg_simplify
+               Output: not_null_col
+(7 rows)
+
+-- Ensure there's no optimization with ORDER BY aggs
+explain (costs off, verbose)
+select count(not_null_col order by not_null_col) from agg_simplify;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Aggregate
+   Output: count(not_null_col ORDER BY not_null_col)
+   ->  Sort
+         Output: not_null_col
+         Sort Key: agg_simplify.not_null_col
+         ->  Seq Scan on public.agg_simplify
+               Output: not_null_col
+(7 rows)
+
+-- Ensure we don't optimize to count(*) with agglevelsup > 0
+explain (costs off, verbose)
+select a from agg_simplify a group by a
+having exists (select 1 from onek b where count(a.not_null_col) = b.four);
+                     QUERY PLAN                      
+-----------------------------------------------------
+ HashAggregate
+   Output: a.a
+   Group Key: a.a
+   Filter: EXISTS(SubPlan exists_1)
+   ->  Seq Scan on public.agg_simplify a
+         Output: a.a, a.not_null_col, a.nullable_col
+   SubPlan exists_1
+     ->  Seq Scan on public.onek b
+           Filter: (count(a.not_null_col) = b.four)
+(9 rows)
+
+rollback;
 -- test aggregates with common transition functions share the same states
 begin work;
 create type avg_state as (total bigint, count bigint);
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -416,11 +416,16 @@ explain (costs off)
  select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
 select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;

-- interesting corner case: constant gets optimized into a seqscan
+-- two interesting corner cases: both non-null and null constant gets
+-- optimized into a seqscan
 explain (costs off)
  select max(100) from tenk1;
 select max(100) from tenk1;

+explain (costs off)
+  select max(null) from tenk1;
+select max(null) from tenk1;
+
 -- try it on an inheritance tree
 create table minmaxtest(f1 int);
 create table minmaxtest1() inherits (minmaxtest);
@@ -1108,6 +1113,43 @@ select cleast_agg(4.5,f1) from int4_tbl;
 select cleast_agg(variadic array[4.5,f1]) from int4_tbl;
 select pg_typeof(cleast_agg(variadic array[4.5,f1])) from int4_tbl;

+--
+-- Test SupportRequestSimplifyAggref code
+--
+begin;
+create table agg_simplify (a int, not_null_col int not null, nullable_col int);
+
+-- Ensure count(not_null_col) uses count(*)
+explain (costs off, verbose)
+select count(not_null_col) from agg_simplify;
+
+-- Ensure count(<not null const>) uses count(*)
+explain (costs off, verbose)
+select count('bananas') from agg_simplify;
+
+-- Ensure count(null) isn't optimized
+explain (costs off, verbose)
+select count(null) from agg_simplify;
+
+-- Ensure count(nullable_col) does not use count(*)
+explain (costs off, verbose)
+select count(nullable_col) from agg_simplify;
+
+-- Ensure there's no optimization with DISTINCT aggs
+explain (costs off, verbose)
+select count(distinct not_null_col) from agg_simplify;
+
+-- Ensure there's no optimization with ORDER BY aggs
+explain (costs off, verbose)
+select count(not_null_col order by not_null_col) from agg_simplify;
+
+-- Ensure we don't optimize to count(*) with agglevelsup > 0
+explain (costs off, verbose)
+select a from agg_simplify a group by a
+having exists (select 1 from onek b where count(a.not_null_col) = b.four);
+
+rollback;
+
 -- test aggregates with common transition functions share the same states
 begin work;