Have the planner replace COUNT(ANY) with COUNT(*), when possible

This adds SupportRequestSimplifyAggref to allow pg_proc.prosupport functions to receive an Aggref and allow them to determine if there is a way that the Aggref call can be optimized. Also added is a support function to allow transformation of COUNT(ANY) into COUNT(*). This is possible to do when the given "ANY" cannot be NULL and also that there are no ORDER BY / DISTINCT clauses within the Aggref. This is a useful transformation to do as it is common that people write COUNT(1), which until now has added unneeded overhead. When counting a NOT NULL column, the overheads can be worse as that might mean deforming more of the tuple, which for large fact tables may be many columns in. It may be possible to add prosupport functions for other aggregates. We could consider if ORDER BY could be dropped for some calls, e.g. the ORDER BY is quite useless in MAX(c ORDER BY c). There is a little bit of passing fallout from adjusting expr_is_nonnullable() to handle Const which results in a plan change in the aggregates.out regression test. Previously, nothing was able to determine that "One-Time Filter: (100 IS NOT NULL)" was always true, therefore useless to include in the plan. Author: David Rowley <dgrowleyml@gmail.com> Reviewed-by: Corey Huinker <corey.huinker@gmail.com> Reviewed-by: Matheus Alcantara <matheusssilv97@gmail.com> Discussion: https://postgr.es/m/CAApHDvqGcPTagXpKfH=CrmHBqALpziThJEDs_MrPqjKVeDF9wA@mail.gmail.com
2025-11-28 11:44:57 +03:00 · 2025-11-27 10:43:28 +13:00
parent dbdc717ac6
commit 42473b3b31
9 changed files with 325 additions and 37 deletions
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2975,9 +2975,9 @@ select sum(t1.c1), count(t2.c1) from ft1 t1 inner join ft2 t2 on (t1.c1 = t2.c1)
                                                         QUERY PLAN                                                         
 ----------------------------------------------------------------------------------------------------------------------------
 Aggregate
-   Output: sum(t1.c1), count(t2.c1)
+   Output: sum(t1.c1), count(*)
   ->  Foreign Scan
-         Output: t1.c1, t2.c1
+         Output: t1.c1
         Filter: (((((t1.c1 * t2.c1) / (t1.c1 * t2.c1)))::double precision * random()) <= '1'::double precision)
         Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
         Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1"))))
@@ -3073,12 +3073,12 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i
 -- GROUP BY clause in various forms, cardinal, alias and constant expression
 explain (verbose, costs off)
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
-                                                 QUERY PLAN                                                 
------------------------------------------------------------------------------------------------------------
+                                                QUERY PLAN                                                 
+-----------------------------------------------------------------------------------------------------------
 Foreign Scan
-   Output: (count(c2)), c2, 5, 7.0, 9
+   Output: (count(*)), c2, 5, 7.0, 9
   Relations: Aggregate on (public.ft1)
-   Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
+   Remote SQL: SELECT count(*), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
 (4 rows)

 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
@@ -3379,8 +3379,8 @@ select distinct (select count(*) filter (where t2.c2 = 6 and t2.c1 < 10) from ft
 -- Inner query is aggregation query
 explain (verbose, costs off)
 select distinct (select count(t1.c1) filter (where t2.c2 = 6 and t2.c1 < 10) from ft1 t1 where t1.c1 = 6) from ft2 t2 where t2.c2 % 6 = 0 order by 1;
-                                                                      QUERY PLAN                                                                      
------------------------------------------------------------------------------------------------------------------------------------------------------
+                                                                    QUERY PLAN                                                                    
+--------------------------------------------------------------------------------------------------------------------------------------------------
 Unique
   Output: ((SubPlan expr_1))
   ->  Sort
@@ -3391,9 +3391,9 @@ select distinct (select count(t1.c1) filter (where t2.c2 = 6 and t2.c1 < 10) fro
               Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (((c2 % 6) = 0))
               SubPlan expr_1
                 ->  Foreign Scan
-                       Output: (count(t1.c1) FILTER (WHERE ((t2.c2 = 6) AND (t2.c1 < 10))))
+                       Output: (count(*) FILTER (WHERE ((t2.c2 = 6) AND (t2.c1 < 10))))
                       Relations: Aggregate on (public.ft1 t1)
-                       Remote SQL: SELECT count("C 1") FILTER (WHERE (($1::integer = 6) AND ($2::integer < 10))) FROM "S 1"."T 1" WHERE (("C 1" = 6))
+                       Remote SQL: SELECT count(*) FILTER (WHERE (($1::integer = 6) AND ($2::integer < 10))) FROM "S 1"."T 1" WHERE (("C 1" = 6))
 (13 rows)

 select distinct (select count(t1.c1) filter (where t2.c2 = 6 and t2.c1 < 10) from ft1 t1 where t1.c1 = 6) from ft2 t2 where t2.c2 % 6 = 0 order by 1;
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -3413,22 +3413,6 @@ add_base_clause_to_rel(PlannerInfo *root, Index relid,
 										 restrictinfo->security_level);
 }

-/*
- * expr_is_nonnullable
- *	  Check to see if the Expr cannot be NULL
- *
- * Currently we only support simple Vars.
- */
-static bool
-expr_is_nonnullable(PlannerInfo *root, Expr *expr)
-{
-	/* For now only check simple Vars */
-	if (!IsA(expr, Var))
-		return false;
-
-	return var_is_nonnullable(root, (Var *) expr, true);
-}
-
 /*
 * restriction_is_always_true
 *	  Check to see if the RestrictInfo is always true.
@@ -3465,7 +3449,7 @@ restriction_is_always_true(PlannerInfo *root,
 		if (nulltest->argisrow)
 			return false;

-		return expr_is_nonnullable(root, nulltest->arg);
+		return expr_is_nonnullable(root, nulltest->arg, true);
 	}

 	/* If it's an OR, check its sub-clauses */
@@ -3530,7 +3514,7 @@ restriction_is_always_false(PlannerInfo *root,
 		if (nulltest->argisrow)
 			return false;

-		return expr_is_nonnullable(root, nulltest->arg);
+		return expr_is_nonnullable(root, nulltest->arg, true);
 	}

 	/* If it's an OR, check its sub-clauses */
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -131,6 +131,8 @@ static Expr *simplify_function(Oid funcid,
 							   Oid result_collid, Oid input_collid, List **args_p,
 							   bool funcvariadic, bool process_args, bool allow_non_const,
 							   eval_const_expressions_context *context);
+static Node *simplify_aggref(Aggref *aggref,
+							 eval_const_expressions_context *context);
 static List *reorder_function_arguments(List *args, int pronargs,
 										HeapTuple func_tuple);
 static List *add_function_defaults(List *args, int pronargs,
@@ -2634,6 +2636,9 @@ eval_const_expressions_mutator(Node *node,
 				newexpr->location = expr->location;
 				return (Node *) newexpr;
 			}
+		case T_Aggref:
+			node = ece_generic_processing(node);
+			return simplify_aggref((Aggref *) node, context);
 		case T_OpExpr:
 			{
 				OpExpr	   *expr = (OpExpr *) node;
@@ -4200,6 +4205,50 @@ simplify_function(Oid funcid, Oid result_type, int32 result_typmod,
 	return newexpr;
 }

+/*
+ * simplify_aggref
+ *		Call the Aggref.aggfnoid's prosupport function to allow it to
+ *		determine if simplification of the Aggref is possible.  Returns the
+ *		newly simplified node if conversion took place; otherwise, returns the
+ *		original Aggref.
+ *
+ * See SupportRequestSimplifyAggref comments in supportnodes.h for further
+ * details.
+ */
+static Node *
+simplify_aggref(Aggref *aggref, eval_const_expressions_context *context)
+{
+	Oid			prosupport = get_func_support(aggref->aggfnoid);
+
+	if (OidIsValid(prosupport))
+	{
+		SupportRequestSimplifyAggref req;
+		Node	   *newnode;
+
+		/*
+		 * Build a SupportRequestSimplifyAggref node to pass to the support
+		 * function.
+		 */
+		req.type = T_SupportRequestSimplifyAggref;
+		req.root = context->root;
+		req.aggref = aggref;
+
+		newnode = (Node *) DatumGetPointer(OidFunctionCall1(prosupport,
+															PointerGetDatum(&req)));
+
+		/*
+		 * We expect the support function to return either a new Node or NULL
+		 * (when simplification isn't possible).
+		 */
+		Assert(newnode != (Node *) aggref || newnode == NULL);
+
+		if (newnode != NULL)
+			return newnode;
+	}
+
+	return (Node *) aggref;
+}
+
 /*
 * var_is_nonnullable: check to see if the Var cannot be NULL
 *
@@ -4261,6 +4310,30 @@ var_is_nonnullable(PlannerInfo *root, Var *var, bool use_rel_info)
 	return false;
 }

+/*
+ * expr_is_nonnullable: check to see if the Expr cannot be NULL
+ *
+ * Returns true iff the given 'expr' cannot produce SQL NULLs.
+ *
+ * If 'use_rel_info' is true, nullability of Vars is checked via the
+ * corresponding RelOptInfo for the given Var.  Some callers require
+ * nullability information before RelOptInfos are generated.  These should
+ * pass 'use_rel_info' as false.
+ *
+ * For now, we only support Var and Const.  Support for other node types may
+ * be possible.
+ */
+bool
+expr_is_nonnullable(PlannerInfo *root, Expr *expr, bool use_rel_info)
+{
+	if (IsA(expr, Var))
+		return var_is_nonnullable(root, (Var *) expr, use_rel_info);
+	if (IsA(expr, Const))
+		return !castNode(Const, expr)->constisnull;
+
+	return false;
+}
+
 /*
 * expand_function_arguments: convert named-notation args to positional args
 * and/or insert default args, as needed
--- a/src/backend/utils/adt/int8.c
+++ b/src/backend/utils/adt/int8.c
@@ -24,7 +24,7 @@
 #include "nodes/supportnodes.h"
 #include "optimizer/optimizer.h"
 #include "utils/builtins.h"
-
+#include "utils/fmgroids.h"

 typedef struct
 {
@@ -811,6 +811,53 @@ int8inc_support(PG_FUNCTION_ARGS)
 		PG_RETURN_POINTER(req);
 	}

+	if (IsA(rawreq, SupportRequestSimplifyAggref))
+	{
+		SupportRequestSimplifyAggref *req = (SupportRequestSimplifyAggref *) rawreq;
+		Aggref	   *agg = req->aggref;
+
+		/*
+		 * Check for COUNT(ANY) and try to convert to COUNT(*). The input
+		 * argument cannot be NULL, we can't have an ORDER BY / DISTINCT in
+		 * the aggregate, and agglevelsup must be 0.
+		 *
+		 * Technically COUNT(ANY) must have 1 arg, but be paranoid and check.
+		 */
+		if (agg->aggfnoid == F_COUNT_ANY && list_length(agg->args) == 1)
+		{
+			TargetEntry *tle = (TargetEntry *) linitial(agg->args);
+			Expr	   *arg = tle->expr;
+
+			/* Check for unsupported cases */
+			if (agg->aggdistinct != NIL || agg->aggorder != NIL ||
+				agg->agglevelsup != 0)
+				PG_RETURN_POINTER(NULL);
+
+			/* If the arg isn't NULLable, do the conversion */
+			if (expr_is_nonnullable(req->root, arg, false))
+			{
+				Aggref	   *newagg;
+
+				/* We don't expect these to have been set yet */
+				Assert(agg->aggtransno == -1);
+				Assert(agg->aggtranstype == InvalidOid);
+
+				/* Convert COUNT(ANY) to COUNT(*) by making a new Aggref */
+				newagg = makeNode(Aggref);
+				memcpy(newagg, agg, sizeof(Aggref));
+				newagg->aggfnoid = F_COUNT_;
+
+				/* count(*) has no args */
+				newagg->aggargtypes = NULL;
+				newagg->args = NULL;
+				newagg->aggstar = true;
+				newagg->location = -1;
+
+				PG_RETURN_POINTER(newagg);
+			}
+		}
+	}
+
 	PG_RETURN_POINTER(NULL);
 }

--- a/src/include/nodes/supportnodes.h
+++ b/src/include/nodes/supportnodes.h
@@ -71,6 +71,31 @@ typedef struct SupportRequestSimplify
 	FuncExpr   *fcall;			/* Function call to be simplified */
 } SupportRequestSimplify;

+/*
+ * Similar to SupportRequestSimplify but for Aggref node types.
+ *
+ * This supports conversions such as swapping COUNT(1) or COUNT(notnullcol)
+ * for COUNT(*).
+ *
+ * Supporting functions can consult 'root' and the input 'aggref'.  When the
+ * implementing support function deems the simplification is possible, it must
+ * create a new Node (probably another Aggref) and not modify the original.
+ * The newly created Node should then be returned to indicate that the
+ * conversion is to take place.  When no conversion is possible, a NULL
+ * pointer should be returned.
+ *
+ * It is important to consider that implementing support functions can receive
+ * Aggrefs with agglevelsup > 0.  Careful consideration should be given to
+ * whether the simplification is still possible at levels above 0.
+ */
+typedef struct SupportRequestSimplifyAggref
+{
+	NodeTag		type;
+
+	PlannerInfo *root;			/* Planner's infrastructure */
+	Aggref	   *aggref;			/* Aggref to be simplified */
+} SupportRequestSimplifyAggref;
+
 /*
 * The InlineInFrom request allows the support function to perform plan-time
 * simplification of a call to its target function that appears in FROM.
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -147,6 +147,9 @@ extern Expr *evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod,

 extern bool var_is_nonnullable(PlannerInfo *root, Var *var, bool use_rel_info);

+extern bool expr_is_nonnullable(PlannerInfo *root, Expr *expr,
+								bool use_rel_info);
+
 extern List *expand_function_arguments(List *args, bool include_out_arguments,
 									   Oid result_type,
 									   HeapTuple func_tuple);
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1219,19 +1219,18 @@ select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
 9999 | 1
 (3 rows)

-- interesting corner case: constant gets optimized into a seqscan
+-- two interesting corner cases: both non-null and null constant gets
+-- optimized into a seqscan
 explain (costs off)
  select max(100) from tenk1;
-                     QUERY PLAN                     
----------------------------------------------------
+           QUERY PLAN            
+---------------------------------
 Result
   Replaces: MinMaxAggregate
   InitPlan minmax_1
     ->  Limit
-           ->  Result
-                 One-Time Filter: (100 IS NOT NULL)
-                 ->  Seq Scan on tenk1
-(7 rows)
+           ->  Seq Scan on tenk1
+(5 rows)

 select max(100) from tenk1;
 max 
@@ -1239,6 +1238,25 @@ select max(100) from tenk1;
 100
 (1 row)

+explain (costs off)
+  select max(null) from tenk1;
+                        QUERY PLAN                         
+-----------------------------------------------------------
+ Result
+   Replaces: MinMaxAggregate
+   InitPlan minmax_1
+     ->  Limit
+           ->  Result
+                 One-Time Filter: (NULL::text IS NOT NULL)
+                 ->  Seq Scan on tenk1
+(7 rows)
+
+select max(null) from tenk1;
+ max 
+-----
+ 
+(1 row)
+
 -- try it on an inheritance tree
 create table minmaxtest(f1 int);
 create table minmaxtest1() inherits (minmaxtest);
@@ -2821,6 +2839,101 @@ select pg_typeof(cleast_agg(variadic array[4.5,f1])) from int4_tbl;
 numeric
 (1 row)

+--
+-- Test SupportRequestSimplifyAggref code
+--
+begin;
+create table agg_simplify (a int, not_null_col int not null, nullable_col int);
+-- Ensure count(not_null_col) uses count(*)
+explain (costs off, verbose)
+select count(not_null_col) from agg_simplify;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Aggregate
+   Output: count(*)
+   ->  Seq Scan on public.agg_simplify
+         Output: a, not_null_col, nullable_col
+(4 rows)
+
+-- Ensure count(<not null const>) uses count(*)
+explain (costs off, verbose)
+select count('bananas') from agg_simplify;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Aggregate
+   Output: count(*)
+   ->  Seq Scan on public.agg_simplify
+         Output: a, not_null_col, nullable_col
+(4 rows)
+
+-- Ensure count(null) isn't optimized
+explain (costs off, verbose)
+select count(null) from agg_simplify;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Aggregate
+   Output: count(NULL::unknown)
+   ->  Seq Scan on public.agg_simplify
+         Output: a, not_null_col, nullable_col
+(4 rows)
+
+-- Ensure count(nullable_col) does not use count(*)
+explain (costs off, verbose)
+select count(nullable_col) from agg_simplify;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Aggregate
+   Output: count(nullable_col)
+   ->  Seq Scan on public.agg_simplify
+         Output: a, not_null_col, nullable_col
+(4 rows)
+
+-- Ensure there's no optimization with DISTINCT aggs
+explain (costs off, verbose)
+select count(distinct not_null_col) from agg_simplify;
+                 QUERY PLAN                  
+---------------------------------------------
+ Aggregate
+   Output: count(DISTINCT not_null_col)
+   ->  Sort
+         Output: not_null_col
+         Sort Key: agg_simplify.not_null_col
+         ->  Seq Scan on public.agg_simplify
+               Output: not_null_col
+(7 rows)
+
+-- Ensure there's no optimization with ORDER BY aggs
+explain (costs off, verbose)
+select count(not_null_col order by not_null_col) from agg_simplify;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Aggregate
+   Output: count(not_null_col ORDER BY not_null_col)
+   ->  Sort
+         Output: not_null_col
+         Sort Key: agg_simplify.not_null_col
+         ->  Seq Scan on public.agg_simplify
+               Output: not_null_col
+(7 rows)
+
+-- Ensure we don't optimize to count(*) with agglevelsup > 0
+explain (costs off, verbose)
+select a from agg_simplify a group by a
+having exists (select 1 from onek b where count(a.not_null_col) = b.four);
+                     QUERY PLAN                      
+-----------------------------------------------------
+ HashAggregate
+   Output: a.a
+   Group Key: a.a
+   Filter: EXISTS(SubPlan exists_1)
+   ->  Seq Scan on public.agg_simplify a
+         Output: a.a, a.not_null_col, a.nullable_col
+   SubPlan exists_1
+     ->  Seq Scan on public.onek b
+           Filter: (count(a.not_null_col) = b.four)
+(9 rows)
+
+rollback;
 -- test aggregates with common transition functions share the same states
 begin work;
 create type avg_state as (total bigint, count bigint);
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -416,11 +416,16 @@ explain (costs off)
  select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
 select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;

-- interesting corner case: constant gets optimized into a seqscan
+-- two interesting corner cases: both non-null and null constant gets
+-- optimized into a seqscan
 explain (costs off)
  select max(100) from tenk1;
 select max(100) from tenk1;

+explain (costs off)
+  select max(null) from tenk1;
+select max(null) from tenk1;
+
 -- try it on an inheritance tree
 create table minmaxtest(f1 int);
 create table minmaxtest1() inherits (minmaxtest);
@@ -1108,6 +1113,43 @@ select cleast_agg(4.5,f1) from int4_tbl;
 select cleast_agg(variadic array[4.5,f1]) from int4_tbl;
 select pg_typeof(cleast_agg(variadic array[4.5,f1])) from int4_tbl;

+--
+-- Test SupportRequestSimplifyAggref code
+--
+begin;
+create table agg_simplify (a int, not_null_col int not null, nullable_col int);
+
+-- Ensure count(not_null_col) uses count(*)
+explain (costs off, verbose)
+select count(not_null_col) from agg_simplify;
+
+-- Ensure count(<not null const>) uses count(*)
+explain (costs off, verbose)
+select count('bananas') from agg_simplify;
+
+-- Ensure count(null) isn't optimized
+explain (costs off, verbose)
+select count(null) from agg_simplify;
+
+-- Ensure count(nullable_col) does not use count(*)
+explain (costs off, verbose)
+select count(nullable_col) from agg_simplify;
+
+-- Ensure there's no optimization with DISTINCT aggs
+explain (costs off, verbose)
+select count(distinct not_null_col) from agg_simplify;
+
+-- Ensure there's no optimization with ORDER BY aggs
+explain (costs off, verbose)
+select count(not_null_col order by not_null_col) from agg_simplify;
+
+-- Ensure we don't optimize to count(*) with agglevelsup > 0
+explain (costs off, verbose)
+select a from agg_simplify a group by a
+having exists (select 1 from onek b where count(a.not_null_col) = b.four);
+
+rollback;
+
 -- test aggregates with common transition functions share the same states
 begin work;

--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2927,6 +2927,7 @@ SupportRequestOptimizeWindowClause
 SupportRequestRows
 SupportRequestSelectivity
 SupportRequestSimplify
+SupportRequestSimplifyAggref
 SupportRequestWFuncMonotonic
 Syn
 SyncOps