1
0
mirror of https://github.com/postgres/postgres.git synced 2025-12-19 17:02:53 +03:00

Have the planner replace COUNT(ANY) with COUNT(*), when possible

This adds SupportRequestSimplifyAggref to allow pg_proc.prosupport
functions to receive an Aggref and allow them to determine if there is a
way that the Aggref call can be optimized.

Also added is a support function to allow transformation of COUNT(ANY)
into COUNT(*).  This is possible to do when the given "ANY" cannot be
NULL and there are no ORDER BY / DISTINCT clauses within the
Aggref.  This is a useful transformation to do as it is common that
people write COUNT(1), which until now has added unneeded overhead.
When counting a NOT NULL column, the overheads can be worse as that
might mean deforming more of the tuple, which for large fact tables may
be many columns in.

It may be possible to add prosupport functions for other aggregates.  We
could consider if ORDER BY could be dropped for some calls, e.g. the
ORDER BY is quite useless in MAX(c ORDER BY c).

There is a little bit of passing fallout from adjusting
expr_is_nonnullable() to handle Const which results in a plan change in
the aggregates.out regression test.  Previously, nothing was able to
determine that "One-Time Filter: (100 IS NOT NULL)" was always true,
therefore useless to include in the plan.

Author: David Rowley <dgrowleyml@gmail.com>
Reviewed-by: Corey Huinker <corey.huinker@gmail.com>
Reviewed-by: Matheus Alcantara <matheusssilv97@gmail.com>
Discussion: https://postgr.es/m/CAApHDvqGcPTagXpKfH=CrmHBqALpziThJEDs_MrPqjKVeDF9wA@mail.gmail.com
This commit is contained in:
David Rowley
2025-11-27 10:43:28 +13:00
parent dbdc717ac6
commit 42473b3b31
9 changed files with 325 additions and 37 deletions

View File

@@ -1219,19 +1219,18 @@ select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
9999 | 1
(3 rows)
-- interesting corner case: constant gets optimized into a seqscan
-- two interesting corner cases: both non-null and null constant gets
-- optimized into a seqscan
explain (costs off)
select max(100) from tenk1;
QUERY PLAN
----------------------------------------------------
QUERY PLAN
---------------------------------
Result
Replaces: MinMaxAggregate
InitPlan minmax_1
-> Limit
-> Result
One-Time Filter: (100 IS NOT NULL)
-> Seq Scan on tenk1
(7 rows)
-> Seq Scan on tenk1
(5 rows)
select max(100) from tenk1;
max
@@ -1239,6 +1238,25 @@ select max(100) from tenk1;
100
(1 row)
explain (costs off)
select max(null) from tenk1;
QUERY PLAN
-----------------------------------------------------------
Result
Replaces: MinMaxAggregate
InitPlan minmax_1
-> Limit
-> Result
One-Time Filter: (NULL::text IS NOT NULL)
-> Seq Scan on tenk1
(7 rows)
select max(null) from tenk1;
max
-----
(1 row)
-- try it on an inheritance tree
create table minmaxtest(f1 int);
create table minmaxtest1() inherits (minmaxtest);
@@ -2821,6 +2839,101 @@ select pg_typeof(cleast_agg(variadic array[4.5,f1])) from int4_tbl;
numeric
(1 row)
--
-- Test SupportRequestSimplifyAggref code
--
begin;
create table agg_simplify (a int, not_null_col int not null, nullable_col int);
-- Ensure count(not_null_col) uses count(*)
explain (costs off, verbose)
select count(not_null_col) from agg_simplify;
QUERY PLAN
-----------------------------------------------
Aggregate
Output: count(*)
-> Seq Scan on public.agg_simplify
Output: a, not_null_col, nullable_col
(4 rows)
-- Ensure count(<not null const>) uses count(*)
explain (costs off, verbose)
select count('bananas') from agg_simplify;
QUERY PLAN
-----------------------------------------------
Aggregate
Output: count(*)
-> Seq Scan on public.agg_simplify
Output: a, not_null_col, nullable_col
(4 rows)
-- Ensure count(null) isn't optimized
explain (costs off, verbose)
select count(null) from agg_simplify;
QUERY PLAN
-----------------------------------------------
Aggregate
Output: count(NULL::unknown)
-> Seq Scan on public.agg_simplify
Output: a, not_null_col, nullable_col
(4 rows)
-- Ensure count(nullable_col) does not use count(*)
explain (costs off, verbose)
select count(nullable_col) from agg_simplify;
QUERY PLAN
-----------------------------------------------
Aggregate
Output: count(nullable_col)
-> Seq Scan on public.agg_simplify
Output: a, not_null_col, nullable_col
(4 rows)
-- Ensure there's no optimization with DISTINCT aggs
explain (costs off, verbose)
select count(distinct not_null_col) from agg_simplify;
QUERY PLAN
---------------------------------------------
Aggregate
Output: count(DISTINCT not_null_col)
-> Sort
Output: not_null_col
Sort Key: agg_simplify.not_null_col
-> Seq Scan on public.agg_simplify
Output: not_null_col
(7 rows)
-- Ensure there's no optimization with ORDER BY aggs
explain (costs off, verbose)
select count(not_null_col order by not_null_col) from agg_simplify;
QUERY PLAN
-----------------------------------------------------
Aggregate
Output: count(not_null_col ORDER BY not_null_col)
-> Sort
Output: not_null_col
Sort Key: agg_simplify.not_null_col
-> Seq Scan on public.agg_simplify
Output: not_null_col
(7 rows)
-- Ensure we don't optimize to count(*) with agglevelsup > 0
explain (costs off, verbose)
select a from agg_simplify a group by a
having exists (select 1 from onek b where count(a.not_null_col) = b.four);
QUERY PLAN
-----------------------------------------------------
HashAggregate
Output: a.a
Group Key: a.a
Filter: EXISTS(SubPlan exists_1)
-> Seq Scan on public.agg_simplify a
Output: a.a, a.not_null_col, a.nullable_col
SubPlan exists_1
-> Seq Scan on public.onek b
Filter: (count(a.not_null_col) = b.four)
(9 rows)
rollback;
-- test aggregates with common transition functions share the same states
begin work;
create type avg_state as (total bigint, count bigint);

View File

@@ -416,11 +416,16 @@ explain (costs off)
select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
-- interesting corner case: constant gets optimized into a seqscan
-- two interesting corner cases: both non-null and null constant gets
-- optimized into a seqscan
explain (costs off)
select max(100) from tenk1;
select max(100) from tenk1;
explain (costs off)
select max(null) from tenk1;
select max(null) from tenk1;
-- try it on an inheritance tree
create table minmaxtest(f1 int);
create table minmaxtest1() inherits (minmaxtest);
@@ -1108,6 +1113,43 @@ select cleast_agg(4.5,f1) from int4_tbl;
select cleast_agg(variadic array[4.5,f1]) from int4_tbl;
select pg_typeof(cleast_agg(variadic array[4.5,f1])) from int4_tbl;
--
-- Test SupportRequestSimplifyAggref code
--
begin;
create table agg_simplify (a int, not_null_col int not null, nullable_col int);
-- Ensure count(not_null_col) uses count(*)
explain (costs off, verbose)
select count(not_null_col) from agg_simplify;
-- Ensure count(<not null const>) uses count(*)
explain (costs off, verbose)
select count('bananas') from agg_simplify;
-- Ensure count(null) isn't optimized
explain (costs off, verbose)
select count(null) from agg_simplify;
-- Ensure count(nullable_col) does not use count(*)
explain (costs off, verbose)
select count(nullable_col) from agg_simplify;
-- Ensure there's no optimization with DISTINCT aggs
explain (costs off, verbose)
select count(distinct not_null_col) from agg_simplify;
-- Ensure there's no optimization with ORDER BY aggs
explain (costs off, verbose)
select count(not_null_col order by not_null_col) from agg_simplify;
-- Ensure we don't optimize to count(*) with agglevelsup > 0
explain (costs off, verbose)
select a from agg_simplify a group by a
having exists (select 1 from onek b where count(a.not_null_col) = b.four);
rollback;
-- test aggregates with common transition functions share the same states
begin work;