1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-28 11:44:57 +03:00

Have the planner replace COUNT(ANY) with COUNT(*), when possible

This adds SupportRequestSimplifyAggref to allow pg_proc.prosupport
functions to receive an Aggref and allow them to determine if there is a
way that the Aggref call can be optimized.

Also added is a support function to allow transformation of COUNT(ANY)
into COUNT(*).  This is possible to do when the given "ANY" cannot be
NULL and also that there are no ORDER BY / DISTINCT clauses within the
Aggref.  This is a useful transformation to do as it is common that
people write COUNT(1), which until now has added unneeded overhead.
When counting a NOT NULL column, the overheads can be worse as that
might mean deforming more of the tuple, which for large fact tables may
be many columns in.

It may be possible to add prosupport functions for other aggregates.  We
could consider if ORDER BY could be dropped for some calls, e.g. the
ORDER BY is quite useless in MAX(c ORDER BY c).

There is a little bit of passing fallout from adjusting
expr_is_nonnullable() to handle Const which results in a plan change in
the aggregates.out regression test.  Previously, nothing was able to
determine that "One-Time Filter: (100 IS NOT NULL)" was always true,
therefore useless to include in the plan.

Author: David Rowley <dgrowleyml@gmail.com>
Reviewed-by: Corey Huinker <corey.huinker@gmail.com>
Reviewed-by: Matheus Alcantara <matheusssilv97@gmail.com>
Discussion: https://postgr.es/m/CAApHDvqGcPTagXpKfH=CrmHBqALpziThJEDs_MrPqjKVeDF9wA@mail.gmail.com
This commit is contained in:
David Rowley
2025-11-27 10:43:28 +13:00
parent dbdc717ac6
commit 42473b3b31
9 changed files with 325 additions and 37 deletions

View File

@@ -2975,9 +2975,9 @@ select sum(t1.c1), count(t2.c1) from ft1 t1 inner join ft2 t2 on (t1.c1 = t2.c1)
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------
Aggregate
Output: sum(t1.c1), count(t2.c1)
Output: sum(t1.c1), count(*)
-> Foreign Scan
Output: t1.c1, t2.c1
Output: t1.c1
Filter: (((((t1.c1 * t2.c1) / (t1.c1 * t2.c1)))::double precision * random()) <= '1'::double precision)
Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1"))))
@@ -3073,12 +3073,12 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i
-- GROUP BY clause in various forms, cardinal, alias and constant expression
explain (verbose, costs off)
select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
QUERY PLAN
------------------------------------------------------------------------------------------------------------
QUERY PLAN
-----------------------------------------------------------------------------------------------------------
Foreign Scan
Output: (count(c2)), c2, 5, 7.0, 9
Output: (count(*)), c2, 5, 7.0, 9
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
Remote SQL: SELECT count(*), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
(4 rows)
select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
@@ -3379,8 +3379,8 @@ select distinct (select count(*) filter (where t2.c2 = 6 and t2.c1 < 10) from ft
-- Inner query is aggregation query
explain (verbose, costs off)
select distinct (select count(t1.c1) filter (where t2.c2 = 6 and t2.c1 < 10) from ft1 t1 where t1.c1 = 6) from ft2 t2 where t2.c2 % 6 = 0 order by 1;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------
Unique
Output: ((SubPlan expr_1))
-> Sort
@@ -3391,9 +3391,9 @@ select distinct (select count(t1.c1) filter (where t2.c2 = 6 and t2.c1 < 10) fro
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (((c2 % 6) = 0))
SubPlan expr_1
-> Foreign Scan
Output: (count(t1.c1) FILTER (WHERE ((t2.c2 = 6) AND (t2.c1 < 10))))
Output: (count(*) FILTER (WHERE ((t2.c2 = 6) AND (t2.c1 < 10))))
Relations: Aggregate on (public.ft1 t1)
Remote SQL: SELECT count("C 1") FILTER (WHERE (($1::integer = 6) AND ($2::integer < 10))) FROM "S 1"."T 1" WHERE (("C 1" = 6))
Remote SQL: SELECT count(*) FILTER (WHERE (($1::integer = 6) AND ($2::integer < 10))) FROM "S 1"."T 1" WHERE (("C 1" = 6))
(13 rows)
select distinct (select count(t1.c1) filter (where t2.c2 = 6 and t2.c1 < 10) from ft1 t1 where t1.c1 = 6) from ft2 t2 where t2.c2 % 6 = 0 order by 1;

View File

@@ -3413,22 +3413,6 @@ add_base_clause_to_rel(PlannerInfo *root, Index relid,
restrictinfo->security_level);
}
/*
 * expr_is_nonnullable
 *		Report whether the given Expr is known to never be NULL.
 *
 * Only plain Vars are examined; every other node type is conservatively
 * reported as possibly NULL.
 */
static bool
expr_is_nonnullable(PlannerInfo *root, Expr *expr)
{
	/* A simple Var is the only case we know how to prove anything about */
	if (IsA(expr, Var))
		return var_is_nonnullable(root, (Var *) expr, true);

	/* Anything else: nullability cannot be ruled out */
	return false;
}
/*
* restriction_is_always_true
* Check to see if the RestrictInfo is always true.
@@ -3465,7 +3449,7 @@ restriction_is_always_true(PlannerInfo *root,
if (nulltest->argisrow)
return false;
return expr_is_nonnullable(root, nulltest->arg);
return expr_is_nonnullable(root, nulltest->arg, true);
}
/* If it's an OR, check its sub-clauses */
@@ -3530,7 +3514,7 @@ restriction_is_always_false(PlannerInfo *root,
if (nulltest->argisrow)
return false;
return expr_is_nonnullable(root, nulltest->arg);
return expr_is_nonnullable(root, nulltest->arg, true);
}
/* If it's an OR, check its sub-clauses */

View File

@@ -131,6 +131,8 @@ static Expr *simplify_function(Oid funcid,
Oid result_collid, Oid input_collid, List **args_p,
bool funcvariadic, bool process_args, bool allow_non_const,
eval_const_expressions_context *context);
static Node *simplify_aggref(Aggref *aggref,
eval_const_expressions_context *context);
static List *reorder_function_arguments(List *args, int pronargs,
HeapTuple func_tuple);
static List *add_function_defaults(List *args, int pronargs,
@@ -2634,6 +2636,9 @@ eval_const_expressions_mutator(Node *node,
newexpr->location = expr->location;
return (Node *) newexpr;
}
case T_Aggref:
node = ece_generic_processing(node);
return simplify_aggref((Aggref *) node, context);
case T_OpExpr:
{
OpExpr *expr = (OpExpr *) node;
@@ -4200,6 +4205,50 @@ simplify_function(Oid funcid, Oid result_type, int32 result_typmod,
return newexpr;
}
/*
 * simplify_aggref
 *		Give the prosupport function of Aggref.aggfnoid, if it has one, the
 *		chance to replace the Aggref with a simplified node.
 *
 * Returns the node handed back by the support function when it performed a
 * conversion.  When the aggregate has no support function, or the support
 * function returns NULL, the original Aggref is returned.
 *
 * See SupportRequestSimplifyAggref comments in supportnodes.h for further
 * details.
 */
static Node *
simplify_aggref(Aggref *aggref, eval_const_expressions_context *context)
{
	Oid			supportfn = get_func_support(aggref->aggfnoid);
	SupportRequestSimplifyAggref request;
	Node	   *result;

	/* Nothing to ask when the aggregate has no support function */
	if (!OidIsValid(supportfn))
		return (Node *) aggref;

	/* Fill in the request node that is handed to the support function */
	request.type = T_SupportRequestSimplifyAggref;
	request.root = context->root;
	request.aggref = aggref;

	result = (Node *) DatumGetPointer(OidFunctionCall1(supportfn,
													   PointerGetDatum(&request)));

	/*
	 * The support function must answer with NULL (no simplification) or with
	 * a node other than the Aggref we passed in.
	 */
	Assert(result == NULL || result != (Node *) aggref);

	return (result != NULL) ? result : (Node *) aggref;
}
/*
* var_is_nonnullable: check to see if the Var cannot be NULL
*
@@ -4261,6 +4310,30 @@ var_is_nonnullable(PlannerInfo *root, Var *var, bool use_rel_info)
return false;
}
/*
 * expr_is_nonnullable: can the given Expr be proven to never be NULL?
 *
 * The result is true only when 'expr' cannot produce SQL NULLs; false means
 * that nullability could not be ruled out.
 *
 * 'use_rel_info' is passed through to var_is_nonnullable().  When true,
 * nullability of Vars is checked via the corresponding RelOptInfo for the
 * given Var.  Callers that need an answer before RelOptInfos are generated
 * should pass it as false.
 *
 * Only Var and Const nodes are handled for now.  Support for other node
 * types may be possible.
 */
bool
expr_is_nonnullable(PlannerInfo *root, Expr *expr, bool use_rel_info)
{
	bool		nonnull = false;

	if (IsA(expr, Const))
		nonnull = !castNode(Const, expr)->constisnull;
	else if (IsA(expr, Var))
		nonnull = var_is_nonnullable(root, (Var *) expr, use_rel_info);

	return nonnull;
}
/*
* expand_function_arguments: convert named-notation args to positional args
* and/or insert default args, as needed

View File

@@ -24,7 +24,7 @@
#include "nodes/supportnodes.h"
#include "optimizer/optimizer.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
typedef struct
{
@@ -811,6 +811,53 @@ int8inc_support(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(req);
}
if (IsA(rawreq, SupportRequestSimplifyAggref))
{
SupportRequestSimplifyAggref *req = (SupportRequestSimplifyAggref *) rawreq;
Aggref *agg = req->aggref;
/*
* Check for COUNT(ANY) and try to convert to COUNT(*). The input
* argument cannot be NULL, we can't have an ORDER BY / DISTINCT in
* the aggregate, and agglevelsup must be 0.
*
* Technically COUNT(ANY) must have 1 arg, but be paranoid and check.
*/
if (agg->aggfnoid == F_COUNT_ANY && list_length(agg->args) == 1)
{
TargetEntry *tle = (TargetEntry *) linitial(agg->args);
Expr *arg = tle->expr;
/* Check for unsupported cases */
if (agg->aggdistinct != NIL || agg->aggorder != NIL ||
agg->agglevelsup != 0)
PG_RETURN_POINTER(NULL);
/* If the arg isn't NULLable, do the conversion */
if (expr_is_nonnullable(req->root, arg, false))
{
Aggref *newagg;
/* We don't expect these to have been set yet */
Assert(agg->aggtransno == -1);
Assert(agg->aggtranstype == InvalidOid);
/* Convert COUNT(ANY) to COUNT(*) by making a new Aggref */
newagg = makeNode(Aggref);
memcpy(newagg, agg, sizeof(Aggref));
newagg->aggfnoid = F_COUNT_;
/* count(*) has no args */
newagg->aggargtypes = NULL;
newagg->args = NULL;
newagg->aggstar = true;
newagg->location = -1;
PG_RETURN_POINTER(newagg);
}
}
}
PG_RETURN_POINTER(NULL);
}

View File

@@ -71,6 +71,31 @@ typedef struct SupportRequestSimplify
FuncExpr *fcall; /* Function call to be simplified */
} SupportRequestSimplify;
/*
 * Similar to SupportRequestSimplify but for Aggref node types.
 *
 * This supports conversions such as swapping COUNT(1) or COUNT(notnullcol)
 * for COUNT(*).
 *
 * Supporting functions can consult 'root' and the input 'aggref'.  When the
 * implementing support function deems the simplification is possible, it must
 * create a new Node (probably another Aggref) and not modify the original.
 * The newly created Node should then be returned to indicate that the
 * conversion is to take place.  When no conversion is possible, a NULL
 * pointer should be returned.  Returning the input Aggref itself is not a
 * valid answer; the caller asserts against it.
 *
 * It is important to consider that implementing support functions can receive
 * Aggrefs with agglevelsup > 0.  Careful consideration should be given to
 * whether the simplification is still possible at levels above 0.
 */
typedef struct SupportRequestSimplifyAggref
{
/* Node tag; the caller sets this to T_SupportRequestSimplifyAggref */
NodeTag type;
PlannerInfo *root; /* Planner's infrastructure */
Aggref *aggref; /* Aggref to be simplified; must not be modified */
} SupportRequestSimplifyAggref;
/*
* The InlineInFrom request allows the support function to perform plan-time
* simplification of a call to its target function that appears in FROM.

View File

@@ -147,6 +147,9 @@ extern Expr *evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod,
extern bool var_is_nonnullable(PlannerInfo *root, Var *var, bool use_rel_info);
extern bool expr_is_nonnullable(PlannerInfo *root, Expr *expr,
bool use_rel_info);
extern List *expand_function_arguments(List *args, bool include_out_arguments,
Oid result_type,
HeapTuple func_tuple);

View File

@@ -1219,19 +1219,18 @@ select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
9999 | 1
(3 rows)
-- interesting corner case: constant gets optimized into a seqscan
-- two interesting corner cases: both non-null and null constant gets
-- optimized into a seqscan
explain (costs off)
select max(100) from tenk1;
QUERY PLAN
----------------------------------------------------
QUERY PLAN
---------------------------------
Result
Replaces: MinMaxAggregate
InitPlan minmax_1
-> Limit
-> Result
One-Time Filter: (100 IS NOT NULL)
-> Seq Scan on tenk1
(7 rows)
-> Seq Scan on tenk1
(5 rows)
select max(100) from tenk1;
max
@@ -1239,6 +1238,25 @@ select max(100) from tenk1;
100
(1 row)
explain (costs off)
select max(null) from tenk1;
QUERY PLAN
-----------------------------------------------------------
Result
Replaces: MinMaxAggregate
InitPlan minmax_1
-> Limit
-> Result
One-Time Filter: (NULL::text IS NOT NULL)
-> Seq Scan on tenk1
(7 rows)
select max(null) from tenk1;
max
-----
(1 row)
-- try it on an inheritance tree
create table minmaxtest(f1 int);
create table minmaxtest1() inherits (minmaxtest);
@@ -2821,6 +2839,101 @@ select pg_typeof(cleast_agg(variadic array[4.5,f1])) from int4_tbl;
numeric
(1 row)
--
-- Test SupportRequestSimplifyAggref code
--
begin;
create table agg_simplify (a int, not_null_col int not null, nullable_col int);
-- Ensure count(not_null_col) uses count(*)
explain (costs off, verbose)
select count(not_null_col) from agg_simplify;
QUERY PLAN
-----------------------------------------------
Aggregate
Output: count(*)
-> Seq Scan on public.agg_simplify
Output: a, not_null_col, nullable_col
(4 rows)
-- Ensure count(<not null const>) uses count(*)
explain (costs off, verbose)
select count('bananas') from agg_simplify;
QUERY PLAN
-----------------------------------------------
Aggregate
Output: count(*)
-> Seq Scan on public.agg_simplify
Output: a, not_null_col, nullable_col
(4 rows)
-- Ensure count(null) isn't optimized
explain (costs off, verbose)
select count(null) from agg_simplify;
QUERY PLAN
-----------------------------------------------
Aggregate
Output: count(NULL::unknown)
-> Seq Scan on public.agg_simplify
Output: a, not_null_col, nullable_col
(4 rows)
-- Ensure count(nullable_col) does not use count(*)
explain (costs off, verbose)
select count(nullable_col) from agg_simplify;
QUERY PLAN
-----------------------------------------------
Aggregate
Output: count(nullable_col)
-> Seq Scan on public.agg_simplify
Output: a, not_null_col, nullable_col
(4 rows)
-- Ensure there's no optimization with DISTINCT aggs
explain (costs off, verbose)
select count(distinct not_null_col) from agg_simplify;
QUERY PLAN
---------------------------------------------
Aggregate
Output: count(DISTINCT not_null_col)
-> Sort
Output: not_null_col
Sort Key: agg_simplify.not_null_col
-> Seq Scan on public.agg_simplify
Output: not_null_col
(7 rows)
-- Ensure there's no optimization with ORDER BY aggs
explain (costs off, verbose)
select count(not_null_col order by not_null_col) from agg_simplify;
QUERY PLAN
-----------------------------------------------------
Aggregate
Output: count(not_null_col ORDER BY not_null_col)
-> Sort
Output: not_null_col
Sort Key: agg_simplify.not_null_col
-> Seq Scan on public.agg_simplify
Output: not_null_col
(7 rows)
-- Ensure we don't optimize to count(*) with agglevelsup > 0
explain (costs off, verbose)
select a from agg_simplify a group by a
having exists (select 1 from onek b where count(a.not_null_col) = b.four);
QUERY PLAN
-----------------------------------------------------
HashAggregate
Output: a.a
Group Key: a.a
Filter: EXISTS(SubPlan exists_1)
-> Seq Scan on public.agg_simplify a
Output: a.a, a.not_null_col, a.nullable_col
SubPlan exists_1
-> Seq Scan on public.onek b
Filter: (count(a.not_null_col) = b.four)
(9 rows)
rollback;
-- test aggregates with common transition functions share the same states
begin work;
create type avg_state as (total bigint, count bigint);

View File

@@ -416,11 +416,16 @@ explain (costs off)
select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
-- interesting corner case: constant gets optimized into a seqscan
-- two interesting corner cases: both non-null and null constant gets
-- optimized into a seqscan
explain (costs off)
select max(100) from tenk1;
select max(100) from tenk1;
explain (costs off)
select max(null) from tenk1;
select max(null) from tenk1;
-- try it on an inheritance tree
create table minmaxtest(f1 int);
create table minmaxtest1() inherits (minmaxtest);
@@ -1108,6 +1113,43 @@ select cleast_agg(4.5,f1) from int4_tbl;
select cleast_agg(variadic array[4.5,f1]) from int4_tbl;
select pg_typeof(cleast_agg(variadic array[4.5,f1])) from int4_tbl;
--
-- Test SupportRequestSimplifyAggref code
--
begin;
create table agg_simplify (a int, not_null_col int not null, nullable_col int);
-- Ensure count(not_null_col) uses count(*)
explain (costs off, verbose)
select count(not_null_col) from agg_simplify;
-- Ensure count(<not null const>) uses count(*)
explain (costs off, verbose)
select count('bananas') from agg_simplify;
-- Ensure count(null) isn't optimized
explain (costs off, verbose)
select count(null) from agg_simplify;
-- Ensure count(nullable_col) does not use count(*)
explain (costs off, verbose)
select count(nullable_col) from agg_simplify;
-- Ensure there's no optimization with DISTINCT aggs
explain (costs off, verbose)
select count(distinct not_null_col) from agg_simplify;
-- Ensure there's no optimization with ORDER BY aggs
explain (costs off, verbose)
select count(not_null_col order by not_null_col) from agg_simplify;
-- Ensure we don't optimize to count(*) with agglevelsup > 0
explain (costs off, verbose)
select a from agg_simplify a group by a
having exists (select 1 from onek b where count(a.not_null_col) = b.four);
rollback;
-- test aggregates with common transition functions share the same states
begin work;

View File

@@ -2927,6 +2927,7 @@ SupportRequestOptimizeWindowClause
SupportRequestRows
SupportRequestSelectivity
SupportRequestSimplify
SupportRequestSimplifyAggref
SupportRequestWFuncMonotonic
Syn
SyncOps