1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-19 13:42:17 +03:00

Don't allow CTEs to determine semantic levels of aggregates.

The fix for bug #19055 (commit b0cc0a71e) allowed CTE references in
sub-selects within aggregate functions to affect the semantic levels
assigned to such aggregates.  It turns out this broke some related
cases, leading to assertion failures or strange planner errors such
as "unexpected outer reference in CTE query".  After experimenting
with some alternative rules for assigning the semantic level in
such cases, we've come to the conclusion that changing the level
is more likely to break things than be helpful.

Therefore, this patch undoes what b0cc0a71e changed, and instead
installs logic to throw an error if there is any reference to a
CTE that's below the semantic level that standard SQL rules would
assign to the aggregate based on its contained Var and Aggref nodes.
(The SQL standard disallows sub-selects within aggregate functions,
so it can't reach the troublesome case and hence has no rule for
what to do.)

Perhaps someone will come along with a legitimate query that this
logic rejects, and if so probably the example will help us craft
a level-adjustment rule that works better than what b0cc0a71e did.
I'm not holding my breath for that though, because the previous
logic had been there for a very long time before bug #19055 without
complaints, and that bug report sure looks to have originated from
fuzzing not from real usage.

Like b0cc0a71e, back-patch to all supported branches, though
sadly that no longer includes v13.

Bug: #19106
Reported-by: Kamil Monicz <kamil@monicz.dev>
Author: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/19106-9dd3668a0734cd72@postgresql.org
Backpatch-through: 14
This commit is contained in:
Tom Lane
2025-11-18 12:56:55 -05:00
parent faf4128ad3
commit 35b5c62c3a
3 changed files with 89 additions and 44 deletions

View File

@@ -38,6 +38,8 @@ typedef struct
ParseState *pstate; ParseState *pstate;
int min_varlevel; int min_varlevel;
int min_agglevel; int min_agglevel;
int min_ctelevel;
RangeTblEntry *min_cte;
int sublevels_up; int sublevels_up;
} check_agg_arguments_context; } check_agg_arguments_context;
@@ -58,7 +60,8 @@ typedef struct
static int check_agg_arguments(ParseState *pstate, static int check_agg_arguments(ParseState *pstate,
List *directargs, List *directargs,
List *args, List *args,
Expr *filter); Expr *filter,
int agglocation);
static bool check_agg_arguments_walker(Node *node, static bool check_agg_arguments_walker(Node *node,
check_agg_arguments_context *context); check_agg_arguments_context *context);
static Node *substitute_grouped_columns(Node *node, ParseState *pstate, Query *qry, static Node *substitute_grouped_columns(Node *node, ParseState *pstate, Query *qry,
@@ -339,7 +342,8 @@ check_agglevels_and_constraints(ParseState *pstate, Node *expr)
min_varlevel = check_agg_arguments(pstate, min_varlevel = check_agg_arguments(pstate,
directargs, directargs,
args, args,
filter); filter,
location);
*p_levelsup = min_varlevel; *p_levelsup = min_varlevel;
@@ -641,7 +645,8 @@ static int
check_agg_arguments(ParseState *pstate, check_agg_arguments(ParseState *pstate,
List *directargs, List *directargs,
List *args, List *args,
Expr *filter) Expr *filter,
int agglocation)
{ {
int agglevel; int agglevel;
check_agg_arguments_context context; check_agg_arguments_context context;
@@ -649,6 +654,8 @@ check_agg_arguments(ParseState *pstate,
context.pstate = pstate; context.pstate = pstate;
context.min_varlevel = -1; /* signifies nothing found yet */ context.min_varlevel = -1; /* signifies nothing found yet */
context.min_agglevel = -1; context.min_agglevel = -1;
context.min_ctelevel = -1;
context.min_cte = NULL;
context.sublevels_up = 0; context.sublevels_up = 0;
(void) check_agg_arguments_walker((Node *) args, &context); (void) check_agg_arguments_walker((Node *) args, &context);
@@ -686,6 +693,20 @@ check_agg_arguments(ParseState *pstate,
parser_errposition(pstate, aggloc))); parser_errposition(pstate, aggloc)));
} }
/*
* If there's a non-local CTE that's below the aggregate's semantic level,
* complain. It's not quite clear what we should do to fix up such a case
* (treating the CTE reference like a Var seems wrong), and it's also
* unclear whether there is a real-world use for such cases.
*/
if (context.min_ctelevel >= 0 && context.min_ctelevel < agglevel)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("outer-level aggregate cannot use a nested CTE"),
errdetail("CTE \"%s\" is below the aggregate's semantic level.",
context.min_cte->eref->aliasname),
parser_errposition(pstate, agglocation)));
/* /*
* Now check for vars/aggs in the direct arguments, and throw error if * Now check for vars/aggs in the direct arguments, and throw error if
* needed. Note that we allow a Var of the agg's semantic level, but not * needed. Note that we allow a Var of the agg's semantic level, but not
@@ -699,6 +720,7 @@ check_agg_arguments(ParseState *pstate,
{ {
context.min_varlevel = -1; context.min_varlevel = -1;
context.min_agglevel = -1; context.min_agglevel = -1;
context.min_ctelevel = -1;
(void) check_agg_arguments_walker((Node *) directargs, &context); (void) check_agg_arguments_walker((Node *) directargs, &context);
if (context.min_varlevel >= 0 && context.min_varlevel < agglevel) if (context.min_varlevel >= 0 && context.min_varlevel < agglevel)
ereport(ERROR, ereport(ERROR,
@@ -714,6 +736,13 @@ check_agg_arguments(ParseState *pstate,
parser_errposition(pstate, parser_errposition(pstate,
locate_agg_of_level((Node *) directargs, locate_agg_of_level((Node *) directargs,
context.min_agglevel)))); context.min_agglevel))));
if (context.min_ctelevel >= 0 && context.min_ctelevel < agglevel)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("outer-level aggregate cannot use a nested CTE"),
errdetail("CTE \"%s\" is below the aggregate's semantic level.",
context.min_cte->eref->aliasname),
parser_errposition(pstate, agglocation)));
} }
return agglevel; return agglevel;
} }
@@ -794,11 +823,6 @@ check_agg_arguments_walker(Node *node,
if (IsA(node, RangeTblEntry)) if (IsA(node, RangeTblEntry))
{ {
/*
* CTE references act similarly to Vars of the CTE's level. Without
* this we might conclude that the Agg can be evaluated above the CTE,
* leading to trouble.
*/
RangeTblEntry *rte = (RangeTblEntry *) node; RangeTblEntry *rte = (RangeTblEntry *) node;
if (rte->rtekind == RTE_CTE) if (rte->rtekind == RTE_CTE)
@@ -810,9 +834,12 @@ check_agg_arguments_walker(Node *node,
/* ignore local CTEs of subqueries */ /* ignore local CTEs of subqueries */
if (ctelevelsup >= 0) if (ctelevelsup >= 0)
{ {
if (context->min_varlevel < 0 || if (context->min_ctelevel < 0 ||
context->min_varlevel > ctelevelsup) context->min_ctelevel > ctelevelsup)
context->min_varlevel = ctelevelsup; {
context->min_ctelevel = ctelevelsup;
context->min_cte = rte;
}
} }
} }
return false; /* allow range_table_walker to continue */ return false; /* allow range_table_walker to continue */

View File

@@ -2300,36 +2300,44 @@ from int4_tbl;
-- --
-- test for bug #19055: interaction of WITH with aggregates -- test for bug #19055: interaction of WITH with aggregates
-- --
-- The reference to cte1 must determine the aggregate's level, -- For now, we just throw an error if there's a use of a CTE below the
-- even though it contains no Vars referencing cte1 -- semantic level that the SQL standard assigns to the aggregate.
-- It's not entirely clear what we could do instead that doesn't risk
-- breaking more things than it fixes.
select f1, (with cte1(x,y) as (select 1,2)
select count((select i4.f1 from cte1))) as ss
from int4_tbl i4;
ERROR: outer-level aggregate cannot use a nested CTE
LINE 2: select count((select i4.f1 from cte1))) as ss
^
DETAIL: CTE "cte1" is below the aggregate's semantic level.
--
-- test for bug #19106: interaction of WITH with aggregates
--
-- the initial fix for #19055 was too aggressive and broke this case
explain (verbose, costs off) explain (verbose, costs off)
select f1, (with cte1(x,y) as (select 1,2) with a as ( select id from (values (1), (2)) as v(id) ),
select count((select i4.f1 from cte1))) as ss b as ( select max((select sum(id) from a)) as agg )
from int4_tbl i4; select agg from b;
QUERY PLAN QUERY PLAN
------------------------------------------------- --------------------------------------------
Seq Scan on public.int4_tbl i4 Aggregate
Output: i4.f1, (SubPlan expr_1) Output: max((InitPlan expr_1).col1)
SubPlan expr_1 InitPlan expr_1
-> Aggregate -> Aggregate
Output: count((InitPlan expr_2).col1) Output: sum("*VALUES*".column1)
InitPlan expr_2 -> Values Scan on "*VALUES*"
-> Result Output: "*VALUES*".column1
Output: i4.f1 -> Result
-> Result (8 rows)
(9 rows)
select f1, (with cte1(x,y) as (select 1,2) with a as ( select id from (values (1), (2)) as v(id) ),
select count((select i4.f1 from cte1))) as ss b as ( select max((select sum(id) from a)) as agg )
from int4_tbl i4; select agg from b;
f1 | ss agg
-------------+---- -----
0 | 1 3
123456 | 1 (1 row)
-123456 | 1
2147483647 | 1
-2147483647 | 1
(5 rows)
-- --
-- test for nested-recursive-WITH bug -- test for nested-recursive-WITH bug

View File

@@ -1100,16 +1100,26 @@ from int4_tbl;
-- --
-- test for bug #19055: interaction of WITH with aggregates -- test for bug #19055: interaction of WITH with aggregates
-- --
-- The reference to cte1 must determine the aggregate's level, -- For now, we just throw an error if there's a use of a CTE below the
-- even though it contains no Vars referencing cte1 -- semantic level that the SQL standard assigns to the aggregate.
explain (verbose, costs off) -- It's not entirely clear what we could do instead that doesn't risk
-- breaking more things than it fixes.
select f1, (with cte1(x,y) as (select 1,2) select f1, (with cte1(x,y) as (select 1,2)
select count((select i4.f1 from cte1))) as ss select count((select i4.f1 from cte1))) as ss
from int4_tbl i4; from int4_tbl i4;
select f1, (with cte1(x,y) as (select 1,2) --
select count((select i4.f1 from cte1))) as ss -- test for bug #19106: interaction of WITH with aggregates
from int4_tbl i4; --
-- the initial fix for #19055 was too aggressive and broke this case
explain (verbose, costs off)
with a as ( select id from (values (1), (2)) as v(id) ),
b as ( select max((select sum(id) from a)) as agg )
select agg from b;
with a as ( select id from (values (1), (2)) as v(id) ),
b as ( select max((select sum(id) from a)) as agg )
select agg from b;
-- --
-- test for nested-recursive-WITH bug -- test for nested-recursive-WITH bug