From 34740b90bc123d645a3a71231b765b778bdcf049 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Mon, 19 Jan 2026 11:13:23 +0900 Subject: [PATCH] Fix unsafe pushdown of quals referencing grouping Vars When checking a subquery's output expressions to see if it's safe to push down an upper-level qual, check_output_expressions() previously treated grouping Vars as opaque Vars. This implicitly assumed they were stable and scalar. However, a grouping Var's underlying expression corresponds to the grouping clause, which may be volatile or set-returning. If an upper-level qual references such an output column, pushing it down into the subquery is unsafe. This can cause strange results due to multiple evaluation of a volatile function, or introduce SRFs into the subquery's WHERE/HAVING quals. This patch teaches check_output_expressions() to look through grouping Vars to their underlying expressions. This ensures that any volatility or set-returning properties in the grouping clause are detected, preventing the unsafe pushdown. We do not need to recursively examine the Vars contained in these underlying expressions. Even if they reference outputs from lower-level subqueries (at any depth), those references are guaranteed not to expand to volatile or set-returning functions, because subqueries containing such functions in their targetlists are never pulled up. Backpatch to v18, where this issue was introduced. Reported-by: Eric Ridge Diagnosed-by: Tom Lane Author: Richard Guo Discussion: https://postgr.es/m/7900964C-F99E-481E-BEE5-4338774CEB9F@gmail.com Backpatch-through: 18 --- src/backend/optimizer/path/allpaths.c | 26 +++++- src/backend/optimizer/util/var.c | 8 +- src/test/regress/expected/subselect.out | 104 ++++++++++++++++++++++++ src/test/regress/sql/subselect.sql | 40 +++++++++ 4 files changed, 175 insertions(+), 3 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 6e641c146a3..e00d1700817 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -4204,9 +4204,33 @@ recurse_pushdown_safe(Node *setOp, Query *topquery, static void check_output_expressions(Query *subquery, pushdown_safety_info *safetyInfo) { + List *flattened_targetList = subquery->targetList; ListCell *lc; - foreach(lc, subquery->targetList) + /* + * We must be careful with grouping Vars and join alias Vars in the + * subquery's outputs, as they hide the underlying expressions. + * + * We need to expand grouping Vars to their underlying expressions (the + * grouping clauses) because the grouping expressions themselves might be + * volatile or set-returning. However, we do not need to expand join + * alias Vars, as their underlying structure does not introduce volatile + * or set-returning functions at the current level. + * + * In neither case do we need to recursively examine the Vars contained in + * these underlying expressions. Even if they reference outputs from + * lower-level subqueries (at any depth), those references are guaranteed + * not to expand to volatile or set-returning functions, because + * subqueries containing such functions in their targetlists are never + * pulled up. + */ + if (subquery->hasGroupRTE) + { + flattened_targetList = (List *) + flatten_group_exprs(NULL, subquery, (Node *) subquery->targetList); + } + + foreach(lc, flattened_targetList) { TargetEntry *tle = (TargetEntry *) lfirst(lc); diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c index 99da622507b..66f5b598f0c 100644 --- a/src/backend/optimizer/util/var.c +++ b/src/backend/optimizer/util/var.c @@ -959,10 +959,14 @@ flatten_join_alias_vars_mutator(Node *node, * wrapper. * * NOTE: this is also used by ruleutils.c, to deparse one query parsetree back - * to source text. For that use-case, root will be NULL, which is why we have - * to pass the Query separately. We need the root itself only for preserving + * to source text, and by check_output_expressions() to check for unsafe + * pushdowns. For these use-cases, root will be NULL, which is why we have to + * pass the Query separately. We need the root itself only for preserving * varnullingrels. We can avoid preserving varnullingrels in the ruleutils.c's * usage because it does not make any difference to the deparsed source text. + * We can also avoid it in check_output_expressions() because that function + * uses the expanded expressions solely to check for volatile or set-returning + * functions, which is independent of the Vars' nullingrels. */ Node * flatten_group_exprs(PlannerInfo *root, Query *query, Node *node) diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 37ed59e73bf..2135d82884d 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -1925,6 +1925,110 @@ NOTICE: x = 9, y = 13 9 | 3 (3 rows) +-- +-- check that an upper-level qual is not pushed down if it references a grouped +-- Var whose underlying expression contains SRFs +-- +explain (verbose, costs off) +select * from + (select generate_series(1, ten) as g, count(*) from tenk1 group by 1) ss + where ss.g = 1; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Subquery Scan on ss + Output: ss.g, ss.count + Filter: (ss.g = 1) + -> HashAggregate + Output: (generate_series(1, tenk1.ten)), count(*) + Group Key: generate_series(1, tenk1.ten) + -> ProjectSet + Output: generate_series(1, tenk1.ten) + -> Seq Scan on public.tenk1 + Output: tenk1.unique1, tenk1.unique2, tenk1.two, tenk1.four, tenk1.ten, tenk1.twenty, tenk1.hundred, tenk1.thousand, tenk1.twothousand, tenk1.fivethous, tenk1.tenthous, tenk1.odd, tenk1.even, tenk1.stringu1, tenk1.stringu2, tenk1.string4 +(10 rows) + +select * from + (select generate_series(1, ten) as g, count(*) from tenk1 group by 1) ss + where ss.g = 1; + g | count +---+------- + 1 | 9000 +(1 row) + +-- +-- check that an upper-level qual is not pushed down if it references a grouped +-- Var whose underlying expression contains volatile functions +-- +alter function tattle(x int, y int) volatile; +explain (verbose, costs off) +select * from + (select tattle(3, ten) as v, count(*) from tenk1 where unique1 < 3 group by 1) ss + where ss.v; + QUERY PLAN +------------------------------------------------------------ + Subquery Scan on ss + Output: ss.v, ss.count + Filter: ss.v + -> GroupAggregate + Output: (tattle(3, tenk1.ten)), count(*) + Group Key: (tattle(3, tenk1.ten)) + -> Sort + Output: (tattle(3, tenk1.ten)) + Sort Key: (tattle(3, tenk1.ten)) + -> Bitmap Heap Scan on public.tenk1 + Output: tattle(3, tenk1.ten) + Recheck Cond: (tenk1.unique1 < 3) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (tenk1.unique1 < 3) +(14 rows) + +select * from + (select tattle(3, ten) as v, count(*) from tenk1 where unique1 < 3 group by 1) ss + where ss.v; +NOTICE: x = 3, y = 2 +NOTICE: x = 3, y = 1 +NOTICE: x = 3, y = 0 + v | count +---+------- + t | 3 +(1 row) + +-- if we pretend it's stable, we get different results: +alter function tattle(x int, y int) stable; +explain (verbose, costs off) +select * from + (select tattle(3, ten) as v, count(*) from tenk1 where unique1 < 3 group by 1) ss + where ss.v; + QUERY PLAN +------------------------------------------------------ + GroupAggregate + Output: (tattle(3, tenk1.ten)), count(*) + Group Key: (tattle(3, tenk1.ten)) + -> Sort + Output: (tattle(3, tenk1.ten)) + Sort Key: (tattle(3, tenk1.ten)) + -> Bitmap Heap Scan on public.tenk1 + Output: tattle(3, tenk1.ten) + Recheck Cond: (tenk1.unique1 < 3) + Filter: tattle(3, tenk1.ten) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (tenk1.unique1 < 3) +(12 rows) + +select * from + (select tattle(3, ten) as v, count(*) from tenk1 where unique1 < 3 group by 1) ss + where ss.v; +NOTICE: x = 3, y = 2 +NOTICE: x = 3, y = 2 +NOTICE: x = 3, y = 1 +NOTICE: x = 3, y = 1 +NOTICE: x = 3, y = 0 +NOTICE: x = 3, y = 0 + v | count +---+------- + t | 3 +(1 row) + drop function tattle(x int, y int); -- -- Test that LIMIT can be pushed to SORT through a subquery that just projects diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index 192a6f96b93..cadc3293687 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -952,6 +952,46 @@ select * from (select 9 as x, unnest(array[1,2,3,11,12,13]) as u) ss where tattle(x, u); +-- +-- check that an upper-level qual is not pushed down if it references a grouped +-- Var whose underlying expression contains SRFs +-- +explain (verbose, costs off) +select * from + (select generate_series(1, ten) as g, count(*) from tenk1 group by 1) ss + where ss.g = 1; + +select * from + (select generate_series(1, ten) as g, count(*) from tenk1 group by 1) ss + where ss.g = 1; + +-- +-- check that an upper-level qual is not pushed down if it references a grouped +-- Var whose underlying expression contains volatile functions +-- +alter function tattle(x int, y int) volatile; + +explain (verbose, costs off) +select * from + (select tattle(3, ten) as v, count(*) from tenk1 where unique1 < 3 group by 1) ss + where ss.v; + +select * from + (select tattle(3, ten) as v, count(*) from tenk1 where unique1 < 3 group by 1) ss + where ss.v; + +-- if we pretend it's stable, we get different results: +alter function tattle(x int, y int) stable; + +explain (verbose, costs off) +select * from + (select tattle(3, ten) as v, count(*) from tenk1 where unique1 < 3 group by 1) ss + where ss.v; + +select * from + (select tattle(3, ten) as v, count(*) from tenk1 where unique1 < 3 group by 1) ss + where ss.v; + drop function tattle(x int, y int); --