From 18d2614093481cb7ae89e1f5b72f3ddf5fb49b4c Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Tue, 21 Oct 2025 12:35:36 +0900 Subject: [PATCH] Fix pushdown of degenerate HAVING clauses 67a54b9e8 taught the planner to push down HAVING clauses even when grouping sets are present, as long as the clause does not reference any columns that are nullable by the grouping sets. However, there was an oversight: if any empty grouping sets are present, the aggregation node can produce a row that did not come from the input, and pushing down a HAVING clause in this case may cause us to fail to filter out that row. Currently, non-degenerate HAVING clauses are not pushed down when empty grouping sets are present, since the empty grouping sets would nullify the vars they reference. However, degenerate (variable-free) HAVING clauses are not subject to this restriction and may be incorrectly pushed down. To fix, explicitly check for the presence of empty grouping sets and retain degenerate clauses in HAVING when they are present. This ensures that we don't emit a bogus aggregated row. A copy of each such clause is also put in WHERE so that query_planner() can use it in a gating Result node. To facilitate this check, this patch expands the groupingSets tree of the query to a flat list of grouping sets before applying the HAVING pushdown optimization. This does not add any additional planning overhead, since we need to do this expansion anyway. In passing, make a small tweak to preprocess_grouping_sets() by reordering its initial operations a bit. Backpatch to v18, where this issue was introduced. Reported-by: Yuhang Qiu Author: Richard Guo Author: Tom Lane Discussion: https://postgr.es/m/0879D9C9-7FE2-4A20-9593-B23F7A0B5290@gmail.com Backpatch-through: 18 --- src/backend/optimizer/plan/planner.c | 67 +++++++++++------- src/test/regress/expected/groupingsets.out | 82 +++++++++++++++++++++- src/test/regress/sql/groupingsets.sql | 20 +++++- 3 files changed, 143 insertions(+), 26 deletions(-) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 342d782c74b..c4fd646b999 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1127,15 +1127,28 @@ subquery_planner(PlannerGlobal *glob, Query *parse, char *plan_name, if (parse->hasTargetSRFs) parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList); + /* + * If we have grouping sets, expand the groupingSets tree of this query to + * a flat list of grouping sets. We need to do this before optimizing + * HAVING, since we can't easily tell if there's an empty grouping set + * until we have this representation. + */ + if (parse->groupingSets) + { + parse->groupingSets = + expand_grouping_sets(parse->groupingSets, parse->groupDistinct, -1); + } + /* * In some cases we may want to transfer a HAVING clause into WHERE. We * cannot do so if the HAVING clause contains aggregates (obviously) or * volatile functions (since a HAVING clause is supposed to be executed - * only once per group). We also can't do this if there are any nonempty - * grouping sets and the clause references any columns that are nullable - * by the grouping sets; moving such a clause into WHERE would potentially - * change the results. (If there are only empty grouping sets, then the - * HAVING clause must be degenerate as discussed below.) + * only once per group). We also can't do this if there are any grouping + * sets and the clause references any columns that are nullable by the + * grouping sets; the nulled values of those columns are not available + * before the grouping step. (The test on groupClause might seem wrong, + * but it's okay: it's just an optimization to avoid running pull_varnos + * when there cannot be any Vars in the HAVING clause.) * * Also, it may be that the clause is so expensive to execute that we're * better off doing it only once per group, despite the loss of @@ -1145,19 +1158,19 @@ subquery_planner(PlannerGlobal *glob, Query *parse, char *plan_name, * clause into WHERE, in hopes of eliminating tuples before aggregation * instead of after. * - * If the query has explicit grouping then we can simply move such a + * If the query has no empty grouping set then we can simply move such a * clause into WHERE; any group that fails the clause will not be in the * output because none of its tuples will reach the grouping or - * aggregation stage. Otherwise we must have a degenerate (variable-free) - * HAVING clause, which we put in WHERE so that query_planner() can use it - * in a gating Result node, but also keep in HAVING to ensure that we - * don't emit a bogus aggregated row. (This could be done better, but it - * seems not worth optimizing.) + * aggregation stage. Otherwise we have to keep the clause in HAVING to + * ensure that we don't emit a bogus aggregated row. But then the HAVING + * clause must be degenerate (variable-free), so we can copy it into WHERE + * so that query_planner() can use it in a gating Result node. (This could + * be done better, but it seems not worth optimizing.) * * Note that a HAVING clause may contain expressions that are not fully * preprocessed. This can happen if these expressions are part of * grouping items. In such cases, they are replaced with GROUP Vars in - * the parser and then replaced back after we've done with expression + * the parser and then replaced back after we're done with expression * preprocessing on havingQual. This is not an issue if the clause * remains in HAVING, because these expressions will be matched to lower * target items in setrefs.c. However, if the clause is moved or copied @@ -1182,8 +1195,11 @@ subquery_planner(PlannerGlobal *glob, Query *parse, char *plan_name, /* keep it in HAVING */ newHaving = lappend(newHaving, havingclause); } - else if (parse->groupClause) + else if (parse->groupClause && + (parse->groupingSets == NIL || + (List *) linitial(parse->groupingSets) != NIL)) { + /* There is GROUP BY, but no empty grouping set */ Node *whereclause; /* Preprocess the HAVING clause fully */ @@ -1196,6 +1212,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, char *plan_name, } else { + /* There is an empty grouping set (perhaps implicitly) */ Node *whereclause; /* Preprocess the HAVING clause fully */ @@ -2181,10 +2198,13 @@ grouping_planner(PlannerInfo *root, double tuple_fraction, } /* - * Do preprocessing for groupingSets clause and related data. This handles the - * preliminary steps of expanding the grouping sets, organizing them into lists - * of rollups, and preparing annotations which will later be filled in with - * size estimates. + * Do preprocessing for groupingSets clause and related data. + * + * We expect that parse->groupingSets has already been expanded into a flat + * list of grouping sets (that is, just integer Lists of ressortgroupref + * numbers) by expand_grouping_sets(). This function handles the preliminary + * steps of organizing the grouping sets into lists of rollups, and preparing + * annotations which will later be filled in with size estimates. */ static grouping_sets_data * preprocess_grouping_sets(PlannerInfo *root) @@ -2195,19 +2215,18 @@ preprocess_grouping_sets(PlannerInfo *root) ListCell *lc_set; grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data)); - parse->groupingSets = expand_grouping_sets(parse->groupingSets, parse->groupDistinct, -1); - - gd->any_hashable = false; - gd->unhashable_refs = NULL; - gd->unsortable_refs = NULL; - gd->unsortable_sets = NIL; - /* * We don't currently make any attempt to optimize the groupClause when * there are grouping sets, so just duplicate it in processed_groupClause. */ root->processed_groupClause = parse->groupClause; + /* Detect unhashable and unsortable grouping expressions */ + gd->any_hashable = false; + gd->unhashable_refs = NULL; + gd->unsortable_refs = NULL; + gd->unsortable_sets = NIL; + if (parse->groupClause) { ListCell *lc; diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out index 991121545c5..398cf6965e0 100644 --- a/src/test/regress/expected/groupingsets.out +++ b/src/test/regress/expected/groupingsets.out @@ -890,7 +890,8 @@ explain (costs off) -> Seq Scan on gstest2 (10 rows) --- test pushdown of HAVING clause that does not reference any columns that are nullable by grouping sets +-- test pushdown of non-degenerate HAVING clause that does not reference any +-- columns that are nullable by grouping sets explain (costs off) select a, b, count(*) from gstest2 group by grouping sets ((a, b), (a)) having a > 1 and b > 1; QUERY PLAN @@ -911,6 +912,85 @@ select a, b, count(*) from gstest2 group by grouping sets ((a, b), (a)) having a 2 | 2 | 1 (1 row) +explain (costs off) +select a, b, count(*) from gstest2 group by rollup(a), b having b > 1; + QUERY PLAN +--------------------------------- + GroupAggregate + Group Key: b, a + Group Key: b + -> Sort + Sort Key: b, a + -> Seq Scan on gstest2 + Filter: (b > 1) +(7 rows) + +select a, b, count(*) from gstest2 group by rollup(a), b having b > 1; + a | b | count +---+---+------- + 1 | 2 | 1 + 2 | 2 | 1 + | 2 | 2 +(3 rows) + +-- test pushdown of degenerate HAVING clause +explain (costs off) +select count(*) from gstest2 group by grouping sets (()) having false; + QUERY PLAN +----------------------------------- + Aggregate + Group Key: () + Filter: false + -> Result + Replaces: Scan on gstest2 + One-Time Filter: false +(6 rows) + +select count(*) from gstest2 group by grouping sets (()) having false; + count +------- +(0 rows) + +explain (costs off) +select a, count(*) from gstest2 group by grouping sets ((a), ()) having false; + QUERY PLAN +----------------------------------------- + GroupAggregate + Group Key: a + Group Key: () + Filter: false + -> Sort + Sort Key: a + -> Result + Replaces: Scan on gstest2 + One-Time Filter: false +(9 rows) + +select a, count(*) from gstest2 group by grouping sets ((a), ()) having false; + a | count +---+------- +(0 rows) + +explain (costs off) +select a, b, count(*) from gstest2 group by grouping sets ((a), (b)) having false; + QUERY PLAN +----------------------------------------- + GroupAggregate + Group Key: a + Sort Key: b + Group Key: b + -> Sort + Sort Key: a + -> Result + Replaces: Scan on gstest2 + One-Time Filter: false +(9 rows) + +select a, b, count(*) from gstest2 group by grouping sets ((a), (b)) having false; + a | b | count +---+---+------- +(0 rows) + -- HAVING with GROUPING queries select ten, grouping(ten) from onek group by grouping sets(ten) having grouping(ten) >= 0 diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql index 38d3cdd0fd8..6d875475fae 100644 --- a/src/test/regress/sql/groupingsets.sql +++ b/src/test/regress/sql/groupingsets.sql @@ -290,11 +290,29 @@ explain (costs off) select v.c, (select count(*) from gstest2 group by () having v.c) from (values (false),(true)) v(c) order by v.c; --- test pushdown of HAVING clause that does not reference any columns that are nullable by grouping sets +-- test pushdown of non-degenerate HAVING clause that does not reference any +-- columns that are nullable by grouping sets explain (costs off) select a, b, count(*) from gstest2 group by grouping sets ((a, b), (a)) having a > 1 and b > 1; select a, b, count(*) from gstest2 group by grouping sets ((a, b), (a)) having a > 1 and b > 1; +explain (costs off) +select a, b, count(*) from gstest2 group by rollup(a), b having b > 1; +select a, b, count(*) from gstest2 group by rollup(a), b having b > 1; + +-- test pushdown of degenerate HAVING clause +explain (costs off) +select count(*) from gstest2 group by grouping sets (()) having false; +select count(*) from gstest2 group by grouping sets (()) having false; + +explain (costs off) +select a, count(*) from gstest2 group by grouping sets ((a), ()) having false; +select a, count(*) from gstest2 group by grouping sets ((a), ()) having false; + +explain (costs off) +select a, b, count(*) from gstest2 group by grouping sets ((a), (b)) having false; +select a, b, count(*) from gstest2 group by grouping sets ((a), (b)) having false; + -- HAVING with GROUPING queries select ten, grouping(ten) from onek group by grouping sets(ten) having grouping(ten) >= 0