1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-25 13:17:41 +03:00

Fix pushdown of degenerate HAVING clauses

67a54b9e8 taught the planner to push down HAVING clauses even when
grouping sets are present, as long as the clause does not reference
any columns that are nullable by the grouping sets.  However, there
was an oversight: if any empty grouping sets are present, the
aggregation node can produce a row that did not come from the input,
and pushing down a HAVING clause in this case may cause us to fail to
filter out that row.

Currently, non-degenerate HAVING clauses are not pushed down when
empty grouping sets are present, since the empty grouping sets would
nullify the vars they reference.  However, degenerate (variable-free)
HAVING clauses are not subject to this restriction and may be
incorrectly pushed down.

To fix, explicitly check for the presence of empty grouping sets and
retain degenerate clauses in HAVING when they are present.  This
ensures that we don't emit a bogus aggregated row.  A copy of each
such clause is also put in WHERE so that query_planner() can use it in
a gating Result node.

To facilitate this check, this patch expands the groupingSets tree of
the query to a flat list of grouping sets before applying the HAVING
pushdown optimization.  This does not add any additional planning
overhead, since we need to do this expansion anyway.

In passing, make a small tweak to preprocess_grouping_sets() by
reordering its initial operations a bit.

Backpatch to v18, where this issue was introduced.

Reported-by: Yuhang Qiu <iamqyh@gmail.com>
Author: Richard Guo <guofenglinux@gmail.com>
Author: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/0879D9C9-7FE2-4A20-9593-B23F7A0B5290@gmail.com
Backpatch-through: 18
This commit is contained in:
Richard Guo
2025-10-21 12:35:36 +09:00
parent 29b039e916
commit 18d2614093
3 changed files with 143 additions and 26 deletions

View File

@@ -1127,15 +1127,28 @@ subquery_planner(PlannerGlobal *glob, Query *parse, char *plan_name,
if (parse->hasTargetSRFs) if (parse->hasTargetSRFs)
parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList); parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList);
/*
* If we have grouping sets, expand the groupingSets tree of this query to
* a flat list of grouping sets. We need to do this before optimizing
* HAVING, since we can't easily tell if there's an empty grouping set
* until we have this representation.
*/
if (parse->groupingSets)
{
parse->groupingSets =
expand_grouping_sets(parse->groupingSets, parse->groupDistinct, -1);
}
/* /*
* In some cases we may want to transfer a HAVING clause into WHERE. We * In some cases we may want to transfer a HAVING clause into WHERE. We
* cannot do so if the HAVING clause contains aggregates (obviously) or * cannot do so if the HAVING clause contains aggregates (obviously) or
* volatile functions (since a HAVING clause is supposed to be executed * volatile functions (since a HAVING clause is supposed to be executed
* only once per group). We also can't do this if there are any nonempty * only once per group). We also can't do this if there are any grouping
* grouping sets and the clause references any columns that are nullable * sets and the clause references any columns that are nullable by the
* by the grouping sets; moving such a clause into WHERE would potentially * grouping sets; the nulled values of those columns are not available
* change the results. (If there are only empty grouping sets, then the * before the grouping step. (The test on groupClause might seem wrong,
* HAVING clause must be degenerate as discussed below.) * but it's okay: it's just an optimization to avoid running pull_varnos
* when there cannot be any Vars in the HAVING clause.)
* *
* Also, it may be that the clause is so expensive to execute that we're * Also, it may be that the clause is so expensive to execute that we're
* better off doing it only once per group, despite the loss of * better off doing it only once per group, despite the loss of
@@ -1145,19 +1158,19 @@ subquery_planner(PlannerGlobal *glob, Query *parse, char *plan_name,
* clause into WHERE, in hopes of eliminating tuples before aggregation * clause into WHERE, in hopes of eliminating tuples before aggregation
* instead of after. * instead of after.
* *
* If the query has explicit grouping then we can simply move such a * If the query has no empty grouping set then we can simply move such a
* clause into WHERE; any group that fails the clause will not be in the * clause into WHERE; any group that fails the clause will not be in the
* output because none of its tuples will reach the grouping or * output because none of its tuples will reach the grouping or
* aggregation stage. Otherwise we must have a degenerate (variable-free) * aggregation stage. Otherwise we have to keep the clause in HAVING to
* HAVING clause, which we put in WHERE so that query_planner() can use it * ensure that we don't emit a bogus aggregated row. But then the HAVING
* in a gating Result node, but also keep in HAVING to ensure that we * clause must be degenerate (variable-free), so we can copy it into WHERE
* don't emit a bogus aggregated row. (This could be done better, but it * so that query_planner() can use it in a gating Result node. (This could
* seems not worth optimizing.) * be done better, but it seems not worth optimizing.)
* *
* Note that a HAVING clause may contain expressions that are not fully * Note that a HAVING clause may contain expressions that are not fully
* preprocessed. This can happen if these expressions are part of * preprocessed. This can happen if these expressions are part of
* grouping items. In such cases, they are replaced with GROUP Vars in * grouping items. In such cases, they are replaced with GROUP Vars in
* the parser and then replaced back after we've done with expression * the parser and then replaced back after we're done with expression
* preprocessing on havingQual. This is not an issue if the clause * preprocessing on havingQual. This is not an issue if the clause
* remains in HAVING, because these expressions will be matched to lower * remains in HAVING, because these expressions will be matched to lower
* target items in setrefs.c. However, if the clause is moved or copied * target items in setrefs.c. However, if the clause is moved or copied
@@ -1182,8 +1195,11 @@ subquery_planner(PlannerGlobal *glob, Query *parse, char *plan_name,
/* keep it in HAVING */ /* keep it in HAVING */
newHaving = lappend(newHaving, havingclause); newHaving = lappend(newHaving, havingclause);
} }
else if (parse->groupClause) else if (parse->groupClause &&
(parse->groupingSets == NIL ||
(List *) linitial(parse->groupingSets) != NIL))
{ {
/* There is GROUP BY, but no empty grouping set */
Node *whereclause; Node *whereclause;
/* Preprocess the HAVING clause fully */ /* Preprocess the HAVING clause fully */
@@ -1196,6 +1212,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, char *plan_name,
} }
else else
{ {
/* There is an empty grouping set (perhaps implicitly) */
Node *whereclause; Node *whereclause;
/* Preprocess the HAVING clause fully */ /* Preprocess the HAVING clause fully */
@@ -2181,10 +2198,13 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,
} }
/* /*
* Do preprocessing for groupingSets clause and related data. This handles the * Do preprocessing for groupingSets clause and related data.
* preliminary steps of expanding the grouping sets, organizing them into lists *
* of rollups, and preparing annotations which will later be filled in with * We expect that parse->groupingSets has already been expanded into a flat
* size estimates. * list of grouping sets (that is, just integer Lists of ressortgroupref
* numbers) by expand_grouping_sets(). This function handles the preliminary
* steps of organizing the grouping sets into lists of rollups, and preparing
* annotations which will later be filled in with size estimates.
*/ */
static grouping_sets_data * static grouping_sets_data *
preprocess_grouping_sets(PlannerInfo *root) preprocess_grouping_sets(PlannerInfo *root)
@@ -2195,19 +2215,18 @@ preprocess_grouping_sets(PlannerInfo *root)
ListCell *lc_set; ListCell *lc_set;
grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data)); grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
parse->groupingSets = expand_grouping_sets(parse->groupingSets, parse->groupDistinct, -1);
gd->any_hashable = false;
gd->unhashable_refs = NULL;
gd->unsortable_refs = NULL;
gd->unsortable_sets = NIL;
/* /*
* We don't currently make any attempt to optimize the groupClause when * We don't currently make any attempt to optimize the groupClause when
* there are grouping sets, so just duplicate it in processed_groupClause. * there are grouping sets, so just duplicate it in processed_groupClause.
*/ */
root->processed_groupClause = parse->groupClause; root->processed_groupClause = parse->groupClause;
/* Detect unhashable and unsortable grouping expressions */
gd->any_hashable = false;
gd->unhashable_refs = NULL;
gd->unsortable_refs = NULL;
gd->unsortable_sets = NIL;
if (parse->groupClause) if (parse->groupClause)
{ {
ListCell *lc; ListCell *lc;

View File

@@ -890,7 +890,8 @@ explain (costs off)
-> Seq Scan on gstest2 -> Seq Scan on gstest2
(10 rows) (10 rows)
-- test pushdown of HAVING clause that does not reference any columns that are nullable by grouping sets -- test pushdown of non-degenerate HAVING clause that does not reference any
-- columns that are nullable by grouping sets
explain (costs off) explain (costs off)
select a, b, count(*) from gstest2 group by grouping sets ((a, b), (a)) having a > 1 and b > 1; select a, b, count(*) from gstest2 group by grouping sets ((a, b), (a)) having a > 1 and b > 1;
QUERY PLAN QUERY PLAN
@@ -911,6 +912,85 @@ select a, b, count(*) from gstest2 group by grouping sets ((a, b), (a)) having a
2 | 2 | 1 2 | 2 | 1
(1 row) (1 row)
explain (costs off)
select a, b, count(*) from gstest2 group by rollup(a), b having b > 1;
QUERY PLAN
---------------------------------
GroupAggregate
Group Key: b, a
Group Key: b
-> Sort
Sort Key: b, a
-> Seq Scan on gstest2
Filter: (b > 1)
(7 rows)
select a, b, count(*) from gstest2 group by rollup(a), b having b > 1;
a | b | count
---+---+-------
1 | 2 | 1
2 | 2 | 1
| 2 | 2
(3 rows)
-- test pushdown of degenerate HAVING clause
explain (costs off)
select count(*) from gstest2 group by grouping sets (()) having false;
QUERY PLAN
-----------------------------------
Aggregate
Group Key: ()
Filter: false
-> Result
Replaces: Scan on gstest2
One-Time Filter: false
(6 rows)
select count(*) from gstest2 group by grouping sets (()) having false;
count
-------
(0 rows)
explain (costs off)
select a, count(*) from gstest2 group by grouping sets ((a), ()) having false;
QUERY PLAN
-----------------------------------------
GroupAggregate
Group Key: a
Group Key: ()
Filter: false
-> Sort
Sort Key: a
-> Result
Replaces: Scan on gstest2
One-Time Filter: false
(9 rows)
select a, count(*) from gstest2 group by grouping sets ((a), ()) having false;
a | count
---+-------
(0 rows)
explain (costs off)
select a, b, count(*) from gstest2 group by grouping sets ((a), (b)) having false;
QUERY PLAN
-----------------------------------------
GroupAggregate
Group Key: a
Sort Key: b
Group Key: b
-> Sort
Sort Key: a
-> Result
Replaces: Scan on gstest2
One-Time Filter: false
(9 rows)
select a, b, count(*) from gstest2 group by grouping sets ((a), (b)) having false;
a | b | count
---+---+-------
(0 rows)
-- HAVING with GROUPING queries -- HAVING with GROUPING queries
select ten, grouping(ten) from onek select ten, grouping(ten) from onek
group by grouping sets(ten) having grouping(ten) >= 0 group by grouping sets(ten) having grouping(ten) >= 0

View File

@@ -290,11 +290,29 @@ explain (costs off)
select v.c, (select count(*) from gstest2 group by () having v.c) select v.c, (select count(*) from gstest2 group by () having v.c)
from (values (false),(true)) v(c) order by v.c; from (values (false),(true)) v(c) order by v.c;
-- test pushdown of HAVING clause that does not reference any columns that are nullable by grouping sets -- test pushdown of non-degenerate HAVING clause that does not reference any
-- columns that are nullable by grouping sets
explain (costs off) explain (costs off)
select a, b, count(*) from gstest2 group by grouping sets ((a, b), (a)) having a > 1 and b > 1; select a, b, count(*) from gstest2 group by grouping sets ((a, b), (a)) having a > 1 and b > 1;
select a, b, count(*) from gstest2 group by grouping sets ((a, b), (a)) having a > 1 and b > 1; select a, b, count(*) from gstest2 group by grouping sets ((a, b), (a)) having a > 1 and b > 1;
explain (costs off)
select a, b, count(*) from gstest2 group by rollup(a), b having b > 1;
select a, b, count(*) from gstest2 group by rollup(a), b having b > 1;
-- test pushdown of degenerate HAVING clause
explain (costs off)
select count(*) from gstest2 group by grouping sets (()) having false;
select count(*) from gstest2 group by grouping sets (()) having false;
explain (costs off)
select a, count(*) from gstest2 group by grouping sets ((a), ()) having false;
select a, count(*) from gstest2 group by grouping sets ((a), ()) having false;
explain (costs off)
select a, b, count(*) from gstest2 group by grouping sets ((a), (b)) having false;
select a, b, count(*) from gstest2 group by grouping sets ((a), (b)) having false;
-- HAVING with GROUPING queries -- HAVING with GROUPING queries
select ten, grouping(ten) from onek select ten, grouping(ten) from onek
group by grouping sets(ten) having grouping(ten) >= 0 group by grouping sets(ten) having grouping(ten) >= 0