1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-15 05:46:52 +03:00

Teach planner to short-circuit EXCEPT/INTERSECT with dummy inputs

When either inputs of an INTERSECT [ALL] operator are proven not to return
any results (a dummy rel), then mark the entire INTERSECT operation as
dummy.

Likewise, if an EXCEPT [ALL] operation's left input is proven empty, then
mark the entire operation as dummy.

With EXCEPT ALL, we can easily handle the right input being dummy as
we can return the left input without any processing.  That can lead to
significant performance gains during query execution.  We can't easily
handle dummy right inputs for EXCEPT (without ALL), as that would require
deduplication of the left input.  Wiring up those Paths is likely more
complex than it's worth as the gains during execution aren't that great,
so let's leave that one to be handled by the normal Path generation code.

Author: David Rowley <dgrowleyml@gmail.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/CAApHDvri53PPF76c3M94_QNWbJfXjyCnjXuj_2=LYM-0m8WZtw@mail.gmail.com
This commit is contained in:
David Rowley
2025-10-07 17:17:52 +13:00
parent 928df067d1
commit 9c9d41af4d
3 changed files with 185 additions and 2 deletions

View File

@@ -1185,6 +1185,69 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
result_rel->reltarget = create_setop_pathtarget(root, tlist,
list_make2(lpath, rpath));
/* Check for provably empty setop inputs and add short-circuit paths. */
if (op->op == SETOP_EXCEPT)
{
/*
* For EXCEPTs, if the left side is dummy then there's no need to
* inspect the right-hand side as scanning the right to find tuples to
* remove won't make the left-hand input any more empty.
*/
if (is_dummy_rel(lrel))
{
mark_dummy_rel(result_rel);
return result_rel;
}
/* Handle EXCEPTs with dummy right input */
if (is_dummy_rel(rrel))
{
if (op->all)
{
Path *apath;
/*
* EXCEPT ALL: If the right-hand input is dummy then we can
* simply scan the left-hand input. To keep createplan.c
* happy, use a single child Append to handle the translation
* between the set op targetlist and the targetlist of the
* left input. The Append will be removed in setrefs.c.
*/
apath = (Path *) create_append_path(root, result_rel, list_make1(lpath),
NIL, NIL, NULL, 0, false, -1);
add_path(result_rel, apath);
return result_rel;
}
else
{
/*
* To make EXCEPT with a dummy RHS work means having to
* deduplicate the left input. That could be done with
* AggPaths, but it doesn't seem worth the effort. Let the
* normal path generation code below handle this one.
*/
}
}
}
else
{
/*
* For INTERSECT, if either input is a dummy rel then we can mark the
* result_rel as dummy since intersecting with an empty relation can
* never yield any results. This is true regardless of INTERSECT or
* INTERSECT ALL.
*/
if (is_dummy_rel(lrel) || is_dummy_rel(rrel))
{
mark_dummy_rel(result_rel);
return result_rel;
}
}
/*
* Estimate number of distinct groups that we'll need hashtable entries
* for; this is the size of the left-hand input for EXCEPT, or the smaller

View File

@@ -1217,7 +1217,7 @@ select event_id
drop table events_child, events, other_events;
reset enable_indexonlyscan;
--
-- Test handling of UNION with provably empty inputs
-- Test handling of UNION / EXCEPT / INTERSECT with provably empty inputs
--
-- Ensure the empty UNION input is pruned and de-duplication is done for the
-- remaining relation.
@@ -1271,6 +1271,90 @@ ORDER BY 1;
One-Time Filter: false
(7 rows)
-- Ensure the planner provides a const-false Result node
EXPLAIN (COSTS OFF, VERBOSE)
SELECT two FROM tenk1 WHERE 1=2
INTERSECT
SELECT four FROM tenk1
ORDER BY 1;
QUERY PLAN
---------------------------------------------------------------------
Sort
Output: unnamed_subquery.two
Sort Key: unnamed_subquery.two
-> Result
Output: unnamed_subquery.two
Replaces: Aggregate on unnamed_subquery, unnamed_subquery_1
One-Time Filter: false
(7 rows)
-- As above, with the inputs swapped
EXPLAIN (COSTS OFF, VERBOSE)
SELECT four FROM tenk1
INTERSECT
SELECT two FROM tenk1 WHERE 1=2
ORDER BY 1;
QUERY PLAN
---------------------------------------------------------------------
Sort
Output: unnamed_subquery.four
Sort Key: unnamed_subquery.four
-> Result
Output: unnamed_subquery.four
Replaces: Aggregate on unnamed_subquery, unnamed_subquery_1
One-Time Filter: false
(7 rows)
-- Try with both inputs dummy
EXPLAIN (COSTS OFF, VERBOSE)
SELECT four FROM tenk1 WHERE 1=2
INTERSECT
SELECT two FROM tenk1 WHERE 1=2
ORDER BY 1;
QUERY PLAN
---------------------------------------------------------------------
Sort
Output: unnamed_subquery.four
Sort Key: unnamed_subquery.four
-> Result
Output: unnamed_subquery.four
Replaces: Aggregate on unnamed_subquery, unnamed_subquery_1
One-Time Filter: false
(7 rows)
-- Ensure the planner provides a const-false Result node when the left input
-- is empty
EXPLAIN (COSTS OFF, VERBOSE)
SELECT two FROM tenk1 WHERE 1=2
EXCEPT
SELECT four FROM tenk1
ORDER BY 1;
QUERY PLAN
---------------------------------------------------------------------
Sort
Output: unnamed_subquery.two
Sort Key: unnamed_subquery.two
-> Result
Output: unnamed_subquery.two
Replaces: Aggregate on unnamed_subquery, unnamed_subquery_1
One-Time Filter: false
(7 rows)
-- Ensure the planner only scans the left input when right input is empty
EXPLAIN (COSTS OFF, VERBOSE)
SELECT two FROM tenk1
EXCEPT ALL
SELECT four FROM tenk1 WHERE 1=2
ORDER BY 1;
QUERY PLAN
--------------------------------
Sort
Output: tenk1.two
Sort Key: tenk1.two
-> Seq Scan on public.tenk1
Output: tenk1.two
(5 rows)
-- Test constraint exclusion of UNION ALL subqueries
explain (costs off)
SELECT * FROM

View File

@@ -460,7 +460,7 @@ drop table events_child, events, other_events;
reset enable_indexonlyscan;
--
-- Test handling of UNION with provably empty inputs
-- Test handling of UNION / EXCEPT / INTERSECT with provably empty inputs
--
-- Ensure the empty UNION input is pruned and de-duplication is done for the
@@ -487,6 +487,42 @@ UNION
SELECT ten FROM tenk1 WHERE 1=2
ORDER BY 1;
-- Ensure the planner provides a const-false Result node
EXPLAIN (COSTS OFF, VERBOSE)
SELECT two FROM tenk1 WHERE 1=2
INTERSECT
SELECT four FROM tenk1
ORDER BY 1;
-- As above, with the inputs swapped
EXPLAIN (COSTS OFF, VERBOSE)
SELECT four FROM tenk1
INTERSECT
SELECT two FROM tenk1 WHERE 1=2
ORDER BY 1;
-- Try with both inputs dummy
EXPLAIN (COSTS OFF, VERBOSE)
SELECT four FROM tenk1 WHERE 1=2
INTERSECT
SELECT two FROM tenk1 WHERE 1=2
ORDER BY 1;
-- Ensure the planner provides a const-false Result node when the left input
-- is empty
EXPLAIN (COSTS OFF, VERBOSE)
SELECT two FROM tenk1 WHERE 1=2
EXCEPT
SELECT four FROM tenk1
ORDER BY 1;
-- Ensure the planner only scans the left input when right input is empty
EXPLAIN (COSTS OFF, VERBOSE)
SELECT two FROM tenk1
EXCEPT ALL
SELECT four FROM tenk1 WHERE 1=2
ORDER BY 1;
-- Test constraint exclusion of UNION ALL subqueries
explain (costs off)
SELECT * FROM