1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Allow user control of CTE materialization, and change the default behavior.

Historically we've always materialized the full output of a CTE query,
treating WITH as an optimization fence (so that, for example, restrictions
from the outer query cannot be pushed into it).  This is appropriate when
the CTE query is INSERT/UPDATE/DELETE, or is recursive; but when the CTE
query is non-recursive and side-effect-free, there's no hazard of changing
the query results by pushing restrictions down.

Another argument for materialization is that it can avoid duplicate
computation of an expensive WITH query --- but that only applies if
the WITH query is called more than once in the outer query.  Even then
it could still be a net loss, if each call has restrictions that
would allow just a small part of the WITH query to be computed.

Hence, let's change the behavior for WITH queries that are non-recursive
and side-effect-free.  By default, we will inline them into the outer
query (removing the optimization fence) if they are called just once.
If they are called more than once, we will keep the old behavior by
default, but the user can override this and force inlining by specifying
NOT MATERIALIZED.  Lastly, the user can force the old behavior by
specifying MATERIALIZED; this would mainly be useful when the query had
deliberately been employing WITH as an optimization fence to prevent a
poor choice of plan.

Andreas Karlsson, Andrew Gierth, David Fetter

Discussion: https://postgr.es/m/87sh48ffhb.fsf@news-spur.riddles.org.uk
This commit is contained in:
Tom Lane
2019-02-16 16:11:12 -05:00
parent 79730e2a9b
commit 608b167f9f
23 changed files with 580 additions and 44 deletions

View File

@ -2179,7 +2179,7 @@ EXPLAIN (COSTS OFF) EXECUTE plancache_test;
Filter: (((a % 2) = 0) AND f_leak(b))
(2 rows)
PREPARE plancache_test2 AS WITH q AS (SELECT * FROM z1 WHERE f_leak(b)) SELECT * FROM q,z2;
PREPARE plancache_test2 AS WITH q AS MATERIALIZED (SELECT * FROM z1 WHERE f_leak(b)) SELECT * FROM q,z2;
EXPLAIN (COSTS OFF) EXECUTE plancache_test2;
QUERY PLAN
-------------------------------------------------
@ -2192,7 +2192,7 @@ EXPLAIN (COSTS OFF) EXECUTE plancache_test2;
-> Seq Scan on z2
(7 rows)
PREPARE plancache_test3 AS WITH q AS (SELECT * FROM z2) SELECT * FROM q,z1 WHERE f_leak(z1.b);
PREPARE plancache_test3 AS WITH q AS MATERIALIZED (SELECT * FROM z2) SELECT * FROM q,z1 WHERE f_leak(z1.b);
EXPLAIN (COSTS OFF) EXECUTE plancache_test3;
QUERY PLAN
-----------------------------------------------------
@ -2826,7 +2826,7 @@ ALTER TABLE t1 ENABLE ROW LEVEL SECURITY;
GRANT ALL ON t1 TO regress_rls_bob;
INSERT INTO t1 (SELECT x, md5(x::text) FROM generate_series(0,20) x);
SET SESSION AUTHORIZATION regress_rls_bob;
WITH cte1 AS (SELECT * FROM t1 WHERE f_leak(b)) SELECT * FROM cte1;
WITH cte1 AS MATERIALIZED (SELECT * FROM t1 WHERE f_leak(b)) SELECT * FROM cte1;
NOTICE: f_leak => cfcd208495d565ef66e7dff9f98764da
NOTICE: f_leak => c81e728d9d4c2f636f067f89cc14862c
NOTICE: f_leak => a87ff679a2f3e71d9181a67b7542122c
@ -2853,7 +2853,8 @@ NOTICE: f_leak => 98f13708210194c475687be6106a3b84
20 | 98f13708210194c475687be6106a3b84
(11 rows)
EXPLAIN (COSTS OFF) WITH cte1 AS (SELECT * FROM t1 WHERE f_leak(b)) SELECT * FROM cte1;
EXPLAIN (COSTS OFF)
WITH cte1 AS MATERIALIZED (SELECT * FROM t1 WHERE f_leak(b)) SELECT * FROM cte1;
QUERY PLAN
-------------------------------------------------
CTE Scan on cte1

View File

@ -1147,7 +1147,7 @@ from (values (1,row(1,2)), (1,row(null,null)), (1,null),
(6 rows)
explain (verbose, costs off)
with r(a,b) as
with r(a,b) as materialized
(values (1,row(1,2)), (1,row(null,null)), (1,null),
(null,row(1,2)), (null,row(null,null)), (null,null) )
select r, r is null as isnull, r is not null as isnotnull from r;
@ -1160,7 +1160,7 @@ select r, r is null as isnull, r is not null as isnotnull from r;
Output: "*VALUES*".column1, "*VALUES*".column2
(5 rows)
with r(a,b) as
with r(a,b) as materialized
(values (1,row(1,2)), (1,row(null,null)), (1,null),
(null,row(1,2)), (null,row(null,null)), (null,null) )
select r, r is null as isnull, r is not null as isnotnull from r;

View File

@ -3110,7 +3110,7 @@ explain (costs off) INSERT INTO hats VALUES ('h8', 'forbidden') RETURNING *;
(5 rows)
-- ensure upserting into a rule, with a CTE (different offsets!) works
WITH data(hat_name, hat_color) AS (
WITH data(hat_name, hat_color) AS MATERIALIZED (
VALUES ('h8', 'green'),
('h9', 'blue'),
('h7', 'forbidden')
@ -3124,7 +3124,8 @@ RETURNING *;
h9 | blue
(2 rows)
EXPLAIN (costs off) WITH data(hat_name, hat_color) AS (
EXPLAIN (costs off)
WITH data(hat_name, hat_color) AS MATERIALIZED (
VALUES ('h8', 'green'),
('h9', 'blue'),
('h7', 'forbidden')

View File

@ -1154,3 +1154,159 @@ fetch backward all in c1;
(2 rows)
commit;
--
-- Tests for CTE inlining behavior
--
-- Basic subquery that can be inlined
explain (verbose, costs off)
with x as (select * from (select f1 from subselect_tbl) ss)
select * from x where f1 = 1;
QUERY PLAN
----------------------------------
Seq Scan on public.subselect_tbl
Output: subselect_tbl.f1
Filter: (subselect_tbl.f1 = 1)
(3 rows)
-- Explicitly request materialization
explain (verbose, costs off)
with x as materialized (select * from (select f1 from subselect_tbl) ss)
select * from x where f1 = 1;
QUERY PLAN
------------------------------------------
CTE Scan on x
Output: x.f1
Filter: (x.f1 = 1)
CTE x
-> Seq Scan on public.subselect_tbl
Output: subselect_tbl.f1
(6 rows)
-- Stable functions are safe to inline
explain (verbose, costs off)
with x as (select * from (select f1, now() from subselect_tbl) ss)
select * from x where f1 = 1;
QUERY PLAN
-----------------------------------
Seq Scan on public.subselect_tbl
Output: subselect_tbl.f1, now()
Filter: (subselect_tbl.f1 = 1)
(3 rows)
-- Volatile functions prevent inlining
explain (verbose, costs off)
with x as (select * from (select f1, random() from subselect_tbl) ss)
select * from x where f1 = 1;
QUERY PLAN
----------------------------------------------
CTE Scan on x
Output: x.f1, x.random
Filter: (x.f1 = 1)
CTE x
-> Seq Scan on public.subselect_tbl
Output: subselect_tbl.f1, random()
(6 rows)
-- SELECT FOR UPDATE cannot be inlined
explain (verbose, costs off)
with x as (select * from (select f1 from subselect_tbl for update) ss)
select * from x where f1 = 1;
QUERY PLAN
--------------------------------------------------------------------
CTE Scan on x
Output: x.f1
Filter: (x.f1 = 1)
CTE x
-> Subquery Scan on ss
Output: ss.f1
-> LockRows
Output: subselect_tbl.f1, subselect_tbl.ctid
-> Seq Scan on public.subselect_tbl
Output: subselect_tbl.f1, subselect_tbl.ctid
(10 rows)
-- Multiply-referenced CTEs are inlined only when requested
explain (verbose, costs off)
with x as (select * from (select f1, now() as n from subselect_tbl) ss)
select * from x, x x2 where x.n = x2.n;
QUERY PLAN
-------------------------------------------
Merge Join
Output: x.f1, x.n, x2.f1, x2.n
Merge Cond: (x.n = x2.n)
CTE x
-> Seq Scan on public.subselect_tbl
Output: subselect_tbl.f1, now()
-> Sort
Output: x.f1, x.n
Sort Key: x.n
-> CTE Scan on x
Output: x.f1, x.n
-> Sort
Output: x2.f1, x2.n
Sort Key: x2.n
-> CTE Scan on x x2
Output: x2.f1, x2.n
(16 rows)
explain (verbose, costs off)
with x as not materialized (select * from (select f1, now() as n from subselect_tbl) ss)
select * from x, x x2 where x.n = x2.n;
QUERY PLAN
----------------------------------------------------------------------------
Result
Output: subselect_tbl.f1, now(), subselect_tbl_1.f1, now()
One-Time Filter: (now() = now())
-> Nested Loop
Output: subselect_tbl.f1, subselect_tbl_1.f1
-> Seq Scan on public.subselect_tbl
Output: subselect_tbl.f1, subselect_tbl.f2, subselect_tbl.f3
-> Materialize
Output: subselect_tbl_1.f1
-> Seq Scan on public.subselect_tbl subselect_tbl_1
Output: subselect_tbl_1.f1
(11 rows)
-- Check handling of outer references
explain (verbose, costs off)
with x as (select * from int4_tbl)
select * from (with y as (select * from x) select * from y) ss;
QUERY PLAN
-----------------------------
Seq Scan on public.int4_tbl
Output: int4_tbl.f1
(2 rows)
explain (verbose, costs off)
with x as materialized (select * from int4_tbl)
select * from (with y as (select * from x) select * from y) ss;
QUERY PLAN
-------------------------------------
CTE Scan on x
Output: x.f1
CTE x
-> Seq Scan on public.int4_tbl
Output: int4_tbl.f1
(5 rows)
-- Ensure that we inline the currect CTE when there are
-- multiple CTEs with the same name
explain (verbose, costs off)
with x as (select 1 as y)
select * from (with x as (select 2 as y) select * from x) ss;
QUERY PLAN
-------------
Result
Output: 2
(2 rows)
-- Row marks are not pushed into CTEs
explain (verbose, costs off)
with x as (select * from subselect_tbl)
select * from x for update;
QUERY PLAN
----------------------------------------------------------------
Seq Scan on public.subselect_tbl
Output: subselect_tbl.f1, subselect_tbl.f2, subselect_tbl.f3
(2 rows)

View File

@ -840,10 +840,10 @@ EXPLAIN (COSTS OFF) SELECT * FROM z1 WHERE f_leak(b);
PREPARE plancache_test AS SELECT * FROM z1 WHERE f_leak(b);
EXPLAIN (COSTS OFF) EXECUTE plancache_test;
PREPARE plancache_test2 AS WITH q AS (SELECT * FROM z1 WHERE f_leak(b)) SELECT * FROM q,z2;
PREPARE plancache_test2 AS WITH q AS MATERIALIZED (SELECT * FROM z1 WHERE f_leak(b)) SELECT * FROM q,z2;
EXPLAIN (COSTS OFF) EXECUTE plancache_test2;
PREPARE plancache_test3 AS WITH q AS (SELECT * FROM z2) SELECT * FROM q,z1 WHERE f_leak(z1.b);
PREPARE plancache_test3 AS WITH q AS MATERIALIZED (SELECT * FROM z2) SELECT * FROM q,z1 WHERE f_leak(z1.b);
EXPLAIN (COSTS OFF) EXECUTE plancache_test3;
SET ROLE regress_rls_group1;
@ -1071,8 +1071,9 @@ INSERT INTO t1 (SELECT x, md5(x::text) FROM generate_series(0,20) x);
SET SESSION AUTHORIZATION regress_rls_bob;
WITH cte1 AS (SELECT * FROM t1 WHERE f_leak(b)) SELECT * FROM cte1;
EXPLAIN (COSTS OFF) WITH cte1 AS (SELECT * FROM t1 WHERE f_leak(b)) SELECT * FROM cte1;
WITH cte1 AS MATERIALIZED (SELECT * FROM t1 WHERE f_leak(b)) SELECT * FROM cte1;
EXPLAIN (COSTS OFF)
WITH cte1 AS MATERIALIZED (SELECT * FROM t1 WHERE f_leak(b)) SELECT * FROM cte1;
WITH cte1 AS (UPDATE t1 SET a = a + 1 RETURNING *) SELECT * FROM cte1; --fail
WITH cte1 AS (UPDATE t1 SET a = a RETURNING *) SELECT * FROM cte1; --ok

View File

@ -461,12 +461,12 @@ from (values (1,row(1,2)), (1,row(null,null)), (1,null),
(null,row(1,2)), (null,row(null,null)), (null,null) ) r(a,b);
explain (verbose, costs off)
with r(a,b) as
with r(a,b) as materialized
(values (1,row(1,2)), (1,row(null,null)), (1,null),
(null,row(1,2)), (null,row(null,null)), (null,null) )
select r, r is null as isnull, r is not null as isnotnull from r;
with r(a,b) as
with r(a,b) as materialized
(values (1,row(1,2)), (1,row(null,null)), (1,null),
(null,row(1,2)), (null,row(null,null)), (null,null) )
select r, r is null as isnull, r is not null as isnotnull from r;

View File

@ -1132,7 +1132,7 @@ SELECT tablename, rulename, definition FROM pg_rules
explain (costs off) INSERT INTO hats VALUES ('h8', 'forbidden') RETURNING *;
-- ensure upserting into a rule, with a CTE (different offsets!) works
WITH data(hat_name, hat_color) AS (
WITH data(hat_name, hat_color) AS MATERIALIZED (
VALUES ('h8', 'green'),
('h9', 'blue'),
('h7', 'forbidden')
@ -1140,7 +1140,8 @@ WITH data(hat_name, hat_color) AS (
INSERT INTO hats
SELECT * FROM data
RETURNING *;
EXPLAIN (costs off) WITH data(hat_name, hat_color) AS (
EXPLAIN (costs off)
WITH data(hat_name, hat_color) AS MATERIALIZED (
VALUES ('h8', 'green'),
('h9', 'blue'),
('h7', 'forbidden')

View File

@ -625,3 +625,61 @@ move forward all in c1;
fetch backward all in c1;
commit;
--
-- Tests for CTE inlining behavior
--
-- Basic subquery that can be inlined
explain (verbose, costs off)
with x as (select * from (select f1 from subselect_tbl) ss)
select * from x where f1 = 1;
-- Explicitly request materialization
explain (verbose, costs off)
with x as materialized (select * from (select f1 from subselect_tbl) ss)
select * from x where f1 = 1;
-- Stable functions are safe to inline
explain (verbose, costs off)
with x as (select * from (select f1, now() from subselect_tbl) ss)
select * from x where f1 = 1;
-- Volatile functions prevent inlining
explain (verbose, costs off)
with x as (select * from (select f1, random() from subselect_tbl) ss)
select * from x where f1 = 1;
-- SELECT FOR UPDATE cannot be inlined
explain (verbose, costs off)
with x as (select * from (select f1 from subselect_tbl for update) ss)
select * from x where f1 = 1;
-- Multiply-referenced CTEs are inlined only when requested
explain (verbose, costs off)
with x as (select * from (select f1, now() as n from subselect_tbl) ss)
select * from x, x x2 where x.n = x2.n;
explain (verbose, costs off)
with x as not materialized (select * from (select f1, now() as n from subselect_tbl) ss)
select * from x, x x2 where x.n = x2.n;
-- Check handling of outer references
explain (verbose, costs off)
with x as (select * from int4_tbl)
select * from (with y as (select * from x) select * from y) ss;
explain (verbose, costs off)
with x as materialized (select * from int4_tbl)
select * from (with y as (select * from x) select * from y) ss;
-- Ensure that we inline the currect CTE when there are
-- multiple CTEs with the same name
explain (verbose, costs off)
with x as (select 1 as y)
select * from (with x as (select 2 as y) select * from x) ss;
-- Row marks are not pushed into CTEs
explain (verbose, costs off)
with x as (select * from subselect_tbl)
select * from x for update;