From a65724dfa73db8b451d0c874a9161935a34a914e Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 26 Mar 2024 13:05:49 -0400 Subject: [PATCH] Propagate pathkeys from CTEs up to the outer query. If we know the sort order of a CTE's output, and it is relevant to the outer query, label the CTE's outer-query access path using those pathkeys. This may enable optimizations such as avoiding a sort in the outer query. The code for hoisting pathkeys into the outer query already exists for regular RTE_SUBQUERY subqueries, but it wasn't getting used for CTEs, possibly out of concern for maintaining an optimization fence between the CTE and the outer query. However, on the same arguments used for commit f7816aec2, there seems no harm in letting the outer query know what the inner query decided to do. In support of this, we now remember the best Path as well as Plan for each subquery for the rest of the planner run. There may be future applications for having that at hand, and it surely costs little to build one more List. Richard Guo (minor mods by me) Discussion: https://postgr.es/m/CAMbWs49xYd3f8CrE8-WW3--dV1zH_sDSDn-vs2DzHj81Wcnsew@mail.gmail.com --- src/backend/optimizer/path/allpaths.c | 16 +++++++++++++-- src/backend/optimizer/plan/planner.c | 1 + src/backend/optimizer/plan/subselect.c | 28 +++++++++++++++++--------- src/backend/optimizer/util/pathnode.c | 5 +++-- src/include/nodes/pathnodes.h | 3 +++ src/include/optimizer/pathnode.h | 2 +- src/test/regress/expected/with.out | 17 ++++++++++++++++ src/test/regress/sql/with.sql | 7 +++++++ 8 files changed, 64 insertions(+), 15 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 0b98f0856e9..95f3368c214 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -2872,16 +2872,19 @@ set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { + Path *ctepath; Plan *cteplan; PlannerInfo *cteroot; Index levelsup; + List *pathkeys; int ndx; ListCell *lc; int plan_id; Relids required_outer; /* - * Find the referenced CTE, and locate the plan previously made for it. + * Find the referenced CTE, and locate the path and plan previously made + * for it. */ levelsup = rte->ctelevelsup; cteroot = root; @@ -2913,11 +2916,20 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) plan_id = list_nth_int(cteroot->cte_plan_ids, ndx); if (plan_id <= 0) elog(ERROR, "no plan was made for CTE \"%s\"", rte->ctename); + + Assert(list_length(root->glob->subpaths) == list_length(root->glob->subplans)); + ctepath = (Path *) list_nth(root->glob->subpaths, plan_id - 1); cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1); /* Mark rel with estimated output rows, width, etc */ set_cte_size_estimates(root, rel, cteplan->plan_rows); + /* Convert the ctepath's pathkeys to outer query's representation */ + pathkeys = convert_subquery_pathkeys(root, + rel, + ctepath->pathkeys, + cteplan->targetlist); + /* * We don't support pushing join clauses into the quals of a CTE scan, but * it could still have required parameterization due to LATERAL refs in @@ -2926,7 +2938,7 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) required_outer = rel->lateral_relids; /* Generate appropriate path */ - add_path(rel, create_ctescan_path(root, rel, required_outer)); + add_path(rel, create_ctescan_path(root, rel, pathkeys, required_outer)); } /* diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 6d08cc8cdd5..38d070fa004 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -306,6 +306,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, glob->boundParams = boundParams; glob->subplans = NIL; + glob->subpaths = NIL; glob->subroots = NIL; glob->rewindPlanIDs = NULL; glob->finalrtable = NIL; diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index d6954a7e867..d5fa281b102 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -65,8 +65,8 @@ typedef struct inline_cte_walker_context } inline_cte_walker_context; -static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, - List *plan_params, +static Node *build_subplan(PlannerInfo *root, Plan *plan, Path *path, + PlannerInfo *subroot, List *plan_params, SubLinkType subLinkType, int subLinkId, Node *testexpr, List *testexpr_paramids, bool unknownEqFalse); @@ -236,7 +236,8 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, plan = create_plan(subroot, best_path); /* And convert to SubPlan or InitPlan format. */ - result = build_subplan(root, plan, subroot, plan_params, + result = build_subplan(root, plan, best_path, + subroot, plan_params, subLinkType, subLinkId, testexpr, NIL, isTopQual); @@ -288,8 +289,8 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, /* ... and convert to SubPlan format */ hashplan = castNode(SubPlan, - build_subplan(root, plan, subroot, - plan_params, + build_subplan(root, plan, best_path, + subroot, plan_params, ANY_SUBLINK, 0, newtestexpr, paramIds, @@ -317,8 +318,8 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, * make it an InitPlan, as explained in the comments for make_subplan. */ static Node * -build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, - List *plan_params, +build_subplan(PlannerInfo *root, Plan *plan, Path *path, + PlannerInfo *subroot, List *plan_params, SubLinkType subLinkType, int subLinkId, Node *testexpr, List *testexpr_paramids, bool unknownEqFalse) @@ -539,9 +540,10 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, } /* - * Add the subplan and its PlannerInfo to the global lists. + * Add the subplan, its path, and its PlannerInfo to the global lists. */ root->glob->subplans = lappend(root->glob->subplans, plan); + root->glob->subpaths = lappend(root->glob->subpaths, path); root->glob->subroots = lappend(root->glob->subroots, subroot); splan->plan_id = list_length(root->glob->subplans); @@ -1029,9 +1031,10 @@ SS_process_ctes(PlannerInfo *root) splan->setParam = list_make1_int(paramid); /* - * Add the subplan and its PlannerInfo to the global lists. + * Add the subplan, its path, and its PlannerInfo to the global lists. */ root->glob->subplans = lappend(root->glob->subplans, plan); + root->glob->subpaths = lappend(root->glob->subpaths, best_path); root->glob->subroots = lappend(root->glob->subroots, subroot); splan->plan_id = list_length(root->glob->subplans); @@ -3021,9 +3024,14 @@ SS_make_initplan_from_plan(PlannerInfo *root, SubPlan *node; /* - * Add the subplan and its PlannerInfo to the global lists. + * Add the subplan and its PlannerInfo, as well as a dummy path entry, to + * the global lists. Ideally we'd save a real path, but right now our + * sole caller doesn't build a path that exactly matches the plan. Since + * we're not currently going to need the path for an initplan, it's not + * worth requiring construction of such a path. */ root->glob->subplans = lappend(root->glob->subplans, plan); + root->glob->subpaths = lappend(root->glob->subpaths, NULL); root->glob->subroots = lappend(root->glob->subroots, subroot); /* diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index c29ca5a0da2..e4a5ae83e4a 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -2121,7 +2121,8 @@ create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel, * returning the pathnode. */ Path * -create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer) +create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, + List *pathkeys, Relids required_outer) { Path *pathnode = makeNode(Path); @@ -2133,7 +2134,7 @@ create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer) pathnode->parallel_aware = false; pathnode->parallel_safe = rel->consider_parallel; pathnode->parallel_workers = 0; - pathnode->pathkeys = NIL; /* XXX for now, result is always unordered */ + pathnode->pathkeys = pathkeys; cost_ctescan(pathnode, root, rel, pathnode->param_info); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 08dbee002fe..595eec2cbbd 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -104,6 +104,9 @@ typedef struct PlannerGlobal /* Plans for SubPlan nodes */ List *subplans; + /* Paths from which the SubPlan Plans were made */ + List *subpaths; + /* PlannerInfos for SubPlan nodes */ List *subroots pg_node_attr(read_write_ignore); diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 99c2f955aab..7cc481b8c2d 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -115,7 +115,7 @@ extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel, extern Path *create_tablefuncscan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer); extern Path *create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, - Relids required_outer); + List *pathkeys, Relids required_outer); extern Path *create_namedtuplestorescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer); extern Path *create_resultscan_path(PlannerInfo *root, RelOptInfo *rel, diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out index 7bef181d78c..b4f3121751c 100644 --- a/src/test/regress/expected/with.out +++ b/src/test/regress/expected/with.out @@ -672,6 +672,23 @@ select count(*) from tenk1 a Index Cond: (unique1 = x.unique1) (10 rows) +-- test that pathkeys from a materialized CTE are propagated up to the +-- outer query +explain (costs off) +with x as materialized (select unique1 from tenk1 b order by unique1) +select count(*) from tenk1 a + where unique1 in (select * from x); + QUERY PLAN +------------------------------------------------------------ + Aggregate + CTE x + -> Index Only Scan using tenk1_unique1 on tenk1 b + -> Merge Semi Join + Merge Cond: (a.unique1 = x.unique1) + -> Index Only Scan using tenk1_unique1 on tenk1 a + -> CTE Scan on x +(7 rows) + -- SEARCH clause create temp table graph0( f int, t int, label text ); insert into graph0 values diff --git a/src/test/regress/sql/with.sql b/src/test/regress/sql/with.sql index 037bc0a511b..3fb0b33fce9 100644 --- a/src/test/regress/sql/with.sql +++ b/src/test/regress/sql/with.sql @@ -359,6 +359,13 @@ with x as materialized (insert into tenk1 default values returning unique1) select count(*) from tenk1 a where unique1 in (select * from x); +-- test that pathkeys from a materialized CTE are propagated up to the +-- outer query +explain (costs off) +with x as materialized (select unique1 from tenk1 b order by unique1) +select count(*) from tenk1 a + where unique1 in (select * from x); + -- SEARCH clause create temp table graph0( f int, t int, label text );