mirror of
https://github.com/postgres/postgres.git
synced 2025-07-07 00:36:50 +03:00
Allow user control of CTE materialization, and change the default behavior.
Historically we've always materialized the full output of a CTE query, treating WITH as an optimization fence (so that, for example, restrictions from the outer query cannot be pushed into it). This is appropriate when the CTE query is INSERT/UPDATE/DELETE, or is recursive; but when the CTE query is non-recursive and side-effect-free, there's no hazard of changing the query results by pushing restrictions down.

Another argument for materialization is that it can avoid duplicate computation of an expensive WITH query --- but that only applies if the WITH query is called more than once in the outer query. Even then it could still be a net loss, if each call has restrictions that would allow just a small part of the WITH query to be computed.

Hence, let's change the behavior for WITH queries that are non-recursive and side-effect-free. By default, we will inline them into the outer query (removing the optimization fence) if they are called just once. If they are called more than once, we will keep the old behavior by default, but the user can override this and force inlining by specifying NOT MATERIALIZED. Lastly, the user can force the old behavior by specifying MATERIALIZED; this would mainly be useful when the query had deliberately been employing WITH as an optimization fence to prevent a poor choice of plan.

Andreas Karlsson, Andrew Gierth, David Fetter

Discussion: https://postgr.es/m/87sh48ffhb.fsf@news-spur.riddles.org.uk
This commit is contained in:
@ -646,8 +646,8 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
|
||||
root->partColsUpdated = false;
|
||||
|
||||
/*
|
||||
* If there is a WITH list, process each WITH query and build an initplan
|
||||
* SubPlan structure for it.
|
||||
* If there is a WITH list, process each WITH query and either convert it
|
||||
* to RTE_SUBQUERY RTE(s) or build an initplan SubPlan structure for it.
|
||||
*/
|
||||
if (parse->cteList)
|
||||
SS_process_ctes(root);
|
||||
|
@ -57,6 +57,14 @@ typedef struct finalize_primnode_context
|
||||
Bitmapset *paramids; /* Non-local PARAM_EXEC paramids found */
|
||||
} finalize_primnode_context;
|
||||
|
||||
typedef struct inline_cte_walker_context
|
||||
{
|
||||
const char *ctename; /* name and relative level of target CTE */
|
||||
int levelsup;
|
||||
int refcount; /* number of remaining references */
|
||||
Query *ctequery; /* query to substitute */
|
||||
} inline_cte_walker_context;
|
||||
|
||||
|
||||
static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
|
||||
List *plan_params,
|
||||
@ -75,6 +83,10 @@ static Node *convert_testexpr_mutator(Node *node,
|
||||
static bool subplan_is_hashable(Plan *plan);
|
||||
static bool testexpr_is_hashable(Node *testexpr);
|
||||
static bool hash_ok_operator(OpExpr *expr);
|
||||
static bool contain_dml(Node *node);
|
||||
static bool contain_dml_walker(Node *node, void *context);
|
||||
static void inline_cte(PlannerInfo *root, CommonTableExpr *cte);
|
||||
static bool inline_cte_walker(Node *node, inline_cte_walker_context *context);
|
||||
static bool simplify_EXISTS_query(PlannerInfo *root, Query *query);
|
||||
static Query *convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
|
||||
Node **testexpr, List **paramIds);
|
||||
@ -804,10 +816,13 @@ hash_ok_operator(OpExpr *expr)
|
||||
/*
|
||||
* SS_process_ctes: process a query's WITH list
|
||||
*
|
||||
* We plan each interesting WITH item and convert it to an initplan.
|
||||
* Consider each CTE in the WITH list and either ignore it (if it's an
|
||||
* unreferenced SELECT), "inline" it to create a regular sub-SELECT-in-FROM,
|
||||
* or convert it to an initplan.
|
||||
*
|
||||
* A side effect is to fill in root->cte_plan_ids with a list that
|
||||
* parallels root->parse->cteList and provides the subplan ID for
|
||||
* each CTE's initplan.
|
||||
* each CTE's initplan, or a dummy ID (-1) if we didn't make an initplan.
|
||||
*/
|
||||
void
|
||||
SS_process_ctes(PlannerInfo *root)
|
||||
@ -838,6 +853,44 @@ SS_process_ctes(PlannerInfo *root)
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Consider inlining the CTE (creating RTE_SUBQUERY RTE(s)) instead of
|
||||
* implementing it as a separately-planned CTE.
|
||||
*
|
||||
* We cannot inline if any of these conditions hold:
|
||||
*
|
||||
* 1. The user said not to (the CTEMaterializeAlways option).
|
||||
*
|
||||
* 2. The CTE is recursive.
|
||||
*
|
||||
* 3. The CTE has side-effects; this includes either not being a plain
|
||||
* SELECT, or containing volatile functions. Inlining might change
|
||||
* the side-effects, which would be bad.
|
||||
*
|
||||
* Otherwise, we have an option whether to inline or not. That should
|
||||
* always be a win if there's just a single reference, but if the CTE
|
||||
* is multiply-referenced then it's unclear: inlining adds duplicate
|
||||
* computations, but the ability to absorb restrictions from the outer
|
||||
* query level could outweigh that. We do not have nearly enough
|
||||
* information at this point to tell whether that's true, so we let
|
||||
* the user express a preference. Our default behavior is to inline
|
||||
* only singly-referenced CTEs, but a CTE marked CTEMaterializeNever
|
||||
* will be inlined even if multiply referenced.
|
||||
*/
|
||||
if ((cte->ctematerialized == CTEMaterializeNever ||
|
||||
(cte->ctematerialized == CTEMaterializeDefault &&
|
||||
cte->cterefcount == 1)) &&
|
||||
!cte->cterecursive &&
|
||||
cmdType == CMD_SELECT &&
|
||||
!contain_dml(cte->ctequery) &&
|
||||
!contain_volatile_functions(cte->ctequery))
|
||||
{
|
||||
inline_cte(root, cte);
|
||||
/* Make a dummy entry in cte_plan_ids */
|
||||
root->cte_plan_ids = lappend_int(root->cte_plan_ids, -1);
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy the source Query node. Probably not necessary, but let's keep
|
||||
* this similar to make_subplan.
|
||||
@ -934,6 +987,127 @@ SS_process_ctes(PlannerInfo *root)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* contain_dml: is any subquery not a plain SELECT?
|
||||
*
|
||||
* We reject SELECT FOR UPDATE/SHARE as well as INSERT etc.
|
||||
*/
|
||||
static bool
|
||||
contain_dml(Node *node)
|
||||
{
|
||||
return contain_dml_walker(node, NULL);
|
||||
}
|
||||
|
||||
static bool
|
||||
contain_dml_walker(Node *node, void *context)
|
||||
{
|
||||
if (node == NULL)
|
||||
return false;
|
||||
if (IsA(node, Query))
|
||||
{
|
||||
Query *query = (Query *) node;
|
||||
|
||||
if (query->commandType != CMD_SELECT ||
|
||||
query->rowMarks != NIL)
|
||||
return true;
|
||||
|
||||
return query_tree_walker(query, contain_dml_walker, context, 0);
|
||||
}
|
||||
return expression_tree_walker(node, contain_dml_walker, context);
|
||||
}
|
||||
|
||||
/*
|
||||
* inline_cte: convert RTE_CTE references to given CTE into RTE_SUBQUERYs
|
||||
*/
|
||||
static void
|
||||
inline_cte(PlannerInfo *root, CommonTableExpr *cte)
|
||||
{
|
||||
struct inline_cte_walker_context context;
|
||||
|
||||
context.ctename = cte->ctename;
|
||||
/* Start at levelsup = -1 because we'll immediately increment it */
|
||||
context.levelsup = -1;
|
||||
context.refcount = cte->cterefcount;
|
||||
context.ctequery = castNode(Query, cte->ctequery);
|
||||
|
||||
(void) inline_cte_walker((Node *) root->parse, &context);
|
||||
|
||||
/* Assert we replaced all references */
|
||||
Assert(context.refcount == 0);
|
||||
}
|
||||
|
||||
static bool
|
||||
inline_cte_walker(Node *node, inline_cte_walker_context *context)
|
||||
{
|
||||
if (node == NULL)
|
||||
return false;
|
||||
if (IsA(node, Query))
|
||||
{
|
||||
Query *query = (Query *) node;
|
||||
|
||||
context->levelsup++;
|
||||
|
||||
/*
|
||||
* Visit the query's RTE nodes after their contents; otherwise
|
||||
* query_tree_walker would descend into the newly inlined CTE query,
|
||||
* which we don't want.
|
||||
*/
|
||||
(void) query_tree_walker(query, inline_cte_walker, context,
|
||||
QTW_EXAMINE_RTES_AFTER);
|
||||
|
||||
context->levelsup--;
|
||||
|
||||
return false;
|
||||
}
|
||||
else if (IsA(node, RangeTblEntry))
|
||||
{
|
||||
RangeTblEntry *rte = (RangeTblEntry *) node;
|
||||
|
||||
if (rte->rtekind == RTE_CTE &&
|
||||
strcmp(rte->ctename, context->ctename) == 0 &&
|
||||
rte->ctelevelsup == context->levelsup)
|
||||
{
|
||||
/*
|
||||
* Found a reference to replace. Generate a copy of the CTE query
|
||||
* with appropriate level adjustment for outer references (e.g.,
|
||||
* to other CTEs).
|
||||
*/
|
||||
Query *newquery = copyObject(context->ctequery);
|
||||
|
||||
if (context->levelsup > 0)
|
||||
IncrementVarSublevelsUp((Node *) newquery, context->levelsup, 1);
|
||||
|
||||
/*
|
||||
* Convert the RTE_CTE RTE into a RTE_SUBQUERY.
|
||||
*
|
||||
* Historically, a FOR UPDATE clause has been treated as extending
|
||||
* into views and subqueries, but not into CTEs. We preserve this
|
||||
* distinction by not trying to push rowmarks into the new
|
||||
* subquery.
|
||||
*/
|
||||
rte->rtekind = RTE_SUBQUERY;
|
||||
rte->subquery = newquery;
|
||||
rte->security_barrier = false;
|
||||
|
||||
/* Zero out CTE-specific fields */
|
||||
rte->ctename = NULL;
|
||||
rte->ctelevelsup = 0;
|
||||
rte->self_reference = false;
|
||||
rte->coltypes = NIL;
|
||||
rte->coltypmods = NIL;
|
||||
rte->colcollations = NIL;
|
||||
|
||||
/* Count the number of replacements we've done */
|
||||
context->refcount--;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
return expression_tree_walker(node, inline_cte_walker, context);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* convert_ANY_sublink_to_join: try to convert an ANY SubLink to a join
|
||||
*
|
||||
|
Reference in New Issue
Block a user