1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-14 08:21:07 +03:00

Fix up planner infrastructure to support LATERAL properly.

This patch takes care of a number of problems having to do with failure
to choose valid join orders and incorrect handling of lateral references
pulled up from subqueries.  Notable changes:

* Add a LateralJoinInfo data structure similar to SpecialJoinInfo, to
represent join ordering constraints created by lateral references.
(I first considered extending the SpecialJoinInfo structure, but the
semantics are different enough that a separate data structure seems
better.)  Extend join_is_legal() and related functions to prevent trying
to form unworkable joins, and to ensure that we will consider joins that
satisfy lateral references even if the joins would be clauseless.

* Fill in the infrastructure needed for the last few types of relation scan
paths to support parameterization.  We'd have wanted this eventually
anyway, but it is necessary now because a relation that gets pulled up out
of a UNION ALL subquery may acquire a reltargetlist containing lateral
references, meaning that its paths *have* to be parameterized whether or
not we have any code that can push join quals down into the scan.

* Compute data about lateral references early in query_planner(), and save
it in RelOptInfo nodes, to avoid repetitive calculations later.

* Assorted corner-case bug fixes.

There are probably still some bugs left, but this is a lot closer to being
real than it was before.
This commit is contained in:
Tom Lane
2012-08-26 22:48:55 -04:00
parent de87d47044
commit 9ff79b9d4e
30 changed files with 818 additions and 183 deletions

View File

@ -22,9 +22,11 @@
#include "optimizer/paths.h"
#include "optimizer/placeholder.h"
#include "optimizer/planmain.h"
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/var.h"
#include "rewrite/rewriteManip.h"
#include "utils/lsyscache.h"
@ -33,6 +35,9 @@ int from_collapse_limit;
int join_collapse_limit;
static void extract_lateral_references(PlannerInfo *root, RelOptInfo *brel,
Index rtindex);
static void add_lateral_info(PlannerInfo *root, Index rhs, Relids lhs);
static List *deconstruct_recurse(PlannerInfo *root, Node *jtnode,
bool below_outer_join,
Relids *qualscope, Relids *inner_join_rels);
@ -204,18 +209,75 @@ add_vars_to_targetlist(PlannerInfo *root, List *vars,
}
}
/*
* extract_lateral_references
* If the specified RTE is a LATERAL subquery, extract all its references
* to Vars of the current query level, and make sure those Vars will be
* available for evaluation of the RTE.
/*****************************************************************************
*
* XXX this is rather duplicative of processing that has to happen elsewhere.
* Maybe it'd be a good idea to do this type of extraction further upstream
* and save the results?
* LATERAL REFERENCES
*
*****************************************************************************/
/*
 * find_lateral_references
 *	  Visit every LATERAL subquery, collect the Vars and PlaceHolderVars of
 *	  the current query level that it references, and arrange for those
 *	  values to be available when the subquery is evaluated.
 *
 * Later planning steps take care of placing the Var/PHV source rels on the
 * outside of nestloops relative to the LATERAL subquery.  What remains to be
 * done here is to make the Vars/PHVs propagate up to the nestloop join
 * level, which is achieved by giving them suitable where_needed values.
 *
 * This must be called before deconstruct_jointree, because it can create
 * PlaceHolderInfos or enlarge their ph_may_need sets.
 */
void
find_lateral_references(PlannerInfo *root)
{
	Index		relindex;

	/* A query without LATERAL RTEs gives us nothing to collect */
	if (!root->hasLateralRTEs)
		return;

	/*
	 * Walk the baserel array, which has been set up by now.  Slot zero is
	 * never used, so start at one.
	 */
	for (relindex = 1; relindex < root->simple_rel_array_size; relindex++)
	{
		RelOptInfo *rel = root->simple_rel_array[relindex];

		/* slots belonging to non-baserel RTEs are left empty */
		if (rel != NULL)
		{
			Assert(rel->relid == relindex);		/* sanity check on array */

			/*
			 * Only true baserels are processed; appendrel "other rels" are
			 * deliberately passed over in favor of their parent baserels.
			 * This is subtler than it looks.  When a LATERAL-containing
			 * UNION ALL subquery has been pulled up, the otherrels are what
			 * will actually appear in the plan.  Even so, all their lateral
			 * references should be marked as needed by the parent, since
			 * join planning works with the parent's relid.  The parent's
			 * RTE holds every lateral reference we need to know about,
			 * because the pulled-up members are merely copies of pieces of
			 * the original RTE's subquery.  Visiting the children and
			 * mapping their references back to the parent's relid would
			 * also work, but would be far more complicated for no real
			 * gain.  (What matters here is that the child members have had
			 * no processing beyond being pulled up.)
			 */
			if (rel->reloptkind == RELOPT_BASEREL)
				extract_lateral_references(root, rel, relindex);
		}
	}
}
static void
extract_lateral_references(PlannerInfo *root, int rtindex)
extract_lateral_references(PlannerInfo *root, RelOptInfo *brel, Index rtindex)
{
RangeTblEntry *rte = root->simple_rte_array[rtindex];
List *vars;
@ -235,35 +297,52 @@ extract_lateral_references(PlannerInfo *root, int rtindex)
else if (rte->rtekind == RTE_VALUES)
vars = pull_vars_of_level((Node *) rte->values_lists, 0);
else
return;
{
Assert(false);
return; /* keep compiler quiet */
}
if (vars == NIL)
return; /* nothing to do */
/* Copy each Var (or PlaceHolderVar) and adjust it to match our level */
newvars = NIL;
foreach(lc, vars)
{
Node *var = (Node *) lfirst(lc);
Node *node = (Node *) lfirst(lc);
var = copyObject(var);
if (IsA(var, Var))
node = copyObject(node);
if (IsA(node, Var))
{
((Var *) var)->varlevelsup = 0;
Var *var = (Var *) node;
/* Adjustment is easy since it's just one node */
var->varlevelsup = 0;
}
else if (IsA(var, PlaceHolderVar))
else if (IsA(node, PlaceHolderVar))
{
PlaceHolderVar *phv = (PlaceHolderVar *) node;
int levelsup = phv->phlevelsup;
/* Have to work harder to adjust the contained expression too */
if (levelsup != 0)
IncrementVarSublevelsUp(node, -levelsup, 0);
/*
* It's sufficient to set phlevelsup = 0, because we call
* add_vars_to_targetlist with create_new_ph = false (as we must,
* because deconstruct_jointree has already started); therefore
* nobody is going to look at the contained expression to notice
* whether its Vars have the right level.
* If we pulled the PHV out of a subquery RTE, its expression
* needs to be preprocessed. subquery_planner() already did this
* for level-zero PHVs in function and values RTEs, though.
*/
((PlaceHolderVar *) var)->phlevelsup = 0;
if (levelsup > 0)
phv->phexpr = preprocess_phv_expression(root, phv->phexpr);
}
else
Assert(false);
newvars = lappend(newvars, var);
newvars = lappend(newvars, node);
}
list_free(vars);
/*
* We mark the Vars as being "needed" at the LATERAL RTE. This is a bit
* of a cheat: a more formal approach would be to mark each one as needed
@ -274,10 +353,146 @@ extract_lateral_references(PlannerInfo *root, int rtindex)
where_needed = bms_make_singleton(rtindex);
/* Push the Vars into their source relations' targetlists */
add_vars_to_targetlist(root, newvars, where_needed, false);
add_vars_to_targetlist(root, newvars, where_needed, true);
list_free(newvars);
list_free(vars);
/* Remember the lateral references for create_lateral_join_info */
brel->lateral_vars = newvars;
}
/*
 * create_lateral_join_info
 *	  Build the LateralJoinInfo entries for every LATERAL subquery, appending
 *	  them to root->lateral_info_list, and set each rel's lateral_relids.
 *
 * This must be called after deconstruct_jointree, since the final
 * ph_eval_at values of the referenced PlaceHolderVars are required.
 */
void
create_lateral_join_info(PlannerInfo *root)
{
	Index		relindex;

	/* A query without LATERAL RTEs needs no lateral join info */
	if (!root->hasLateralRTEs)
		return;

	/*
	 * Walk the baserel array, which has been set up by now.
	 */
	for (relindex = 1; relindex < root->simple_rel_array_size; relindex++)
	{
		RelOptInfo *rel = root->simple_rel_array[relindex];
		Relids		needed = NULL;
		ListCell   *cell;

		/* slots belonging to non-baserel RTEs are left empty */
		if (rel == NULL)
			continue;

		Assert(rel->relid == relindex);		/* sanity check on array */

		/* RTEs that are "other rels" are not examined here */
		if (rel->reloptkind != RELOPT_BASEREL)
			continue;

		/* look at each Var or PHV that this rel references laterally */
		foreach(cell, rel->lateral_vars)
		{
			Node	   *ref = (Node *) lfirst(cell);

			if (IsA(ref, PlaceHolderVar))
			{
				PlaceHolderVar *phv = (PlaceHolderVar *) ref;
				PlaceHolderInfo *phinfo;

				phinfo = find_placeholder_info(root, phv, false);

				/* add_lateral_info wants a set of its own, so copy */
				add_lateral_info(root, relindex, bms_copy(phinfo->ph_eval_at));
				needed = bms_add_members(needed, phinfo->ph_eval_at);
			}
			else if (IsA(ref, Var))
			{
				Var		   *var = (Var *) ref;

				add_lateral_info(root, relindex,
								 bms_make_singleton(var->varno));
				needed = bms_add_member(needed, var->varno);
			}
			else
				Assert(false);
		}

		/*
		 * All relids needed for this rel's lateral refs are now known.  If
		 * there are none, leave lateral_relids alone so that it stays NULL.
		 */
		if (bms_is_empty(needed))
			continue;

		rel->lateral_relids = needed;

		/* Only appendrel parents have anything further to do */
		if (!root->simple_rte_array[relindex]->inh)
			continue;

		/*
		 * Hand the parent's lateral_relids to each of its child rels.  Every
		 * child deliberately gets the same minimum parameterization, even
		 * though some of them quite possibly don't reference all of the
		 * lateral rels.  Any append path for the parent must use the same
		 * parameterization for every child anyway, so there is no value in
		 * forcing extra reparameterize_path() calls.
		 */
		foreach(cell, root->append_rel_list)
		{
			AppendRelInfo *member = (AppendRelInfo *) lfirst(cell);

			if (member->parent_relid == relindex)
			{
				RelOptInfo *child = root->simple_rel_array[member->child_relid];

				Assert(child->reloptkind == RELOPT_OTHER_MEMBER_REL);
				Assert(child->lateral_relids == NULL);
				child->lateral_relids = needed;
			}
		}
	}
}
/*
 * add_lateral_info
 *	  Append a LateralJoinInfo for (rhs, lhs) to root->lateral_info_list,
 *	  unless an existing entry already covers it.
 *
 * Redundant list entries are suppressed.  The caller must pass a freshly
 * made lhs set: it either becomes part of the new list entry or is freed
 * here when no entry is created.
 */
static void
add_lateral_info(PlannerInfo *root, Index rhs, Relids lhs)
{
	LateralJoinInfo *entry;
	ListCell   *cell;

	Assert(!bms_is_member(rhs, lhs));

	/*
	 * The new pair is skipped when some existing entry has the same RHS and
	 * an LHS that contains the new LHS.  The converse situation, an existing
	 * entry with the same RHS whose LHS is a subset of the new one, makes
	 * that older entry redundant, but we don't trouble to get rid of it.
	 * Identical entries are the only case really worth worrying about, and
	 * this simple logic handles that well enough.
	 */
	foreach(cell, root->lateral_info_list)
	{
		LateralJoinInfo *known = (LateralJoinInfo *) lfirst(cell);

		if (known->lateral_rhs != rhs)
			continue;
		if (!bms_is_subset(lhs, known->lateral_lhs))
			continue;

		/* already covered; the caller's set is ours to free */
		bms_free(lhs);
		return;
	}

	/* No covering entry was found, so make a new one that takes over lhs */
	entry = makeNode(LateralJoinInfo);
	entry->lateral_rhs = rhs;
	entry->lateral_lhs = lhs;
	root->lateral_info_list = lappend(root->lateral_info_list, entry);
}
@ -362,9 +577,7 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
{
int varno = ((RangeTblRef *) jtnode)->rtindex;
/* No quals to deal with, but do check for LATERAL subqueries */
extract_lateral_references(root, varno);
/* Result qualscope is just the one Relid */
/* No quals to deal with, just return correct result */
*qualscope = bms_make_singleton(varno);
/* A single baserel does not create an inner join */
*inner_join_rels = NULL;