mirror of
https://github.com/postgres/postgres.git
synced 2025-07-15 19:21:59 +03:00
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
This commit is contained in:
@ -19,6 +19,7 @@
|
||||
#include "optimizer/paths.h"
|
||||
#include "optimizer/placeholder.h"
|
||||
#include "optimizer/plancat.h"
|
||||
#include "optimizer/restrictinfo.h"
|
||||
#include "utils/hsearch.h"
|
||||
|
||||
|
||||
@ -100,6 +101,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
|
||||
rel->width = 0;
|
||||
rel->reltargetlist = NIL;
|
||||
rel->pathlist = NIL;
|
||||
rel->ppilist = NIL;
|
||||
rel->cheapest_startup_path = NULL;
|
||||
rel->cheapest_total_path = NULL;
|
||||
rel->cheapest_unique_path = NULL;
|
||||
@ -352,6 +354,7 @@ build_join_rel(PlannerInfo *root,
|
||||
joinrel->width = 0;
|
||||
joinrel->reltargetlist = NIL;
|
||||
joinrel->pathlist = NIL;
|
||||
joinrel->ppilist = NIL;
|
||||
joinrel->cheapest_startup_path = NULL;
|
||||
joinrel->cheapest_total_path = NULL;
|
||||
joinrel->cheapest_unique_path = NULL;
|
||||
@ -574,8 +577,8 @@ build_joinrel_restrictlist(PlannerInfo *root,
|
||||
*/
|
||||
result = list_concat(result,
|
||||
generate_join_implied_equalities(root,
|
||||
joinrel,
|
||||
outer_rel,
|
||||
joinrel->relids,
|
||||
outer_rel->relids,
|
||||
inner_rel));
|
||||
|
||||
return result;
|
||||
@ -667,3 +670,296 @@ subbuild_joinrel_joinlist(RelOptInfo *joinrel,
|
||||
|
||||
return new_joininfo;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* find_childrel_appendrelinfo
|
||||
* Get the AppendRelInfo associated with an appendrel child rel.
|
||||
*
|
||||
* This search could be eliminated by storing a link in child RelOptInfos,
|
||||
* but for now it doesn't seem performance-critical.
|
||||
*/
|
||||
AppendRelInfo *
|
||||
find_childrel_appendrelinfo(PlannerInfo *root, RelOptInfo *rel)
|
||||
{
|
||||
Index relid = rel->relid;
|
||||
ListCell *lc;
|
||||
|
||||
/* Should only be called on child rels */
|
||||
Assert(rel->reloptkind == RELOPT_OTHER_MEMBER_REL);
|
||||
|
||||
foreach(lc, root->append_rel_list)
|
||||
{
|
||||
AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
|
||||
|
||||
if (appinfo->child_relid == relid)
|
||||
return appinfo;
|
||||
}
|
||||
/* should have found the entry ... */
|
||||
elog(ERROR, "child rel %d not found in append_rel_list", relid);
|
||||
return NULL; /* not reached */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* get_baserel_parampathinfo
|
||||
* Get the ParamPathInfo for a parameterized path for a base relation,
|
||||
* constructing one if we don't have one already.
|
||||
*
|
||||
* This centralizes estimating the rowcounts for parameterized paths.
|
||||
* We need to cache those to be sure we use the same rowcount for all paths
|
||||
* of the same parameterization for a given rel. This is also a convenient
|
||||
* place to determine which movable join clauses the parameterized path will
|
||||
* be responsible for evaluating.
|
||||
*/
|
||||
ParamPathInfo *
|
||||
get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel,
|
||||
Relids required_outer)
|
||||
{
|
||||
ParamPathInfo *ppi;
|
||||
Relids joinrelids;
|
||||
List *pclauses;
|
||||
double rows;
|
||||
ListCell *lc;
|
||||
|
||||
/* Unparameterized paths have no ParamPathInfo */
|
||||
if (bms_is_empty(required_outer))
|
||||
return NULL;
|
||||
|
||||
Assert(!bms_overlap(baserel->relids, required_outer));
|
||||
|
||||
/* If we already have a PPI for this parameterization, just return it */
|
||||
foreach(lc, baserel->ppilist)
|
||||
{
|
||||
ppi = (ParamPathInfo *) lfirst(lc);
|
||||
if (bms_equal(ppi->ppi_req_outer, required_outer))
|
||||
return ppi;
|
||||
}
|
||||
|
||||
/*
|
||||
* Identify all joinclauses that are movable to this base rel given this
|
||||
* parameterization.
|
||||
*/
|
||||
joinrelids = bms_union(baserel->relids, required_outer);
|
||||
pclauses = NIL;
|
||||
foreach(lc, baserel->joininfo)
|
||||
{
|
||||
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
|
||||
|
||||
if (join_clause_is_movable_into(rinfo,
|
||||
baserel->relids,
|
||||
joinrelids))
|
||||
pclauses = lappend(pclauses, rinfo);
|
||||
}
|
||||
|
||||
/*
|
||||
* Add in joinclauses generated by EquivalenceClasses, too. (These
|
||||
* necessarily satisfy join_clause_is_movable_into.)
|
||||
*/
|
||||
pclauses = list_concat(pclauses,
|
||||
generate_join_implied_equalities(root,
|
||||
joinrelids,
|
||||
required_outer,
|
||||
baserel));
|
||||
|
||||
/* Estimate the number of rows returned by the parameterized scan */
|
||||
rows = get_parameterized_baserel_size(root, baserel, pclauses);
|
||||
|
||||
/* And now we can build the ParamPathInfo */
|
||||
ppi = makeNode(ParamPathInfo);
|
||||
ppi->ppi_req_outer = required_outer;
|
||||
ppi->ppi_rows = rows;
|
||||
ppi->ppi_clauses = pclauses;
|
||||
baserel->ppilist = lappend(baserel->ppilist, ppi);
|
||||
|
||||
return ppi;
|
||||
}
|
||||
|
||||
/*
|
||||
* get_joinrel_parampathinfo
|
||||
* Get the ParamPathInfo for a parameterized path for a join relation,
|
||||
* constructing one if we don't have one already.
|
||||
*
|
||||
* This centralizes estimating the rowcounts for parameterized paths.
|
||||
* We need to cache those to be sure we use the same rowcount for all paths
|
||||
* of the same parameterization for a given rel. This is also a convenient
|
||||
* place to determine which movable join clauses the parameterized path will
|
||||
* be responsible for evaluating.
|
||||
*
|
||||
* outer_path and inner_path are a pair of input paths that can be used to
|
||||
* construct the join, and restrict_clauses is the list of regular join
|
||||
* clauses (including clauses derived from EquivalenceClasses) that must be
|
||||
* applied at the join node when using these inputs.
|
||||
*
|
||||
* Unlike the situation for base rels, the set of movable join clauses to be
|
||||
* enforced at a join varies with the selected pair of input paths, so we
|
||||
* must calculate that and pass it back, even if we already have a matching
|
||||
* ParamPathInfo. We handle this by adding any clauses moved down to this
|
||||
* join to *restrict_clauses, which is an in/out parameter. (The addition
|
||||
* is done in such a way as to not modify the passed-in List structure.)
|
||||
*
|
||||
* Note: when considering a nestloop join, the caller must have removed from
|
||||
* restrict_clauses any movable clauses that are themselves scheduled to be
|
||||
* pushed into the right-hand path. We do not do that here since it's
|
||||
* unnecessary for other join types.
|
||||
*/
|
||||
ParamPathInfo *
|
||||
get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel,
|
||||
Path *outer_path,
|
||||
Path *inner_path,
|
||||
SpecialJoinInfo *sjinfo,
|
||||
Relids required_outer,
|
||||
List **restrict_clauses)
|
||||
{
|
||||
ParamPathInfo *ppi;
|
||||
Relids join_and_req;
|
||||
Relids outer_and_req;
|
||||
Relids inner_and_req;
|
||||
List *pclauses;
|
||||
List *eclauses;
|
||||
double rows;
|
||||
ListCell *lc;
|
||||
|
||||
/* Unparameterized paths have no ParamPathInfo or extra join clauses */
|
||||
if (bms_is_empty(required_outer))
|
||||
return NULL;
|
||||
|
||||
Assert(!bms_overlap(joinrel->relids, required_outer));
|
||||
|
||||
/*
|
||||
* Identify all joinclauses that are movable to this join rel given this
|
||||
* parameterization. These are the clauses that are movable into this
|
||||
* join, but not movable into either input path. Treat an unparameterized
|
||||
* input path as not accepting parameterized clauses (because it won't,
|
||||
* per the shortcut exit above), even though the joinclause movement rules
|
||||
* might allow the same clauses to be moved into a parameterized path for
|
||||
* that rel.
|
||||
*/
|
||||
join_and_req = bms_union(joinrel->relids, required_outer);
|
||||
if (outer_path->param_info)
|
||||
outer_and_req = bms_union(outer_path->parent->relids,
|
||||
PATH_REQ_OUTER(outer_path));
|
||||
else
|
||||
outer_and_req = NULL; /* outer path does not accept parameters */
|
||||
if (inner_path->param_info)
|
||||
inner_and_req = bms_union(inner_path->parent->relids,
|
||||
PATH_REQ_OUTER(inner_path));
|
||||
else
|
||||
inner_and_req = NULL; /* inner path does not accept parameters */
|
||||
|
||||
pclauses = NIL;
|
||||
foreach(lc, joinrel->joininfo)
|
||||
{
|
||||
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
|
||||
|
||||
if (join_clause_is_movable_into(rinfo,
|
||||
joinrel->relids,
|
||||
join_and_req) &&
|
||||
!join_clause_is_movable_into(rinfo,
|
||||
outer_path->parent->relids,
|
||||
outer_and_req) &&
|
||||
!join_clause_is_movable_into(rinfo,
|
||||
inner_path->parent->relids,
|
||||
inner_and_req))
|
||||
pclauses = lappend(pclauses, rinfo);
|
||||
}
|
||||
|
||||
/* Consider joinclauses generated by EquivalenceClasses, too */
|
||||
eclauses = generate_join_implied_equalities(root,
|
||||
join_and_req,
|
||||
required_outer,
|
||||
joinrel);
|
||||
/* We only want ones that aren't movable to lower levels */
|
||||
foreach(lc, eclauses)
|
||||
{
|
||||
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
|
||||
|
||||
Assert(join_clause_is_movable_into(rinfo,
|
||||
joinrel->relids,
|
||||
join_and_req));
|
||||
if (!join_clause_is_movable_into(rinfo,
|
||||
outer_path->parent->relids,
|
||||
outer_and_req) &&
|
||||
!join_clause_is_movable_into(rinfo,
|
||||
inner_path->parent->relids,
|
||||
inner_and_req))
|
||||
pclauses = lappend(pclauses, rinfo);
|
||||
}
|
||||
|
||||
/*
|
||||
* Now, attach the identified moved-down clauses to the caller's
|
||||
* restrict_clauses list. By using list_concat in this order, we leave
|
||||
* the original list structure of restrict_clauses undamaged.
|
||||
*/
|
||||
*restrict_clauses = list_concat(pclauses, *restrict_clauses);
|
||||
|
||||
/* If we already have a PPI for this parameterization, just return it */
|
||||
foreach(lc, joinrel->ppilist)
|
||||
{
|
||||
ppi = (ParamPathInfo *) lfirst(lc);
|
||||
if (bms_equal(ppi->ppi_req_outer, required_outer))
|
||||
return ppi;
|
||||
}
|
||||
|
||||
/* Estimate the number of rows returned by the parameterized join */
|
||||
rows = get_parameterized_joinrel_size(root, joinrel,
|
||||
outer_path->rows,
|
||||
inner_path->rows,
|
||||
sjinfo,
|
||||
*restrict_clauses);
|
||||
|
||||
/*
|
||||
* And now we can build the ParamPathInfo. No point in saving the
|
||||
* input-pair-dependent clause list, though.
|
||||
*
|
||||
* Note: in GEQO mode, we'll be called in a temporary memory context, but
|
||||
* the joinrel structure is there too, so no problem.
|
||||
*/
|
||||
ppi = makeNode(ParamPathInfo);
|
||||
ppi->ppi_req_outer = required_outer;
|
||||
ppi->ppi_rows = rows;
|
||||
ppi->ppi_clauses = NIL;
|
||||
joinrel->ppilist = lappend(joinrel->ppilist, ppi);
|
||||
|
||||
return ppi;
|
||||
}
|
||||
|
||||
/*
|
||||
* get_appendrel_parampathinfo
|
||||
* Get the ParamPathInfo for a parameterized path for an append relation.
|
||||
*
|
||||
* For an append relation, the rowcount estimate will just be the sum of
|
||||
* the estimates for its children. However, we still need a ParamPathInfo
|
||||
* to flag the fact that the path requires parameters. So this just creates
|
||||
* a suitable struct with zero ppi_rows (and no ppi_clauses either, since
|
||||
* the Append node isn't responsible for checking quals).
|
||||
*/
|
||||
ParamPathInfo *
|
||||
get_appendrel_parampathinfo(RelOptInfo *appendrel, Relids required_outer)
|
||||
{
|
||||
ParamPathInfo *ppi;
|
||||
ListCell *lc;
|
||||
|
||||
/* Unparameterized paths have no ParamPathInfo */
|
||||
if (bms_is_empty(required_outer))
|
||||
return NULL;
|
||||
|
||||
Assert(!bms_overlap(appendrel->relids, required_outer));
|
||||
|
||||
/* If we already have a PPI for this parameterization, just return it */
|
||||
foreach(lc, appendrel->ppilist)
|
||||
{
|
||||
ppi = (ParamPathInfo *) lfirst(lc);
|
||||
if (bms_equal(ppi->ppi_req_outer, required_outer))
|
||||
return ppi;
|
||||
}
|
||||
|
||||
/* Else build the ParamPathInfo */
|
||||
ppi = makeNode(ParamPathInfo);
|
||||
ppi->ppi_req_outer = required_outer;
|
||||
ppi->ppi_rows = 0;
|
||||
ppi->ppi_clauses = NIL;
|
||||
appendrel->ppilist = lappend(appendrel->ppilist, ppi);
|
||||
|
||||
return ppi;
|
||||
}
|
||||
|
Reference in New Issue
Block a user