mirror of
https://github.com/postgres/postgres.git
synced 2025-07-05 07:21:24 +03:00
Fix planner crash from pfree'ing a partial path that a GatherPath uses.
We mustn't run generate_gather_paths() during add_paths_to_joinrel(), because that function can be invoked multiple times for the same target joinrel. Not only is it wasteful to build GatherPaths repeatedly, but a later add_partial_path() could delete the partial path that a previously created GatherPath depends on. Instead establish the convention that we do generate_gather_paths() for a rel only just before set_cheapest(). The code was accidentally not broken for baserels, because as of today there never is more than one partial path for a baserel. But that assumption obviously has a pretty short half-life, so move the generate_gather_paths() calls for those cases as well. Also add some generic comments explaining how and why this all works. Per fuzz testing by Andreas Seltenreich. Report: <871t5pgwdt.fsf@credativ.de>
This commit is contained in:
@ -167,6 +167,16 @@ all the ways to produce the same set of joined base rels will share the
|
||||
same RelOptInfo, so the paths produced from different join combinations
|
||||
that produce equivalent joinrels will compete in add_path().
|
||||
|
||||
The dynamic-programming approach has an important property that's not
|
||||
immediately obvious: we will finish constructing all paths for a given
|
||||
relation before we construct any paths for relations containing that rel.
|
||||
This means that we can reliably identify the "cheapest path" for each rel
|
||||
before higher-level relations need to know that. Also, we can safely
|
||||
discard a path when we find that another path for the same rel is better,
|
||||
without worrying that maybe there is already a reference to that path in
|
||||
some higher-level join path. Without this, memory management for paths
|
||||
would be much more complicated.
|
||||
|
||||
Once we have built the final join rel, we use either the cheapest path
|
||||
for it or the cheapest path with the desired ordering (if that's cheaper
|
||||
than applying a sort to the cheapest other path).
|
||||
@ -321,8 +331,9 @@ set up for recursive handling of subqueries
|
||||
For each joinrel of the prior level, do make_rels_by_clause_joins()
|
||||
if it has join clauses, or make_rels_by_clauseless_joins() if not.
|
||||
Also generate "bushy plan" joins between joinrels of lower levels.
|
||||
Back at standard_join_search(), apply set_cheapest() to extract the
|
||||
cheapest path for each newly constructed joinrel.
|
||||
Back at standard_join_search(), generate gather paths if needed for
|
||||
each newly constructed joinrel, then apply set_cheapest() to extract
|
||||
the cheapest path for it.
|
||||
Loop back if this wasn't the top join level.
|
||||
Back at grouping_planner:
|
||||
do grouping (GROUP BY) and aggregation
|
||||
|
@ -266,6 +266,9 @@ merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force)
|
||||
/* Keep searching if join order is not valid */
|
||||
if (joinrel)
|
||||
{
|
||||
/* Create GatherPaths for any useful partial paths for rel */
|
||||
generate_gather_paths(root, joinrel);
|
||||
|
||||
/* Find and save the cheapest paths for this joinrel */
|
||||
set_cheapest(joinrel);
|
||||
|
||||
|
@ -73,7 +73,7 @@ static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
|
||||
Index rti, RangeTblEntry *rte);
|
||||
static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
|
||||
RangeTblEntry *rte);
|
||||
static void create_parallel_paths(PlannerInfo *root, RelOptInfo *rel);
|
||||
static void create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel);
|
||||
static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
|
||||
RangeTblEntry *rte);
|
||||
static bool function_rte_parallel_ok(RangeTblEntry *rte);
|
||||
@ -447,6 +447,16 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If this is a baserel, consider gathering any partial paths we may have
|
||||
* created for it. (If we tried to gather inheritance children, we could
|
||||
* end up with a very large number of gather nodes, each trying to grab
|
||||
* its own pool of workers, so don't do this for otherrels. Instead,
|
||||
* we'll consider gathering partial paths for the parent appendrel.)
|
||||
*/
|
||||
if (rel->reloptkind == RELOPT_BASEREL)
|
||||
generate_gather_paths(root, rel);
|
||||
|
||||
/*
|
||||
* Allow a plugin to editorialize on the set of Paths for this base
|
||||
* relation. It could add new paths (such as CustomPaths) by calling
|
||||
@ -643,7 +653,7 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
|
||||
|
||||
/* If appropriate, consider parallel sequential scan */
|
||||
if (rel->consider_parallel && required_outer == NULL)
|
||||
create_parallel_paths(root, rel);
|
||||
create_plain_partial_paths(root, rel);
|
||||
|
||||
/* Consider index scans */
|
||||
create_index_paths(root, rel);
|
||||
@ -653,11 +663,11 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
|
||||
}
|
||||
|
||||
/*
|
||||
* create_parallel_paths
|
||||
* Build parallel access paths for a plain relation
|
||||
* create_plain_partial_paths
|
||||
* Build partial access paths for parallel scan of a plain relation
|
||||
*/
|
||||
static void
|
||||
create_parallel_paths(PlannerInfo *root, RelOptInfo *rel)
|
||||
create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel)
|
||||
{
|
||||
int parallel_degree = 1;
|
||||
|
||||
@ -712,16 +722,6 @@ create_parallel_paths(PlannerInfo *root, RelOptInfo *rel)
|
||||
|
||||
/* Add an unordered partial path based on a parallel sequential scan. */
|
||||
add_partial_path(rel, create_seqscan_path(root, rel, NULL, parallel_degree));
|
||||
|
||||
/*
|
||||
* If this is a baserel, consider gathering any partial paths we may have
|
||||
* just created. If we gathered an inheritance child, we could end up
|
||||
* with a very large number of gather nodes, each trying to grab its own
|
||||
* pool of workers, so don't do this in that case. Instead, we'll
|
||||
* consider gathering partial paths for the appendrel.
|
||||
*/
|
||||
if (rel->reloptkind == RELOPT_BASEREL)
|
||||
generate_gather_paths(root, rel);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1262,9 +1262,6 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
|
||||
appendpath = create_append_path(rel, partial_subpaths, NULL,
|
||||
parallel_degree);
|
||||
add_partial_path(rel, (Path *) appendpath);
|
||||
|
||||
/* Consider gathering it. */
|
||||
generate_gather_paths(root, rel);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1970,6 +1967,10 @@ set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
|
||||
* generate_gather_paths
|
||||
* Generate parallel access paths for a relation by pushing a Gather on
|
||||
* top of a partial path.
|
||||
*
|
||||
* This must not be called until after we're done creating all partial paths
|
||||
* for the specified relation. (Otherwise, add_partial_path might delete a
|
||||
* path that some GatherPath has a reference to.)
|
||||
*/
|
||||
void
|
||||
generate_gather_paths(PlannerInfo *root, RelOptInfo *rel)
|
||||
@ -1983,7 +1984,9 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel)
|
||||
|
||||
/*
|
||||
* The output of Gather is currently always unsorted, so there's only one
|
||||
* partial path of interest: the cheapest one.
|
||||
* partial path of interest: the cheapest one. That will be the one at
|
||||
* the front of partial_pathlist because of the way add_partial_path
|
||||
* works.
|
||||
*
|
||||
* Eventually, we should have a Gather Merge operation that can merge
|
||||
* multiple tuple streams together while preserving their ordering. We
|
||||
@ -2148,12 +2151,19 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
|
||||
join_search_one_level(root, lev);
|
||||
|
||||
/*
|
||||
* Do cleanup work on each just-processed rel.
|
||||
* Run generate_gather_paths() for each just-processed joinrel. We
|
||||
* could not do this earlier because both regular and partial paths
|
||||
* can get added to a particular joinrel at multiple times within
|
||||
* join_search_one_level. After that, we're done creating paths
|
||||
* for the joinrel, so run set_cheapest().
|
||||
*/
|
||||
foreach(lc, root->join_rel_level[lev])
|
||||
{
|
||||
rel = (RelOptInfo *) lfirst(lc);
|
||||
|
||||
/* Create GatherPaths for any useful partial paths for rel */
|
||||
generate_gather_paths(root, rel);
|
||||
|
||||
/* Find and save the cheapest paths for this rel */
|
||||
set_cheapest(rel);
|
||||
|
||||
|
@ -223,12 +223,7 @@ add_paths_to_joinrel(PlannerInfo *root,
|
||||
jointype, &extra);
|
||||
|
||||
/*
|
||||
* 6. Consider gathering partial paths.
|
||||
*/
|
||||
generate_gather_paths(root, joinrel);
|
||||
|
||||
/*
|
||||
* 7. Finally, give extensions a chance to manipulate the path list.
|
||||
* 6. Finally, give extensions a chance to manipulate the path list.
|
||||
*/
|
||||
if (set_join_pathlist_hook)
|
||||
set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
|
||||
|
@ -394,8 +394,14 @@ set_cheapest(RelOptInfo *parent_rel)
|
||||
* but just recycling discarded Path nodes is a very useful savings in
|
||||
* a large join tree. We can recycle the List nodes of pathlist, too.
|
||||
*
|
||||
* BUT: we do not pfree IndexPath objects, since they may be referenced as
|
||||
* children of BitmapHeapPaths as well as being paths in their own right.
|
||||
* As noted in optimizer/README, deleting a previously-accepted Path is
|
||||
* safe because we know that Paths of this rel cannot yet be referenced
|
||||
* from any other rel, such as a higher-level join. However, in some cases
|
||||
* it is possible that a Path is referenced by another Path for its own
|
||||
* rel; we must not delete such a Path, even if it is dominated by the new
|
||||
* Path. Currently this occurs only for IndexPath objects, which may be
|
||||
* referenced as children of BitmapHeapPaths as well as being paths in
|
||||
* their own right. Hence, we don't pfree IndexPaths when rejecting them.
|
||||
*
|
||||
* 'parent_rel' is the relation entry to which the path corresponds.
|
||||
* 'new_path' is a potential path for parent_rel.
|
||||
@ -711,6 +717,10 @@ add_path_precheck(RelOptInfo *parent_rel,
|
||||
* parallel such that each worker will generate a subset of the path's
|
||||
* overall result.
|
||||
*
|
||||
* As in add_path, the partial_pathlist is kept sorted with the cheapest
|
||||
* total path in front. This is depended on by multiple places, which
|
||||
* just take the front entry as the cheapest path without searching.
|
||||
*
|
||||
* We don't generate parameterized partial paths for several reasons. Most
|
||||
* importantly, they're not safe to execute, because there's nothing to
|
||||
* make sure that a parallel scan within the parameterized portion of the
|
||||
@ -721,8 +731,8 @@ add_path_precheck(RelOptInfo *parent_rel,
|
||||
* side of the plan will be small anyway. There could be rare cases where
|
||||
* this wins big - e.g. if join order constraints put a 1-row relation on
|
||||
* the outer side of the topmost join with a parameterized plan on the inner
|
||||
* side - but we'll have to be content not to handle such cases until somebody
|
||||
* builds an executor infrastructure that can cope with them.
|
||||
* side - but we'll have to be content not to handle such cases until
|
||||
* somebody builds an executor infrastructure that can cope with them.
|
||||
*
|
||||
* Because we don't consider parameterized paths here, we also don't
|
||||
* need to consider the row counts as a measure of quality: every path will
|
||||
@ -730,6 +740,14 @@ add_path_precheck(RelOptInfo *parent_rel,
|
||||
* costs: parallelism is only used for plans that will be run to completion.
|
||||
* Therefore, this routine is much simpler than add_path: it needs to
|
||||
* consider only pathkeys and total cost.
|
||||
*
|
||||
* As with add_path, we pfree paths that are found to be dominated by
|
||||
* another partial path; this requires that there be no other references to
|
||||
* such paths yet. Hence, GatherPaths must not be created for a rel until
|
||||
* we're done creating all partial paths for it. We do not currently build
|
||||
* partial indexscan paths, so there is no need for an exception for
|
||||
* IndexPaths here; for safety, we instead Assert that a path to be freed
|
||||
* isn't an IndexPath.
|
||||
*/
|
||||
void
|
||||
add_partial_path(RelOptInfo *parent_rel, Path *new_path)
|
||||
@ -808,7 +826,7 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
|
||||
{
|
||||
parent_rel->partial_pathlist =
|
||||
list_delete_cell(parent_rel->partial_pathlist, p1, p1_prev);
|
||||
/* add_path has a special case for IndexPath; we don't need it */
|
||||
/* we should not see IndexPaths here, so always safe to delete */
|
||||
Assert(!IsA(old_path, IndexPath));
|
||||
pfree(old_path);
|
||||
/* p1_prev does not advance */
|
||||
@ -842,7 +860,7 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
|
||||
}
|
||||
else
|
||||
{
|
||||
/* add_path has a special case for IndexPath; we don't need it */
|
||||
/* we should not see IndexPaths here, so always safe to delete */
|
||||
Assert(!IsA(new_path, IndexPath));
|
||||
/* Reject and recycle the new path */
|
||||
pfree(new_path);
|
||||
|
Reference in New Issue
Block a user