mirror of
https://github.com/postgres/postgres.git
synced 2025-07-05 07:21:24 +03:00
Fix planner crash from pfree'ing a partial path that a GatherPath uses.
We mustn't run generate_gather_paths() during add_paths_to_joinrel(), because that function can be invoked multiple times for the same target joinrel. Not only is it wasteful to build GatherPaths repeatedly, but a later add_partial_path() could delete the partial path that a previously created GatherPath depends on. Instead establish the convention that we do generate_gather_paths() for a rel only just before set_cheapest(). The code was accidentally not broken for baserels, because as of today there never is more than one partial path for a baserel. But that assumption obviously has a pretty short half-life, so move the generate_gather_paths() calls for those cases as well. Also add some generic comments explaining how and why this all works. Per fuzz testing by Andreas Seltenreich. Report: <871t5pgwdt.fsf@credativ.de>
This commit is contained in:
@ -167,6 +167,16 @@ all the ways to produce the same set of joined base rels will share the
|
|||||||
same RelOptInfo, so the paths produced from different join combinations
|
same RelOptInfo, so the paths produced from different join combinations
|
||||||
that produce equivalent joinrels will compete in add_path().
|
that produce equivalent joinrels will compete in add_path().
|
||||||
|
|
||||||
|
The dynamic-programming approach has an important property that's not
|
||||||
|
immediately obvious: we will finish constructing all paths for a given
|
||||||
|
relation before we construct any paths for relations containing that rel.
|
||||||
|
This means that we can reliably identify the "cheapest path" for each rel
|
||||||
|
before higher-level relations need to know that. Also, we can safely
|
||||||
|
discard a path when we find that another path for the same rel is better,
|
||||||
|
without worrying that maybe there is already a reference to that path in
|
||||||
|
some higher-level join path. Without this, memory management for paths
|
||||||
|
would be much more complicated.
|
||||||
|
|
||||||
Once we have built the final join rel, we use either the cheapest path
|
Once we have built the final join rel, we use either the cheapest path
|
||||||
for it or the cheapest path with the desired ordering (if that's cheaper
|
for it or the cheapest path with the desired ordering (if that's cheaper
|
||||||
than applying a sort to the cheapest other path).
|
than applying a sort to the cheapest other path).
|
||||||
@ -321,8 +331,9 @@ set up for recursive handling of subqueries
|
|||||||
For each joinrel of the prior level, do make_rels_by_clause_joins()
|
For each joinrel of the prior level, do make_rels_by_clause_joins()
|
||||||
if it has join clauses, or make_rels_by_clauseless_joins() if not.
|
if it has join clauses, or make_rels_by_clauseless_joins() if not.
|
||||||
Also generate "bushy plan" joins between joinrels of lower levels.
|
Also generate "bushy plan" joins between joinrels of lower levels.
|
||||||
Back at standard_join_search(), apply set_cheapest() to extract the
|
Back at standard_join_search(), generate gather paths if needed for
|
||||||
cheapest path for each newly constructed joinrel.
|
each newly constructed joinrel, then apply set_cheapest() to extract
|
||||||
|
the cheapest path for it.
|
||||||
Loop back if this wasn't the top join level.
|
Loop back if this wasn't the top join level.
|
||||||
Back at grouping_planner:
|
Back at grouping_planner:
|
||||||
do grouping (GROUP BY) and aggregation
|
do grouping (GROUP BY) and aggregation
|
||||||
|
@ -266,6 +266,9 @@ merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force)
|
|||||||
/* Keep searching if join order is not valid */
|
/* Keep searching if join order is not valid */
|
||||||
if (joinrel)
|
if (joinrel)
|
||||||
{
|
{
|
||||||
|
/* Create GatherPaths for any useful partial paths for rel */
|
||||||
|
generate_gather_paths(root, joinrel);
|
||||||
|
|
||||||
/* Find and save the cheapest paths for this joinrel */
|
/* Find and save the cheapest paths for this joinrel */
|
||||||
set_cheapest(joinrel);
|
set_cheapest(joinrel);
|
||||||
|
|
||||||
|
@ -73,7 +73,7 @@ static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
|
|||||||
Index rti, RangeTblEntry *rte);
|
Index rti, RangeTblEntry *rte);
|
||||||
static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
|
static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
|
||||||
RangeTblEntry *rte);
|
RangeTblEntry *rte);
|
||||||
static void create_parallel_paths(PlannerInfo *root, RelOptInfo *rel);
|
static void create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel);
|
||||||
static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
|
static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
|
||||||
RangeTblEntry *rte);
|
RangeTblEntry *rte);
|
||||||
static bool function_rte_parallel_ok(RangeTblEntry *rte);
|
static bool function_rte_parallel_ok(RangeTblEntry *rte);
|
||||||
@ -447,6 +447,16 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If this is a baserel, consider gathering any partial paths we may have
|
||||||
|
* created for it. (If we tried to gather inheritance children, we could
|
||||||
|
* end up with a very large number of gather nodes, each trying to grab
|
||||||
|
* its own pool of workers, so don't do this for otherrels. Instead,
|
||||||
|
* we'll consider gathering partial paths for the parent appendrel.)
|
||||||
|
*/
|
||||||
|
if (rel->reloptkind == RELOPT_BASEREL)
|
||||||
|
generate_gather_paths(root, rel);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Allow a plugin to editorialize on the set of Paths for this base
|
* Allow a plugin to editorialize on the set of Paths for this base
|
||||||
* relation. It could add new paths (such as CustomPaths) by calling
|
* relation. It could add new paths (such as CustomPaths) by calling
|
||||||
@ -643,7 +653,7 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
|
|||||||
|
|
||||||
/* If appropriate, consider parallel sequential scan */
|
/* If appropriate, consider parallel sequential scan */
|
||||||
if (rel->consider_parallel && required_outer == NULL)
|
if (rel->consider_parallel && required_outer == NULL)
|
||||||
create_parallel_paths(root, rel);
|
create_plain_partial_paths(root, rel);
|
||||||
|
|
||||||
/* Consider index scans */
|
/* Consider index scans */
|
||||||
create_index_paths(root, rel);
|
create_index_paths(root, rel);
|
||||||
@ -653,11 +663,11 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* create_parallel_paths
|
* create_plain_partial_paths
|
||||||
* Build parallel access paths for a plain relation
|
* Build partial access paths for parallel scan of a plain relation
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
create_parallel_paths(PlannerInfo *root, RelOptInfo *rel)
|
create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel)
|
||||||
{
|
{
|
||||||
int parallel_degree = 1;
|
int parallel_degree = 1;
|
||||||
|
|
||||||
@ -712,16 +722,6 @@ create_parallel_paths(PlannerInfo *root, RelOptInfo *rel)
|
|||||||
|
|
||||||
/* Add an unordered partial path based on a parallel sequential scan. */
|
/* Add an unordered partial path based on a parallel sequential scan. */
|
||||||
add_partial_path(rel, create_seqscan_path(root, rel, NULL, parallel_degree));
|
add_partial_path(rel, create_seqscan_path(root, rel, NULL, parallel_degree));
|
||||||
|
|
||||||
/*
|
|
||||||
* If this is a baserel, consider gathering any partial paths we may have
|
|
||||||
* just created. If we gathered an inheritance child, we could end up
|
|
||||||
* with a very large number of gather nodes, each trying to grab its own
|
|
||||||
* pool of workers, so don't do this in that case. Instead, we'll
|
|
||||||
* consider gathering partial paths for the appendrel.
|
|
||||||
*/
|
|
||||||
if (rel->reloptkind == RELOPT_BASEREL)
|
|
||||||
generate_gather_paths(root, rel);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1262,9 +1262,6 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
|
|||||||
appendpath = create_append_path(rel, partial_subpaths, NULL,
|
appendpath = create_append_path(rel, partial_subpaths, NULL,
|
||||||
parallel_degree);
|
parallel_degree);
|
||||||
add_partial_path(rel, (Path *) appendpath);
|
add_partial_path(rel, (Path *) appendpath);
|
||||||
|
|
||||||
/* Consider gathering it. */
|
|
||||||
generate_gather_paths(root, rel);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1970,6 +1967,10 @@ set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
|
|||||||
* generate_gather_paths
|
* generate_gather_paths
|
||||||
* Generate parallel access paths for a relation by pushing a Gather on
|
* Generate parallel access paths for a relation by pushing a Gather on
|
||||||
* top of a partial path.
|
* top of a partial path.
|
||||||
|
*
|
||||||
|
* This must not be called until after we're done creating all partial paths
|
||||||
|
* for the specified relation. (Otherwise, add_partial_path might delete a
|
||||||
|
* path that some GatherPath has a reference to.)
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
generate_gather_paths(PlannerInfo *root, RelOptInfo *rel)
|
generate_gather_paths(PlannerInfo *root, RelOptInfo *rel)
|
||||||
@ -1983,7 +1984,9 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* The output of Gather is currently always unsorted, so there's only one
|
* The output of Gather is currently always unsorted, so there's only one
|
||||||
* partial path of interest: the cheapest one.
|
* partial path of interest: the cheapest one. That will be the one at
|
||||||
|
* the front of partial_pathlist because of the way add_partial_path
|
||||||
|
* works.
|
||||||
*
|
*
|
||||||
* Eventually, we should have a Gather Merge operation that can merge
|
* Eventually, we should have a Gather Merge operation that can merge
|
||||||
* multiple tuple streams together while preserving their ordering. We
|
* multiple tuple streams together while preserving their ordering. We
|
||||||
@ -2148,12 +2151,19 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
|
|||||||
join_search_one_level(root, lev);
|
join_search_one_level(root, lev);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Do cleanup work on each just-processed rel.
|
* Run generate_gather_paths() for each just-processed joinrel. We
|
||||||
|
* could not do this earlier because both regular and partial paths
|
||||||
|
* can get added to a particular joinrel multiple times within
|
||||||
|
* join_search_one_level. After that, we're done creating paths
|
||||||
|
* for the joinrel, so run set_cheapest().
|
||||||
*/
|
*/
|
||||||
foreach(lc, root->join_rel_level[lev])
|
foreach(lc, root->join_rel_level[lev])
|
||||||
{
|
{
|
||||||
rel = (RelOptInfo *) lfirst(lc);
|
rel = (RelOptInfo *) lfirst(lc);
|
||||||
|
|
||||||
|
/* Create GatherPaths for any useful partial paths for rel */
|
||||||
|
generate_gather_paths(root, rel);
|
||||||
|
|
||||||
/* Find and save the cheapest paths for this rel */
|
/* Find and save the cheapest paths for this rel */
|
||||||
set_cheapest(rel);
|
set_cheapest(rel);
|
||||||
|
|
||||||
|
@ -223,12 +223,7 @@ add_paths_to_joinrel(PlannerInfo *root,
|
|||||||
jointype, &extra);
|
jointype, &extra);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* 6. Consider gathering partial paths.
|
* 6. Finally, give extensions a chance to manipulate the path list.
|
||||||
*/
|
|
||||||
generate_gather_paths(root, joinrel);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* 7. Finally, give extensions a chance to manipulate the path list.
|
|
||||||
*/
|
*/
|
||||||
if (set_join_pathlist_hook)
|
if (set_join_pathlist_hook)
|
||||||
set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
|
set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
|
||||||
|
@ -394,8 +394,14 @@ set_cheapest(RelOptInfo *parent_rel)
|
|||||||
* but just recycling discarded Path nodes is a very useful savings in
|
* but just recycling discarded Path nodes is a very useful savings in
|
||||||
* a large join tree. We can recycle the List nodes of pathlist, too.
|
* a large join tree. We can recycle the List nodes of pathlist, too.
|
||||||
*
|
*
|
||||||
* BUT: we do not pfree IndexPath objects, since they may be referenced as
|
* As noted in optimizer/README, deleting a previously-accepted Path is
|
||||||
* children of BitmapHeapPaths as well as being paths in their own right.
|
* safe because we know that Paths of this rel cannot yet be referenced
|
||||||
|
* from any other rel, such as a higher-level join. However, in some cases
|
||||||
|
* it is possible that a Path is referenced by another Path for its own
|
||||||
|
* rel; we must not delete such a Path, even if it is dominated by the new
|
||||||
|
* Path. Currently this occurs only for IndexPath objects, which may be
|
||||||
|
* referenced as children of BitmapHeapPaths as well as being paths in
|
||||||
|
* their own right. Hence, we don't pfree IndexPaths when rejecting them.
|
||||||
*
|
*
|
||||||
* 'parent_rel' is the relation entry to which the path corresponds.
|
* 'parent_rel' is the relation entry to which the path corresponds.
|
||||||
* 'new_path' is a potential path for parent_rel.
|
* 'new_path' is a potential path for parent_rel.
|
||||||
@ -711,6 +717,10 @@ add_path_precheck(RelOptInfo *parent_rel,
|
|||||||
* parallel such that each worker will generate a subset of the path's
|
* parallel such that each worker will generate a subset of the path's
|
||||||
* overall result.
|
* overall result.
|
||||||
*
|
*
|
||||||
|
* As in add_path, the partial_pathlist is kept sorted with the cheapest
|
||||||
|
* total path in front. Multiple places depend on this ordering; they
|
||||||
|
* just take the front entry as the cheapest path without searching.
|
||||||
|
*
|
||||||
* We don't generate parameterized partial paths for several reasons. Most
|
* We don't generate parameterized partial paths for several reasons. Most
|
||||||
* importantly, they're not safe to execute, because there's nothing to
|
* importantly, they're not safe to execute, because there's nothing to
|
||||||
* make sure that a parallel scan within the parameterized portion of the
|
* make sure that a parallel scan within the parameterized portion of the
|
||||||
@ -721,8 +731,8 @@ add_path_precheck(RelOptInfo *parent_rel,
|
|||||||
* side of the plan will be small anyway. There could be rare cases where
|
* side of the plan will be small anyway. There could be rare cases where
|
||||||
* this wins big - e.g. if join order constraints put a 1-row relation on
|
* this wins big - e.g. if join order constraints put a 1-row relation on
|
||||||
* the outer side of the topmost join with a parameterized plan on the inner
|
* the outer side of the topmost join with a parameterized plan on the inner
|
||||||
* side - but we'll have to be content not to handle such cases until somebody
|
* side - but we'll have to be content not to handle such cases until
|
||||||
* builds an executor infrastructure that can cope with them.
|
* somebody builds an executor infrastructure that can cope with them.
|
||||||
*
|
*
|
||||||
* Because we don't consider parameterized paths here, we also don't
|
* Because we don't consider parameterized paths here, we also don't
|
||||||
* need to consider the row counts as a measure of quality: every path will
|
* need to consider the row counts as a measure of quality: every path will
|
||||||
@ -730,6 +740,14 @@ add_path_precheck(RelOptInfo *parent_rel,
|
|||||||
* costs: parallelism is only used for plans that will be run to completion.
|
* costs: parallelism is only used for plans that will be run to completion.
|
||||||
* Therefore, this routine is much simpler than add_path: it needs to
|
* Therefore, this routine is much simpler than add_path: it needs to
|
||||||
* consider only pathkeys and total cost.
|
* consider only pathkeys and total cost.
|
||||||
|
*
|
||||||
|
* As with add_path, we pfree paths that are found to be dominated by
|
||||||
|
* another partial path; this requires that there be no other references to
|
||||||
|
* such paths yet. Hence, GatherPaths must not be created for a rel until
|
||||||
|
* we're done creating all partial paths for it. We do not currently build
|
||||||
|
* partial indexscan paths, so there is no need for an exception for
|
||||||
|
* IndexPaths here; for safety, we instead Assert that a path to be freed
|
||||||
|
* isn't an IndexPath.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
add_partial_path(RelOptInfo *parent_rel, Path *new_path)
|
add_partial_path(RelOptInfo *parent_rel, Path *new_path)
|
||||||
@ -808,7 +826,7 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
|
|||||||
{
|
{
|
||||||
parent_rel->partial_pathlist =
|
parent_rel->partial_pathlist =
|
||||||
list_delete_cell(parent_rel->partial_pathlist, p1, p1_prev);
|
list_delete_cell(parent_rel->partial_pathlist, p1, p1_prev);
|
||||||
/* add_path has a special case for IndexPath; we don't need it */
|
/* we should not see IndexPaths here, so always safe to delete */
|
||||||
Assert(!IsA(old_path, IndexPath));
|
Assert(!IsA(old_path, IndexPath));
|
||||||
pfree(old_path);
|
pfree(old_path);
|
||||||
/* p1_prev does not advance */
|
/* p1_prev does not advance */
|
||||||
@ -842,7 +860,7 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* add_path has a special case for IndexPath; we don't need it */
|
/* we should not see IndexPaths here, so always safe to delete */
|
||||||
Assert(!IsA(new_path, IndexPath));
|
Assert(!IsA(new_path, IndexPath));
|
||||||
/* Reject and recycle the new path */
|
/* Reject and recycle the new path */
|
||||||
pfree(new_path);
|
pfree(new_path);
|
||||||
|
Reference in New Issue
Block a user