1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-11 10:01:57 +03:00

Support parallel joins, and make related improvements.

The core innovation of this patch is the introduction of the concept
of a partial path; that is, a path which if executed in parallel will
generate a subset of the output rows in each process.  Gathering a
partial path produces an ordinary (complete) path.  This allows us to
generate paths for parallel joins by joining a partial path for one
side (which at the baserel level is currently always a Partial Seq
Scan) to an ordinary path on the other side.  This is subject to
various restrictions at present, especially that this strategy seems
unlikely to be sensible for merge joins, so only nested loops and
hash joins paths are generated.

This also allows an Append node to be pushed below a Gather node in
the case of a partitioned table.

Testing revealed that early versions of this patch made poor decisions
in some cases, which turned out to be caused by the fact that the
original cost model for Parallel Seq Scan wasn't very good.  So this
patch tries to make some modest improvements in that area.

There is much more to be done in the area of generating good parallel
plans in all cases, but this seems like a useful step forward.

Patch by me, reviewed by Dilip Kumar and Amit Kapila.
This commit is contained in:
Robert Haas
2016-01-20 14:29:22 -05:00
parent a7de3dc5c3
commit 45be99f8cd
15 changed files with 875 additions and 119 deletions

View File

@ -34,6 +34,12 @@ static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
JoinType jointype, JoinPathExtraData *extra);
static void consider_parallel_nestloop(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
JoinType jointype,
JoinPathExtraData *extra);
static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
JoinType jointype, JoinPathExtraData *extra);
@ -216,7 +222,12 @@ add_paths_to_joinrel(PlannerInfo *root,
jointype, &extra);
/*
* 6. Finally, give extensions a chance to manipulate the path list.
* 6. Consider gathering partial paths.
*/
generate_gather_paths(root, joinrel);
/*
* 7. Finally, give extensions a chance to manipulate the path list.
*/
if (set_join_pathlist_hook)
set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
@ -329,6 +340,62 @@ try_nestloop_path(PlannerInfo *root,
}
}
/*
* try_partial_nestloop_path
* Consider a partial nestloop join path; if it appears useful, push it into
* the joinrel's partial_pathlist via add_partial_path().
*/
static void
try_partial_nestloop_path(PlannerInfo *root,
RelOptInfo *joinrel,
Path *outer_path,
Path *inner_path,
List *pathkeys,
JoinType jointype,
JoinPathExtraData *extra)
{
JoinCostWorkspace workspace;
/*
* If the inner path is parameterized, the parameterization must be fully
* satisfied by the proposed outer path. Parameterized partial paths are
* not supported. The caller should already have verified that no
* extra_lateral_rels are required here.
*/
Assert(bms_is_empty(joinrel->lateral_relids));
if (inner_path->param_info != NULL)
{
Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
if (!bms_is_subset(inner_paramrels, outer_path->parent->relids))
return;
}
/*
* Before creating a path, get a quick lower bound on what it is likely
* to cost. Bail out right away if it looks terrible.
*/
initial_cost_nestloop(root, &workspace, jointype,
outer_path, inner_path,
extra->sjinfo, &extra->semifactors);
if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
return;
/* Might be good enough to be worth trying, so let's try it. */
add_partial_path(joinrel, (Path *)
create_nestloop_path(root,
joinrel,
jointype,
&workspace,
extra->sjinfo,
&extra->semifactors,
outer_path,
inner_path,
extra->restrictlist,
pathkeys,
NULL));
}
/*
* try_mergejoin_path
* Consider a merge join path; if it appears useful, push it into
@ -471,6 +538,62 @@ try_hashjoin_path(PlannerInfo *root,
}
}
/*
* try_partial_hashjoin_path
* Consider a partial hashjoin join path; if it appears useful, push it into
* the joinrel's partial_pathlist via add_partial_path().
*/
static void
try_partial_hashjoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
Path *outer_path,
Path *inner_path,
List *hashclauses,
JoinType jointype,
JoinPathExtraData *extra)
{
JoinCostWorkspace workspace;
/*
* If the inner path is parameterized, the parameterization must be fully
* satisfied by the proposed outer path. Parameterized partial paths are
* not supported. The caller should already have verified that no
* extra_lateral_rels are required here.
*/
Assert(bms_is_empty(joinrel->lateral_relids));
if (inner_path->param_info != NULL)
{
Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
if (!bms_is_empty(inner_paramrels))
return;
}
/*
* Before creating a path, get a quick lower bound on what it is likely
* to cost. Bail out right away if it looks terrible.
*/
initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
outer_path, inner_path,
extra->sjinfo, &extra->semifactors);
if (!add_partial_path_precheck(joinrel, workspace.total_cost, NIL))
return;
/* Might be good enough to be worth trying, so let's try it. */
add_partial_path(joinrel, (Path *)
create_hashjoin_path(root,
joinrel,
jointype,
&workspace,
extra->sjinfo,
&extra->semifactors,
outer_path,
inner_path,
extra->restrictlist,
NULL,
hashclauses));
}
/*
* clause_sides_match_join
* Determine whether a join clause is of the right form to use in this join.
@ -1063,6 +1186,85 @@ match_unsorted_outer(PlannerInfo *root,
break;
}
}
/*
* If the joinrel is parallel-safe and the join type supports nested loops,
* we may be able to consider a partial nestloop plan. However, we can't
* handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
* therefore we won't be able to properly guarantee uniqueness. Nor can
* we handle extra_lateral_rels, since partial paths must not be
* parameterized.
*/
if (joinrel->consider_parallel && nestjoinOK &&
save_jointype != JOIN_UNIQUE_OUTER &&
bms_is_empty(joinrel->lateral_relids))
consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
save_jointype, extra);
}
/*
* consider_parallel_nestloop
* Try to build partial paths for a joinrel by joining a partial path for the
* outer relation to a complete path for the inner relation.
*
* 'joinrel' is the join relation
* 'outerrel' is the outer join relation
* 'innerrel' is the inner join relation
* 'jointype' is the type of join to do
* 'extra' contains additional input values
*/
static void
consider_parallel_nestloop(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
JoinType jointype,
JoinPathExtraData *extra)
{
ListCell *lc1;
foreach(lc1, outerrel->partial_pathlist)
{
Path *outerpath = (Path *) lfirst(lc1);
List *pathkeys;
ListCell *lc2;
/* Figure out what useful ordering any paths we create will have. */
pathkeys = build_join_pathkeys(root, joinrel, jointype,
outerpath->pathkeys);
/*
* Try the cheapest parameterized paths; only those which will
* produce an unparameterized path when joined to this outerrel
* will survive try_partial_nestloop_path. The cheapest
* unparameterized path is also in this list.
*/
foreach(lc2, innerrel->cheapest_parameterized_paths)
{
Path *innerpath = (Path *) lfirst(lc2);
/* Can't join to an inner path that is not parallel-safe */
if (!innerpath->parallel_safe)
continue;
/*
* Like match_unsorted_outer, we only consider a single nestloop
* path when the jointype is JOIN_UNIQUE_INNER. But we have to scan
* cheapest_parameterized_paths to find the one we want to consider,
* because cheapest_total_path might not be parallel-safe.
*/
if (jointype == JOIN_UNIQUE_INNER)
{
if (!bms_is_empty(PATH_REQ_OUTER(innerpath)))
continue;
innerpath = (Path *) create_unique_path(root, innerrel,
innerpath, extra->sjinfo);
}
try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
pathkeys, jointype, extra);
}
}
}
/*
@ -1240,6 +1442,55 @@ hash_inner_and_outer(PlannerInfo *root,
}
}
}
/*
* If the joinrel is parallel-safe, we may be able to consider a
* partial hash join. However, we can't handle JOIN_UNIQUE_OUTER,
* because the outer path will be partial, and therefore we won't be
* able to properly guarantee uniqueness. Also, the resulting path
* must not be parameterized.
*/
if (joinrel->consider_parallel && jointype != JOIN_UNIQUE_OUTER &&
outerrel->partial_pathlist != NIL &&
bms_is_empty(joinrel->lateral_relids))
{
Path *cheapest_partial_outer;
Path *cheapest_safe_inner = NULL;
cheapest_partial_outer =
(Path *) linitial(outerrel->partial_pathlist);
/*
* Normally, given that the joinrel is parallel-safe, the cheapest
* total inner path will also be parallel-safe, but if not, we'll
* have to search cheapest_parameterized_paths for the cheapest
* unparameterized inner path.
*/
if (cheapest_total_inner->parallel_safe)
cheapest_safe_inner = cheapest_total_inner;
else
{
ListCell *lc;
foreach(lc, innerrel->cheapest_parameterized_paths)
{
Path *innerpath = (Path *) lfirst(lc);
if (innerpath->parallel_safe &&
bms_is_empty(PATH_REQ_OUTER(innerpath)))
{
cheapest_safe_inner = innerpath;
break;
}
}
}
if (cheapest_safe_inner != NULL)
try_partial_hashjoin_path(root, joinrel,
cheapest_partial_outer,
cheapest_safe_inner,
hashclauses, jointype, extra);
}
}
}