diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 8bb54733303..11f02dd8da8 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -28,6 +28,16 @@ set_join_pathlist_hook_type set_join_pathlist_hook = NULL; #define PATH_PARAM_BY_REL(path, rel) \ ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids)) +static void try_partial_mergejoin_path(PlannerInfo *root, + RelOptInfo *joinrel, + Path *outer_path, + Path *inner_path, + List *pathkeys, + List *mergeclauses, + List *outersortkeys, + List *innersortkeys, + JoinType jointype, + JoinPathExtraData *extra); static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra); @@ -40,6 +50,13 @@ static void consider_parallel_nestloop(PlannerInfo *root, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra); +static void consider_parallel_mergejoin(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + JoinPathExtraData *extra, + Path *inner_cheapest_total); static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra); @@ -58,7 +75,8 @@ static void generate_mergejoin_paths(PlannerInfo *root, JoinPathExtraData *extra, bool useallclauses, Path *inner_cheapest_total, - List *merge_pathkeys); + List *merge_pathkeys, + bool is_partial); /* @@ -416,11 +434,27 @@ try_mergejoin_path(PlannerInfo *root, List *outersortkeys, List *innersortkeys, JoinType jointype, - JoinPathExtraData *extra) + JoinPathExtraData *extra, + bool is_partial) { Relids required_outer; JoinCostWorkspace workspace; + if (is_partial) + { + try_partial_mergejoin_path(root, + joinrel, + outer_path, + inner_path, + pathkeys, + mergeclauses, + outersortkeys, + innersortkeys, + jointype, + extra); + return; + } + /* * Check to see if proposed path is still parameterized, and reject if the * parameterization wouldn't be sensible. @@ -480,6 +514,76 @@ try_mergejoin_path(PlannerInfo *root, } } +/* + * try_partial_mergejoin_path + * Consider a partial merge join path; if it appears useful, push it into + * the joinrel's pathlist via add_partial_path(). + */ +static void +try_partial_mergejoin_path(PlannerInfo *root, + RelOptInfo *joinrel, + Path *outer_path, + Path *inner_path, + List *pathkeys, + List *mergeclauses, + List *outersortkeys, + List *innersortkeys, + JoinType jointype, + JoinPathExtraData *extra) +{ + JoinCostWorkspace workspace; + + /* + * See comments in try_partial_hashjoin_path(). + */ + Assert(bms_is_empty(joinrel->lateral_relids)); + if (inner_path->param_info != NULL) + { + Relids inner_paramrels = inner_path->param_info->ppi_req_outer; + + if (!bms_is_empty(inner_paramrels)) + return; + } + + /* + * If the given paths are already well enough ordered, we can skip doing + * an explicit sort. + */ + if (outersortkeys && + pathkeys_contained_in(outersortkeys, outer_path->pathkeys)) + outersortkeys = NIL; + if (innersortkeys && + pathkeys_contained_in(innersortkeys, inner_path->pathkeys)) + innersortkeys = NIL; + + /* + * See comments in try_partial_nestloop_path(). + */ + initial_cost_mergejoin(root, &workspace, jointype, mergeclauses, + outer_path, inner_path, + outersortkeys, innersortkeys, + extra->sjinfo); + + if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys)) + return; + + /* Might be good enough to be worth trying, so let's try it. */ + add_partial_path(joinrel, (Path *) + create_mergejoin_path(root, + joinrel, + jointype, + &workspace, + extra->sjinfo, + outer_path, + inner_path, + extra->restrictlist, + pathkeys, + NULL, + mergeclauses, + outersortkeys, + innersortkeys)); +} + /* * try_hashjoin_path * Consider a hash join path; if it appears useful, push it into @@ -649,8 +753,11 @@ sort_inner_and_outer(PlannerInfo *root, JoinType jointype, JoinPathExtraData *extra) { + JoinType save_jointype = jointype; Path *outer_path; Path *inner_path; + Path *cheapest_partial_outer; + Path *cheapest_safe_inner = NULL; List *all_pathkeys; ListCell *l; @@ -699,6 +806,30 @@ sort_inner_and_outer(PlannerInfo *root, jointype = JOIN_INNER; } + /* + * If the joinrel is parallel-safe, we may be able to consider a partial + * merge join. However, we can't handle JOIN_UNIQUE_OUTER, because the + * outer path will be partial, and therefore we won't be able to properly + * guarantee uniqueness. Similarly, we can't handle JOIN_FULL and + * JOIN_RIGHT, because they can produce false null extended rows. Also, + * the resulting path must not be parameterized. + */ + if (joinrel->consider_parallel && + save_jointype != JOIN_UNIQUE_OUTER && + save_jointype != JOIN_FULL && + save_jointype != JOIN_RIGHT && + outerrel->partial_pathlist != NIL && + bms_is_empty(joinrel->lateral_relids)) + { + cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist); + + if (inner_path->parallel_safe) + cheapest_safe_inner = inner_path; + else if (save_jointype != JOIN_UNIQUE_INNER) + cheapest_safe_inner = + get_cheapest_parallel_safe_total_inner(innerrel->pathlist); + } + /* * Each possible ordering of the available mergejoin clauses will generate * a differently-sorted result path at essentially the same cost. We have @@ -781,7 +912,24 @@ sort_inner_and_outer(PlannerInfo *root, outerkeys, innerkeys, jointype, - extra); + extra, + false); + + /* + * If we have partial outer and parallel safe inner path then try + * partial mergejoin path. + */ + if (cheapest_partial_outer && cheapest_safe_inner) + try_partial_mergejoin_path(root, + joinrel, + cheapest_partial_outer, + cheapest_safe_inner, + merge_pathkeys, + cur_mergeclauses, + outerkeys, + innerkeys, + jointype, + extra); } } @@ -808,7 +956,8 @@ generate_mergejoin_paths(PlannerInfo *root, JoinPathExtraData *extra, bool useallclauses, Path *inner_cheapest_total, - List *merge_pathkeys) + List *merge_pathkeys, + bool is_partial) { List *mergeclauses; List *innersortkeys; @@ -868,7 +1017,8 @@ generate_mergejoin_paths(PlannerInfo *root, NIL, innersortkeys, jointype, - extra); + extra, + is_partial); /* Can't do anything else if inner path needs to be unique'd */ if (save_jointype == JOIN_UNIQUE_INNER) @@ -937,7 +1087,7 @@ generate_mergejoin_paths(PlannerInfo *root, trialsortkeys, NULL, TOTAL_COST, - false); + is_partial); if (innerpath != NULL && (cheapest_total_inner == NULL || compare_path_costs(innerpath, cheapest_total_inner, @@ -965,7 +1115,8 @@ generate_mergejoin_paths(PlannerInfo *root, NIL, NIL, jointype, - extra); + extra, + is_partial); cheapest_total_inner = innerpath; } /* Same on the basis of cheapest startup cost ... */ @@ -973,7 +1124,7 @@ generate_mergejoin_paths(PlannerInfo *root, trialsortkeys, NULL, STARTUP_COST, - false); + is_partial); if (innerpath != NULL && (cheapest_startup_inner == NULL || compare_path_costs(innerpath, cheapest_startup_inner, @@ -1009,7 +1160,8 @@ generate_mergejoin_paths(PlannerInfo *root, NIL, NIL, jointype, - extra); + extra, + is_partial); } cheapest_startup_inner = innerpath; } @@ -1221,22 +1373,91 @@ match_unsorted_outer(PlannerInfo *root, /* Generate merge join paths */ generate_mergejoin_paths(root, joinrel, innerrel, outerpath, save_jointype, extra, useallclauses, - inner_cheapest_total, merge_pathkeys); + inner_cheapest_total, merge_pathkeys, + false); } /* - * If the joinrel is parallel-safe and the join type supports nested - * loops, we may be able to consider a partial nestloop plan. However, we - * can't handle JOIN_UNIQUE_OUTER, because the outer path will be partial, - * and therefore we won't be able to properly guarantee uniqueness. Nor - * can we handle extra_lateral_rels, since partial paths must not be - * parameterized. + * Consider partial nestloop and mergejoin plan if outerrel has any + * partial path and the joinrel is parallel-safe. However, we can't + * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and + * therefore we won't be able to properly guarantee uniqueness. Nor can + * we handle extra_lateral_rels, since partial paths must not be + * parameterized. Similarly, we can't handle JOIN_FULL and JOIN_RIGHT, + * because they can produce false null extended rows. */ - if (joinrel->consider_parallel && nestjoinOK && + if (joinrel->consider_parallel && save_jointype != JOIN_UNIQUE_OUTER && + save_jointype != JOIN_FULL && + save_jointype != JOIN_RIGHT && + outerrel->partial_pathlist != NIL && bms_is_empty(joinrel->lateral_relids)) - consider_parallel_nestloop(root, joinrel, outerrel, innerrel, - save_jointype, extra); + { + if (nestjoinOK) + consider_parallel_nestloop(root, joinrel, outerrel, innerrel, + save_jointype, extra); + + /* + * If inner_cheapest_total is NULL or non parallel-safe then find the + * cheapest total parallel safe path. If doing JOIN_UNIQUE_INNER, we + * can't use any alternative inner path. + */ + if (inner_cheapest_total == NULL || + !inner_cheapest_total->parallel_safe) + { + if (save_jointype == JOIN_UNIQUE_INNER) + return; + + inner_cheapest_total = get_cheapest_parallel_safe_total_inner( + innerrel->pathlist); + } + + if (inner_cheapest_total) + consider_parallel_mergejoin(root, joinrel, outerrel, innerrel, + save_jointype, extra, + inner_cheapest_total); + } +} + +/* + * consider_parallel_mergejoin + * Try to build partial paths for a joinrel by joining a partial path + * for the outer relation to a complete path for the inner relation. + * + * 'joinrel' is the join relation + * 'outerrel' is the outer join relation + * 'innerrel' is the inner join relation + * 'jointype' is the type of join to do + * 'extra' contains additional input values + * 'inner_cheapest_total' cheapest total path for innerrel + */ +static void +consider_parallel_mergejoin(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + JoinPathExtraData *extra, + Path *inner_cheapest_total) +{ + ListCell *lc1; + + /* generate merge join path for each partial outer path */ + foreach(lc1, outerrel->partial_pathlist) + { + Path *outerpath = (Path *) lfirst(lc1); + List *merge_pathkeys; + + /* + * Figure out what useful ordering any paths we create will have. + */ + merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, + outerpath->pathkeys); + + generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype, + extra, false, inner_cheapest_total, + merge_pathkeys, true); + } } /* diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out index a5a22323c17..75558d05e08 100644 --- a/src/test/regress/expected/select_parallel.out +++ b/src/test/regress/expected/select_parallel.out @@ -169,6 +169,31 @@ select count(*) from tenk1 where thousand > 95; reset enable_seqscan; reset enable_bitmapscan; +-- test parallel merge join path. +set enable_hashjoin to off; +set enable_nestloop to off; +explain (costs off) + select count(*) from tenk1, tenk2 where tenk1.unique1 = tenk2.unique1; + QUERY PLAN +------------------------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 4 + -> Partial Aggregate + -> Merge Join + Merge Cond: (tenk1.unique1 = tenk2.unique1) + -> Parallel Index Only Scan using tenk1_unique1 on tenk1 + -> Index Only Scan using tenk2_unique1 on tenk2 +(8 rows) + +select count(*) from tenk1, tenk2 where tenk1.unique1 = tenk2.unique1; + count +------- + 10000 +(1 row) + +reset enable_hashjoin; +reset enable_nestloop; set force_parallel_mode=1; explain (costs off) select stringu1::int2 from tenk1 where unique1 = 1; diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql index d72addf5a24..ebdae7e9391 100644 --- a/src/test/regress/sql/select_parallel.sql +++ b/src/test/regress/sql/select_parallel.sql @@ -64,6 +64,16 @@ select count(*) from tenk1 where thousand > 95; reset enable_seqscan; reset enable_bitmapscan; +-- test parallel merge join path. +set enable_hashjoin to off; +set enable_nestloop to off; + +explain (costs off) + select count(*) from tenk1, tenk2 where tenk1.unique1 = tenk2.unique1; +select count(*) from tenk1, tenk2 where tenk1.unique1 = tenk2.unique1; + +reset enable_hashjoin; +reset enable_nestloop; set force_parallel_mode=1; explain (costs off)