mirror of
https://github.com/postgres/postgres.git
synced 2025-07-30 11:03:19 +03:00
Support "Right Anti Join" plan shapes.
Merge and hash joins can support antijoin with the non-nullable input on the right, using very simple combinations of their existing logic for right join and anti join. This gives the planner more freedom about how to order the join. It's particularly useful for hash join, since we may now have the option to hash the smaller table instead of the larger.

Richard Guo, reviewed by Ronan Dunklau and myself

Discussion: https://postgr.es/m/CAMbWs48xh9hMzXzSy3VaPzGAz+fkxXXTUbCLohX1_L8THFRm2Q@mail.gmail.com
This commit is contained in:
@ -3330,7 +3330,8 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
|
||||
outerstartsel = 0.0;
|
||||
outerendsel = 1.0;
|
||||
}
|
||||
else if (jointype == JOIN_RIGHT)
|
||||
else if (jointype == JOIN_RIGHT ||
|
||||
jointype == JOIN_RIGHT_ANTI)
|
||||
{
|
||||
innerstartsel = 0.0;
|
||||
innerendsel = 1.0;
|
||||
|
@ -286,8 +286,9 @@ add_paths_to_joinrel(PlannerInfo *root,
|
||||
* 2. Consider paths where the outer relation need not be explicitly
|
||||
* sorted. This includes both nestloops and mergejoins where the outer
|
||||
* path is already ordered. Again, skip this if we can't mergejoin.
|
||||
* (That's okay because we know that nestloop can't handle right/full
|
||||
* joins at all, so it wouldn't work in the prohibited cases either.)
|
||||
* (That's okay because we know that nestloop can't handle
|
||||
* right/right-anti/full joins at all, so it wouldn't work in the
|
||||
* prohibited cases either.)
|
||||
*/
|
||||
if (mergejoin_allowed)
|
||||
match_unsorted_outer(root, joinrel, outerrel, innerrel,
|
||||
@ -1261,14 +1262,15 @@ sort_inner_and_outer(PlannerInfo *root,
|
||||
* If the joinrel is parallel-safe, we may be able to consider a partial
|
||||
* merge join. However, we can't handle JOIN_UNIQUE_OUTER, because the
|
||||
* outer path will be partial, and therefore we won't be able to properly
|
||||
* guarantee uniqueness. Similarly, we can't handle JOIN_FULL and
|
||||
* JOIN_RIGHT, because they can produce false null extended rows. Also,
|
||||
* the resulting path must not be parameterized.
|
||||
* guarantee uniqueness. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT
|
||||
* and JOIN_RIGHT_ANTI, because they can produce false null extended rows.
|
||||
* Also, the resulting path must not be parameterized.
|
||||
*/
|
||||
if (joinrel->consider_parallel &&
|
||||
save_jointype != JOIN_UNIQUE_OUTER &&
|
||||
save_jointype != JOIN_FULL &&
|
||||
save_jointype != JOIN_RIGHT &&
|
||||
save_jointype != JOIN_RIGHT_ANTI &&
|
||||
outerrel->partial_pathlist != NIL &&
|
||||
bms_is_empty(joinrel->lateral_relids))
|
||||
{
|
||||
@ -1663,10 +1665,10 @@ match_unsorted_outer(PlannerInfo *root,
|
||||
|
||||
/*
|
||||
* Nestloop only supports inner, left, semi, and anti joins. Also, if we
|
||||
* are doing a right or full mergejoin, we must use *all* the mergeclauses
|
||||
* as join clauses, else we will not have a valid plan. (Although these
|
||||
* two flags are currently inverses, keep them separate for clarity and
|
||||
* possible future changes.)
|
||||
* are doing a right, right-anti or full mergejoin, we must use *all* the
|
||||
* mergeclauses as join clauses, else we will not have a valid plan.
|
||||
* (Although these two flags are currently inverses, keep them separate
|
||||
* for clarity and possible future changes.)
|
||||
*/
|
||||
switch (jointype)
|
||||
{
|
||||
@ -1678,6 +1680,7 @@ match_unsorted_outer(PlannerInfo *root,
|
||||
useallclauses = false;
|
||||
break;
|
||||
case JOIN_RIGHT:
|
||||
case JOIN_RIGHT_ANTI:
|
||||
case JOIN_FULL:
|
||||
nestjoinOK = false;
|
||||
useallclauses = true;
|
||||
@ -1849,13 +1852,14 @@ match_unsorted_outer(PlannerInfo *root,
|
||||
* handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
|
||||
* therefore we won't be able to properly guarantee uniqueness. Nor can
|
||||
* we handle joins needing lateral rels, since partial paths must not be
|
||||
* parameterized. Similarly, we can't handle JOIN_FULL and JOIN_RIGHT,
|
||||
* because they can produce false null extended rows.
|
||||
* parameterized. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT and
|
||||
* JOIN_RIGHT_ANTI, because they can produce false null extended rows.
|
||||
*/
|
||||
if (joinrel->consider_parallel &&
|
||||
save_jointype != JOIN_UNIQUE_OUTER &&
|
||||
save_jointype != JOIN_FULL &&
|
||||
save_jointype != JOIN_RIGHT &&
|
||||
save_jointype != JOIN_RIGHT_ANTI &&
|
||||
outerrel->partial_pathlist != NIL &&
|
||||
bms_is_empty(joinrel->lateral_relids))
|
||||
{
|
||||
@ -2228,11 +2232,13 @@ hash_inner_and_outer(PlannerInfo *root,
|
||||
* total inner path will also be parallel-safe, but if not, we'll
|
||||
* have to search for the cheapest safe, unparameterized inner
|
||||
* path. If doing JOIN_UNIQUE_INNER, we can't use any alternative
|
||||
* inner path. If full or right join, we can't use parallelism
|
||||
* (building the hash table in each backend) because no one
|
||||
* process has all the match bits.
|
||||
* inner path. If full, right, or right-anti join, we can't use
|
||||
* parallelism (building the hash table in each backend) because
|
||||
* no one process has all the match bits.
|
||||
*/
|
||||
if (save_jointype == JOIN_FULL || save_jointype == JOIN_RIGHT)
|
||||
if (save_jointype == JOIN_FULL ||
|
||||
save_jointype == JOIN_RIGHT ||
|
||||
save_jointype == JOIN_RIGHT_ANTI)
|
||||
cheapest_safe_inner = NULL;
|
||||
else if (cheapest_total_inner->parallel_safe)
|
||||
cheapest_safe_inner = cheapest_total_inner;
|
||||
@ -2256,10 +2262,10 @@ hash_inner_and_outer(PlannerInfo *root,
|
||||
* Returns a list of RestrictInfo nodes for those clauses.
|
||||
*
|
||||
* *mergejoin_allowed is normally set to true, but it is set to false if
|
||||
* this is a right/full join and there are nonmergejoinable join clauses.
|
||||
* The executor's mergejoin machinery cannot handle such cases, so we have
|
||||
* to avoid generating a mergejoin plan. (Note that this flag does NOT
|
||||
* consider whether there are actually any mergejoinable clauses. This is
|
||||
* this is a right/right-anti/full join and there are nonmergejoinable join
|
||||
* clauses. The executor's mergejoin machinery cannot handle such cases, so
|
||||
* we have to avoid generating a mergejoin plan. (Note that this flag does
|
||||
* NOT consider whether there are actually any mergejoinable clauses. This is
|
||||
* correct because in some cases we need to build a clauseless mergejoin.
|
||||
* Simply returning NIL is therefore not enough to distinguish safe from
|
||||
* unsafe cases.)
|
||||
@ -2305,8 +2311,8 @@ select_mergejoin_clauses(PlannerInfo *root,
|
||||
{
|
||||
/*
|
||||
* The executor can handle extra joinquals that are constants, but
|
||||
* not anything else, when doing right/full merge join. (The
|
||||
* reason to support constants is so we can do FULL JOIN ON
|
||||
* not anything else, when doing right/right-anti/full merge join.
|
||||
* (The reason to support constants is so we can do FULL JOIN ON
|
||||
* FALSE.)
|
||||
*/
|
||||
if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
|
||||
@ -2349,6 +2355,7 @@ select_mergejoin_clauses(PlannerInfo *root,
|
||||
switch (jointype)
|
||||
{
|
||||
case JOIN_RIGHT:
|
||||
case JOIN_RIGHT_ANTI:
|
||||
case JOIN_FULL:
|
||||
*mergejoin_allowed = !have_nonmergeable_joinclause;
|
||||
break;
|
||||
|
@ -925,6 +925,9 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
|
||||
add_paths_to_joinrel(root, joinrel, rel1, rel2,
|
||||
JOIN_ANTI, sjinfo,
|
||||
restrictlist);
|
||||
add_paths_to_joinrel(root, joinrel, rel2, rel1,
|
||||
JOIN_RIGHT_ANTI, sjinfo,
|
||||
restrictlist);
|
||||
break;
|
||||
default:
|
||||
/* other values not expected here */
|
||||
|
@ -1077,9 +1077,9 @@ find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle)
|
||||
* Build the path keys for a join relation constructed by mergejoin or
|
||||
* nestloop join. This is normally the same as the outer path's keys.
|
||||
*
|
||||
* EXCEPTION: in a FULL or RIGHT join, we cannot treat the result as
|
||||
* having the outer path's path keys, because null lefthand rows may be
|
||||
* inserted at random points. It must be treated as unsorted.
|
||||
* EXCEPTION: in a FULL, RIGHT or RIGHT_ANTI join, we cannot treat the
|
||||
* result as having the outer path's path keys, because null lefthand rows
|
||||
* may be inserted at random points. It must be treated as unsorted.
|
||||
*
|
||||
* We truncate away any pathkeys that are uninteresting for higher joins.
|
||||
*
|
||||
@ -1095,7 +1095,9 @@ build_join_pathkeys(PlannerInfo *root,
|
||||
JoinType jointype,
|
||||
List *outer_pathkeys)
|
||||
{
|
||||
if (jointype == JOIN_FULL || jointype == JOIN_RIGHT)
|
||||
if (jointype == JOIN_FULL ||
|
||||
jointype == JOIN_RIGHT ||
|
||||
jointype == JOIN_RIGHT_ANTI)
|
||||
return NIL;
|
||||
|
||||
/*
|
||||
|
@ -406,8 +406,8 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
|
||||
* point of the available_rels machinations is to ensure that we only
|
||||
* pull up quals for which that's okay.
|
||||
*
|
||||
* We don't expect to see any pre-existing JOIN_SEMI or JOIN_ANTI
|
||||
* nodes here.
|
||||
* We don't expect to see any pre-existing JOIN_SEMI, JOIN_ANTI, or
|
||||
* JOIN_RIGHT_ANTI jointypes here.
|
||||
*/
|
||||
switch (j->jointype)
|
||||
{
|
||||
@ -2640,9 +2640,10 @@ flatten_simple_union_all(PlannerInfo *root)
|
||||
* distribute_qual_to_rels to get rid of such clauses.
|
||||
*
|
||||
* Also, we get rid of JOIN_RIGHT cases by flipping them around to become
|
||||
* JOIN_LEFT. This saves some code here and in some later planner routines,
|
||||
* but the main reason to do it is to not need to invent a JOIN_REVERSE_ANTI
|
||||
* join type.
|
||||
* JOIN_LEFT. This saves some code here and in some later planner routines;
|
||||
* the main benefit is to reduce the number of jointypes that can appear in
|
||||
* SpecialJoinInfo nodes. Note that we can still generate Paths and Plans
|
||||
* that use JOIN_RIGHT (or JOIN_RIGHT_ANTI) by switching the inputs again.
|
||||
*
|
||||
* To ease recognition of strict qual clauses, we require this routine to be
|
||||
* run after expression preprocessing (i.e., qual canonicalization and JOIN
|
||||
@ -2896,7 +2897,8 @@ reduce_outer_joins_pass2(Node *jtnode,
|
||||
/*
|
||||
* These could only have been introduced by pull_up_sublinks,
|
||||
* so there's no way that upper quals could refer to their
|
||||
* righthand sides, and no point in checking.
|
||||
* righthand sides, and no point in checking. We don't expect
|
||||
* to see JOIN_RIGHT_ANTI yet.
|
||||
*/
|
||||
break;
|
||||
default:
|
||||
|
Reference in New Issue
Block a user