1
0
mirror of https://github.com/postgres/postgres.git synced 2025-09-08 00:47:37 +03:00

Support "Right Semi Join" plan shapes

Hash joins can support semijoin with the LHS input on the right, using
the existing logic for inner join, combined with the assurance that only
the first match for each inner tuple is considered, which can be
achieved by leveraging the HEAP_TUPLE_HAS_MATCH flag.  This can be very
useful in some cases since we may now have the option to hash the
smaller table instead of the larger.

Merge join could likely support "Right Semi Join" too.  However, the
benefit of swapping inputs tends to be small here, so we do not address
that in this patch.

Note that this patch also modifies a test query in join.sql to ensure it
continues testing as intended.  With this patch the original query would
result in a right-semi-join rather than semi-join, compromising its
original purpose of testing the fix for neqjoinsel's behavior for
semi-joins.

Author: Richard Guo
Reviewed-by: wenhui qiu, Alena Rybakina, Japin Li
Discussion: https://postgr.es/m/CAMbWs4_X1mN=ic+SxcyymUqFx9bB8pqSLTGJ-F=MHy4PW3eRXw@mail.gmail.com
This commit is contained in:
Richard Guo
2024-07-05 09:26:48 +09:00
parent 5a519abedd
commit aa86129e19
14 changed files with 222 additions and 173 deletions

View File

@@ -288,8 +288,8 @@ add_paths_to_joinrel(PlannerInfo *root,
* sorted. This includes both nestloops and mergejoins where the outer
* path is already ordered. Again, skip this if we can't mergejoin.
* (That's okay because we know that nestloop can't handle
* right/right-anti/full joins at all, so it wouldn't work in the
* prohibited cases either.)
* right/right-anti/right-semi/full joins at all, so it wouldn't work in
* the prohibited cases either.)
*/
if (mergejoin_allowed)
match_unsorted_outer(root, joinrel, outerrel, innerrel,
@@ -1728,6 +1728,13 @@ match_unsorted_outer(PlannerInfo *root,
Path *matpath = NULL;
ListCell *lc1;
/*
* For now we do not support RIGHT_SEMI join in mergejoin or nestloop
* join.
*/
if (jointype == JOIN_RIGHT_SEMI)
return;
/*
* Nestloop only supports inner, left, semi, and anti joins. Also, if we
* are doing a right, right-anti or full mergejoin, we must use *all* the
@@ -2297,12 +2304,13 @@ hash_inner_and_outer(PlannerInfo *root,
* total inner path will also be parallel-safe, but if not, we'll
* have to search for the cheapest safe, unparameterized inner
* path. If doing JOIN_UNIQUE_INNER, we can't use any alternative
* inner path. If full, right, or right-anti join, we can't use
* parallelism (building the hash table in each backend) because
* no one process has all the match bits.
* inner path. If full, right, right-semi or right-anti join, we
* can't use parallelism (building the hash table in each backend)
* because no one process has all the match bits.
*/
if (save_jointype == JOIN_FULL ||
save_jointype == JOIN_RIGHT ||
save_jointype == JOIN_RIGHT_SEMI ||
save_jointype == JOIN_RIGHT_ANTI)
cheapest_safe_inner = NULL;
else if (cheapest_total_inner->parallel_safe)
@@ -2327,13 +2335,13 @@ hash_inner_and_outer(PlannerInfo *root,
* Returns a list of RestrictInfo nodes for those clauses.
*
* *mergejoin_allowed is normally set to true, but it is set to false if
* this is a right/right-anti/full join and there are nonmergejoinable join
* clauses. The executor's mergejoin machinery cannot handle such cases, so
* we have to avoid generating a mergejoin plan. (Note that this flag does
* NOT consider whether there are actually any mergejoinable clauses. This is
* correct because in some cases we need to build a clauseless mergejoin.
* Simply returning NIL is therefore not enough to distinguish safe from
* unsafe cases.)
* this is a right-semi join, or this is a right/right-anti/full join and
* there are nonmergejoinable join clauses. The executor's mergejoin
* machinery cannot handle such cases, so we have to avoid generating a
* mergejoin plan. (Note that this flag does NOT consider whether there are
* actually any mergejoinable clauses. This is correct because in some
* cases we need to build a clauseless mergejoin. Simply returning NIL is
* therefore not enough to distinguish safe from unsafe cases.)
*
* We also mark each selected RestrictInfo to show which side is currently
* being considered as outer. These are transient markings that are only
@@ -2357,6 +2365,16 @@ select_mergejoin_clauses(PlannerInfo *root,
bool have_nonmergeable_joinclause = false;
ListCell *l;
/*
* For now we do not support RIGHT_SEMI join in mergejoin: the benefit of
* swapping inputs tends to be small here.
*/
if (jointype == JOIN_RIGHT_SEMI)
{
*mergejoin_allowed = false;
return NIL;
}
foreach(l, restrictlist)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);

View File

@@ -991,6 +991,9 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
add_paths_to_joinrel(root, joinrel, rel1, rel2,
JOIN_SEMI, sjinfo,
restrictlist);
add_paths_to_joinrel(root, joinrel, rel2, rel1,
JOIN_RIGHT_SEMI, sjinfo,
restrictlist);
}
/*

View File

@@ -1294,6 +1294,9 @@ build_join_pathkeys(PlannerInfo *root,
JoinType jointype,
List *outer_pathkeys)
{
/* RIGHT_SEMI should not come here */
Assert(jointype != JOIN_RIGHT_SEMI);
if (jointype == JOIN_FULL ||
jointype == JOIN_RIGHT ||
jointype == JOIN_RIGHT_ANTI)

View File

@@ -455,8 +455,8 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
* point of the available_rels machinations is to ensure that we only
* pull up quals for which that's okay.
*
* We don't expect to see any pre-existing JOIN_SEMI, JOIN_ANTI, or
* JOIN_RIGHT_ANTI jointypes here.
* We don't expect to see any pre-existing JOIN_SEMI, JOIN_ANTI,
* JOIN_RIGHT_SEMI, or JOIN_RIGHT_ANTI jointypes here.
*/
switch (j->jointype)
{
@@ -2950,7 +2950,7 @@ reduce_outer_joins_pass2(Node *jtnode,
* These could only have been introduced by pull_up_sublinks,
* so there's no way that upper quals could refer to their
* righthand sides, and no point in checking. We don't expect
* to see JOIN_RIGHT_ANTI yet.
* to see JOIN_RIGHT_SEMI or JOIN_RIGHT_ANTI yet.
*/
break;
default: