mirror of
https://github.com/postgres/postgres.git
synced 2025-05-05 09:19:17 +03:00
Further fixes for degenerate outer join clauses.
Further testing revealed that commit f69b4b9495269cc4 was still a few bricks shy of a load: minor tweaking of the previous test cases resulted in the same wrong-outer-join-order problem coming back. After study I concluded that my previous changes in make_outerjoininfo() were just accidentally masking the problem, and should be reverted in favor of forcing syntactic join order whenever an upper outer join's predicate doesn't mention a lower outer join's LHS. This still allows the chained-outer-joins style that is the normally optimizable case. I also tightened things up some more in join_is_legal(). It seems to me on review that what's really happening in the exception case where we ignore a mismatched special join is that we're allowing the proposed join to associate into the RHS of the outer join we're comparing it to. As such, we should *always* insist that the proposed join be a left join, which eliminates a bunch of rather dubious argumentation. The case where we weren't enforcing that was the one that was already known buggy anyway (it had a violatable Assert before the aforesaid commit) so it hardly deserves a lot of deference. Back-patch to all active branches, like the previous patch. The added regression test case failed in all branches back to 9.1, and I think it's only an unrelated change in costing calculations that kept 9.0 from choosing a broken plan.
This commit is contained in:
parent
6d9864d900
commit
df3b0f47b9
@ -241,12 +241,23 @@ non-FULL joins can be freely associated into the lefthand side of an
|
|||||||
OJ, but in some cases they can't be associated into the righthand side.
|
OJ, but in some cases they can't be associated into the righthand side.
|
||||||
So the restriction enforced by join_is_legal is that a proposed join
|
So the restriction enforced by join_is_legal is that a proposed join
|
||||||
can't join a rel within or partly within an RHS boundary to one outside
|
can't join a rel within or partly within an RHS boundary to one outside
|
||||||
the boundary, unless the join validly implements some outer join.
|
the boundary, unless the proposed join is a LEFT join that can associate
|
||||||
(To support use of identity 3, we have to allow cases where an apparent
|
into the SpecialJoinInfo's RHS using identity 3.
|
||||||
violation of a lower OJ's RHS is committed while forming an upper OJ.
|
|
||||||
If this wouldn't in fact be legal, the upper OJ's minimum LHS or RHS
|
The use of minimum Relid sets has some pitfalls; consider a query like
|
||||||
set must be expanded to include the whole of the lower OJ, thereby
|
A leftjoin (B leftjoin (C innerjoin D) on (Pbcd)) on Pa
|
||||||
preventing it from being formed before the lower OJ is.)
|
where Pa doesn't mention B/C/D at all. In this case a naive computation
|
||||||
|
would give the upper leftjoin's min LHS as {A} and min RHS as {C,D} (since
|
||||||
|
we know that the innerjoin can't associate out of the leftjoin's RHS, and
|
||||||
|
enforce that by including its relids in the leftjoin's min RHS). And the
|
||||||
|
lower leftjoin has min LHS of {B} and min RHS of {C,D}. Given such
|
||||||
|
information, join_is_legal would think it's okay to associate the upper
|
||||||
|
join into the lower join's RHS, transforming the query to
|
||||||
|
B leftjoin (A leftjoin (C innerjoin D) on Pa) on (Pbcd)
|
||||||
|
which yields totally wrong answers. We prevent that by forcing the min LHS
|
||||||
|
for the upper join to include B. This is perhaps overly restrictive, but
|
||||||
|
such cases don't arise often so it's not clear that it's worth developing a
|
||||||
|
more complicated system.
|
||||||
|
|
||||||
|
|
||||||
Pulling Up Subqueries
|
Pulling Up Subqueries
|
||||||
|
@ -331,7 +331,7 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
|
|||||||
SpecialJoinInfo *match_sjinfo;
|
SpecialJoinInfo *match_sjinfo;
|
||||||
bool reversed;
|
bool reversed;
|
||||||
bool unique_ified;
|
bool unique_ified;
|
||||||
bool is_valid_inner;
|
bool must_be_leftjoin;
|
||||||
bool lateral_fwd;
|
bool lateral_fwd;
|
||||||
bool lateral_rev;
|
bool lateral_rev;
|
||||||
ListCell *l;
|
ListCell *l;
|
||||||
@ -346,12 +346,12 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
|
|||||||
/*
|
/*
|
||||||
* If we have any special joins, the proposed join might be illegal; and
|
* If we have any special joins, the proposed join might be illegal; and
|
||||||
* in any case we have to determine its join type. Scan the join info
|
* in any case we have to determine its join type. Scan the join info
|
||||||
* list for conflicts.
|
* list for matches and conflicts.
|
||||||
*/
|
*/
|
||||||
match_sjinfo = NULL;
|
match_sjinfo = NULL;
|
||||||
reversed = false;
|
reversed = false;
|
||||||
unique_ified = false;
|
unique_ified = false;
|
||||||
is_valid_inner = true;
|
must_be_leftjoin = false;
|
||||||
|
|
||||||
foreach(l, root->join_info_list)
|
foreach(l, root->join_info_list)
|
||||||
{
|
{
|
||||||
@ -402,7 +402,8 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
|
|||||||
* If one input contains min_lefthand and the other contains
|
* If one input contains min_lefthand and the other contains
|
||||||
* min_righthand, then we can perform the SJ at this join.
|
* min_righthand, then we can perform the SJ at this join.
|
||||||
*
|
*
|
||||||
* Barf if we get matches to more than one SJ (is that possible?)
|
* Reject if we get matches to more than one SJ; that implies we're
|
||||||
|
* considering something that's not really valid.
|
||||||
*/
|
*/
|
||||||
if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
|
if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
|
||||||
bms_is_subset(sjinfo->min_righthand, rel2->relids))
|
bms_is_subset(sjinfo->min_righthand, rel2->relids))
|
||||||
@ -470,58 +471,38 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
|
|||||||
/*
|
/*
|
||||||
* Otherwise, the proposed join overlaps the RHS but isn't a valid
|
* Otherwise, the proposed join overlaps the RHS but isn't a valid
|
||||||
* implementation of this SJ. It might still be a legal join,
|
* implementation of this SJ. It might still be a legal join,
|
||||||
* however, if it does not overlap the LHS. But we never allow
|
* however, if we're allowed to associate it into the RHS of this
|
||||||
* violations of the RHS of SEMI or ANTI joins. (In practice,
|
* SJ. That means this SJ must be a LEFT join (not SEMI or ANTI,
|
||||||
* therefore, only LEFT joins ever allow RHS violation.)
|
* and certainly not FULL) and the proposed join must not overlap
|
||||||
|
* the LHS.
|
||||||
*/
|
*/
|
||||||
if (sjinfo->jointype == JOIN_SEMI ||
|
if (sjinfo->jointype != JOIN_LEFT ||
|
||||||
sjinfo->jointype == JOIN_ANTI ||
|
|
||||||
bms_overlap(joinrelids, sjinfo->min_lefthand))
|
bms_overlap(joinrelids, sjinfo->min_lefthand))
|
||||||
return false; /* invalid join path */
|
return false; /* invalid join path */
|
||||||
|
|
||||||
/*----------
|
/*
|
||||||
* If both inputs overlap the RHS, assume that it's OK. Since the
|
* To be valid, the proposed join must be a LEFT join; otherwise
|
||||||
* inputs presumably got past this function's checks previously,
|
* it can't associate into this SJ's RHS. But we may not yet have
|
||||||
* their violations of the RHS boundary must represent SJs that
|
* found the SpecialJoinInfo matching the proposed join, so we
|
||||||
* have been determined to commute with this one.
|
* can't test that yet. Remember the requirement for later.
|
||||||
* We have to allow this to work correctly in cases like
|
|
||||||
* (a LEFT JOIN (b JOIN (c LEFT JOIN d)))
|
|
||||||
* when the c/d join has been determined to commute with the join
|
|
||||||
* to a, and hence d is not part of min_righthand for the upper
|
|
||||||
* join. It should be legal to join b to c/d but this will appear
|
|
||||||
* as a violation of the upper join's RHS.
|
|
||||||
*
|
|
||||||
* Furthermore, if one input overlaps the RHS and the other does
|
|
||||||
* not, we should still allow the join if it is a valid
|
|
||||||
* implementation of some other SJ. We have to allow this to
|
|
||||||
* support the associative identity
|
|
||||||
* (a LJ b on Pab) LJ c ON Pbc = a LJ (b LJ c ON Pbc) on Pab
|
|
||||||
* since joining B directly to C violates the lower SJ's RHS.
|
|
||||||
* We assume that make_outerjoininfo() set things up correctly
|
|
||||||
* so that we'll only match to some SJ if the join is valid.
|
|
||||||
* Set flag here to check at bottom of loop.
|
|
||||||
*----------
|
|
||||||
*/
|
*/
|
||||||
if (bms_overlap(rel1->relids, sjinfo->min_righthand) &&
|
must_be_leftjoin = true;
|
||||||
bms_overlap(rel2->relids, sjinfo->min_righthand))
|
|
||||||
{
|
|
||||||
/* both overlap; assume OK */
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* one overlaps, the other doesn't */
|
|
||||||
is_valid_inner = false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Fail if violated some SJ's RHS and didn't match to another SJ. However,
|
* Fail if violated any SJ's RHS and didn't match to a LEFT SJ: the
|
||||||
* "matching" to a semijoin we are implementing by unique-ification
|
* proposed join can't associate into an SJ's RHS.
|
||||||
* doesn't count (think: it's really an inner join).
|
*
|
||||||
|
* Also, fail if the proposed join's predicate isn't strict; we're
|
||||||
|
* essentially checking to see if we can apply outer-join identity 3, and
|
||||||
|
* that's a requirement. (This check may be redundant with checks in
|
||||||
|
* make_outerjoininfo, but I'm not quite sure, and it's cheap to test.)
|
||||||
*/
|
*/
|
||||||
if (!is_valid_inner &&
|
if (must_be_leftjoin &&
|
||||||
(match_sjinfo == NULL || unique_ified))
|
(match_sjinfo == NULL ||
|
||||||
|
match_sjinfo->jointype != JOIN_LEFT ||
|
||||||
|
!match_sjinfo->lhs_strict))
|
||||||
return false; /* invalid join path */
|
return false; /* invalid join path */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1128,17 +1128,6 @@ make_outerjoininfo(PlannerInfo *root,
|
|||||||
min_righthand = bms_int_members(bms_union(clause_relids, inner_join_rels),
|
min_righthand = bms_int_members(bms_union(clause_relids, inner_join_rels),
|
||||||
right_rels);
|
right_rels);
|
||||||
|
|
||||||
/*
|
|
||||||
* If we have a degenerate join clause that doesn't mention any RHS rels,
|
|
||||||
* force the min RHS to be the syntactic RHS; otherwise we can end up
|
|
||||||
* making serious errors, like putting the LHS on the wrong side of an
|
|
||||||
* outer join. It seems to be safe to not do this when we have a
|
|
||||||
* contribution from inner_join_rels, though; that's enough to pin the SJ
|
|
||||||
* to occur at a reasonable place in the tree.
|
|
||||||
*/
|
|
||||||
if (bms_is_empty(min_righthand))
|
|
||||||
min_righthand = bms_copy(right_rels);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now check previous outer joins for ordering restrictions.
|
* Now check previous outer joins for ordering restrictions.
|
||||||
*/
|
*/
|
||||||
@ -1181,9 +1170,15 @@ make_outerjoininfo(PlannerInfo *root,
|
|||||||
* For a lower OJ in our RHS, if our join condition does not use the
|
* For a lower OJ in our RHS, if our join condition does not use the
|
||||||
* lower join's RHS and the lower OJ's join condition is strict, we
|
* lower join's RHS and the lower OJ's join condition is strict, we
|
||||||
* can interchange the ordering of the two OJs; otherwise we must add
|
* can interchange the ordering of the two OJs; otherwise we must add
|
||||||
* the lower OJ's full syntactic relset to min_righthand. Also, we
|
* the lower OJ's full syntactic relset to min_righthand.
|
||||||
* must preserve ordering anyway if either the current join or the
|
*
|
||||||
* lower OJ is either a semijoin or an antijoin.
|
* Also, if our join condition does not use the lower join's LHS
|
||||||
|
* either, force the ordering to be preserved. Otherwise we can end
|
||||||
|
* up with SpecialJoinInfos with identical min_righthands, which can
|
||||||
|
* confuse join_is_legal (see discussion in backend/optimizer/README).
|
||||||
|
*
|
||||||
|
* Also, we must preserve ordering anyway if either the current join
|
||||||
|
* or the lower OJ is either a semijoin or an antijoin.
|
||||||
*
|
*
|
||||||
* Here, we have to consider that "our join condition" includes any
|
* Here, we have to consider that "our join condition" includes any
|
||||||
* clauses that syntactically appeared above the lower OJ and below
|
* clauses that syntactically appeared above the lower OJ and below
|
||||||
@ -1199,6 +1194,7 @@ make_outerjoininfo(PlannerInfo *root,
|
|||||||
if (bms_overlap(right_rels, otherinfo->syn_righthand))
|
if (bms_overlap(right_rels, otherinfo->syn_righthand))
|
||||||
{
|
{
|
||||||
if (bms_overlap(clause_relids, otherinfo->syn_righthand) ||
|
if (bms_overlap(clause_relids, otherinfo->syn_righthand) ||
|
||||||
|
!bms_overlap(clause_relids, otherinfo->min_lefthand) ||
|
||||||
jointype == JOIN_SEMI ||
|
jointype == JOIN_SEMI ||
|
||||||
jointype == JOIN_ANTI ||
|
jointype == JOIN_ANTI ||
|
||||||
otherinfo->jointype == JOIN_SEMI ||
|
otherinfo->jointype == JOIN_SEMI ||
|
||||||
@ -1238,10 +1234,12 @@ make_outerjoininfo(PlannerInfo *root,
|
|||||||
* If we found nothing to put in min_lefthand, punt and make it the full
|
* If we found nothing to put in min_lefthand, punt and make it the full
|
||||||
* LHS, to avoid having an empty min_lefthand which will confuse later
|
* LHS, to avoid having an empty min_lefthand which will confuse later
|
||||||
* processing. (We don't try to be smart about such cases, just correct.)
|
* processing. (We don't try to be smart about such cases, just correct.)
|
||||||
* We already forced min_righthand nonempty, so nothing to do for that.
|
* Likewise for min_righthand.
|
||||||
*/
|
*/
|
||||||
if (bms_is_empty(min_lefthand))
|
if (bms_is_empty(min_lefthand))
|
||||||
min_lefthand = bms_copy(left_rels);
|
min_lefthand = bms_copy(left_rels);
|
||||||
|
if (bms_is_empty(min_righthand))
|
||||||
|
min_righthand = bms_copy(right_rels);
|
||||||
|
|
||||||
/* Now they'd better be nonempty */
|
/* Now they'd better be nonempty */
|
||||||
Assert(!bms_is_empty(min_lefthand));
|
Assert(!bms_is_empty(min_lefthand));
|
||||||
|
@ -3425,7 +3425,7 @@ select t1.* from
|
|||||||
Output: t1.f1
|
Output: t1.f1
|
||||||
Hash Cond: (i8.q2 = i4.f1)
|
Hash Cond: (i8.q2 = i4.f1)
|
||||||
-> Nested Loop Left Join
|
-> Nested Loop Left Join
|
||||||
Output: i8.q2, t1.f1
|
Output: t1.f1, i8.q2
|
||||||
Join Filter: (t1.f1 = '***'::text)
|
Join Filter: (t1.f1 = '***'::text)
|
||||||
-> Seq Scan on public.text_tbl t1
|
-> Seq Scan on public.text_tbl t1
|
||||||
Output: t1.f1
|
Output: t1.f1
|
||||||
@ -3473,6 +3473,76 @@ select t1.* from
|
|||||||
hi de ho neighbor
|
hi de ho neighbor
|
||||||
(2 rows)
|
(2 rows)
|
||||||
|
|
||||||
|
explain (verbose, costs off)
|
||||||
|
select t1.* from
|
||||||
|
text_tbl t1
|
||||||
|
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
|
||||||
|
left join int8_tbl i8
|
||||||
|
left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2
|
||||||
|
where q1 = f1) b2
|
||||||
|
on (i8.q1 = b2.q1)
|
||||||
|
on (b2.d2 = b1.q2)
|
||||||
|
on (t1.f1 = b1.d1)
|
||||||
|
left join int4_tbl i4
|
||||||
|
on (i8.q2 = i4.f1);
|
||||||
|
QUERY PLAN
|
||||||
|
----------------------------------------------------------------------------
|
||||||
|
Hash Left Join
|
||||||
|
Output: t1.f1
|
||||||
|
Hash Cond: (i8.q2 = i4.f1)
|
||||||
|
-> Nested Loop Left Join
|
||||||
|
Output: t1.f1, i8.q2
|
||||||
|
Join Filter: (t1.f1 = '***'::text)
|
||||||
|
-> Seq Scan on public.text_tbl t1
|
||||||
|
Output: t1.f1
|
||||||
|
-> Materialize
|
||||||
|
Output: i8.q2
|
||||||
|
-> Hash Right Join
|
||||||
|
Output: i8.q2
|
||||||
|
Hash Cond: ((NULL::integer) = i8b1.q2)
|
||||||
|
-> Hash Right Join
|
||||||
|
Output: i8.q2, (NULL::integer)
|
||||||
|
Hash Cond: (i8b2.q1 = i8.q1)
|
||||||
|
-> Hash Join
|
||||||
|
Output: i8b2.q1, NULL::integer
|
||||||
|
Hash Cond: (i8b2.q1 = i4b2.f1)
|
||||||
|
-> Seq Scan on public.int8_tbl i8b2
|
||||||
|
Output: i8b2.q1, i8b2.q2
|
||||||
|
-> Hash
|
||||||
|
Output: i4b2.f1
|
||||||
|
-> Seq Scan on public.int4_tbl i4b2
|
||||||
|
Output: i4b2.f1
|
||||||
|
-> Hash
|
||||||
|
Output: i8.q1, i8.q2
|
||||||
|
-> Seq Scan on public.int8_tbl i8
|
||||||
|
Output: i8.q1, i8.q2
|
||||||
|
-> Hash
|
||||||
|
Output: i8b1.q2
|
||||||
|
-> Seq Scan on public.int8_tbl i8b1
|
||||||
|
Output: i8b1.q2
|
||||||
|
-> Hash
|
||||||
|
Output: i4.f1
|
||||||
|
-> Seq Scan on public.int4_tbl i4
|
||||||
|
Output: i4.f1
|
||||||
|
(37 rows)
|
||||||
|
|
||||||
|
select t1.* from
|
||||||
|
text_tbl t1
|
||||||
|
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
|
||||||
|
left join int8_tbl i8
|
||||||
|
left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2
|
||||||
|
where q1 = f1) b2
|
||||||
|
on (i8.q1 = b2.q1)
|
||||||
|
on (b2.d2 = b1.q2)
|
||||||
|
on (t1.f1 = b1.d1)
|
||||||
|
left join int4_tbl i4
|
||||||
|
on (i8.q2 = i4.f1);
|
||||||
|
f1
|
||||||
|
-------------------
|
||||||
|
doh!
|
||||||
|
hi de ho neighbor
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
--
|
--
|
||||||
-- test ability to push constants through outer join clauses
|
-- test ability to push constants through outer join clauses
|
||||||
--
|
--
|
||||||
|
@ -1065,6 +1065,31 @@ select t1.* from
|
|||||||
left join int4_tbl i4
|
left join int4_tbl i4
|
||||||
on (i8.q2 = i4.f1);
|
on (i8.q2 = i4.f1);
|
||||||
|
|
||||||
|
explain (verbose, costs off)
|
||||||
|
select t1.* from
|
||||||
|
text_tbl t1
|
||||||
|
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
|
||||||
|
left join int8_tbl i8
|
||||||
|
left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2
|
||||||
|
where q1 = f1) b2
|
||||||
|
on (i8.q1 = b2.q1)
|
||||||
|
on (b2.d2 = b1.q2)
|
||||||
|
on (t1.f1 = b1.d1)
|
||||||
|
left join int4_tbl i4
|
||||||
|
on (i8.q2 = i4.f1);
|
||||||
|
|
||||||
|
select t1.* from
|
||||||
|
text_tbl t1
|
||||||
|
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
|
||||||
|
left join int8_tbl i8
|
||||||
|
left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2
|
||||||
|
where q1 = f1) b2
|
||||||
|
on (i8.q1 = b2.q1)
|
||||||
|
on (b2.d2 = b1.q2)
|
||||||
|
on (t1.f1 = b1.d1)
|
||||||
|
left join int4_tbl i4
|
||||||
|
on (i8.q2 = i4.f1);
|
||||||
|
|
||||||
--
|
--
|
||||||
-- test ability to push constants through outer join clauses
|
-- test ability to push constants through outer join clauses
|
||||||
--
|
--
|
||||||
|
Loading…
x
Reference in New Issue
Block a user