mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	Further fixes for degenerate outer join clauses.
Further testing revealed that commit f69b4b9495 was still a few
bricks shy of a load: minor tweaking of the previous test cases resulted
in the same wrong-outer-join-order problem coming back.  After study
I concluded that my previous changes in make_outerjoininfo() were just
accidentally masking the problem, and should be reverted in favor of
forcing syntactic join order whenever an upper outer join's predicate
doesn't mention a lower outer join's LHS.  This still allows the
chained-outer-joins style that is the normally optimizable case.
I also tightened things up some more in join_is_legal().  It seems to me
on review that what's really happening in the exception case where we
ignore a mismatched special join is that we're allowing the proposed join
to associate into the RHS of the outer join we're comparing it to.  As
such, we should *always* insist that the proposed join be a left join,
which eliminates a bunch of rather dubious argumentation.  The case where
we weren't enforcing that was the one that was already known buggy anyway
(it had a violatable Assert before the aforesaid commit) so it hardly
deserves a lot of deference.
Back-patch to all active branches, like the previous patch.  The added
regression test case failed in all branches back to 9.1, and I think it's
only an unrelated change in costing calculations that kept 9.0 from
choosing a broken plan.
			
			
This commit is contained in:
		| @@ -241,12 +241,23 @@ non-FULL joins can be freely associated into the lefthand side of an | |||||||
| OJ, but in some cases they can't be associated into the righthand side. | OJ, but in some cases they can't be associated into the righthand side. | ||||||
| So the restriction enforced by join_is_legal is that a proposed join | So the restriction enforced by join_is_legal is that a proposed join | ||||||
| can't join a rel within or partly within an RHS boundary to one outside | can't join a rel within or partly within an RHS boundary to one outside | ||||||
| the boundary, unless the join validly implements some outer join. | the boundary, unless the proposed join is a LEFT join that can associate | ||||||
| (To support use of identity 3, we have to allow cases where an apparent | into the SpecialJoinInfo's RHS using identity 3. | ||||||
| violation of a lower OJ's RHS is committed while forming an upper OJ. |  | ||||||
| If this wouldn't in fact be legal, the upper OJ's minimum LHS or RHS | The use of minimum Relid sets has some pitfalls; consider a query like | ||||||
| set must be expanded to include the whole of the lower OJ, thereby | 	A leftjoin (B leftjoin (C innerjoin D) on (Pbcd)) on Pa | ||||||
| preventing it from being formed before the lower OJ is.) | where Pa doesn't mention B/C/D at all.  In this case a naive computation | ||||||
|  | would give the upper leftjoin's min LHS as {A} and min RHS as {C,D} (since | ||||||
|  | we know that the innerjoin can't associate out of the leftjoin's RHS, and | ||||||
|  | enforce that by including its relids in the leftjoin's min RHS).  And the | ||||||
|  | lower leftjoin has min LHS of {B} and min RHS of {C,D}.  Given such | ||||||
|  | information, join_is_legal would think it's okay to associate the upper | ||||||
|  | join into the lower join's RHS, transforming the query to | ||||||
|  | 	B leftjoin (A leftjoin (C innerjoin D) on Pa) on (Pbcd) | ||||||
|  | which yields totally wrong answers.  We prevent that by forcing the min LHS | ||||||
|  | for the upper join to include B.  This is perhaps overly restrictive, but | ||||||
|  | such cases don't arise often so it's not clear that it's worth developing a | ||||||
|  | more complicated system. | ||||||
|  |  | ||||||
|  |  | ||||||
| Pulling Up Subqueries | Pulling Up Subqueries | ||||||
|   | |||||||
| @@ -331,7 +331,7 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, | |||||||
| 	SpecialJoinInfo *match_sjinfo; | 	SpecialJoinInfo *match_sjinfo; | ||||||
| 	bool		reversed; | 	bool		reversed; | ||||||
| 	bool		unique_ified; | 	bool		unique_ified; | ||||||
| 	bool		is_valid_inner; | 	bool		must_be_leftjoin; | ||||||
| 	bool		lateral_fwd; | 	bool		lateral_fwd; | ||||||
| 	bool		lateral_rev; | 	bool		lateral_rev; | ||||||
| 	ListCell   *l; | 	ListCell   *l; | ||||||
| @@ -346,12 +346,12 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, | |||||||
| 	/* | 	/* | ||||||
| 	 * If we have any special joins, the proposed join might be illegal; and | 	 * If we have any special joins, the proposed join might be illegal; and | ||||||
| 	 * in any case we have to determine its join type.  Scan the join info | 	 * in any case we have to determine its join type.  Scan the join info | ||||||
| 	 * list for conflicts. | 	 * list for matches and conflicts. | ||||||
| 	 */ | 	 */ | ||||||
| 	match_sjinfo = NULL; | 	match_sjinfo = NULL; | ||||||
| 	reversed = false; | 	reversed = false; | ||||||
| 	unique_ified = false; | 	unique_ified = false; | ||||||
| 	is_valid_inner = true; | 	must_be_leftjoin = false; | ||||||
|  |  | ||||||
| 	foreach(l, root->join_info_list) | 	foreach(l, root->join_info_list) | ||||||
| 	{ | 	{ | ||||||
| @@ -402,7 +402,8 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, | |||||||
| 		 * If one input contains min_lefthand and the other contains | 		 * If one input contains min_lefthand and the other contains | ||||||
| 		 * min_righthand, then we can perform the SJ at this join. | 		 * min_righthand, then we can perform the SJ at this join. | ||||||
| 		 * | 		 * | ||||||
| 		 * Barf if we get matches to more than one SJ (is that possible?) | 		 * Reject if we get matches to more than one SJ; that implies we're | ||||||
|  | 		 * considering something that's not really valid. | ||||||
| 		 */ | 		 */ | ||||||
| 		if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) && | 		if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) && | ||||||
| 			bms_is_subset(sjinfo->min_righthand, rel2->relids)) | 			bms_is_subset(sjinfo->min_righthand, rel2->relids)) | ||||||
| @@ -470,58 +471,38 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, | |||||||
| 			/* | 			/* | ||||||
| 			 * Otherwise, the proposed join overlaps the RHS but isn't a valid | 			 * Otherwise, the proposed join overlaps the RHS but isn't a valid | ||||||
| 			 * implementation of this SJ.  It might still be a legal join, | 			 * implementation of this SJ.  It might still be a legal join, | ||||||
| 			 * however, if it does not overlap the LHS.  But we never allow | 			 * however, if we're allowed to associate it into the RHS of this | ||||||
| 			 * violations of the RHS of SEMI or ANTI joins.  (In practice, | 			 * SJ.  That means this SJ must be a LEFT join (not SEMI or ANTI, | ||||||
| 			 * therefore, only LEFT joins ever allow RHS violation.) | 			 * and certainly not FULL) and the proposed join must not overlap | ||||||
|  | 			 * the LHS. | ||||||
| 			 */ | 			 */ | ||||||
| 			if (sjinfo->jointype == JOIN_SEMI || | 			if (sjinfo->jointype != JOIN_LEFT || | ||||||
| 				sjinfo->jointype == JOIN_ANTI || |  | ||||||
| 				bms_overlap(joinrelids, sjinfo->min_lefthand)) | 				bms_overlap(joinrelids, sjinfo->min_lefthand)) | ||||||
| 				return false;	/* invalid join path */ | 				return false;	/* invalid join path */ | ||||||
|  |  | ||||||
| 			/*---------- | 			/* | ||||||
| 			 * If both inputs overlap the RHS, assume that it's OK.  Since the | 			 * To be valid, the proposed join must be a LEFT join; otherwise | ||||||
| 			 * inputs presumably got past this function's checks previously, | 			 * it can't associate into this SJ's RHS.  But we may not yet have | ||||||
| 			 * their violations of the RHS boundary must represent SJs that | 			 * found the SpecialJoinInfo matching the proposed join, so we | ||||||
| 			 * have been determined to commute with this one. | 			 * can't test that yet.  Remember the requirement for later. | ||||||
| 			 * We have to allow this to work correctly in cases like |  | ||||||
| 			 *		(a LEFT JOIN (b JOIN (c LEFT JOIN d))) |  | ||||||
| 			 * when the c/d join has been determined to commute with the join |  | ||||||
| 			 * to a, and hence d is not part of min_righthand for the upper |  | ||||||
| 			 * join.  It should be legal to join b to c/d but this will appear |  | ||||||
| 			 * as a violation of the upper join's RHS. |  | ||||||
| 			 * |  | ||||||
| 			 * Furthermore, if one input overlaps the RHS and the other does |  | ||||||
| 			 * not, we should still allow the join if it is a valid |  | ||||||
| 			 * implementation of some other SJ.  We have to allow this to |  | ||||||
| 			 * support the associative identity |  | ||||||
| 			 *		(a LJ b on Pab) LJ c ON Pbc = a LJ (b LJ c ON Pbc) on Pab |  | ||||||
| 			 * since joining B directly to C violates the lower SJ's RHS. |  | ||||||
| 			 * We assume that make_outerjoininfo() set things up correctly |  | ||||||
| 			 * so that we'll only match to some SJ if the join is valid. |  | ||||||
| 			 * Set flag here to check at bottom of loop. |  | ||||||
| 			 *---------- |  | ||||||
| 			 */ | 			 */ | ||||||
| 			if (bms_overlap(rel1->relids, sjinfo->min_righthand) && | 			must_be_leftjoin = true; | ||||||
| 				bms_overlap(rel2->relids, sjinfo->min_righthand)) |  | ||||||
| 			{ |  | ||||||
| 				/* both overlap; assume OK */ |  | ||||||
| 			} |  | ||||||
| 			else |  | ||||||
| 			{ |  | ||||||
| 				/* one overlaps, the other doesn't */ |  | ||||||
| 				is_valid_inner = false; |  | ||||||
| 			} |  | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * Fail if violated some SJ's RHS and didn't match to another SJ. However, | 	 * Fail if violated any SJ's RHS and didn't match to a LEFT SJ: the | ||||||
| 	 * "matching" to a semijoin we are implementing by unique-ification | 	 * proposed join can't associate into an SJ's RHS. | ||||||
| 	 * doesn't count (think: it's really an inner join). | 	 * | ||||||
|  | 	 * Also, fail if the proposed join's predicate isn't strict; we're | ||||||
|  | 	 * essentially checking to see if we can apply outer-join identity 3, and | ||||||
|  | 	 * that's a requirement.  (This check may be redundant with checks in | ||||||
|  | 	 * make_outerjoininfo, but I'm not quite sure, and it's cheap to test.) | ||||||
| 	 */ | 	 */ | ||||||
| 	if (!is_valid_inner && | 	if (must_be_leftjoin && | ||||||
| 		(match_sjinfo == NULL || unique_ified)) | 		(match_sjinfo == NULL || | ||||||
|  | 		 match_sjinfo->jointype != JOIN_LEFT || | ||||||
|  | 		 !match_sjinfo->lhs_strict)) | ||||||
| 		return false;			/* invalid join path */ | 		return false;			/* invalid join path */ | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
|   | |||||||
| @@ -1128,17 +1128,6 @@ make_outerjoininfo(PlannerInfo *root, | |||||||
| 	min_righthand = bms_int_members(bms_union(clause_relids, inner_join_rels), | 	min_righthand = bms_int_members(bms_union(clause_relids, inner_join_rels), | ||||||
| 									right_rels); | 									right_rels); | ||||||
|  |  | ||||||
| 	/* |  | ||||||
| 	 * If we have a degenerate join clause that doesn't mention any RHS rels, |  | ||||||
| 	 * force the min RHS to be the syntactic RHS; otherwise we can end up |  | ||||||
| 	 * making serious errors, like putting the LHS on the wrong side of an |  | ||||||
| 	 * outer join.  It seems to be safe to not do this when we have a |  | ||||||
| 	 * contribution from inner_join_rels, though; that's enough to pin the SJ |  | ||||||
| 	 * to occur at a reasonable place in the tree. |  | ||||||
| 	 */ |  | ||||||
| 	if (bms_is_empty(min_righthand)) |  | ||||||
| 		min_righthand = bms_copy(right_rels); |  | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * Now check previous outer joins for ordering restrictions. | 	 * Now check previous outer joins for ordering restrictions. | ||||||
| 	 */ | 	 */ | ||||||
| @@ -1181,9 +1170,15 @@ make_outerjoininfo(PlannerInfo *root, | |||||||
| 		 * For a lower OJ in our RHS, if our join condition does not use the | 		 * For a lower OJ in our RHS, if our join condition does not use the | ||||||
| 		 * lower join's RHS and the lower OJ's join condition is strict, we | 		 * lower join's RHS and the lower OJ's join condition is strict, we | ||||||
| 		 * can interchange the ordering of the two OJs; otherwise we must add | 		 * can interchange the ordering of the two OJs; otherwise we must add | ||||||
| 		 * the lower OJ's full syntactic relset to min_righthand.  Also, we | 		 * the lower OJ's full syntactic relset to min_righthand. | ||||||
| 		 * must preserve ordering anyway if either the current join or the | 		 * | ||||||
| 		 * lower OJ is either a semijoin or an antijoin. | 		 * Also, if our join condition does not use the lower join's LHS | ||||||
|  | 		 * either, force the ordering to be preserved.  Otherwise we can end | ||||||
|  | 		 * up with SpecialJoinInfos with identical min_righthands, which can | ||||||
|  | 		 * confuse join_is_legal (see discussion in backend/optimizer/README). | ||||||
|  | 		 * | ||||||
|  | 		 * Also, we must preserve ordering anyway if either the current join | ||||||
|  | 		 * or the lower OJ is either a semijoin or an antijoin. | ||||||
| 		 * | 		 * | ||||||
| 		 * Here, we have to consider that "our join condition" includes any | 		 * Here, we have to consider that "our join condition" includes any | ||||||
| 		 * clauses that syntactically appeared above the lower OJ and below | 		 * clauses that syntactically appeared above the lower OJ and below | ||||||
| @@ -1199,6 +1194,7 @@ make_outerjoininfo(PlannerInfo *root, | |||||||
| 		if (bms_overlap(right_rels, otherinfo->syn_righthand)) | 		if (bms_overlap(right_rels, otherinfo->syn_righthand)) | ||||||
| 		{ | 		{ | ||||||
| 			if (bms_overlap(clause_relids, otherinfo->syn_righthand) || | 			if (bms_overlap(clause_relids, otherinfo->syn_righthand) || | ||||||
|  | 				!bms_overlap(clause_relids, otherinfo->min_lefthand) || | ||||||
| 				jointype == JOIN_SEMI || | 				jointype == JOIN_SEMI || | ||||||
| 				jointype == JOIN_ANTI || | 				jointype == JOIN_ANTI || | ||||||
| 				otherinfo->jointype == JOIN_SEMI || | 				otherinfo->jointype == JOIN_SEMI || | ||||||
| @@ -1238,10 +1234,12 @@ make_outerjoininfo(PlannerInfo *root, | |||||||
| 	 * If we found nothing to put in min_lefthand, punt and make it the full | 	 * If we found nothing to put in min_lefthand, punt and make it the full | ||||||
| 	 * LHS, to avoid having an empty min_lefthand which will confuse later | 	 * LHS, to avoid having an empty min_lefthand which will confuse later | ||||||
| 	 * processing. (We don't try to be smart about such cases, just correct.) | 	 * processing. (We don't try to be smart about such cases, just correct.) | ||||||
| 	 * We already forced min_righthand nonempty, so nothing to do for that. | 	 * Likewise for min_righthand. | ||||||
| 	 */ | 	 */ | ||||||
| 	if (bms_is_empty(min_lefthand)) | 	if (bms_is_empty(min_lefthand)) | ||||||
| 		min_lefthand = bms_copy(left_rels); | 		min_lefthand = bms_copy(left_rels); | ||||||
|  | 	if (bms_is_empty(min_righthand)) | ||||||
|  | 		min_righthand = bms_copy(right_rels); | ||||||
|  |  | ||||||
| 	/* Now they'd better be nonempty */ | 	/* Now they'd better be nonempty */ | ||||||
| 	Assert(!bms_is_empty(min_lefthand)); | 	Assert(!bms_is_empty(min_lefthand)); | ||||||
|   | |||||||
| @@ -3425,7 +3425,7 @@ select t1.* from | |||||||
|    Output: t1.f1 |    Output: t1.f1 | ||||||
|    Hash Cond: (i8.q2 = i4.f1) |    Hash Cond: (i8.q2 = i4.f1) | ||||||
|    ->  Nested Loop Left Join |    ->  Nested Loop Left Join | ||||||
|          Output: i8.q2, t1.f1 |          Output: t1.f1, i8.q2 | ||||||
|          Join Filter: (t1.f1 = '***'::text) |          Join Filter: (t1.f1 = '***'::text) | ||||||
|          ->  Seq Scan on public.text_tbl t1 |          ->  Seq Scan on public.text_tbl t1 | ||||||
|                Output: t1.f1 |                Output: t1.f1 | ||||||
| @@ -3473,6 +3473,76 @@ select t1.* from | |||||||
|  hi de ho neighbor |  hi de ho neighbor | ||||||
| (2 rows) | (2 rows) | ||||||
|  |  | ||||||
|  | explain (verbose, costs off) | ||||||
|  | select t1.* from | ||||||
|  |   text_tbl t1 | ||||||
|  |   left join (select *, '***'::text as d1 from int8_tbl i8b1) b1 | ||||||
|  |     left join int8_tbl i8 | ||||||
|  |       left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2 | ||||||
|  |                  where q1 = f1) b2 | ||||||
|  |       on (i8.q1 = b2.q1) | ||||||
|  |     on (b2.d2 = b1.q2) | ||||||
|  |   on (t1.f1 = b1.d1) | ||||||
|  |   left join int4_tbl i4 | ||||||
|  |   on (i8.q2 = i4.f1); | ||||||
|  |                                  QUERY PLAN                                  | ||||||
|  | ---------------------------------------------------------------------------- | ||||||
|  |  Hash Left Join | ||||||
|  |    Output: t1.f1 | ||||||
|  |    Hash Cond: (i8.q2 = i4.f1) | ||||||
|  |    ->  Nested Loop Left Join | ||||||
|  |          Output: t1.f1, i8.q2 | ||||||
|  |          Join Filter: (t1.f1 = '***'::text) | ||||||
|  |          ->  Seq Scan on public.text_tbl t1 | ||||||
|  |                Output: t1.f1 | ||||||
|  |          ->  Materialize | ||||||
|  |                Output: i8.q2 | ||||||
|  |                ->  Hash Right Join | ||||||
|  |                      Output: i8.q2 | ||||||
|  |                      Hash Cond: ((NULL::integer) = i8b1.q2) | ||||||
|  |                      ->  Hash Right Join | ||||||
|  |                            Output: i8.q2, (NULL::integer) | ||||||
|  |                            Hash Cond: (i8b2.q1 = i8.q1) | ||||||
|  |                            ->  Hash Join | ||||||
|  |                                  Output: i8b2.q1, NULL::integer | ||||||
|  |                                  Hash Cond: (i8b2.q1 = i4b2.f1) | ||||||
|  |                                  ->  Seq Scan on public.int8_tbl i8b2 | ||||||
|  |                                        Output: i8b2.q1, i8b2.q2 | ||||||
|  |                                  ->  Hash | ||||||
|  |                                        Output: i4b2.f1 | ||||||
|  |                                        ->  Seq Scan on public.int4_tbl i4b2 | ||||||
|  |                                              Output: i4b2.f1 | ||||||
|  |                            ->  Hash | ||||||
|  |                                  Output: i8.q1, i8.q2 | ||||||
|  |                                  ->  Seq Scan on public.int8_tbl i8 | ||||||
|  |                                        Output: i8.q1, i8.q2 | ||||||
|  |                      ->  Hash | ||||||
|  |                            Output: i8b1.q2 | ||||||
|  |                            ->  Seq Scan on public.int8_tbl i8b1 | ||||||
|  |                                  Output: i8b1.q2 | ||||||
|  |    ->  Hash | ||||||
|  |          Output: i4.f1 | ||||||
|  |          ->  Seq Scan on public.int4_tbl i4 | ||||||
|  |                Output: i4.f1 | ||||||
|  | (37 rows) | ||||||
|  |  | ||||||
|  | select t1.* from | ||||||
|  |   text_tbl t1 | ||||||
|  |   left join (select *, '***'::text as d1 from int8_tbl i8b1) b1 | ||||||
|  |     left join int8_tbl i8 | ||||||
|  |       left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2 | ||||||
|  |                  where q1 = f1) b2 | ||||||
|  |       on (i8.q1 = b2.q1) | ||||||
|  |     on (b2.d2 = b1.q2) | ||||||
|  |   on (t1.f1 = b1.d1) | ||||||
|  |   left join int4_tbl i4 | ||||||
|  |   on (i8.q2 = i4.f1); | ||||||
|  |         f1          | ||||||
|  | ------------------- | ||||||
|  |  doh! | ||||||
|  |  hi de ho neighbor | ||||||
|  | (2 rows) | ||||||
|  |  | ||||||
| -- | -- | ||||||
| -- test ability to push constants through outer join clauses | -- test ability to push constants through outer join clauses | ||||||
| -- | -- | ||||||
|   | |||||||
| @@ -1065,6 +1065,31 @@ select t1.* from | |||||||
|   left join int4_tbl i4 |   left join int4_tbl i4 | ||||||
|   on (i8.q2 = i4.f1); |   on (i8.q2 = i4.f1); | ||||||
|  |  | ||||||
|  | explain (verbose, costs off) | ||||||
|  | select t1.* from | ||||||
|  |   text_tbl t1 | ||||||
|  |   left join (select *, '***'::text as d1 from int8_tbl i8b1) b1 | ||||||
|  |     left join int8_tbl i8 | ||||||
|  |       left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2 | ||||||
|  |                  where q1 = f1) b2 | ||||||
|  |       on (i8.q1 = b2.q1) | ||||||
|  |     on (b2.d2 = b1.q2) | ||||||
|  |   on (t1.f1 = b1.d1) | ||||||
|  |   left join int4_tbl i4 | ||||||
|  |   on (i8.q2 = i4.f1); | ||||||
|  |  | ||||||
|  | select t1.* from | ||||||
|  |   text_tbl t1 | ||||||
|  |   left join (select *, '***'::text as d1 from int8_tbl i8b1) b1 | ||||||
|  |     left join int8_tbl i8 | ||||||
|  |       left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2 | ||||||
|  |                  where q1 = f1) b2 | ||||||
|  |       on (i8.q1 = b2.q1) | ||||||
|  |     on (b2.d2 = b1.q2) | ||||||
|  |   on (t1.f1 = b1.d1) | ||||||
|  |   left join int4_tbl i4 | ||||||
|  |   on (i8.q2 = i4.f1); | ||||||
|  |  | ||||||
| -- | -- | ||||||
| -- test ability to push constants through outer join clauses | -- test ability to push constants through outer join clauses | ||||||
| -- | -- | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user