mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	If we expect a hash join to be performed in multiple batches, suppress the
"physical tlist" optimization on the outer relation (i.e., force a projection step to occur in its scan). This avoids storing useless column values when the outer relation's tuples are written to temporary batch files. Modified version of a patch by Michael Henderson and Ramon Lawrence.
This commit is contained in:
		| @@ -8,7 +8,7 @@ | ||||
|  * | ||||
|  * | ||||
|  * IDENTIFICATION | ||||
|  *	  $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.355 2009/03/21 00:04:39 tgl Exp $ | ||||
|  *	  $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.356 2009/03/26 17:15:34 tgl Exp $ | ||||
|  * | ||||
|  * NOTES | ||||
|  *	  Every node type that can appear in stored rules' parsetrees *must* | ||||
| @@ -1448,6 +1448,7 @@ _outHashPath(StringInfo str, HashPath *node) | ||||
| 	_outJoinPathInfo(str, (JoinPath *) node); | ||||
|  | ||||
| 	WRITE_NODE_FIELD(path_hashclauses); | ||||
| 	WRITE_INT_FIELD(num_batches); | ||||
| } | ||||
|  | ||||
| static void | ||||
|   | ||||
| @@ -54,7 +54,7 @@ | ||||
|  * Portions Copyright (c) 1994, Regents of the University of California | ||||
|  * | ||||
|  * IDENTIFICATION | ||||
|  *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.205 2009/03/21 00:04:39 tgl Exp $ | ||||
|  *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.206 2009/03/26 17:15:35 tgl Exp $ | ||||
|  * | ||||
|  *------------------------------------------------------------------------- | ||||
|  */ | ||||
| @@ -1880,6 +1880,8 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) | ||||
| 							&numbatches, | ||||
| 							&num_skew_mcvs); | ||||
| 	virtualbuckets = (double) numbuckets *(double) numbatches; | ||||
| 	/* mark the path with estimated # of batches */ | ||||
| 	path->num_batches = numbatches; | ||||
|  | ||||
| 	/* | ||||
| 	 * Determine bucketsize fraction for inner relation.  We use the smallest | ||||
|   | ||||
| @@ -10,7 +10,7 @@ | ||||
|  * | ||||
|  * | ||||
|  * IDENTIFICATION | ||||
|  *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.256 2009/03/21 00:04:39 tgl Exp $ | ||||
|  *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.257 2009/03/26 17:15:35 tgl Exp $ | ||||
|  * | ||||
|  *------------------------------------------------------------------------- | ||||
|  */ | ||||
| @@ -1910,6 +1910,10 @@ create_hashjoin_plan(PlannerInfo *root, | ||||
| 	/* We don't want any excess columns in the hashed tuples */ | ||||
| 	disuse_physical_tlist(inner_plan, best_path->jpath.innerjoinpath); | ||||
|  | ||||
| 	/* If we expect batching, suppress excess columns in outer tuples too */ | ||||
| 	if (best_path->num_batches > 1) | ||||
| 		disuse_physical_tlist(outer_plan, best_path->jpath.outerjoinpath); | ||||
|  | ||||
| 	/* | ||||
| 	 * If there is a single join clause and we can identify the outer | ||||
| 	 * variable as a simple column reference, supply its identity for | ||||
|   | ||||
| @@ -8,7 +8,7 @@ | ||||
|  * | ||||
|  * | ||||
|  * IDENTIFICATION | ||||
|  *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.150 2009/02/27 00:06:27 tgl Exp $ | ||||
|  *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.151 2009/03/26 17:15:35 tgl Exp $ | ||||
|  * | ||||
|  *------------------------------------------------------------------------- | ||||
|  */ | ||||
| @@ -1480,9 +1480,20 @@ create_hashjoin_path(PlannerInfo *root, | ||||
| 	pathnode->jpath.outerjoinpath = outer_path; | ||||
| 	pathnode->jpath.innerjoinpath = inner_path; | ||||
| 	pathnode->jpath.joinrestrictinfo = restrict_clauses; | ||||
| 	/* A hashjoin never has pathkeys, since its ordering is unpredictable */ | ||||
| 	/* | ||||
| 	 * A hashjoin never has pathkeys, since its output ordering is | ||||
| 	 * unpredictable due to possible batching.  XXX If the inner relation is | ||||
| 	 * small enough, we could instruct the executor that it must not batch, | ||||
| 	 * and then we could assume that the output inherits the outer relation's | ||||
| 	 * ordering, which might save a sort step.  However there is considerable | ||||
| 	 * downside if our estimate of the inner relation size is badly off. | ||||
| 	 * For the moment we don't risk it.  (Note also that if we wanted to take | ||||
| 	 * this seriously, joinpath.c would have to consider many more paths for | ||||
| 	 * the outer rel than it does now.) | ||||
| 	 */ | ||||
| 	pathnode->jpath.path.pathkeys = NIL; | ||||
| 	pathnode->path_hashclauses = hashclauses; | ||||
| 	/* cost_hashjoin will fill in pathnode->num_batches */ | ||||
|  | ||||
| 	cost_hashjoin(pathnode, root, sjinfo); | ||||
|  | ||||
|   | ||||
| @@ -7,7 +7,7 @@ | ||||
|  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group | ||||
|  * Portions Copyright (c) 1994, Regents of the University of California | ||||
|  * | ||||
|  * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.170 2009/03/05 23:06:45 tgl Exp $ | ||||
|  * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.171 2009/03/26 17:15:35 tgl Exp $ | ||||
|  * | ||||
|  *------------------------------------------------------------------------- | ||||
|  */ | ||||
| @@ -845,6 +845,7 @@ typedef struct HashPath | ||||
| { | ||||
| 	JoinPath	jpath; | ||||
| 	List	   *path_hashclauses;		/* join clauses used for hashing */ | ||||
| 	int			num_batches;			/* number of batches expected */ | ||||
| } HashPath; | ||||
|  | ||||
| /* | ||||
|   | ||||
		Reference in New Issue
	
	Block a user