mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	Revert "Fix race in Parallel Hash Join batch cleanup."
This reverts commit378802e371. This reverts commit3b8981b6e1. Discussion: https://postgr.es/m/CA%2BhUKGJmcqAE3MZeDCLLXa62cWM0AJbKmp2JrJYaJ86bz36LFA%40mail.gmail.com
This commit is contained in:
		| @@ -246,10 +246,10 @@ MultiExecParallelHash(HashState *node) | ||||
| 	 */ | ||||
| 	pstate = hashtable->parallel_state; | ||||
| 	build_barrier = &pstate->build_barrier; | ||||
| 	Assert(BarrierPhase(build_barrier) >= PHJ_BUILD_ALLOCATE); | ||||
| 	Assert(BarrierPhase(build_barrier) >= PHJ_BUILD_ALLOCATING); | ||||
| 	switch (BarrierPhase(build_barrier)) | ||||
| 	{ | ||||
| 		case PHJ_BUILD_ALLOCATE: | ||||
| 		case PHJ_BUILD_ALLOCATING: | ||||
|  | ||||
| 			/* | ||||
| 			 * Either I just allocated the initial hash table in | ||||
| @@ -259,7 +259,7 @@ MultiExecParallelHash(HashState *node) | ||||
| 			BarrierArriveAndWait(build_barrier, WAIT_EVENT_HASH_BUILD_ALLOCATE); | ||||
| 			/* Fall through. */ | ||||
|  | ||||
| 		case PHJ_BUILD_HASH_INNER: | ||||
| 		case PHJ_BUILD_HASHING_INNER: | ||||
|  | ||||
| 			/* | ||||
| 			 * It's time to begin hashing, or if we just arrived here then | ||||
| @@ -271,10 +271,10 @@ MultiExecParallelHash(HashState *node) | ||||
| 			 * below. | ||||
| 			 */ | ||||
| 			if (PHJ_GROW_BATCHES_PHASE(BarrierAttach(&pstate->grow_batches_barrier)) != | ||||
| 				PHJ_GROW_BATCHES_ELECT) | ||||
| 				PHJ_GROW_BATCHES_ELECTING) | ||||
| 				ExecParallelHashIncreaseNumBatches(hashtable); | ||||
| 			if (PHJ_GROW_BUCKETS_PHASE(BarrierAttach(&pstate->grow_buckets_barrier)) != | ||||
| 				PHJ_GROW_BUCKETS_ELECT) | ||||
| 				PHJ_GROW_BUCKETS_ELECTING) | ||||
| 				ExecParallelHashIncreaseNumBuckets(hashtable); | ||||
| 			ExecParallelHashEnsureBatchAccessors(hashtable); | ||||
| 			ExecParallelHashTableSetCurrentBatch(hashtable, 0); | ||||
| @@ -333,22 +333,15 @@ MultiExecParallelHash(HashState *node) | ||||
| 	hashtable->nbuckets = pstate->nbuckets; | ||||
| 	hashtable->log2_nbuckets = my_log2(hashtable->nbuckets); | ||||
| 	hashtable->totalTuples = pstate->total_tuples; | ||||
|  | ||||
| 	/* | ||||
| 	 * Unless we're completely done and the batch state has been freed, make | ||||
| 	 * sure we have accessors. | ||||
| 	 */ | ||||
| 	if (BarrierPhase(build_barrier) < PHJ_BUILD_FREE) | ||||
| 		ExecParallelHashEnsureBatchAccessors(hashtable); | ||||
| 	ExecParallelHashEnsureBatchAccessors(hashtable); | ||||
|  | ||||
| 	/* | ||||
| 	 * The next synchronization point is in ExecHashJoin's HJ_BUILD_HASHTABLE | ||||
| 	 * case, which will bring the build phase to PHJ_BUILD_RUN (if it isn't | ||||
| 	 * case, which will bring the build phase to PHJ_BUILD_DONE (if it isn't | ||||
| 	 * there already). | ||||
| 	 */ | ||||
| 	Assert(BarrierPhase(build_barrier) == PHJ_BUILD_HASH_OUTER || | ||||
| 		   BarrierPhase(build_barrier) == PHJ_BUILD_RUN || | ||||
| 		   BarrierPhase(build_barrier) == PHJ_BUILD_FREE); | ||||
| 	Assert(BarrierPhase(build_barrier) == PHJ_BUILD_HASHING_OUTER || | ||||
| 		   BarrierPhase(build_barrier) == PHJ_BUILD_DONE); | ||||
| } | ||||
|  | ||||
| /* ---------------------------------------------------------------- | ||||
| @@ -596,8 +589,8 @@ ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, | ||||
| 		 * Attach to the build barrier.  The corresponding detach operation is | ||||
| 		 * in ExecHashTableDetach.  Note that we won't attach to the | ||||
| 		 * batch_barrier for batch 0 yet.  We'll attach later and start it out | ||||
| 		 * in PHJ_BATCH_PROBE phase, because batch 0 is allocated up front and | ||||
| 		 * then loaded while hashing (the standard hybrid hash join | ||||
| 		 * in PHJ_BATCH_PROBING phase, because batch 0 is allocated up front | ||||
| 		 * and then loaded while hashing (the standard hybrid hash join | ||||
| 		 * algorithm), and we'll coordinate that using build_barrier. | ||||
| 		 */ | ||||
| 		build_barrier = &pstate->build_barrier; | ||||
| @@ -610,7 +603,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, | ||||
| 		 * SharedHashJoinBatch objects and the hash table for batch 0.  One | ||||
| 		 * backend will be elected to do that now if necessary. | ||||
| 		 */ | ||||
| 		if (BarrierPhase(build_barrier) == PHJ_BUILD_ELECT && | ||||
| 		if (BarrierPhase(build_barrier) == PHJ_BUILD_ELECTING && | ||||
| 			BarrierArriveAndWait(build_barrier, WAIT_EVENT_HASH_BUILD_ELECT)) | ||||
| 		{ | ||||
| 			pstate->nbatch = nbatch; | ||||
| @@ -631,7 +624,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, | ||||
| 		/* | ||||
| 		 * The next Parallel Hash synchronization point is in | ||||
| 		 * MultiExecParallelHash(), which will progress it all the way to | ||||
| 		 * PHJ_BUILD_RUN.  The caller must not return control from this | ||||
| 		 * PHJ_BUILD_DONE.  The caller must not return control from this | ||||
| 		 * executor node between now and then. | ||||
| 		 */ | ||||
| 	} | ||||
| @@ -1067,7 +1060,7 @@ ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable) | ||||
| 	ParallelHashJoinState *pstate = hashtable->parallel_state; | ||||
| 	int			i; | ||||
|  | ||||
| 	Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_HASH_INNER); | ||||
| 	Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_HASHING_INNER); | ||||
|  | ||||
| 	/* | ||||
| 	 * It's unlikely, but we need to be prepared for new participants to show | ||||
| @@ -1076,7 +1069,7 @@ ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable) | ||||
| 	 */ | ||||
| 	switch (PHJ_GROW_BATCHES_PHASE(BarrierPhase(&pstate->grow_batches_barrier))) | ||||
| 	{ | ||||
| 		case PHJ_GROW_BATCHES_ELECT: | ||||
| 		case PHJ_GROW_BATCHES_ELECTING: | ||||
|  | ||||
| 			/* | ||||
| 			 * Elect one participant to prepare to grow the number of batches. | ||||
| @@ -1194,13 +1187,13 @@ ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable) | ||||
| 			} | ||||
| 			/* Fall through. */ | ||||
|  | ||||
| 		case PHJ_GROW_BATCHES_REALLOCATE: | ||||
| 		case PHJ_GROW_BATCHES_ALLOCATING: | ||||
| 			/* Wait for the above to be finished. */ | ||||
| 			BarrierArriveAndWait(&pstate->grow_batches_barrier, | ||||
| 								 WAIT_EVENT_HASH_GROW_BATCHES_REALLOCATE); | ||||
| 								 WAIT_EVENT_HASH_GROW_BATCHES_ALLOCATE); | ||||
| 			/* Fall through. */ | ||||
|  | ||||
| 		case PHJ_GROW_BATCHES_REPARTITION: | ||||
| 		case PHJ_GROW_BATCHES_REPARTITIONING: | ||||
| 			/* Make sure that we have the current dimensions and buckets. */ | ||||
| 			ExecParallelHashEnsureBatchAccessors(hashtable); | ||||
| 			ExecParallelHashTableSetCurrentBatch(hashtable, 0); | ||||
| @@ -1213,7 +1206,7 @@ ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable) | ||||
| 								 WAIT_EVENT_HASH_GROW_BATCHES_REPARTITION); | ||||
| 			/* Fall through. */ | ||||
|  | ||||
| 		case PHJ_GROW_BATCHES_DECIDE: | ||||
| 		case PHJ_GROW_BATCHES_DECIDING: | ||||
|  | ||||
| 			/* | ||||
| 			 * Elect one participant to clean up and decide whether further | ||||
| @@ -1268,7 +1261,7 @@ ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable) | ||||
| 			} | ||||
| 			/* Fall through. */ | ||||
|  | ||||
| 		case PHJ_GROW_BATCHES_FINISH: | ||||
| 		case PHJ_GROW_BATCHES_FINISHING: | ||||
| 			/* Wait for the above to complete. */ | ||||
| 			BarrierArriveAndWait(&pstate->grow_batches_barrier, | ||||
| 								 WAIT_EVENT_HASH_GROW_BATCHES_FINISH); | ||||
| @@ -1508,7 +1501,7 @@ ExecParallelHashIncreaseNumBuckets(HashJoinTable hashtable) | ||||
| 	HashMemoryChunk chunk; | ||||
| 	dsa_pointer chunk_s; | ||||
|  | ||||
| 	Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_HASH_INNER); | ||||
| 	Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_HASHING_INNER); | ||||
|  | ||||
| 	/* | ||||
| 	 * It's unlikely, but we need to be prepared for new participants to show | ||||
| @@ -1517,7 +1510,7 @@ ExecParallelHashIncreaseNumBuckets(HashJoinTable hashtable) | ||||
| 	 */ | ||||
| 	switch (PHJ_GROW_BUCKETS_PHASE(BarrierPhase(&pstate->grow_buckets_barrier))) | ||||
| 	{ | ||||
| 		case PHJ_GROW_BUCKETS_ELECT: | ||||
| 		case PHJ_GROW_BUCKETS_ELECTING: | ||||
| 			/* Elect one participant to prepare to increase nbuckets. */ | ||||
| 			if (BarrierArriveAndWait(&pstate->grow_buckets_barrier, | ||||
| 									 WAIT_EVENT_HASH_GROW_BUCKETS_ELECT)) | ||||
| @@ -1546,13 +1539,13 @@ ExecParallelHashIncreaseNumBuckets(HashJoinTable hashtable) | ||||
| 			} | ||||
| 			/* Fall through. */ | ||||
|  | ||||
| 		case PHJ_GROW_BUCKETS_REALLOCATE: | ||||
| 		case PHJ_GROW_BUCKETS_ALLOCATING: | ||||
| 			/* Wait for the above to complete. */ | ||||
| 			BarrierArriveAndWait(&pstate->grow_buckets_barrier, | ||||
| 								 WAIT_EVENT_HASH_GROW_BUCKETS_REALLOCATE); | ||||
| 								 WAIT_EVENT_HASH_GROW_BUCKETS_ALLOCATE); | ||||
| 			/* Fall through. */ | ||||
|  | ||||
| 		case PHJ_GROW_BUCKETS_REINSERT: | ||||
| 		case PHJ_GROW_BUCKETS_REINSERTING: | ||||
| 			/* Reinsert all tuples into the hash table. */ | ||||
| 			ExecParallelHashEnsureBatchAccessors(hashtable); | ||||
| 			ExecParallelHashTableSetCurrentBatch(hashtable, 0); | ||||
| @@ -1708,7 +1701,7 @@ retry: | ||||
|  | ||||
| 		/* Try to load it into memory. */ | ||||
| 		Assert(BarrierPhase(&hashtable->parallel_state->build_barrier) == | ||||
| 			   PHJ_BUILD_HASH_INNER); | ||||
| 			   PHJ_BUILD_HASHING_INNER); | ||||
| 		hashTuple = ExecParallelHashTupleAlloc(hashtable, | ||||
| 											   HJTUPLE_OVERHEAD + tuple->t_len, | ||||
| 											   &shared); | ||||
| @@ -2862,7 +2855,7 @@ ExecParallelHashTupleAlloc(HashJoinTable hashtable, size_t size, | ||||
| 	if (pstate->growth != PHJ_GROWTH_DISABLED) | ||||
| 	{ | ||||
| 		Assert(curbatch == 0); | ||||
| 		Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_HASH_INNER); | ||||
| 		Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_HASHING_INNER); | ||||
|  | ||||
| 		/* | ||||
| 		 * Check if our space limit would be exceeded.  To avoid choking on | ||||
| @@ -2982,7 +2975,7 @@ ExecParallelHashJoinSetUpBatches(HashJoinTable hashtable, int nbatch) | ||||
| 		{ | ||||
| 			/* Batch 0 doesn't need to be loaded. */ | ||||
| 			BarrierAttach(&shared->batch_barrier); | ||||
| 			while (BarrierPhase(&shared->batch_barrier) < PHJ_BATCH_PROBE) | ||||
| 			while (BarrierPhase(&shared->batch_barrier) < PHJ_BATCH_PROBING) | ||||
| 				BarrierArriveAndWait(&shared->batch_barrier, 0); | ||||
| 			BarrierDetach(&shared->batch_barrier); | ||||
| 		} | ||||
| @@ -3055,11 +3048,14 @@ ExecParallelHashEnsureBatchAccessors(HashJoinTable hashtable) | ||||
| 	} | ||||
|  | ||||
| 	/* | ||||
| 	 * We should never see a state where the batch-tracking array is freed, | ||||
| 	 * because we should have given up sooner if we join when the build | ||||
| 	 * barrier has reached the PHJ_BUILD_FREE phase. | ||||
| 	 * It's possible for a backend to start up very late so that the whole | ||||
| 	 * join is finished and the shm state for tracking batches has already | ||||
| 	 * been freed by ExecHashTableDetach().  In that case we'll just leave | ||||
| 	 * hashtable->batches as NULL so that ExecParallelHashJoinNewBatch() gives | ||||
| 	 * up early. | ||||
| 	 */ | ||||
| 	Assert(DsaPointerIsValid(pstate->batches)); | ||||
| 	if (!DsaPointerIsValid(pstate->batches)) | ||||
| 		return; | ||||
|  | ||||
| 	/* Use hash join memory context. */ | ||||
| 	oldcxt = MemoryContextSwitchTo(hashtable->hashCxt); | ||||
| @@ -3140,7 +3136,7 @@ ExecHashTableDetachBatch(HashJoinTable hashtable) | ||||
| 			 * longer attached, but since there is no way it's moving after | ||||
| 			 * this point it seems safe to make the following assertion. | ||||
| 			 */ | ||||
| 			Assert(BarrierPhase(&batch->batch_barrier) == PHJ_BATCH_FREE); | ||||
| 			Assert(BarrierPhase(&batch->batch_barrier) == PHJ_BATCH_DONE); | ||||
|  | ||||
| 			/* Free shared chunks and buckets. */ | ||||
| 			while (DsaPointerIsValid(batch->chunks)) | ||||
| @@ -3179,17 +3175,9 @@ ExecHashTableDetachBatch(HashJoinTable hashtable) | ||||
| void | ||||
| ExecHashTableDetach(HashJoinTable hashtable) | ||||
| { | ||||
| 	ParallelHashJoinState *pstate = hashtable->parallel_state; | ||||
|  | ||||
| 	/* | ||||
| 	 * If we're involved in a parallel query, we must either have got all the | ||||
| 	 * way to PHJ_BUILD_RUN, or joined too late and be in PHJ_BUILD_FREE. | ||||
| 	 */ | ||||
| 	Assert(!pstate || | ||||
| 		   BarrierPhase(&pstate->build_barrier) >= PHJ_BUILD_RUN); | ||||
|  | ||||
| 	if (pstate && BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_RUN) | ||||
| 	if (hashtable->parallel_state) | ||||
| 	{ | ||||
| 		ParallelHashJoinState *pstate = hashtable->parallel_state; | ||||
| 		int			i; | ||||
|  | ||||
| 		/* Make sure any temporary files are closed. */ | ||||
| @@ -3205,22 +3193,17 @@ ExecHashTableDetach(HashJoinTable hashtable) | ||||
| 		} | ||||
|  | ||||
| 		/* If we're last to detach, clean up shared memory. */ | ||||
| 		if (BarrierArriveAndDetach(&pstate->build_barrier)) | ||||
| 		if (BarrierDetach(&pstate->build_barrier)) | ||||
| 		{ | ||||
| 			/* | ||||
| 			 * Late joining processes will see this state and give up | ||||
| 			 * immediately. | ||||
| 			 */ | ||||
| 			Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_FREE); | ||||
|  | ||||
| 			if (DsaPointerIsValid(pstate->batches)) | ||||
| 			{ | ||||
| 				dsa_free(hashtable->area, pstate->batches); | ||||
| 				pstate->batches = InvalidDsaPointer; | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		hashtable->parallel_state = NULL; | ||||
| 	} | ||||
| 	hashtable->parallel_state = NULL; | ||||
| } | ||||
|  | ||||
| /* | ||||
|   | ||||
		Reference in New Issue
	
	Block a user