mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-31 10:30:33 +03:00 
			
		
		
		
	Fix LOCK_TIMEOUT handling during parallel apply.
Previously, the parallel apply worker used SIGINT to receive a graceful shutdown signal from the leader apply worker. However, SIGINT is also used by the LOCK_TIMEOUT handler to trigger a query-cancel interrupt. This overlap caused the parallel apply worker to miss LOCK_TIMEOUT signals, leading to incorrect behavior during lock wait/contention. This patch resolves the conflict by switching the graceful shutdown signal from SIGINT to SIGUSR2. Reported-by: Zane Duffield <duffieldzane@gmail.com> Diagnosed-by: Zhijie Hou <houzj.fnst@fujitsu.com> Author: Hayato Kuroda <kuroda.hayato@fujitsu.com> Reviewed-by: Amit Kapila <amit.kapila16@gmail.com> Backpatch-through: 16, where it was introduced Discussion: https://postgr.es/m/CACMiCkXyC4au74kvE2g6Y=mCEF8X6r-Ne_ty4r7qWkUjRE4+oQ@mail.gmail.com
This commit is contained in:
		| @@ -98,9 +98,8 @@ SignalHandlerForCrashExit(SIGNAL_ARGS) | ||||
|  * shut down and exit. | ||||
|  * | ||||
|  * Typically, this handler would be used for SIGTERM, but some processes use | ||||
|  * other signals. In particular, the checkpointer exits on SIGUSR2, and the WAL | ||||
|  * writer and the logical replication parallel apply worker exits on either | ||||
|  * SIGINT or SIGTERM. | ||||
|  * other signals. In particular, the checkpointer and parallel apply worker | ||||
|  * exit on SIGUSR2, and the WAL writer exits on either SIGINT or SIGTERM. | ||||
|  * | ||||
|  * ShutdownRequestPending should be checked at a convenient place within the | ||||
|  * main loop, or else the main loop should call HandleMainLoopInterrupts. | ||||
|   | ||||
| @@ -872,10 +872,17 @@ ParallelApplyWorkerMain(Datum main_arg) | ||||
|  | ||||
| 	InitializingApplyWorker = true; | ||||
|  | ||||
| 	/* Setup signal handling. */ | ||||
| 	/* | ||||
| 	 * Setup signal handling. | ||||
| 	 * | ||||
| 	 * Note: We intentionally used SIGUSR2 to trigger a graceful shutdown | ||||
| 	 * initiated by the leader apply worker. This helps to differentiate it | ||||
| 	 * from the case where we abort the current transaction and exit on | ||||
| 	 * receiving SIGTERM. | ||||
| 	 */ | ||||
| 	pqsignal(SIGHUP, SignalHandlerForConfigReload); | ||||
| 	pqsignal(SIGINT, SignalHandlerForShutdownRequest); | ||||
| 	pqsignal(SIGTERM, die); | ||||
| 	pqsignal(SIGUSR2, SignalHandlerForShutdownRequest); | ||||
| 	BackgroundWorkerUnblockSignals(); | ||||
|  | ||||
| 	/* | ||||
| @@ -974,9 +981,9 @@ ParallelApplyWorkerMain(Datum main_arg) | ||||
|  | ||||
| 	/* | ||||
| 	 * The parallel apply worker must not get here because the parallel apply | ||||
| 	 * worker will only stop when it receives a SIGTERM or SIGINT from the | ||||
| 	 * leader, or when there is an error. None of these cases will allow the | ||||
| 	 * code to reach here. | ||||
| 	 * worker will only stop when it receives a SIGTERM or SIGUSR2 from the | ||||
| 	 * leader, or SIGINT from itself, or when there is an error. None of these | ||||
| 	 * cases will allow the code to reach here. | ||||
| 	 */ | ||||
| 	Assert(false); | ||||
| } | ||||
|   | ||||
| @@ -624,7 +624,7 @@ logicalrep_worker_stop(Oid subid, Oid relid) | ||||
| /* | ||||
|  * Stop the given logical replication parallel apply worker. | ||||
|  * | ||||
|  * Node that the function sends SIGINT instead of SIGTERM to the parallel apply | ||||
|  * Node that the function sends SIGUSR2 instead of SIGTERM to the parallel apply | ||||
|  * worker so that the worker exits cleanly. | ||||
|  */ | ||||
| void | ||||
| @@ -662,7 +662,7 @@ logicalrep_pa_worker_stop(ParallelApplyWorkerInfo *winfo) | ||||
| 	 * Only stop the worker if the generation matches and the worker is alive. | ||||
| 	 */ | ||||
| 	if (worker->generation == generation && worker->proc) | ||||
| 		logicalrep_worker_stop_internal(worker, SIGINT); | ||||
| 		logicalrep_worker_stop_internal(worker, SIGUSR2); | ||||
|  | ||||
| 	LWLockRelease(LogicalRepWorkerLock); | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user