1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-27 00:12:01 +03:00

Fix LOCK_TIMEOUT handling during parallel apply.

Previously, the parallel apply worker used SIGINT to receive a graceful
shutdown signal from the leader apply worker. However, SIGINT is also used
by the LOCK_TIMEOUT handler to trigger a query-cancel interrupt. This
overlap caused the parallel apply worker to miss LOCK_TIMEOUT signals,
leading to incorrect behavior during lock wait/contention.

This patch resolves the conflict by switching the graceful shutdown signal
from SIGINT to SIGUSR2.

Reported-by: Zane Duffield <duffieldzane@gmail.com>
Diagnosed-by: Zhijie Hou <houzj.fnst@fujitsu.com>
Author: Hayato Kuroda <kuroda.hayato@fujitsu.com>
Reviewed-by: Amit Kapila <amit.kapila16@gmail.com>
Backpatch-through: 16, where it was introduced
Discussion: https://postgr.es/m/CACMiCkXyC4au74kvE2g6Y=mCEF8X6r-Ne_ty4r7qWkUjRE4+oQ@mail.gmail.com
This commit is contained in:
Amit Kapila
2025-09-24 04:00:15 +00:00
parent 178bbf403c
commit 37fc5de438
3 changed files with 16 additions and 10 deletions

View File

@@ -94,9 +94,8 @@ SignalHandlerForCrashExit(SIGNAL_ARGS)
* shut down and exit. * shut down and exit.
* *
* Typically, this handler would be used for SIGTERM, but some processes use * Typically, this handler would be used for SIGTERM, but some processes use
* other signals. In particular, the checkpointer exits on SIGUSR2, and the WAL * other signals. In particular, the checkpointer and parallel apply worker
* writer and the logical replication parallel apply worker exits on either * exit on SIGUSR2, and the WAL writer exits on either SIGINT or SIGTERM.
* SIGINT or SIGTERM.
* *
* ShutdownRequestPending should be checked at a convenient place within the * ShutdownRequestPending should be checked at a convenient place within the
* main loop, or else the main loop should call ProcessMainLoopInterrupts. * main loop, or else the main loop should call ProcessMainLoopInterrupts.

View File

@@ -869,10 +869,17 @@ ParallelApplyWorkerMain(Datum main_arg)
InitializingApplyWorker = true; InitializingApplyWorker = true;
/* Setup signal handling. */ /*
* Setup signal handling.
*
* Note: We intentionally used SIGUSR2 to trigger a graceful shutdown
* initiated by the leader apply worker. This helps to differentiate it
* from the case where we abort the current transaction and exit on
* receiving SIGTERM.
*/
pqsignal(SIGHUP, SignalHandlerForConfigReload); pqsignal(SIGHUP, SignalHandlerForConfigReload);
pqsignal(SIGINT, SignalHandlerForShutdownRequest);
pqsignal(SIGTERM, die); pqsignal(SIGTERM, die);
pqsignal(SIGUSR2, SignalHandlerForShutdownRequest);
BackgroundWorkerUnblockSignals(); BackgroundWorkerUnblockSignals();
/* /*
@@ -971,9 +978,9 @@ ParallelApplyWorkerMain(Datum main_arg)
/* /*
* The parallel apply worker must not get here because the parallel apply * The parallel apply worker must not get here because the parallel apply
* worker will only stop when it receives a SIGTERM or SIGINT from the * worker will only stop when it receives a SIGTERM or SIGUSR2 from the
* leader, or when there is an error. None of these cases will allow the * leader, or SIGINT from itself, or when there is an error. None of these
* code to reach here. * cases will allow the code to reach here.
*/ */
Assert(false); Assert(false);
} }

View File

@@ -636,7 +636,7 @@ logicalrep_worker_stop(Oid subid, Oid relid)
/* /*
* Stop the given logical replication parallel apply worker. * Stop the given logical replication parallel apply worker.
* *
* Node that the function sends SIGINT instead of SIGTERM to the parallel apply * Node that the function sends SIGUSR2 instead of SIGTERM to the parallel apply
* worker so that the worker exits cleanly. * worker so that the worker exits cleanly.
*/ */
void void
@@ -674,7 +674,7 @@ logicalrep_pa_worker_stop(ParallelApplyWorkerInfo *winfo)
* Only stop the worker if the generation matches and the worker is alive. * Only stop the worker if the generation matches and the worker is alive.
*/ */
if (worker->generation == generation && worker->proc) if (worker->generation == generation && worker->proc)
logicalrep_worker_stop_internal(worker, SIGINT); logicalrep_worker_stop_internal(worker, SIGUSR2);
LWLockRelease(LogicalRepWorkerLock); LWLockRelease(LogicalRepWorkerLock);
} }