mirror of
https://github.com/postgres/postgres.git
synced 2025-10-27 00:12:01 +03:00
Fix LOCK_TIMEOUT handling during parallel apply.
Previously, the parallel apply worker used SIGINT to receive a graceful shutdown signal from the leader apply worker. However, SIGINT is also used by the LOCK_TIMEOUT handler to trigger a query-cancel interrupt. This overlap caused the parallel apply worker to miss LOCK_TIMEOUT signals, leading to incorrect behavior during lock wait/contention. This patch resolves the conflict by switching the graceful shutdown signal from SIGINT to SIGUSR2. Reported-by: Zane Duffield <duffieldzane@gmail.com> Diagnosed-by: Zhijie Hou <houzj.fnst@fujitsu.com> Author: Hayato Kuroda <kuroda.hayato@fujitsu.com> Reviewed-by: Amit Kapila <amit.kapila16@gmail.com> Backpatch-through: 16, where it was introduced Discussion: https://postgr.es/m/CACMiCkXyC4au74kvE2g6Y=mCEF8X6r-Ne_ty4r7qWkUjRE4+oQ@mail.gmail.com
This commit is contained in:
@@ -94,9 +94,8 @@ SignalHandlerForCrashExit(SIGNAL_ARGS)
|
|||||||
* shut down and exit.
|
* shut down and exit.
|
||||||
*
|
*
|
||||||
* Typically, this handler would be used for SIGTERM, but some processes use
|
* Typically, this handler would be used for SIGTERM, but some processes use
|
||||||
* other signals. In particular, the checkpointer exits on SIGUSR2, and the WAL
|
* other signals. In particular, the checkpointer and parallel apply worker
|
||||||
* writer and the logical replication parallel apply worker exits on either
|
* exit on SIGUSR2, and the WAL writer exits on either SIGINT or SIGTERM.
|
||||||
* SIGINT or SIGTERM.
|
|
||||||
*
|
*
|
||||||
* ShutdownRequestPending should be checked at a convenient place within the
|
* ShutdownRequestPending should be checked at a convenient place within the
|
||||||
* main loop, or else the main loop should call ProcessMainLoopInterrupts.
|
* main loop, or else the main loop should call ProcessMainLoopInterrupts.
|
||||||
|
|||||||
@@ -869,10 +869,17 @@ ParallelApplyWorkerMain(Datum main_arg)
|
|||||||
|
|
||||||
InitializingApplyWorker = true;
|
InitializingApplyWorker = true;
|
||||||
|
|
||||||
/* Setup signal handling. */
|
/*
|
||||||
|
* Setup signal handling.
|
||||||
|
*
|
||||||
|
* Note: We intentionally used SIGUSR2 to trigger a graceful shutdown
|
||||||
|
* initiated by the leader apply worker. This helps to differentiate it
|
||||||
|
* from the case where we abort the current transaction and exit on
|
||||||
|
* receiving SIGTERM.
|
||||||
|
*/
|
||||||
pqsignal(SIGHUP, SignalHandlerForConfigReload);
|
pqsignal(SIGHUP, SignalHandlerForConfigReload);
|
||||||
pqsignal(SIGINT, SignalHandlerForShutdownRequest);
|
|
||||||
pqsignal(SIGTERM, die);
|
pqsignal(SIGTERM, die);
|
||||||
|
pqsignal(SIGUSR2, SignalHandlerForShutdownRequest);
|
||||||
BackgroundWorkerUnblockSignals();
|
BackgroundWorkerUnblockSignals();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -971,9 +978,9 @@ ParallelApplyWorkerMain(Datum main_arg)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* The parallel apply worker must not get here because the parallel apply
|
* The parallel apply worker must not get here because the parallel apply
|
||||||
* worker will only stop when it receives a SIGTERM or SIGINT from the
|
* worker will only stop when it receives a SIGTERM or SIGUSR2 from the
|
||||||
* leader, or when there is an error. None of these cases will allow the
|
* leader, or SIGINT from itself, or when there is an error. None of these
|
||||||
* code to reach here.
|
* cases will allow the code to reach here.
|
||||||
*/
|
*/
|
||||||
Assert(false);
|
Assert(false);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -636,7 +636,7 @@ logicalrep_worker_stop(Oid subid, Oid relid)
|
|||||||
/*
|
/*
|
||||||
* Stop the given logical replication parallel apply worker.
|
* Stop the given logical replication parallel apply worker.
|
||||||
*
|
*
|
||||||
* Node that the function sends SIGINT instead of SIGTERM to the parallel apply
|
* Node that the function sends SIGUSR2 instead of SIGTERM to the parallel apply
|
||||||
* worker so that the worker exits cleanly.
|
* worker so that the worker exits cleanly.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
@@ -674,7 +674,7 @@ logicalrep_pa_worker_stop(ParallelApplyWorkerInfo *winfo)
|
|||||||
* Only stop the worker if the generation matches and the worker is alive.
|
* Only stop the worker if the generation matches and the worker is alive.
|
||||||
*/
|
*/
|
||||||
if (worker->generation == generation && worker->proc)
|
if (worker->generation == generation && worker->proc)
|
||||||
logicalrep_worker_stop_internal(worker, SIGINT);
|
logicalrep_worker_stop_internal(worker, SIGUSR2);
|
||||||
|
|
||||||
LWLockRelease(LogicalRepWorkerLock);
|
LWLockRelease(LogicalRepWorkerLock);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user