1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-04 12:42:24 +03:00
postgres/src/backend/storage/ipc/procsignal.c
Andres Freund c6c3334364 Prevent possibility of panics during shutdown checkpoint.
When the checkpointer writes the shutdown checkpoint, it checks
afterwards whether any WAL has been written since it started and
throws a PANIC if so.  At that point, only walsenders are still
active, so one might think this could not happen, but walsenders can
also generate WAL, for instance in BASE_BACKUP and logical decoding
related commands (e.g. via hint bits).  So they can trigger this panic
if such a command is run while the shutdown checkpoint is being
written.

To fix this, divide the walsender shutdown into two phases.  First,
checkpointer, itself triggered by postmaster, sends a
PROCSIG_WALSND_INIT_STOPPING signal to all walsenders.  If the backend
is idle or runs an SQL query, this causes the backend to shut down; if
logical replication is in progress, all existing WAL records are
processed, followed by a shutdown.  Otherwise this causes the walsender
to switch to the "stopping" state. In this state, the walsender will
reject any further replication commands. The checkpointer begins the
shutdown checkpoint once all walsenders are confirmed as
stopping. When the shutdown checkpoint finishes, the postmaster sends
us SIGUSR2. This instructs walsender to send any outstanding WAL,
including the shutdown checkpoint record, wait for it to be replicated
to the standby, and then exit.

Author: Andres Freund, based on an earlier patch by Michael Paquier
Reported-By: Fujii Masao, Andres Freund
Reviewed-By: Michael Paquier
Discussion: https://postgr.es/m/20170602002912.tqlwn4gymzlxpvs2@alap3.anarazel.de
Backpatch: 9.4, where logical decoding was introduced
2017-06-05 19:18:15 -07:00

301 lines
8.1 KiB
C

/*-------------------------------------------------------------------------
*
* procsignal.c
* Routines for interprocess signalling
*
*
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/storage/ipc/procsignal.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <signal.h>
#include <unistd.h>
#include "access/parallel.h"
#include "commands/async.h"
#include "miscadmin.h"
#include "replication/walsender.h"
#include "storage/latch.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/shmem.h"
#include "storage/sinval.h"
#include "tcop/tcopprot.h"
/*
* The SIGUSR1 signal is multiplexed to support signalling multiple event
* types. The specific reason is communicated via flags in shared memory.
* We keep a boolean flag for each possible "reason", so that different
* reasons can be signaled to a process concurrently. (However, if the same
* reason is signaled more than once nearly simultaneously, the process may
* observe it only once.)
*
* Each process that wants to receive signals registers its process ID
* in the ProcSignalSlots array. The array is indexed by backend ID to make
* slot allocation simple, and to avoid having to search the array when you
* know the backend ID of the process you're signalling. (We do support
* signalling without backend ID, but it's a bit less efficient.)
*
* The flags are declared as "sig_atomic_t", and the code that sets and
* tests them goes through volatile-qualified slot pointers, for maximum
* portability. This should ensure that loads and stores of the flag
* values are atomic, allowing us to dispense with any explicit locking.
*/
/*
 * ProcSignalSlot
 *		One entry of the ProcSignalSlots array, describing a single
 *		registered process.
 */
typedef struct
{
/* PID of the process owning this slot; 0 while the slot is unused */
pid_t pss_pid;
/* One pending-signal flag per reason, indexed by ProcSignalReason */
sig_atomic_t pss_signalFlags[NUM_PROCSIGNALS];
} ProcSignalSlot;
/*
 * We reserve a slot for each possible BackendId, plus one for each
 * possible auxiliary process type. (This scheme assumes there is not
 * more than one of any auxiliary process type at a time.)
 */
#define NumProcSignalSlots (MaxBackends + NUM_AUXPROCTYPES)
/* Base of the slot array; assigned by ProcSignalShmemInit() */
static ProcSignalSlot *ProcSignalSlots = NULL;
/*
 * This process's own slot: set by ProcSignalInit(), reset to NULL by
 * CleanupProcSignalState(). CheckProcSignal() treats NULL as "nothing
 * signaled".
 */
static volatile ProcSignalSlot *MyProcSignalSlot = NULL;
/* Forward declarations of file-local routines */
static bool CheckProcSignal(ProcSignalReason reason);
static void CleanupProcSignalState(int status, Datum arg);
/*
 * ProcSignalShmemSize
 *		Report how many bytes of shared memory the ProcSignalSlots array
 *		needs: NumProcSignalSlots entries of one ProcSignalSlot each.
 */
Size
ProcSignalShmemSize(void)
{
	Size		slot_bytes = sizeof(ProcSignalSlot);

	return NumProcSignalSlots * slot_bytes;
}
/*
 * ProcSignalShmemInit
 *		Obtain the "ProcSignalSlots" shared memory area and point
 *		ProcSignalSlots at it.
 *
 * When ShmemInitStruct() reports that the area did not exist before this
 * call, the whole area is zero-filled; otherwise its contents are left
 * untouched.
 */
void
ProcSignalShmemInit(void)
{
	bool		already_present;
	Size		area_bytes = ProcSignalShmemSize();

	ProcSignalSlots = (ProcSignalSlot *)
		ShmemInitStruct("ProcSignalSlots", area_bytes, &already_present);

	/* Somebody else created the area before us: nothing left to do */
	if (already_present)
		return;

	/* We are first, so start from an all-zeroes array */
	MemSet(ProcSignalSlots, 0, area_bytes);
}
/*
 * ProcSignalInit
 *		Claim a ProcSignalSlots entry for the current process.
 *
 * pss_idx is 1-based: it should be my BackendId if the process has one,
 * or MaxBackends + aux process type if not.
 *
 * A slot that still carries a nonzero PID is reported at LOG level and
 * then taken over anyway. The slot is handed back by
 * CleanupProcSignalState(), which is registered here as an
 * on_shmem_exit callback.
 */
void
ProcSignalInit(int pss_idx)
{
	volatile ProcSignalSlot *my_slot;

	Assert(pss_idx >= 1 && pss_idx <= NumProcSignalSlots);

	/* Convert the 1-based index into a 0-based array position */
	my_slot = ProcSignalSlots + (pss_idx - 1);

	/* Sanity check: complain about a slot that was never released */
	if (my_slot->pss_pid != 0)
	{
		elog(LOG, "process %d taking over ProcSignal slot %d, but it's not empty",
			 MyProcPid, pss_idx);
	}

	/* Wipe stale signal reasons first, then stamp the slot with our PID */
	MemSet(my_slot->pss_signalFlags, 0, NUM_PROCSIGNALS * sizeof(sig_atomic_t));
	my_slot->pss_pid = MyProcPid;

	/* CheckProcSignal() finds our flags through this pointer */
	MyProcSignalSlot = my_slot;

	/* Arrange for the slot to be released when the process exits */
	on_shmem_exit(CleanupProcSignalState, Int32GetDatum(pss_idx));
}
/*
 * CleanupProcSignalState
 *		Give back the ProcSignalSlots entry claimed by ProcSignalInit().
 *
 * Registered through on_shmem_exit(); arg carries the same 1-based slot
 * index that was passed to ProcSignalInit(). The status argument is not
 * used.
 */
static void
CleanupProcSignalState(int status, Datum arg)
{
	int			slot_no = DatumGetInt32(arg);
	volatile ProcSignalSlot *my_slot = &ProcSignalSlots[slot_no - 1];

	Assert(my_slot == MyProcSignalSlot);

	/*
	 * Forget our slot pointer before touching the slot itself, so that a
	 * SIGUSR1 received after this point won't try to access it after it's
	 * no longer ours (and perhaps even after we've unmapped the shared
	 * memory segment).
	 */
	MyProcSignalSlot = NULL;

	/* Normal case: the slot still carries our PID, so mark it free */
	if (my_slot->pss_pid == MyProcPid)
	{
		my_slot->pss_pid = 0;
		return;
	}

	/*
	 * Sanity check failed. Only LOG it rather than raising an ERROR: we're
	 * exiting anyway, and don't want to get into an infinite loop trying to
	 * exit. The slot is left as-is (XXX better to zero the slot anyway?).
	 */
	elog(LOG, "process %d releasing ProcSignal slot %d, but it contains %d",
		 MyProcPid, slot_no, (int) my_slot->pss_pid);
}
/*
 * SendProcSignal
 *		Send a signal to a Postgres process
 *
 * The reason's flag is set in the target's slot and SIGUSR1 is then sent
 * to pid. Providing backendId is optional, but it will speed up the
 * operation: without it the slot array has to be searched for pid.
 *
 * Returns the result of kill() when a slot owned by pid was found.
 * When no such slot exists, returns -1 with errno set to ESRCH.
 *
 * Not to be confused with ProcSendSignal
 */
int
SendProcSignal(pid_t pid, ProcSignalReason reason, BackendId backendId)
{
	volatile ProcSignalSlot *target = NULL;

	if (backendId == InvalidBackendId)
	{
		/*
		 * BackendId not provided, so search the array using pid. We search
		 * the array back to front so as to reduce search overhead. Passing
		 * InvalidBackendId means that the target is most likely an auxiliary
		 * process, which will have a slot near the end of the array.
		 */
		int			pos = NumProcSignalSlots;

		while (--pos >= 0)
		{
			volatile ProcSignalSlot *candidate = &ProcSignalSlots[pos];

			if (candidate->pss_pid == pid)
			{
				target = candidate;
				break;
			}
		}
	}
	else
	{
		/* Caller told us the slot; only verify that pid still owns it */
		volatile ProcSignalSlot *candidate = &ProcSignalSlots[backendId - 1];

		if (candidate->pss_pid == pid)
			target = candidate;
	}

	/* No slot is registered for this pid */
	if (target == NULL)
	{
		errno = ESRCH;
		return -1;
	}

	/*
	 * Note: Since there's no locking, it's possible that the target process
	 * detaches from shared memory and exits right after the PID test above,
	 * before we set the flag and send the signal. And the signal slot might
	 * even be recycled by a new process, so it's remotely possible that we
	 * set a flag for a wrong process. That's OK, all the signals are such
	 * that no harm is done if they're mistakenly fired.
	 */

	/* Atomically set the proper flag, then deliver the signal */
	target->pss_signalFlags[reason] = true;
	return kill(pid, SIGUSR1);
}
/*
 * CheckProcSignal - test whether the given reason has been signaled to
 * this process and, if so, clear its flag. Should be called after
 * receiving SIGUSR1.
 *
 * Returns true only when the flag was seen set. Returns false when the
 * flag is clear or when this process currently has no slot
 * (MyProcSignalSlot is NULL).
 */
static bool
CheckProcSignal(ProcSignalReason reason)
{
	volatile ProcSignalSlot *mine = MyProcSignalSlot;

	/* Not registered (or already cleaned up): nothing can be pending */
	if (mine == NULL)
		return false;

	/* Careful here --- don't clear flag if we haven't seen it set */
	if (!mine->pss_signalFlags[reason])
		return false;

	mine->pss_signalFlags[reason] = false;
	return true;
}
/*
* procsignal_sigusr1_handler - handle SIGUSR1 signal.
*/
void
procsignal_sigusr1_handler(SIGNAL_ARGS)
{
int save_errno = errno;
if (CheckProcSignal(PROCSIG_CATCHUP_INTERRUPT))
HandleCatchupInterrupt();
if (CheckProcSignal(PROCSIG_NOTIFY_INTERRUPT))
HandleNotifyInterrupt();
if (CheckProcSignal(PROCSIG_PARALLEL_MESSAGE))
HandleParallelMessageInterrupt();
if (CheckProcSignal(PROCSIG_WALSND_INIT_STOPPING))
HandleWalSndInitStopping();
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_DATABASE))
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_DATABASE);
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_TABLESPACE))
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_TABLESPACE);
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_LOCK))
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_LOCK);
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT))
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT);
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK))
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN))
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
SetLatch(MyLatch);
latch_sigusr1_handler();
errno = save_errno;
}