mirror of
https://github.com/postgres/postgres.git
synced 2025-04-20 00:42:27 +03:00
A new function EmitProcSignalBarrier() can be used to emit a global barrier which all backends that participate in the ProcSignal mechanism must absorb, and a new function WaitForProcSignalBarrier() can be used to wait until all relevant backends have in fact absorbed the barrier. This can be used to coordinate global state changes, such as turning checksums on while the system is running. There's no real client of this mechanism yet, although two are proposed, but an enum has to have at least one element, so this includes a placeholder type (PROCSIGNAL_BARRIER_PLACEHOLDER) which should be replaced by the first real client of this mechanism to get committed. Andres Freund and Robert Haas, reviewed by Daniel Gustafsson and, in earlier versions, by Magnus Hagander. Discussion: http://postgr.es/m/CA+TgmoZwDk=BguVDVa+qdA6SBKef=PKbaKDQALTC_9qoz1mJqg@mail.gmail.com
579 lines
17 KiB
C
579 lines
17 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* procsignal.c
|
|
* Routines for interprocess signalling
|
|
*
|
|
*
|
|
* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
* IDENTIFICATION
|
|
* src/backend/storage/ipc/procsignal.c
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include <signal.h>
|
|
#include <unistd.h>
|
|
|
|
#include "access/parallel.h"
|
|
#include "commands/async.h"
|
|
#include "miscadmin.h"
|
|
#include "pgstat.h"
|
|
#include "replication/walsender.h"
|
|
#include "storage/ipc.h"
|
|
#include "storage/latch.h"
|
|
#include "storage/proc.h"
|
|
#include "storage/shmem.h"
|
|
#include "storage/sinval.h"
|
|
#include "tcop/tcopprot.h"
|
|
|
|
/*
|
|
* The SIGUSR1 signal is multiplexed to support signalling multiple event
|
|
* types. The specific reason is communicated via flags in shared memory.
|
|
* We keep a boolean flag for each possible "reason", so that different
|
|
* reasons can be signaled to a process concurrently. (However, if the same
|
|
* reason is signaled more than once nearly simultaneously, the process may
|
|
* observe it only once.)
|
|
*
|
|
* Each process that wants to receive signals registers its process ID
|
|
* in the ProcSignalSlots array. The array is indexed by backend ID to make
|
|
* slot allocation simple, and to avoid having to search the array when you
|
|
* know the backend ID of the process you're signalling. (We do support
|
|
* signalling without backend ID, but it's a bit less efficient.)
|
|
*
|
|
* The flags are actually declared as "volatile sig_atomic_t" for maximum
|
|
* portability. This should ensure that loads and stores of the flag
|
|
* values are atomic, allowing us to dispense with any explicit locking.
|
|
*
|
|
* pss_signalFlags are intended to be set in cases where we don't need to
|
|
* keep track of whether or not the target process has handled the signal,
|
|
* but sometimes we need confirmation, as when making a global state change
|
|
* that cannot be considered complete until all backends have taken notice
|
|
* of it. For such use cases, we set a bit in pss_barrierCheckMask and then
|
|
* increment the current "barrier generation"; when the new barrier generation
|
|
* (or greater) appears in the pss_barrierGeneration flag of every process,
|
|
* we know that the message has been received everywhere.
|
|
*/
|
|
typedef struct
|
|
{
|
|
pid_t pss_pid;
|
|
sig_atomic_t pss_signalFlags[NUM_PROCSIGNALS];
|
|
pg_atomic_uint64 pss_barrierGeneration;
|
|
pg_atomic_uint32 pss_barrierCheckMask;
|
|
} ProcSignalSlot;
|
|
|
|
/*
|
|
* Information that is global to the entire ProcSignal system can be stored
|
|
* here.
|
|
*
|
|
* psh_barrierGeneration is the highest barrier generation in existence.
|
|
*/
|
|
typedef struct
|
|
{
|
|
pg_atomic_uint64 psh_barrierGeneration;
|
|
ProcSignalSlot psh_slot[FLEXIBLE_ARRAY_MEMBER];
|
|
} ProcSignalHeader;
|
|
|
|
/*
 * Number of entries in the ProcSignal slot array: one for every possible
 * BackendId, plus one for every auxiliary process type.  (This relies on
 * there never being more than one auxiliary process of a given type at a
 * time.)
 */
#define NumProcSignalSlots	(NUM_AUXPROCTYPES + MaxBackends)
|
|
|
|
/* True iff the bit belonging to barrier type "type" is set in "flags". */
#define BARRIER_SHOULD_CHECK(flags, type) \
	(((((uint32) 1) << (uint32) (type)) & (flags)) != 0)
|
|
|
|
static ProcSignalHeader *ProcSignal = NULL;
|
|
static volatile ProcSignalSlot *MyProcSignalSlot = NULL;
|
|
|
|
static bool CheckProcSignal(ProcSignalReason reason);
|
|
static void CleanupProcSignalState(int status, Datum arg);
|
|
static void ProcessBarrierPlaceholder(void);
|
|
|
|
/*
|
|
* ProcSignalShmemSize
|
|
* Compute space needed for procsignal's shared memory
|
|
*/
|
|
Size
|
|
ProcSignalShmemSize(void)
|
|
{
|
|
Size size;
|
|
|
|
size = mul_size(NumProcSignalSlots, sizeof(ProcSignalSlot));
|
|
size = add_size(size, offsetof(ProcSignalHeader, psh_slot));
|
|
return size;
|
|
}
|
|
|
|
/*
|
|
* ProcSignalShmemInit
|
|
* Allocate and initialize procsignal's shared memory
|
|
*/
|
|
void
|
|
ProcSignalShmemInit(void)
|
|
{
|
|
Size size = ProcSignalShmemSize();
|
|
bool found;
|
|
|
|
ProcSignal = (ProcSignalHeader *)
|
|
ShmemInitStruct("ProcSignal", size, &found);
|
|
|
|
/* If we're first, initialize. */
|
|
if (!found)
|
|
{
|
|
int i;
|
|
|
|
pg_atomic_init_u64(&ProcSignal->psh_barrierGeneration, 0);
|
|
|
|
for (i = 0; i < NumProcSignalSlots; ++i)
|
|
{
|
|
ProcSignalSlot *slot = &ProcSignal->psh_slot[i];
|
|
|
|
slot->pss_pid = 0;
|
|
MemSet(slot->pss_signalFlags, 0, sizeof(slot->pss_signalFlags));
|
|
pg_atomic_init_u64(&slot->pss_barrierGeneration, PG_UINT64_MAX);
|
|
pg_atomic_init_u32(&slot->pss_barrierCheckMask, 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* ProcSignalInit
|
|
* Register the current process in the procsignal array
|
|
*
|
|
* The passed index should be my BackendId if the process has one,
|
|
* or MaxBackends + aux process type if not.
|
|
*/
|
|
void
|
|
ProcSignalInit(int pss_idx)
|
|
{
|
|
volatile ProcSignalSlot *slot;
|
|
uint64 barrier_generation;
|
|
|
|
Assert(pss_idx >= 1 && pss_idx <= NumProcSignalSlots);
|
|
|
|
slot = &ProcSignal->psh_slot[pss_idx - 1];
|
|
|
|
/* sanity check */
|
|
if (slot->pss_pid != 0)
|
|
elog(LOG, "process %d taking over ProcSignal slot %d, but it's not empty",
|
|
MyProcPid, pss_idx);
|
|
|
|
/* Clear out any leftover signal reasons */
|
|
MemSet(slot->pss_signalFlags, 0, NUM_PROCSIGNALS * sizeof(sig_atomic_t));
|
|
|
|
/*
|
|
* Initialize barrier state. Since we're a brand-new process, there
|
|
* shouldn't be any leftover backend-private state that needs to be
|
|
* updated. Therefore, we can broadcast the latest barrier generation
|
|
* and disregard any previously-set check bits.
|
|
*
|
|
* NB: This only works if this initialization happens early enough in the
|
|
* startup sequence that we haven't yet cached any state that might need
|
|
* to be invalidated. That's also why we have a memory barrier here, to
|
|
* be sure that any later reads of memory happen strictly after this.
|
|
*/
|
|
pg_atomic_write_u32(&slot->pss_barrierCheckMask, 0);
|
|
barrier_generation =
|
|
pg_atomic_read_u64(&ProcSignal->psh_barrierGeneration);
|
|
pg_atomic_write_u64(&slot->pss_barrierGeneration, barrier_generation);
|
|
pg_memory_barrier();
|
|
|
|
/* Mark slot with my PID */
|
|
slot->pss_pid = MyProcPid;
|
|
|
|
/* Remember slot location for CheckProcSignal */
|
|
MyProcSignalSlot = slot;
|
|
|
|
/* Set up to release the slot on process exit */
|
|
on_shmem_exit(CleanupProcSignalState, Int32GetDatum(pss_idx));
|
|
}
|
|
|
|
/*
|
|
* CleanupProcSignalState
|
|
* Remove current process from ProcSignal mechanism
|
|
*
|
|
* This function is called via on_shmem_exit() during backend shutdown.
|
|
*/
|
|
static void
|
|
CleanupProcSignalState(int status, Datum arg)
|
|
{
|
|
int pss_idx = DatumGetInt32(arg);
|
|
volatile ProcSignalSlot *slot;
|
|
|
|
slot = &ProcSignal->psh_slot[pss_idx - 1];
|
|
Assert(slot == MyProcSignalSlot);
|
|
|
|
/*
|
|
* Clear MyProcSignalSlot, so that a SIGUSR1 received after this point
|
|
* won't try to access it after it's no longer ours (and perhaps even
|
|
* after we've unmapped the shared memory segment).
|
|
*/
|
|
MyProcSignalSlot = NULL;
|
|
|
|
/* sanity check */
|
|
if (slot->pss_pid != MyProcPid)
|
|
{
|
|
/*
|
|
* don't ERROR here. We're exiting anyway, and don't want to get into
|
|
* infinite loop trying to exit
|
|
*/
|
|
elog(LOG, "process %d releasing ProcSignal slot %d, but it contains %d",
|
|
MyProcPid, pss_idx, (int) slot->pss_pid);
|
|
return; /* XXX better to zero the slot anyway? */
|
|
}
|
|
|
|
/*
|
|
* Make this slot look like it's absorbed all possible barriers, so that
|
|
* no barrier waits block on it.
|
|
*/
|
|
pg_atomic_write_u64(&slot->pss_barrierGeneration, PG_UINT64_MAX);
|
|
|
|
slot->pss_pid = 0;
|
|
}
|
|
|
|
/*
|
|
* SendProcSignal
|
|
* Send a signal to a Postgres process
|
|
*
|
|
* Providing backendId is optional, but it will speed up the operation.
|
|
*
|
|
* On success (a signal was sent), zero is returned.
|
|
* On error, -1 is returned, and errno is set (typically to ESRCH or EPERM).
|
|
*
|
|
* Not to be confused with ProcSendSignal
|
|
*/
|
|
int
|
|
SendProcSignal(pid_t pid, ProcSignalReason reason, BackendId backendId)
|
|
{
|
|
volatile ProcSignalSlot *slot;
|
|
|
|
if (backendId != InvalidBackendId)
|
|
{
|
|
slot = &ProcSignal->psh_slot[backendId - 1];
|
|
|
|
/*
|
|
* Note: Since there's no locking, it's possible that the target
|
|
* process detaches from shared memory and exits right after this
|
|
* test, before we set the flag and send signal. And the signal slot
|
|
* might even be recycled by a new process, so it's remotely possible
|
|
* that we set a flag for a wrong process. That's OK, all the signals
|
|
* are such that no harm is done if they're mistakenly fired.
|
|
*/
|
|
if (slot->pss_pid == pid)
|
|
{
|
|
/* Atomically set the proper flag */
|
|
slot->pss_signalFlags[reason] = true;
|
|
/* Send signal */
|
|
return kill(pid, SIGUSR1);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* BackendId not provided, so search the array using pid. We search
|
|
* the array back to front so as to reduce search overhead. Passing
|
|
* InvalidBackendId means that the target is most likely an auxiliary
|
|
* process, which will have a slot near the end of the array.
|
|
*/
|
|
int i;
|
|
|
|
for (i = NumProcSignalSlots - 1; i >= 0; i--)
|
|
{
|
|
slot = &ProcSignal->psh_slot[i];
|
|
|
|
if (slot->pss_pid == pid)
|
|
{
|
|
/* the above note about race conditions applies here too */
|
|
|
|
/* Atomically set the proper flag */
|
|
slot->pss_signalFlags[reason] = true;
|
|
/* Send signal */
|
|
return kill(pid, SIGUSR1);
|
|
}
|
|
}
|
|
}
|
|
|
|
errno = ESRCH;
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
* EmitProcSignalBarrier
|
|
* Send a signal to every Postgres process
|
|
*
|
|
* The return value of this function is the barrier "generation" created
|
|
* by this operation. This value can be passed to WaitForProcSignalBarrier
|
|
* to wait until it is known that every participant in the ProcSignal
|
|
* mechanism has absorbed the signal (or started afterwards).
|
|
*
|
|
* Note that it would be a bad idea to use this for anything that happens
|
|
* frequently, as interrupting every backend could cause a noticeable
|
|
* performance hit.
|
|
*
|
|
* Callers are entitled to assume that this function will not throw ERROR
|
|
* or FATAL.
|
|
*/
|
|
uint64
|
|
EmitProcSignalBarrier(ProcSignalBarrierType type)
|
|
{
|
|
uint64 flagbit = UINT64CONST(1) << (uint64) type;
|
|
uint64 generation;
|
|
|
|
/*
|
|
* Set all the flags.
|
|
*
|
|
* Note that pg_atomic_fetch_or_u32 has full barrier semantics, so this
|
|
* is totally ordered with respect to anything the caller did before, and
|
|
* anything that we do afterwards. (This is also true of the later call
|
|
* to pg_atomic_add_fetch_u64.)
|
|
*/
|
|
for (int i = 0; i < NumProcSignalSlots; i++)
|
|
{
|
|
volatile ProcSignalSlot *slot = &ProcSignal->psh_slot[i];
|
|
|
|
pg_atomic_fetch_or_u32(&slot->pss_barrierCheckMask, flagbit);
|
|
}
|
|
|
|
/*
|
|
* Increment the generation counter.
|
|
*/
|
|
generation =
|
|
pg_atomic_add_fetch_u64(&ProcSignal->psh_barrierGeneration, 1);
|
|
|
|
/*
|
|
* Signal all the processes, so that they update their advertised barrier
|
|
* generation.
|
|
*
|
|
* Concurrency is not a problem here. Backends that have exited don't
|
|
* matter, and new backends that have joined since we entered this function
|
|
* must already have current state, since the caller is responsible for
|
|
* making sure that the relevant state is entirely visible before calling
|
|
* this function in the first place. We still have to wake them up -
|
|
* because we can't distinguish between such backends and older backends
|
|
* that need to update state - but they won't actually need to change
|
|
* any state.
|
|
*/
|
|
for (int i = NumProcSignalSlots - 1; i >= 0; i--)
|
|
{
|
|
volatile ProcSignalSlot *slot = &ProcSignal->psh_slot[i];
|
|
pid_t pid = slot->pss_pid;
|
|
|
|
if (pid != 0)
|
|
kill(pid, SIGUSR1);
|
|
}
|
|
|
|
return generation;
|
|
}
|
|
|
|
/*
|
|
* WaitForProcSignalBarrier - wait until it is guaranteed that all changes
|
|
* requested by a specific call to EmitProcSignalBarrier() have taken effect.
|
|
*
|
|
* We expect that the barrier will normally be absorbed very quickly by other
|
|
* backends, so we start by waiting just 1/8 of a second and then back off
|
|
* by a factor of two every time we time out, to a maximum wait time of
|
|
* 1 second.
|
|
*/
|
|
void
|
|
WaitForProcSignalBarrier(uint64 generation)
|
|
{
|
|
long timeout = 125L;
|
|
|
|
for (int i = NumProcSignalSlots - 1; i >= 0; i--)
|
|
{
|
|
volatile ProcSignalSlot *slot = &ProcSignal->psh_slot[i];
|
|
uint64 oldval;
|
|
|
|
oldval = pg_atomic_read_u64(&slot->pss_barrierGeneration);
|
|
while (oldval < generation)
|
|
{
|
|
int events;
|
|
|
|
CHECK_FOR_INTERRUPTS();
|
|
|
|
events =
|
|
WaitLatch(MyLatch,
|
|
WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
|
|
timeout, WAIT_EVENT_PROC_SIGNAL_BARRIER);
|
|
ResetLatch(MyLatch);
|
|
|
|
oldval = pg_atomic_read_u64(&slot->pss_barrierGeneration);
|
|
if (events & WL_TIMEOUT)
|
|
timeout = Min(timeout * 2, 1000L);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* The caller is probably calling this function because it wants to
|
|
* read the shared state or perform further writes to shared state once
|
|
* all backends are known to have absorbed the barrier. However, the
|
|
* read of pss_barrierGeneration was performed unlocked; insert a memory
|
|
* barrier to separate it from whatever follows.
|
|
*/
|
|
pg_memory_barrier();
|
|
}
|
|
|
|
/*
|
|
* Perform global barrier related interrupt checking.
|
|
*
|
|
* Any backend that participates in ProcSignal signalling must arrange to
|
|
* call this function periodically. It is called from CHECK_FOR_INTERRUPTS(),
|
|
* which is enough for normal backends, but not necessarily for all types of
|
|
* background processes.
|
|
*/
|
|
void
|
|
ProcessProcSignalBarrier(void)
|
|
{
|
|
uint64 generation;
|
|
uint32 flags;
|
|
|
|
/* Exit quickly if there's no work to do. */
|
|
if (!ProcSignalBarrierPending)
|
|
return;
|
|
ProcSignalBarrierPending = false;
|
|
|
|
/*
|
|
* Read the current barrier generation, and then get the flags that
|
|
* are set for this backend. Note that pg_atomic_exchange_u32 is a full
|
|
* barrier, so we're guaranteed that the read of the barrier generation
|
|
* happens before we atomically extract the flags, and that any subsequent
|
|
* state changes happen afterward.
|
|
*/
|
|
generation = pg_atomic_read_u64(&ProcSignal->psh_barrierGeneration);
|
|
flags = pg_atomic_exchange_u32(&MyProcSignalSlot->pss_barrierCheckMask, 0);
|
|
|
|
/*
|
|
* Process each type of barrier. It's important that nothing we call from
|
|
* here throws an error, because pss_barrierCheckMask has already been
|
|
* cleared. If we jumped out of here before processing all barrier types,
|
|
* then we'd forget about the need to do so later.
|
|
*
|
|
* NB: It ought to be OK to call the barrier-processing functions
|
|
* unconditionally, but it's more efficient to call only the ones that
|
|
* might need us to do something based on the flags.
|
|
*/
|
|
if (BARRIER_SHOULD_CHECK(flags, PROCSIGNAL_BARRIER_PLACEHOLDER))
|
|
ProcessBarrierPlaceholder();
|
|
|
|
/*
|
|
* State changes related to all types of barriers that might have been
|
|
* emitted have now been handled, so we can update our notion of the
|
|
* generation to the one we observed before beginning the updates. If
|
|
* things have changed further, it'll get fixed up when this function is
|
|
* next called.
|
|
*/
|
|
pg_atomic_write_u64(&MyProcSignalSlot->pss_barrierGeneration, generation);
|
|
}
|
|
|
|
static void
ProcessBarrierPlaceholder(void)
{
	/*
	 * XXX. Intentionally empty: this stands in for the first real user of
	 * the barrier machinery.  When that user is committed, rename
	 * PROCSIGNAL_BARRIER_PLACEHOLDER to PROCSIGNAL_BARRIER_SOMETHING_ELSE
	 * (with SOMETHING_ELSE being a suitably descriptive name), delete this
	 * function and provide ProcessBarrierSomethingElse in its place.  That
	 * function most likely belongs in the file of the subsystem concerned
	 * rather than in this one.
	 */
}
|
|
|
|
/*
|
|
* CheckProcSignal - check to see if a particular reason has been
|
|
* signaled, and clear the signal flag. Should be called after receiving
|
|
* SIGUSR1.
|
|
*/
|
|
static bool
|
|
CheckProcSignal(ProcSignalReason reason)
|
|
{
|
|
volatile ProcSignalSlot *slot = MyProcSignalSlot;
|
|
|
|
if (slot != NULL)
|
|
{
|
|
/* Careful here --- don't clear flag if we haven't seen it set */
|
|
if (slot->pss_signalFlags[reason])
|
|
{
|
|
slot->pss_signalFlags[reason] = false;
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* CheckProcSignalBarrier - check for new barriers we need to absorb
|
|
*/
|
|
static bool
|
|
CheckProcSignalBarrier(void)
|
|
{
|
|
volatile ProcSignalSlot *slot = MyProcSignalSlot;
|
|
|
|
if (slot != NULL)
|
|
{
|
|
uint64 mygen;
|
|
uint64 curgen;
|
|
|
|
mygen = pg_atomic_read_u64(&slot->pss_barrierGeneration);
|
|
curgen = pg_atomic_read_u64(&ProcSignal->psh_barrierGeneration);
|
|
return (mygen != curgen);
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* procsignal_sigusr1_handler - handle SIGUSR1 signal.
|
|
*/
|
|
void
|
|
procsignal_sigusr1_handler(SIGNAL_ARGS)
|
|
{
|
|
int save_errno = errno;
|
|
|
|
if (CheckProcSignal(PROCSIG_CATCHUP_INTERRUPT))
|
|
HandleCatchupInterrupt();
|
|
|
|
if (CheckProcSignal(PROCSIG_NOTIFY_INTERRUPT))
|
|
HandleNotifyInterrupt();
|
|
|
|
if (CheckProcSignal(PROCSIG_PARALLEL_MESSAGE))
|
|
HandleParallelMessageInterrupt();
|
|
|
|
if (CheckProcSignal(PROCSIG_WALSND_INIT_STOPPING))
|
|
HandleWalSndInitStopping();
|
|
|
|
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_DATABASE))
|
|
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_DATABASE);
|
|
|
|
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_TABLESPACE))
|
|
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_TABLESPACE);
|
|
|
|
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_LOCK))
|
|
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_LOCK);
|
|
|
|
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT))
|
|
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT);
|
|
|
|
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK))
|
|
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
|
|
|
|
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN))
|
|
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
|
|
|
|
if (CheckProcSignalBarrier())
|
|
{
|
|
InterruptPending = true;
|
|
ProcSignalBarrierPending = true;
|
|
}
|
|
|
|
SetLatch(MyLatch);
|
|
|
|
latch_sigusr1_handler();
|
|
|
|
errno = save_errno;
|
|
}
|