mirror of
https://github.com/postgres/postgres.git
synced 2025-04-24 10:47:04 +03:00
Harden pmsignal.c against clobbered shared memory.
The postmaster is not supposed to do anything that depends fundamentally on shared memory contents, because that creates the risk that a backend crash that trashes shared memory will take the postmaster down with it, preventing automatic recovery. In commit 969d7cd43 I lost sight of this principle and coded AssignPostmasterChildSlot() in such a way that it could fail or even crash if the shared PMSignalState structure became corrupted. Remarkably, we've not seen field reports of such crashes; but I managed to induce one while testing the recent changes around palloc chunk headers. To fix, make a semi-duplicative state array inside the postmaster so that we need consult only local state while choosing a "child slot" for a new backend. Ensure that other postmaster-executed routines in pmsignal.c don't have critical dependencies on the shared state, either. Corruption of PMSignalState might now lead ReleasePostmasterChildSlot() to conclude that backend X failed, when actually backend Y was the one that trashed things. But that doesn't matter, because we'll force a cluster-wide reset regardless. Back-patch to all supported branches, since this is an old bug. Discussion: https://postgr.es/m/3436789.1665187055@sss.pgh.pa.us
This commit is contained in:
parent
abc510fa2a
commit
8f98352b5e
@ -26,6 +26,7 @@
|
||||
#include "replication/walsender.h"
|
||||
#include "storage/pmsignal.h"
|
||||
#include "storage/shmem.h"
|
||||
#include "utils/memutils.h"
|
||||
|
||||
|
||||
/*
|
||||
@ -69,12 +70,21 @@ struct PMSignalData
|
||||
sig_atomic_t PMSignalFlags[NUM_PMSIGNALS];
|
||||
/* per-child-process flags */
|
||||
int num_child_flags; /* # of entries in PMChildFlags[] */
|
||||
int next_child_flag; /* next slot to try to assign */
|
||||
sig_atomic_t PMChildFlags[FLEXIBLE_ARRAY_MEMBER];
|
||||
};
|
||||
|
||||
/* PMSignalState pointer is valid in both postmaster and child processes */
|
||||
NON_EXEC_STATIC volatile PMSignalData *PMSignalState = NULL;
|
||||
|
||||
/*
|
||||
* These static variables are valid only in the postmaster. We keep a
|
||||
* duplicative private array so that we can trust its state even if some
|
||||
* failing child has clobbered the PMSignalData struct in shared memory.
|
||||
*/
|
||||
static int num_child_inuse; /* # of entries in PMChildInUse[] */
|
||||
static int next_child_inuse; /* next slot to try to assign */
|
||||
static bool *PMChildInUse; /* true if i'th flag slot is assigned */
|
||||
|
||||
/*
|
||||
* Signal handler to be notified if postmaster dies.
|
||||
*/
|
||||
@ -135,7 +145,25 @@ PMSignalShmemInit(void)
|
||||
if (!found)
|
||||
{
|
||||
MemSet(unvolatize(PMSignalData *, PMSignalState), 0, PMSignalShmemSize());
|
||||
PMSignalState->num_child_flags = MaxLivePostmasterChildren();
|
||||
num_child_inuse = MaxLivePostmasterChildren();
|
||||
PMSignalState->num_child_flags = num_child_inuse;
|
||||
|
||||
/*
|
||||
* Also allocate postmaster's private PMChildInUse[] array. We
|
||||
* might've already done that in a previous shared-memory creation
|
||||
* cycle, in which case free the old array to avoid a leak. (Do it
|
||||
* like this to support the possibility that MaxLivePostmasterChildren
|
||||
* changed.) In a standalone backend, we do not need this.
|
||||
*/
|
||||
if (PostmasterContext != NULL)
|
||||
{
|
||||
if (PMChildInUse)
|
||||
pfree(PMChildInUse);
|
||||
PMChildInUse = (bool *)
|
||||
MemoryContextAllocZero(PostmasterContext,
|
||||
num_child_inuse * sizeof(bool));
|
||||
}
|
||||
next_child_inuse = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -183,21 +211,24 @@ CheckPostmasterSignal(PMSignalReason reason)
|
||||
int
|
||||
AssignPostmasterChildSlot(void)
|
||||
{
|
||||
int slot = PMSignalState->next_child_flag;
|
||||
int slot = next_child_inuse;
|
||||
int n;
|
||||
|
||||
/*
|
||||
* Scan for a free slot. We track the last slot assigned so as not to
|
||||
* waste time repeatedly rescanning low-numbered slots.
|
||||
* Scan for a free slot. Notice that we trust nothing about the contents
|
||||
* of PMSignalState, but use only postmaster-local data for this decision.
|
||||
* We track the last slot assigned so as not to waste time repeatedly
|
||||
* rescanning low-numbered slots.
|
||||
*/
|
||||
for (n = PMSignalState->num_child_flags; n > 0; n--)
|
||||
for (n = num_child_inuse; n > 0; n--)
|
||||
{
|
||||
if (--slot < 0)
|
||||
slot = PMSignalState->num_child_flags - 1;
|
||||
if (PMSignalState->PMChildFlags[slot] == PM_CHILD_UNUSED)
|
||||
slot = num_child_inuse - 1;
|
||||
if (!PMChildInUse[slot])
|
||||
{
|
||||
PMChildInUse[slot] = true;
|
||||
PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED;
|
||||
PMSignalState->next_child_flag = slot;
|
||||
next_child_inuse = slot;
|
||||
return slot + 1;
|
||||
}
|
||||
}
|
||||
@ -219,7 +250,7 @@ ReleasePostmasterChildSlot(int slot)
|
||||
{
|
||||
bool result;
|
||||
|
||||
Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
|
||||
Assert(slot > 0 && slot <= num_child_inuse);
|
||||
slot--;
|
||||
|
||||
/*
|
||||
@ -229,17 +260,18 @@ ReleasePostmasterChildSlot(int slot)
|
||||
*/
|
||||
result = (PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED);
|
||||
PMSignalState->PMChildFlags[slot] = PM_CHILD_UNUSED;
|
||||
PMChildInUse[slot] = false;
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* IsPostmasterChildWalSender - check if given slot is in use by a
|
||||
* walsender process.
|
||||
* walsender process. This is called only by the postmaster.
|
||||
*/
|
||||
bool
|
||||
IsPostmasterChildWalSender(int slot)
|
||||
{
|
||||
Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
|
||||
Assert(slot > 0 && slot <= num_child_inuse);
|
||||
slot--;
|
||||
|
||||
if (PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER)
|
||||
|
Loading…
x
Reference in New Issue
Block a user