1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-30 11:03:19 +03:00

Allow discovery of whether a dynamic background worker is running.

Using the infrastructure provided by this patch, it's possible either
to wait for the startup of a dynamically-registered background worker,
or to poll the status of such a worker without waiting.  In either
case, the current PID of the worker process can also be obtained.
As usual, worker_spi is updated to demonstrate the new functionality.

Patch by me.  Review by Andres Freund.
This commit is contained in:
Robert Haas
2013-08-28 14:08:13 -04:00
parent c9e2e2db5c
commit 090d0f2050
12 changed files with 367 additions and 12 deletions

View File

@ -207,8 +207,6 @@ typedef struct QueueBackendStatus
QueuePosition pos; /* backend has read queue up to here */
} QueueBackendStatus;
#define InvalidPid (-1)
/*
* Shared memory state for LISTEN/NOTIFY (excluding its SLRU stuff)
*

View File

@ -18,6 +18,7 @@
#include "miscadmin.h"
#include "libpq/pqsignal.h"
#include "postmaster/bgworker_internals.h"
#include "postmaster/postmaster.h"
#include "storage/barrier.h"
#include "storage/ipc.h"
#include "storage/latch.h"
@ -66,6 +67,8 @@ slist_head BackgroundWorkerList = SLIST_STATIC_INIT(BackgroundWorkerList);
/*
 * One entry of the shared-memory background worker array.
 *
 * A slot is claimed by a registering process (which fills in "worker",
 * resets "pid" and, for dynamic registration, bumps "generation"); the
 * postmaster later records the worker's process ID in "pid".
 */
typedef struct BackgroundWorkerSlot
{
bool in_use; /* true while the slot holds a registered worker */
pid_t pid; /* InvalidPid = not started yet; 0 = dead */
uint64 generation; /* incremented when slot is recycled */
BackgroundWorker worker; /* copy of the worker's registration data */
} BackgroundWorkerSlot;
@ -75,6 +78,12 @@ typedef struct BackgroundWorkerArray
BackgroundWorkerSlot slot[FLEXIBLE_ARRAY_MEMBER];
} BackgroundWorkerArray;
/*
 * Handle returned by RegisterDynamicBackgroundWorker() that identifies one
 * particular registration.
 *
 * "slot" is the index of the worker's entry in BackgroundWorkerData->slot[],
 * and "generation" is the slot's generation counter as of registration time.
 * A handle whose generation no longer matches the slot's current generation
 * refers to a registration whose slot has since been recycled.
 */
struct BackgroundWorkerHandle
{
int slot; /* index into BackgroundWorkerData->slot[] */
uint64 generation; /* slot generation captured at registration */
};
BackgroundWorkerArray *BackgroundWorkerData;
/*
@ -125,7 +134,10 @@ BackgroundWorkerShmemInit(void)
rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
Assert(slotno < max_worker_processes);
slot->in_use = true;
slot->pid = InvalidPid;
slot->generation = 0;
rw->rw_shmem_slot = slotno;
rw->rw_worker.bgw_notify_pid = 0; /* might be reinit after crash */
memcpy(&slot->worker, &rw->rw_worker, sizeof(BackgroundWorker));
++slotno;
}
@ -244,7 +256,7 @@ BackgroundWorkerStateChange(void)
slot->worker.bgw_function_name, BGW_MAXLEN);
/*
* Copy remaining fields.
* Copy various fixed-size fields.
*
* flags, start_time, and restart_time are examined by the
* postmaster, but nothing too bad will happen if they are
@ -257,6 +269,23 @@ BackgroundWorkerStateChange(void)
rw->rw_worker.bgw_main = slot->worker.bgw_main;
rw->rw_worker.bgw_main_arg = slot->worker.bgw_main_arg;
/*
* Copy the PID to be notified about state changes, but only if
* the postmaster knows about a backend with that PID. It isn't
* an error if the postmaster doesn't know about the PID, because
* the backend that requested the worker could have died (or been
* killed) just after doing so. Nonetheless, at least until we get
* some experience with how this plays out in the wild, log a message
* at a relatively high debug level.
*/
rw->rw_worker.bgw_notify_pid = slot->worker.bgw_notify_pid;
if (!PostmasterMarkPIDForWorkerNotify(rw->rw_worker.bgw_notify_pid))
{
elog(DEBUG1, "worker notification PID %u is not valid",
rw->rw_worker.bgw_notify_pid);
rw->rw_worker.bgw_notify_pid = 0;
}
/* Initialize postmaster bookkeeping. */
rw->rw_backend = NULL;
rw->rw_pid = 0;
@ -302,6 +331,44 @@ ForgetBackgroundWorker(slist_mutable_iter *cur)
free(rw);
}
/*
 * Publish a background worker's current PID in its shared-memory slot.
 *
 * The value of rw->rw_pid is copied into the slot so that other processes
 * can observe it.  If a backend asked to be told about state changes for
 * this worker (bgw_notify_pid is nonzero), it is then sent SIGUSR1.
 *
 * This function should only be called from the postmaster.
 */
void
ReportBackgroundWorkerPID(RegisteredBgWorker *rw)
{
	pid_t		notify_pid;

	Assert(rw->rw_shmem_slot < max_worker_processes);

	/* Store the PID first, so the signalled backend sees the new value. */
	BackgroundWorkerData->slot[rw->rw_shmem_slot].pid = rw->rw_pid;

	/* Nobody registered for notifications: nothing more to do. */
	notify_pid = rw->rw_worker.bgw_notify_pid;
	if (notify_pid == 0)
		return;

	kill(notify_pid, SIGUSR1);
}
/*
 * Forget a notification target PID because the backend that owned it is
 * exiting.
 *
 * Every registered worker whose bgw_notify_pid equals "pid" has that field
 * reset to zero, so no further SIGUSR1 notifications are sent to that PID.
 *
 * This function should only be called from the postmaster.
 */
void
BackgroundWorkerStopNotifications(pid_t pid)
{
	slist_iter	cursor;

	slist_foreach(cursor, &BackgroundWorkerList)
	{
		RegisteredBgWorker *entry = slist_container(RegisteredBgWorker,
													rw_lnode, cursor.cur);

		/* Clear only the entries that were pointing at the exiting PID. */
		if (entry->rw_worker.bgw_notify_pid == pid)
			entry->rw_worker.bgw_notify_pid = 0;
	}
}
#ifdef EXEC_BACKEND
/*
* In EXEC_BACKEND mode, workers use this to retrieve their details from
@ -602,6 +669,15 @@ RegisterBackgroundWorker(BackgroundWorker *worker)
if (!SanityCheckBackgroundWorker(worker, LOG))
return;
if (worker->bgw_notify_pid != 0)
{
ereport(LOG,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("background worker \"%s\": only dynamic background workers can request notification",
worker->bgw_name)));
return;
}
/*
* Enforce maximum number of workers. Note this is overly restrictive: we
* could allow more non-shmem-connected workers, because these don't count
@ -647,12 +723,18 @@ RegisterBackgroundWorker(BackgroundWorker *worker)
*
* Returns true on success and false on failure. Failure typically indicates
* that no background worker slots are currently available.
*
* If handle != NULL, we'll set *handle to a pointer that can subsequently
* be used as an argument to GetBackgroundWorkerPid(). The caller can
* free this pointer using pfree(), if desired.
*/
bool
RegisterDynamicBackgroundWorker(BackgroundWorker *worker)
RegisterDynamicBackgroundWorker(BackgroundWorker *worker,
BackgroundWorkerHandle **handle)
{
int slotno;
bool success = false;
uint64 generation;
/*
* We can't register dynamic background workers from the postmaster.
@ -680,6 +762,9 @@ RegisterDynamicBackgroundWorker(BackgroundWorker *worker)
if (!slot->in_use)
{
memcpy(&slot->worker, worker, sizeof(BackgroundWorker));
slot->pid = InvalidPid; /* indicates not started yet */
slot->generation++;
generation = slot->generation;
/*
* Make sure postmaster doesn't see the slot as in use before
@ -699,5 +784,122 @@ RegisterDynamicBackgroundWorker(BackgroundWorker *worker)
if (success)
SendPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE);
/*
* If we found a slot and the user has provided a handle, initialize it.
*/
if (success && handle)
{
*handle = palloc(sizeof(BackgroundWorkerHandle));
(*handle)->slot = slotno;
(*handle)->generation = generation;
}
return success;
}
/*
 * Look up the current state of a dynamically-registered background worker.
 *
 * Return values:
 *	BGWH_STARTED		 - the worker is running; *pidp receives its PID.
 *	BGWH_NOT_YET_STARTED - the worker has not been started yet.
 *	BGWH_STOPPED		 - the worker was running previously but no longer
 *						   is, or the handle's slot has been recycled.
 *
 * *pidp is written only when BGWH_STARTED is returned.
 *
 * A stopped worker may be only temporarily stopped (if it is configured for
 * automatic restart, or if it exited with code 0) or gone for good (if it is
 * configured not to restart and exited with code 1).
 */
BgwHandleStatus
GetBackgroundWorkerPid(BackgroundWorkerHandle *handle, pid_t *pidp)
{
	BackgroundWorkerSlot *bgw_slot;
	pid_t		observed_pid;

	Assert(handle->slot < max_worker_processes);
	bgw_slot = &BackgroundWorkerData->slot[handle->slot];

	/*
	 * Memory barriers alone would probably be enough to synchronize access
	 * here, but taking the lock keeps things simple, and meaningful
	 * contention seems unlikely.
	 *
	 * While the lock is held the generation number cannot change.  The pid
	 * is updated by the postmaster and can change at any time; we assume
	 * such updates are atomic, so the value read is never garbage, though it
	 * may already be stale when the caller looks at it (which is unavoidable
	 * anyway).  A generation mismatch is reported the same way as a dead
	 * worker.
	 */
	LWLockAcquire(BackgroundWorkerLock, LW_SHARED);
	observed_pid = (handle->generation == bgw_slot->generation)
		? bgw_slot->pid
		: 0;
	LWLockRelease(BackgroundWorkerLock);

	if (observed_pid == 0)
		return BGWH_STOPPED;
	if (observed_pid == InvalidPid)
		return BGWH_NOT_YET_STARTED;

	*pidp = observed_pid;
	return BGWH_STARTED;
}
/*
 * Wait for a background worker to start up.
 *
 * This is like GetBackgroundWorkerPid(), except that if the worker has not
 * yet started, we wait for it to do so; thus, BGWH_NOT_YET_STARTED is never
 * returned.  However, if the postmaster has died, we give up and return
 * BGWH_POSTMASTER_DIED, since in that case we know that startup will not
 * take place.
 *
 * *pidp is written only when the return value is BGWH_STARTED.  For any
 * other result no PID is known, so *pidp is left untouched.
 */
BgwHandleStatus
WaitForBackgroundWorkerStartup(BackgroundWorkerHandle *handle, pid_t *pidp)
{
	BgwHandleStatus status;
	int			rc;
	bool		save_set_latch_on_sigusr1;

	/*
	 * While we wait, have the SIGUSR1 handler set our process latch.
	 * Remember the previous setting so it can be restored on both the
	 * normal and the error exit path.
	 */
	save_set_latch_on_sigusr1 = set_latch_on_sigusr1;
	set_latch_on_sigusr1 = true;

	PG_TRY();
	{
		for (;;)
		{
			pid_t		pid;

			CHECK_FOR_INTERRUPTS();

			/*
			 * GetBackgroundWorkerPid() fills in "pid" only for BGWH_STARTED,
			 * so that is the only case in which it may be passed on to the
			 * caller.
			 */
			status = GetBackgroundWorkerPid(handle, &pid);
			if (status == BGWH_STARTED)
				*pidp = pid;
			if (status != BGWH_NOT_YET_STARTED)
				break;

			rc = WaitLatch(&MyProc->procLatch,
						   WL_LATCH_SET | WL_POSTMASTER_DEATH, 0);

			if (rc & WL_POSTMASTER_DEATH)
			{
				status = BGWH_POSTMASTER_DIED;
				break;
			}

			/*
			 * Wakeups can be spurious, so reset the latch and re-check the
			 * worker's state at the top of the loop.
			 */
			ResetLatch(&MyProc->procLatch);
		}
	}
	PG_CATCH();
	{
		set_latch_on_sigusr1 = save_set_latch_on_sigusr1;
		PG_RE_THROW();
	}
	PG_END_TRY();

	set_latch_on_sigusr1 = save_set_latch_on_sigusr1;
	return status;
}

View File

@ -170,6 +170,7 @@ typedef struct bkend
*/
int bkend_type;
bool dead_end; /* is it going to send an error and quit? */
bool bgworker_notify; /* gets bgworker start/stop notifications */
dlist_node elem; /* list link in BackendList */
} Backend;
@ -2877,11 +2878,20 @@ CleanupBackgroundWorker(int pid,
#ifdef EXEC_BACKEND
ShmemBackendArrayRemove(rw->rw_backend);
#endif
/*
* It's possible that this background worker started some OTHER
* background worker and asked to be notified when that worker
* started or stopped. If so, cancel any notifications destined
* for the now-dead backend.
*/
if (rw->rw_backend->bgworker_notify)
BackgroundWorkerStopNotifications(rw->rw_pid);
free(rw->rw_backend);
rw->rw_backend = NULL;
}
rw->rw_pid = 0;
rw->rw_child_slot = 0;
ReportBackgroundWorkerPID(rw); /* report child death */
LogChildExit(LOG, namebuf, pid, exitstatus);
@ -2955,6 +2965,18 @@ CleanupBackend(int pid,
ShmemBackendArrayRemove(bp);
#endif
}
if (bp->bgworker_notify)
{
/*
* This backend may have been slated to receive SIGUSR1
* when some background worker started or stopped. Cancel
* those notifications, as we don't want to signal PIDs that
* are not PostgreSQL backends. This gets skipped in the
* (probably very common) case where the backend has never
* requested any such notifications.
*/
BackgroundWorkerStopNotifications(bp->pid);
}
dlist_delete(iter.cur);
free(bp);
break;
@ -3018,6 +3040,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
rw->rw_pid = 0;
rw->rw_child_slot = 0;
/* don't reset crashed_at */
/* don't report child stop, either */
/* Keep looping so we can signal remaining workers */
}
else
@ -3712,6 +3735,9 @@ BackendStartup(Port *port)
else
bn->child_slot = 0;
/* Hasn't asked to be notified about any bgworkers yet */
bn->bgworker_notify = false;
#ifdef EXEC_BACKEND
pid = backend_forkexec(port);
#else /* !EXEC_BACKEND */
@ -5122,6 +5148,7 @@ StartAutovacuumWorker(void)
/* Autovac workers are not dead_end and need a child slot */
bn->dead_end = false;
bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
bn->bgworker_notify = false;
bn->pid = StartAutoVacWorker();
if (bn->pid > 0)
@ -5318,6 +5345,7 @@ do_start_bgworker(RegisteredBgWorker *rw)
rw->rw_pid = worker_pid;
if (rw->rw_backend)
rw->rw_backend->pid = rw->rw_pid;
ReportBackgroundWorkerPID(rw);
}
}
@ -5400,6 +5428,7 @@ assign_backendlist_entry(RegisteredBgWorker *rw)
bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
bn->bkend_type = BACKEND_TYPE_BGWORKER;
bn->dead_end = false;
bn->bgworker_notify = false;
rw->rw_backend = bn;
rw->rw_child_slot = bn->child_slot;
@ -5510,6 +5539,29 @@ maybe_start_bgworker(void)
StartWorkerNeeded = false;
}
/*
 * Flag the backend with the given PID as wanting background worker state
 * change notifications.
 *
 * The background worker machinery needs to know when such a backend exits,
 * so the request is recorded in the backend's BackendList entry.
 *
 * Returns true if an entry with that PID was found and flagged, false if
 * no entry in BackendList has that PID.
 */
bool
PostmasterMarkPIDForWorkerNotify(int pid)
{
	dlist_iter	cursor;
	bool		marked = false;

	dlist_foreach(cursor, &BackendList)
	{
		Backend    *candidate = dlist_container(Backend, elem, cursor.cur);

		if (candidate->pid == pid)
		{
			candidate->bgworker_notify = true;
			marked = true;
			break;				/* stop at the first match */
		}
	}

	return marked;
}
#ifdef EXEC_BACKEND
/*

View File

@ -21,6 +21,7 @@
#include "miscadmin.h"
#include "storage/latch.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/shmem.h"
#include "storage/sinval.h"
#include "tcop/tcopprot.h"
@ -57,6 +58,14 @@ typedef struct
*/
#define NumProcSignalSlots (MaxBackends + NUM_AUXPROCTYPES)
/*
* If this flag is set, the process latch will be set whenever SIGUSR1
* is received. This is useful when waiting for a signal from the postmaster.
* Spurious wakeups must be expected. Make sure that the flag is cleared
* in the error path.
*/
bool set_latch_on_sigusr1;
static ProcSignalSlot *ProcSignalSlots = NULL;
static volatile ProcSignalSlot *MyProcSignalSlot = NULL;
@ -276,6 +285,9 @@ procsignal_sigusr1_handler(SIGNAL_ARGS)
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN))
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
if (set_latch_on_sigusr1)
SetLatch(&MyProc->procLatch);
latch_sigusr1_handler();
errno = save_errno;

View File

@ -28,6 +28,8 @@
#define PG_BACKEND_VERSIONSTR "postgres (PostgreSQL) " PG_VERSION "\n"
#define InvalidPid (-1)
/*****************************************************************************
* System interrupt and critical section handling

View File

@ -80,13 +80,32 @@ typedef struct BackgroundWorker
char bgw_library_name[BGW_MAXLEN]; /* only if bgw_main is NULL */
char bgw_function_name[BGW_MAXLEN]; /* only if bgw_main is NULL */
Datum bgw_main_arg;
pid_t bgw_notify_pid; /* SIGUSR1 this backend on start/stop */
} BackgroundWorker;
/*
 * Worker state as reported by GetBackgroundWorkerPid() and
 * WaitForBackgroundWorkerStartup().
 *
 * GetBackgroundWorkerPid() returns one of the first three values;
 * WaitForBackgroundWorkerStartup() never returns BGWH_NOT_YET_STARTED but
 * may additionally return BGWH_POSTMASTER_DIED.
 */
typedef enum BgwHandleStatus
{
BGWH_STARTED, /* worker is running */
BGWH_NOT_YET_STARTED, /* worker hasn't been started yet */
BGWH_STOPPED, /* worker has exited */
BGWH_POSTMASTER_DIED /* postmaster died; worker status unclear */
} BgwHandleStatus;
struct BackgroundWorkerHandle;
typedef struct BackgroundWorkerHandle BackgroundWorkerHandle;
/* Register a new bgworker during shared_preload_libraries */
extern void RegisterBackgroundWorker(BackgroundWorker *worker);
/* Register a new bgworker from a regular backend */
extern bool RegisterDynamicBackgroundWorker(BackgroundWorker *worker);
extern bool RegisterDynamicBackgroundWorker(BackgroundWorker *worker,
BackgroundWorkerHandle **handle);
/* Query the status of a bgworker */
extern BgwHandleStatus GetBackgroundWorkerPid(BackgroundWorkerHandle *handle,
pid_t *pidp);
extern BgwHandleStatus WaitForBackgroundWorkerStartup(BackgroundWorkerHandle *
handle, pid_t *pid);
/* This is valid in a running worker */
extern BackgroundWorker *MyBgworkerEntry;

View File

@ -40,6 +40,8 @@ extern Size BackgroundWorkerShmemSize(void);
extern void BackgroundWorkerShmemInit(void);
extern void BackgroundWorkerStateChange(void);
extern void ForgetBackgroundWorker(slist_mutable_iter *cur);
extern void ReportBackgroundWorkerPID(RegisteredBgWorker *);
extern void BackgroundWorkerStopNotifications(pid_t pid);
/* Function to start a background worker, called from postmaster.c */
extern void StartBackgroundWorker(void);

View File

@ -52,6 +52,7 @@ extern void ClosePostmasterPorts(bool am_syslogger);
extern int MaxLivePostmasterChildren(void);
extern int GetNumShmemAttachedBgworkers(void);
extern bool PostmasterMarkPIDForWorkerNotify(int);
#ifdef EXEC_BACKEND
extern pid_t postmaster_forkexec(int argc, char *argv[]);

View File

@ -54,5 +54,6 @@ extern int SendProcSignal(pid_t pid, ProcSignalReason reason,
BackendId backendId);
extern void procsignal_sigusr1_handler(SIGNAL_ARGS);
extern bool set_latch_on_sigusr1;
#endif /* PROCSIGNAL_H */