1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-18 04:29:09 +03:00
Files
postgres/src/backend/postmaster/pmchild.c
Tom Lane db01c90b2f Silence Valgrind leakage complaints in more-or-less-hackish ways.
These changes don't actually fix any leaks.  They just make sure that
Valgrind will find pointers to data structures that remain allocated
at process exit, and thus not falsely complain about leaks.  In
particular, we are trying to avoid situations where there is no
pointer to the beginning of an allocated block (except possibly
within the block itself, which Valgrind won't count).

* Because dynahash.c never frees hashtable storage except by deleting
the whole hashtable context, it doesn't bother to track the individual
blocks of elements allocated by element_alloc().  This results in
"possibly lost" complaints from Valgrind except when the first element
of each block is actively in use.  (Otherwise it'll be on a freelist,
but very likely only reachable via "interior pointers" within element
blocks, which doesn't satisfy Valgrind.)

To fix, if we're building with USE_VALGRIND, expend an extra pointer's
worth of space in each element block so that we can chain them all
together from the HTAB header.  Skip this in shared hashtables though:
Valgrind doesn't track those, and we'd need additional locking to make
it safe to manipulate a shared chain.

While here, update a comment obsoleted by 9c911ec06.

* Put the dlist_node fields of catctup and catclist structs first.
This ensures that the dlist pointers point to the starts of these
palloc blocks, and thus that Valgrind won't consider them
"possibly lost".

* The postmaster's PMChild structs and the autovac launcher's
avl_dbase structs also have the dlist_node-is-not-first problem,
but putting it first still wouldn't silence the warning because we
bulk-allocate those structs in an array, so that Valgrind sees a
single allocation.  Commonly the first array element will be pointed
to only from some later element, so that the reference would be an
interior pointer even if it pointed to the array start.  (This is the
same issue as for dynahash elements.)  Since these are pretty simple
data structures, I don't feel too bad about faking out Valgrind by
just keeping a static pointer to the array start.

(This is all quite hacky, and it's not hard to imagine usages where
we'd need some other idea in order to have reasonable leak tracking of
structures that are only accessible via dlist_node lists.  But these
changes seem to be enough to silence this class of leakage complaints
for the moment.)

* Free a couple of data structures manually near the end of an
autovacuum worker's run when USE_VALGRIND, and ensure that the final
vac_update_datfrozenxid() call is done in a non-permanent context.
This doesn't have any real effect on the process's total memory
consumption, since we're going to exit as soon as that last
transaction is done.  But it does pacify Valgrind.

* Valgrind complains about the postmaster's socket-files and
lock-files lists being leaked, which we can silence by just
not nulling out the static pointers to them.

* Valgrind seems not to consider the global "environ" variable as
a valid root pointer; so when we allocate a new environment array,
it claims that data is leaked.  To fix that, keep our own
statically-allocated copy of the pointer, similarly to the previous
item.

Author: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/285483.1746756246@sss.pgh.pa.us
2025-08-02 21:59:46 -04:00

303 lines
8.7 KiB
C

/*-------------------------------------------------------------------------
*
* pmchild.c
* Functions for keeping track of postmaster child processes.
*
* Postmaster keeps track of all child processes so that when a process exits,
* it knows what kind of a process it was and can clean up accordingly. Every
* child process is allocated a PMChild struct from a fixed pool of structs.
* The size of the pool is determined by various settings that configure how
* many worker processes and backend connections are allowed, i.e.
* autovacuum_worker_slots, max_worker_processes, max_wal_senders, and
* max_connections.
*
* Dead-end backends are handled slightly differently. There is no limit
* on the number of dead-end backends, and they do not need unique IDs, so
* their PMChild structs are allocated dynamically, not from a pool.
*
* The structures and functions in this file are private to the postmaster
* process. But note that there is an array in shared memory, managed by
* pmsignal.c, that mirrors this.
*
*
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/postmaster/pmchild.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "miscadmin.h"
#include "postmaster/autovacuum.h"
#include "postmaster/postmaster.h"
#include "replication/walsender.h"
#include "storage/pmsignal.h"
#include "storage/proc.h"
/*
 * Freelists for different kinds of child processes.  We maintain separate
 * pools for each, so that for example launching a lot of regular backends
 * cannot prevent autovacuum or an aux process from launching.
 *
 * There is one PMChildPool per backend type.  Slot numbers are 1-based:
 * InitPostmasterChildSlots() gives each pool the contiguous range
 * [first_slotno, first_slotno + size), and the assign and release paths
 * check a PMChild's child_slot against that range before using the pool.
 */
typedef struct PMChildPool
{
int size; /* number of PMChild slots reserved for this
* kind of process; zero if none */
int first_slotno; /* first (1-based) slot number belonging to this pool */
dlist_head freelist; /* currently unused PMChild entries */
} PMChildPool;
/*
 * One pool per backend type, indexed by backend type.  Pools for types that
 * InitPostmasterChildSlots() does not size explicitly stay at size zero.
 */
static PMChildPool pmchild_pools[BACKEND_NUM_TYPES];
/*
 * Sum of all pool sizes, computed by InitPostmasterChildSlots().
 * MaxLivePostmasterChildren() treats zero as "not initialized yet".
 */
NON_EXEC_STATIC int num_pmchild_slots = 0;
/*
 * List of active child processes.  This includes dead-end children.
 *
 * Entries are linked through PMChild.elem.  A pooled entry is either on this
 * list or on its pool's freelist, never both: assigning a slot moves it
 * here, releasing it moves it back.
 */
dlist_head ActiveChildList;
/*
 * Dummy pointer to persuade Valgrind that we've not leaked the array of
 * PMChild structs.  Make it global to ensure the compiler doesn't
 * optimize it away.
 *
 * It is set once, in InitPostmasterChildSlots(), to the start of the slot
 * array, and is not read anywhere in this file.
 *
 * NOTE(review): the extern declaration directly ahead of the definition
 * presumably exists to avoid a missing-declaration warning for a non-static
 * global -- confirm.
 */
#ifdef USE_VALGRIND
extern PMChild *pmchild_array;
PMChild *pmchild_array;
#endif
/*
 * MaxLivePostmasterChildren
 *
 * Return how many postmaster children can be active at once, not counting
 * dead-end children; that is, the total number of pooled PMChild slots.
 * Because this is a fixed number, the array in shared memory that mirrors
 * the slots (PMChildFlags) can have a fixed maximum size.
 *
 * Reports an error if the slot count has not been computed yet.
 */
int
MaxLivePostmasterChildren(void)
{
	/* Normal case: the pools have been sized, so the count is nonzero. */
	if (num_pmchild_slots != 0)
		return num_pmchild_slots;

	elog(ERROR, "PM child array not initialized yet");

	/* Reached only if elog() returns; hand back the (zero) count. */
	return num_pmchild_slots;
}
/*
 * Set up the PMChild slot pools at postmaster startup.
 *
 * Each backend type is given a pool size, the sizes are added up to get the
 * total slot count, a single array of that many PMChild structs is
 * allocated, and consecutive runs of the array are handed out to the pools
 * in backend-type order.  Every slot starts out on its pool's freelist, and
 * the list of active children starts out empty.
 *
 * Note: This is not called on crash restart.  The PMChild entries must stay
 * valid through the restart process, because the syslogger survives a crash
 * restart and its PMChild slot must not be invalidated.
 */
void
InitPostmasterChildSlots(void)
{
	/*
	 * Backend types of which only one instance can be running at a time.
	 * Each of them gets its own pool of exactly one entry.
	 */
	const int	single_instance_types[] = {
		B_AUTOVAC_LAUNCHER,
		B_SLOTSYNC_WORKER,
		B_ARCHIVER,
		B_BG_WRITER,
		B_CHECKPOINTER,
		B_STARTUP,
		B_WAL_RECEIVER,
		B_WAL_SUMMARIZER,
		B_WAL_WRITER,
		B_LOGGER
	};
	const int	num_single_instance_types =
		(int) (sizeof(single_instance_types) / sizeof(single_instance_types[0]));
	PMChild    *slots;
	int			next_index = 0; /* 0-based array index of the next slot */

	/*
	 * The regular-backend pool is deliberately larger than the number of
	 * backends that can exist: some connections may still be authenticating
	 * and might fail auth, or an existing backend might exit before the auth
	 * cycle completes.  The exact MaxConnections limit is enforced later,
	 * when a new backend tries to join the PGPROC array.
	 *
	 * WAL senders start out as regular backends, so they are counted in the
	 * same pool.
	 */
	pmchild_pools[B_BACKEND].size = 2 * (MaxConnections + max_wal_senders);

	/* Worker pools are sized directly from their settings. */
	pmchild_pools[B_AUTOVAC_WORKER].size = autovacuum_worker_slots;
	pmchild_pools[B_BG_WORKER].size = max_worker_processes;
	pmchild_pools[B_IO_WORKER].size = MAX_IO_WORKERS;

	for (int k = 0; k < num_single_instance_types; k++)
		pmchild_pools[single_instance_types[k]].size = 1;

	/* Any pool not mentioned above is left at zero size. */

	/* Add up the pool sizes to get the total number of slots. */
	num_pmchild_slots = 0;
	for (int btype = 0; btype < BACKEND_NUM_TYPES; btype++)
		num_pmchild_slots += pmchild_pools[btype].size;

	/*
	 * Allocate all the slots as one array.  Under Valgrind, also remember
	 * the array start in a global so that it isn't reported as leaked.
	 */
	slots = palloc(num_pmchild_slots * sizeof(*slots));
#ifdef USE_VALGRIND
	pmchild_array = slots;
#endif

	/* Give each pool its run of slots and put them all on its freelist. */
	for (int btype = 0; btype < BACKEND_NUM_TYPES; btype++)
	{
		const int	pool_end = next_index + pmchild_pools[btype].size;

		dlist_init(&pmchild_pools[btype].freelist);

		/* slot numbers are one more than the array index */
		pmchild_pools[btype].first_slotno = next_index + 1;

		for (; next_index < pool_end; next_index++)
		{
			PMChild    *slot = &slots[next_index];

			slot->child_slot = next_index + 1;
			slot->pid = 0;
			slot->bkend_type = B_INVALID;
			slot->rw = NULL;
			slot->bgworker_notify = false;
			dlist_push_tail(&pmchild_pools[btype].freelist, &slot->elem);
		}
	}
	Assert(next_index == num_pmchild_slots);

	/* Nothing is active yet. */
	dlist_init(&ActiveChildList);
}
/*
 * Hand out a PMChild entry for a new postmaster child of the given type.
 *
 * The entry comes off the freelist of the pool belonging to that type and
 * is put on the list of active children.  Its child_slot is unique among
 * all active child processes.
 *
 * Returns NULL if the pool has no free entry left.  Reports an error if the
 * type has no pool at all, or if the entry taken from the freelist carries a
 * slot number outside the pool's range.
 */
PMChild *
AssignPostmasterChildSlot(BackendType btype)
{
	PMChild    *pmchild;
	dlist_head *freelist;

	/* A type whose pool was sized at zero can never be given a slot. */
	if (pmchild_pools[btype].size == 0)
		elog(ERROR, "cannot allocate a PMChild slot for backend type %d", btype);

	/* Pool exhausted: let the caller deal with it. */
	freelist = &pmchild_pools[btype].freelist;
	if (dlist_is_empty(freelist))
		return NULL;

	/* Take the first free entry and reset its per-child state. */
	pmchild = dlist_container(PMChild, elem, dlist_pop_head_node(freelist));
	pmchild->bkend_type = btype;
	pmchild->pid = 0;
	pmchild->rw = NULL;
	pmchild->bgworker_notify = true;

	/*
	 * child_slot is not assigned here; it was filled in when the slot array
	 * was set up.  Cross-check that it lies within this pool's range.
	 */
	if (pmchild->child_slot < pmchild_pools[btype].first_slotno ||
		pmchild->child_slot >= pmchild_pools[btype].first_slotno + pmchild_pools[btype].size)
	{
		elog(ERROR, "pmchild freelist for backend type %d is corrupt",
			 pmchild->bkend_type);
	}

	dlist_push_head(&ActiveChildList, &pmchild->elem);

	/* Record the assignment in the shared memory array as well. */
	MarkPostmasterChildSlotAssigned(pmchild->child_slot);

	elog(DEBUG2, "assigned pm child slot %d for %s",
		 pmchild->child_slot, PostmasterChildName(btype));

	return pmchild;
}
/*
* Allocate a PMChild struct for a dead-end backend. Dead-end children are
* not assigned a child_slot number. The struct is palloc'd; returns NULL if
* out of memory.
*/
PMChild *
AllocDeadEndChild(void)
{
PMChild *pmchild;
elog(DEBUG2, "allocating dead-end child");
pmchild = (PMChild *) palloc_extended(sizeof(PMChild), MCXT_ALLOC_NO_OOM);
if (pmchild)
{
pmchild->pid = 0;
pmchild->child_slot = 0;
pmchild->bkend_type = B_DEAD_END_BACKEND;
pmchild->rw = NULL;
pmchild->bgworker_notify = false;
dlist_push_head(&ActiveChildList, &pmchild->elem);
}
return pmchild;
}
/*
 * Give back a PMChild entry once its child process has exited.
 *
 * The entry is unlinked from whichever list it is on.  A dead-end entry is
 * simply freed, and the result is true.  A pooled entry goes back on its
 * pool's freelist, and the result is whatever
 * MarkPostmasterChildSlotUnassigned() reports: true if the child detached
 * cleanly from shared memory, false otherwise.
 *
 * Reports an error if a pooled entry's slot number does not belong to the
 * pool it is about to be returned to.
 */
bool
ReleasePostmasterChildSlot(PMChild *pmchild)
{
	PMChildPool *pool;

	dlist_delete(&pmchild->elem);

	/* Dead-end entries were allocated individually; just free them. */
	if (pmchild->bkend_type == B_DEAD_END_BACKEND)
	{
		elog(DEBUG2, "releasing dead-end backend");
		pfree(pmchild);
		return true;
	}

	elog(DEBUG2, "releasing pm child slot %d", pmchild->child_slot);

	/*
	 * WAL senders start out as regular backends and so took their entry
	 * from the regular-backend pool; that is where it has to go back.
	 */
	pool = (pmchild->bkend_type == B_WAL_SENDER)
		? &pmchild_pools[B_BACKEND]
		: &pmchild_pools[pmchild->bkend_type];

	/* Make sure the entry really belongs to the pool we picked. */
	if (pmchild->child_slot < pool->first_slotno ||
		pmchild->child_slot >= pool->first_slotno + pool->size)
	{
		elog(ERROR, "pmchild freelist for backend type %d is corrupt",
			 pmchild->bkend_type);
	}

	dlist_push_head(&pool->freelist, &pmchild->elem);

	return MarkPostmasterChildSlotUnassigned(pmchild->child_slot);
}
/*
 * Look up the PMChild entry of a running child process by its PID.
 *
 * Walks the list of active children and returns the first entry whose pid
 * matches, or NULL if there is none.
 */
PMChild *
FindPostmasterChildByPid(int pid)
{
	dlist_iter	it;

	dlist_foreach(it, &ActiveChildList)
	{
		PMChild    *candidate = dlist_container(PMChild, elem, it.cur);

		if (pid == candidate->pid)
			return candidate;
	}

	/* No active child has this PID. */
	return NULL;
}