mirror of
https://github.com/postgres/postgres.git
synced 2025-11-21 00:42:43 +03:00
This set of changes removes the list of available buffers and instead simply uses the clock-sweep algorithm to find and return an available buffer. This also removes the have_free_buffer() function and simply caps the pg_autoprewarm process to at most NBuffers. While on the surface this appears to be removing an optimization it is in fact eliminating code that induces overhead in the form of synchronization that is problematic for multi-core systems. The main reason for removing the freelist, however, is not the moderate improvement in scalability, but that having the freelist would require dedicated complexity in several upcoming patches. As we have not been able to find a case benefiting from the freelist... Author: Greg Burd <greg@burd.me> Reviewed-by: Tomas Vondra <tomas@vondra.me> Reviewed-by: Andres Freund <andres@anarazel.de> Discussion: https://postgr.es/m/70C6A5B5-2A20-4D0B-BC73-EB09DD62D61C@getmailspring.com
180 lines
5.1 KiB
C
180 lines
5.1 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* buf_init.c
|
|
* buffer manager initialization routines
|
|
*
|
|
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
*
|
|
* IDENTIFICATION
|
|
* src/backend/storage/buffer/buf_init.c
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include "storage/aio.h"
|
|
#include "storage/buf_internals.h"
|
|
#include "storage/bufmgr.h"
|
|
|
|
/* Array of NBuffers padded buffer descriptors; set up in BufferManagerShmemInit(). */
BufferDescPadded *BufferDescriptors;
|
|
/* Start of the buffer data area (NBuffers blocks of BLCKSZ bytes), aligned to PG_IO_ALIGN_SIZE. */
char *BufferBlocks;
|
|
/* Array of NBuffers I/O condition variables, one per buffer. */
ConditionVariableMinimallyPadded *BufferIOCVArray;
|
|
/* Per-backend file flush context; initialized with backend_flush_after. */
WritebackContext BackendWritebackContext;
|
|
/* Shared array of NBuffers items used to sort to-be-checkpointed buffer ids. */
CkptSortItem *CkptBufferIds;
|
|
|
|
|
|
/*
|
|
* Data Structures:
|
|
* buffers live in a lookup data structure (there is no longer a freelist).
|
|
*
|
|
*
|
|
* Buffer Lookup:
|
|
* One important note: the buffer has to be
|
|
* available for lookup BEFORE an IO begins. Otherwise
|
|
* a second process trying to read the buffer will
|
|
* allocate its own copy and the buffer pool will
|
|
* become inconsistent.
|
|
*
|
|
* Buffer Replacement:
|
|
* see freelist.c. A buffer cannot be replaced while in
|
|
* use either by data manager or during IO.
|
|
*
|
|
*
|
|
* Synchronization/Locking:
|
|
*
|
|
* IO_IN_PROGRESS -- this is a flag in the buffer descriptor.
|
|
* It must be set when an IO is initiated and cleared at
|
|
* the end of the IO. It is there to make sure that one
|
|
* process doesn't start to use a buffer while another is
|
|
* faulting it in. see WaitIO and related routines.
|
|
*
|
|
* refcount -- Counts the number of processes holding pins on a buffer.
|
|
* A buffer is pinned during IO and immediately after a BufferAlloc().
|
|
* Pins must be released before end of transaction. For efficiency the
|
|
* shared refcount isn't increased if an individual backend pins a buffer
|
|
* multiple times. Check the PrivateRefCount infrastructure in bufmgr.c.
|
|
*/
|
|
|
|
|
|
/*
|
|
* Initialize shared buffer pool
|
|
*
|
|
* This is called once during shared-memory initialization (either in the
|
|
* postmaster, or in a standalone backend).
|
|
*/
|
|
void
|
|
BufferManagerShmemInit(void)
|
|
{
|
|
bool foundBufs,
|
|
foundDescs,
|
|
foundIOCV,
|
|
foundBufCkpt;
|
|
|
|
/* Align descriptors to a cacheline boundary. */
|
|
BufferDescriptors = (BufferDescPadded *)
|
|
ShmemInitStruct("Buffer Descriptors",
|
|
NBuffers * sizeof(BufferDescPadded),
|
|
&foundDescs);
|
|
|
|
/* Align buffer pool on IO page size boundary. */
|
|
BufferBlocks = (char *)
|
|
TYPEALIGN(PG_IO_ALIGN_SIZE,
|
|
ShmemInitStruct("Buffer Blocks",
|
|
NBuffers * (Size) BLCKSZ + PG_IO_ALIGN_SIZE,
|
|
&foundBufs));
|
|
|
|
/* Align condition variables to cacheline boundary. */
|
|
BufferIOCVArray = (ConditionVariableMinimallyPadded *)
|
|
ShmemInitStruct("Buffer IO Condition Variables",
|
|
NBuffers * sizeof(ConditionVariableMinimallyPadded),
|
|
&foundIOCV);
|
|
|
|
/*
|
|
* The array used to sort to-be-checkpointed buffer ids is located in
|
|
* shared memory, to avoid having to allocate significant amounts of
|
|
* memory at runtime. As that'd be in the middle of a checkpoint, or when
|
|
* the checkpointer is restarted, memory allocation failures would be
|
|
* painful.
|
|
*/
|
|
CkptBufferIds = (CkptSortItem *)
|
|
ShmemInitStruct("Checkpoint BufferIds",
|
|
NBuffers * sizeof(CkptSortItem), &foundBufCkpt);
|
|
|
|
if (foundDescs || foundBufs || foundIOCV || foundBufCkpt)
|
|
{
|
|
/* should find all of these, or none of them */
|
|
Assert(foundDescs && foundBufs && foundIOCV && foundBufCkpt);
|
|
/* note: this path is only taken in EXEC_BACKEND case */
|
|
}
|
|
else
|
|
{
|
|
int i;
|
|
|
|
/*
|
|
* Initialize all the buffer headers.
|
|
*/
|
|
for (i = 0; i < NBuffers; i++)
|
|
{
|
|
BufferDesc *buf = GetBufferDescriptor(i);
|
|
|
|
ClearBufferTag(&buf->tag);
|
|
|
|
pg_atomic_init_u32(&buf->state, 0);
|
|
buf->wait_backend_pgprocno = INVALID_PROC_NUMBER;
|
|
|
|
buf->buf_id = i;
|
|
|
|
pgaio_wref_clear(&buf->io_wref);
|
|
|
|
LWLockInitialize(BufferDescriptorGetContentLock(buf),
|
|
LWTRANCHE_BUFFER_CONTENT);
|
|
|
|
ConditionVariableInit(BufferDescriptorGetIOCV(buf));
|
|
}
|
|
}
|
|
|
|
/* Init other shared buffer-management stuff */
|
|
StrategyInitialize(!foundDescs);
|
|
|
|
/* Initialize per-backend file flush context */
|
|
WritebackContextInit(&BackendWritebackContext,
|
|
&backend_flush_after);
|
|
}
|
|
|
|
/*
|
|
* BufferManagerShmemSize
|
|
*
|
|
* compute the size of shared memory for the buffer pool including
|
|
* data pages, buffer descriptors, hash tables, etc.
|
|
*/
|
|
Size
|
|
BufferManagerShmemSize(void)
|
|
{
|
|
Size size = 0;
|
|
|
|
/* size of buffer descriptors */
|
|
size = add_size(size, mul_size(NBuffers, sizeof(BufferDescPadded)));
|
|
/* to allow aligning buffer descriptors */
|
|
size = add_size(size, PG_CACHE_LINE_SIZE);
|
|
|
|
/* size of data pages, plus alignment padding */
|
|
size = add_size(size, PG_IO_ALIGN_SIZE);
|
|
size = add_size(size, mul_size(NBuffers, BLCKSZ));
|
|
|
|
/* size of stuff controlled by freelist.c */
|
|
size = add_size(size, StrategyShmemSize());
|
|
|
|
/* size of I/O condition variables */
|
|
size = add_size(size, mul_size(NBuffers,
|
|
sizeof(ConditionVariableMinimallyPadded)));
|
|
/* to allow aligning the above */
|
|
size = add_size(size, PG_CACHE_LINE_SIZE);
|
|
|
|
/* size of checkpoint sort array in bufmgr.c */
|
|
size = add_size(size, mul_size(NBuffers, sizeof(CkptSortItem)));
|
|
|
|
return size;
|
|
}
|