The existing io_combine_limit can be changed by users.  The new
io_max_combine_limit is fixed at server startup time, and functions as a
silent clamp on the user setting.  That in itself is probably quite useful,
but the primary motivation is:

aio_init.c allocates shared memory for all asynchronous IOs including some
per-block data, and we didn't want to waste memory you'd never use by
assuming they could be up to PG_IOV_MAX.  This commit already halves the size
of 'AioHandleIov' and 'AioHandleData'.  A follow-up commit can now expand
PG_IOV_MAX without affecting that.

Since our GUC system doesn't support dependencies or cross-checks between
GUCs, the user-settable one now assigns a "raw" value to io_combine_limit_guc,
and the lower of io_combine_limit_guc and io_max_combine_limit is maintained
in io_combine_limit.

Reviewed-by: Andres Freund <andres@anarazel.de> (earlier version)
Discussion: https://postgr.es/m/CA%2BhUKG%2B2T9p-%2BzM6Eeou-RAJjTML6eit1qn26f9twznX59qtCA%40mail.gmail.com
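As a minimal sketch of the clamping mechanism described above (the hook names
and the exact placement are assumptions for illustration; the actual commit
may structure this differently), the two GUC assign hooks could keep the
effective io_combine_limit in sync like this:

	/* hypothetical assign hook for io_max_combine_limit (PGC_POSTMASTER) */
	void
	assign_io_max_combine_limit(int newval, void *extra)
	{
		io_max_combine_limit = newval;
		io_combine_limit = Min(io_max_combine_limit, io_combine_limit_guc);
	}

	/* hypothetical assign hook for the user-settable io_combine_limit */
	void
	assign_io_combine_limit(int newval, void *extra)
	{
		io_combine_limit_guc = newval;
		io_combine_limit = Min(io_max_combine_limit, io_combine_limit_guc);
	}

Whichever GUC changes last, the effective value can never exceed the limit
that the shared memory below was sized for.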
240 lines, 5.9 KiB, C
/*-------------------------------------------------------------------------
 *
 * aio_init.c
 *	  AIO - Subsystem Initialization
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/storage/aio/aio_init.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "miscadmin.h"
#include "storage/aio.h"
#include "storage/aio_internal.h"
#include "storage/aio_subsys.h"
#include "storage/bufmgr.h"
#include "storage/io_worker.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/shmem.h"
#include "utils/guc.h"


static Size
AioCtlShmemSize(void)
{
	Size		sz;

	/* pgaio_ctl itself */
	sz = offsetof(PgAioCtl, io_handles);

	return sz;
}

static uint32
AioProcs(void)
{
	/*
	 * While AIO workers don't need their own AIO context, we can't currently
	 * guarantee nothing gets assigned to the ProcNumber for an IO worker if
	 * we just subtracted MAX_IO_WORKERS.
	 */
	return MaxBackends + NUM_AUXILIARY_PROCS;
}

static Size
AioBackendShmemSize(void)
{
	return mul_size(AioProcs(), sizeof(PgAioBackend));
}

static Size
AioHandleShmemSize(void)
{
	Size		sz;

	/* verify AioChooseMaxConcurrency() did its thing */
	Assert(io_max_concurrency > 0);

	/* io handles */
	sz = mul_size(AioProcs(),
				  mul_size(io_max_concurrency, sizeof(PgAioHandle)));

	return sz;
}

static Size
AioHandleIOVShmemSize(void)
{
	/* each IO handle can have up to io_max_combine_limit iovec objects */
	return mul_size(sizeof(struct iovec),
					mul_size(mul_size(io_max_combine_limit, AioProcs()),
							 io_max_concurrency));
}

static Size
AioHandleDataShmemSize(void)
{
	/* each buffer referenced by an iovec can have associated data */
	return mul_size(sizeof(uint64),
					mul_size(mul_size(io_max_combine_limit, AioProcs()),
							 io_max_concurrency));
}

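/*
 * Rough worked example of what the two arrays above cost (illustrative
 * numbers, not defaults taken from the source): with AioProcs() = 128,
 * io_max_concurrency = 64 and io_max_combine_limit = 16, the iovec array
 * holds 128 * 64 * 16 = 131072 iovecs, i.e. 2 MiB at 16 bytes each, and the
 * handle-data array another 1 MiB of uint64s.  Sizing by io_max_combine_limit
 * rather than PG_IOV_MAX is what keeps this small, per the commit message
 * above.
 */
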
/*
 * Choose a suitable value for io_max_concurrency.
 *
 * It's unlikely that we could have more IOs in flight than buffers that we
 * would be allowed to pin.
 *
 * On the upper end, apply a cap too - just because shared_buffers is large,
 * it doesn't make sense to have millions of buffers undergo IO concurrently.
 */
static int
AioChooseMaxConcurrency(void)
{
	uint32		max_backends;
	int			max_proportional_pins;

	/* Similar logic to LimitAdditionalPins() */
	max_backends = MaxBackends + NUM_AUXILIARY_PROCS;
	max_proportional_pins = NBuffers / max_backends;

	max_proportional_pins = Max(max_proportional_pins, 1);

	/* apply upper limit */
	return Min(max_proportional_pins, 64);
}

Size
AioShmemSize(void)
{
	Size		sz = 0;

	/*
	 * We prefer to report this value's source as PGC_S_DYNAMIC_DEFAULT.
	 * However, if the DBA explicitly set io_max_concurrency = -1 in the
	 * config file, then PGC_S_DYNAMIC_DEFAULT will fail to override that and
	 * we must force the matter with PGC_S_OVERRIDE.
	 */
	if (io_max_concurrency == -1)
	{
		char		buf[32];

		snprintf(buf, sizeof(buf), "%d", AioChooseMaxConcurrency());
		SetConfigOption("io_max_concurrency", buf, PGC_POSTMASTER,
						PGC_S_DYNAMIC_DEFAULT);
		if (io_max_concurrency == -1)	/* failed to apply it? */
			SetConfigOption("io_max_concurrency", buf, PGC_POSTMASTER,
							PGC_S_OVERRIDE);
	}

	sz = add_size(sz, AioCtlShmemSize());
	sz = add_size(sz, AioBackendShmemSize());
	sz = add_size(sz, AioHandleShmemSize());
	sz = add_size(sz, AioHandleIOVShmemSize());
	sz = add_size(sz, AioHandleDataShmemSize());

	/* Reserve space for method specific resources. */
	if (pgaio_method_ops->shmem_size)
		sz = add_size(sz, pgaio_method_ops->shmem_size());

	return sz;
}

void
AioShmemInit(void)
{
	bool		found;
	uint32		io_handle_off = 0;
	uint32		iovec_off = 0;
	uint32		per_backend_iovecs = io_max_concurrency * io_max_combine_limit;
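
	/*
	 * io_max_combine_limit cannot change after startup (see the commit
	 * message above), so the counts computed from it here agree with the
	 * shared memory reserved in AioShmemSize().
	 */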

	pgaio_ctl = (PgAioCtl *)
		ShmemInitStruct("AioCtl", AioCtlShmemSize(), &found);

	if (found)
		goto out;

	memset(pgaio_ctl, 0, AioCtlShmemSize());

	pgaio_ctl->io_handle_count = AioProcs() * io_max_concurrency;
	pgaio_ctl->iovec_count = AioProcs() * per_backend_iovecs;

	pgaio_ctl->backend_state = (PgAioBackend *)
		ShmemInitStruct("AioBackend", AioBackendShmemSize(), &found);

	pgaio_ctl->io_handles = (PgAioHandle *)
		ShmemInitStruct("AioHandle", AioHandleShmemSize(), &found);

	pgaio_ctl->iovecs = (struct iovec *)
		ShmemInitStruct("AioHandleIOV", AioHandleIOVShmemSize(), &found);
	pgaio_ctl->handle_data = (uint64 *)
		ShmemInitStruct("AioHandleData", AioHandleDataShmemSize(), &found);

	for (int procno = 0; procno < AioProcs(); procno++)
	{
		PgAioBackend *bs = &pgaio_ctl->backend_state[procno];

		bs->io_handle_off = io_handle_off;
		io_handle_off += io_max_concurrency;

		dclist_init(&bs->idle_ios);
		memset(bs->staged_ios, 0, sizeof(PgAioHandle *) * PGAIO_SUBMIT_BATCH_SIZE);
		dclist_init(&bs->in_flight_ios);

		/* initialize per-backend IOs */
		for (int i = 0; i < io_max_concurrency; i++)
		{
			PgAioHandle *ioh = &pgaio_ctl->io_handles[bs->io_handle_off + i];

			ioh->generation = 1;
			ioh->owner_procno = procno;
			ioh->iovec_off = iovec_off;
			ioh->handle_data_len = 0;
			ioh->report_return = NULL;
			ioh->resowner = NULL;
			ioh->num_callbacks = 0;
			ioh->distilled_result.status = ARS_UNKNOWN;
			ioh->flags = 0;

			ConditionVariableInit(&ioh->cv);

			dclist_push_tail(&bs->idle_ios, &ioh->node);
			iovec_off += io_max_combine_limit;
		}
	}

out:
	/* Initialize IO method specific resources. */
	if (pgaio_method_ops->shmem_init)
		pgaio_method_ops->shmem_init(!found);
}

void
pgaio_init_backend(void)
{
	/* shouldn't be initialized twice */
	Assert(!pgaio_my_backend);

	if (MyBackendType == B_IO_WORKER)
		return;

	if (MyProc == NULL || MyProcNumber >= AioProcs())
		elog(ERROR, "aio requires a normal PGPROC");

	pgaio_my_backend = &pgaio_ctl->backend_state[MyProcNumber];

	if (pgaio_method_ops->init_backend)
		pgaio_method_ops->init_backend();

	before_shmem_exit(pgaio_shutdown, 0);
}