1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-30 11:03:19 +03:00

Add io_direct setting (developer-only).

Provide a way to ask the kernel to use O_DIRECT (or local equivalent)
where available for data and WAL files, to avoid or minimize kernel
caching.  This hurts performance currently and is not intended for end
users yet.  Later proposed work would introduce our own I/O clustering,
read-ahead, etc. to replace the facilities the kernel disables with this
option.

The only user-visible change, if the developer-only GUC is not used, is
that this commit also removes the obscure logic that would activate
O_DIRECT for the WAL when wal_sync_method=open_[data]sync and
wal_level=minimal (which also requires max_wal_senders=0).  Those are
non-default and unlikely settings, and this behavior wasn't (correctly)
documented.  The same effect can be achieved with io_direct=wal.

Author: Thomas Munro <thomas.munro@gmail.com>
Author: Andres Freund <andres@anarazel.de>
Author: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Reviewed-by: Justin Pryzby <pryzby@telsasoft.com>
Reviewed-by: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Discussion: https://postgr.es/m/CA%2BhUKGK1X532hYqJ_MzFWt0n1zt8trz980D79WbjwnT-yYLZpg%40mail.gmail.com
This commit is contained in:
Thomas Munro
2023-04-08 11:04:49 +12:00
parent faeedbcefd
commit d4e71df6d7
14 changed files with 263 additions and 35 deletions

View File

@ -98,7 +98,9 @@
#include "storage/fd.h"
#include "storage/ipc.h"
#include "utils/guc.h"
#include "utils/guc_hooks.h"
#include "utils/resowner_private.h"
#include "utils/varlena.h"
/* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */
#if defined(HAVE_SYNC_FILE_RANGE)
@ -162,6 +164,9 @@ bool data_sync_retry = false;
/* How SyncDataDirectory() should do its job. */
int recovery_init_sync_method = RECOVERY_INIT_SYNC_METHOD_FSYNC;
/*
 * Which kinds of files should be opened with PG_O_DIRECT.
 *
 * Set by assign_io_direct from the value computed by check_io_direct, which
 * builds it as a bitwise OR of IO_DIRECT_DATA, IO_DIRECT_WAL and
 * IO_DIRECT_WAL_INIT.  Having no initializer, it starts out as zero (no
 * flags set).
 */
int io_direct_flags;
/* Debugging.... */
#ifdef FDDEBUG
@ -2022,6 +2027,9 @@ FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
if (nbytes <= 0)
return;
if (VfdCache[file].fileFlags & PG_O_DIRECT)
return;
returnCode = FileAccess(file);
if (returnCode < 0)
return;
@ -3826,3 +3834,93 @@ data_sync_elevel(int elevel)
{
return data_sync_retry ? elevel : PANIC;
}
/*
 * GUC check hook for io_direct.
 *
 * *newval is parsed as a comma-separated list whose elements must each be
 * one of "data", "wal" or "wal_init" (compared case-insensitively).  The
 * elements are combined into a bitmask of IO_DIRECT_DATA, IO_DIRECT_WAL and
 * IO_DIRECT_WAL_INIT.  When PG_O_DIRECT is 0, only the empty string is
 * accepted and the bitmask is always zero.
 *
 * On success, returns true and stores a guc_malloc'd int holding the
 * bitmask in *extra, for use by assign_io_direct.  On failure, returns
 * false and leaves *extra untouched; for invalid input a detail message is
 * supplied through GUC_check_errdetail.
 *
 * "source" is not used.
 */
bool
check_io_direct(char **newval, void **extra, GucSource source)
{
	bool		result = true;
	int			flags = 0;
	int		   *myextra;

#if PG_O_DIRECT == 0
	/* No direct I/O flag on this platform: only the empty setting is OK. */
	if (strcmp(*newval, "") != 0)
	{
		GUC_check_errdetail("io_direct is not supported on this platform.");
		result = false;
	}
#else
	List	   *elemlist;
	ListCell   *l;
	char	   *rawstring;

	/* Need a modifiable copy of string */
	rawstring = pstrdup(*newval);

	if (!SplitGUCList(rawstring, ',', &elemlist))
	{
		GUC_check_errdetail("invalid list syntax in parameter \"%s\"",
							"io_direct");
		pfree(rawstring);
		list_free(elemlist);
		return false;
	}

	/* Translate each keyword into its flag bit; stop at the first bad one. */
	foreach(l, elemlist)
	{
		char	   *item = (char *) lfirst(l);

		if (pg_strcasecmp(item, "data") == 0)
			flags |= IO_DIRECT_DATA;
		else if (pg_strcasecmp(item, "wal") == 0)
			flags |= IO_DIRECT_WAL;
		else if (pg_strcasecmp(item, "wal_init") == 0)
			flags |= IO_DIRECT_WAL_INIT;
		else
		{
			GUC_check_errdetail("invalid option \"%s\"", item);
			result = false;
			break;
		}
	}

	/*
	 * It's possible to configure block sizes smaller than our assumed I/O
	 * alignment size, which could result in invalid I/O requests.
	 */
#if XLOG_BLCKSZ < PG_IO_ALIGN_SIZE
	if (result && (flags & (IO_DIRECT_WAL | IO_DIRECT_WAL_INIT)))
	{
		GUC_check_errdetail("io_direct is not supported for WAL because XLOG_BLCKSZ is too small");
		result = false;
	}
#endif
#if BLCKSZ < PG_IO_ALIGN_SIZE
	if (result && (flags & IO_DIRECT_DATA))
	{
		GUC_check_errdetail("io_direct is not supported for data because BLCKSZ is too small");
		result = false;
	}
#endif

	pfree(rawstring);
	list_free(elemlist);
#endif

	if (!result)
		return false;

	/*
	 * Save the flags in *extra, for use by assign_io_direct.
	 *
	 * Allocate at LOG rather than ERROR: a check hook is expected to reject
	 * a value by returning false, so an out-of-memory condition is reported
	 * that way instead of throwing an error from inside the hook.  *extra is
	 * only written once the allocation is known to have succeeded.
	 */
	myextra = (int *) guc_malloc(LOG, sizeof(int));
	if (myextra == NULL)
		return false;
	*myextra = flags;
	*extra = myextra;

	return true;
}
/*
 * GUC assign hook for io_direct.
 *
 * "extra" points at the int bitmask that check_io_direct saved; copy it
 * into io_direct_flags.  "newval" is not used.
 */
extern void
assign_io_direct(const char *newval, void *extra)
{
	io_direct_flags = *((const int *) extra);
}