mirror of
https://github.com/postgres/postgres.git
synced 2025-07-30 11:03:19 +03:00
Add io_direct setting (developer-only).
Provide a way to ask the kernel to use O_DIRECT (or local equivalent) where available for data and WAL files, to avoid or minimize kernel caching. This hurts performance currently and is not intended for end users yet. Later proposed work would introduce our own I/O clustering, read-ahead, etc. to replace the facilities the kernel disables with this option.

The only user-visible change, if the developer-only GUC is not used, is that this commit also removes the obscure logic that would activate O_DIRECT for the WAL when wal_sync_method=open_[data]sync and wal_level=minimal (which also requires max_wal_senders=0). Those are non-default and unlikely settings, and this behavior wasn't (correctly) documented. The same effect can be achieved with io_direct=wal.

Author: Thomas Munro <thomas.munro@gmail.com>
Author: Andres Freund <andres@anarazel.de>
Author: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Reviewed-by: Justin Pryzby <pryzby@telsasoft.com>
Reviewed-by: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Discussion: https://postgr.es/m/CA%2BhUKGK1X532hYqJ_MzFWt0n1zt8trz980D79WbjwnT-yYLZpg%40mail.gmail.com
This commit is contained in:
@ -98,7 +98,9 @@
|
||||
#include "storage/fd.h"
|
||||
#include "storage/ipc.h"
|
||||
#include "utils/guc.h"
|
||||
#include "utils/guc_hooks.h"
|
||||
#include "utils/resowner_private.h"
|
||||
#include "utils/varlena.h"
|
||||
|
||||
/* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */
|
||||
#if defined(HAVE_SYNC_FILE_RANGE)
|
||||
@ -162,6 +164,9 @@ bool data_sync_retry = false;
|
||||
/* How SyncDataDirectory() should do its job. */
|
||||
int recovery_init_sync_method = RECOVERY_INIT_SYNC_METHOD_FSYNC;
|
||||
|
||||
/*
 * Which kinds of files should be opened with PG_O_DIRECT.
 *
 * Holds an OR of the IO_DIRECT_DATA, IO_DIRECT_WAL and IO_DIRECT_WAL_INIT
 * bits.  The value is computed from the io_direct setting by
 * check_io_direct() and stored here by assign_io_direct().
 */
|
||||
int io_direct_flags;
|
||||
|
||||
/* Debugging.... */
|
||||
|
||||
#ifdef FDDEBUG
|
||||
@ -2022,6 +2027,9 @@ FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
|
||||
if (nbytes <= 0)
|
||||
return;
|
||||
|
||||
if (VfdCache[file].fileFlags & PG_O_DIRECT)
|
||||
return;
|
||||
|
||||
returnCode = FileAccess(file);
|
||||
if (returnCode < 0)
|
||||
return;
|
||||
@ -3826,3 +3834,93 @@ data_sync_elevel(int elevel)
|
||||
{
|
||||
return data_sync_retry ? elevel : PANIC;
|
||||
}
|
||||
|
||||
bool
|
||||
check_io_direct(char **newval, void **extra, GucSource source)
|
||||
{
|
||||
bool result = true;
|
||||
int flags;
|
||||
|
||||
#if PG_O_DIRECT == 0
|
||||
if (strcmp(*newval, "") != 0)
|
||||
{
|
||||
GUC_check_errdetail("io_direct is not supported on this platform.");
|
||||
result = false;
|
||||
}
|
||||
flags = 0;
|
||||
#else
|
||||
List *elemlist;
|
||||
ListCell *l;
|
||||
char *rawstring;
|
||||
|
||||
/* Need a modifiable copy of string */
|
||||
rawstring = pstrdup(*newval);
|
||||
|
||||
if (!SplitGUCList(rawstring, ',', &elemlist))
|
||||
{
|
||||
GUC_check_errdetail("invalid list syntax in parameter \"%s\"",
|
||||
"io_direct");
|
||||
pfree(rawstring);
|
||||
list_free(elemlist);
|
||||
return false;
|
||||
}
|
||||
|
||||
flags = 0;
|
||||
foreach(l, elemlist)
|
||||
{
|
||||
char *item = (char *) lfirst(l);
|
||||
|
||||
if (pg_strcasecmp(item, "data") == 0)
|
||||
flags |= IO_DIRECT_DATA;
|
||||
else if (pg_strcasecmp(item, "wal") == 0)
|
||||
flags |= IO_DIRECT_WAL;
|
||||
else if (pg_strcasecmp(item, "wal_init") == 0)
|
||||
flags |= IO_DIRECT_WAL_INIT;
|
||||
else
|
||||
{
|
||||
GUC_check_errdetail("invalid option \"%s\"", item);
|
||||
result = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* It's possible to configure block sizes smaller than our assumed I/O
|
||||
* alignment size, which could result in invalid I/O requests.
|
||||
*/
|
||||
#if XLOG_BLCKSZ < PG_IO_ALIGN_SIZE
|
||||
if (result && (flags & (IO_DIRECT_WAL | IO_DIRECT_WAL_INIT)))
|
||||
{
|
||||
GUC_check_errdetail("io_direct is not supported for WAL because XLOG_BLCKSZ is too small");
|
||||
result = false;
|
||||
}
|
||||
#endif
|
||||
#if BLCKSZ < PG_IO_ALIGN_SIZE
|
||||
if (result && (flags & IO_DIRECT_DATA))
|
||||
{
|
||||
GUC_check_errdetail("io_direct is not supported for data because BLCKSZ is too small");
|
||||
result = false;
|
||||
}
|
||||
#endif
|
||||
|
||||
pfree(rawstring);
|
||||
list_free(elemlist);
|
||||
#endif
|
||||
|
||||
if (!result)
|
||||
return result;
|
||||
|
||||
/* Save the flags in *extra, for use by assign_io_direct */
|
||||
*extra = guc_malloc(ERROR, sizeof(int));
|
||||
*((int *) *extra) = flags;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
extern void
|
||||
assign_io_direct(const char *newval, void *extra)
|
||||
{
|
||||
int *flags = (int *) extra;
|
||||
|
||||
io_direct_flags = *flags;
|
||||
}
|
||||
|
Reference in New Issue
Block a user