1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Support syncing the WAL to disk using either fsync(), fdatasync(),
O_SYNC, or O_DSYNC (as available on a given platform).  Add GUC parameter
to control sync method.
Also, add defense to XLogWrite to prevent it from going nuts if passed
a target write position that's past the end of the buffers so far filled
by XLogInsert.
This commit is contained in:
Tom Lane
2001-03-16 05:44:33 +00:00
parent 4eb5e27a28
commit 9d645fd84c
6 changed files with 287 additions and 62 deletions

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.58 2001/03/14 20:23:04 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.59 2001/03/16 05:44:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -42,6 +42,47 @@
#include "miscadmin.h"
/*
 * Work out which ways of forcing a file to disk exist on this platform,
 * and select the default among them.  fsync() is taken for granted;
 * whether fdatasync() exists is reported by configure (HAVE_FDATASYNC).
 *
 * Order of preference for the default: open with O_DSYNC, then
 * fdatasync(), then plain fsync().
 */
#define SYNC_METHOD_FSYNC		0
#define SYNC_METHOD_FDATASYNC	1
#define SYNC_METHOD_OPEN		2	/* used for both O_SYNC and O_DSYNC */

/* Full-sync open flag: use O_SYNC if present, else fall back to O_FSYNC */
#ifdef O_SYNC
#define OPEN_SYNC_FLAG		O_SYNC
#elif defined(O_FSYNC)
#define OPEN_SYNC_FLAG		O_FSYNC
#endif

/*
 * Data-only open flag: offered only when a full-sync flag exists and
 * O_DSYNC is a genuinely different bit from it.
 */
#ifdef OPEN_SYNC_FLAG
#if defined(O_DSYNC) && (O_DSYNC != OPEN_SYNC_FLAG)
#define OPEN_DATASYNC_FLAG	O_DSYNC
#endif
#endif

/* Pick the default method name, method code, and open() flag bit */
#if defined(OPEN_DATASYNC_FLAG)
#define DEFAULT_SYNC_METHOD_STR	"open_datasync"
#define DEFAULT_SYNC_METHOD		SYNC_METHOD_OPEN
#define DEFAULT_SYNC_FLAGBIT	OPEN_DATASYNC_FLAG
#elif defined(HAVE_FDATASYNC)
#define DEFAULT_SYNC_METHOD_STR	"fdatasync"
#define DEFAULT_SYNC_METHOD		SYNC_METHOD_FDATASYNC
#define DEFAULT_SYNC_FLAGBIT	0
#else
#define DEFAULT_SYNC_METHOD_STR	"fsync"
#define DEFAULT_SYNC_METHOD		SYNC_METHOD_FSYNC
#define DEFAULT_SYNC_FLAGBIT	0
#endif
/* Max time to wait to acquire XLog activity locks */
#define XLOG_LOCK_TIMEOUT (5*60*1000000) /* 5 minutes */
/* Max time to wait to acquire checkpoint lock */
@ -52,10 +93,18 @@ int CheckPointSegments = 3;
/* GUC parameters related to XLOG (declared extern in access/xlog.h) */
int XLOGbuffers = 8;
int XLOGfiles = 0; /* how many files to pre-allocate during ckpt */
int XLOG_DEBUG = 0;
/*
 * Name of the WAL sync method, as set through the "wal_sync_method"
 * configuration entry.  Initialized to NULL here.
 */
char *XLOG_sync_method = NULL;
/* Compile-time default method name; which one it is depends on the platform */
const char XLOG_sync_method_default[] = DEFAULT_SYNC_METHOD_STR;
char XLOG_archive_dir[MAXPGPATH]; /* null string means delete 'em */
/* these are derived from XLOG_sync_method by assign_xlog_sync_method */
/* sync_method holds one of the SYNC_METHOD_* codes */
static int sync_method = DEFAULT_SYNC_METHOD;
/* open_sync_bit is the open() flag for the "open" methods, 0 otherwise */
static int open_sync_bit = DEFAULT_SYNC_FLAGBIT;
#define MinXLOGbuffers 4
/*
 * Extra flag to OR into the open() flags of a log segment file: the
 * current open_sync_bit, or 0 whenever enableFsync is false.
 */
#define XLOG_SYNC_BIT (enableFsync ? open_sync_bit : 0)
/*
* ThisStartUpID will be same in all backends --- it identifies current
@ -365,6 +414,7 @@ static void WriteControlFile(void);
static void ReadControlFile(void);
static char *str_time(time_t tnow);
static void xlog_outrec(char *buf, XLogRecord *record);
static void issue_xlog_fsync(void);
/*
@ -917,6 +967,15 @@ XLogWrite(XLogwrtRqst WriteRqst)
while (XLByteLT(LogwrtResult.Write, WriteRqst.Write))
{
/*
* Make sure we're not ahead of the insert process. This could
* happen if we're passed a bogus WriteRqst.Write that is past the
* end of the last page that's been initialized by
* AdvanceXLInsertBuffer.
*/
if (!XLByteLT(LogwrtResult.Write, XLogCtl->xlblocks[Write->curridx]))
elog(STOP, "XLogWrite: write request is past end of log");
/* Advance LogwrtResult.Write to end of current buffer page */
LogwrtResult.Write = XLogCtl->xlblocks[Write->curridx];
ispartialpage = XLByteLT(WriteRqst.Write, LogwrtResult.Write);
@ -1004,9 +1063,7 @@ XLogWrite(XLogwrtRqst WriteRqst)
*/
if (openLogOff >= XLogSegSize && !ispartialpage)
{
if (pg_fdatasync(openLogFile) != 0)
elog(STOP, "fsync(logfile %u seg %u) failed: %m",
openLogId, openLogSeg);
issue_xlog_fsync();
LogwrtResult.Flush = LogwrtResult.Write; /* end of current page */
}
@ -1030,24 +1087,24 @@ XLogWrite(XLogwrtRqst WriteRqst)
* we might have no open file or the wrong one. However, we do
* not need to fsync more than one file.
*/
if (openLogFile >= 0 &&
!XLByteInPrevSeg(LogwrtResult.Write, openLogId, openLogSeg))
if (sync_method != SYNC_METHOD_OPEN)
{
if (close(openLogFile) != 0)
elog(STOP, "close(logfile %u seg %u) failed: %m",
openLogId, openLogSeg);
openLogFile = -1;
if (openLogFile >= 0 &&
!XLByteInPrevSeg(LogwrtResult.Write, openLogId, openLogSeg))
{
if (close(openLogFile) != 0)
elog(STOP, "close(logfile %u seg %u) failed: %m",
openLogId, openLogSeg);
openLogFile = -1;
}
if (openLogFile < 0)
{
XLByteToPrevSeg(LogwrtResult.Write, openLogId, openLogSeg);
openLogFile = XLogFileOpen(openLogId, openLogSeg, false);
openLogOff = 0;
}
issue_xlog_fsync();
}
if (openLogFile < 0)
{
XLByteToPrevSeg(LogwrtResult.Write, openLogId, openLogSeg);
openLogFile = XLogFileOpen(openLogId, openLogSeg, false);
openLogOff = 0;
}
if (pg_fdatasync(openLogFile) != 0)
elog(STOP, "fsync(logfile %u seg %u) failed: %m",
openLogId, openLogSeg);
LogwrtResult.Flush = LogwrtResult.Write;
}
@ -1191,7 +1248,8 @@ XLogFileInit(uint32 log, uint32 seg, bool *usexistent)
*/
if (*usexistent)
{
fd = BasicOpenFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
fd = BasicOpenFile(path, O_RDWR | PG_BINARY | XLOG_SYNC_BIT,
S_IRUSR | S_IWUSR);
if (fd < 0)
{
if (errno != ENOENT)
@ -1208,6 +1266,7 @@ XLogFileInit(uint32 log, uint32 seg, bool *usexistent)
unlink(tpath);
unlink(path);
/* do not use XLOG_SYNC_BIT here --- want to fsync only at end of fill */
fd = BasicOpenFile(tpath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
S_IRUSR | S_IWUSR);
if (fd < 0)
@ -1220,8 +1279,8 @@ XLogFileInit(uint32 log, uint32 seg, bool *usexistent)
* allow "holes" in files, just seeking to the end doesn't allocate
* intermediate space. This way, we know that we have all the space
* and (after the fsync below) that all the indirect blocks are down
* on disk. Therefore, fdatasync(2) will be sufficient to sync future
* writes to the log file.
* on disk. Therefore, fdatasync(2) or O_DSYNC will be sufficient to
* sync future writes to the log file.
*/
MemSet(zbuffer, 0, sizeof(zbuffer));
for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(zbuffer))
@ -1261,7 +1320,8 @@ XLogFileInit(uint32 log, uint32 seg, bool *usexistent)
log, seg);
#endif
fd = BasicOpenFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
fd = BasicOpenFile(path, O_RDWR | PG_BINARY | XLOG_SYNC_BIT,
S_IRUSR | S_IWUSR);
if (fd < 0)
elog(STOP, "InitReopen(logfile %u seg %u) failed: %m",
log, seg);
@ -1280,7 +1340,8 @@ XLogFileOpen(uint32 log, uint32 seg, bool econt)
XLogFileName(path, log, seg);
fd = BasicOpenFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
fd = BasicOpenFile(path, O_RDWR | PG_BINARY | XLOG_SYNC_BIT,
S_IRUSR | S_IWUSR);
if (fd < 0)
{
if (econt && errno == ENOENT)
@ -1845,7 +1906,8 @@ WriteControlFile(void)
memset(buffer, 0, BLCKSZ);
memcpy(buffer, ControlFile, sizeof(ControlFileData));
fd = BasicOpenFile(ControlFilePath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, S_IRUSR | S_IWUSR);
fd = BasicOpenFile(ControlFilePath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
S_IRUSR | S_IWUSR);
if (fd < 0)
elog(STOP, "WriteControlFile failed to create control file (%s): %m",
ControlFilePath);
@ -2852,3 +2914,120 @@ xlog_outrec(char *buf, XLogRecord *record)
sprintf(buf + strlen(buf), ": %s",
RmgrTable[record->xl_rmid].rm_name);
}
/*
* GUC support routines
*/
/*
 * check_xlog_sync_method
 *
 * Report whether "method" names a WAL sync method that this build
 * supports.  The comparison ignores case.  "fsync" is always accepted;
 * the other names are accepted only when the matching platform feature
 * macro is defined.
 *
 * Returns true if the name is acceptable, false otherwise.
 */
bool
check_xlog_sync_method(const char *method)
{
	/* Every method name usable on this platform */
	static const char *const known_methods[] = {
		"fsync",
#ifdef HAVE_FDATASYNC
		"fdatasync",
#endif
#ifdef OPEN_SYNC_FLAG
		"open_sync",
#endif
#ifdef OPEN_DATASYNC_FLAG
		"open_datasync",
#endif
	};
	const int	n_known = (int) (sizeof(known_methods) / sizeof(known_methods[0]));
	int			i;

	for (i = 0; i < n_known; i++)
	{
		if (strcasecmp(method, known_methods[i]) == 0)
			return true;
	}
	return false;
}
void
assign_xlog_sync_method(const char *method)
{
int new_sync_method;
int new_sync_bit;
if (strcasecmp(method, "fsync") == 0)
{
new_sync_method = SYNC_METHOD_FSYNC;
new_sync_bit = 0;
}
#ifdef HAVE_FDATASYNC
else if (strcasecmp(method, "fdatasync") == 0)
{
new_sync_method = SYNC_METHOD_FDATASYNC;
new_sync_bit = 0;
}
#endif
#ifdef OPEN_SYNC_FLAG
else if (strcasecmp(method, "open_sync") == 0)
{
new_sync_method = SYNC_METHOD_OPEN;
new_sync_bit = OPEN_SYNC_FLAG;
}
#endif
#ifdef OPEN_DATASYNC_FLAG
else if (strcasecmp(method, "open_datasync") == 0)
{
new_sync_method = SYNC_METHOD_OPEN;
new_sync_bit = OPEN_DATASYNC_FLAG;
}
#endif
else
{
/* Can't get here unless guc.c screwed up */
elog(ERROR, "Bogus xlog sync method %s", method);
new_sync_method = 0; /* keep compiler quiet */
new_sync_bit = 0;
}
if (sync_method != new_sync_method || open_sync_bit != new_sync_bit)
{
/*
* To ensure that no blocks escape unsynced, force an fsync on
* the currently open log segment (if any). Also, if the open
* flag is changing, close the log file so it will be reopened
* (with new flag bit) at next use.
*/
if (openLogFile >= 0)
{
if (pg_fsync(openLogFile) != 0)
elog(STOP, "fsync(logfile %u seg %u) failed: %m",
openLogId, openLogSeg);
if (open_sync_bit != new_sync_bit)
{
if (close(openLogFile) != 0)
elog(STOP, "close(logfile %u seg %u) failed: %m",
openLogId, openLogSeg);
openLogFile = -1;
}
}
sync_method = new_sync_method;
open_sync_bit = new_sync_bit;
}
}
/*
 * Flush the current XLOG output file in whatever way the active
 * sync_method calls for (which may be nothing at all).
 *
 * Works on the file-level openLogFile descriptor.  A failing sync call,
 * or an unrecognized sync_method value, is reported through elog(STOP).
 */
static void
issue_xlog_fsync(void)
{
	const int	method = sync_method;

	if (method == SYNC_METHOD_OPEN)
	{
		/* write synced it already */
		return;
	}

	if (method == SYNC_METHOD_FSYNC)
	{
		if (pg_fsync(openLogFile) != 0)
			elog(STOP, "fsync(logfile %u seg %u) failed: %m",
				 openLogId, openLogSeg);
		return;
	}

#ifdef HAVE_FDATASYNC
	if (method == SYNC_METHOD_FDATASYNC)
	{
		if (pg_fdatasync(openLogFile) != 0)
			elog(STOP, "fdatasync(logfile %u seg %u) failed: %m",
				 openLogId, openLogSeg);
		return;
	}
#endif

	/* Not a method this build knows how to carry out */
	elog(STOP, "bogus sync_method %d", sync_method);
}

View File

@ -4,7 +4,7 @@
* Support for grand unified configuration scheme, including SET
* command, configuration file, and command line options.
*
* $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.32 2001/03/13 01:17:06 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.33 2001/03/16 05:44:33 tgl Exp $
*
* Copyright 2000 by PostgreSQL Global Development Group
* Written by Peter Eisentraut <peter_e@gmx.net>.
@ -20,6 +20,7 @@
#include "utils/guc.h"
#include "access/xlog.h"
#include "commands/async.h"
#include "libpq/auth.h"
#include "libpq/pqcomm.h"
@ -33,23 +34,17 @@
#include "tcop/tcopprot.h"
/* XXX should be in a header file */
/* XXX these should be in other modules' header files */
extern bool Log_connections;
extern int CheckPointSegments;
extern int CheckPointTimeout;
extern int XLOGbuffers;
extern int XLOGfiles;
extern int XLOG_DEBUG;
extern int CommitDelay;
extern int CommitSiblings;
extern bool FixBTree;
#ifdef ENABLE_SYSLOG
extern char *Syslog_facility;
extern char *Syslog_ident;
bool check_facility(const char *facility);
static bool check_facility(const char *facility);
#endif
/*
@ -138,7 +133,8 @@ struct config_string
GucContext context;
char **variable;
const char *default_val;
bool (*parse_hook)(const char *);
bool (*parse_hook)(const char *proposed);
void (*assign_hook)(const char *newval);
};
@ -330,25 +326,29 @@ static struct config_string
ConfigureNamesString[] =
{
{"krb_server_keyfile", PGC_POSTMASTER, &pg_krb_server_keyfile,
PG_KRB_SRVTAB, NULL},
{"unix_socket_group", PGC_POSTMASTER, &Unix_socket_group,
"", NULL},
PG_KRB_SRVTAB, NULL, NULL},
#ifdef ENABLE_SYSLOG
{"syslog_facility", PGC_POSTMASTER, &Syslog_facility,
"LOCAL0", check_facility},
"LOCAL0", check_facility, NULL},
{"syslog_ident", PGC_POSTMASTER, &Syslog_ident,
"postgres", NULL},
"postgres", NULL, NULL},
#endif
{"unix_socket_group", PGC_POSTMASTER, &Unix_socket_group,
"", NULL, NULL},
{"unix_socket_directory", PGC_POSTMASTER, &UnixSocketDir,
"", NULL},
"", NULL, NULL},
{"virtual_host", PGC_POSTMASTER, &VirtualHost,
"", NULL},
"", NULL, NULL},
{NULL, 0, NULL, NULL, NULL}
{"wal_sync_method", PGC_SIGHUP, &XLOG_sync_method,
XLOG_sync_method_default,
check_xlog_sync_method, assign_xlog_sync_method},
{NULL, 0, NULL, NULL, NULL, NULL}
};
/******** end of options list ********/
@ -723,7 +723,10 @@ set_config_option(const char * name, const char * value, GucContext
elog(elevel, "out of memory");
return false;
}
free(*conf->variable);
if (conf->assign_hook)
(conf->assign_hook)(str);
if (*conf->variable)
free(*conf->variable);
*conf->variable = str;
}
}
@ -737,7 +740,10 @@ set_config_option(const char * name, const char * value, GucContext
elog(elevel, "out of memory");
return false;
}
free(*conf->variable);
if (conf->assign_hook)
(conf->assign_hook)(str);
if (*conf->variable)
free(*conf->variable);
*conf->variable = str;
}
break;
@ -855,7 +861,7 @@ ParseLongOption(const char * string, char ** name, char ** value)
#ifdef ENABLE_SYSLOG
bool
static bool
check_facility(const char *facility)
{
if (strcasecmp(facility,"LOCAL0") == 0) return true;

View File

@ -107,6 +107,8 @@
#
#wal_buffers = 8 # min 4
#wal_files = 0 # range 0-64
#wal_sync_method = fsync # fsync or fdatasync or open_sync or open_datasync
# Note: default wal_sync_method varies across platforms
#wal_debug = 0 # range 0-16
#commit_delay = 0 # range 0-100000
#commit_siblings = 5 # range 1-1000

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: xlog.h,v 1.20 2001/03/13 20:32:37 tgl Exp $
* $Id: xlog.h,v 1.21 2001/03/16 05:44:33 tgl Exp $
*/
#ifndef XLOG_H
#define XLOG_H
@ -176,6 +176,15 @@ extern StartUpID ThisStartUpID; /* current SUI */
extern bool InRecovery;
extern XLogRecPtr MyLastRecPtr;
/* these variables are GUC parameters related to XLOG */
extern int CheckPointSegments;
extern int XLOGbuffers;
extern int XLOGfiles;
extern int XLOG_DEBUG;
extern char *XLOG_sync_method;
extern const char XLOG_sync_method_default[];
extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);
extern void XLogFlush(XLogRecPtr RecPtr);
@ -202,4 +211,7 @@ extern void GetRedoRecPtr(void);
*/
extern XLogRecPtr GetUndoRecPtr(void);
/*
 * Hooks used by the "wal_sync_method" configuration entry:
 * check_xlog_sync_method() reports whether a proposed method name is
 * acceptable, and assign_xlog_sync_method() puts the named method into
 * effect.
 */
extern bool check_xlog_sync_method(const char *method);
extern void assign_xlog_sync_method(const char *method);
#endif /* XLOG_H */