diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 81b0ba34457..0ca5e402e0c 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1,4 +1,4 @@ - + Server Configuration @@ -1823,6 +1823,34 @@ archive_command = 'copy "%p" "C:\\server\\archivedir\\%f"' # Windows + + + standby_keep_segments (integer) + + standby_keep_segments configuration parameter + + + + Specifies the number of log file segments kept in the pg_xlog + directory, in case a standby server needs to fetch them via streaming + replication. Each segment is normally 16 megabytes. If a standby + server connected to the primary falls behind more than + standby_keep_segments segments, the primary might remove + a WAL segment still needed by the standby and the replication + connection will be terminated. + + This sets only the minimum number of segments retained for standby + purposes; the system might need to retain more segments for WAL + archival or to recover from a checkpoint. If standby_keep_segments + is zero (the default), the system doesn't keep any extra segments + for standby purposes, and the number of old WAL segments available + for standbys is determined based only on the location of the previous + checkpoint and the status of WAL archival. + This parameter can only be set in the postgresql.conf + file or on the server command line. + + + diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml index 13b783bc864..cff0339b523 100644 --- a/doc/src/sgml/high-availability.sgml +++ b/doc/src/sgml/high-availability.sgml @@ -1,4 +1,4 @@ - + High Availability, Load Balancing, and Replication @@ -732,7 +732,12 @@ trigger_file = '/path/to/trigger_file' Streaming replication relies on file-based continuous archiving for making the base backup and for allowing the standby to catch up if it is disconnected from the primary for long enough for the primary to - delete old WAL files still required by the standby. 
+ delete old WAL files still required by the standby. It is possible + to use streaming replication without WAL archiving, but if a standby + falls behind too much, the primary will delete old WAL files still + needed by the standby, and the standby will have to be manually restored + from a base backup. You can control how long the primary retains old WAL + segments using the standby_keep_segments setting. diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 12392f8cfc0..c5b7f7a98ce 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.391 2010/04/07 10:58:49 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.392 2010/04/12 09:52:29 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -66,6 +66,7 @@ /* User-settable parameters */ int CheckPointSegments = 3; +int StandbySegments = 0; int XLOGbuffers = 8; int XLogArchiveTimeout = 0; bool XLogArchiveMode = false; @@ -356,6 +357,8 @@ typedef struct XLogCtlData uint32 ckptXidEpoch; /* nextXID & epoch of latest checkpoint */ TransactionId ckptXid; XLogRecPtr asyncCommitLSN; /* LSN of newest async commit */ + uint32 lastRemovedLog; /* latest removed/recycled XLOG segment */ + uint32 lastRemovedSeg; /* Protected by WALWriteLock: */ XLogCtlWrite Write; @@ -3149,6 +3152,22 @@ PreallocXlogFiles(XLogRecPtr endptr) } } +/* + * Get the log/seg of the latest removed or recycled WAL segment (read under info_lck). + * Both *log and *seg are set to 0 if no WAL segments have been removed since startup. 
+ */ +void +XLogGetLastRemoved(uint32 *log, uint32 *seg) +{ + /* use volatile pointer to prevent code rearrangement */ + volatile XLogCtlData *xlogctl = XLogCtl; + + SpinLockAcquire(&xlogctl->info_lck); + *log = xlogctl->lastRemovedLog; + *seg = xlogctl->lastRemovedSeg; + SpinLockRelease(&xlogctl->info_lck); +} + /* * Recycle or remove all log files older or equal to passed log/seg# * @@ -3170,6 +3189,20 @@ RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr) char newpath[MAXPGPATH]; #endif struct stat statbuf; + /* use volatile pointer to prevent code rearrangement */ + volatile XLogCtlData *xlogctl = XLogCtl; + + /* Update the last removed location in shared memory first */ + SpinLockAcquire(&xlogctl->info_lck); + if (log > xlogctl->lastRemovedLog || + (log == xlogctl->lastRemovedLog && seg > xlogctl->lastRemovedSeg)) + { + xlogctl->lastRemovedLog = log; + xlogctl->lastRemovedSeg = seg; + } + SpinLockRelease(&xlogctl->info_lck); + + elog(DEBUG1, "removing WAL segments older than %X/%X", log, seg); /* * Initialize info about where to try to recycle to. We allow recycling @@ -7172,36 +7205,51 @@ CreateCheckPoint(int flags) smgrpostckpt(); /* - * If there's connected standby servers doing XLOG streaming, don't delete - * XLOG files that have not been streamed to all of them yet. This does - * nothing to prevent them from being deleted when the standby is - * disconnected (e.g because of network problems), but at least it avoids - * an open replication connection from failing because of that. + * Delete old log files (those no longer needed even for previous + * checkpoint or the standbys in XLOG streaming). 
*/ - if ((_logId || _logSeg) && max_wal_senders > 0) + if (_logId || _logSeg) { - XLogRecPtr oldest; - uint32 log; - uint32 seg; - - oldest = GetOldestWALSendPointer(); - if (oldest.xlogid != 0 || oldest.xrecoff != 0) + /* + * Calculate the last segment that we need to retain because of + * standby_keep_segments, by subtracting StandbySegments from the + * new checkpoint location. + */ + if (StandbySegments > 0) { - XLByteToSeg(oldest, log, seg); + uint32 log; + uint32 seg; + int d_log; + int d_seg; + + XLByteToSeg(recptr, log, seg); + + d_seg = StandbySegments % XLogSegsPerFile; + d_log = StandbySegments / XLogSegsPerFile; + if (seg < d_seg) + { + d_log += 1; + seg = seg - d_seg + XLogSegsPerFile; + } + else + seg = seg - d_seg; + /* avoid underflow, don't go below (0,1) */ + if (log < d_log || (log == d_log && seg == 0)) + { + log = 0; + seg = 1; + } + else + log = log - d_log; + + /* don't delete WAL segments newer than the calculated segment */ if (log < _logId || (log == _logId && seg < _logSeg)) { _logId = log; _logSeg = seg; } } - } - /* - * Delete old log files (those no longer needed even for previous - * checkpoint or the standbys in XLOG streaming). 
- */ - if (_logId || _logSeg) - { PrevLogSeg(_logId, _logSeg); RemoveOldXlogFiles(_logId, _logSeg, recptr); } diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index e04e5ba65ca..aa8fbc1a40b 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -30,7 +30,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/replication/walsender.c,v 1.14 2010/04/01 00:43:29 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/replication/walsender.c,v 1.15 2010/04/12 09:52:29 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -508,6 +508,10 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes) { char path[MAXPGPATH]; uint32 startoff; + uint32 lastRemovedLog; + uint32 lastRemovedSeg; + uint32 log; + uint32 seg; while (nbytes > 0) { @@ -527,10 +531,27 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes) sendFile = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0); if (sendFile < 0) - ereport(FATAL, /* XXX: Why FATAL? */ - (errcode_for_file_access(), - errmsg("could not open file \"%s\" (log file %u, segment %u): %m", - path, sendId, sendSeg))); + { + /* + * If the file is not found, assume it's because the + * standby asked for a too old WAL segment that has already + * been removed or recycled. 
+ */ + if (errno == ENOENT) + { + char filename[MAXFNAMELEN]; + XLogFileName(filename, ThisTimeLineID, sendId, sendSeg); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("requested WAL segment %s has already been removed", + filename))); + } + else + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\" (log file %u, segment %u): %m", + path, sendId, sendSeg))); + } sendOff = 0; } @@ -538,7 +559,7 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes) if (sendOff != startoff) { if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0) - ereport(FATAL, + ereport(ERROR, (errcode_for_file_access(), errmsg("could not seek in log file %u, segment %u to offset %u: %m", sendId, sendSeg, startoff))); @@ -553,7 +574,7 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes) readbytes = read(sendFile, buf, segbytes); if (readbytes <= 0) - ereport(FATAL, + ereport(ERROR, (errcode_for_file_access(), errmsg("could not read from log file %u, segment %u, offset %u, " "length %lu: %m", @@ -566,6 +587,26 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes) nbytes -= readbytes; buf += readbytes; } + + /* + * After reading into the buffer, check that what we read was valid. + * We do this after reading, because even though the segment was present + * when we opened it, it might get recycled or removed while we read it. + * The read() succeeds in that case, but the data we tried to read might + * already have been overwritten with new WAL records. + */ + XLogGetLastRemoved(&lastRemovedLog, &lastRemovedSeg); + XLByteToPrevSeg(recptr, log, seg); + if (log < lastRemovedLog || + (log == lastRemovedLog && seg <= lastRemovedSeg)) + { + char filename[MAXFNAMELEN]; + XLogFileName(filename, ThisTimeLineID, log, seg); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("requested WAL segment %s has already been removed", + filename))); + } } /* @@ -801,6 +842,12 @@ WalSndShmemInit(void) } } +/* + * This isn't currently used for anything. 
Monitoring tools might be + * interested in it, and we'll need something like this in the + * future for synchronous replication. + */ +#ifdef NOT_USED /* * Returns the oldest Send position among walsenders. Or InvalidXLogRecPtr * if none. @@ -834,3 +881,4 @@ GetOldestWALSendPointer(void) } return oldest; } +#endif diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 5f8cc494893..9d72a0e5736 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -10,7 +10,7 @@ * Written by Peter Eisentraut . * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.546 2010/04/01 00:43:29 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.547 2010/04/12 09:52:29 heikki Exp $ * *-------------------------------------------------------------------- */ @@ -1647,6 +1647,15 @@ static struct config_int ConfigureNamesInt[] = 0, 0, 60, NULL, NULL }, + { + {"standby_keep_segments", PGC_SIGHUP, WAL_CHECKPOINTS, + gettext_noop("Sets the number of WAL files held for standby servers"), + NULL + }, + &StandbySegments, + 0, 0, INT_MAX, NULL, NULL + }, + { {"checkpoint_segments", PGC_SIGHUP, WAL_CHECKPOINTS, gettext_noop("Sets the maximum distance in log segments between automatic WAL checkpoints."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 02f1df01038..48c09d14670 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -193,6 +193,7 @@ #max_wal_senders = 0 # max number of walsender processes #wal_sender_delay = 200ms # 1-10000 milliseconds +#standby_keep_segments = 0 # in logfile segments, 16MB each; 0 disables #------------------------------------------------------------------------------ diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 9a66e9134d4..de7406a808b 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -6,7 +6,7 @@ * 
Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.105 2010/04/01 00:43:29 rhaas Exp $ + * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.106 2010/04/12 09:52:29 heikki Exp $ */ #ifndef XLOG_H #define XLOG_H @@ -187,6 +187,7 @@ extern XLogRecPtr XactLastRecEnd; /* these variables are GUC parameters related to XLOG */ extern int CheckPointSegments; +extern int StandbySegments; extern int XLOGbuffers; extern bool XLogArchiveMode; extern char *XLogArchiveCommand; @@ -267,6 +268,7 @@ extern int XLogFileInit(uint32 log, uint32 seg, extern int XLogFileOpen(uint32 log, uint32 seg); +extern void XLogGetLastRemoved(uint32 *log, uint32 *seg); extern void XLogSetAsyncCommitLSN(XLogRecPtr record); extern void RestoreBkpBlocks(XLogRecPtr lsn, XLogRecord *record, bool cleanup);