diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 05d5bedef93..c654a364124 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -64,6 +64,48 @@ PostgreSQL documentation
better from a performance point of view to take only one backup, and copy
the result.
+
+
+ pg_basebackup can make a base backup from
+ not only the master but also the standby. To take a backup from the standby,
+ set up the standby so that it can accept replication connections (that is, set
+ <varname>max_wal_senders</varname> and <xref linkend="guc-hot-standby">,
+ and configure host-based authentication).
+ You will also need to enable <xref linkend="guc-full-page-writes"> on the master.
+
+
+
+ Note that there are some limitations in an online backup from the standby:
+
+
+
+
+ The backup history file is not created in the database cluster backed up.
+
+
+
+
+ There is no guarantee that all WAL files required for the backup are archived
+ at the end of backup. If you are planning to use the backup for an archive
+ recovery and want to ensure that all required files are available at that moment,
+ you need to include them in the backup by using the <literal>-x</literal> option.
+
+
+
+
+ If the standby is promoted to the master during online backup, the backup fails.
+
+
+
+
+ All WAL records required for the backup must contain sufficient full-page writes,
+ which requires you to enable <varname>full_page_writes</varname> on the master and
+ not to use a tool like <filename>pg_compresslog</filename> as
+ <varname>archive_command</varname> to remove full-page writes from WAL files.
+
+
+
+
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index ce659ec63d9..4b273a8318f 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -156,6 +156,14 @@ HotStandbyState standbyState = STANDBY_DISABLED;
static XLogRecPtr LastRec;
+/*
+ * During recovery, lastFullPageWrites keeps track of full_page_writes that
+ * the replayed WAL records indicate. It's initialized with full_page_writes
+ * that the recovery starting checkpoint record indicates, and then updated
+ * each time XLOG_FPW_CHANGE record is replayed.
+ */
+static bool lastFullPageWrites;
+
/*
* Local copy of SharedRecoveryInProgress variable. True actually means "not
* known, need to check the shared state".
@@ -354,6 +362,15 @@ typedef struct XLogCtlInsert
XLogRecPtr RedoRecPtr; /* current redo point for insertions */
bool forcePageWrites; /* forcing full-page writes for PITR? */
+ /*
+ * fullPageWrites is the master copy used by all backends to determine
+ * whether to write full-page images to WAL, instead of using the
+ * process-local one. This is required because, when full_page_writes is
+ * changed by SIGHUP, we must WAL-log it before it actually affects
+ * WAL-logging by backends. Checkpointer sets it at startup or after SIGHUP.
+ */
+ bool fullPageWrites;
+
/*
* exclusiveBackup is true if a backup started with pg_start_backup() is
* in progress, and nonExclusiveBackups is a counter indicating the number
@@ -460,6 +477,12 @@ typedef struct XLogCtlData
/* Are we requested to pause recovery? */
bool recoveryPause;
+ /*
+ * lastFpwDisableRecPtr points to the start of the last replayed
+ * XLOG_FPW_CHANGE record that indicates full_page_writes is disabled.
+ */
+ XLogRecPtr lastFpwDisableRecPtr;
+
slock_t info_lck; /* locks shared variables shown above */
} XLogCtlData;
@@ -663,7 +686,7 @@ static void xlog_outrec(StringInfo buf, XLogRecord *record);
#endif
static void pg_start_backup_callback(int code, Datum arg);
static bool read_backup_label(XLogRecPtr *checkPointLoc,
- bool *backupEndRequired);
+ bool *backupEndRequired, bool *backupFromStandby);
static void rm_redo_error_callback(void *arg);
static int get_sync_bit(int method);
@@ -708,7 +731,8 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
unsigned i;
bool updrqst;
bool doPageWrites;
- bool isLogSwitch = (rmid == RM_XLOG_ID && info == XLOG_SWITCH);
+ bool isLogSwitch = false;
+ bool fpwChange = false;
uint8 info_orig = info;
/* cross-check on whether we should be here or not */
@@ -722,11 +746,30 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
TRACE_POSTGRESQL_XLOG_INSERT(rmid, info);
/*
- * In bootstrap mode, we don't actually log anything but XLOG resources;
- * return a phony record pointer.
+ * Handle special cases/records.
*/
- if (IsBootstrapProcessingMode() && rmid != RM_XLOG_ID)
+ if (rmid == RM_XLOG_ID)
{
+ switch (info)
+ {
+ case XLOG_SWITCH:
+ isLogSwitch = true;
+ break;
+
+ case XLOG_FPW_CHANGE:
+ fpwChange = true;
+ break;
+
+ default:
+ break;
+ }
+ }
+ else if (IsBootstrapProcessingMode())
+ {
+ /*
+ * In bootstrap mode, we don't actually log anything but XLOG resources;
+ * return a phony record pointer.
+ */
RecPtr.xlogid = 0;
RecPtr.xrecoff = SizeOfXLogLongPHD; /* start of 1st chkpt record */
return RecPtr;
@@ -756,10 +799,10 @@ begin:;
/*
* Decide if we need to do full-page writes in this XLOG record: true if
* full_page_writes is on or we have a PITR request for it. Since we
- * don't yet have the insert lock, forcePageWrites could change under us,
- * but we'll recheck it once we have the lock.
+ * don't yet have the insert lock, fullPageWrites and forcePageWrites
+ * could change under us, but we'll recheck them once we have the lock.
*/
- doPageWrites = fullPageWrites || Insert->forcePageWrites;
+ doPageWrites = Insert->fullPageWrites || Insert->forcePageWrites;
len = 0;
for (rdt = rdata;;)
@@ -939,12 +982,12 @@ begin:;
}
/*
- * Also check to see if forcePageWrites was just turned on; if we weren't
- * already doing full-page writes then go back and recompute. (If it was
- * just turned off, we could recompute the record without full pages, but
- * we choose not to bother.)
+ * Also check to see if fullPageWrites or forcePageWrites was just turned on;
+ * if we weren't already doing full-page writes then go back and recompute.
+ * (If it was just turned off, we could recompute the record without full pages,
+ * but we choose not to bother.)
*/
- if (Insert->forcePageWrites && !doPageWrites)
+ if ((Insert->fullPageWrites || Insert->forcePageWrites) && !doPageWrites)
{
/* Oops, must redo it with full-page data. */
LWLockRelease(WALInsertLock);
@@ -1189,6 +1232,15 @@ begin:;
WriteRqst = XLogCtl->xlblocks[curridx];
}
+ /*
+ * If the record is an XLOG_FPW_CHANGE, we update full_page_writes
+ * in shared memory before releasing WALInsertLock. This ensures that
+ * an XLOG_FPW_CHANGE record precedes any WAL record affected
+ * by this change of full_page_writes.
+ */
+ if (fpwChange)
+ Insert->fullPageWrites = fullPageWrites;
+
LWLockRelease(WALInsertLock);
if (updrqst)
@@ -5147,6 +5199,7 @@ BootStrapXLOG(void)
checkPoint.redo.xlogid = 0;
checkPoint.redo.xrecoff = XLogSegSize + SizeOfXLogLongPHD;
checkPoint.ThisTimeLineID = ThisTimeLineID;
+ checkPoint.fullPageWrites = fullPageWrites;
checkPoint.nextXidEpoch = 0;
checkPoint.nextXid = FirstNormalTransactionId;
checkPoint.nextOid = FirstBootstrapObjectId;
@@ -5961,6 +6014,8 @@ StartupXLOG(void)
uint32 freespace;
TransactionId oldestActiveXID;
bool backupEndRequired = false;
+ bool backupFromStandby = false;
+ DBState dbstate_at_startup;
/*
* Read control file and check XLOG status looks valid.
@@ -6094,7 +6149,8 @@ StartupXLOG(void)
if (StandbyMode)
OwnLatch(&XLogCtl->recoveryWakeupLatch);
- if (read_backup_label(&checkPointLoc, &backupEndRequired))
+ if (read_backup_label(&checkPointLoc, &backupEndRequired,
+ &backupFromStandby))
{
/*
* When a backup_label file is present, we want to roll forward from
@@ -6210,6 +6266,8 @@ StartupXLOG(void)
*/
ThisTimeLineID = checkPoint.ThisTimeLineID;
+ lastFullPageWrites = checkPoint.fullPageWrites;
+
RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
if (XLByteLT(RecPtr, checkPoint.redo))
@@ -6250,6 +6308,7 @@ StartupXLOG(void)
* pg_control with any minimum recovery stop point obtained from a
* backup history file.
*/
+ dbstate_at_startup = ControlFile->state;
if (InArchiveRecovery)
ControlFile->state = DB_IN_ARCHIVE_RECOVERY;
else
@@ -6270,12 +6329,28 @@ StartupXLOG(void)
}
/*
- * set backupStartPoint if we're starting recovery from a base backup
+ * Set backupStartPoint if we're starting recovery from a base backup.
+ *
+ * Set backupEndPoint and use minRecoveryPoint as the backup end location
+ * if we're starting recovery from a base backup which was taken from
+ * the standby. In this case, the database system status in pg_control must
+ * indicate DB_IN_ARCHIVE_RECOVERY. If not, the backup is corrupted,
+ * so we cancel recovery.
*/
if (haveBackupLabel)
{
ControlFile->backupStartPoint = checkPoint.redo;
ControlFile->backupEndRequired = backupEndRequired;
+
+ if (backupFromStandby)
+ {
+ if (dbstate_at_startup != DB_IN_ARCHIVE_RECOVERY)
+ ereport(FATAL,
+ (errmsg("backup_label contains inconsistent data with control file"),
+ errhint("This means that the backup is corrupted and you will "
+ "have to use another backup for recovery.")));
+ ControlFile->backupEndPoint = ControlFile->minRecoveryPoint;
+ }
}
ControlFile->time = (pg_time_t) time(NULL);
/* No need to hold ControlFileLock yet, we aren't up far enough */
@@ -6564,6 +6639,27 @@ StartupXLOG(void)
/* Pop the error context stack */
error_context_stack = errcontext.previous;
+ if (!XLogRecPtrIsInvalid(ControlFile->backupStartPoint) &&
+ XLByteLE(ControlFile->backupEndPoint, EndRecPtr))
+ {
+ /*
+ * We have reached the end of base backup, the point that
+ * the minimum recovery point in pg_control indicates.
+ * The data on disk is now consistent. Reset backupStartPoint
+ * and backupEndPoint.
+ */
+ elog(DEBUG1, "end of backup reached");
+
+ LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+
+ MemSet(&ControlFile->backupStartPoint, 0, sizeof(XLogRecPtr));
+ MemSet(&ControlFile->backupEndPoint, 0, sizeof(XLogRecPtr));
+ ControlFile->backupEndRequired = false;
+ UpdateControlFile();
+
+ LWLockRelease(ControlFileLock);
+ }
+
/*
* Update shared recoveryLastRecPtr after this record has been
* replayed.
@@ -6763,6 +6859,16 @@ StartupXLOG(void)
/* Pre-scan prepared transactions to find out the range of XIDs present */
oldestActiveXID = PrescanPreparedTransactions(NULL, NULL);
+ /*
+ * Update full_page_writes in shared memory and write an
+ * XLOG_FPW_CHANGE record before resource manager writes cleanup
+ * WAL records or checkpoint record is written.
+ */
+ Insert->fullPageWrites = lastFullPageWrites;
+ LocalSetXLogInsertAllowed();
+ UpdateFullPageWrites();
+ LocalXLogInsertAllowed = -1;
+
if (InRecovery)
{
int rmid;
@@ -7644,6 +7750,7 @@ CreateCheckPoint(int flags)
LocalSetXLogInsertAllowed();
checkPoint.ThisTimeLineID = ThisTimeLineID;
+ checkPoint.fullPageWrites = Insert->fullPageWrites;
/*
* Compute new REDO record ptr = location of next XLOG record.
@@ -8358,6 +8465,48 @@ XLogReportParameters(void)
}
}
+/*
+ * Bring the shared-memory copy of full_page_writes in line with this
+ * process's GUC value, writing an XLOG_FPW_CHANGE record if necessary.
+ */
+void
+UpdateFullPageWrites(void)
+{
+ XLogCtlInsert *Insert = &XLogCtl->Insert;
+
+ /*
+ * Do nothing if full_page_writes has not been changed.
+ *
+ * It's safe to check the shared full_page_writes without the lock,
+ * because we can guarantee that there is no concurrently running
+ * process which can update it.
+ */
+ if (fullPageWrites == Insert->fullPageWrites)
+ return;
+
+ /*
+ * Write an XLOG_FPW_CHANGE record so that full_page_writes can be tracked
+ * during archive recovery; otherwise just set the shared copy under lock.
+ */
+ if (XLogStandbyInfoActive() && !RecoveryInProgress())
+ {
+ XLogRecData rdata;
+
+ rdata.data = (char *) (&fullPageWrites);
+ rdata.len = sizeof(bool);
+ rdata.buffer = InvalidBuffer;
+ rdata.next = NULL;
+
+ XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE, &rdata);
+ }
+ else
+ {
+ LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
+ Insert->fullPageWrites = fullPageWrites;
+ LWLockRelease(WALInsertLock);
+ }
+}
+
/*
* XLOG resource manager's routines
*
@@ -8402,7 +8551,8 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
* never arrive.
*/
if (InArchiveRecovery &&
- !XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
+ !XLogRecPtrIsInvalid(ControlFile->backupStartPoint) &&
+ XLogRecPtrIsInvalid(ControlFile->backupEndPoint))
ereport(ERROR,
(errmsg("online backup was canceled, recovery cannot continue")));
@@ -8571,6 +8721,30 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
/* Check to see if any changes to max_connections give problems */
CheckRequiredParameterValues();
}
+ else if (info == XLOG_FPW_CHANGE)
+ {
+ /* use volatile pointer to prevent code rearrangement */
+ volatile XLogCtlData *xlogctl = XLogCtl;
+ bool fpw;
+
+ memcpy(&fpw, XLogRecGetData(record), sizeof(bool));
+
+ /*
+ * Update the LSN of the last replayed XLOG_FPW_CHANGE record
+ * so that do_pg_start_backup() and do_pg_stop_backup() can check
+ * whether full_page_writes has been disabled during online backup.
+ */
+ if (!fpw)
+ {
+ SpinLockAcquire(&xlogctl->info_lck);
+ if (XLByteLT(xlogctl->lastFpwDisableRecPtr, ReadRecPtr))
+ xlogctl->lastFpwDisableRecPtr = ReadRecPtr;
+ SpinLockRelease(&xlogctl->info_lck);
+ }
+
+ /* Keep track of full_page_writes */
+ lastFullPageWrites = fpw;
+ }
}
void
@@ -8584,10 +8758,11 @@ xlog_desc(StringInfo buf, uint8 xl_info, char *rec)
CheckPoint *checkpoint = (CheckPoint *) rec;
appendStringInfo(buf, "checkpoint: redo %X/%X; "
- "tli %u; xid %u/%u; oid %u; multi %u; offset %u; "
+ "tli %u; fpw %s; xid %u/%u; oid %u; multi %u; offset %u; "
"oldest xid %u in DB %u; oldest running xid %u; %s",
checkpoint->redo.xlogid, checkpoint->redo.xrecoff,
checkpoint->ThisTimeLineID,
+ checkpoint->fullPageWrites ? "true" : "false",
checkpoint->nextXidEpoch, checkpoint->nextXid,
checkpoint->nextOid,
checkpoint->nextMulti,
@@ -8652,6 +8827,13 @@ xlog_desc(StringInfo buf, uint8 xl_info, char *rec)
xlrec.max_locks_per_xact,
wal_level_str);
}
+ else if (info == XLOG_FPW_CHANGE)
+ {
+ bool fpw;
+
+ memcpy(&fpw, rec, sizeof(bool));
+ appendStringInfo(buf, "full_page_writes: %s", fpw ? "true" : "false");
+ }
else
appendStringInfo(buf, "UNKNOWN");
}
@@ -8837,6 +9019,7 @@ XLogRecPtr
do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile)
{
bool exclusive = (labelfile == NULL);
+ bool backup_started_in_recovery = false;
XLogRecPtr checkpointloc;
XLogRecPtr startpoint;
pg_time_t stamp_time;
@@ -8848,18 +9031,27 @@ do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile)
FILE *fp;
StringInfoData labelfbuf;
+ backup_started_in_recovery = RecoveryInProgress();
+
if (!superuser() && !is_authenticated_user_replication_role())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser or replication role to run a backup")));
- if (RecoveryInProgress())
+ /*
+ * Currently only non-exclusive backup can be taken during recovery.
+ */
+ if (backup_started_in_recovery && exclusive)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("recovery is in progress"),
errhint("WAL control functions cannot be executed during recovery.")));
- if (!XLogIsNeeded())
+ /*
+ * During recovery, we don't need to check WAL level, because if WAL level
+ * is not sufficient, it's impossible to get here during recovery.
+ */
+ if (!backup_started_in_recovery && !XLogIsNeeded())
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("WAL level not sufficient for making an online backup"),
@@ -8885,6 +9077,9 @@ do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile)
* since we expect that any pages not modified during the backup interval
* must have been correctly captured by the backup.)
*
+ * Note that forcePageWrites has no effect during an online backup from
+ * the standby.
+ *
* We must hold WALInsertLock to change the value of forcePageWrites, to
* ensure adequate interlocking against XLogInsert().
*/
@@ -8927,17 +9122,32 @@ do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile)
* Therefore, if a WAL archiver (such as pglesslog) is trying to
* compress out removable backup blocks, it won't remove any that
* occur after this point.
+ *
+ * During recovery, we skip forcing XLOG file switch, which means that
+ * the backup taken during recovery is not available for the special
+ * recovery case described above.
*/
- RequestXLogSwitch();
+ if (!backup_started_in_recovery)
+ RequestXLogSwitch();
do
{
+ bool checkpointfpw;
+
/*
- * Force a CHECKPOINT. Aside from being necessary to prevent torn
+ * Force a CHECKPOINT. Aside from being necessary to prevent torn
* page problems, this guarantees that two successive backup runs
* will have different checkpoint positions and hence different
* history file names, even if nothing happened in between.
*
+ * During recovery, establish a restartpoint if possible. We use the last
+ * restartpoint as the backup starting checkpoint. This means that two
+ * successive backup runs can have same checkpoint positions.
+ *
+ * The fact that we are executing do_pg_start_backup() during
+ * recovery means that checkpointer is running, so we can use
+ * RequestCheckpoint() to establish a restartpoint.
+ *
* We use CHECKPOINT_IMMEDIATE only if requested by user (via
* passing fast = true). Otherwise this can take awhile.
*/
@@ -8953,8 +9163,44 @@ do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile)
LWLockAcquire(ControlFileLock, LW_SHARED);
checkpointloc = ControlFile->checkPoint;
startpoint = ControlFile->checkPointCopy.redo;
+ checkpointfpw = ControlFile->checkPointCopy.fullPageWrites;
LWLockRelease(ControlFileLock);
+ if (backup_started_in_recovery)
+ {
+ /* use volatile pointer to prevent code rearrangement */
+ volatile XLogCtlData *xlogctl = XLogCtl;
+ XLogRecPtr recptr;
+
+ /*
+ * Check to see if all WAL replayed during online backup (i.e.,
+ * since last restartpoint used as backup starting checkpoint)
+ * contain full-page writes.
+ */
+ SpinLockAcquire(&xlogctl->info_lck);
+ recptr = xlogctl->lastFpwDisableRecPtr;
+ SpinLockRelease(&xlogctl->info_lck);
+
+ if (!checkpointfpw || XLByteLE(startpoint, recptr))
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("WAL generated with full_page_writes=off was replayed "
+ "since last restartpoint"),
+ errhint("This means that the backup being taken on standby "
+ "is corrupt and should not be used. "
+ "Enable full_page_writes and run CHECKPOINT on the master, "
+ "and then try an online backup again.")));
+
+ /*
+ * During recovery, since we don't use the end-of-backup WAL
+ * record and don't write the backup history file, the starting WAL
+ * location doesn't need to be unique. This means that two base
+ * backups started at the same time might use the same checkpoint
+ * as starting locations.
+ */
+ gotUniqueStartpoint = true;
+ }
+
/*
* If two base backups are started at the same time (in WAL sender
* processes), we need to make sure that they use different
@@ -8994,6 +9240,8 @@ do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile)
checkpointloc.xlogid, checkpointloc.xrecoff);
appendStringInfo(&labelfbuf, "BACKUP METHOD: %s\n",
exclusive ? "pg_start_backup" : "streamed");
+ appendStringInfo(&labelfbuf, "BACKUP FROM: %s\n",
+ backup_started_in_recovery ? "standby" : "master");
appendStringInfo(&labelfbuf, "START TIME: %s\n", strfbuf);
appendStringInfo(&labelfbuf, "LABEL: %s\n", backupidstr);
@@ -9088,6 +9336,7 @@ XLogRecPtr
do_pg_stop_backup(char *labelfile, bool waitforarchive)
{
bool exclusive = (labelfile == NULL);
+ bool backup_started_in_recovery = false;
XLogRecPtr startpoint;
XLogRecPtr stoppoint;
XLogRecData rdata;
@@ -9098,6 +9347,7 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive)
char stopxlogfilename[MAXFNAMELEN];
char lastxlogfilename[MAXFNAMELEN];
char histfilename[MAXFNAMELEN];
+ char backupfrom[20];
uint32 _logId;
uint32 _logSeg;
FILE *lfp;
@@ -9107,19 +9357,29 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive)
int waits = 0;
bool reported_waiting = false;
char *remaining;
+ char *ptr;
+
+ backup_started_in_recovery = RecoveryInProgress();
if (!superuser() && !is_authenticated_user_replication_role())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
(errmsg("must be superuser or replication role to run a backup"))));
- if (RecoveryInProgress())
+ /*
+ * Currently only non-exclusive backup can be taken during recovery.
+ */
+ if (backup_started_in_recovery && exclusive)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("recovery is in progress"),
errhint("WAL control functions cannot be executed during recovery.")));
- if (!XLogIsNeeded())
+ /*
+ * During recovery, we don't need to check WAL level, because if WAL level
+ * is not sufficient, it's impossible to get here during recovery.
+ */
+ if (!backup_started_in_recovery && !XLogIsNeeded())
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("WAL level not sufficient for making an online backup"),
@@ -9209,6 +9469,82 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive)
errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
remaining = strchr(labelfile, '\n') + 1; /* %n is not portable enough */
+ /*
+ * Parse the BACKUP FROM line; a label file without one is reported as
+ * invalid rather than handing a NULL pointer to sscanf(). If the backup was
+ * taken from the standby, confirm it has not been promoted meanwhile.
+ */
+ ptr = strstr(remaining, "BACKUP FROM:");
+ if (!ptr || sscanf(ptr, "BACKUP FROM: %19s\n", backupfrom) != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
+ if (strcmp(backupfrom, "standby") == 0 && !backup_started_in_recovery)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("the standby was promoted during online backup"),
+ errhint("This means that the backup being taken is corrupt "
+ "and should not be used. "
+ "Try taking another online backup.")));
+
+ /*
+ * During recovery, we don't write an end-of-backup record. We assume
+ * that pg_control was backed up last and its minimum recovery
+ * point can be available as the backup end location. Since we don't
+ * have an end-of-backup record, we use the pg_control value to check
+ * whether we've reached the end of backup when starting recovery from
+ * this backup. We have no way of checking if pg_control wasn't backed
+ * up last however.
+ *
+ * We don't force a switch to new WAL file and wait for all the required
+ * files to be archived. This is okay if we use the backup to start
+ * the standby. But, if it's for an archive recovery, to ensure all the
+ * required files are available, a user should wait for them to be archived,
+ * or include them into the backup.
+ *
+ * We return the current minimum recovery point as the backup end
+ * location. Note that it would be bigger than the exact backup end
+ * location if the minimum recovery point is updated since the backup
+ * of pg_control. This is harmless for current uses.
+ *
+ * XXX currently a backup history file is for informational and debug
+ * purposes only. It's not essential for an online backup. Furthermore,
+ * even if it's created, it will not be archived during recovery because
+ * an archiver is not invoked. So it doesn't seem worthwhile to write
+ * a backup history file during recovery.
+ */
+ if (backup_started_in_recovery)
+ {
+ /* use volatile pointer to prevent code rearrangement */
+ volatile XLogCtlData *xlogctl = XLogCtl;
+ XLogRecPtr recptr;
+
+ /*
+ * Check to see if all WAL replayed during online backup contain
+ * full-page writes.
+ */
+ SpinLockAcquire(&xlogctl->info_lck);
+ recptr = xlogctl->lastFpwDisableRecPtr;
+ SpinLockRelease(&xlogctl->info_lck);
+
+ if (XLByteLE(startpoint, recptr))
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("WAL generated with full_page_writes=off was replayed "
+ "during online backup"),
+ errhint("This means that the backup being taken on standby "
+ "is corrupt and should not be used. "
+ "Enable full_page_writes and run CHECKPOINT on the master, "
+ "and then try an online backup again.")));
+
+
+ LWLockAcquire(ControlFileLock, LW_SHARED);
+ stoppoint = ControlFile->minRecoveryPoint;
+ LWLockRelease(ControlFileLock);
+
+ return stoppoint;
+ }
+
/*
* Write the backup-end xlog record
*/
@@ -9454,18 +9790,22 @@ GetXLogWriteRecPtr(void)
* Returns TRUE if a backup_label was found (and fills the checkpoint
* location and its REDO location into *checkPointLoc and RedoStartLSN,
* respectively); returns FALSE if not. If this backup_label came from a
- * streamed backup, *backupEndRequired is set to TRUE.
+ * streamed backup, *backupEndRequired is set to TRUE. If this backup_label
+ * was created during recovery, *backupFromStandby is set to TRUE.
*/
static bool
-read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired)
+read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired,
+ bool *backupFromStandby)
{
char startxlogfilename[MAXFNAMELEN];
TimeLineID tli;
FILE *lfp;
char ch;
char backuptype[20];
+ char backupfrom[20];
*backupEndRequired = false;
+ *backupFromStandby = false;
/*
* See if label file is present
@@ -9499,16 +9839,22 @@ read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired)
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
/*
- * BACKUP METHOD line is new in 9.1. We can't restore from an older backup
- * anyway, but since the information on it is not strictly required, don't
- * error out if it's missing for some reason.
+ * BACKUP METHOD and BACKUP FROM lines are new in 9.2. We can't
+ * restore from an older backup anyway, but since the information on it
+ * is not strictly required, don't error out if it's missing for some reason.
*/
- if (fscanf(lfp, "BACKUP METHOD: %19s", backuptype) == 1)
+ if (fscanf(lfp, "BACKUP METHOD: %19s\n", backuptype) == 1)
{
if (strcmp(backuptype, "streamed") == 0)
*backupEndRequired = true;
}
+ if (fscanf(lfp, "BACKUP FROM: %19s\n", backupfrom) == 1)
+ {
+ if (strcmp(backupfrom, "standby") == 0)
+ *backupFromStandby = true;
+ }
+
if (ferror(lfp) || FreeFile(lfp))
ereport(FATAL,
(errcode_for_file_access(),
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 0b792d2b105..76cb25cd382 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -171,6 +171,7 @@ static void CheckArchiveTimeout(void);
static bool IsCheckpointOnSchedule(double progress);
static bool ImmediateCheckpointRequested(void);
static bool CompactCheckpointerRequestQueue(void);
+static void UpdateSharedMemoryConfig(void);
/* Signal handlers */
@@ -351,8 +352,12 @@ CheckpointerMain(void)
if (RecoveryInProgress())
ThisTimeLineID = GetRecoveryTargetTLI();
- /* Do this once before starting the loop, then just at SIGHUP time. */
- SyncRepUpdateSyncStandbysDefined();
+ /*
+ * Ensure all shared memory values are set correctly for the config.
+ * Doing this here ensures no race conditions from other concurrent
+ * updaters.
+ */
+ UpdateSharedMemoryConfig();
/*
* Loop forever
@@ -380,8 +385,19 @@ CheckpointerMain(void)
{
got_SIGHUP = false;
ProcessConfigFile(PGC_SIGHUP);
- /* update global shmem state for sync rep */
- SyncRepUpdateSyncStandbysDefined();
+
+ /*
+ * Checkpointer is the last process to shut down, so we ask
+ * it to hold the keys for a range of other tasks required
+ * most of which have nothing to do with checkpointing at all.
+ *
+ * For various reasons, some config values can change
+ * dynamically, so the primary copy of them is held in
+ * shared memory to make sure all backends see the same value.
+ * We make Checkpointer responsible for updating the shared
+ * memory copy if the parameter setting changes because of SIGHUP.
+ */
+ UpdateSharedMemoryConfig();
}
if (checkpoint_requested)
{
@@ -1239,3 +1255,21 @@ AbsorbFsyncRequests(void)
END_CRIT_SECTION();
}
+
+/*
+ * Refresh shared-memory state derived from config parameters (sync rep, full_page_writes)
+ */
+static void
+UpdateSharedMemoryConfig(void)
+{
+ /* update global shmem state for sync rep */
+ SyncRepUpdateSyncStandbysDefined();
+
+ /*
+ * If full_page_writes has been changed (e.g. by SIGHUP), update it in
+ * shared memory and write an XLOG_FPW_CHANGE record if one is needed.
+ */
+ UpdateFullPageWrites();
+
+ elog(DEBUG2, "checkpointer updated shared memory configuration values");
+}
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index ad0c17ac7a8..9d242cbfcb7 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -3067,8 +3067,8 @@ PostmasterStateMachine(void)
else
{
/*
- * Terminate backup mode to avoid recovery after a clean fast
- * shutdown. Since a backup can only be taken during normal
+ * Terminate exclusive backup mode to avoid recovery after a clean fast
+ * shutdown. Since an exclusive backup can only be taken during normal
* running (and not, for example, while running under Hot Standby)
* it only makes sense to do this if we reached normal running. If
* we're still in recovery, the backup file is one we're
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index 2fa1f546135..81203c9f5ac 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -180,6 +180,22 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
ti->path == NULL ? 1 : strlen(ti->path),
false);
+ /* In the main tar, include pg_control last. */
+ if (ti->path == NULL)
+ {
+ struct stat statbuf;
+
+ if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
+ {
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not stat control file \"%s\": %m",
+ XLOG_CONTROL_FILE)));
+ }
+
+ sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf);
+ }
+
/*
* If we're including WAL, and this is the main data directory we
* don't terminate the tar stream here. Instead, we will append
@@ -361,11 +377,6 @@ SendBaseBackup(BaseBackupCmd *cmd)
MemoryContext old_context;
basebackup_options opt;
- if (am_cascading_walsender)
- ereport(FATAL,
- (errcode(ERRCODE_CANNOT_CONNECT_NOW),
- errmsg("recovery is still in progress, can't accept WAL streaming connections for backup")));
-
parse_basebackup_options(cmd->options, &opt);
backup_context = AllocSetContextCreate(CurrentMemoryContext,
@@ -609,6 +620,10 @@ sendDir(char *path, int basepathlen, bool sizeonly)
strcmp(pathbuf, "./postmaster.opts") == 0)
continue;
+ /* Skip pg_control here to back it up last */
+ if (strcmp(pathbuf, "./global/pg_control") == 0)
+ continue;
+
if (lstat(pathbuf, &statbuf) != 0)
{
if (errno != ENOENT)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index ec8f2f2309b..7df5292f951 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -130,7 +130,6 @@ extern int CommitSiblings;
extern char *default_tablespace;
extern char *temp_tablespaces;
extern bool synchronize_seqscans;
-extern bool fullPageWrites;
extern int ssl_renegotiation_limit;
extern char *SSLCipherSuites;
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c
index 9fafb7e8e76..c00183ab4fe 100644
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -209,6 +209,8 @@ main(int argc, char *argv[])
ControlFile.checkPointCopy.redo.xrecoff);
printf(_("Latest checkpoint's TimeLineID: %u\n"),
ControlFile.checkPointCopy.ThisTimeLineID);
+ printf(_("Latest checkpoint's full_page_writes: %s\n"),
+ ControlFile.checkPointCopy.fullPageWrites ? _("on") : _("off"));
printf(_("Latest checkpoint's NextXID: %u/%u\n"),
ControlFile.checkPointCopy.nextXidEpoch,
ControlFile.checkPointCopy.nextXid);
@@ -232,6 +234,9 @@ main(int argc, char *argv[])
printf(_("Backup start location: %X/%X\n"),
ControlFile.backupStartPoint.xlogid,
ControlFile.backupStartPoint.xrecoff);
+ printf(_("Backup end location: %X/%X\n"),
+ ControlFile.backupEndPoint.xlogid,
+ ControlFile.backupEndPoint.xrecoff);
printf(_("End-of-backup record required: %s\n"),
ControlFile.backupEndRequired ? _("yes") : _("no"));
printf(_("Current wal_level setting: %s\n"),
diff --git a/src/bin/pg_resetxlog/pg_resetxlog.c b/src/bin/pg_resetxlog/pg_resetxlog.c
index a14601ce7b4..a3cd37e37fd 100644
--- a/src/bin/pg_resetxlog/pg_resetxlog.c
+++ b/src/bin/pg_resetxlog/pg_resetxlog.c
@@ -489,6 +489,7 @@ GuessControlValues(void)
ControlFile.checkPointCopy.redo.xlogid = 0;
ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogLongPHD;
ControlFile.checkPointCopy.ThisTimeLineID = 1;
+ ControlFile.checkPointCopy.fullPageWrites = false;
ControlFile.checkPointCopy.nextXidEpoch = 0;
ControlFile.checkPointCopy.nextXid = FirstNormalTransactionId;
ControlFile.checkPointCopy.nextOid = FirstBootstrapObjectId;
@@ -503,7 +504,7 @@ GuessControlValues(void)
ControlFile.time = (pg_time_t) time(NULL);
ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
- /* minRecoveryPoint and backupStartPoint can be left zero */
+ /* minRecoveryPoint, backupStartPoint and backupEndPoint can be left zero */
ControlFile.wal_level = WAL_LEVEL_MINIMAL;
ControlFile.MaxConnections = 100;
@@ -569,6 +570,8 @@ PrintControlValues(bool guessed)
sysident_str);
printf(_("Latest checkpoint's TimeLineID: %u\n"),
ControlFile.checkPointCopy.ThisTimeLineID);
+ printf(_("Latest checkpoint's full_page_writes: %s\n"),
+ ControlFile.checkPointCopy.fullPageWrites ? _("on") : _("off"));
printf(_("Latest checkpoint's NextXID: %u/%u\n"),
ControlFile.checkPointCopy.nextXidEpoch,
ControlFile.checkPointCopy.nextXid);
@@ -637,6 +640,8 @@ RewriteControlFile(void)
ControlFile.minRecoveryPoint.xrecoff = 0;
ControlFile.backupStartPoint.xlogid = 0;
ControlFile.backupStartPoint.xrecoff = 0;
+ ControlFile.backupEndPoint.xlogid = 0;
+ ControlFile.backupEndPoint.xrecoff = 0;
ControlFile.backupEndRequired = false;
/*
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 1ddf4bf15f9..f8aecef665b 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -192,6 +192,7 @@ extern int XLogArchiveTimeout;
extern bool XLogArchiveMode;
extern char *XLogArchiveCommand;
extern bool EnableHotStandby;
+extern bool fullPageWrites;
extern bool log_checkpoints;
/* WAL levels */
@@ -307,6 +308,7 @@ extern void CreateCheckPoint(int flags);
extern bool CreateRestartPoint(int flags);
extern void XLogPutNextOid(Oid nextOid);
extern XLogRecPtr XLogRestorePoint(const char *rpName);
+extern void UpdateFullPageWrites(void);
extern XLogRecPtr GetRedoRecPtr(void);
extern XLogRecPtr GetInsertRecPtr(void);
extern XLogRecPtr GetFlushRecPtr(void);
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index db6380f7de4..b81c1568818 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -71,7 +71,7 @@ typedef struct XLogContRecord
/*
* Each page of XLOG file has a header like this:
*/
-#define XLOG_PAGE_MAGIC 0xD069 /* can be used as WAL version indicator */
+#define XLOG_PAGE_MAGIC 0xD070 /* can be used as WAL version indicator */
typedef struct XLogPageHeaderData
{
diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h
index d0d2e9e39d5..1031e565127 100644
--- a/src/include/catalog/pg_control.h
+++ b/src/include/catalog/pg_control.h
@@ -21,7 +21,7 @@
/* Version identifier for this pg_control format */
-#define PG_CONTROL_VERSION 921
+#define PG_CONTROL_VERSION 922
/*
* Body of CheckPoint XLOG records. This is declared here because we keep
@@ -33,6 +33,7 @@ typedef struct CheckPoint
XLogRecPtr redo; /* next RecPtr available when we began to
* create CheckPoint (i.e. REDO start point) */
TimeLineID ThisTimeLineID; /* current TLI */
+ bool fullPageWrites; /* current full_page_writes */
uint32 nextXidEpoch; /* higher-order bits of nextXid */
TransactionId nextXid; /* next free XID */
Oid nextOid; /* next free OID */
@@ -60,6 +61,7 @@ typedef struct CheckPoint
#define XLOG_BACKUP_END 0x50
#define XLOG_PARAMETER_CHANGE 0x60
#define XLOG_RESTORE_POINT 0x70
+#define XLOG_FPW_CHANGE 0x80
/*
@@ -138,6 +140,12 @@ typedef struct ControlFileData
* record, to make sure the end-of-backup record corresponds the base
* backup we're recovering from.
*
+ * backupEndPoint is the backup end location, if we are recovering from
+ * an online backup that was taken from a standby and have not yet reached
+ * the end of backup. It is initialized to the minimum recovery point
+ * recorded in the backed-up pg_control (which is backed up last). It is
+ * reset to zero when the end of backup is reached; we mustn't start up before that.
+ *
* If backupEndRequired is true, we know for sure that we're restoring
* from a backup, and must see a backup-end record before we can safely
* start up. If it's false, but backupStartPoint is set, a backup_label
@@ -146,6 +154,7 @@ typedef struct ControlFileData
*/
XLogRecPtr minRecoveryPoint;
XLogRecPtr backupStartPoint;
+ XLogRecPtr backupEndPoint;
bool backupEndRequired;
/*