1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-29 10:41:53 +03:00

Skip WAL recycling and preallocation during archive recovery.

The previous commit addressed the chief consequences of a race condition
between InstallXLogFileSegment() and KeepFileRestoredFromArchive().  Fix
three lesser consequences.  A spurious durable_rename_excl() LOG message
remained possible.  KeepFileRestoredFromArchive() wasted the proceeds of
WAL recycling and preallocation.  Finally, XLogFileInitInternal() could
return a descriptor for a file that KeepFileRestoredFromArchive() had
already unlinked.  That felt like a recipe for future bugs.

This commit has been applied as of cc2c7d65fc in v15 and newer
versions.  This is required on stable branches of v13 and v14 to fix a
regression reported by Noah Misch, introduced by 1f95181b44, causing
spurious failures in archive recovery (neither streaming nor archive
recovery) with concurrent restartpoints.  The backpatched versions of
the patches have been aligned on these branches by me; Noah Misch is the
author.  Tests have been conducted by both of us.

Note that this commit is known to have introduced a regression of its
own.  That regression is fixed by the commit following this one; the two
are kept as separate commits to keep the commit history consistent
across all branches.

Reported-by: Arun Thirupathi
Author: Noah Misch <noah@leadboat.com>
Discussion: https://postgr.es/m/20210202151416.GB3304930@rfd.leadboat.com
Discussion: https://postgr.es/m/20250306193013.36.nmisch@google.com
Backpatch-through: 13
This commit is contained in:
Noah Misch
2021-06-28 18:34:56 -07:00
committed by Michael Paquier
parent cbed472a93
commit a5b0c06daa

View File

@ -675,6 +675,16 @@ typedef struct XLogCtlData
*/ */
bool SharedHotStandbyActive; bool SharedHotStandbyActive;
/*
* InstallXLogFileSegmentActive indicates whether the checkpointer should
* arrange for future segments by recycling and/or PreallocXlogFiles().
* Protected by ControlFileLock. Only the startup process changes it. If
* true, anyone can use InstallXLogFileSegment(). If false, the startup
* process owns the exclusive right to install segments, by reading from
* the archive and possibly replacing existing files.
*/
bool InstallXLogFileSegmentActive;
/* /*
* SharedPromoteIsTriggered indicates if a standby promotion has been * SharedPromoteIsTriggered indicates if a standby promotion has been
* triggered. Protected by info_lck. * triggered. Protected by info_lck.
@ -925,6 +935,7 @@ static int XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
int reqLen, XLogRecPtr targetRecPtr, char *readBuf); int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
bool fetching_ckpt, XLogRecPtr tliRecPtr); bool fetching_ckpt, XLogRecPtr tliRecPtr);
static void XLogShutdownWalRcv(void);
static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr); static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
static void XLogFileClose(void); static void XLogFileClose(void);
static void PreallocXlogFiles(XLogRecPtr endptr); static void PreallocXlogFiles(XLogRecPtr endptr);
@ -3608,8 +3619,8 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
* is false.) * is false.)
* *
* Returns true if the file was installed successfully. false indicates that * Returns true if the file was installed successfully. false indicates that
* max_segno limit was exceeded, or an error occurred while renaming the * max_segno limit was exceeded, the startup process has disabled this
* file into place. * function for now, or an error occurred while renaming the file into place.
*/ */
static bool static bool
InstallXLogFileSegment(XLogSegNo *segno, char *tmppath, InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
@ -3621,6 +3632,11 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
XLogFilePath(path, ThisTimeLineID, *segno, wal_segment_size); XLogFilePath(path, ThisTimeLineID, *segno, wal_segment_size);
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
if (!XLogCtl->InstallXLogFileSegmentActive)
{
LWLockRelease(ControlFileLock);
return false;
}
if (!find_free) if (!find_free)
{ {
@ -3725,6 +3741,7 @@ XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
*/ */
if (source == XLOG_FROM_ARCHIVE) if (source == XLOG_FROM_ARCHIVE)
{ {
Assert(!XLogCtl->InstallXLogFileSegmentActive);
KeepFileRestoredFromArchive(path, xlogfname); KeepFileRestoredFromArchive(path, xlogfname);
/* /*
@ -3926,6 +3943,9 @@ PreallocXlogFiles(XLogRecPtr endptr)
char path[MAXPGPATH]; char path[MAXPGPATH];
uint64 offset; uint64 offset;
if (!XLogCtl->InstallXLogFileSegmentActive)
return; /* unlocked check says no */
XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size); XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size);
offset = XLogSegmentOffset(endptr - 1, wal_segment_size); offset = XLogSegmentOffset(endptr - 1, wal_segment_size);
if (offset >= (uint32) (0.75 * wal_segment_size)) if (offset >= (uint32) (0.75 * wal_segment_size))
@ -4207,6 +4227,7 @@ RemoveXlogFile(const char *segname, XLogRecPtr lastredoptr, XLogRecPtr endptr)
*/ */
if (wal_recycle && if (wal_recycle &&
endlogSegNo <= recycleSegNo && endlogSegNo <= recycleSegNo &&
XLogCtl->InstallXLogFileSegmentActive && /* callee rechecks this */
lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) && lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) &&
InstallXLogFileSegment(&endlogSegNo, path, InstallXLogFileSegment(&endlogSegNo, path,
true, recycleSegNo)) true, recycleSegNo))
@ -4220,7 +4241,7 @@ RemoveXlogFile(const char *segname, XLogRecPtr lastredoptr, XLogRecPtr endptr)
} }
else else
{ {
/* No need for any more future segments... */ /* No need for any more future segments, or recycling failed ... */
int rc; int rc;
ereport(DEBUG2, ereport(DEBUG2,
@ -5225,6 +5246,7 @@ XLOGShmemInit(void)
XLogCtl->XLogCacheBlck = XLOGbuffers - 1; XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH; XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
XLogCtl->SharedHotStandbyActive = false; XLogCtl->SharedHotStandbyActive = false;
XLogCtl->InstallXLogFileSegmentActive = false;
XLogCtl->SharedPromoteIsTriggered = false; XLogCtl->SharedPromoteIsTriggered = false;
XLogCtl->WalWriterSleeping = false; XLogCtl->WalWriterSleeping = false;
@ -5251,6 +5273,11 @@ BootStrapXLOG(void)
struct timeval tv; struct timeval tv;
pg_crc32c crc; pg_crc32c crc;
/* allow ordinary WAL segment creation, like StartupXLOG() would */
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
XLogCtl->InstallXLogFileSegmentActive = true;
LWLockRelease(ControlFileLock);
/* /*
* Select a hopefully-unique system identifier code for this installation. * Select a hopefully-unique system identifier code for this installation.
* We use the result of gettimeofday(), including the fractional seconds * We use the result of gettimeofday(), including the fractional seconds
@ -7531,7 +7558,7 @@ StartupXLOG(void)
* over these records and subsequent ones if it's still alive when we * over these records and subsequent ones if it's still alive when we
* start writing WAL. * start writing WAL.
*/ */
ShutdownWalRcv(); XLogShutdownWalRcv();
/* /*
* Reset unlogged relations to the contents of their INIT fork. This is * Reset unlogged relations to the contents of their INIT fork. This is
@ -7556,7 +7583,7 @@ StartupXLOG(void)
* recovery, e.g., timeline history file) from archive or pg_wal. * recovery, e.g., timeline history file) from archive or pg_wal.
* *
* Note that standby mode must be turned off after killing WAL receiver, * Note that standby mode must be turned off after killing WAL receiver,
* i.e., calling ShutdownWalRcv(). * i.e., calling XLogShutdownWalRcv().
*/ */
Assert(!WalRcvStreaming()); Assert(!WalRcvStreaming());
StandbyMode = false; StandbyMode = false;
@ -7625,6 +7652,14 @@ StartupXLOG(void)
*/ */
oldestActiveXID = PrescanPreparedTransactions(NULL, NULL); oldestActiveXID = PrescanPreparedTransactions(NULL, NULL);
/*
* Allow ordinary WAL segment creation before any exitArchiveRecovery(),
* which sometimes creates a segment, and after the last ReadRecord().
*/
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
XLogCtl->InstallXLogFileSegmentActive = true;
LWLockRelease(ControlFileLock);
/* /*
* Consider whether we need to assign a new timeline ID. * Consider whether we need to assign a new timeline ID.
* *
@ -12491,7 +12526,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
*/ */
if (StandbyMode && CheckForStandbyTrigger()) if (StandbyMode && CheckForStandbyTrigger())
{ {
ShutdownWalRcv(); XLogShutdownWalRcv();
return false; return false;
} }
@ -12539,7 +12574,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
* WAL that we restore from archive. * WAL that we restore from archive.
*/ */
if (WalRcvStreaming()) if (WalRcvStreaming())
ShutdownWalRcv(); XLogShutdownWalRcv();
/* /*
* Before we sleep, re-scan for possible new timelines if * Before we sleep, re-scan for possible new timelines if
@ -12669,7 +12704,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
*/ */
if (pendingWalRcvRestart && !startWalReceiver) if (pendingWalRcvRestart && !startWalReceiver)
{ {
ShutdownWalRcv(); XLogShutdownWalRcv();
/* /*
* Re-scan for possible new timelines if we were * Re-scan for possible new timelines if we were
@ -12720,6 +12755,9 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
tli, curFileTLI); tli, curFileTLI);
} }
curFileTLI = tli; curFileTLI = tli;
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
XLogCtl->InstallXLogFileSegmentActive = true;
LWLockRelease(ControlFileLock);
RequestXLogStreaming(tli, ptr, PrimaryConnInfo, RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
PrimarySlotName, PrimarySlotName,
wal_receiver_create_temp_slot); wal_receiver_create_temp_slot);
@ -12882,6 +12920,17 @@ StartupRequestWalReceiverRestart(void)
} }
} }
/*
 * Thin wrapper around ShutdownWalRcv().
 *
 * Calls ShutdownWalRcv() and then sets
 * XLogCtl->InstallXLogFileSegmentActive to false while holding
 * ControlFileLock in exclusive mode.  While that flag is false,
 * InstallXLogFileSegment() returns false without installing a segment.
 *
 * Takes no arguments and returns nothing.
 *
 * NOTE(review): the flag is presumably cleared only after ShutdownWalRcv()
 * so that the startup process takes over segment installation once the WAL
 * receiver is stopped -- confirm against ShutdownWalRcv()'s contract.
 */
static void
XLogShutdownWalRcv(void)
{
ShutdownWalRcv();
/*
 * Change the flag under ControlFileLock, the same lock
 * InstallXLogFileSegment() holds when it tests the flag.
 */
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
XLogCtl->InstallXLogFileSegmentActive = false;
LWLockRelease(ControlFileLock);
}
/* /*
* Determine what log level should be used to report a corrupt WAL record * Determine what log level should be used to report a corrupt WAL record
* in the current WAL page, previously read by XLogPageRead(). * in the current WAL page, previously read by XLogPageRead().