1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-07 00:36:50 +03:00

Fix handling of WAL segments ready to be archived during crash recovery

78ea8b5 has fixed an issue related to the recycling of WAL segments on
standbys depending on archive_mode.  However, it has introduced a
regression with the handling of WAL segments ready to be archived during
crash recovery, causing those files to be recycled without getting
archived.

This commit fixes the regression by tracking in shared memory if a live
cluster is either in crash recovery or archive recovery as the handling
of WAL segments ready to be archived is different in both cases (those
WAL segments should not be removed during crash recovery), and by using
this new shared memory state to decide if a segment can be recycled or
not.  Previously, it was not possible to know if a cluster was in crash
recovery or archive recovery as the shared state was able to track only
if recovery was happening or not, leading to the problem.

A set of TAP tests is added to close the gap here, making sure that WAL
segments ready to be archived are correctly handled when a cluster is in
archive or crash recovery with archive_mode set to "on" or "always", for
both standby and primary.

Reported-by: Benoît Lobréau
Author: Jehan-Guillaume de Rorthais
Reviewed-by: Kyotaro Horiguchi, Fujii Masao, Michael Paquier
Discussion: https://postgr.es/m/20200331172229.40ee00dc@firost
Backpatch-through: 9.5
This commit is contained in:
Michael Paquier
2020-04-24 08:48:51 +09:00
parent c5abde5f35
commit c2d8ae0dfe
4 changed files with 303 additions and 16 deletions

View File

@ -204,8 +204,9 @@ static TimeLineID receiveTLI = 0;
static bool lastFullPageWrites;
/*
* Local copy of SharedRecoveryInProgress variable. True actually means "not
* known, need to check the shared state".
* Local copy of the state tracked by SharedRecoveryState in shared memory,
* It is false if SharedRecoveryState is RECOVERY_STATE_DONE. True actually
* means "not known, need to check the shared state".
*/
static bool LocalRecoveryInProgress = true;
@ -616,10 +617,10 @@ typedef struct XLogCtlData
char archiveCleanupCommand[MAXPGPATH];
/*
* SharedRecoveryInProgress indicates if we're still in crash or archive
* SharedRecoveryState indicates if we're still in crash or archive
* recovery. Protected by info_lck.
*/
bool SharedRecoveryInProgress;
RecoveryState SharedRecoveryState;
/*
* SharedHotStandbyActive indicates if we're still in crash or archive
@ -4136,6 +4137,16 @@ ReadRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, int emode,
updateMinRecoveryPoint = true;
UpdateControlFile();
/*
* We update SharedRecoveryState while holding the lock on
* ControlFileLock so both states are consistent in shared
* memory.
*/
SpinLockAcquire(&XLogCtl->info_lck);
XLogCtl->SharedRecoveryState = RECOVERY_STATE_ARCHIVE;
SpinLockRelease(&XLogCtl->info_lck);
LWLockRelease(ControlFileLock);
CheckRecoveryConsistency();
@ -4819,7 +4830,7 @@ XLOGShmemInit(void)
* in additional info.)
*/
XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
XLogCtl->SharedRecoveryInProgress = true;
XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
XLogCtl->SharedHotStandbyActive = false;
XLogCtl->WalWriterSleeping = false;
@ -6541,7 +6552,13 @@ StartupXLOG(void)
*/
dbstate_at_startup = ControlFile->state;
if (InArchiveRecovery)
{
ControlFile->state = DB_IN_ARCHIVE_RECOVERY;
SpinLockAcquire(&XLogCtl->info_lck);
XLogCtl->SharedRecoveryState = RECOVERY_STATE_ARCHIVE;
SpinLockRelease(&XLogCtl->info_lck);
}
else
{
ereport(LOG,
@ -6554,6 +6571,10 @@ StartupXLOG(void)
ControlFile->checkPointCopy.ThisTimeLineID,
recoveryTargetTLI)));
ControlFile->state = DB_IN_CRASH_RECOVERY;
SpinLockAcquire(&XLogCtl->info_lck);
XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
SpinLockRelease(&XLogCtl->info_lck);
}
ControlFile->prevCheckPoint = ControlFile->checkPoint;
ControlFile->checkPoint = checkPointLoc;
@ -7555,7 +7576,7 @@ StartupXLOG(void)
* updates to shared memory.)
*/
SpinLockAcquire(&XLogCtl->info_lck);
XLogCtl->SharedRecoveryInProgress = false;
XLogCtl->SharedRecoveryState = RECOVERY_STATE_DONE;
SpinLockRelease(&XLogCtl->info_lck);
/*
@ -7698,7 +7719,7 @@ RecoveryInProgress(void)
*/
volatile XLogCtlData *xlogctl = XLogCtl;
LocalRecoveryInProgress = xlogctl->SharedRecoveryInProgress;
LocalRecoveryInProgress = (xlogctl->SharedRecoveryState != RECOVERY_STATE_DONE);
/*
* Initialize TimeLineID and RedoRecPtr when we discover that recovery
@ -7710,8 +7731,8 @@ RecoveryInProgress(void)
{
/*
* If we just exited recovery, make sure we read TimeLineID and
* RedoRecPtr after SharedRecoveryInProgress (for machines with
* weak memory ordering).
* RedoRecPtr after SharedRecoveryState (for machines with weak
* memory ordering).
*/
pg_memory_barrier();
InitXLOGAccess();
@ -7727,6 +7748,24 @@ RecoveryInProgress(void)
}
}
/*
* Returns current recovery state from shared memory.
*
* This returned state is kept consistent with the contents of the control
* file. See details about the possible values of RecoveryState in xlog.h.
*/
RecoveryState
GetRecoveryState(void)
{
RecoveryState retval;
SpinLockAcquire(&XLogCtl->info_lck);
retval = XLogCtl->SharedRecoveryState;
SpinLockRelease(&XLogCtl->info_lck);
return retval;
}
/*
* Is HotStandby active yet? This is only important in special backends
* since normal backends won't ever be able to connect until this returns

View File

@ -619,18 +619,25 @@ XLogArchiveCheckDone(const char *xlog)
{
char archiveStatusPath[MAXPGPATH];
struct stat stat_buf;
bool inRecovery = RecoveryInProgress();
/* The file is always deletable if archive_mode is "off". */
if (!XLogArchivingActive())
return true;
/*
* The file is always deletable if archive_mode is "off". On standbys
* archiving is disabled if archive_mode is "on", and enabled with
* "always". On a primary, archiving is enabled if archive_mode is "on"
* or "always".
* During archive recovery, the file is deletable if archive_mode is not
* "always".
*/
if (!((XLogArchivingActive() && !inRecovery) ||
(XLogArchivingAlways() && inRecovery)))
if (!XLogArchivingAlways() &&
GetRecoveryState() == RECOVERY_STATE_ARCHIVE)
return true;
/*
* At this point of the logic, note that we are either a primary with
* archive_mode set to "on" or "always", or a standby with archive_mode
* set to "always".
*/
/* First check for .done --- this means archiver is done with it */
StatusFilePath(archiveStatusPath, xlog, ".done");
if (stat(archiveStatusPath, &stat_buf) == 0)