diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 3f8de31b386..1d81fa1c2af 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -946,6 +946,7 @@ static int XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *readBuf); static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, bool fetching_ckpt, XLogRecPtr tliRecPtr); +static void ResetInstallXLogFileSegmentActive(void); static void XLogShutdownWalRcv(void); static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr); static void XLogFileClose(void); @@ -12837,8 +12838,18 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, * Before we leave XLOG_FROM_STREAM state, make sure that * walreceiver is not active, so that it won't overwrite * WAL that we restore from archive. + * If walreceiver is actively streaming (or attempting to + * connect), we must shut it down. However, if it's + * already in WAITING state (e.g., due to timeline + * divergence), we only need to reset the install flag to + * allow archive restoration. */ - XLogShutdownWalRcv(); + if (WalRcvStreaming()) + XLogShutdownWalRcv(); + else + { + ResetInstallXLogFileSegmentActive(); + } /* * Before we sleep, re-scan for possible new timelines if @@ -13191,15 +13202,21 @@ StartupRequestWalReceiverRestart(void) } } +/* Disable WAL file recycling and preallocation. */ +static void +ResetInstallXLogFileSegmentActive(void) +{ + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); + XLogCtl->InstallXLogFileSegmentActive = false; + LWLockRelease(ControlFileLock); +} + /* Thin wrapper around ShutdownWalRcv(). 
*/ static void XLogShutdownWalRcv(void) { ShutdownWalRcv(); - - LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); - XLogCtl->InstallXLogFileSegmentActive = false; - LWLockRelease(ControlFileLock); + ResetInstallXLogFileSegmentActive(); } /* diff --git a/src/test/recovery/t/004_timeline_switch.pl b/src/test/recovery/t/004_timeline_switch.pl index c101980e9e2..a777d8e6a3d 100644 --- a/src/test/recovery/t/004_timeline_switch.pl +++ b/src/test/recovery/t/004_timeline_switch.pl @@ -7,7 +7,7 @@ use warnings; use File::Path qw(rmtree); use PostgresNode; use TestLib; -use Test::More tests => 3; +use Test::More tests => 4; $ENV{PGDATABASE} = 'postgres'; @@ -71,6 +71,14 @@ my $result = $node_standby_2->safe_psql('postgres', "SELECT count(*) FROM tab_int"); is($result, qq(2000), 'check content of standby 2'); +# Check the logs: the WAL receiver should not have been stopped while +# transitioning to its new timeline. There is no need to rely on an +# offset in this check of the server logs: a new log file is used on +# node restart when primary_conninfo is updated above. +ok( !$node_standby_2->log_contains( + "FATAL: .* terminating walreceiver process due to administrator command" + ), + 'WAL receiver should not be stopped across timeline jumps'); # Ensure that a standby is able to follow a primary on a newer timeline # when WAL archiving is enabled.