diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index c1dd8b54328..3735a2e3dfd 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -667,7 +667,8 @@ static XLogRecPtr CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI); static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags); -static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo); +static void KeepLogSeg(XLogRecPtr recptr, XLogRecPtr slotsMinLSN, + XLogSegNo *logSegNo); static XLogRecPtr XLogGetReplicationSlotMinimumLSN(void); static void AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, @@ -6891,6 +6892,7 @@ CreateCheckPoint(int flags) VirtualTransactionId *vxids; int nvxids; int oldXLogAllowed = 0; + XLogRecPtr slotsMinReqLSN; /* * An end-of-recovery checkpoint is really a shutdown checkpoint, just @@ -7119,6 +7121,15 @@ CreateCheckPoint(int flags) */ END_CRIT_SECTION(); + /* + * Get the current minimum LSN to be used later in the WAL segment + * cleanup. We may clean up only WAL segments, which are not needed + * according to synchronized LSNs of replication slots. The slot's LSN + * might be advanced concurrently, so we call this before + * CheckPointReplicationSlots() synchronizes replication slots. + */ + slotsMinReqLSN = XLogGetReplicationSlotMinimumLSN(); + /* * In some cases there are groups of actions that must all occur on one * side or the other of a checkpoint record. Before flushing the @@ -7307,17 +7318,25 @@ CreateCheckPoint(int flags) * prevent the disk holding the xlog from growing full. */ XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size); - KeepLogSeg(recptr, &_logSegNo); + KeepLogSeg(recptr, slotsMinReqLSN, &_logSegNo); if (InvalidateObsoleteReplicationSlots(RS_INVAL_WAL_REMOVED, _logSegNo, InvalidOid, InvalidTransactionId)) { + /* + * Recalculate the current minimum LSN to be used in the WAL segment + * cleanup. Then, we must synchronize the replication slots again in + * order to make this LSN safe to use. + */ + slotsMinReqLSN = XLogGetReplicationSlotMinimumLSN(); + CheckPointReplicationSlots(shutdown); + /* * Some slots have been invalidated; recalculate the old-segment * horizon, starting again from RedoRecPtr. */ XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size); - KeepLogSeg(recptr, &_logSegNo); + KeepLogSeg(recptr, slotsMinReqLSN, &_logSegNo); } _logSegNo--; RemoveOldXlogFiles(_logSegNo, RedoRecPtr, recptr, @@ -7590,6 +7609,7 @@ CreateRestartPoint(int flags) XLogRecPtr endptr; XLogSegNo _logSegNo; TimestampTz xtime; + XLogRecPtr slotsMinReqLSN; /* Concurrent checkpoint/restartpoint cannot happen */ Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER); @@ -7672,6 +7692,15 @@ CreateRestartPoint(int flags) MemSet(&CheckpointStats, 0, sizeof(CheckpointStats)); CheckpointStats.ckpt_start_t = GetCurrentTimestamp(); + /* + * Get the current minimum LSN to be used later in the WAL segment + * cleanup. We may clean up only WAL segments, which are not needed + * according to synchronized LSNs of replication slots. The slot's LSN + * might be advanced concurrently, so we call this before + * CheckPointReplicationSlots() synchronizes replication slots. + */ + slotsMinReqLSN = XLogGetReplicationSlotMinimumLSN(); + if (log_checkpoints) LogCheckpointStart(flags, true); @@ -7760,17 +7789,25 @@ CreateRestartPoint(int flags) receivePtr = GetWalRcvFlushRecPtr(NULL, NULL); replayPtr = GetXLogReplayRecPtr(&replayTLI); endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr; - KeepLogSeg(endptr, &_logSegNo); + KeepLogSeg(endptr, slotsMinReqLSN, &_logSegNo); if (InvalidateObsoleteReplicationSlots(RS_INVAL_WAL_REMOVED, _logSegNo, InvalidOid, InvalidTransactionId)) { + /* + * Recalculate the current minimum LSN to be used in the WAL segment + * cleanup. Then, we must synchronize the replication slots again in + * order to make this LSN safe to use. + */ + slotsMinReqLSN = XLogGetReplicationSlotMinimumLSN(); + CheckPointReplicationSlots(flags & CHECKPOINT_IS_SHUTDOWN); + /* * Some slots have been invalidated; recalculate the old-segment * horizon, starting again from RedoRecPtr. */ XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size); - KeepLogSeg(endptr, &_logSegNo); + KeepLogSeg(endptr, slotsMinReqLSN, &_logSegNo); } _logSegNo--; @@ -7865,6 +7902,7 @@ GetWALAvailability(XLogRecPtr targetLSN) XLogSegNo oldestSegMaxWalSize; /* oldest segid kept by max_wal_size */ XLogSegNo oldestSlotSeg; /* oldest segid kept by slot */ uint64 keepSegs; + XLogRecPtr slotsMinReqLSN; /* * slot does not reserve WAL. Either deactivated, or has never been active @@ -7878,8 +7916,9 @@ GetWALAvailability(XLogRecPtr targetLSN) * oldestSlotSeg to the current segment. */ currpos = GetXLogWriteRecPtr(); + slotsMinReqLSN = XLogGetReplicationSlotMinimumLSN(); XLByteToSeg(currpos, oldestSlotSeg, wal_segment_size); - KeepLogSeg(currpos, &oldestSlotSeg); + KeepLogSeg(currpos, slotsMinReqLSN, &oldestSlotSeg); /* * Find the oldest extant segment file. We get 1 until checkpoint removes @@ -7940,7 +7979,7 @@ GetWALAvailability(XLogRecPtr targetLSN) * invalidation is optionally done here, instead. */ static void -KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo) +KeepLogSeg(XLogRecPtr recptr, XLogRecPtr slotsMinReqLSN, XLogSegNo *logSegNo) { XLogSegNo currSegNo; XLogSegNo segno; @@ -7953,7 +7992,7 @@ KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo) * Calculate how many segments are kept by slots first, adjusting for * max_slot_wal_keep_size. */ - keep = XLogGetReplicationSlotMinimumLSN(); + keep = slotsMinReqLSN; if (keep != InvalidXLogRecPtr && keep < recptr) { XLByteToSeg(keep, segno, wal_segment_size); diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c index 97b6aa899ee..4407df84a1c 100644 --- a/src/backend/replication/logical/logical.c +++ b/src/backend/replication/logical/logical.c @@ -1897,7 +1897,15 @@ LogicalConfirmReceivedLocation(XLogRecPtr lsn) SpinLockRelease(&MyReplicationSlot->mutex); - /* first write new xmin to disk, so we know what's up after a crash */ + /* + * First, write new xmin and restart_lsn to disk so we know what's up + * after a crash. Even when we do this, the checkpointer can see the + * updated restart_lsn value in the shared memory; then, a crash can + * happen before we manage to write that value to the disk. Thus, + * checkpointer still needs to make special efforts to keep WAL + * segments required by the restart_lsn written to the disk. See + * CreateCheckPoint() and CreateRestartPoint() for details. + */ if (updated_xmin || updated_restart) { ReplicationSlotMarkDirty(); diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 7be04cda5f9..4019eb90292 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -2392,6 +2392,10 @@ PhysicalConfirmReceivedLocation(XLogRecPtr lsn) * be energy wasted - the worst thing lost information could cause here is * to give wrong information in a statistics view - we'll just potentially * be more conservative in removing files. + * + * Checkpointer makes special efforts to keep the WAL segments required by + * the restart_lsn written to the disk. See CreateCheckPoint() and + * CreateRestartPoint() for details. */ }