1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-02 09:02:37 +03:00

Allow users to limit storage reserved by replication slots

Replication slots are useful to retain data that may be needed by a
replication system.  But experience has shown that allowing them to
retain excessive data can lead to the primary failing because of running
out of space.  This new feature allows the user to configure a maximum
amount of space to be reserved using the new option
max_slot_wal_keep_size.  Slots that overrun that space are invalidated
at checkpoint time, enabling the storage to be released.

Author: Kyotaro HORIGUCHI <horiguchi.kyotaro@lab.ntt.co.jp>
Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com>
Reviewed-by: Jehan-Guillaume de Rorthais <jgdr@dalibo.com>
Reviewed-by: Álvaro Herrera <alvherre@alvh.no-ip.org>
Discussion: https://postgr.es/m/20170228.122736.123383594.horiguchi.kyotaro@lab.ntt.co.jp
This commit is contained in:
Alvaro Herrera
2020-04-07 18:35:00 -04:00
parent b63c293bcb
commit c655077639
17 changed files with 595 additions and 43 deletions

View File

@ -108,6 +108,7 @@ int wal_level = WAL_LEVEL_MINIMAL;
int CommitDelay = 0; /* precommit delay in microseconds */
int CommitSiblings = 5; /* # concurrent xacts needed to sleep */
int wal_retrieve_retry_interval = 5000;
int max_slot_wal_keep_size_mb = -1;
#ifdef WAL_DEBUG
bool XLOG_DEBUG = false;
@ -759,7 +760,7 @@ static ControlFileData *ControlFile = NULL;
*/
#define UsableBytesInPage (XLOG_BLCKSZ - SizeOfXLogShortPHD)
/* Convert min_wal_size_mb and max_wal_size_mb to equivalent segment count */
/* Convert values of GUCs measured in megabytes to equiv. segment count */
#define ConvertToXSegs(x, segsize) \
(x / ((segsize) / (1024 * 1024)))
@ -3963,9 +3964,10 @@ XLogGetLastRemovedSegno(void)
return lastRemovedSegNo;
}
/*
* Update the last removed segno pointer in shared memory, to reflect
* that the given XLOG file has been removed.
* Update the last removed segno pointer in shared memory, to reflect that the
* given XLOG file has been removed.
*/
static void
UpdateLastRemovedPtr(char *filename)
@ -9043,6 +9045,7 @@ CreateCheckPoint(int flags)
*/
XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
KeepLogSeg(recptr, &_logSegNo);
InvalidateObsoleteReplicationSlots(_logSegNo);
_logSegNo--;
RemoveOldXlogFiles(_logSegNo, RedoRecPtr, recptr);
@ -9377,6 +9380,7 @@ CreateRestartPoint(int flags)
replayPtr = GetXLogReplayRecPtr(&replayTLI);
endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr;
KeepLogSeg(endptr, &_logSegNo);
InvalidateObsoleteReplicationSlots(_logSegNo);
_logSegNo--;
/*
@ -9445,48 +9449,143 @@ CreateRestartPoint(int flags)
return true;
}
/*
 * Report availability of WAL for the given target LSN
 *		(typically a slot's restart_lsn)
 *
 * Returns one of the following enum values:
 *
 * * WALAVAIL_INVALID_LSN means targetLSN is invalid, i.e. the slot hasn't
 *   been set to reserve WAL.
 *
 * * WALAVAIL_NORMAL means the segment holding targetLSN has not been removed
 *   and lies within the range kept by max_wal_size / wal_keep_segments.
 *   This state is only returned when max_slot_wal_keep_size is not positive
 *   or is at least as large as max_wal_size.
 *
 * * WALAVAIL_RESERVED means the segment has not been removed and is at or
 *   after the oldest segment that KeepLogSeg() computes as still needed.
 *
 * * WALAVAIL_REMOVED means it is definitely lost.  A replication stream on
 *   a slot with this LSN cannot continue.
 */
WALAvailability
GetWALAvailability(XLogRecPtr targetLSN)
{
	XLogRecPtr	currpos;		/* current write LSN */
	XLogSegNo	currSeg;		/* segid of currpos */
	XLogSegNo	targetSeg;		/* segid of targetLSN */
	XLogSegNo	oldestSeg;		/* actual oldest segid */
	XLogSegNo	oldestSegMaxWalSize;	/* oldest segid kept by max_wal_size */

	/*
	 * Oldest segid kept by slots.  Starts out invalid (zero), which
	 * KeepLogSeg() treats as "not set yet" and overwrites unconditionally.
	 */
	XLogSegNo	oldestSlotSeg = InvalidXLogRecPtr;
	uint64		keepSegs;

	/* slot does not reserve WAL. Either deactivated, or has never been active */
	if (XLogRecPtrIsInvalid(targetLSN))
		return WALAVAIL_INVALID_LSN;

	currpos = GetXLogWriteRecPtr();

	/* calculate oldest segment currently needed by slots */
	XLByteToSeg(targetLSN, targetSeg, wal_segment_size);
	KeepLogSeg(currpos, &oldestSlotSeg);

	/*
	 * Find the oldest extant segment file.  This yields 1 until a checkpoint
	 * removes the first WAL segment file since startup, so the reported
	 * status can be wrong under certain abnormal conditions; that does no
	 * actual harm.
	 */
	oldestSeg = XLogGetLastRemovedSegno() + 1;

	/*
	 * Calculate oldest segment by max_wal_size and wal_keep_segments.
	 *
	 * max_wal_size_mb is measured in megabytes while wal_keep_segments is
	 * already a segment count, so convert the former to segments before
	 * comparing the two; taking Max() of the raw values mixes units.
	 */
	XLByteToSeg(currpos, currSeg, wal_segment_size);
	keepSegs = Max(ConvertToXSegs(max_wal_size_mb, wal_segment_size),
				   wal_keep_segments) + 1;

	/* avoid underflow, don't go below segment 1 */
	if (currSeg > keepSegs)
		oldestSegMaxWalSize = currSeg - keepSegs;
	else
		oldestSegMaxWalSize = 1;

	/*
	 * If max_slot_wal_keep_size has changed after the last call, the segment
	 * that would have been kept by the current setting might have been lost
	 * by the previous setting.  No point in showing normal or reserved
	 * status values if the targetSeg is known to be lost.
	 */
	if (targetSeg >= oldestSeg)
	{
		/*
		 * Show "normal" when targetSeg is within max_wal_size, provided
		 * max_slot_wal_keep_size is not positive or is at least
		 * max_wal_size.
		 *
		 * NOTE(review): when max_slot_wal_keep_size is positive and smaller
		 * than max_wal_size this test never reports "normal"; confirm that
		 * is the intended behavior.
		 */
		if ((max_slot_wal_keep_size_mb <= 0 ||
			 max_slot_wal_keep_size_mb >= max_wal_size_mb) &&
			oldestSegMaxWalSize <= targetSeg)
			return WALAVAIL_NORMAL;

		/* being retained by slots */
		if (oldestSlotSeg <= targetSeg)
			return WALAVAIL_RESERVED;
	}

	/* Definitely lost */
	return WALAVAIL_REMOVED;
}
/*
* Retreat *logSegNo to the last segment that we need to retain because of
* either wal_keep_segments or replication slots.
*
* This is calculated by subtracting wal_keep_segments from the given xlog
* location, recptr and by making sure that that result is below the
* requirement of replication slots.
* requirement of replication slots. For the latter criterion we do consider
* the effects of max_slot_wal_keep_size: reserve at most that much space back
* from recptr.
*/
static void
KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
{
XLogSegNo currSegNo;
XLogSegNo segno;
XLogRecPtr keep;
XLByteToSeg(recptr, segno, wal_segment_size);
keep = XLogGetReplicationSlotMinimumLSN();
XLByteToSeg(recptr, currSegNo, wal_segment_size);
segno = currSegNo;
/* compute limit for wal_keep_segments first */
if (wal_keep_segments > 0)
/*
* Calculate how many segments are kept by slots first, adjusting for
* max_slot_wal_keep_size.
*/
keep = XLogGetReplicationSlotMinimumLSN();
if (keep != InvalidXLogRecPtr)
{
/* avoid underflow, don't go below 1 */
if (segno <= wal_keep_segments)
segno = 1;
else
segno = segno - wal_keep_segments;
XLByteToSeg(keep, segno, wal_segment_size);
/* Cap by max_slot_wal_keep_size ... */
if (max_slot_wal_keep_size_mb >= 0)
{
XLogRecPtr slot_keep_segs;
slot_keep_segs =
ConvertToXSegs(max_slot_wal_keep_size_mb, wal_segment_size);
if (currSegNo - segno > slot_keep_segs)
segno = currSegNo - slot_keep_segs;
}
}
/* then check whether slots limit removal further */
if (max_replication_slots > 0 && keep != InvalidXLogRecPtr)
/* but, keep at least wal_keep_segments if that's set */
if (wal_keep_segments > 0 && currSegNo - segno < wal_keep_segments)
{
XLogSegNo slotSegNo;
XLByteToSeg(keep, slotSegNo, wal_segment_size);
if (slotSegNo <= 0)
/* avoid underflow, don't go below 1 */
if (currSegNo <= wal_keep_segments)
segno = 1;
else if (slotSegNo < segno)
segno = slotSegNo;
else
segno = currSegNo - wal_keep_segments;
}
/* don't delete WAL segments newer than the calculated segment */
if (segno < *logSegNo)
if (XLogRecPtrIsInvalid(*logSegNo) || segno < *logSegNo)
*logSegNo = segno;
}

View File

@ -876,7 +876,9 @@ CREATE VIEW pg_replication_slots AS
L.xmin,
L.catalog_xmin,
L.restart_lsn,
L.confirmed_flush_lsn
L.confirmed_flush_lsn,
L.wal_status,
L.min_safe_lsn
FROM pg_get_replication_slots() AS L
LEFT JOIN pg_database D ON (L.datoid = D.oid);

View File

@ -225,7 +225,7 @@ pg_logical_slot_get_changes_guts(FunctionCallInfo fcinfo, bool confirm, bool bin
else
end_of_wal = GetXLogReplayRecPtr(&ThisTimeLineID);
ReplicationSlotAcquire(NameStr(*name), true);
(void) ReplicationSlotAcquire(NameStr(*name), SAB_Error);
PG_TRY();
{

View File

@ -325,9 +325,15 @@ ReplicationSlotCreate(const char *name, bool db_specific,
/*
* Find a previously created slot and mark it as used by this backend.
*
* The return value is only useful if behavior is SAB_Inquire, in which
* case it's zero if we successfully acquired the slot, or the PID of the
* owning process otherwise. If behavior is SAB_Error, then trying to
* acquire an owned slot is an error. If SAB_Block, we sleep until the
* slot is released by the owning process.
*/
void
ReplicationSlotAcquire(const char *name, bool nowait)
int
ReplicationSlotAcquire(const char *name, SlotAcquireBehavior behavior)
{
ReplicationSlot *slot;
int active_pid;
@ -392,11 +398,13 @@ retry:
*/
if (active_pid != MyProcPid)
{
if (nowait)
if (behavior == SAB_Error)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_IN_USE),
errmsg("replication slot \"%s\" is active for PID %d",
name, active_pid)));
else if (behavior == SAB_Inquire)
return active_pid;
/* Wait here until we get signaled, and then restart */
ConditionVariableSleep(&slot->active_cv,
@ -412,6 +420,9 @@ retry:
/* We made this slot active, so it's ours now. */
MyReplicationSlot = slot;
/* success */
return 0;
}
/*
@ -518,7 +529,7 @@ ReplicationSlotDrop(const char *name, bool nowait)
{
Assert(MyReplicationSlot == NULL);
ReplicationSlotAcquire(name, nowait);
(void) ReplicationSlotAcquire(name, nowait ? SAB_Error : SAB_Block);
ReplicationSlotDropAcquired();
}
@ -743,6 +754,10 @@ ReplicationSlotsComputeRequiredXmin(bool already_locked)
/*
* Compute the oldest restart LSN across all slots and inform xlog module.
*
* Note: while max_slot_wal_keep_size is theoretically relevant for this
* purpose, we don't try to account for that, because this module doesn't
* know what to compare against.
*/
void
ReplicationSlotsComputeRequiredLSN(void)
@ -818,6 +833,9 @@ ReplicationSlotsComputeLogicalRestartLSN(void)
restart_lsn = s->data.restart_lsn;
SpinLockRelease(&s->mutex);
if (restart_lsn == InvalidXLogRecPtr)
continue;
if (result == InvalidXLogRecPtr ||
restart_lsn < result)
result = restart_lsn;
@ -1064,6 +1082,80 @@ ReplicationSlotReserveWal(void)
}
}
/*
 * Mark any slot that points to an LSN older than the given segment
 * as invalid; it requires WAL that's about to be removed.
 *
 * oldestSegno is the oldest segment that will be kept; every in-use slot
 * whose restart_lsn is valid and lies before the start of that segment gets
 * its restart_lsn reset to InvalidXLogRecPtr.  If such a slot is currently
 * owned by another process, that process is sent SIGTERM and we wait for it
 * to let go of the slot before invalidating it.
 *
 * NB - this runs as part of checkpoint, so avoid raising errors if possible.
 */
void
InvalidateObsoleteReplicationSlots(XLogSegNo oldestSegno)
{
	XLogRecPtr	oldestLSN;

	/* LSN of the first byte of the oldest segment that is being kept */
	XLogSegNoOffsetToRecPtr(oldestSegno, 0, wal_segment_size, oldestLSN);

restart:
	LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
	for (int i = 0; i < max_replication_slots; i++)
	{
		ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
		XLogRecPtr	restart_lsn;
		NameData	slotname;

		if (!s->in_use)
			continue;

		/*
		 * Copy what we need while holding the spinlock, and nothing more.
		 * The name is copied by value into a local NameData rather than with
		 * pstrdup(): allocating memory under a spinlock could raise an error
		 * with the lock held, and the copy would also never be freed.
		 */
		SpinLockAcquire(&s->mutex);
		slotname = s->data.name;
		restart_lsn = s->data.restart_lsn;
		SpinLockRelease(&s->mutex);

		/* skip slots that reserve no WAL or are not behind the cutoff */
		if (restart_lsn == InvalidXLogRecPtr || restart_lsn >= oldestLSN)
			continue;

		/* the control lock is dropped before acquiring the slot below */
		LWLockRelease(ReplicationSlotControlLock);

		/*
		 * Try to take ownership of the slot.  While some other process owns
		 * it, signal that process and retry after a short sleep.
		 */
		for (;;)
		{
			int			wspid = ReplicationSlotAcquire(NameStr(slotname),
													   SAB_Inquire);

			/* no walsender? success! */
			if (wspid == 0)
				break;

			ereport(LOG,
					(errmsg("terminating walsender %d because replication slot \"%s\" is too far behind",
							wspid, NameStr(slotname))));
			(void) kill(wspid, SIGTERM);

			ConditionVariableTimedSleep(&s->active_cv, 10,
										WAIT_EVENT_REPLICATION_SLOT_DROP);
		}
		ConditionVariableCancelSleep();

		ereport(LOG,
				(errmsg("invalidating slot \"%s\" because its restart_lsn %X/%X exceeds max_slot_wal_keep_size",
						NameStr(slotname),
						(uint32) (restart_lsn >> 32),
						(uint32) restart_lsn)));

		/* we own the slot now; forget the WAL position it was holding */
		SpinLockAcquire(&s->mutex);
		s->data.restart_lsn = InvalidXLogRecPtr;
		SpinLockRelease(&s->mutex);
		ReplicationSlotRelease();

		/* if we did anything, start from scratch */
		CHECK_FOR_INTERRUPTS();
		goto restart;
	}
	LWLockRelease(ReplicationSlotControlLock);
}
/*
* Flush all replication slots to disk.
*

View File

@ -234,7 +234,7 @@ pg_drop_replication_slot(PG_FUNCTION_ARGS)
Datum
pg_get_replication_slots(PG_FUNCTION_ARGS)
{
#define PG_GET_REPLICATION_SLOTS_COLS 11
#define PG_GET_REPLICATION_SLOTS_COLS 13
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
TupleDesc tupdesc;
Tuplestorestate *tupstore;
@ -288,6 +288,8 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
Oid database;
NameData slot_name;
NameData plugin;
WALAvailability walstate;
XLogSegNo last_removed_seg;
int i;
if (!slot->in_use)
@ -355,6 +357,40 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
else
nulls[i++] = true;
walstate = GetWALAvailability(restart_lsn);
switch (walstate)
{
case WALAVAIL_INVALID_LSN:
nulls[i++] = true;
break;
case WALAVAIL_NORMAL:
values[i++] = CStringGetTextDatum("normal");
break;
case WALAVAIL_RESERVED:
values[i++] = CStringGetTextDatum("reserved");
break;
case WALAVAIL_REMOVED:
values[i++] = CStringGetTextDatum("lost");
break;
}
if (max_slot_wal_keep_size_mb >= 0 &&
(walstate == WALAVAIL_NORMAL || walstate == WALAVAIL_RESERVED) &&
((last_removed_seg = XLogGetLastRemovedSegno()) != 0))
{
XLogRecPtr min_safe_lsn;
XLogSegNoOffsetToRecPtr(last_removed_seg + 1, 0,
wal_segment_size, min_safe_lsn);
values[i++] = Int64GetDatum(min_safe_lsn);
}
else
nulls[i++] = true;
tuplestore_putvalues(tupstore, tupdesc, values, nulls);
}
LWLockRelease(ReplicationSlotControlLock);
@ -377,6 +413,8 @@ pg_physical_replication_slot_advance(XLogRecPtr moveto)
XLogRecPtr startlsn = MyReplicationSlot->data.restart_lsn;
XLogRecPtr retlsn = startlsn;
Assert(moveto != InvalidXLogRecPtr);
if (startlsn < moveto)
{
SpinLockAcquire(&MyReplicationSlot->mutex);
@ -414,6 +452,8 @@ pg_logical_replication_slot_advance(XLogRecPtr moveto)
ResourceOwner old_resowner = CurrentResourceOwner;
XLogRecPtr retlsn;
Assert(moveto != InvalidXLogRecPtr);
PG_TRY();
{
/*
@ -552,7 +592,7 @@ pg_replication_slot_advance(PG_FUNCTION_ARGS)
moveto = Min(moveto, GetXLogReplayRecPtr(&ThisTimeLineID));
/* Acquire the slot so we "own" it */
ReplicationSlotAcquire(NameStr(*slotname), true);
(void) ReplicationSlotAcquire(NameStr(*slotname), SAB_Error);
/* A slot whose restart_lsn has never been reserved cannot be advanced */
if (XLogRecPtrIsInvalid(MyReplicationSlot->data.restart_lsn))

View File

@ -595,7 +595,7 @@ StartReplication(StartReplicationCmd *cmd)
if (cmd->slotname)
{
ReplicationSlotAcquire(cmd->slotname, true);
(void) ReplicationSlotAcquire(cmd->slotname, SAB_Error);
if (SlotIsLogical(MyReplicationSlot))
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
@ -1132,7 +1132,7 @@ StartLogicalReplication(StartReplicationCmd *cmd)
Assert(!MyReplicationSlot);
ReplicationSlotAcquire(cmd->slotname, true);
(void) ReplicationSlotAcquire(cmd->slotname, SAB_Error);
/*
* Force a disconnect, so that the decoding code doesn't need to care

View File

@ -2784,6 +2784,19 @@ static struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
{
{"max_slot_wal_keep_size", PGC_SIGHUP, REPLICATION_SENDING,
gettext_noop("Sets the maximum WAL size that can be reserved by replication slots."),
gettext_noop("Replication slots will be marked as failed, and segments released "
"for deletion or recycling, if this much space is occupied by WAL "
"on disk."),
GUC_UNIT_MB
},
&max_slot_wal_keep_size_mb,
-1, -1, MAX_KILOBYTES,
NULL, NULL, NULL
},
{
{"wal_sender_timeout", PGC_USERSET, REPLICATION_SENDING,
gettext_noop("Sets the maximum time to wait for WAL replication."),

View File

@ -289,6 +289,7 @@
#max_wal_senders = 10 # max number of walsender processes
# (change requires restart)
#wal_keep_segments = 0 # in logfile segments; 0 disables
#max_slot_wal_keep_size = -1 # in megabytes; -1 disables
#wal_sender_timeout = 60s # in milliseconds; 0 disables
#max_replication_slots = 10 # max number of replication slots