mirror of
https://github.com/postgres/postgres.git
synced 2025-10-25 13:17:41 +03:00
Invalidate inactive replication slots.
This commit introduces the idle_replication_slot_timeout GUC, which allows inactive slots to be invalidated at the time of checkpoint. Because checkpoints happen at checkpoint_timeout intervals, there can be some lag between when the idle_replication_slot_timeout was exceeded and when the slot invalidation is triggered at the next checkpoint. To avoid such lags, users can force a checkpoint to promptly invalidate inactive slots. Note that the idle timeout invalidation mechanism is not applicable for slots that do not reserve WAL or for slots on the standby server that are synced from the primary server (i.e., standby slots having 'synced' field 'true'). Synced slots are always considered to be inactive because they don't perform logical decoding to produce changes. The slots can become inactive for a long period if a subscriber is down due to a system error or is inaccessible because of network issues. If such a situation persists, it might be more practical to recreate the subscriber rather than attempt to recover the node and wait for it to catch up, which could be time-consuming. Additionally, external tools that create replication slots (e.g., for migrations or upgrades) may fail to remove them if an error occurs, leaving behind unused slots that take up space and resources. Manually cleaning them up can be tedious and error-prone, and without intervention, these lingering slots can cause unnecessary WAL retention and system bloat. As the duration of idle_replication_slot_timeout is in minutes, any test using it would be time-consuming. We are planning to commit a follow-up patch for tests by using the injection point framework.
Author: Nisha Moond <nisha.moond412@gmail.com> Author: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com> Reviewed-by: Peter Smith <smithpb2250@gmail.com> Reviewed-by: Hayato Kuroda <kuroda.hayato@fujitsu.com> Reviewed-by: Vignesh C <vignesh21@gmail.com> Reviewed-by: Amit Kapila <amit.kapila16@gmail.com> Reviewed-by: Hou Zhijie <houzj.fnst@fujitsu.com> Reviewed-by: Bertrand Drouvot <bertranddrouvot.pg@gmail.com> Discussion: https://postgr.es/m/CALj2ACW4aUe-_uFQOjdWCEN-xXoLGhmvRFnL8SNw_TZ5nJe+aw@mail.gmail.com Discussion: https://postgr.es/m/OS0PR01MB5716C131A7D80DAE8CB9E88794FC2@OS0PR01MB5716.jpnprd01.prod.outlook.com
This commit is contained in:
@@ -44,21 +44,25 @@ typedef enum ReplicationSlotPersistency
|
||||
* Slots can be invalidated, e.g. due to max_slot_wal_keep_size. If so, the
|
||||
* 'invalidated' field is set to a value other than _NONE.
|
||||
*
|
||||
* When adding a new invalidation cause here, remember to update
|
||||
* SlotInvalidationCauses and RS_INVAL_MAX_CAUSES.
|
||||
* When adding a new invalidation cause here, its value must be a power of 2
|
||||
* (e.g., 1, 2, 4...) for proper bitwise operations. Also, remember to update
|
||||
* RS_INVAL_MAX_CAUSES below, and SlotInvalidationCauses in slot.c.
|
||||
*/
|
||||
typedef enum ReplicationSlotInvalidationCause
|
||||
{
|
||||
RS_INVAL_NONE,
|
||||
RS_INVAL_NONE = 0,
|
||||
/* required WAL has been removed */
|
||||
RS_INVAL_WAL_REMOVED,
|
||||
RS_INVAL_WAL_REMOVED = (1 << 0),
|
||||
/* required rows have been removed */
|
||||
RS_INVAL_HORIZON,
|
||||
RS_INVAL_HORIZON = (1 << 1),
|
||||
/* wal_level insufficient for slot */
|
||||
RS_INVAL_WAL_LEVEL,
|
||||
RS_INVAL_WAL_LEVEL = (1 << 2),
|
||||
/* idle slot timeout has occurred */
|
||||
RS_INVAL_IDLE_TIMEOUT = (1 << 3),
|
||||
} ReplicationSlotInvalidationCause;
|
||||
|
||||
extern PGDLLIMPORT const char *const SlotInvalidationCauses[];
|
||||
/* Maximum number of invalidation causes */
|
||||
#define RS_INVAL_MAX_CAUSES 4
|
||||
|
||||
/*
|
||||
* On-Disk data of a replication slot, preserved across restarts.
|
||||
@@ -254,6 +258,7 @@ extern PGDLLIMPORT ReplicationSlot *MyReplicationSlot;
|
||||
/* GUCs */
|
||||
extern PGDLLIMPORT int max_replication_slots;
|
||||
extern PGDLLIMPORT char *synchronized_standby_slots;
|
||||
extern PGDLLIMPORT int idle_replication_slot_timeout_mins;
|
||||
|
||||
/* shmem initialization functions */
|
||||
extern Size ReplicationSlotsShmemSize(void);
|
||||
@@ -286,7 +291,7 @@ extern void ReplicationSlotsComputeRequiredLSN(void);
|
||||
extern XLogRecPtr ReplicationSlotsComputeLogicalRestartLSN(void);
|
||||
extern bool ReplicationSlotsCountDBSlots(Oid dboid, int *nslots, int *nactive);
|
||||
extern void ReplicationSlotsDropDBSlots(Oid dboid);
|
||||
extern bool InvalidateObsoleteReplicationSlots(ReplicationSlotInvalidationCause cause,
|
||||
extern bool InvalidateObsoleteReplicationSlots(uint32 possible_causes,
|
||||
XLogSegNo oldestSegno,
|
||||
Oid dboid,
|
||||
TransactionId snapshotConflictHorizon);
|
||||
@@ -303,6 +308,7 @@ extern void CheckSlotRequirements(void);
|
||||
extern void CheckSlotPermissions(void);
|
||||
extern ReplicationSlotInvalidationCause
|
||||
GetSlotInvalidationCause(const char *invalidation_reason);
|
||||
extern const char *GetSlotInvalidationCauseName(ReplicationSlotInvalidationCause cause);
|
||||
|
||||
extern bool SlotExistsInSyncStandbySlots(const char *slot_name);
|
||||
extern bool StandbySlotsHaveCaughtup(XLogRecPtr wait_for_lsn, int elevel);
|
||||
|
||||
Reference in New Issue
Block a user