diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index c67688cbf5f..8d7d9a2f3e8 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -29698,7 +29698,7 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset - + pg_logical_slot_get_binary_changes @@ -29970,7 +29970,9 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset standby server. Temporary synced slots, if any, cannot be used for logical decoding and must be dropped after promotion. See for details. - Note that this function cannot be executed if + Note that this function is primarily intended for testing and + debugging purposes and should be used with caution. Additionally, + this function cannot be executed if sync_replication_slots is enabled and the slotsync worker is already running to perform the synchronization of slots. diff --git a/doc/src/sgml/logicaldecoding.sgml b/doc/src/sgml/logicaldecoding.sgml index dd9e83b08ea..5c5957e0d37 100644 --- a/doc/src/sgml/logicaldecoding.sgml +++ b/doc/src/sgml/logicaldecoding.sgml @@ -370,10 +370,10 @@ postgres=# select * from pg_logical_slot_get_changes('regression_slot', NULL, NU pg_create_logical_replication_slot, or by using the failover option of - CREATE SUBSCRIPTION during slot creation, and then calling - - pg_sync_replication_slots - on the standby. By setting + CREATE SUBSCRIPTION during slot creation. + Additionally, enabling + sync_replication_slots on the standby + is required. By enabling sync_replication_slots on the standby, the failover slots can be synchronized periodically in the slotsync worker. For the synchronization to work, it is mandatory to @@ -398,6 +398,52 @@ postgres=# select * from pg_logical_slot_get_changes('regression_slot', NULL, NU receiving the WAL up to the latest flushed position on the primary server. 
+ + + While enabling + sync_replication_slots allows for automatic + periodic synchronization of failover slots, they can also be manually + synchronized using the + pg_sync_replication_slots function on the standby. + However, this function is primarily intended for testing and debugging and + should be used with caution. Unlike automatic synchronization, it does not + include cyclic retries, making it more prone to synchronization failures, + particularly during initial sync scenarios where the required WAL files + or catalog rows for the slot may have already been removed or are at risk + of being removed on the standby. In contrast, automatic synchronization + via sync_replication_slots provides continuous slot + updates, enabling seamless failover and supporting high availability. + Therefore, it is the recommended method for synchronizing slots. + + + + + When slot synchronization is configured as recommended, + and the initial synchronization is performed either automatically or + manually via pg_sync_replication_slots, the standby can persist the + synchronized slot only if the following condition is met: The logical + replication slot on the primary must retain WALs and system catalog + rows that are still available on the standby. This ensures data + integrity and allows logical replication to continue smoothly after + promotion. + If the required WALs or catalog rows have already been purged from the + standby, the slot will not be persisted to avoid data loss. In such + cases, the following log message may appear: + + LOG: could not synchronize replication slot "failover_slot" + DETAIL: Synchronization could lead to data loss as the remote slot needs WAL at LSN 0/3003F28 and catalog xmin 754, but the standby has LSN 0/3003F28 and catalog xmin 756 + + If the logical replication slot is actively used by a consumer, no + manual intervention is needed; the slot will advance automatically, + and synchronization will resume in the next cycle. 
However, if no + consumer is configured, it is advisable to manually advance the slot + on the primary using + pg_logical_slot_get_changes or + + pg_logical_slot_get_binary_changes, + allowing synchronization to proceed. + + The ability to resume logical replication after failover depends upon the pg_replication_slots.synced diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c index 656e66e0ae0..f1dcbebfa1a 100644 --- a/src/backend/replication/logical/slotsync.c +++ b/src/backend/replication/logical/slotsync.c @@ -211,9 +211,9 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid, * impact the users, so we used DEBUG1 level to log the message. */ ereport(slot->data.persistency == RS_TEMPORARY ? LOG : DEBUG1, - errmsg("could not synchronize replication slot \"%s\" because remote slot precedes local slot", + errmsg("could not synchronize replication slot \"%s\"", remote_slot->name), - errdetail("The remote slot has LSN %X/%X and catalog xmin %u, but the local slot has LSN %X/%X and catalog xmin %u.", + errdetail("Synchronization could lead to data loss as the remote slot needs WAL at LSN %X/%X and catalog xmin %u, but the standby has LSN %X/%X and catalog xmin %u.", LSN_FORMAT_ARGS(remote_slot->restart_lsn), remote_slot->catalog_xmin, LSN_FORMAT_ARGS(slot->data.restart_lsn), @@ -593,7 +593,7 @@ update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid) { ereport(LOG, errmsg("could not synchronize replication slot \"%s\"", remote_slot->name), - errdetail("Logical decoding could not find consistent point from local slot's LSN %X/%X.", + errdetail("Synchronization could lead to data loss as standby could not build a consistent snapshot to decode WALs at LSN %X/%X.", LSN_FORMAT_ARGS(slot->data.restart_lsn))); return false;