mirror of
https://github.com/postgres/postgres.git
synced 2025-07-27 12:41:57 +03:00
Introduce a new GUC 'standby_slot_names'.
This patch provides a way to ensure that physical standbys that are potential failover candidates have received and flushed changes before the primary server makes them visible to subscribers. Doing so guarantees that the promoted standby server is not lagging behind the subscribers when a failover is necessary. The logical walsender now guarantees that all local changes are sent and flushed to the standby servers corresponding to the replication slots specified in 'standby_slot_names' before sending those changes to the subscriber. Additionally, the SQL functions pg_logical_slot_get_changes, pg_logical_slot_peek_changes and pg_replication_slot_advance are modified to ensure that they process changes for failover slots only after the physical slots specified in 'standby_slot_names' have confirmed WAL receipt for those changes. Author: Hou Zhijie and Shveta Malik Reviewed-by: Masahiko Sawada, Peter Smith, Bertrand Drouvot, Ajin Cherian, Nisha Moond, Amit Kapila Discussion: https://postgr.es/m/514f6f2f-6833-4539-39f1-96cd1e011f23@enterprisedb.com
This commit is contained in:
@ -1725,26 +1725,110 @@ WalSndUpdateProgress(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId
|
||||
ProcessPendingWrites();
|
||||
}
|
||||
|
||||
/*
|
||||
* Wake up the logical walsender processes with logical failover slots if the
|
||||
* currently acquired physical slot is specified in standby_slot_names GUC.
|
||||
*/
|
||||
void
|
||||
PhysicalWakeupLogicalWalSnd(void)
|
||||
{
|
||||
Assert(MyReplicationSlot && SlotIsPhysical(MyReplicationSlot));
|
||||
|
||||
/*
|
||||
* If we are running in a standby, there is no need to wake up walsenders.
|
||||
* This is because we do not support syncing slots to cascading standbys,
|
||||
* so, there are no walsenders waiting for standbys to catch up.
|
||||
*/
|
||||
if (RecoveryInProgress())
|
||||
return;
|
||||
|
||||
if (SlotExistsInStandbySlotNames(NameStr(MyReplicationSlot->data.name)))
|
||||
ConditionVariableBroadcast(&WalSndCtl->wal_confirm_rcv_cv);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if not all standbys have caught up to the flushed position
|
||||
* (flushed_lsn) when the current acquired slot is a logical failover
|
||||
* slot and we are streaming; otherwise, returns false.
|
||||
*
|
||||
* If returning true, the function sets the appropriate wait event in
|
||||
* wait_event; otherwise, wait_event is set to 0.
|
||||
*/
|
||||
static bool
|
||||
NeedToWaitForStandbys(XLogRecPtr flushed_lsn, uint32 *wait_event)
|
||||
{
|
||||
int elevel = got_STOPPING ? ERROR : WARNING;
|
||||
bool failover_slot;
|
||||
|
||||
failover_slot = (replication_active && MyReplicationSlot->data.failover);
|
||||
|
||||
/*
|
||||
* Note that after receiving the shutdown signal, an ERROR is reported if
|
||||
* any slots are dropped, invalidated, or inactive. This measure is taken
|
||||
* to prevent the walsender from waiting indefinitely.
|
||||
*/
|
||||
if (failover_slot && !StandbySlotsHaveCaughtup(flushed_lsn, elevel))
|
||||
{
|
||||
*wait_event = WAIT_EVENT_WAIT_FOR_STANDBY_CONFIRMATION;
|
||||
return true;
|
||||
}
|
||||
|
||||
*wait_event = 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if we need to wait for WALs to be flushed to disk, or if not
|
||||
* all standbys have caught up to the flushed position (flushed_lsn) when the
|
||||
* current acquired slot is a logical failover slot and we are
|
||||
* streaming; otherwise, returns false.
|
||||
*
|
||||
* If returning true, the function sets the appropriate wait event in
|
||||
* wait_event; otherwise, wait_event is set to 0.
|
||||
*/
|
||||
static bool
|
||||
NeedToWaitForWal(XLogRecPtr target_lsn, XLogRecPtr flushed_lsn,
|
||||
uint32 *wait_event)
|
||||
{
|
||||
/* Check if we need to wait for WALs to be flushed to disk */
|
||||
if (target_lsn > flushed_lsn)
|
||||
{
|
||||
*wait_event = WAIT_EVENT_WAL_SENDER_WAIT_FOR_WAL;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Check if the standby slots have caught up to the flushed position */
|
||||
return NeedToWaitForStandbys(flushed_lsn, wait_event);
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait till WAL < loc is flushed to disk so it can be safely sent to client.
|
||||
*
|
||||
* Returns end LSN of flushed WAL. Normally this will be >= loc, but
|
||||
* if we detect a shutdown request (either from postmaster or client)
|
||||
* we will return early, so caller must always check.
|
||||
* If the walsender holds a logical failover slot, we also wait for all the
|
||||
* specified streaming replication standby servers to confirm receipt of WAL
|
||||
* up to RecentFlushPtr. It is beneficial to wait here for the confirmation
|
||||
* up to RecentFlushPtr rather than waiting before transmitting each change
|
||||
* to logical subscribers, which is already covered by RecentFlushPtr.
|
||||
*
|
||||
* Returns end LSN of flushed WAL. Normally this will be >= loc, but if we
|
||||
* detect a shutdown request (either from postmaster or client) we will return
|
||||
* early, so caller must always check.
|
||||
*/
|
||||
static XLogRecPtr
|
||||
WalSndWaitForWal(XLogRecPtr loc)
|
||||
{
|
||||
int wakeEvents;
|
||||
uint32 wait_event = 0;
|
||||
static XLogRecPtr RecentFlushPtr = InvalidXLogRecPtr;
|
||||
|
||||
/*
|
||||
* Fast path to avoid acquiring the spinlock in case we already know we
|
||||
* have enough WAL available. This is particularly interesting if we're
|
||||
* far behind.
|
||||
* have enough WAL available and all the standby servers have confirmed
|
||||
* receipt of WAL up to RecentFlushPtr. This is particularly interesting
|
||||
* if we're far behind.
|
||||
*/
|
||||
if (RecentFlushPtr != InvalidXLogRecPtr &&
|
||||
loc <= RecentFlushPtr)
|
||||
if (!XLogRecPtrIsInvalid(RecentFlushPtr) &&
|
||||
!NeedToWaitForWal(loc, RecentFlushPtr, &wait_event))
|
||||
return RecentFlushPtr;
|
||||
|
||||
/* Get a more recent flush pointer. */
|
||||
@ -1753,8 +1837,14 @@ WalSndWaitForWal(XLogRecPtr loc)
|
||||
else
|
||||
RecentFlushPtr = GetXLogReplayRecPtr(NULL);
|
||||
|
||||
/*
|
||||
* Within the loop, we wait for the necessary WALs to be flushed to disk
|
||||
* first, followed by waiting for standbys to catch up if there are enough
|
||||
* WALs (see NeedToWaitForWal()) or upon receiving the shutdown signal.
|
||||
*/
|
||||
for (;;)
|
||||
{
|
||||
bool wait_for_standby_at_stop = false;
|
||||
long sleeptime;
|
||||
|
||||
/* Clear any already-pending wakeups */
|
||||
@ -1781,21 +1871,35 @@ WalSndWaitForWal(XLogRecPtr loc)
|
||||
if (got_STOPPING)
|
||||
XLogBackgroundFlush();
|
||||
|
||||
/* Update our idea of the currently flushed position. */
|
||||
if (!RecoveryInProgress())
|
||||
RecentFlushPtr = GetFlushRecPtr(NULL);
|
||||
else
|
||||
RecentFlushPtr = GetXLogReplayRecPtr(NULL);
|
||||
/*
|
||||
* To avoid the scenario where standbys need to catch up to a newer
|
||||
* WAL location in each iteration, we update our idea of the currently
|
||||
* flushed position only if we are not waiting for standbys to catch
|
||||
* up.
|
||||
*/
|
||||
if (wait_event != WAIT_EVENT_WAIT_FOR_STANDBY_CONFIRMATION)
|
||||
{
|
||||
if (!RecoveryInProgress())
|
||||
RecentFlushPtr = GetFlushRecPtr(NULL);
|
||||
else
|
||||
RecentFlushPtr = GetXLogReplayRecPtr(NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* If postmaster asked us to stop, don't wait anymore.
|
||||
* If postmaster asked us to stop and the standby slots have caught up
|
||||
* to the flushed position, don't wait anymore.
|
||||
*
|
||||
* It's important to do this check after the recomputation of
|
||||
* RecentFlushPtr, so we can send all remaining data before shutting
|
||||
* down.
|
||||
*/
|
||||
if (got_STOPPING)
|
||||
break;
|
||||
{
|
||||
if (NeedToWaitForStandbys(RecentFlushPtr, &wait_event))
|
||||
wait_for_standby_at_stop = true;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* We only send regular messages to the client for full decoded
|
||||
@ -1810,11 +1914,18 @@ WalSndWaitForWal(XLogRecPtr loc)
|
||||
!waiting_for_ping_response)
|
||||
WalSndKeepalive(false, InvalidXLogRecPtr);
|
||||
|
||||
/* check whether we're done */
|
||||
if (loc <= RecentFlushPtr)
|
||||
/*
|
||||
* Exit the loop if already caught up and doesn't need to wait for
|
||||
* standby slots.
|
||||
*/
|
||||
if (!wait_for_standby_at_stop &&
|
||||
!NeedToWaitForWal(loc, RecentFlushPtr, &wait_event))
|
||||
break;
|
||||
|
||||
/* Waiting for new WAL. Since we need to wait, we're now caught up. */
|
||||
/*
|
||||
* Waiting for new WAL or waiting for standbys to catch up. Since we
|
||||
* need to wait, we're now caught up.
|
||||
*/
|
||||
WalSndCaughtUp = true;
|
||||
|
||||
/*
|
||||
@ -1852,7 +1963,9 @@ WalSndWaitForWal(XLogRecPtr loc)
|
||||
if (pq_is_send_pending())
|
||||
wakeEvents |= WL_SOCKET_WRITEABLE;
|
||||
|
||||
WalSndWait(wakeEvents, sleeptime, WAIT_EVENT_WAL_SENDER_WAIT_FOR_WAL);
|
||||
Assert(wait_event != 0);
|
||||
|
||||
WalSndWait(wakeEvents, sleeptime, wait_event);
|
||||
}
|
||||
|
||||
/* reactivate latch so WalSndLoop knows to continue */
|
||||
@ -2262,6 +2375,7 @@ PhysicalConfirmReceivedLocation(XLogRecPtr lsn)
|
||||
{
|
||||
ReplicationSlotMarkDirty();
|
||||
ReplicationSlotsComputeRequiredLSN();
|
||||
PhysicalWakeupLogicalWalSnd();
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3535,6 +3649,7 @@ WalSndShmemInit(void)
|
||||
|
||||
ConditionVariableInit(&WalSndCtl->wal_flush_cv);
|
||||
ConditionVariableInit(&WalSndCtl->wal_replay_cv);
|
||||
ConditionVariableInit(&WalSndCtl->wal_confirm_rcv_cv);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3604,8 +3719,14 @@ WalSndWait(uint32 socket_events, long timeout, uint32 wait_event)
|
||||
*
|
||||
* And, we use separate shared memory CVs for physical and logical
|
||||
* walsenders for selective wake ups, see WalSndWakeup() for more details.
|
||||
*
|
||||
* If the wait event is WAIT_FOR_STANDBY_CONFIRMATION, wait on another CV
|
||||
* until awakened by physical walsenders after the walreceiver confirms
|
||||
* the receipt of the LSN.
|
||||
*/
|
||||
if (MyWalSnd->kind == REPLICATION_KIND_PHYSICAL)
|
||||
if (wait_event == WAIT_EVENT_WAIT_FOR_STANDBY_CONFIRMATION)
|
||||
ConditionVariablePrepareToSleep(&WalSndCtl->wal_confirm_rcv_cv);
|
||||
else if (MyWalSnd->kind == REPLICATION_KIND_PHYSICAL)
|
||||
ConditionVariablePrepareToSleep(&WalSndCtl->wal_flush_cv);
|
||||
else if (MyWalSnd->kind == REPLICATION_KIND_LOGICAL)
|
||||
ConditionVariablePrepareToSleep(&WalSndCtl->wal_replay_cv);
|
||||
|
Reference in New Issue
Block a user