diff --git a/src/backend/replication/syncrep.c b/src/backend/replication/syncrep.c index 0ea71b5c434..2455841d6b4 100644 --- a/src/backend/replication/syncrep.c +++ b/src/backend/replication/syncrep.c @@ -162,16 +162,23 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit) * sync replication standby names defined. * * Since this routine gets called every commit time, it's important to - * exit quickly if sync replication is not requested. So we check - * WalSndCtl->sync_standbys_defined flag without the lock and exit - * immediately if it's false. If it's true, we need to check it again - * later while holding the lock, to check the flag and operate the sync - * rep queue atomically. This is necessary to avoid the race condition - * described in SyncRepUpdateSyncStandbysDefined(). On the other hand, if - * it's false, the lock is not necessary because we don't touch the queue. + * exit quickly if sync replication is not requested. + * + * We check WalSndCtl->sync_standbys_status flag without the lock and exit + * immediately if SYNC_STANDBY_INIT is set (the checkpointer has + * initialized this data) but SYNC_STANDBY_DEFINED is missing (no sync + * replication requested). + * + * If SYNC_STANDBY_DEFINED is set, we need to check the status again later + * while holding the lock, to check the flag and operate the sync rep + * queue atomically. This is necessary to avoid the race condition + * described in SyncRepUpdateSyncStandbysDefined(). On the other hand, if + * SYNC_STANDBY_DEFINED is not set, the lock is not necessary because we + * don't touch the queue. */ if (!SyncRepRequested() || - !((volatile WalSndCtlData *) WalSndCtl)->sync_standbys_defined) + ((((volatile WalSndCtlData *) WalSndCtl)->sync_standbys_status) & + (SYNC_STANDBY_INIT | SYNC_STANDBY_DEFINED)) == SYNC_STANDBY_INIT) return; /* Cap the level for anything other than commit to remote flush only. */ @@ -187,16 +194,52 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit) Assert(MyProc->syncRepState == SYNC_REP_NOT_WAITING); /* - * We don't wait for sync rep if WalSndCtl->sync_standbys_defined is not - * set. See SyncRepUpdateSyncStandbysDefined. + * We don't wait for sync rep if SYNC_STANDBY_DEFINED is not set. See + * SyncRepUpdateSyncStandbysDefined(). * * Also check that the standby hasn't already replied. Unlikely race * condition but we'll be fetching that cache line anyway so it's likely * to be a low cost check. + * + * If the sync standby data has not been initialized yet + * (SYNC_STANDBY_INIT is not set), fall back to a check based on the LSN, + * then do a direct GUC check. */ - if (!WalSndCtl->sync_standbys_defined || - lsn <= WalSndCtl->lsn[mode]) + if (WalSndCtl->sync_standbys_status & SYNC_STANDBY_INIT) { + if ((WalSndCtl->sync_standbys_status & SYNC_STANDBY_DEFINED) == 0 || + lsn <= WalSndCtl->lsn[mode]) + { + LWLockRelease(SyncRepLock); + return; + } + } + else if (lsn <= WalSndCtl->lsn[mode]) + { + /* + * The LSN is older than what we need to wait for. The sync standby + * data has not been initialized yet, but we are OK to not wait + * because we know that there is no point in doing so based on the + * LSN. + */ + LWLockRelease(SyncRepLock); + return; + } + else if (!SyncStandbysDefined()) + { + /* + * If we are here, the sync standby data has not been initialized yet, + * and the LSN is newer than what need to wait for, so we have fallen + * back to the best thing we could do in this case: a check on + * SyncStandbysDefined() to see if the GUC is set or not. + * + * When the GUC has a value, we wait until the checkpointer updates + * the status data because we cannot be sure yet if we should wait or + * not. Here, the GUC has *no* value, we are sure that there is no + * point to wait; this matters for example when initializing a + * cluster, where we should never wait, and no sync standbys is the + * default behavior. + */ LWLockRelease(SyncRepLock); return; } @@ -918,7 +961,7 @@ SyncRepWakeQueue(bool all, int mode) /* * The checkpointer calls this as needed to update the shared - * sync_standbys_defined flag, so that backends don't remain permanently wedged + * sync_standbys_status flag, so that backends don't remain permanently wedged * if synchronous_standby_names is unset. It's safe to check the current value * without the lock, because it's only ever updated by one process. But we * must take the lock to change it. @@ -928,7 +971,8 @@ SyncRepUpdateSyncStandbysDefined(void) { bool sync_standbys_defined = SyncStandbysDefined(); - if (sync_standbys_defined != WalSndCtl->sync_standbys_defined) + if (sync_standbys_defined != + ((WalSndCtl->sync_standbys_status & SYNC_STANDBY_DEFINED) != 0)) { LWLockAcquire(SyncRepLock, LW_EXCLUSIVE); @@ -952,7 +996,30 @@ SyncRepUpdateSyncStandbysDefined(void) * backend that hasn't yet reloaded its config might go to sleep on * the queue (and never wake up). This prevents that. */ - WalSndCtl->sync_standbys_defined = sync_standbys_defined; + WalSndCtl->sync_standbys_status = SYNC_STANDBY_INIT | + (sync_standbys_defined ? SYNC_STANDBY_DEFINED : 0); + + LWLockRelease(SyncRepLock); + } + else if ((WalSndCtl->sync_standbys_status & SYNC_STANDBY_INIT) == 0) + { + LWLockAcquire(SyncRepLock, LW_EXCLUSIVE); + + /* + * Note that there is no need to wake up the queues here. We would + * reach this path only if SyncStandbysDefined() returns false, or it + * would mean that some backends are waiting with the GUC set. See + * SyncRepWaitForLSN(). + */ + Assert(!SyncStandbysDefined()); + + /* + * Even if there is no sync standby defined, let the readers of this + * information know that the sync standby data has been initialized. + * This can just be done once, hence the previous check on + * SYNC_STANDBY_INIT to avoid useless work. + */ + WalSndCtl->sync_standbys_status |= SYNC_STANDBY_INIT; LWLockRelease(SyncRepLock); } diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index f433658a0d1..1e0480b1d55 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -1499,13 +1499,13 @@ WalSndUpdateProgress(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId * When skipping empty transactions in synchronous replication, we send a * keepalive message to avoid delaying such transactions. * - * It is okay to check sync_standbys_defined flag without lock here as in - * the worst case we will just send an extra keepalive message when it is + * It is okay to check sync_standbys_status without lock here as in the + * worst case we will just send an extra keepalive message when it is * really not required. */ if (skipped_xact && SyncRepRequested() && - ((volatile WalSndCtlData *) WalSndCtl)->sync_standbys_defined) + (((volatile WalSndCtlData *) WalSndCtl)->sync_standbys_status & SYNC_STANDBY_DEFINED)) { WalSndKeepalive(false, lsn); diff --git a/src/include/replication/walsender_private.h b/src/include/replication/walsender_private.h index 7d919583bd3..48e1bd1a5aa 100644 --- a/src/include/replication/walsender_private.h +++ b/src/include/replication/walsender_private.h @@ -103,11 +103,11 @@ typedef struct XLogRecPtr lsn[NUM_SYNC_REP_WAIT_MODE]; /* - * Are any sync standbys defined? Waiting backends can't reload the - * config file safely, so checkpointer updates this value as needed. - * Protected by SyncRepLock. + * Status of data related to the synchronous standbys. Waiting backends + * can't reload the config file safely, so checkpointer updates this value + * as needed. Protected by SyncRepLock. */ - bool sync_standbys_defined; + bits8 sync_standbys_status; /* used as a registry of physical / logical walsenders to wake */ ConditionVariable wal_flush_cv; @@ -116,6 +116,21 @@ typedef struct WalSnd walsnds[FLEXIBLE_ARRAY_MEMBER]; } WalSndCtlData; +/* Flags for WalSndCtlData->sync_standbys_status */ + +/* + * Is the synchronous standby data initialized from the GUC? This is set the + * first time synchronous_standby_names is processed by the checkpointer. + */ +#define SYNC_STANDBY_INIT (1 << 0) + +/* + * Is the synchronous standby data defined? This is set when + * synchronous_standby_names has some data, after being processed by the + * checkpointer. + */ +#define SYNC_STANDBY_DEFINED (1 << 1) + extern PGDLLIMPORT WalSndCtlData *WalSndCtl;