From 081e4104a4317709c1adf0fab42a1546ebf8d6b2 Mon Sep 17 00:00:00 2001 From: Noah Misch Date: Fri, 31 Aug 2018 22:59:58 -0700 Subject: [PATCH] Ignore server-side delays when enforcing wal_sender_timeout. Healthy clients of servers having poor I/O performance, such as buildfarm members hamster and tern, saw unexpected timeouts. That disagreed with documentation. This fix adds one gettimeofday() call whenever ProcessRepliesIfAny() finds no client reply messages. Back-patch to 9.4; the bug's symptom is rare and mild, and the code all moved between 9.3 and 9.4. Discussion: https://postgr.es/m/20180826034600.GA1105084@rfd.leadboat.com --- src/backend/replication/walsender.c | 64 ++++++++++++++++------------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index ac11bf57244..6645ed86081 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -154,9 +154,12 @@ static StringInfoData output_message; static StringInfoData reply_message; static StringInfoData tmpbuf; +/* Timestamp of last ProcessRepliesIfAny(). */ +static TimestampTz last_processing = 0; + /* - * Timestamp of the last receipt of the reply from the standby. Set to 0 if - * wal_sender_timeout doesn't need to be active. + * Timestamp of last ProcessRepliesIfAny() that saw a reply from the + * standby. Set to 0 if wal_sender_timeout doesn't need to be active. */ static TimestampTz last_reply_timestamp = 0; @@ -213,8 +216,8 @@ static void ProcessStandbyReplyMessage(void); static void ProcessStandbyHSFeedbackMessage(void); static void ProcessRepliesIfAny(void); static void WalSndKeepalive(bool requestReply); -static void WalSndKeepaliveIfNecessary(TimestampTz now); -static void WalSndCheckTimeOut(TimestampTz now); +static void WalSndKeepaliveIfNecessary(void); +static void WalSndCheckTimeOut(void); static long WalSndComputeSleeptime(TimestampTz now); static void WalSndPrepareWrite(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid, bool last_write); static void WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid, bool last_write); @@ -1117,18 +1120,16 @@ WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid, /* Check for input from the client */ ProcessRepliesIfAny(); - now = GetCurrentTimestamp(); - /* die if timeout was reached */ - WalSndCheckTimeOut(now); + WalSndCheckTimeOut(); /* Send keepalive if the time has come */ - WalSndKeepaliveIfNecessary(now); + WalSndKeepaliveIfNecessary(); if (!pq_is_send_pending()) break; - sleeptime = WalSndComputeSleeptime(now); + sleeptime = WalSndComputeSleeptime(GetCurrentTimestamp()); wakeEvents = WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_SOCKET_WRITEABLE | WL_SOCKET_READABLE | WL_TIMEOUT; @@ -1198,7 +1199,6 @@ WalSndWaitForWal(XLogRecPtr loc) for (;;) { long sleeptime; - TimestampTz now; /* * Emergency bailout if postmaster has died. This is to avoid the @@ -1283,13 +1283,11 @@ WalSndWaitForWal(XLogRecPtr loc) !pq_is_send_pending()) break; - now = GetCurrentTimestamp(); - /* die if timeout was reached */ - WalSndCheckTimeOut(now); + WalSndCheckTimeOut(); /* Send keepalive if the time has come */ - WalSndKeepaliveIfNecessary(now); + WalSndKeepaliveIfNecessary(); /* * Sleep until something happens or we time out. Also wait for the @@ -1298,7 +1296,7 @@ WalSndWaitForWal(XLogRecPtr loc) * new WAL to be generated. (But if we have nothing to send, we don't * want to wake on socket-writable.) */ - sleeptime = WalSndComputeSleeptime(now); + sleeptime = WalSndComputeSleeptime(GetCurrentTimestamp()); wakeEvents = WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_SOCKET_READABLE | WL_TIMEOUT; @@ -1438,6 +1436,8 @@ ProcessRepliesIfAny(void) int r; bool received = false; + last_processing = GetCurrentTimestamp(); + for (;;) { pq_startmsgread(); @@ -1525,7 +1525,7 @@ ProcessRepliesIfAny(void) */ if (received) { - last_reply_timestamp = GetCurrentTimestamp(); + last_reply_timestamp = last_processing; waiting_for_ping_response = false; } } @@ -1818,10 +1818,18 @@ WalSndComputeSleeptime(TimestampTz now) /* * Check whether there have been responses by the client within - * wal_sender_timeout and shutdown if not. + * wal_sender_timeout and shutdown if not. Using last_processing as the + * reference point avoids counting server-side stalls against the client. + * However, a long server-side stall can make WalSndKeepaliveIfNecessary() + * postdate last_processing by more than wal_sender_timeout. If that happens, + * the client must reply almost immediately to avoid a timeout. This rarely + * affects the default configuration, under which clients spontaneously send a + * message every standby_message_timeout = wal_sender_timeout/6 = 10s. We + * could eliminate that problem by recognizing timeout expiration at + * wal_sender_timeout/2 after the keepalive. */ static void -WalSndCheckTimeOut(TimestampTz now) +WalSndCheckTimeOut(void) { TimestampTz timeout; @@ -1832,7 +1840,7 @@ WalSndCheckTimeOut(TimestampTz now) timeout = TimestampTzPlusMilliseconds(last_reply_timestamp, wal_sender_timeout); - if (wal_sender_timeout > 0 && now >= timeout) + if (wal_sender_timeout > 0 && last_processing >= timeout) { /* * Since typically expiration of replication timeout means @@ -1863,8 +1871,6 @@ WalSndLoop(WalSndSendDataCallback send_data) */ for (;;) { - TimestampTz now; - /* * Emergency bailout if postmaster has died. This is to avoid the * necessity for manual cleanup of all postmaster children. @@ -1942,13 +1948,11 @@ WalSndLoop(WalSndSendDataCallback send_data) WalSndDone(send_data); } - now = GetCurrentTimestamp(); - /* Check for replication timeout. */ - WalSndCheckTimeOut(now); + WalSndCheckTimeOut(); /* Send keepalive if the time has come */ - WalSndKeepaliveIfNecessary(now); + WalSndKeepaliveIfNecessary(); /* * We don't block if not caught up, unless there is unsent data @@ -1966,7 +1970,11 @@ WalSndLoop(WalSndSendDataCallback send_data) wakeEvents = WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_TIMEOUT | WL_SOCKET_READABLE; - sleeptime = WalSndComputeSleeptime(now); + /* + * Use fresh timestamp, not last_processed, to reduce the chance + * of reaching wal_sender_timeout before sending a keepalive. + */ + sleeptime = WalSndComputeSleeptime(GetCurrentTimestamp()); if (pq_is_send_pending()) wakeEvents |= WL_SOCKET_WRITEABLE; @@ -3037,7 +3045,7 @@ WalSndKeepalive(bool requestReply) * Send keepalive message if too much time has elapsed. */ static void -WalSndKeepaliveIfNecessary(TimestampTz now) +WalSndKeepaliveIfNecessary(void) { TimestampTz ping_time; @@ -3058,7 +3066,7 @@ WalSndKeepaliveIfNecessary(TimestampTz now) */ ping_time = TimestampTzPlusMilliseconds(last_reply_timestamp, wal_sender_timeout / 2); - if (now >= ping_time) + if (last_processing >= ping_time) { WalSndKeepalive(true); waiting_for_ping_response = true;