From 171c457cd06051b5910cf357ea6a77643b69088a Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 10 Nov 2020 22:51:19 -0500
Subject: [PATCH] Fix and simplify some usages of TimestampDifference().

Introduce TimestampDifferenceMilliseconds() to simplify callers
that would rather have the difference in milliseconds, instead of
the select()-oriented seconds-and-microseconds format.  This gets
rid of at least one integer division per call, and it eliminates
some apparently-easy-to-mess-up arithmetic.

Two of these call sites were in fact wrong:

* pg_prewarm's autoprewarm_main() forgot to multiply the seconds
by 1000, thus ending up with a delay 1000X shorter than intended.
That doesn't quite make it a busy-wait, but close.

* postgres_fdw's pgfdw_get_cleanup_result() thought it needed to compute
microseconds not milliseconds, thus ending up with a delay 1000X longer
than intended.  Somebody along the way had noticed this problem but
misdiagnosed the cause, and imposed an ad-hoc 60-second limit rather
than fixing the units.  This was relatively harmless in context, because
we don't care that much about exactly how long this delay is; still,
it's wrong.

There are a few more callers of TimestampDifference() that don't
have a direct need for seconds-and-microseconds, but can't use
TimestampDifferenceMilliseconds() either because they do need
microsecond precision or because they might possibly deal with
intervals long enough to overflow 32-bit milliseconds.  It might be
worth inventing another API to improve that, but that seems outside
the scope of this patch; so those callers are untouched here.

Given the fact that we are fixing some bugs, and the likelihood
that future patches might want to back-patch code that uses this
new API, back-patch to all supported branches.

Alexey Kondratov and Tom Lane

Discussion: https://postgr.es/m/3b1c053a21c07c1ed5e00be3b2b855ef@postgrespro.ru
---
 contrib/pg_prewarm/autoprewarm.c           | 12 ++--
 contrib/postgres_fdw/connection.c          |  9 +--
 src/backend/access/transam/xlog.c          | 84 +++++++++-------------
 src/backend/replication/walreceiverfuncs.c | 25 ++-----
 src/backend/replication/walsender.c        |  8 +--
 src/backend/utils/adt/timestamp.c          | 38 +++++++++-
 src/include/utils/timestamp.h              |  2 +
 7 files changed, 81 insertions(+), 97 deletions(-)

diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c
index 38ae240c551..8f7125c7fc0 100644
--- a/contrib/pg_prewarm/autoprewarm.c
+++ b/contrib/pg_prewarm/autoprewarm.c
@@ -224,18 +224,16 @@ autoprewarm_main(Datum main_arg)
 		}
 		else
 		{
-			long		delay_in_ms = 0;
-			TimestampTz next_dump_time = 0;
-			long		secs = 0;
-			int			usecs = 0;
+			TimestampTz next_dump_time;
+			long		delay_in_ms;
 
 			/* Compute the next dump time. */
 			next_dump_time =
 				TimestampTzPlusMilliseconds(last_dump_time,
 											autoprewarm_interval * 1000);
-			TimestampDifference(GetCurrentTimestamp(), next_dump_time,
-								&secs, &usecs);
-			delay_in_ms = secs + (usecs / 1000);
+			delay_in_ms =
+				TimestampDifferenceMilliseconds(GetCurrentTimestamp(),
+												next_dump_time);
 
 			/* Perform a dump if it's time. */
 			if (delay_in_ms <= 0)
diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c
index 9ef2d1336ef..f382203b8f3 100644
--- a/contrib/postgres_fdw/connection.c
+++ b/contrib/postgres_fdw/connection.c
@@ -1140,20 +1140,15 @@ pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime, PGresult **result)
 			{
 				int			wc;
 				TimestampTz now = GetCurrentTimestamp();
-				long		secs;
-				int			microsecs;
 				long		cur_timeout;
 
 				/* If timeout has expired, give up, else get sleep time. */
-				if (now >= endtime)
+				cur_timeout = TimestampDifferenceMilliseconds(now, endtime);
+				if (cur_timeout <= 0)
 				{
 					timed_out = true;
 					goto exit;
 				}
-				TimestampDifference(now, endtime, &secs, &microsecs);
-
-				/* To protect against clock skew, limit sleep to one minute. */
-				cur_timeout = Min(60000, secs * USECS_PER_SEC + microsecs);
 
 				/* Sleep until there's something to do */
 				wc = WaitLatchOrSocket(MyLatch,
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index af57d77554e..2a79c2e3586 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -6005,8 +6005,7 @@ recoveryApplyDelay(XLogReaderState *record)
 {
 	uint8		xact_info;
 	TimestampTz xtime;
-	long		secs;
-	int			microsecs;
+	long		msecs;
 
 	/* nothing to do if no delay configured */
 	if (recovery_min_apply_delay <= 0)
@@ -6047,9 +6046,9 @@ recoveryApplyDelay(XLogReaderState *record)
 	 * Exit without arming the latch if it's already past time to apply this
 	 * record
 	 */
-	TimestampDifference(GetCurrentTimestamp(), recoveryDelayUntilTime,
-						&secs, &microsecs);
-	if (secs <= 0 && microsecs <= 0)
+	msecs = TimestampDifferenceMilliseconds(GetCurrentTimestamp(),
+											recoveryDelayUntilTime);
+	if (msecs <= 0)
 		return false;
 
 	while (true)
@@ -6066,19 +6065,17 @@ recoveryApplyDelay(XLogReaderState *record)
 		 * Wait for difference between GetCurrentTimestamp() and
 		 * recoveryDelayUntilTime
 		 */
-		TimestampDifference(GetCurrentTimestamp(), recoveryDelayUntilTime,
-							&secs, &microsecs);
+		msecs = TimestampDifferenceMilliseconds(GetCurrentTimestamp(),
+												recoveryDelayUntilTime);
 
-		/* NB: We're ignoring waits below min_apply_delay's resolution. */
-		if (secs <= 0 && microsecs / 1000 <= 0)
+		if (msecs <= 0)
 			break;
 
-		elog(DEBUG2, "recovery apply delay %ld seconds, %d milliseconds",
-			 secs, microsecs / 1000);
+		elog(DEBUG2, "recovery apply delay %ld milliseconds", msecs);
 
 		(void) WaitLatch(&XLogCtl->recoveryWakeupLatch,
 						 WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
-						 secs * 1000L + microsecs / 1000,
+						 msecs,
 						 WAIT_EVENT_RECOVERY_APPLY_DELAY);
 	}
 	return true;
@@ -8448,33 +8445,24 @@ LogCheckpointStart(int flags, bool restartpoint)
 static void
 LogCheckpointEnd(bool restartpoint)
 {
-	long		write_secs,
-				sync_secs,
-				total_secs,
-				longest_secs,
-				average_secs;
-	int			write_usecs,
-				sync_usecs,
-				total_usecs,
-				longest_usecs,
-				average_usecs;
+	long		write_msecs,
+				sync_msecs,
+				total_msecs,
+				longest_msecs,
+				average_msecs;
 	uint64		average_sync_time;
 
 	CheckpointStats.ckpt_end_t = GetCurrentTimestamp();
 
-	TimestampDifference(CheckpointStats.ckpt_write_t,
-						CheckpointStats.ckpt_sync_t,
-						&write_secs, &write_usecs);
+	write_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_write_t,
+												  CheckpointStats.ckpt_sync_t);
 
-	TimestampDifference(CheckpointStats.ckpt_sync_t,
-						CheckpointStats.ckpt_sync_end_t,
-						&sync_secs, &sync_usecs);
+	sync_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_sync_t,
+												 CheckpointStats.ckpt_sync_end_t);
 
 	/* Accumulate checkpoint timing summary data, in milliseconds. */
-	BgWriterStats.m_checkpoint_write_time +=
-		write_secs * 1000 + write_usecs / 1000;
-	BgWriterStats.m_checkpoint_sync_time +=
-		sync_secs * 1000 + sync_usecs / 1000;
+	BgWriterStats.m_checkpoint_write_time += write_msecs;
+	BgWriterStats.m_checkpoint_sync_time += sync_msecs;
 
 	/*
 	 * All of the published timing statistics are accounted for.  Only
@@ -8483,25 +8471,20 @@ LogCheckpointEnd(bool restartpoint)
 	if (!log_checkpoints)
 		return;
 
-	TimestampDifference(CheckpointStats.ckpt_start_t,
-						CheckpointStats.ckpt_end_t,
-						&total_secs, &total_usecs);
+	total_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_start_t,
+												  CheckpointStats.ckpt_end_t);
 
 	/*
 	 * Timing values returned from CheckpointStats are in microseconds.
-	 * Convert to the second plus microsecond form that TimestampDifference
-	 * returns for homogeneous printing.
+	 * Convert to milliseconds for consistent printing.
 	 */
-	longest_secs = (long) (CheckpointStats.ckpt_longest_sync / 1000000);
-	longest_usecs = CheckpointStats.ckpt_longest_sync -
-		(uint64) longest_secs * 1000000;
+	longest_msecs = (long) ((CheckpointStats.ckpt_longest_sync + 999) / 1000);
 
 	average_sync_time = 0;
 	if (CheckpointStats.ckpt_sync_rels > 0)
 		average_sync_time = CheckpointStats.ckpt_agg_sync_time /
 			CheckpointStats.ckpt_sync_rels;
-	average_secs = (long) (average_sync_time / 1000000);
-	average_usecs = average_sync_time - (uint64) average_secs * 1000000;
+	average_msecs = (long) ((average_sync_time + 999) / 1000);
 
 	elog(LOG, "%s complete: wrote %d buffers (%.1f%%); "
 		 "%d WAL file(s) added, %d removed, %d recycled; "
@@ -8514,12 +8497,12 @@ LogCheckpointEnd(bool restartpoint)
 		 CheckpointStats.ckpt_segs_added,
 		 CheckpointStats.ckpt_segs_removed,
 		 CheckpointStats.ckpt_segs_recycled,
-		 write_secs, write_usecs / 1000,
-		 sync_secs, sync_usecs / 1000,
-		 total_secs, total_usecs / 1000,
+		 write_msecs / 1000, (int) (write_msecs % 1000),
+		 sync_msecs / 1000, (int) (sync_msecs % 1000),
+		 total_msecs / 1000, (int) (total_msecs % 1000),
 		 CheckpointStats.ckpt_sync_rels,
-		 longest_secs, longest_usecs / 1000,
-		 average_secs, average_usecs / 1000,
+		 longest_msecs / 1000, (int) (longest_msecs % 1000),
+		 average_msecs / 1000, (int) (average_msecs % 1000),
 		 (int) (PrevCheckPointDistance / 1024.0),
 		 (int) (CheckPointDistanceEstimate / 1024.0));
 }
@@ -12024,13 +12007,10 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					if (!TimestampDifferenceExceeds(last_fail_time, now,
 													wal_retrieve_retry_interval))
 					{
-						long		secs,
-									wait_time;
-						int			usecs;
+						long		wait_time;
 
-						TimestampDifference(last_fail_time, now, &secs, &usecs);
 						wait_time = wal_retrieve_retry_interval -
-							(secs * 1000 + usecs / 1000);
+							TimestampDifferenceMilliseconds(last_fail_time, now);
 
 						(void) WaitLatch(&XLogCtl->recoveryWakeupLatch,
 										 WL_LATCH_SET | WL_TIMEOUT |
diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c
index 2d6cdfe0a21..76c73d8c1bc 100644
--- a/src/backend/replication/walreceiverfuncs.c
+++ b/src/backend/replication/walreceiverfuncs.c
@@ -320,10 +320,6 @@ GetReplicationApplyDelay(void)
 	WalRcvData *walrcv = WalRcv;
 	XLogRecPtr	receivePtr;
 	XLogRecPtr	replayPtr;
-
-	long		secs;
-	int			usecs;
-
 	TimestampTz chunkReplayStartTime;
 
 	SpinLockAcquire(&walrcv->mutex);
@@ -340,11 +336,8 @@ GetReplicationApplyDelay(void)
 	if (chunkReplayStartTime == 0)
 		return -1;
 
-	TimestampDifference(chunkReplayStartTime,
-						GetCurrentTimestamp(),
-						&secs, &usecs);
-
-	return (((int) secs * 1000) + (usecs / 1000));
+	return TimestampDifferenceMilliseconds(chunkReplayStartTime,
+										   GetCurrentTimestamp());
 }
 
 /*
@@ -355,24 +348,14 @@ int
 GetReplicationTransferLatency(void)
 {
 	WalRcvData *walrcv = WalRcv;
-
 	TimestampTz lastMsgSendTime;
 	TimestampTz lastMsgReceiptTime;
 
-	long		secs = 0;
-	int			usecs = 0;
-	int			ms;
-
 	SpinLockAcquire(&walrcv->mutex);
 	lastMsgSendTime = walrcv->lastMsgSendTime;
 	lastMsgReceiptTime = walrcv->lastMsgReceiptTime;
 	SpinLockRelease(&walrcv->mutex);
 
-	TimestampDifference(lastMsgSendTime,
-						lastMsgReceiptTime,
-						&secs, &usecs);
-
-	ms = ((int) secs * 1000) + (usecs / 1000);
-
-	return ms;
+	return TimestampDifferenceMilliseconds(lastMsgSendTime,
+										   lastMsgReceiptTime);
 }
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index dc4b86ba683..86e6f4e433b 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -2079,8 +2079,6 @@ WalSndComputeSleeptime(TimestampTz now)
 	if (wal_sender_timeout > 0 && last_reply_timestamp > 0)
 	{
 		TimestampTz wakeup_time;
-		long		sec_to_timeout;
-		int			microsec_to_timeout;
 
 		/*
 		 * At the latest stop sleeping once wal_sender_timeout has been
@@ -2099,11 +2097,7 @@ WalSndComputeSleeptime(TimestampTz now)
 													  wal_sender_timeout / 2);
 
 		/* Compute relative time until wakeup. */
-		TimestampDifference(now, wakeup_time,
-							&sec_to_timeout, &microsec_to_timeout);
-
-		sleeptime = sec_to_timeout * 1000 +
-			microsec_to_timeout / 1000;
+		sleeptime = TimestampDifferenceMilliseconds(now, wakeup_time);
 	}
 
 	return sleeptime;
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index a246ffa8036..3e7c77ee48f 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -1623,12 +1623,14 @@ timeofday(PG_FUNCTION_ARGS)
  * TimestampDifference -- convert the difference between two timestamps
  *		into integer seconds and microseconds
  *
+ * This is typically used to calculate a wait timeout for select(2),
+ * which explains the otherwise-odd choice of output format.
+ *
  * Both inputs must be ordinary finite timestamps (in current usage,
  * they'll be results from GetCurrentTimestamp()).
  *
- * We expect start_time <= stop_time.  If not, we return zeros; for current
- * callers there is no need to be tense about which way division rounds on
- * negative inputs.
+ * We expect start_time <= stop_time.  If not, we return zeros,
+ * since then we're already past the previously determined stop_time.
  */
 void
 TimestampDifference(TimestampTz start_time, TimestampTz stop_time,
@@ -1648,6 +1650,36 @@ TimestampDifference(TimestampTz start_time, TimestampTz stop_time,
 	}
 }
 
+/*
+ * TimestampDifferenceMilliseconds -- convert the difference between two
+ * 		timestamps into integer milliseconds
+ *
+ * This is typically used to calculate a wait timeout for WaitLatch()
+ * or a related function.  The choice of "long" as the result type
+ * is to harmonize with that.  It is caller's responsibility that the
+ * input timestamps not be so far apart as to risk overflow of "long"
+ * (which'd happen at about 25 days on machines with 32-bit "long").
+ *
+ * Both inputs must be ordinary finite timestamps (in current usage,
+ * they'll be results from GetCurrentTimestamp()).
+ *
+ * We expect start_time <= stop_time.  If not, we return zero,
+ * since then we're already past the previously determined stop_time.
+ *
+ * Note we round up any fractional millisecond, since waiting for just
+ * less than the intended timeout is undesirable.
+ */
+long
+TimestampDifferenceMilliseconds(TimestampTz start_time, TimestampTz stop_time)
+{
+	TimestampTz diff = stop_time - start_time;
+
+	if (diff <= 0)
+		return 0;
+	else
+		return (long) ((diff + 999) / 1000);
+}
+
 /*
  * TimestampDifferenceExceeds -- report whether the difference between two
  *		timestamps is >= a threshold (expressed in milliseconds)
diff --git a/src/include/utils/timestamp.h b/src/include/utils/timestamp.h
index ea16190ec3d..512f0c110ce 100644
--- a/src/include/utils/timestamp.h
+++ b/src/include/utils/timestamp.h
@@ -72,6 +72,8 @@ extern TimestampTz GetSQLCurrentTimestamp(int32 typmod);
 extern Timestamp GetSQLLocalTimestamp(int32 typmod);
 extern void TimestampDifference(TimestampTz start_time, TimestampTz stop_time,
 								long *secs, int *microsecs);
+extern long TimestampDifferenceMilliseconds(TimestampTz start_time,
+											TimestampTz stop_time);
 extern bool TimestampDifferenceExceeds(TimestampTz start_time,
 									   TimestampTz stop_time,
 									   int msec);