Fix and simplify some usages of TimestampDifference().
authorTom Lane <[email protected]>
Wed, 11 Nov 2020 03:51:19 +0000 (22:51 -0500)
committerTom Lane <[email protected]>
Wed, 11 Nov 2020 03:51:58 +0000 (22:51 -0500)
Introduce TimestampDifferenceMilliseconds() to simplify callers
that would rather have the difference in milliseconds, instead of
the select()-oriented seconds-and-microseconds format.  This gets
rid of at least one integer division per call, and it eliminates
some apparently-easy-to-mess-up arithmetic.

Two of these call sites were in fact wrong:

* pg_prewarm's autoprewarm_main() forgot to multiply the seconds
by 1000, thus ending up with a delay 1000X shorter than intended.
That doesn't quite make it a busy-wait, but close.

* postgres_fdw's pgfdw_get_cleanup_result() thought it needed to compute
microseconds not milliseconds, thus ending up with a delay 1000X longer
than intended.  Somebody along the way had noticed this problem but
misdiagnosed the cause, and imposed an ad-hoc 60-second limit rather
than fixing the units.  This was relatively harmless in context, because
we don't care that much about exactly how long this delay is; still,
it's wrong.

There are a few more callers of TimestampDifference() that don't
have a direct need for seconds-and-microseconds, but can't use
TimestampDifferenceMilliseconds() either because they do need
microsecond precision or because they might possibly deal with
intervals long enough to overflow 32-bit milliseconds.  It might be
worth inventing another API to improve that, but that seems outside
the scope of this patch; so those callers are untouched here.

Given the fact that we are fixing some bugs, and the likelihood
that future patches might want to back-patch code that uses this
new API, back-patch to all supported branches.

Alexey Kondratov and Tom Lane

Discussion: https://postgr.es/m/3b1c053a21c07c1ed5e00be3b2b855ef@postgrespro.ru

contrib/postgres_fdw/connection.c
src/backend/access/transam/xlog.c
src/backend/replication/walreceiverfuncs.c
src/backend/replication/walsender.c
src/backend/utils/adt/timestamp.c
src/include/utils/timestamp.h

index 2ee2744064d6fd434e79c07485f753a386a8c137..7e35c3a8704d7eb5fd21f19de121755054d83005 100644 (file)
@@ -1153,20 +1153,15 @@ pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime, PGresult **result)
            {
                int         wc;
                TimestampTz now = GetCurrentTimestamp();
-               long        secs;
-               int         microsecs;
                long        cur_timeout;
 
                /* If timeout has expired, give up, else get sleep time. */
-               if (now >= endtime)
+               cur_timeout = TimestampDifferenceMilliseconds(now, endtime);
+               if (cur_timeout <= 0)
                {
                    timed_out = true;
                    goto exit;
                }
-               TimestampDifference(now, endtime, &secs, &microsecs);
-
-               /* To protect against clock skew, limit sleep to one minute. */
-               cur_timeout = Min(60000, secs * USECS_PER_SEC + microsecs);
 
                /* Sleep until there's something to do */
                wc = WaitLatchOrSocket(MyLatch,
index f6261eb0e1da91dd56050c2bab15ab4d7753f693..f5a1599cb374a242f0cbdd57e56633011766b424 100644 (file)
@@ -5765,8 +5765,7 @@ recoveryApplyDelay(XLogReaderState *record)
 {
    uint8       xact_info;
    TimestampTz xtime;
-   long        secs;
-   int         microsecs;
+   long        msecs;
 
    /* nothing to do if no delay configured */
    if (recovery_min_apply_delay <= 0)
@@ -5803,9 +5802,9 @@ recoveryApplyDelay(XLogReaderState *record)
     * Exit without arming the latch if it's already past time to apply this
     * record
     */
-   TimestampDifference(GetCurrentTimestamp(), recoveryDelayUntilTime,
-                       &secs, &microsecs);
-   if (secs <= 0 && microsecs <= 0)
+   msecs = TimestampDifferenceMilliseconds(GetCurrentTimestamp(),
+                                           recoveryDelayUntilTime);
+   if (msecs <= 0)
        return false;
 
    while (true)
@@ -5822,19 +5821,17 @@ recoveryApplyDelay(XLogReaderState *record)
         * Wait for difference between GetCurrentTimestamp() and
         * recoveryDelayUntilTime
         */
-       TimestampDifference(GetCurrentTimestamp(), recoveryDelayUntilTime,
-                           &secs, &microsecs);
+       msecs = TimestampDifferenceMilliseconds(GetCurrentTimestamp(),
+                                               recoveryDelayUntilTime);
 
-       /* NB: We're ignoring waits below min_apply_delay's resolution. */
-       if (secs <= 0 && microsecs / 1000 <= 0)
+       if (msecs <= 0)
            break;
 
-       elog(DEBUG2, "recovery apply delay %ld seconds, %d milliseconds",
-            secs, microsecs / 1000);
+       elog(DEBUG2, "recovery apply delay %ld milliseconds", msecs);
 
-       WaitLatch(&XLogCtl->recoveryWakeupLatch,
-                 WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
-                 secs * 1000L + microsecs / 1000);
+       (void) WaitLatch(&XLogCtl->recoveryWakeupLatch,
+                        WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+                        msecs);
    }
    return true;
 }
@@ -8165,33 +8162,24 @@ LogCheckpointStart(int flags, bool restartpoint)
 static void
 LogCheckpointEnd(bool restartpoint)
 {
-   long        write_secs,
-               sync_secs,
-               total_secs,
-               longest_secs,
-               average_secs;
-   int         write_usecs,
-               sync_usecs,
-               total_usecs,
-               longest_usecs,
-               average_usecs;
+   long        write_msecs,
+               sync_msecs,
+               total_msecs,
+               longest_msecs,
+               average_msecs;
    uint64      average_sync_time;
 
    CheckpointStats.ckpt_end_t = GetCurrentTimestamp();
 
-   TimestampDifference(CheckpointStats.ckpt_write_t,
-                       CheckpointStats.ckpt_sync_t,
-                       &write_secs, &write_usecs);
+   write_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_write_t,
+                                                 CheckpointStats.ckpt_sync_t);
 
-   TimestampDifference(CheckpointStats.ckpt_sync_t,
-                       CheckpointStats.ckpt_sync_end_t,
-                       &sync_secs, &sync_usecs);
+   sync_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_sync_t,
+                                                CheckpointStats.ckpt_sync_end_t);
 
    /* Accumulate checkpoint timing summary data, in milliseconds. */
-   BgWriterStats.m_checkpoint_write_time +=
-       write_secs * 1000 + write_usecs / 1000;
-   BgWriterStats.m_checkpoint_sync_time +=
-       sync_secs * 1000 + sync_usecs / 1000;
+   BgWriterStats.m_checkpoint_write_time += write_msecs;
+   BgWriterStats.m_checkpoint_sync_time += sync_msecs;
 
    /*
     * All of the published timing statistics are accounted for.  Only
@@ -8200,25 +8188,20 @@ LogCheckpointEnd(bool restartpoint)
    if (!log_checkpoints)
        return;
 
-   TimestampDifference(CheckpointStats.ckpt_start_t,
-                       CheckpointStats.ckpt_end_t,
-                       &total_secs, &total_usecs);
+   total_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_start_t,
+                                                 CheckpointStats.ckpt_end_t);
 
    /*
     * Timing values returned from CheckpointStats are in microseconds.
-    * Convert to the second plus microsecond form that TimestampDifference
-    * returns for homogeneous printing.
+    * Convert to milliseconds for consistent printing.
     */
-   longest_secs = (long) (CheckpointStats.ckpt_longest_sync / 1000000);
-   longest_usecs = CheckpointStats.ckpt_longest_sync -
-       (uint64) longest_secs *1000000;
+   longest_msecs = (long) ((CheckpointStats.ckpt_longest_sync + 999) / 1000);
 
    average_sync_time = 0;
    if (CheckpointStats.ckpt_sync_rels > 0)
        average_sync_time = CheckpointStats.ckpt_agg_sync_time /
            CheckpointStats.ckpt_sync_rels;
-   average_secs = (long) (average_sync_time / 1000000);
-   average_usecs = average_sync_time - (uint64) average_secs *1000000;
+   average_msecs = (long) ((average_sync_time + 999) / 1000);
 
    elog(LOG, "%s complete: wrote %d buffers (%.1f%%); "
         "%d transaction log file(s) added, %d removed, %d recycled; "
@@ -8231,12 +8214,12 @@ LogCheckpointEnd(bool restartpoint)
         CheckpointStats.ckpt_segs_added,
         CheckpointStats.ckpt_segs_removed,
         CheckpointStats.ckpt_segs_recycled,
-        write_secs, write_usecs / 1000,
-        sync_secs, sync_usecs / 1000,
-        total_secs, total_usecs / 1000,
+        write_msecs / 1000, (int) (write_msecs % 1000),
+        sync_msecs / 1000, (int) (sync_msecs % 1000),
+        total_msecs / 1000, (int) (total_msecs % 1000),
         CheckpointStats.ckpt_sync_rels,
-        longest_secs, longest_usecs / 1000,
-        average_secs, average_usecs / 1000,
+        longest_msecs / 1000, (int) (longest_msecs % 1000),
+        average_msecs / 1000, (int) (average_msecs % 1000),
         (int) (PrevCheckPointDistance / 1024.0),
         (int) (CheckPointDistanceEstimate / 1024.0));
 }
@@ -11619,13 +11602,10 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
                    if (!TimestampDifferenceExceeds(last_fail_time, now,
                                                wal_retrieve_retry_interval))
                    {
-                       long        secs,
-                                   wait_time;
-                       int         usecs;
+                       long        wait_time;
 
-                       TimestampDifference(last_fail_time, now, &secs, &usecs);
                        wait_time = wal_retrieve_retry_interval -
-                           (secs * 1000 + usecs / 1000);
+                           TimestampDifferenceMilliseconds(last_fail_time, now);
 
                        WaitLatch(&XLogCtl->recoveryWakeupLatch,
                             WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
index 741899bf46b7967557d83bfd180e3e287e5e86d7..8f5d336142b646fac7372a3f3b20d213003b4686 100644 (file)
@@ -325,10 +325,6 @@ GetReplicationApplyDelay(void)
 
    XLogRecPtr  receivePtr;
    XLogRecPtr  replayPtr;
-
-   long        secs;
-   int         usecs;
-
    TimestampTz chunkReplayStartTime;
 
    SpinLockAcquire(&walrcv->mutex);
@@ -345,11 +341,8 @@ GetReplicationApplyDelay(void)
    if (chunkReplayStartTime == 0)
        return -1;
 
-   TimestampDifference(chunkReplayStartTime,
-                       GetCurrentTimestamp(),
-                       &secs, &usecs);
-
-   return (((int) secs * 1000) + (usecs / 1000));
+   return TimestampDifferenceMilliseconds(chunkReplayStartTime,
+                                          GetCurrentTimestamp());
 }
 
 /*
@@ -361,24 +354,14 @@ GetReplicationTransferLatency(void)
 {
    /* use volatile pointer to prevent code rearrangement */
    volatile WalRcvData *walrcv = WalRcv;
-
    TimestampTz lastMsgSendTime;
    TimestampTz lastMsgReceiptTime;
 
-   long        secs = 0;
-   int         usecs = 0;
-   int         ms;
-
    SpinLockAcquire(&walrcv->mutex);
    lastMsgSendTime = walrcv->lastMsgSendTime;
    lastMsgReceiptTime = walrcv->lastMsgReceiptTime;
    SpinLockRelease(&walrcv->mutex);
 
-   TimestampDifference(lastMsgSendTime,
-                       lastMsgReceiptTime,
-                       &secs, &usecs);
-
-   ms = ((int) secs * 1000) + (usecs / 1000);
-
-   return ms;
+   return TimestampDifferenceMilliseconds(lastMsgSendTime,
+                                          lastMsgReceiptTime);
 }
index e8523e8ced81e0736026c464c91138e82d192ddc..d65dd98a579faf1d97e3b95f27affb0a3d5924b9 100644 (file)
@@ -1781,8 +1781,6 @@ WalSndComputeSleeptime(TimestampTz now)
    if (wal_sender_timeout > 0 && last_reply_timestamp > 0)
    {
        TimestampTz wakeup_time;
-       long        sec_to_timeout;
-       int         microsec_to_timeout;
 
        /*
         * At the latest stop sleeping once wal_sender_timeout has been
@@ -1801,11 +1799,7 @@ WalSndComputeSleeptime(TimestampTz now)
                                                      wal_sender_timeout / 2);
 
        /* Compute relative time until wakeup. */
-       TimestampDifference(now, wakeup_time,
-                           &sec_to_timeout, &microsec_to_timeout);
-
-       sleeptime = sec_to_timeout * 1000 +
-           microsec_to_timeout / 1000;
+       sleeptime = TimestampDifferenceMilliseconds(now, wakeup_time);
    }
 
    return sleeptime;
index 76151e0d0c2cf20408c4e32ad8910c5366118140..c6d43bbe01c40152f2ae6eb25ff4e42b980ac0bb 100644 (file)
@@ -1671,12 +1671,14 @@ IntegerTimestampToTimestampTz(int64 timestamp)
  * TimestampDifference -- convert the difference between two timestamps
  *     into integer seconds and microseconds
  *
+ * This is typically used to calculate a wait timeout for select(2),
+ * which explains the otherwise-odd choice of output format.
+ *
  * Both inputs must be ordinary finite timestamps (in current usage,
  * they'll be results from GetCurrentTimestamp()).
  *
- * We expect start_time <= stop_time.  If not, we return zeroes; for current
- * callers there is no need to be tense about which way division rounds on
- * negative inputs.
+ * We expect start_time <= stop_time.  If not, we return zeros,
+ * since then we're already past the previously determined stop_time.
  */
 void
 TimestampDifference(TimestampTz start_time, TimestampTz stop_time,
@@ -1701,6 +1703,36 @@ TimestampDifference(TimestampTz start_time, TimestampTz stop_time,
    }
 }
 
+/*
+ * TimestampDifferenceMilliseconds -- convert the difference between two
+ *         timestamps into integer milliseconds
+ *
+ * This is typically used to calculate a wait timeout for WaitLatch()
+ * or a related function.  The choice of "long" as the result type
+ * is to harmonize with that.  It is caller's responsibility that the
+ * input timestamps not be so far apart as to risk overflow of "long"
+ * (which'd happen at about 25 days on machines with 32-bit "long").
+ *
+ * Both inputs must be ordinary finite timestamps (in current usage,
+ * they'll be results from GetCurrentTimestamp()).
+ *
+ * We expect start_time <= stop_time.  If not, we return zero,
+ * since then we're already past the previously determined stop_time.
+ *
+ * Note we round up any fractional millisecond, since waiting for just
+ * less than the intended timeout is undesirable.
+ */
+long
+TimestampDifferenceMilliseconds(TimestampTz start_time, TimestampTz stop_time)
+{
+   TimestampTz diff = stop_time - start_time;
+
+   if (diff <= 0)
+       return 0;
+   else
+       return (long) ((diff + 999) / 1000);
+}
+
 /*
  * TimestampDifferenceExceeds -- report whether the difference between two
  *     timestamps is >= a threshold (expressed in milliseconds)
index 530fef1915e4a5e8d8a5ff38d47820e0bcca1b94..2ed0ca0a0ec6cf722f29e40b1efc6688708e3860 100644 (file)
@@ -216,6 +216,8 @@ extern Datum generate_series_timestamptz(PG_FUNCTION_ARGS);
 extern TimestampTz GetCurrentTimestamp(void);
 extern void TimestampDifference(TimestampTz start_time, TimestampTz stop_time,
                    long *secs, int *microsecs);
+extern long TimestampDifferenceMilliseconds(TimestampTz start_time,
+                                           TimestampTz stop_time);
 extern bool TimestampDifferenceExceeds(TimestampTz start_time,
                           TimestampTz stop_time,
                           int msec);