Ensure vacuum removes all visibly dead tuples older than OldestXmin
author: Melanie Plageman <[email protected]>
Fri, 19 Jul 2024 15:16:51 +0000 (11:16 -0400)
committer: Melanie Plageman <[email protected]>
Fri, 19 Jul 2024 16:07:53 +0000 (12:07 -0400)
If vacuum fails to remove a tuple with xmax older than
VacuumCutoffs->OldestXmin and younger than GlobalVisState->maybe_needed,
it will loop infinitely in lazy_scan_prune(), which compares tuples'
visibility information to OldestXmin.

Starting in version 14, which uses GlobalVisState for visibility testing
during pruning, it is possible for GlobalVisState->maybe_needed to
precede OldestXmin if maybe_needed is forced to go backward while vacuum
is running. This can happen if a disconnected standby with a running
transaction older than VacuumCutoffs->OldestXmin reconnects to the
primary after vacuum initially calculates GlobalVisState and OldestXmin.

Fix this by having vacuum always remove tuples older than OldestXmin
during pruning. This is okay because the standby won't replay the tuple
removal until the tuple is removable. Thus, the worst that can happen is
a recovery conflict.

Fixes BUG# 17257

Back-patched in versions 14-17

Author: Melanie Plageman
Reviewed-by: Noah Misch, Peter Geoghegan, Robert Haas, Andres Freund, and Heikki Linnakangas
Discussion: https://postgr.es/m/CAAKRu_Y_NJzF4-8gzTTeaOuUL3CcGoXPjXcAHbTTygT8AyVqag%40mail.gmail.com

src/backend/access/heap/pruneheap.c
src/backend/access/heap/vacuumlazy.c
src/include/access/heapam.h

index f7f8056d631d86739815f9dca2fdef9700871889..7d274de23014b40b988209e20155b312f10a55a7 100644 (file)
@@ -32,7 +32,8 @@ typedef struct
 {
    Relation    rel;
 
-   /* tuple visibility test, initialized for the relation */
+   /* State used to test tuple visibility; Initialized for the relation */
+   TransactionId oldest_xmin;
    GlobalVisState *vistest;
 
    /*
@@ -202,7 +203,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
        if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
        {
            /* OK to prune */
-           (void) heap_page_prune(relation, buffer, vistest,
+           (void) heap_page_prune(relation, buffer, InvalidTransactionId,
+                                  vistest,
                                   limited_xmin, limited_ts,
                                   true, NULL);
        }
@@ -218,11 +220,14 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
  *
  * Caller must have pin and buffer cleanup lock on the page.
  *
- * vistest is used to distinguish whether tuples are DEAD or RECENTLY_DEAD
- * (see heap_prune_satisfies_vacuum and
- * HeapTupleSatisfiesVacuum). old_snap_xmin / old_snap_ts need to
- * either have been set by TransactionIdLimitedForOldSnapshots, or
- * InvalidTransactionId/0 respectively.
+ * vistest and oldest_xmin are used to distinguish whether tuples are DEAD or
+ * RECENTLY_DEAD (see heap_prune_satisfies_vacuum and
+ * HeapTupleSatisfiesVacuum). If oldest_xmin is provided by the caller, it is
+ * used before consulting GlobalVisState.
+ *
+ * old_snap_xmin / old_snap_ts need to either have been set by
+ * TransactionIdLimitedForOldSnapshots, or InvalidTransactionId/0
+ * respectively.
  *
  * If report_stats is true then we send the number of reclaimed heap-only
  * tuples to pgstats.  (This must be false during vacuum, since vacuum will
@@ -236,6 +241,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
  */
 int
 heap_page_prune(Relation relation, Buffer buffer,
+               TransactionId oldest_xmin,
                GlobalVisState *vistest,
                TransactionId old_snap_xmin,
                TimestampTz old_snap_ts,
@@ -262,6 +268,7 @@ heap_page_prune(Relation relation, Buffer buffer,
     */
    prstate.new_prune_xid = InvalidTransactionId;
    prstate.rel = relation;
+   prstate.oldest_xmin = oldest_xmin;
    prstate.vistest = vistest;
    prstate.old_snap_xmin = old_snap_xmin;
    prstate.old_snap_ts = old_snap_ts;
@@ -512,13 +519,31 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
    }
 
    /*
-    * First check if GlobalVisTestIsRemovableXid() is sufficient to find the
-    * row dead. If not, and old_snapshot_threshold is enabled, try to use the
-    * lowered horizon.
+    * For VACUUM, we must be sure to prune tuples with xmax older than
+    * oldest_xmin -- a visibility cutoff determined at the beginning of
+    * vacuuming the relation. oldest_xmin is used for freezing determination
+    * and we cannot freeze dead tuples' xmaxes.
+    */
+   if (TransactionIdIsValid(prstate->oldest_xmin) &&
+       NormalTransactionIdPrecedes(dead_after, prstate->oldest_xmin))
+       return HEAPTUPLE_DEAD;
+
+   /*
+    * Determine whether or not the tuple is considered dead when compared
+    * with the provided GlobalVisState. On-access pruning does not provide
+    * oldest_xmin. And for vacuum, even if the tuple's xmax is not older than
+    * oldest_xmin, GlobalVisTestIsRemovableXid() could find the row dead if
+    * the GlobalVisState has been updated since the beginning of vacuuming
+    * the relation.
     */
    if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after))
-       res = HEAPTUPLE_DEAD;
-   else if (OldSnapshotThresholdActive())
+       return HEAPTUPLE_DEAD;
+
+   /*
+    * If GlobalVisTestIsRemovableXid() is not sufficient to find the row dead
+    * and old_snapshot_threshold is enabled, try to use the lowered horizon.
+    */
+   if (OldSnapshotThresholdActive())
    {
        /* haven't determined limited horizon yet, requests */
        if (!TransactionIdIsValid(prstate->old_snap_xmin))
index 8aab6e324e038a05f2d01f133cf24a208ee9f77f..5898eae127f714d466148ca02dc2570a70c17533 100644 (file)
@@ -1738,7 +1738,7 @@ retry:
     * lpdead_items's final value can be thought of as the number of tuples
     * that were deleted from indexes.
     */
-   tuples_deleted = heap_page_prune(rel, buf, vistest,
+   tuples_deleted = heap_page_prune(rel, buf, vacrel->OldestXmin, vistest,
                                     InvalidTransactionId, 0, false,
                                     &vacrel->offnum);
 
index 4f1dff9ca1b06a2dd755bbbdbc917bda792145a3..b484db8fa571dc1c0d9ade9c93e10dd18dc86da9 100644 (file)
@@ -186,6 +186,7 @@ extern TransactionId heap_index_delete_tuples(Relation rel,
 struct GlobalVisState;
 extern void heap_page_prune_opt(Relation relation, Buffer buffer);
 extern int heap_page_prune(Relation relation, Buffer buffer,
+                           TransactionId oldest_xmin,
                            struct GlobalVisState *vistest,
                            TransactionId old_snap_xmin,
                            TimestampTz old_snap_ts_ts,