Ensure vacuum removes all visibly dead tuples older than OldestXmin
author: Melanie Plageman <[email protected]>
Fri, 19 Jul 2024 14:44:36 +0000 (10:44 -0400)
committer: Melanie Plageman <[email protected]>
Fri, 19 Jul 2024 16:11:41 +0000 (12:11 -0400)
If vacuum fails to remove a tuple with xmax older than
VacuumCutoffs->OldestXmin and younger than GlobalVisState->maybe_needed,
it will loop infinitely in lazy_scan_prune(), which compares tuples'
visibility information to OldestXmin.

Starting in version 14, which uses GlobalVisState for visibility testing
during pruning, it is possible for GlobalVisState->maybe_needed to
precede OldestXmin if maybe_needed is forced to go backward while vacuum
is running. This can happen if a disconnected standby with a running
transaction older than VacuumCutoffs->OldestXmin reconnects to the
primary after vacuum initially calculates GlobalVisState and OldestXmin.

Fix this by having vacuum always remove tuples older than OldestXmin
during pruning. This is okay because the standby won't replay the tuple
removal until the tuple is removable. Thus, the worst that can happen is
a recovery conflict.

Fixes BUG# 17257

Back-patched in versions 14-17

Author: Melanie Plageman

Reviewed-by: Noah Misch, Peter Geoghegan, Robert Haas, Andres Freund, and Heikki Linnakangas
Discussion: https://postgr.es/m/CAAKRu_Y_NJzF4-8gzTTeaOuUL3CcGoXPjXcAHbTTygT8AyVqag%40mail.gmail.com

src/backend/access/heap/pruneheap.c
src/backend/access/heap/vacuumlazy.c
src/include/access/heapam.h

index 47b9e209154d5c74e168b4c0cafdfa9816b768e5..c2c20d4b3bc069545bb10f99a9d77d489cb78f85 100644 (file)
@@ -33,7 +33,8 @@ typedef struct
 {
    Relation    rel;
 
-   /* tuple visibility test, initialized for the relation */
+   /* State used to test tuple visibility; Initialized for the relation */
+   TransactionId oldest_xmin;
    GlobalVisState *vistest;
 
    /*
@@ -205,7 +206,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
            int         ndeleted,
                        nnewlpdead;
 
-           ndeleted = heap_page_prune(relation, buffer, vistest, limited_xmin,
+           ndeleted = heap_page_prune(relation, buffer, InvalidTransactionId,
+                                      vistest, limited_xmin,
                                       limited_ts, &nnewlpdead, NULL);
 
            /*
@@ -247,11 +249,14 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
  * also need to account for a reduction in the length of the line pointer
  * array following array truncation by us.
  *
- * vistest is used to distinguish whether tuples are DEAD or RECENTLY_DEAD
- * (see heap_prune_satisfies_vacuum and
- * HeapTupleSatisfiesVacuum). old_snap_xmin / old_snap_ts need to
- * either have been set by TransactionIdLimitedForOldSnapshots, or
- * InvalidTransactionId/0 respectively.
+ * vistest and oldest_xmin are used to distinguish whether tuples are DEAD or
+ * RECENTLY_DEAD (see heap_prune_satisfies_vacuum and
+ * HeapTupleSatisfiesVacuum). If oldest_xmin is provided by the caller, it is
+ * used before consulting GlobalVisState.
+ *
+ * old_snap_xmin / old_snap_ts need to either have been set by
+ * TransactionIdLimitedForOldSnapshots, or InvalidTransactionId/0
+ * respectively.
  *
  * Sets *nnewlpdead for caller, indicating the number of items that were
  * newly set LP_DEAD during prune operation.
@@ -263,6 +268,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
  */
 int
 heap_page_prune(Relation relation, Buffer buffer,
+               TransactionId oldest_xmin,
                GlobalVisState *vistest,
                TransactionId old_snap_xmin,
                TimestampTz old_snap_ts,
@@ -290,6 +296,7 @@ heap_page_prune(Relation relation, Buffer buffer,
     */
    prstate.new_prune_xid = InvalidTransactionId;
    prstate.rel = relation;
+   prstate.oldest_xmin = oldest_xmin;
    prstate.vistest = vistest;
    prstate.old_snap_xmin = old_snap_xmin;
    prstate.old_snap_ts = old_snap_ts;
@@ -520,13 +527,31 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
    }
 
    /*
-    * First check if GlobalVisTestIsRemovableXid() is sufficient to find the
-    * row dead. If not, and old_snapshot_threshold is enabled, try to use the
-    * lowered horizon.
+    * For VACUUM, we must be sure to prune tuples with xmax older than
+    * oldest_xmin -- a visibility cutoff determined at the beginning of
+    * vacuuming the relation. oldest_xmin is used for freezing determination
+    * and we cannot freeze dead tuples' xmaxes.
+    */
+   if (TransactionIdIsValid(prstate->oldest_xmin) &&
+       NormalTransactionIdPrecedes(dead_after, prstate->oldest_xmin))
+       return HEAPTUPLE_DEAD;
+
+   /*
+    * Determine whether or not the tuple is considered dead when compared
+    * with the provided GlobalVisState. On-access pruning does not provide
+    * oldest_xmin. And for vacuum, even if the tuple's xmax is not older than
+    * oldest_xmin, GlobalVisTestIsRemovableXid() could find the row dead if
+    * the GlobalVisState has been updated since the beginning of vacuuming
+    * the relation.
     */
    if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after))
-       res = HEAPTUPLE_DEAD;
-   else if (OldSnapshotThresholdActive())
+       return HEAPTUPLE_DEAD;
+
+   /*
+    * If GlobalVisTestIsRemovableXid() is not sufficient to find the row dead
+    * and old_snapshot_threshold is enabled, try to use the lowered horizon.
+    */
+   if (OldSnapshotThresholdActive())
    {
        /* haven't determined limited horizon yet, requests */
        if (!TransactionIdIsValid(prstate->old_snap_xmin))
index c474b0611464a9f227d2cd975f0d98ad171f9222..9fa88960ada56ca9d1226ebf8e9c50fe4becbd8c 100644 (file)
@@ -1581,7 +1581,8 @@ retry:
     * lpdead_items's final value can be thought of as the number of tuples
     * that were deleted from indexes.
     */
-   tuples_deleted = heap_page_prune(rel, buf, vacrel->vistest,
+   tuples_deleted = heap_page_prune(rel, buf, vacrel->cutoffs.OldestXmin,
+                                    vacrel->vistest,
                                     InvalidTransactionId, 0, &nnewlpdead,
                                     &vacrel->offnum);
 
index faf50265191c76209718ead5bb86a9ffbef3af05..c7278219b24cb1079d1555dd032732dee18b67b8 100644 (file)
@@ -285,6 +285,7 @@ extern TransactionId heap_index_delete_tuples(Relation rel,
 struct GlobalVisState;
 extern void heap_page_prune_opt(Relation relation, Buffer buffer);
 extern int heap_page_prune(Relation relation, Buffer buffer,
+                           TransactionId oldest_xmin,
                            struct GlobalVisState *vistest,
                            TransactionId old_snap_xmin,
                            TimestampTz old_snap_ts,