From dc6354c67017a3fe60744fcfe4936d14d72f2d7c Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Fri, 19 Jul 2024 11:06:03 -0400 Subject: [PATCH] Ensure vacuum removes all visibly dead tuples older than OldestXmin If vacuum fails to remove a tuple with xmax older than VacuumCutoffs->OldestXmin and younger than GlobalVisState->maybe_needed, it will loop infinitely in lazy_scan_prune(), which compares tuples' visibility information to OldestXmin. Starting in version 14, which uses GlobalVisState for visibility testing during pruning, it is possible for GlobalVisState->maybe_needed to precede OldestXmin if maybe_needed is forced to go backward while vacuum is running. This can happen if a disconnected standby with a running transaction older than VacuumCutoffs->OldestXmin reconnects to the primary after vacuum initially calculates GlobalVisState and OldestXmin. Fix this by having vacuum always remove tuples older than OldestXmin during pruning. This is okay because the standby won't replay the tuple removal until the tuple is removable. Thus, the worst that can happen is a recovery conflict. Fixes BUG# 17257 Back-patched in versions 14-17 Author: Melanie Plageman Reviewed-by: Noah Misch, Peter Geoghegan, Robert Haas, Andres Freund, and Heikki Linnakangas Discussion: https://postgr.es/m/CAAKRu_Y_NJzF4-8gzTTeaOuUL3CcGoXPjXcAHbTTygT8AyVqag%40mail.gmail.com --- src/backend/access/heap/pruneheap.c | 49 +++++++++++++++++++++------- src/backend/access/heap/vacuumlazy.c | 3 +- src/include/access/heapam.h | 1 + 3 files changed, 40 insertions(+), 13 deletions(-) diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index 9f43bbe25f5..72ce130346c 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -33,7 +33,8 @@ typedef struct { Relation rel; - /* tuple visibility test, initialized for the relation */ + /* State used to test tuple visibility; Initialized for the relation */ + TransactionId oldest_xmin; GlobalVisState *vistest; /* @@ -206,7 +207,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer) int ndeleted, nnewlpdead; - ndeleted = heap_page_prune(relation, buffer, vistest, limited_xmin, + ndeleted = heap_page_prune(relation, buffer, InvalidTransactionId, + vistest, limited_xmin, limited_ts, &nnewlpdead, NULL); /* @@ -248,11 +250,14 @@ heap_page_prune_opt(Relation relation, Buffer buffer) * also need to account for a reduction in the length of the line pointer * array following array truncation by us. * - * vistest is used to distinguish whether tuples are DEAD or RECENTLY_DEAD - * (see heap_prune_satisfies_vacuum and - * HeapTupleSatisfiesVacuum). old_snap_xmin / old_snap_ts need to - * either have been set by TransactionIdLimitedForOldSnapshots, or - * InvalidTransactionId/0 respectively. + * vistest and oldest_xmin are used to distinguish whether tuples are DEAD or + * RECENTLY_DEAD (see heap_prune_satisfies_vacuum and + * HeapTupleSatisfiesVacuum). If oldest_xmin is provided by the caller, it is + * used before consulting GlobalVisState. + * + * old_snap_xmin / old_snap_ts need to either have been set by + * TransactionIdLimitedForOldSnapshots, or InvalidTransactionId/0 + * respectively. * * Sets *nnewlpdead for caller, indicating the number of items that were * newly set LP_DEAD during prune operation. @@ -264,6 +269,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer) */ int heap_page_prune(Relation relation, Buffer buffer, + TransactionId oldest_xmin, GlobalVisState *vistest, TransactionId old_snap_xmin, TimestampTz old_snap_ts, @@ -291,6 +297,7 @@ heap_page_prune(Relation relation, Buffer buffer, */ prstate.new_prune_xid = InvalidTransactionId; prstate.rel = relation; + prstate.oldest_xmin = oldest_xmin; prstate.vistest = vistest; prstate.old_snap_xmin = old_snap_xmin; prstate.old_snap_ts = old_snap_ts; @@ -520,13 +527,31 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer) } /* - * First check if GlobalVisTestIsRemovableXid() is sufficient to find the - * row dead. If not, and old_snapshot_threshold is enabled, try to use the - * lowered horizon. + * For VACUUM, we must be sure to prune tuples with xmax older than + * oldest_xmin -- a visibility cutoff determined at the beginning of + * vacuuming the relation. oldest_xmin is used for freezing determination + * and we cannot freeze dead tuples' xmaxes. + */ + if (TransactionIdIsValid(prstate->oldest_xmin) && + NormalTransactionIdPrecedes(dead_after, prstate->oldest_xmin)) + return HEAPTUPLE_DEAD; + + /* + * Determine whether or not the tuple is considered dead when compared + * with the provided GlobalVisState. On-access pruning does not provide + * oldest_xmin. And for vacuum, even if the tuple's xmax is not older than + * oldest_xmin, GlobalVisTestIsRemovableXid() could find the row dead if + * the GlobalVisState has been updated since the beginning of vacuuming + * the relation. */ if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after)) - res = HEAPTUPLE_DEAD; - else if (OldSnapshotThresholdActive()) + return HEAPTUPLE_DEAD; + + /* + * If GlobalVisTestIsRemovableXid() is not sufficient to find the row dead + * and old_snapshot_threshold is enabled, try to use the lowered horizon. + */ + if (OldSnapshotThresholdActive()) { /* haven't determined limited horizon yet, requests */ if (!TransactionIdIsValid(prstate->old_snap_xmin)) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 2e61e2f20b4..7537be5c1f6 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -1585,7 +1585,8 @@ retry: * lpdead_items's final value can be thought of as the number of tuples * that were deleted from indexes. */ - tuples_deleted = heap_page_prune(rel, buf, vacrel->vistest, + tuples_deleted = heap_page_prune(rel, buf, vacrel->OldestXmin, + vacrel->vistest, InvalidTransactionId, 0, &nnewlpdead, &vacrel->offnum); diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index abf62d9df79..50b9e729e63 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -185,6 +185,7 @@ extern TransactionId heap_index_delete_tuples(Relation rel, struct GlobalVisState; extern void heap_page_prune_opt(Relation relation, Buffer buffer); extern int heap_page_prune(Relation relation, Buffer buffer, + TransactionId oldest_xmin, struct GlobalVisState *vistest, TransactionId old_snap_xmin, TimestampTz old_snap_ts_ts,