1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-05 07:21:24 +03:00

Remove retry loop in heap_page_prune().

The retry loop is needed because heap_page_prune() calls
HeapTupleSatisfiesVacuum() and then lazy_scan_prune() does the same
thing again, and they might get different answers due to concurrent
clog updates.  But this patch makes heap_page_prune() return the
HeapTupleSatisfiesVacuum() results that it computed back to the
caller, which allows lazy_scan_prune() to avoid needing to recompute
those values in the first place. That's nice both because it eliminates
the need for a retry loop and also because it's cheaper.

Melanie Plageman, reviewed by David Geier, Andres Freund, and me.

Discussion: https://postgr.es/m/CAAKRu_br124qsGJieuYA0nGjywEukhK1dKBfRdby_4yY3E9SXA%40mail.gmail.com
This commit is contained in:
Robert Haas
2023-10-02 11:40:07 -04:00
parent e64c733bb1
commit 1ccc1e05ae
3 changed files with 55 additions and 49 deletions

View File

@ -53,16 +53,6 @@ typedef struct
* 1. Otherwise every access would need to subtract 1. * 1. Otherwise every access would need to subtract 1.
*/ */
bool marked[MaxHeapTuplesPerPage + 1]; bool marked[MaxHeapTuplesPerPage + 1];
/*
* Tuple visibility is only computed once for each tuple, for correctness
* and efficiency reasons; see comment in heap_page_prune() for details.
* This is of type int8[], instead of HTSV_Result[], so we can use -1 to
* indicate no visibility has been computed, e.g. for LP_DEAD items.
*
* Same indexing as ->marked.
*/
int8 htsv[MaxHeapTuplesPerPage + 1];
} PruneState; } PruneState;
/* Local functions */ /* Local functions */
@ -71,6 +61,7 @@ static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate,
Buffer buffer); Buffer buffer);
static int heap_prune_chain(Buffer buffer, static int heap_prune_chain(Buffer buffer,
OffsetNumber rootoffnum, OffsetNumber rootoffnum,
int8 *htsv,
PruneState *prstate); PruneState *prstate);
static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid); static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
static void heap_prune_record_redirect(PruneState *prstate, static void heap_prune_record_redirect(PruneState *prstate,
@ -240,6 +231,10 @@ heap_page_prune(Relation relation, Buffer buffer,
prstate.nredirected = prstate.ndead = prstate.nunused = 0; prstate.nredirected = prstate.ndead = prstate.nunused = 0;
memset(prstate.marked, 0, sizeof(prstate.marked)); memset(prstate.marked, 0, sizeof(prstate.marked));
/*
* presult->htsv is not initialized here because all ntuple spots in the
* array will be set either to a valid HTSV_Result value or -1.
*/
presult->ndeleted = 0; presult->ndeleted = 0;
presult->nnewlpdead = 0; presult->nnewlpdead = 0;
@ -276,7 +271,7 @@ heap_page_prune(Relation relation, Buffer buffer,
/* Nothing to do if slot doesn't contain a tuple */ /* Nothing to do if slot doesn't contain a tuple */
if (!ItemIdIsNormal(itemid)) if (!ItemIdIsNormal(itemid))
{ {
prstate.htsv[offnum] = -1; presult->htsv[offnum] = -1;
continue; continue;
} }
@ -292,8 +287,8 @@ heap_page_prune(Relation relation, Buffer buffer,
if (off_loc) if (off_loc)
*off_loc = offnum; *off_loc = offnum;
prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup, presult->htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
buffer); buffer);
} }
/* Scan the page */ /* Scan the page */
@ -317,7 +312,8 @@ heap_page_prune(Relation relation, Buffer buffer,
continue; continue;
/* Process this item or chain of items */ /* Process this item or chain of items */
presult->ndeleted += heap_prune_chain(buffer, offnum, &prstate); presult->ndeleted += heap_prune_chain(buffer, offnum,
presult->htsv, &prstate);
} }
/* Clear the offset information once we have processed the given page. */ /* Clear the offset information once we have processed the given page. */
@ -446,6 +442,8 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
/* /*
* Prune specified line pointer or a HOT chain originating at line pointer. * Prune specified line pointer or a HOT chain originating at line pointer.
* *
* Tuple visibility information is provided in htsv.
*
* If the item is an index-referenced tuple (i.e. not a heap-only tuple), * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
* the HOT chain is pruned by removing all DEAD tuples at the start of the HOT * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
* chain. We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple. * chain. We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
@ -473,7 +471,8 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
* Returns the number of tuples (to be) deleted from the page. * Returns the number of tuples (to be) deleted from the page.
*/ */
static int static int
heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
int8 *htsv, PruneState *prstate)
{ {
int ndeleted = 0; int ndeleted = 0;
Page dp = (Page) BufferGetPage(buffer); Page dp = (Page) BufferGetPage(buffer);
@ -494,7 +493,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
*/ */
if (ItemIdIsNormal(rootlp)) if (ItemIdIsNormal(rootlp))
{ {
Assert(prstate->htsv[rootoffnum] != -1); Assert(htsv[rootoffnum] != -1);
htup = (HeapTupleHeader) PageGetItem(dp, rootlp); htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
if (HeapTupleHeaderIsHeapOnly(htup)) if (HeapTupleHeaderIsHeapOnly(htup))
@ -517,7 +516,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
* either here or while following a chain below. Whichever path * either here or while following a chain below. Whichever path
* gets there first will mark the tuple unused. * gets there first will mark the tuple unused.
*/ */
if (prstate->htsv[rootoffnum] == HEAPTUPLE_DEAD && if (htsv[rootoffnum] == HEAPTUPLE_DEAD &&
!HeapTupleHeaderIsHotUpdated(htup)) !HeapTupleHeaderIsHotUpdated(htup))
{ {
heap_prune_record_unused(prstate, rootoffnum); heap_prune_record_unused(prstate, rootoffnum);
@ -585,7 +584,6 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
break; break;
Assert(ItemIdIsNormal(lp)); Assert(ItemIdIsNormal(lp));
Assert(prstate->htsv[offnum] != -1);
htup = (HeapTupleHeader) PageGetItem(dp, lp); htup = (HeapTupleHeader) PageGetItem(dp, lp);
/* /*
@ -605,7 +603,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
*/ */
tupdead = recent_dead = false; tupdead = recent_dead = false;
switch ((HTSV_Result) prstate->htsv[offnum]) switch (htsv_get_valid_status(htsv[offnum]))
{ {
case HEAPTUPLE_DEAD: case HEAPTUPLE_DEAD:
tupdead = true; tupdead = true;

View File

@ -1524,12 +1524,13 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
* of complexity just so we could deal with tuples that were DEAD to VACUUM, * of complexity just so we could deal with tuples that were DEAD to VACUUM,
* but nevertheless were left with storage after pruning. * but nevertheless were left with storage after pruning.
* *
* The approach we take now is to restart pruning when the race condition is * As of Postgres 17, we circumvent this problem altogether by reusing the
* detected. This allows heap_page_prune() to prune the tuples inserted by * result of heap_page_prune()'s visibility check. Without the second call to
* the now-aborted transaction. This is a little crude, but it guarantees * HeapTupleSatisfiesVacuum(), there is no new HTSV_Result and there can be no
* that any items that make it into the dead_items array are simple LP_DEAD * disagreement. We'll just handle such tuples as if they had become fully dead
* line pointers, and that every remaining item with tuple storage is * right after this operation completes instead of in the middle of it. Note that
* considered as a candidate for freezing. * any tuple that becomes dead after the call to heap_page_prune() can't need to
* be frozen, because it was visible to another session when vacuum started.
*/ */
static void static void
lazy_scan_prune(LVRelState *vacrel, lazy_scan_prune(LVRelState *vacrel,
@ -1542,8 +1543,6 @@ lazy_scan_prune(LVRelState *vacrel,
OffsetNumber offnum, OffsetNumber offnum,
maxoff; maxoff;
ItemId itemid; ItemId itemid;
HeapTupleData tuple;
HTSV_Result res;
PruneResult presult; PruneResult presult;
int tuples_frozen, int tuples_frozen,
lpdead_items, lpdead_items,
@ -1563,8 +1562,6 @@ lazy_scan_prune(LVRelState *vacrel,
*/ */
maxoff = PageGetMaxOffsetNumber(page); maxoff = PageGetMaxOffsetNumber(page);
retry:
/* Initialize (or reset) page-level state */ /* Initialize (or reset) page-level state */
pagefrz.freeze_required = false; pagefrz.freeze_required = false;
pagefrz.FreezePageRelfrozenXid = vacrel->NewRelfrozenXid; pagefrz.FreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
@ -1600,6 +1597,7 @@ retry:
offnum <= maxoff; offnum <= maxoff;
offnum = OffsetNumberNext(offnum)) offnum = OffsetNumberNext(offnum))
{ {
HeapTupleHeader htup;
bool totally_frozen; bool totally_frozen;
/* /*
@ -1642,22 +1640,7 @@ retry:
Assert(ItemIdIsNormal(itemid)); Assert(ItemIdIsNormal(itemid));
ItemPointerSet(&(tuple.t_self), blkno, offnum); htup = (HeapTupleHeader) PageGetItem(page, itemid);
tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
tuple.t_len = ItemIdGetLength(itemid);
tuple.t_tableOid = RelationGetRelid(rel);
/*
* DEAD tuples are almost always pruned into LP_DEAD line pointers by
* heap_page_prune(), but it's possible that the tuple state changed
* since heap_page_prune() looked. Handle that here by restarting.
* (See comments at the top of function for a full explanation.)
*/
res = HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
buf);
if (unlikely(res == HEAPTUPLE_DEAD))
goto retry;
/* /*
* The criteria for counting a tuple as live in this block need to * The criteria for counting a tuple as live in this block need to
@ -1678,7 +1661,7 @@ retry:
* (Cases where we bypass index vacuuming will violate this optimistic * (Cases where we bypass index vacuuming will violate this optimistic
* assumption, but the overall impact of that should be negligible.) * assumption, but the overall impact of that should be negligible.)
*/ */
switch (res) switch (htsv_get_valid_status(presult.htsv[offnum]))
{ {
case HEAPTUPLE_LIVE: case HEAPTUPLE_LIVE:
@ -1700,7 +1683,7 @@ retry:
{ {
TransactionId xmin; TransactionId xmin;
if (!HeapTupleHeaderXminCommitted(tuple.t_data)) if (!HeapTupleHeaderXminCommitted(htup))
{ {
prunestate->all_visible = false; prunestate->all_visible = false;
break; break;
@ -1710,7 +1693,7 @@ retry:
* The inserter definitely committed. But is it old enough * The inserter definitely committed. But is it old enough
* that everyone sees it as committed? * that everyone sees it as committed?
*/ */
xmin = HeapTupleHeaderGetXmin(tuple.t_data); xmin = HeapTupleHeaderGetXmin(htup);
if (!TransactionIdPrecedes(xmin, if (!TransactionIdPrecedes(xmin,
vacrel->cutoffs.OldestXmin)) vacrel->cutoffs.OldestXmin))
{ {
@ -1764,7 +1747,7 @@ retry:
prunestate->hastup = true; /* page makes rel truncation unsafe */ prunestate->hastup = true; /* page makes rel truncation unsafe */
/* Tuple with storage -- consider need to freeze */ /* Tuple with storage -- consider need to freeze */
if (heap_prepare_freeze_tuple(tuple.t_data, &vacrel->cutoffs, &pagefrz, if (heap_prepare_freeze_tuple(htup, &vacrel->cutoffs, &pagefrz,
&frozen[tuples_frozen], &totally_frozen)) &frozen[tuples_frozen], &totally_frozen))
{ {
/* Save prepared freeze plan for later */ /* Save prepared freeze plan for later */

View File

@ -198,8 +198,33 @@ typedef struct PruneResult
{ {
int ndeleted; /* Number of tuples deleted from the page */ int ndeleted; /* Number of tuples deleted from the page */
int nnewlpdead; /* Number of newly LP_DEAD items */ int nnewlpdead; /* Number of newly LP_DEAD items */
/*
* Tuple visibility is only computed once for each tuple, for correctness
* and efficiency reasons; see comment in heap_page_prune() for details.
* This is of type int8[], instead of HTSV_Result[], so we can use -1 to
* indicate no visibility has been computed, e.g. for LP_DEAD items.
*
* This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
* 1. Otherwise every access would need to subtract 1.
*/
int8 htsv[MaxHeapTuplesPerPage + 1];
} PruneResult; } PruneResult;
/*
 * Convert one entry of a pruning visibility array into an HTSV_Result.
 *
 * Pruning records each tuple's visibility once, as an int8, so that -1 can
 * stand for "no visibility computed"; see PruneResult.htsv for details.
 * Callers that are about to act on an entry should fetch it through this
 * helper: the Assert rejects anything outside the range HEAPTUPLE_DEAD ..
 * HEAPTUPLE_DELETE_IN_PROGRESS, which catches reads of slots that were never
 * filled in with a real visibility result.
 */
static inline HTSV_Result
htsv_get_valid_status(int status)
{
	HTSV_Result result;

	/* Only values inside the HTSV_Result range may be converted. */
	Assert(status >= HEAPTUPLE_DEAD && status <= HEAPTUPLE_DELETE_IN_PROGRESS);

	result = (HTSV_Result) status;
	return result;
}
/* ---------------- /* ----------------
* function prototypes for heap access method * function prototypes for heap access method
* *