mirror of
https://github.com/postgres/postgres.git
synced 2025-07-05 07:21:24 +03:00
Remove retry loop in heap_page_prune().
The retry loop is needed because heap_page_prune() calls HeapTupleSatisfiesVacuum() and then lazy_scan_prune() does the same thing again, and they might get different answers due to concurrent clog updates. But this patch makes heap_page_prune() return the HeapTupleSatisfiesVacuum() results that it computed back to the caller, which allows lazy_scan_prune() to avoid needing to recompute those values in the first place. That's nice both because it eliminates the need for a retry loop and also because it's cheaper. Melanie Plageman, reviewed by David Geier, Andres Freund, and me. Discussion: https://postgr.es/m/CAAKRu_br124qsGJieuYA0nGjywEukhK1dKBfRdby_4yY3E9SXA%40mail.gmail.com
This commit is contained in:
@ -53,16 +53,6 @@ typedef struct
|
|||||||
* 1. Otherwise every access would need to subtract 1.
|
* 1. Otherwise every access would need to subtract 1.
|
||||||
*/
|
*/
|
||||||
bool marked[MaxHeapTuplesPerPage + 1];
|
bool marked[MaxHeapTuplesPerPage + 1];
|
||||||
|
|
||||||
/*
|
|
||||||
* Tuple visibility is only computed once for each tuple, for correctness
|
|
||||||
* and efficiency reasons; see comment in heap_page_prune() for details.
|
|
||||||
* This is of type int8[], instead of HTSV_Result[], so we can use -1 to
|
|
||||||
* indicate no visibility has been computed, e.g. for LP_DEAD items.
|
|
||||||
*
|
|
||||||
* Same indexing as ->marked.
|
|
||||||
*/
|
|
||||||
int8 htsv[MaxHeapTuplesPerPage + 1];
|
|
||||||
} PruneState;
|
} PruneState;
|
||||||
|
|
||||||
/* Local functions */
|
/* Local functions */
|
||||||
@ -71,6 +61,7 @@ static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate,
|
|||||||
Buffer buffer);
|
Buffer buffer);
|
||||||
static int heap_prune_chain(Buffer buffer,
|
static int heap_prune_chain(Buffer buffer,
|
||||||
OffsetNumber rootoffnum,
|
OffsetNumber rootoffnum,
|
||||||
|
int8 *htsv,
|
||||||
PruneState *prstate);
|
PruneState *prstate);
|
||||||
static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
|
static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
|
||||||
static void heap_prune_record_redirect(PruneState *prstate,
|
static void heap_prune_record_redirect(PruneState *prstate,
|
||||||
@ -240,6 +231,10 @@ heap_page_prune(Relation relation, Buffer buffer,
|
|||||||
prstate.nredirected = prstate.ndead = prstate.nunused = 0;
|
prstate.nredirected = prstate.ndead = prstate.nunused = 0;
|
||||||
memset(prstate.marked, 0, sizeof(prstate.marked));
|
memset(prstate.marked, 0, sizeof(prstate.marked));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* presult->htsv is not initialized here because all ntuple spots in the
|
||||||
|
* array will be set either to a valid HTSV_Result value or -1.
|
||||||
|
*/
|
||||||
presult->ndeleted = 0;
|
presult->ndeleted = 0;
|
||||||
presult->nnewlpdead = 0;
|
presult->nnewlpdead = 0;
|
||||||
|
|
||||||
@ -276,7 +271,7 @@ heap_page_prune(Relation relation, Buffer buffer,
|
|||||||
/* Nothing to do if slot doesn't contain a tuple */
|
/* Nothing to do if slot doesn't contain a tuple */
|
||||||
if (!ItemIdIsNormal(itemid))
|
if (!ItemIdIsNormal(itemid))
|
||||||
{
|
{
|
||||||
prstate.htsv[offnum] = -1;
|
presult->htsv[offnum] = -1;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -292,8 +287,8 @@ heap_page_prune(Relation relation, Buffer buffer,
|
|||||||
if (off_loc)
|
if (off_loc)
|
||||||
*off_loc = offnum;
|
*off_loc = offnum;
|
||||||
|
|
||||||
prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
|
presult->htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
|
||||||
buffer);
|
buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Scan the page */
|
/* Scan the page */
|
||||||
@ -317,7 +312,8 @@ heap_page_prune(Relation relation, Buffer buffer,
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* Process this item or chain of items */
|
/* Process this item or chain of items */
|
||||||
presult->ndeleted += heap_prune_chain(buffer, offnum, &prstate);
|
presult->ndeleted += heap_prune_chain(buffer, offnum,
|
||||||
|
presult->htsv, &prstate);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Clear the offset information once we have processed the given page. */
|
/* Clear the offset information once we have processed the given page. */
|
||||||
@ -446,6 +442,8 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
|
|||||||
/*
|
/*
|
||||||
* Prune specified line pointer or a HOT chain originating at line pointer.
|
* Prune specified line pointer or a HOT chain originating at line pointer.
|
||||||
*
|
*
|
||||||
|
* Tuple visibility information is provided in htsv.
|
||||||
|
*
|
||||||
* If the item is an index-referenced tuple (i.e. not a heap-only tuple),
|
* If the item is an index-referenced tuple (i.e. not a heap-only tuple),
|
||||||
* the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
|
* the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
|
||||||
* chain. We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
|
* chain. We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
|
||||||
@ -473,7 +471,8 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
|
|||||||
* Returns the number of tuples (to be) deleted from the page.
|
* Returns the number of tuples (to be) deleted from the page.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
|
heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
|
||||||
|
int8 *htsv, PruneState *prstate)
|
||||||
{
|
{
|
||||||
int ndeleted = 0;
|
int ndeleted = 0;
|
||||||
Page dp = (Page) BufferGetPage(buffer);
|
Page dp = (Page) BufferGetPage(buffer);
|
||||||
@ -494,7 +493,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
|
|||||||
*/
|
*/
|
||||||
if (ItemIdIsNormal(rootlp))
|
if (ItemIdIsNormal(rootlp))
|
||||||
{
|
{
|
||||||
Assert(prstate->htsv[rootoffnum] != -1);
|
Assert(htsv[rootoffnum] != -1);
|
||||||
htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
|
htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
|
||||||
|
|
||||||
if (HeapTupleHeaderIsHeapOnly(htup))
|
if (HeapTupleHeaderIsHeapOnly(htup))
|
||||||
@ -517,7 +516,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
|
|||||||
* either here or while following a chain below. Whichever path
|
* either here or while following a chain below. Whichever path
|
||||||
* gets there first will mark the tuple unused.
|
* gets there first will mark the tuple unused.
|
||||||
*/
|
*/
|
||||||
if (prstate->htsv[rootoffnum] == HEAPTUPLE_DEAD &&
|
if (htsv[rootoffnum] == HEAPTUPLE_DEAD &&
|
||||||
!HeapTupleHeaderIsHotUpdated(htup))
|
!HeapTupleHeaderIsHotUpdated(htup))
|
||||||
{
|
{
|
||||||
heap_prune_record_unused(prstate, rootoffnum);
|
heap_prune_record_unused(prstate, rootoffnum);
|
||||||
@ -585,7 +584,6 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
Assert(ItemIdIsNormal(lp));
|
Assert(ItemIdIsNormal(lp));
|
||||||
Assert(prstate->htsv[offnum] != -1);
|
|
||||||
htup = (HeapTupleHeader) PageGetItem(dp, lp);
|
htup = (HeapTupleHeader) PageGetItem(dp, lp);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -605,7 +603,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
|
|||||||
*/
|
*/
|
||||||
tupdead = recent_dead = false;
|
tupdead = recent_dead = false;
|
||||||
|
|
||||||
switch ((HTSV_Result) prstate->htsv[offnum])
|
switch (htsv_get_valid_status(htsv[offnum]))
|
||||||
{
|
{
|
||||||
case HEAPTUPLE_DEAD:
|
case HEAPTUPLE_DEAD:
|
||||||
tupdead = true;
|
tupdead = true;
|
||||||
|
@ -1524,12 +1524,13 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
|
|||||||
* of complexity just so we could deal with tuples that were DEAD to VACUUM,
|
* of complexity just so we could deal with tuples that were DEAD to VACUUM,
|
||||||
* but nevertheless were left with storage after pruning.
|
* but nevertheless were left with storage after pruning.
|
||||||
*
|
*
|
||||||
* The approach we take now is to restart pruning when the race condition is
|
* As of Postgres 17, we circumvent this problem altogether by reusing the
|
||||||
* detected. This allows heap_page_prune() to prune the tuples inserted by
|
* result of heap_page_prune()'s visibility check. Without the second call to
|
||||||
* the now-aborted transaction. This is a little crude, but it guarantees
|
* HeapTupleSatisfiesVacuum(), there is no new HTSV_Result and there can be no
|
||||||
* that any items that make it into the dead_items array are simple LP_DEAD
|
* disagreement. We'll just handle such tuples as if they had become fully dead
|
||||||
* line pointers, and that every remaining item with tuple storage is
|
* right after this operation completes instead of in the middle of it. Note that
|
||||||
* considered as a candidate for freezing.
|
* any tuple that becomes dead after the call to heap_page_prune() can't need to
|
||||||
|
* be frozen, because it was visible to another session when vacuum started.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
lazy_scan_prune(LVRelState *vacrel,
|
lazy_scan_prune(LVRelState *vacrel,
|
||||||
@ -1542,8 +1543,6 @@ lazy_scan_prune(LVRelState *vacrel,
|
|||||||
OffsetNumber offnum,
|
OffsetNumber offnum,
|
||||||
maxoff;
|
maxoff;
|
||||||
ItemId itemid;
|
ItemId itemid;
|
||||||
HeapTupleData tuple;
|
|
||||||
HTSV_Result res;
|
|
||||||
PruneResult presult;
|
PruneResult presult;
|
||||||
int tuples_frozen,
|
int tuples_frozen,
|
||||||
lpdead_items,
|
lpdead_items,
|
||||||
@ -1563,8 +1562,6 @@ lazy_scan_prune(LVRelState *vacrel,
|
|||||||
*/
|
*/
|
||||||
maxoff = PageGetMaxOffsetNumber(page);
|
maxoff = PageGetMaxOffsetNumber(page);
|
||||||
|
|
||||||
retry:
|
|
||||||
|
|
||||||
/* Initialize (or reset) page-level state */
|
/* Initialize (or reset) page-level state */
|
||||||
pagefrz.freeze_required = false;
|
pagefrz.freeze_required = false;
|
||||||
pagefrz.FreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
|
pagefrz.FreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
|
||||||
@ -1600,6 +1597,7 @@ retry:
|
|||||||
offnum <= maxoff;
|
offnum <= maxoff;
|
||||||
offnum = OffsetNumberNext(offnum))
|
offnum = OffsetNumberNext(offnum))
|
||||||
{
|
{
|
||||||
|
HeapTupleHeader htup;
|
||||||
bool totally_frozen;
|
bool totally_frozen;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1642,22 +1640,7 @@ retry:
|
|||||||
|
|
||||||
Assert(ItemIdIsNormal(itemid));
|
Assert(ItemIdIsNormal(itemid));
|
||||||
|
|
||||||
ItemPointerSet(&(tuple.t_self), blkno, offnum);
|
htup = (HeapTupleHeader) PageGetItem(page, itemid);
|
||||||
tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
|
|
||||||
tuple.t_len = ItemIdGetLength(itemid);
|
|
||||||
tuple.t_tableOid = RelationGetRelid(rel);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* DEAD tuples are almost always pruned into LP_DEAD line pointers by
|
|
||||||
* heap_page_prune(), but it's possible that the tuple state changed
|
|
||||||
* since heap_page_prune() looked. Handle that here by restarting.
|
|
||||||
* (See comments at the top of function for a full explanation.)
|
|
||||||
*/
|
|
||||||
res = HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
|
|
||||||
buf);
|
|
||||||
|
|
||||||
if (unlikely(res == HEAPTUPLE_DEAD))
|
|
||||||
goto retry;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The criteria for counting a tuple as live in this block need to
|
* The criteria for counting a tuple as live in this block need to
|
||||||
@ -1678,7 +1661,7 @@ retry:
|
|||||||
* (Cases where we bypass index vacuuming will violate this optimistic
|
* (Cases where we bypass index vacuuming will violate this optimistic
|
||||||
* assumption, but the overall impact of that should be negligible.)
|
* assumption, but the overall impact of that should be negligible.)
|
||||||
*/
|
*/
|
||||||
switch (res)
|
switch (htsv_get_valid_status(presult.htsv[offnum]))
|
||||||
{
|
{
|
||||||
case HEAPTUPLE_LIVE:
|
case HEAPTUPLE_LIVE:
|
||||||
|
|
||||||
@ -1700,7 +1683,7 @@ retry:
|
|||||||
{
|
{
|
||||||
TransactionId xmin;
|
TransactionId xmin;
|
||||||
|
|
||||||
if (!HeapTupleHeaderXminCommitted(tuple.t_data))
|
if (!HeapTupleHeaderXminCommitted(htup))
|
||||||
{
|
{
|
||||||
prunestate->all_visible = false;
|
prunestate->all_visible = false;
|
||||||
break;
|
break;
|
||||||
@ -1710,7 +1693,7 @@ retry:
|
|||||||
* The inserter definitely committed. But is it old enough
|
* The inserter definitely committed. But is it old enough
|
||||||
* that everyone sees it as committed?
|
* that everyone sees it as committed?
|
||||||
*/
|
*/
|
||||||
xmin = HeapTupleHeaderGetXmin(tuple.t_data);
|
xmin = HeapTupleHeaderGetXmin(htup);
|
||||||
if (!TransactionIdPrecedes(xmin,
|
if (!TransactionIdPrecedes(xmin,
|
||||||
vacrel->cutoffs.OldestXmin))
|
vacrel->cutoffs.OldestXmin))
|
||||||
{
|
{
|
||||||
@ -1764,7 +1747,7 @@ retry:
|
|||||||
prunestate->hastup = true; /* page makes rel truncation unsafe */
|
prunestate->hastup = true; /* page makes rel truncation unsafe */
|
||||||
|
|
||||||
/* Tuple with storage -- consider need to freeze */
|
/* Tuple with storage -- consider need to freeze */
|
||||||
if (heap_prepare_freeze_tuple(tuple.t_data, &vacrel->cutoffs, &pagefrz,
|
if (heap_prepare_freeze_tuple(htup, &vacrel->cutoffs, &pagefrz,
|
||||||
&frozen[tuples_frozen], &totally_frozen))
|
&frozen[tuples_frozen], &totally_frozen))
|
||||||
{
|
{
|
||||||
/* Save prepared freeze plan for later */
|
/* Save prepared freeze plan for later */
|
||||||
|
@ -198,8 +198,33 @@ typedef struct PruneResult
|
|||||||
{
|
{
|
||||||
int ndeleted; /* Number of tuples deleted from the page */
|
int ndeleted; /* Number of tuples deleted from the page */
|
||||||
int nnewlpdead; /* Number of newly LP_DEAD items */
|
int nnewlpdead; /* Number of newly LP_DEAD items */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Tuple visibility is only computed once for each tuple, for correctness
|
||||||
|
* and efficiency reasons; see comment in heap_page_prune() for details.
|
||||||
|
* This is of type int8[], instead of HTSV_Result[], so we can use -1 to
|
||||||
|
* indicate no visibility has been computed, e.g. for LP_DEAD items.
|
||||||
|
*
|
||||||
|
* This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
|
||||||
|
* 1. Otherwise every access would need to subtract 1.
|
||||||
|
*/
|
||||||
|
int8 htsv[MaxHeapTuplesPerPage + 1];
|
||||||
} PruneResult;
|
} PruneResult;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Pruning calculates tuple visibility once and saves the results in an array
|
||||||
|
* of int8. See PruneResult.htsv for details. This helper function is meant to
|
||||||
|
* guard against examining visibility status array members which have not yet
|
||||||
|
* been computed.
|
||||||
|
*/
|
||||||
|
static inline HTSV_Result
|
||||||
|
htsv_get_valid_status(int status)
|
||||||
|
{
|
||||||
|
Assert(status >= HEAPTUPLE_DEAD &&
|
||||||
|
status <= HEAPTUPLE_DELETE_IN_PROGRESS);
|
||||||
|
return (HTSV_Result) status;
|
||||||
|
}
|
||||||
|
|
||||||
/* ----------------
|
/* ----------------
|
||||||
* function prototypes for heap access method
|
* function prototypes for heap access method
|
||||||
*
|
*
|
||||||
|
Reference in New Issue
Block a user