
Eliminate XLOG_HEAP2_VISIBLE from vacuum phase III

Instead of emitting a separate XLOG_HEAP2_VISIBLE WAL record for each
page that becomes all-visible in vacuum's third phase, specify the VM
changes in the already emitted XLOG_HEAP2_PRUNE_VACUUM_CLEANUP record.

Visibility checks are now performed before marking dead items unused.
This is safe because the heap page is held under exclusive lock for the
entire operation.

This reduces the number of WAL records generated by VACUUM phase III by
up to 50%.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com
Melanie Plageman
2025-10-13 18:01:06 -04:00
parent 03bf7a12c5
commit add323da40
6 changed files with 303 additions and 77 deletions
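
For orientation before the hunks: phase III previously emitted two WAL records per page that became all-visible (the XLOG_HEAP2_PRUNE_VACUUM_CLEANUP record plus a separate XLOG_HEAP2_VISIBLE record); carrying the VM bits inside the prune record leaves one, which is where the "up to 50%" figure comes from. The standalone sketch below is not taken from the commit; the constants are redefined locally to mirror the values introduced in the diff, and it only illustrates how the new XLHP_VM_* bits round-trip to and from visibility-map flags on the insert and redo sides.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Local stand-ins mirroring the flag values used in the diff below. */
#define XLHP_VM_ALL_VISIBLE        (1 << 8)  /* new xl_heap_prune flag */
#define XLHP_VM_ALL_FROZEN         (1 << 9)  /* new xl_heap_prune flag */
#define VISIBILITYMAP_ALL_VISIBLE  0x01
#define VISIBILITYMAP_ALL_FROZEN   0x02

/* Insert side: fold VM bits into the prune record's flags (cf. log_heap_prune_and_freeze). */
static uint16_t
encode_vm_bits(uint16_t xlrec_flags, uint8_t vmflags)
{
    if (vmflags & VISIBILITYMAP_ALL_VISIBLE)
    {
        xlrec_flags |= XLHP_VM_ALL_VISIBLE;
        if (vmflags & VISIBILITYMAP_ALL_FROZEN)
            xlrec_flags |= XLHP_VM_ALL_FROZEN;
    }
    return xlrec_flags;
}

/* Redo side: recover the VM bits from the record's flags (cf. heap_xlog_prune_freeze). */
static uint8_t
decode_vm_bits(uint16_t xlrec_flags)
{
    uint8_t vmflags = 0;

    if (xlrec_flags & XLHP_VM_ALL_VISIBLE)
    {
        vmflags = VISIBILITYMAP_ALL_VISIBLE;
        if (xlrec_flags & XLHP_VM_ALL_FROZEN)
            vmflags |= VISIBILITYMAP_ALL_FROZEN;
    }
    return vmflags;
}

int
main(void)
{
    uint8_t  vmflags = VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN;
    uint16_t flags = encode_vm_bits(0, vmflags);

    /* The translation is lossless in both directions. */
    assert(decode_vm_bits(flags) == vmflags);
    printf("xlrec.flags = 0x%04X, vmflags = 0x%02X\n",
           (unsigned) flags, (unsigned) decode_vm_bits(flags));
    return 0;
}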

View File

@@ -35,7 +35,9 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 	Buffer		buffer;
 	RelFileLocator rlocator;
 	BlockNumber blkno;
-	XLogRedoAction action;
+	Buffer		vmbuffer = InvalidBuffer;
+	uint8		vmflags = 0;
+	Size		freespace = 0;
 
 	XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
 	memcpy(&xlrec, maindataptr, SizeOfHeapPrune);
@@ -50,11 +52,22 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 	Assert((xlrec.flags & XLHP_CLEANUP_LOCK) != 0 ||
 		   (xlrec.flags & (XLHP_HAS_REDIRECTIONS | XLHP_HAS_DEAD_ITEMS)) == 0);
 
+	if (xlrec.flags & XLHP_VM_ALL_VISIBLE)
+	{
+		vmflags = VISIBILITYMAP_ALL_VISIBLE;
+		if (xlrec.flags & XLHP_VM_ALL_FROZEN)
+			vmflags |= VISIBILITYMAP_ALL_FROZEN;
+	}
+
 	/*
-	 * We are about to remove and/or freeze tuples. In Hot Standby mode,
-	 * ensure that there are no queries running for which the removed tuples
-	 * are still visible or which still consider the frozen xids as running.
-	 * The conflict horizon XID comes after xl_heap_prune.
+	 * After xl_heap_prune is the optional snapshot conflict horizon.
+	 *
+	 * In Hot Standby mode, we must ensure that there are no running queries
+	 * which would conflict with the changes in this record. That means we
+	 * can't replay this record if it removes tuples that are still visible to
+	 * transactions on the standby, freeze tuples with xids that are still
+	 * considered running on the standby, or set a page as all-visible in the
+	 * VM if it isn't all-visible to all transactions on the standby.
 	 */
 	if ((xlrec.flags & XLHP_HAS_CONFLICT_HORIZON) != 0)
 	{
@@ -71,12 +84,12 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 	}
 
 	/*
-	 * If we have a full-page image, restore it and we're done.
+	 * If we have a full-page image of the heap block, restore it and we're
+	 * done with the heap block.
 	 */
-	action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL,
-										   (xlrec.flags & XLHP_CLEANUP_LOCK) != 0,
-										   &buffer);
-	if (action == BLK_NEEDS_REDO)
+	if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL,
+									  (xlrec.flags & XLHP_CLEANUP_LOCK) != 0,
+									  &buffer) == BLK_NEEDS_REDO)
 	{
 		Page		page = BufferGetPage(buffer);
 		OffsetNumber *redirected;
@@ -90,6 +103,7 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 		xlhp_freeze_plan *plans;
 		OffsetNumber *frz_offsets;
 		char	   *dataptr = XLogRecGetBlockData(record, 0, &datalen);
+		bool		do_prune;
 
 		heap_xlog_deserialize_prune_and_freeze(dataptr, xlrec.flags,
 											   &nplans, &plans, &frz_offsets,
@@ -97,11 +111,16 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 											   &ndead, &nowdead,
 											   &nunused, &nowunused);
 
+		do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
+
+		/* Ensure the record does something */
+		Assert(do_prune || nplans > 0 || vmflags & VISIBILITYMAP_VALID_BITS);
+
 		/*
 		 * Update all line pointers per the record, and repair fragmentation
 		 * if needed.
 		 */
-		if (nredirected > 0 || ndead > 0 || nunused > 0)
+		if (do_prune)
 			heap_page_prune_execute(buffer,
 									(xlrec.flags & XLHP_CLEANUP_LOCK) == 0,
 									redirected, nredirected,
@@ -138,36 +157,90 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 		/* There should be no more data */
 		Assert((char *) frz_offsets == dataptr + datalen);
 
+		if (vmflags & VISIBILITYMAP_VALID_BITS)
+			PageSetAllVisible(page);
+
+		MarkBufferDirty(buffer);
+
+		/*
+		 * See log_heap_prune_and_freeze() for commentary on when we set the
+		 * heap page LSN.
+		 */
+		if (do_prune || nplans > 0 ||
+			((vmflags & VISIBILITYMAP_VALID_BITS) && XLogHintBitIsNeeded()))
+			PageSetLSN(page, lsn);
+
 		/*
 		 * Note: we don't worry about updating the page's prunability hints.
 		 * At worst this will cause an extra prune cycle to occur soon.
 		 */
-		PageSetLSN(page, lsn);
-		MarkBufferDirty(buffer);
 	}
 
 	/*
-	 * If we released any space or line pointers, update the free space map.
+	 * If we 1) released any space or line pointers or 2) set PD_ALL_VISIBLE
+	 * or the VM, update the freespace map.
 	 *
-	 * Do this regardless of a full-page image being applied, since the FSM
-	 * data is not in the page anyway.
+	 * Even when no actual space is freed (when only marking the page
+	 * all-visible or frozen), we still update the FSM. Because the FSM is
+	 * unlogged and maintained heuristically, it often becomes stale on
+	 * standbys. If such a standby is later promoted and runs VACUUM, it will
+	 * skip recalculating free space for pages that were marked
+	 * all-visible/all-frozen. FreeSpaceMapVacuum() can then propagate overly
+	 * optimistic free space values upward, causing future insertions to
+	 * select pages that turn out to be unusable. In bulk, this can lead to
+	 * long stalls.
+	 *
+	 * To prevent this, always update the FSM even when only marking a page
+	 * all-visible/all-frozen.
+	 *
+	 * Do this regardless of whether a full-page image is logged, since FSM
+	 * data is not part of the page itself.
 	 */
 	if (BufferIsValid(buffer))
 	{
-		if (xlrec.flags & (XLHP_HAS_REDIRECTIONS |
-						   XLHP_HAS_DEAD_ITEMS |
-						   XLHP_HAS_NOW_UNUSED_ITEMS))
-		{
-			Size		freespace = PageGetHeapFreeSpace(BufferGetPage(buffer));
-
-			UnlockReleaseBuffer(buffer);
-
-			XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
-		}
-		else
-			UnlockReleaseBuffer(buffer);
+		if ((xlrec.flags & (XLHP_HAS_REDIRECTIONS |
+							XLHP_HAS_DEAD_ITEMS |
+							XLHP_HAS_NOW_UNUSED_ITEMS)) ||
+			(vmflags & VISIBILITYMAP_VALID_BITS))
+			freespace = PageGetHeapFreeSpace(BufferGetPage(buffer));
+
+		/*
+		 * We want to avoid holding an exclusive lock on the heap buffer while
+		 * doing IO (either of the FSM or the VM), so we'll release it now.
+		 */
+		UnlockReleaseBuffer(buffer);
 	}
+
+	/*
+	 * Now read and update the VM block.
+	 *
+	 * We must redo changes to the VM even if the heap page was skipped due to
+	 * LSN interlock. See comment in heap_xlog_multi_insert() for more details
+	 * on replaying changes to the VM.
+	 */
+	if ((vmflags & VISIBILITYMAP_VALID_BITS) &&
+		XLogReadBufferForRedoExtended(record, 1,
+									  RBM_ZERO_ON_ERROR,
+									  false,
+									  &vmbuffer) == BLK_NEEDS_REDO)
+	{
+		Page		vmpage = BufferGetPage(vmbuffer);
+
+		/* initialize the page if it was read as zeros */
+		if (PageIsNew(vmpage))
+			PageInit(vmpage, BLCKSZ, 0);
+
+		visibilitymap_set_vmbits(blkno, vmbuffer, vmflags, rlocator);
+
+		Assert(BufferIsDirty(vmbuffer));
+		PageSetLSN(vmpage, lsn);
+	}
+
+	if (BufferIsValid(vmbuffer))
+		UnlockReleaseBuffer(vmbuffer);
+
+	if (freespace > 0)
+		XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
 }
/* /*
@@ -703,6 +776,7 @@ heap_xlog_multi_insert(XLogReaderState *record)
 								 VISIBILITYMAP_ALL_FROZEN,
 								 rlocator);
+		Assert(BufferIsDirty(vmbuffer));
 		PageSetLSN(vmpage, lsn);
 	}
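
The redo path above calls visibilitymap_set_vmbits() on a VM page that may have been read as all zeros. As a rough mental model only (this is not PostgreSQL code, the TOY_* names are invented, and the real layout lives in visibilitymap.c), each heap block owns two adjacent bits in the map, one for all-visible and one for all-frozen:

#include <assert.h>
#include <stdint.h>
#include <string.h>

#define TOY_ALL_VISIBLE     0x01
#define TOY_ALL_FROZEN      0x02
#define TOY_BITS_PER_BLOCK  2
#define TOY_BLOCKS_PER_BYTE (8 / TOY_BITS_PER_BLOCK)

/* OR the given flags into the two bits owned by heap_blkno. */
static void
toy_vm_set(uint8_t *map, uint32_t heap_blkno, uint8_t flags)
{
    uint32_t byteno = heap_blkno / TOY_BLOCKS_PER_BYTE;
    uint32_t shift = (heap_blkno % TOY_BLOCKS_PER_BYTE) * TOY_BITS_PER_BLOCK;

    map[byteno] |= (uint8_t) (flags << shift);
}

/* Read back the two bits owned by heap_blkno. */
static uint8_t
toy_vm_get(const uint8_t *map, uint32_t heap_blkno)
{
    uint32_t byteno = heap_blkno / TOY_BLOCKS_PER_BYTE;
    uint32_t shift = (heap_blkno % TOY_BLOCKS_PER_BYTE) * TOY_BITS_PER_BLOCK;

    return (uint8_t) ((map[byteno] >> shift) & 0x03);
}

int
main(void)
{
    uint8_t map[16];

    memset(map, 0, sizeof(map));    /* the "page read as zeros" case */
    toy_vm_set(map, 5, TOY_ALL_VISIBLE | TOY_ALL_FROZEN);
    assert(toy_vm_get(map, 5) == (TOY_ALL_VISIBLE | TOY_ALL_FROZEN));
    assert(toy_vm_get(map, 4) == 0);    /* neighbouring block untouched */
    return 0;
}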

View File

@@ -19,6 +19,7 @@
#include "access/htup_details.h" #include "access/htup_details.h"
#include "access/multixact.h" #include "access/multixact.h"
#include "access/transam.h" #include "access/transam.h"
#include "access/visibilitymapdefs.h"
#include "access/xlog.h" #include "access/xlog.h"
#include "access/xloginsert.h" #include "access/xloginsert.h"
#include "commands/vacuum.h" #include "commands/vacuum.h"
@@ -835,6 +836,8 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 		conflict_xid = prstate.latest_xid_removed;
 
 		log_heap_prune_and_freeze(relation, buffer,
+								  InvalidBuffer,	/* vmbuffer */
+								  0,	/* vmflags */
 								  conflict_xid,
 								  true, reason,
 								  prstate.frozen, prstate.nfrozen,
@@ -2045,12 +2048,17 @@ heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples,
  * replaying 'unused' items depends on whether they were all previously marked
  * as dead.
  *
+ * If the VM is being updated, vmflags will contain the bits to set. In this
+ * case, vmbuffer should already have been updated and marked dirty and should
+ * still be pinned and locked.
+ *
  * Note: This function scribbles on the 'frozen' array.
  *
  * Note: This is called in a critical section, so careful what you do here.
  */
 void
 log_heap_prune_and_freeze(Relation relation, Buffer buffer,
+						  Buffer vmbuffer, uint8 vmflags,
 						  TransactionId conflict_xid,
 						  bool cleanup_lock,
 						  PruneReason reason,
@@ -2062,6 +2070,7 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer,
 	xl_heap_prune xlrec;
 	XLogRecPtr	recptr;
 	uint8		info;
+	uint8		regbuf_flags_heap;
 
 	/* The following local variables hold data registered in the WAL record: */
 	xlhp_freeze_plan plans[MaxHeapTuplesPerPage];
@@ -2070,8 +2079,26 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer,
 	xlhp_prune_items dead_items;
 	xlhp_prune_items unused_items;
 	OffsetNumber frz_offsets[MaxHeapTuplesPerPage];
+	bool		do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
+	bool		do_set_vm = vmflags & VISIBILITYMAP_VALID_BITS;
+
+	Assert((vmflags & VISIBILITYMAP_VALID_BITS) == vmflags);
 
 	xlrec.flags = 0;
 
+	regbuf_flags_heap = REGBUF_STANDARD;
+
+	/*
+	 * We can avoid an FPI of the heap page if the only modification we are
+	 * making to it is to set PD_ALL_VISIBLE and checksums/wal_log_hints are
+	 * disabled. Note that if we explicitly skip an FPI, we must not stamp the
+	 * heap page with this record's LSN. Recovery skips records <= the stamped
+	 * LSN, so this could lead to skipping an earlier FPI needed to repair a
+	 * torn page.
+	 */
+	if (!do_prune &&
+		nfrozen == 0 &&
+		(!do_set_vm || !XLogHintBitIsNeeded()))
+		regbuf_flags_heap |= REGBUF_NO_IMAGE;
+
 	/*
 	 * Prepare data for the buffer. The arrays are not actually in the
@@ -2079,7 +2106,11 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer,
 	 * page image, the arrays can be omitted.
 	 */
 	XLogBeginInsert();
-	XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+	XLogRegisterBuffer(0, buffer, regbuf_flags_heap);
+
+	if (do_set_vm)
+		XLogRegisterBuffer(1, vmbuffer, 0);
+
 	if (nfrozen > 0)
 	{
 		int			nplans;
@@ -2136,6 +2167,12 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer,
 	 * Prepare the main xl_heap_prune record. We already set the XLHP_HAS_*
 	 * flag above.
 	 */
+	if (vmflags & VISIBILITYMAP_ALL_VISIBLE)
+	{
+		xlrec.flags |= XLHP_VM_ALL_VISIBLE;
+		if (vmflags & VISIBILITYMAP_ALL_FROZEN)
+			xlrec.flags |= XLHP_VM_ALL_FROZEN;
+	}
 	if (RelationIsAccessibleInLogicalDecoding(relation))
 		xlrec.flags |= XLHP_IS_CATALOG_REL;
 	if (TransactionIdIsValid(conflict_xid))
@@ -2168,5 +2205,19 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer,
 	}
 
 	recptr = XLogInsert(RM_HEAP2_ID, info);
 
-	PageSetLSN(BufferGetPage(buffer), recptr);
+	if (do_set_vm)
+	{
+		Assert(BufferIsDirty(vmbuffer));
+		PageSetLSN(BufferGetPage(vmbuffer), recptr);
+	}
+
+	/*
+	 * See comment at the top of the function about regbuf_flags_heap for
+	 * details on when we can advance the page LSN.
+	 */
+	if (do_prune || nfrozen > 0 || (do_set_vm && XLogHintBitIsNeeded()))
+	{
+		Assert(BufferIsDirty(buffer));
+		PageSetLSN(BufferGetPage(buffer), recptr);
+	}
 }
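
The FPI-skip comment and the LSN-stamping block above are two halves of a single rule: if the only heap-page change is PD_ALL_VISIBLE and hint bits are not WAL-logged, the FPI is skipped and the page LSN must not advance, otherwise recovery could skip an earlier FPI needed to repair a torn page. A standalone restatement (not PostgreSQL code; the function and parameter names here are invented for illustration) makes the complementarity explicit:

#include <assert.h>
#include <stdbool.h>

/* Mirror of the REGBUF_NO_IMAGE condition in log_heap_prune_and_freeze(). */
static bool
skip_heap_fpi(bool do_prune, int nfrozen, bool do_set_vm, bool hint_bits_logged)
{
    return !do_prune && nfrozen == 0 && (!do_set_vm || !hint_bits_logged);
}

/* Mirror of the "stamp the heap page LSN" condition. */
static bool
stamp_heap_lsn(bool do_prune, int nfrozen, bool do_set_vm, bool hint_bits_logged)
{
    return do_prune || nfrozen > 0 || (do_set_vm && hint_bits_logged);
}

int
main(void)
{
    /* The two decisions are exact complements for every input combination. */
    for (int mask = 0; mask < 16; mask++)
    {
        bool do_prune = (mask & 1) != 0;
        int  nfrozen = (mask & 2) ? 1 : 0;
        bool do_set_vm = (mask & 4) != 0;
        bool hint_bits_logged = (mask & 8) != 0;

        assert(skip_heap_fpi(do_prune, nfrozen, do_set_vm, hint_bits_logged) !=
               stamp_heap_lsn(do_prune, nfrozen, do_set_vm, hint_bits_logged));
    }
    return 0;
}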

View File

@@ -463,8 +463,15 @@ static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *
 							   int num_offsets);
 static void dead_items_reset(LVRelState *vacrel);
 static void dead_items_cleanup(LVRelState *vacrel);
+#ifdef USE_ASSERT_CHECKING
 static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
 									 TransactionId *visibility_cutoff_xid, bool *all_frozen);
+#endif
+static bool heap_page_would_be_all_visible(LVRelState *vacrel, Buffer buf,
+										   OffsetNumber *deadoffsets,
+										   int ndeadoffsets,
+										   bool *all_frozen,
+										   TransactionId *visibility_cutoff_xid);
 static void update_relstats_all_indexes(LVRelState *vacrel);
 static void vacuum_error_callback(void *arg);
 static void update_vacuum_error_info(LVRelState *vacrel,
@@ -2848,8 +2855,10 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
 	OffsetNumber unused[MaxHeapTuplesPerPage];
 	int			nunused = 0;
 	TransactionId visibility_cutoff_xid;
+	TransactionId conflict_xid = InvalidTransactionId;
 	bool		all_frozen;
 	LVSavedErrInfo saved_err_info;
+	uint8		vmflags = 0;
 
 	Assert(vacrel->do_index_vacuuming);
@@ -2860,6 +2869,33 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
 							 VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
 							 InvalidOffsetNumber);
 
+	/*
+	 * Before marking dead items unused, check whether the page will become
+	 * all-visible once that change is applied. This lets us reap the tuples
+	 * and mark the page all-visible within the same critical section,
+	 * enabling both changes to be emitted in a single WAL record. Since the
+	 * visibility checks may perform I/O and allocate memory, they must be
+	 * done outside the critical section.
+	 */
+	if (heap_page_would_be_all_visible(vacrel, buffer,
+									   deadoffsets, num_offsets,
+									   &all_frozen, &visibility_cutoff_xid))
+	{
+		vmflags |= VISIBILITYMAP_ALL_VISIBLE;
+		if (all_frozen)
+		{
+			vmflags |= VISIBILITYMAP_ALL_FROZEN;
+			Assert(!TransactionIdIsValid(visibility_cutoff_xid));
+		}
+
+		/*
+		 * Take the lock on the vmbuffer before entering a critical section.
+		 * The heap page lock must also be held while updating the VM to
+		 * ensure consistency.
+		 */
+		LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
+	}
+
 	START_CRIT_SECTION();
 
 	for (int i = 0; i < num_offsets; i++)
@@ -2879,6 +2915,19 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
 	/* Attempt to truncate line pointer array now */
 	PageTruncateLinePointerArray(page);
 
+	if ((vmflags & VISIBILITYMAP_VALID_BITS) != 0)
+	{
+		/*
+		 * The page is guaranteed to have had dead line pointers, so we always
+		 * set PD_ALL_VISIBLE.
+		 */
+		PageSetAllVisible(page);
+		visibilitymap_set_vmbits(blkno,
+								 vmbuffer, vmflags,
+								 vacrel->rel->rd_locator);
+		conflict_xid = visibility_cutoff_xid;
+	}
+
 	/*
 	 * Mark buffer dirty before we write WAL.
 	 */
@@ -2888,7 +2937,9 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
 	if (RelationNeedsWAL(vacrel->rel))
 	{
 		log_heap_prune_and_freeze(vacrel->rel, buffer,
-								  InvalidTransactionId,
+								  vmflags != 0 ? vmbuffer : InvalidBuffer,
+								  vmflags,
+								  conflict_xid,
 								  false,	/* no cleanup lock required */
 								  PRUNE_VACUUM_CLEANUP,
 								  NULL, 0,	/* frozen */
@@ -2897,39 +2948,12 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
 								  unused, nunused);
 	}
 
-	/*
-	 * End critical section, so we safely can do visibility tests (which
-	 * possibly need to perform IO and allocate memory!). If we crash now the
-	 * page (including the corresponding vm bit) might not be marked all
-	 * visible, but that's fine. A later vacuum will fix that.
-	 */
 	END_CRIT_SECTION();
 
-	/*
-	 * Now that we have removed the LP_DEAD items from the page, once again
-	 * check if the page has become all-visible. The page is already marked
-	 * dirty, exclusively locked, and, if needed, a full page image has been
-	 * emitted.
-	 */
-	Assert(!PageIsAllVisible(page));
-	if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid,
-								 &all_frozen))
+	if ((vmflags & VISIBILITYMAP_ALL_VISIBLE) != 0)
 	{
-		uint8		flags = VISIBILITYMAP_ALL_VISIBLE;
-
-		if (all_frozen)
-		{
-			Assert(!TransactionIdIsValid(visibility_cutoff_xid));
-			flags |= VISIBILITYMAP_ALL_FROZEN;
-		}
-
-		PageSetAllVisible(page);
-		visibilitymap_set(vacrel->rel, blkno, buffer,
-						  InvalidXLogRecPtr,
-						  vmbuffer, visibility_cutoff_xid,
-						  flags);
-
 		/* Count the newly set VM page for logging */
+		LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
 		vacrel->vm_new_visible_pages++;
 		if (all_frozen)
 			vacrel->vm_new_visible_frozen_pages++;
@@ -3597,31 +3621,80 @@ dead_items_cleanup(LVRelState *vacrel)
 	vacrel->pvs = NULL;
 }
 
+#ifdef USE_ASSERT_CHECKING
 /*
- * Check if every tuple in the given page is visible to all current and future
- * transactions. Also return the visibility_cutoff_xid which is the highest
- * xmin amongst the visible tuples. Set *all_frozen to true if every tuple
- * on this page is frozen.
- *
- * This is a stripped down version of lazy_scan_prune(). If you change
- * anything here, make sure that everything stays in sync. Note that an
- * assertion calls us to verify that everybody still agrees. Be sure to avoid
- * introducing new side-effects here.
+ * Wrapper for heap_page_would_be_all_visible() which can be used for callers
+ * that expect no LP_DEAD on the page. Currently assert-only, but there is no
+ * reason not to use it outside of asserts.
  */
 static bool
 heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
 						 TransactionId *visibility_cutoff_xid,
 						 bool *all_frozen)
 {
+	return heap_page_would_be_all_visible(vacrel, buf,
+										  NULL, 0,
+										  all_frozen,
+										  visibility_cutoff_xid);
+}
+#endif
+
+/*
+ * Check whether the heap page in buf is all-visible except for the dead
+ * tuples referenced in the deadoffsets array.
+ *
+ * Vacuum uses this to check if a page would become all-visible after reaping
+ * known dead tuples. This function does not remove the dead items.
+ *
+ * This cannot be called in a critical section, as the visibility checks may
+ * perform IO and allocate memory.
+ *
+ * Returns true if the page is all-visible other than the provided
+ * deadoffsets and false otherwise.
+ *
+ * Output parameters:
+ *
+ * - *all_frozen: true if every tuple on the page is frozen
+ * - *visibility_cutoff_xid: newest xmin; valid only if page is all-visible
+ *
+ * Callers looking to verify that the page is already all-visible can call
+ * heap_page_is_all_visible().
+ *
+ * This logic is closely related to heap_prune_record_unchanged_lp_normal().
+ * If you modify this function, ensure consistency with that code. An
+ * assertion cross-checks that both remain in agreement. Do not introduce new
+ * side-effects.
+ */
+static bool
+heap_page_would_be_all_visible(LVRelState *vacrel, Buffer buf,
+							   OffsetNumber *deadoffsets,
+							   int ndeadoffsets,
+							   bool *all_frozen,
+							   TransactionId *visibility_cutoff_xid)
+{
 	Page		page = BufferGetPage(buf);
 	BlockNumber blockno = BufferGetBlockNumber(buf);
 	OffsetNumber offnum,
 				maxoff;
 	bool		all_visible = true;
+	int			matched_dead_count = 0;
 
 	*visibility_cutoff_xid = InvalidTransactionId;
 	*all_frozen = true;
 
+	Assert(ndeadoffsets == 0 || deadoffsets);
+
+#ifdef USE_ASSERT_CHECKING
+	/* Confirm input deadoffsets[] is strictly sorted */
+	if (ndeadoffsets > 1)
+	{
+		for (int i = 1; i < ndeadoffsets; i++)
+			Assert(deadoffsets[i - 1] < deadoffsets[i]);
+	}
+#endif
+
 	maxoff = PageGetMaxOffsetNumber(page);
 	for (offnum = FirstOffsetNumber;
 		 offnum <= maxoff && all_visible;
@@ -3649,9 +3722,15 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
 		 */
 		if (ItemIdIsDead(itemid))
 		{
-			all_visible = false;
-			*all_frozen = false;
-			break;
+			if (!deadoffsets ||
+				matched_dead_count >= ndeadoffsets ||
+				deadoffsets[matched_dead_count] != offnum)
+			{
+				*all_frozen = all_visible = false;
+				break;
+			}
+			matched_dead_count++;
+			continue;
 		}
 
 		Assert(ItemIdIsNormal(itemid));
@@ -3660,6 +3739,9 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
 		tuple.t_len = ItemIdGetLength(itemid);
 		tuple.t_tableOid = RelationGetRelid(vacrel->rel);
 
+		/* Visibility checks may do IO or allocate memory */
+		Assert(CritSectionCount == 0);
+
 		switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
 										 buf))
 		{
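
heap_page_would_be_all_visible() walks the page's line pointers in offset order and consumes the sorted deadoffsets[] array in step with them, which is why the strict-sort assertion above matters: any LP_DEAD item that is not the next scheduled dead offset disqualifies the page. A stripped-down standalone sketch of just that matching loop (not PostgreSQL code; the page is modelled as a plain boolean array and the tuple visibility checks are elided):

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/*
 * is_dead[offnum] marks LP_DEAD items for offsets 1..maxoff; deadoffsets[]
 * must be strictly ascending, matching the real function's assertion.
 */
static bool
would_be_all_visible(const bool *is_dead, int maxoff,
                     const uint16_t *deadoffsets, int ndeadoffsets)
{
    int matched = 0;

    for (int offnum = 1; offnum <= maxoff; offnum++)
    {
        if (!is_dead[offnum])
            continue;           /* stand-in for the normal-tuple visibility checks */

        /* A dead item not scheduled for removal keeps the page not-all-visible. */
        if (deadoffsets == NULL ||
            matched >= ndeadoffsets ||
            deadoffsets[matched] != offnum)
            return false;
        matched++;
    }
    return true;
}

int
main(void)
{
    bool     is_dead[8] = {false, false, true, false, true, false, false, false};
    uint16_t dead[] = {2, 4};   /* strictly sorted, as the real code asserts */

    assert(would_be_all_visible(is_dead, 7, dead, 2));
    assert(!would_be_all_visible(is_dead, 7, dead, 1));  /* offset 4 unaccounted for */
    assert(!would_be_all_visible(is_dead, 7, NULL, 0));  /* wrapper-style, no dead items expected */
    return 0;
}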

View File

@@ -103,7 +103,7 @@ plan_elem_desc(StringInfo buf, void *plan, void *data)
  * code, the latter of which is used in frontend (pg_waldump) code.
  */
 void
-heap_xlog_deserialize_prune_and_freeze(char *cursor, uint8 flags,
+heap_xlog_deserialize_prune_and_freeze(char *cursor, uint16 flags,
 									   int *nplans, xlhp_freeze_plan **plans,
 									   OffsetNumber **frz_offsets,
 									   int *nredirected, OffsetNumber **redirected,
@@ -287,6 +287,15 @@ heap2_desc(StringInfo buf, XLogReaderState *record)
 		appendStringInfo(buf, ", isCatalogRel: %c",
 						 xlrec->flags & XLHP_IS_CATALOG_REL ? 'T' : 'F');
 
+		if (xlrec->flags & XLHP_VM_ALL_VISIBLE)
+		{
+			uint8		vmflags = VISIBILITYMAP_ALL_VISIBLE;
+
+			if (xlrec->flags & XLHP_VM_ALL_FROZEN)
+				vmflags |= VISIBILITYMAP_ALL_FROZEN;
+
+			appendStringInfo(buf, ", vm_flags: 0x%02X", vmflags);
+		}
+
 		if (XLogRecHasBlockData(record, 0))
 		{
 			Size		datalen;

View File

@@ -382,6 +382,7 @@ extern void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only,
 									OffsetNumber *nowunused, int nunused);
 extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets);
 extern void log_heap_prune_and_freeze(Relation relation, Buffer buffer,
+									  Buffer vmbuffer, uint8 vmflags,
 									  TransactionId conflict_xid,
 									  bool cleanup_lock,
 									  PruneReason reason,

View File

@@ -249,7 +249,7 @@ typedef struct xl_heap_update
  * Main data section:
  *
  * xl_heap_prune
- *		uint8		flags
+ *		uint16		flags
  *		TransactionId snapshot_conflict_horizon
  *
  * Block 0 data section:
@@ -284,7 +284,7 @@ typedef struct xl_heap_update
  */
 typedef struct xl_heap_prune
 {
-	uint8		flags;
+	uint16		flags;
 
 	/*
 	 * If XLHP_HAS_CONFLICT_HORIZON is set, the conflict horizon XID follows,
@@ -292,7 +292,7 @@ typedef struct xl_heap_prune
  */
 } xl_heap_prune;
 
-#define SizeOfHeapPrune (offsetof(xl_heap_prune, flags) + sizeof(uint8))
+#define SizeOfHeapPrune (offsetof(xl_heap_prune, flags) + sizeof(uint16))
 
 /* to handle recovery conflict during logical decoding on standby */
 #define XLHP_IS_CATALOG_REL			(1 << 1)
@@ -330,6 +330,15 @@ typedef struct xl_heap_prune
 #define XLHP_HAS_DEAD_ITEMS			(1 << 6)
 #define XLHP_HAS_NOW_UNUSED_ITEMS	(1 << 7)
 
+/*
+ * The xl_heap_prune record's flags may also contain which VM bits to set.
+ * xl_heap_prune should always use the XLHP_VM_ALL_VISIBLE and
+ * XLHP_VM_ALL_FROZEN flags and translate them to their visibilitymapdefs.h
+ * equivalents, VISIBILITYMAP_ALL_VISIBLE and VISIBILITYMAP_ALL_FROZEN.
+ */
+#define XLHP_VM_ALL_VISIBLE			(1 << 8)
+#define XLHP_VM_ALL_FROZEN			(1 << 9)
+
 /*
  * xlhp_freeze_plan describes how to freeze a group of one or more heap tuples
  * (appears in xl_heap_prune's xlhp_freeze_plans sub-record)
@@ -497,7 +506,7 @@ extern XLogRecPtr log_heap_visible(Relation rel, Buffer heap_buffer,
 								  uint8 vmflags);
 
 /* in heapdesc.c, so it can be shared between frontend/backend code */
-extern void heap_xlog_deserialize_prune_and_freeze(char *cursor, uint8 flags,
+extern void heap_xlog_deserialize_prune_and_freeze(char *cursor, uint16 flags,
 												   int *nplans, xlhp_freeze_plan **plans,
 												   OffsetNumber **frz_offsets,
 												   int *nredirected, OffsetNumber **redirected,
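
The uint8-to-uint16 changes in this header exist because the two new flag bits sit at positions 8 and 9, which cannot be represented in the old one-byte flags field; both xl_heap_prune.flags and the SizeOfHeapPrune term therefore widen together. A minimal standalone demonstration (not PostgreSQL code; the macros are redefined locally with the values from the hunk above):

#include <assert.h>
#include <stdint.h>

#define XLHP_VM_ALL_VISIBLE (1 << 8)
#define XLHP_VM_ALL_FROZEN  (1 << 9)

int
main(void)
{
    uint8_t  narrow = 0;
    uint16_t wide = 0;

    narrow |= (uint8_t) XLHP_VM_ALL_VISIBLE;    /* truncated: bit 8 is lost */
    wide |= XLHP_VM_ALL_VISIBLE | XLHP_VM_ALL_FROZEN;

    assert(narrow == 0);            /* a uint8 field cannot hold the new bits */
    assert(wide == 0x0300);         /* a uint16 field can */
    assert(sizeof(uint16_t) == 2);  /* matches the new sizeof(uint16) term in SizeOfHeapPrune */
    return 0;
}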