1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-19 15:49:24 +03:00

Eliminate COPY FREEZE use of XLOG_HEAP2_VISIBLE

Instead of emitting a separate WAL XLOG_HEAP2_VISIBLE record for setting
bits in the VM, specify the VM block changes in the
XLOG_HEAP2_MULTI_INSERT record.

This halves the number of WAL records emitted by COPY FREEZE.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com
This commit is contained in:
Melanie Plageman
2025-10-09 16:25:50 -04:00
parent 1b073cba49
commit d96f87332b
5 changed files with 154 additions and 18 deletions

View File

@@ -2466,7 +2466,11 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0; starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN)) if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
{
all_frozen_set = true; all_frozen_set = true;
/* Lock the vmbuffer before entering the critical section */
LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
}
/* NO EREPORT(ERROR) from here till changes are logged */ /* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION(); START_CRIT_SECTION();
@@ -2506,7 +2510,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
* going to add further frozen rows to it. * going to add further frozen rows to it.
* *
* If we're only adding already frozen rows to a previously empty * If we're only adding already frozen rows to a previously empty
* page, mark it as all-visible. * page, mark it as all-frozen and update the visibility map. We're
* already holding a pin on the vmbuffer.
*/ */
if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN)) if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
{ {
@@ -2517,7 +2522,14 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
vmbuffer, VISIBILITYMAP_VALID_BITS); vmbuffer, VISIBILITYMAP_VALID_BITS);
} }
else if (all_frozen_set) else if (all_frozen_set)
{
PageSetAllVisible(page); PageSetAllVisible(page);
visibilitymap_set_vmbits(BufferGetBlockNumber(buffer),
vmbuffer,
VISIBILITYMAP_ALL_VISIBLE |
VISIBILITYMAP_ALL_FROZEN,
relation->rd_locator);
}
/* /*
* XXX Should we set PageSetPrunable on this page ? See heap_insert() * XXX Should we set PageSetPrunable on this page ? See heap_insert()
@@ -2565,6 +2577,12 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
xlrec->flags = 0; xlrec->flags = 0;
if (all_visible_cleared) if (all_visible_cleared)
xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED; xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED;
/*
* We don't have to worry about including a conflict xid in the
* WAL record, as HEAP_INSERT_FROZEN intentionally violates
* visibility rules.
*/
if (all_frozen_set) if (all_frozen_set)
xlrec->flags = XLH_INSERT_ALL_FROZEN_SET; xlrec->flags = XLH_INSERT_ALL_FROZEN_SET;
@@ -2628,6 +2646,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
XLogBeginInsert(); XLogBeginInsert();
XLogRegisterData(xlrec, tupledata - scratch.data); XLogRegisterData(xlrec, tupledata - scratch.data);
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags); XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
if (all_frozen_set)
XLogRegisterBuffer(1, vmbuffer, 0);
XLogRegisterBufData(0, tupledata, totaldatalen); XLogRegisterBufData(0, tupledata, totaldatalen);
@@ -2637,26 +2657,17 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
recptr = XLogInsert(RM_HEAP2_ID, info); recptr = XLogInsert(RM_HEAP2_ID, info);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
if (all_frozen_set)
{
Assert(BufferIsDirty(vmbuffer));
PageSetLSN(BufferGetPage(vmbuffer), recptr);
}
} }
END_CRIT_SECTION(); END_CRIT_SECTION();
/*
* If we've frozen everything on the page, update the visibilitymap.
* We're already holding pin on the vmbuffer.
*/
if (all_frozen_set) if (all_frozen_set)
{ LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
/*
* It's fine to use InvalidTransactionId here - this is only used
* when HEAP_INSERT_FROZEN is specified, which intentionally
* violates visibility rules.
*/
visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
InvalidXLogRecPtr, vmbuffer,
InvalidTransactionId,
VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
}
UnlockReleaseBuffer(buffer); UnlockReleaseBuffer(buffer);
ndone += nthispage; ndone += nthispage;

View File

@@ -551,6 +551,7 @@ heap_xlog_multi_insert(XLogReaderState *record)
int i; int i;
bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0; bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
XLogRedoAction action; XLogRedoAction action;
Buffer vmbuffer = InvalidBuffer;
/* /*
* Insertion doesn't overwrite MVCC data, so no conflict processing is * Insertion doesn't overwrite MVCC data, so no conflict processing is
@@ -571,11 +572,11 @@ heap_xlog_multi_insert(XLogReaderState *record)
if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
{ {
Relation reln = CreateFakeRelcacheEntry(rlocator); Relation reln = CreateFakeRelcacheEntry(rlocator);
Buffer vmbuffer = InvalidBuffer;
visibilitymap_pin(reln, blkno, &vmbuffer); visibilitymap_pin(reln, blkno, &vmbuffer);
visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS); visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
ReleaseBuffer(vmbuffer); ReleaseBuffer(vmbuffer);
vmbuffer = InvalidBuffer;
FreeFakeRelcacheEntry(reln); FreeFakeRelcacheEntry(reln);
} }
@@ -662,6 +663,52 @@ heap_xlog_multi_insert(XLogReaderState *record)
if (BufferIsValid(buffer)) if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer); UnlockReleaseBuffer(buffer);
buffer = InvalidBuffer;
/*
* Read and update the visibility map (VM) block.
*
* We must always redo VM changes, even if the corresponding heap page
* update was skipped due to the LSN interlock. Each VM block covers
* multiple heap pages, so later WAL records may update other bits in the
* same block. If this record includes an FPI (full-page image),
* subsequent WAL records may depend on it to guard against torn pages.
*
* Heap page changes are replayed first to preserve the invariant:
* PD_ALL_VISIBLE must be set on the heap page if the VM bit is set.
*
* Note that we released the heap page lock above. During normal
* operation, this would be unsafe — a concurrent modification could
* clear PD_ALL_VISIBLE while the VM bit remained set, violating the
* invariant.
*
* During recovery, however, no concurrent writers exist. Therefore,
* updating the VM without holding the heap page lock is safe enough. This
* same approach is taken when replaying xl_heap_visible records (see
* heap_xlog_visible()).
*/
if ((xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) &&
XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_ON_ERROR, false,
&vmbuffer) == BLK_NEEDS_REDO)
{
Page vmpage = BufferGetPage(vmbuffer);
/* initialize the page if it was read as zeros */
if (PageIsNew(vmpage))
PageInit(vmpage, BLCKSZ, 0);
visibilitymap_set_vmbits(blkno,
vmbuffer,
VISIBILITYMAP_ALL_VISIBLE |
VISIBILITYMAP_ALL_FROZEN,
rlocator);
PageSetLSN(vmpage, lsn);
}
if (BufferIsValid(vmbuffer))
UnlockReleaseBuffer(vmbuffer);
/* /*
* If the page is running low on free space, update the FSM as well. * If the page is running low on free space, update the FSM as well.
* Arbitrarily, our definition of "low" is less than 20%. We can't do much * Arbitrarily, our definition of "low" is less than 20%. We can't do much

View File

@@ -14,7 +14,8 @@
* visibilitymap_clear - clear bits for one page in the visibility map * visibilitymap_clear - clear bits for one page in the visibility map
* visibilitymap_pin - pin a map page for setting a bit * visibilitymap_pin - pin a map page for setting a bit
* visibilitymap_pin_ok - check whether correct map page is already pinned * visibilitymap_pin_ok - check whether correct map page is already pinned
* visibilitymap_set - set a bit in a previously pinned page * visibilitymap_set - set bit(s) in a previously pinned page and log
* visibilitymap_set_vmbits - set bit(s) in a pinned page
* visibilitymap_get_status - get status of bits * visibilitymap_get_status - get status of bits
* visibilitymap_count - count number of bits set in visibility map * visibilitymap_count - count number of bits set in visibility map
* visibilitymap_prepare_truncate - * visibilitymap_prepare_truncate -
@@ -322,6 +323,73 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
return status; return status;
} }
/*
* Set VM (visibility map) flags in the VM block in vmBuf.
*
* This function is intended for callers that log VM changes together
* with the heap page modifications that rendered the page all-visible.
* Callers that log VM changes separately should use visibilitymap_set().
*
* vmBuf must be pinned and exclusively locked, and it must cover the VM bits
* corresponding to heapBlk.
*
* In normal operation (not recovery), this must be called inside a critical
* section that also applies the necessary heap page changes and, if
* applicable, emits WAL.
*
* The caller is responsible for ensuring consistency between the heap page
* and the VM page by holding a pin and exclusive lock on the buffer
* containing heapBlk.
*
* rlocator is used only for debugging messages.
*/
uint8
visibilitymap_set_vmbits(BlockNumber heapBlk,
Buffer vmBuf, uint8 flags,
const RelFileLocator rlocator)
{
BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
uint8 mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
Page page;
uint8 *map;
uint8 status;
#ifdef TRACE_VISIBILITYMAP
elog(DEBUG1, "vm_set flags 0x%02X for %s %d",
flags,
relpathbackend(rlocator, MyProcNumber, MAIN_FORKNUM).str,
heapBlk);
#endif
/* Call in same critical section where WAL is emitted. */
Assert(InRecovery || CritSectionCount > 0);
/* Flags should be valid. Also never clear bits with this function */
Assert((flags & VISIBILITYMAP_VALID_BITS) == flags);
/* Must never set all_frozen bit without also setting all_visible bit */
Assert(flags != VISIBILITYMAP_ALL_FROZEN);
/* Check that we have the right VM page pinned */
if (!BufferIsValid(vmBuf) || BufferGetBlockNumber(vmBuf) != mapBlock)
elog(ERROR, "wrong VM buffer passed to visibilitymap_set");
Assert(BufferIsLockedByMeInMode(vmBuf, BUFFER_LOCK_EXCLUSIVE));
page = BufferGetPage(vmBuf);
map = (uint8 *) PageGetContents(page);
status = (map[mapByte] >> mapOffset) & VISIBILITYMAP_VALID_BITS;
if (flags != status)
{
map[mapByte] |= (flags << mapOffset);
MarkBufferDirty(vmBuf);
}
return status;
}
/* /*
* visibilitymap_get_status - get status of bits * visibilitymap_get_status - get status of bits
* *

View File

@@ -16,6 +16,7 @@
#include "access/heapam_xlog.h" #include "access/heapam_xlog.h"
#include "access/rmgrdesc_utils.h" #include "access/rmgrdesc_utils.h"
#include "access/visibilitymapdefs.h"
#include "storage/standbydefs.h" #include "storage/standbydefs.h"
/* /*
@@ -354,6 +355,11 @@ heap2_desc(StringInfo buf, XLogReaderState *record)
appendStringInfo(buf, "ntuples: %d, flags: 0x%02X", xlrec->ntuples, appendStringInfo(buf, "ntuples: %d, flags: 0x%02X", xlrec->ntuples,
xlrec->flags); xlrec->flags);
if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
appendStringInfo(buf, ", vm_flags: 0x%02X",
VISIBILITYMAP_ALL_VISIBLE |
VISIBILITYMAP_ALL_FROZEN);
if (XLogRecHasBlockData(record, 0) && !isinit) if (XLogRecHasBlockData(record, 0) && !isinit)
{ {
appendStringInfoString(buf, ", offsets:"); appendStringInfoString(buf, ", offsets:");

View File

@@ -18,6 +18,7 @@
#include "access/xlogdefs.h" #include "access/xlogdefs.h"
#include "storage/block.h" #include "storage/block.h"
#include "storage/buf.h" #include "storage/buf.h"
#include "storage/relfilelocator.h"
#include "utils/relcache.h" #include "utils/relcache.h"
/* Macros for visibilitymap test */ /* Macros for visibilitymap test */
@@ -37,6 +38,9 @@ extern uint8 visibilitymap_set(Relation rel,
Buffer vmBuf, Buffer vmBuf,
TransactionId cutoff_xid, TransactionId cutoff_xid,
uint8 flags); uint8 flags);
extern uint8 visibilitymap_set_vmbits(BlockNumber heapBlk,
Buffer vmBuf, uint8 flags,
const RelFileLocator rlocator);
extern uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf); extern uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf);
extern void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen); extern void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen);
extern BlockNumber visibilitymap_prepare_truncate(Relation rel, extern BlockNumber visibilitymap_prepare_truncate(Relation rel,