mirror of
https://github.com/postgres/postgres.git
synced 2025-10-19 15:49:24 +03:00
Eliminate COPY FREEZE use of XLOG_HEAP2_VISIBLE
Instead of emitting a separate WAL XLOG_HEAP2_VISIBLE record for setting bits in the VM, specify the VM block changes in the XLOG_HEAP2_MULTI_INSERT record.

This halves the number of WAL records emitted by COPY FREEZE.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com
This commit is contained in:
@@ -2466,7 +2466,11 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
|
||||
starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
|
||||
|
||||
if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
|
||||
{
|
||||
all_frozen_set = true;
|
||||
/* Lock the vmbuffer before entering the critical section */
|
||||
LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
|
||||
}
|
||||
|
||||
/* NO EREPORT(ERROR) from here till changes are logged */
|
||||
START_CRIT_SECTION();
|
||||
@@ -2506,7 +2510,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
|
||||
* going to add further frozen rows to it.
|
||||
*
|
||||
* If we're only adding already frozen rows to a previously empty
|
||||
* page, mark it as all-visible.
|
||||
* page, mark it as all-frozen and update the visibility map. We're
|
||||
* already holding a pin on the vmbuffer.
|
||||
*/
|
||||
if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
|
||||
{
|
||||
@@ -2517,7 +2522,14 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
|
||||
vmbuffer, VISIBILITYMAP_VALID_BITS);
|
||||
}
|
||||
else if (all_frozen_set)
|
||||
{
|
||||
PageSetAllVisible(page);
|
||||
visibilitymap_set_vmbits(BufferGetBlockNumber(buffer),
|
||||
vmbuffer,
|
||||
VISIBILITYMAP_ALL_VISIBLE |
|
||||
VISIBILITYMAP_ALL_FROZEN,
|
||||
relation->rd_locator);
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX Should we set PageSetPrunable on this page ? See heap_insert()
|
||||
@@ -2565,6 +2577,12 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
|
||||
xlrec->flags = 0;
|
||||
if (all_visible_cleared)
|
||||
xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED;
|
||||
|
||||
/*
|
||||
* We don't have to worry about including a conflict xid in the
|
||||
* WAL record, as HEAP_INSERT_FROZEN intentionally violates
|
||||
* visibility rules.
|
||||
*/
|
||||
if (all_frozen_set)
|
||||
xlrec->flags = XLH_INSERT_ALL_FROZEN_SET;
|
||||
|
||||
@@ -2628,6 +2646,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData(xlrec, tupledata - scratch.data);
|
||||
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
|
||||
if (all_frozen_set)
|
||||
XLogRegisterBuffer(1, vmbuffer, 0);
|
||||
|
||||
XLogRegisterBufData(0, tupledata, totaldatalen);
|
||||
|
||||
@@ -2637,26 +2657,17 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
|
||||
recptr = XLogInsert(RM_HEAP2_ID, info);
|
||||
|
||||
PageSetLSN(page, recptr);
|
||||
if (all_frozen_set)
|
||||
{
|
||||
Assert(BufferIsDirty(vmbuffer));
|
||||
PageSetLSN(BufferGetPage(vmbuffer), recptr);
|
||||
}
|
||||
}
|
||||
|
||||
END_CRIT_SECTION();
|
||||
|
||||
/*
|
||||
* If we've frozen everything on the page, update the visibilitymap.
|
||||
* We're already holding pin on the vmbuffer.
|
||||
*/
|
||||
if (all_frozen_set)
|
||||
{
|
||||
/*
|
||||
* It's fine to use InvalidTransactionId here - this is only used
|
||||
* when HEAP_INSERT_FROZEN is specified, which intentionally
|
||||
* violates visibility rules.
|
||||
*/
|
||||
visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
|
||||
InvalidXLogRecPtr, vmbuffer,
|
||||
InvalidTransactionId,
|
||||
VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
|
||||
}
|
||||
LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
|
||||
|
||||
UnlockReleaseBuffer(buffer);
|
||||
ndone += nthispage;
|
||||
|
@@ -551,6 +551,7 @@ heap_xlog_multi_insert(XLogReaderState *record)
|
||||
int i;
|
||||
bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
|
||||
XLogRedoAction action;
|
||||
Buffer vmbuffer = InvalidBuffer;
|
||||
|
||||
/*
|
||||
* Insertion doesn't overwrite MVCC data, so no conflict processing is
|
||||
@@ -571,11 +572,11 @@ heap_xlog_multi_insert(XLogReaderState *record)
|
||||
if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
|
||||
{
|
||||
Relation reln = CreateFakeRelcacheEntry(rlocator);
|
||||
Buffer vmbuffer = InvalidBuffer;
|
||||
|
||||
visibilitymap_pin(reln, blkno, &vmbuffer);
|
||||
visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
|
||||
ReleaseBuffer(vmbuffer);
|
||||
vmbuffer = InvalidBuffer;
|
||||
FreeFakeRelcacheEntry(reln);
|
||||
}
|
||||
|
||||
@@ -662,6 +663,52 @@ heap_xlog_multi_insert(XLogReaderState *record)
|
||||
if (BufferIsValid(buffer))
|
||||
UnlockReleaseBuffer(buffer);
|
||||
|
||||
buffer = InvalidBuffer;
|
||||
|
||||
/*
|
||||
* Read and update the visibility map (VM) block.
|
||||
*
|
||||
* We must always redo VM changes, even if the corresponding heap page
|
||||
* update was skipped due to the LSN interlock. Each VM block covers
|
||||
* multiple heap pages, so later WAL records may update other bits in the
|
||||
* same block. If this record includes an FPI (full-page image),
|
||||
* subsequent WAL records may depend on it to guard against torn pages.
|
||||
*
|
||||
* Heap page changes are replayed first to preserve the invariant:
|
||||
* PD_ALL_VISIBLE must be set on the heap page if the VM bit is set.
|
||||
*
|
||||
* Note that we released the heap page lock above. During normal
|
||||
* operation, this would be unsafe — a concurrent modification could
|
||||
* clear PD_ALL_VISIBLE while the VM bit remained set, violating the
|
||||
* invariant.
|
||||
*
|
||||
* During recovery, however, no concurrent writers exist. Therefore,
|
||||
* updating the VM without holding the heap page lock is safe enough. This
|
||||
* same approach is taken when replaying xl_heap_visible records (see
|
||||
* heap_xlog_visible()).
|
||||
*/
|
||||
if ((xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) &&
|
||||
XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_ON_ERROR, false,
|
||||
&vmbuffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
Page vmpage = BufferGetPage(vmbuffer);
|
||||
|
||||
/* initialize the page if it was read as zeros */
|
||||
if (PageIsNew(vmpage))
|
||||
PageInit(vmpage, BLCKSZ, 0);
|
||||
|
||||
visibilitymap_set_vmbits(blkno,
|
||||
vmbuffer,
|
||||
VISIBILITYMAP_ALL_VISIBLE |
|
||||
VISIBILITYMAP_ALL_FROZEN,
|
||||
rlocator);
|
||||
|
||||
PageSetLSN(vmpage, lsn);
|
||||
}
|
||||
|
||||
if (BufferIsValid(vmbuffer))
|
||||
UnlockReleaseBuffer(vmbuffer);
|
||||
|
||||
/*
|
||||
* If the page is running low on free space, update the FSM as well.
|
||||
* Arbitrarily, our definition of "low" is less than 20%. We can't do much
|
||||
|
@@ -14,7 +14,8 @@
|
||||
* visibilitymap_clear - clear bits for one page in the visibility map
|
||||
* visibilitymap_pin - pin a map page for setting a bit
|
||||
* visibilitymap_pin_ok - check whether correct map page is already pinned
|
||||
* visibilitymap_set - set a bit in a previously pinned page
|
||||
* visibilitymap_set - set bit(s) in a previously pinned page and log
|
||||
* visibilitymap_set_vmbits - set bit(s) in a pinned page
|
||||
* visibilitymap_get_status - get status of bits
|
||||
* visibilitymap_count - count number of bits set in visibility map
|
||||
* visibilitymap_prepare_truncate -
|
||||
@@ -322,6 +323,73 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
|
||||
return status;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set VM (visibility map) flags in the VM block in vmBuf.
|
||||
*
|
||||
* This function is intended for callers that log VM changes together
|
||||
* with the heap page modifications that rendered the page all-visible.
|
||||
* Callers that log VM changes separately should use visibilitymap_set().
|
||||
*
|
||||
* vmBuf must be pinned and exclusively locked, and it must cover the VM bits
|
||||
* corresponding to heapBlk.
|
||||
*
|
||||
* In normal operation (not recovery), this must be called inside a critical
|
||||
* section that also applies the necessary heap page changes and, if
|
||||
* applicable, emits WAL.
|
||||
*
|
||||
* The caller is responsible for ensuring consistency between the heap page
|
||||
* and the VM page by holding a pin and exclusive lock on the buffer
|
||||
* containing heapBlk.
|
||||
*
|
||||
* rlocator is used only for debugging messages.
|
||||
*/
|
||||
uint8
|
||||
visibilitymap_set_vmbits(BlockNumber heapBlk,
|
||||
Buffer vmBuf, uint8 flags,
|
||||
const RelFileLocator rlocator)
|
||||
{
|
||||
BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
|
||||
uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
|
||||
uint8 mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
|
||||
Page page;
|
||||
uint8 *map;
|
||||
uint8 status;
|
||||
|
||||
#ifdef TRACE_VISIBILITYMAP
|
||||
elog(DEBUG1, "vm_set flags 0x%02X for %s %d",
|
||||
flags,
|
||||
relpathbackend(rlocator, MyProcNumber, MAIN_FORKNUM).str,
|
||||
heapBlk);
|
||||
#endif
|
||||
|
||||
/* Call in same critical section where WAL is emitted. */
|
||||
Assert(InRecovery || CritSectionCount > 0);
|
||||
|
||||
/* Flags should be valid. Also never clear bits with this function */
|
||||
Assert((flags & VISIBILITYMAP_VALID_BITS) == flags);
|
||||
|
||||
/* Must never set all_frozen bit without also setting all_visible bit */
|
||||
Assert(flags != VISIBILITYMAP_ALL_FROZEN);
|
||||
|
||||
/* Check that we have the right VM page pinned */
|
||||
if (!BufferIsValid(vmBuf) || BufferGetBlockNumber(vmBuf) != mapBlock)
|
||||
elog(ERROR, "wrong VM buffer passed to visibilitymap_set");
|
||||
|
||||
Assert(BufferIsLockedByMeInMode(vmBuf, BUFFER_LOCK_EXCLUSIVE));
|
||||
|
||||
page = BufferGetPage(vmBuf);
|
||||
map = (uint8 *) PageGetContents(page);
|
||||
|
||||
status = (map[mapByte] >> mapOffset) & VISIBILITYMAP_VALID_BITS;
|
||||
if (flags != status)
|
||||
{
|
||||
map[mapByte] |= (flags << mapOffset);
|
||||
MarkBufferDirty(vmBuf);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/*
|
||||
* visibilitymap_get_status - get status of bits
|
||||
*
|
||||
|
@@ -16,6 +16,7 @@
|
||||
|
||||
#include "access/heapam_xlog.h"
|
||||
#include "access/rmgrdesc_utils.h"
|
||||
#include "access/visibilitymapdefs.h"
|
||||
#include "storage/standbydefs.h"
|
||||
|
||||
/*
|
||||
@@ -354,6 +355,11 @@ heap2_desc(StringInfo buf, XLogReaderState *record)
|
||||
appendStringInfo(buf, "ntuples: %d, flags: 0x%02X", xlrec->ntuples,
|
||||
xlrec->flags);
|
||||
|
||||
if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
|
||||
appendStringInfo(buf, ", vm_flags: 0x%02X",
|
||||
VISIBILITYMAP_ALL_VISIBLE |
|
||||
VISIBILITYMAP_ALL_FROZEN);
|
||||
|
||||
if (XLogRecHasBlockData(record, 0) && !isinit)
|
||||
{
|
||||
appendStringInfoString(buf, ", offsets:");
|
||||
|
@@ -18,6 +18,7 @@
|
||||
#include "access/xlogdefs.h"
|
||||
#include "storage/block.h"
|
||||
#include "storage/buf.h"
|
||||
#include "storage/relfilelocator.h"
|
||||
#include "utils/relcache.h"
|
||||
|
||||
/* Macros for visibilitymap test */
|
||||
@@ -37,6 +38,9 @@ extern uint8 visibilitymap_set(Relation rel,
|
||||
Buffer vmBuf,
|
||||
TransactionId cutoff_xid,
|
||||
uint8 flags);
|
||||
extern uint8 visibilitymap_set_vmbits(BlockNumber heapBlk,
|
||||
Buffer vmBuf, uint8 flags,
|
||||
const RelFileLocator rlocator);
|
||||
extern uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf);
|
||||
extern void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen);
|
||||
extern BlockNumber visibilitymap_prepare_truncate(Relation rel,
|
||||
|
Reference in New Issue
Block a user