Eliminate COPY FREEZE use of XLOG_HEAP2_VISIBLE
Instead of emitting a separate WAL XLOG_HEAP2_VISIBLE record for setting
bits in the VM, specify the VM block changes in the
XLOG_HEAP2_MULTI_INSERT record. This halves the number of WAL records
emitted by COPY FREEZE.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2466,7 +2466,11 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 		starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
 
 		if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
+		{
 			all_frozen_set = true;
+			/* Lock the vmbuffer before entering the critical section */
+			LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
+		}
 
 		/* NO EREPORT(ERROR) from here till changes are logged */
 		START_CRIT_SECTION();
@@ -2506,7 +2510,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 		 * going to add further frozen rows to it.
 		 *
 		 * If we're only adding already frozen rows to a previously empty
-		 * page, mark it as all-visible.
+		 * page, mark it as all-frozen and update the visibility map. We're
+		 * already holding a pin on the vmbuffer.
 		 */
 		if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
 		{
@@ -2517,7 +2522,14 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 							vmbuffer, VISIBILITYMAP_VALID_BITS);
 		}
 		else if (all_frozen_set)
+		{
 			PageSetAllVisible(page);
+			visibilitymap_set_vmbits(BufferGetBlockNumber(buffer),
+									 vmbuffer,
+									 VISIBILITYMAP_ALL_VISIBLE |
+									 VISIBILITYMAP_ALL_FROZEN,
+									 relation->rd_locator);
+		}
 
 		/*
 		 * XXX Should we set PageSetPrunable on this page ? See heap_insert()
@@ -2565,6 +2577,12 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 			xlrec->flags = 0;
 			if (all_visible_cleared)
 				xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED;
+
+			/*
+			 * We don't have to worry about including a conflict xid in the
+			 * WAL record, as HEAP_INSERT_FROZEN intentionally violates
+			 * visibility rules.
+			 */
 			if (all_frozen_set)
 				xlrec->flags = XLH_INSERT_ALL_FROZEN_SET;
 
@@ -2628,6 +2646,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 			XLogBeginInsert();
 			XLogRegisterData(xlrec, tupledata - scratch.data);
 			XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
+			if (all_frozen_set)
+				XLogRegisterBuffer(1, vmbuffer, 0);
 
 			XLogRegisterBufData(0, tupledata, totaldatalen);
 
@@ -2637,26 +2657,17 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 			recptr = XLogInsert(RM_HEAP2_ID, info);
 
 			PageSetLSN(page, recptr);
+			if (all_frozen_set)
+			{
+				Assert(BufferIsDirty(vmbuffer));
+				PageSetLSN(BufferGetPage(vmbuffer), recptr);
+			}
 		}
 
 		END_CRIT_SECTION();
 
-		/*
-		 * If we've frozen everything on the page, update the visibilitymap.
-		 * We're already holding pin on the vmbuffer.
-		 */
 		if (all_frozen_set)
-		{
-			/*
-			 * It's fine to use InvalidTransactionId here - this is only used
-			 * when HEAP_INSERT_FROZEN is specified, which intentionally
-			 * violates visibility rules.
-			 */
-			visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
-							  InvalidXLogRecPtr, vmbuffer,
-							  InvalidTransactionId,
-							  VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
-		}
+			LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
 
 		UnlockReleaseBuffer(buffer);
 		ndone += nthispage;
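The hunk above is the heart of the insert-side change: the VM buffer is registered as block 1 of the multi-insert record, and both the heap page and the VM page are stamped with the same record LSN inside one critical section, so neither page can be flushed ahead of the WAL that describes it. A minimal standalone sketch of that write-ahead rule, using hypothetical stand-in types (FakePage, fake_xlog_insert) rather than PostgreSQL's buffer API:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t XLogRecPtr;

typedef struct
{
	XLogRecPtr	lsn;			/* position of last WAL record applied */
	int			dirty;
} FakePage;

/* Stub standing in for XLogInsert(): returns the new record's end LSN. */
static XLogRecPtr
fake_xlog_insert(void)
{
	static XLogRecPtr next_lsn;

	return ++next_lsn;
}

int
main(void)
{
	FakePage	heap_page = {0, 0};
	FakePage	vm_page = {0, 0};
	XLogRecPtr	recptr;

	/* --- begin critical section: no error paths allowed from here --- */
	heap_page.dirty = 1;		/* tuples inserted, PD_ALL_VISIBLE set */
	vm_page.dirty = 1;			/* all-visible and all-frozen bits set */

	recptr = fake_xlog_insert();	/* one record covers both pages */

	heap_page.lsn = recptr;		/* both pages carry the same LSN, */
	vm_page.lsn = recptr;		/* as in the diff above */
	/* --- end critical section --- */

	printf("heap lsn=%llu, vm lsn=%llu\n",
		   (unsigned long long) heap_page.lsn,
		   (unsigned long long) vm_page.lsn);
	return 0;
}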
--- a/src/backend/access/heap/heapam_xlog.c
+++ b/src/backend/access/heap/heapam_xlog.c
@@ -551,6 +551,7 @@ heap_xlog_multi_insert(XLogReaderState *record)
 	int			i;
 	bool		isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
 	XLogRedoAction action;
+	Buffer		vmbuffer = InvalidBuffer;
 
 	/*
 	 * Insertion doesn't overwrite MVCC data, so no conflict processing is
@@ -571,11 +572,11 @@ heap_xlog_multi_insert(XLogReaderState *record)
 	if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
 	{
 		Relation	reln = CreateFakeRelcacheEntry(rlocator);
-		Buffer		vmbuffer = InvalidBuffer;
 
 		visibilitymap_pin(reln, blkno, &vmbuffer);
 		visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
 		ReleaseBuffer(vmbuffer);
+		vmbuffer = InvalidBuffer;
 		FreeFakeRelcacheEntry(reln);
 	}
 
@@ -662,6 +663,52 @@ heap_xlog_multi_insert(XLogReaderState *record)
 	if (BufferIsValid(buffer))
 		UnlockReleaseBuffer(buffer);
+	buffer = InvalidBuffer;
+
+	/*
+	 * Read and update the visibility map (VM) block.
+	 *
+	 * We must always redo VM changes, even if the corresponding heap page
+	 * update was skipped due to the LSN interlock. Each VM block covers
+	 * multiple heap pages, so later WAL records may update other bits in the
+	 * same block. If this record includes an FPI (full-page image),
+	 * subsequent WAL records may depend on it to guard against torn pages.
+	 *
+	 * Heap page changes are replayed first to preserve the invariant:
+	 * PD_ALL_VISIBLE must be set on the heap page if the VM bit is set.
+	 *
+	 * Note that we released the heap page lock above. During normal
+	 * operation, this would be unsafe -- a concurrent modification could
+	 * clear PD_ALL_VISIBLE while the VM bit remained set, violating the
+	 * invariant.
+	 *
+	 * During recovery, however, no concurrent writers exist. Therefore,
+	 * updating the VM without holding the heap page lock is safe enough. This
+	 * same approach is taken when replaying xl_heap_visible records (see
+	 * heap_xlog_visible()).
+	 */
+	if ((xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) &&
+		XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_ON_ERROR, false,
+									  &vmbuffer) == BLK_NEEDS_REDO)
+	{
+		Page		vmpage = BufferGetPage(vmbuffer);
+
+		/* initialize the page if it was read as zeros */
+		if (PageIsNew(vmpage))
+			PageInit(vmpage, BLCKSZ, 0);
+
+		visibilitymap_set_vmbits(blkno,
+								 vmbuffer,
+								 VISIBILITYMAP_ALL_VISIBLE |
+								 VISIBILITYMAP_ALL_FROZEN,
+								 rlocator);
+
+		PageSetLSN(vmpage, lsn);
+	}
+
+	if (BufferIsValid(vmbuffer))
+		UnlockReleaseBuffer(vmbuffer);
 
 	/*
 	 * If the page is running low on free space, update the FSM as well.
 	 * Arbitrarily, our definition of "low" is less than 20%. We can't do much
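The new redo comment turns on the "LSN interlock": during replay, a block is redone only if its page LSN shows the record has not been applied yet, and that check is made per block, so the VM block can still need redo after the heap block was skipped. A minimal standalone sketch of the interlock, with hypothetical types (FakePage, block_needs_redo) rather than PostgreSQL's API:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t XLogRecPtr;

typedef struct
{
	XLogRecPtr	lsn;			/* LSN of last record already applied */
} FakePage;

/*
 * Replay a record against a page only if the page predates it; a page
 * restored from a later checkpoint or full-page image already contains
 * the change and must not be modified again.
 */
static bool
block_needs_redo(const FakePage *page, XLogRecPtr record_lsn)
{
	return page->lsn < record_lsn;
}

int
main(void)
{
	FakePage	heap_page = {.lsn = 900};	/* already contains record 500 */
	FakePage	vm_page = {.lsn = 100};		/* has not seen record 500 yet */
	XLogRecPtr	record_lsn = 500;

	/* Heap redo is skipped, but the VM block still needs the change. */
	printf("heap needs redo: %d, vm needs redo: %d\n",
		   block_needs_redo(&heap_page, record_lsn),
		   block_needs_redo(&vm_page, record_lsn));
	return 0;
}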
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -14,7 +14,8 @@
 *		visibilitymap_clear  - clear bits for one page in the visibility map
 *		visibilitymap_pin    - pin a map page for setting a bit
 *		visibilitymap_pin_ok - check whether correct map page is already pinned
-*		visibilitymap_set    - set a bit in a previously pinned page
+*		visibilitymap_set    - set bit(s) in a previously pinned page and log
+*		visibilitymap_set_vmbits - set bit(s) in a pinned page
 *		visibilitymap_get_status - get status of bits
 *		visibilitymap_count  - count number of bits set in visibility map
 *		visibilitymap_prepare_truncate -
@@ -322,6 +323,73 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
 	return status;
 }
 
+/*
+ * Set VM (visibility map) flags in the VM block in vmBuf.
+ *
+ * This function is intended for callers that log VM changes together
+ * with the heap page modifications that rendered the page all-visible.
+ * Callers that log VM changes separately should use visibilitymap_set().
+ *
+ * vmBuf must be pinned and exclusively locked, and it must cover the VM bits
+ * corresponding to heapBlk.
+ *
+ * In normal operation (not recovery), this must be called inside a critical
+ * section that also applies the necessary heap page changes and, if
+ * applicable, emits WAL.
+ *
+ * The caller is responsible for ensuring consistency between the heap page
+ * and the VM page by holding a pin and exclusive lock on the buffer
+ * containing heapBlk.
+ *
+ * rlocator is used only for debugging messages.
+ */
+uint8
+visibilitymap_set_vmbits(BlockNumber heapBlk,
+						 Buffer vmBuf, uint8 flags,
+						 const RelFileLocator rlocator)
+{
+	BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
+	uint32		mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
+	uint8		mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
+	Page		page;
+	uint8	   *map;
+	uint8		status;
+
+#ifdef TRACE_VISIBILITYMAP
+	elog(DEBUG1, "vm_set flags 0x%02X for %s %d",
+		 flags,
+		 relpathbackend(rlocator, MyProcNumber, MAIN_FORKNUM).str,
+		 heapBlk);
+#endif
+
+	/* Call in same critical section where WAL is emitted. */
+	Assert(InRecovery || CritSectionCount > 0);
+
+	/* Flags should be valid. Also never clear bits with this function */
+	Assert((flags & VISIBILITYMAP_VALID_BITS) == flags);
+
+	/* Must never set all_frozen bit without also setting all_visible bit */
+	Assert(flags != VISIBILITYMAP_ALL_FROZEN);
+
+	/* Check that we have the right VM page pinned */
+	if (!BufferIsValid(vmBuf) || BufferGetBlockNumber(vmBuf) != mapBlock)
+		elog(ERROR, "wrong VM buffer passed to visibilitymap_set");
+
+	Assert(BufferIsLockedByMeInMode(vmBuf, BUFFER_LOCK_EXCLUSIVE));
+
+	page = BufferGetPage(vmBuf);
+	map = (uint8 *) PageGetContents(page);
+
+	status = (map[mapByte] >> mapOffset) & VISIBILITYMAP_VALID_BITS;
+	if (flags != status)
+	{
+		map[mapByte] |= (flags << mapOffset);
+		MarkBufferDirty(vmBuf);
+	}
+
+	return status;
+}
+
 /*
 * visibilitymap_get_status - get status of bits
 *
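visibilitymap_set_vmbits() above leans on the map's addressing arithmetic (HEAPBLK_TO_MAPBLOCK, HEAPBLK_TO_MAPBYTE, HEAPBLK_TO_OFFSET): each heap block is tracked by two bits, so one VM page covers tens of thousands of heap blocks. A runnable sketch of that arithmetic, assuming the default 8 kB BLCKSZ and a typical 24-byte MAXALIGN'd page header; the macro bodies mirror the ones defined at the top of visibilitymap.c:

#include <stdint.h>
#include <stdio.h>

#define BLCKSZ 8192				/* default PostgreSQL block size */
#define PAGE_HEADER 24			/* MAXALIGN(SizeOfPageHeaderData), typical */
#define MAPSIZE (BLCKSZ - PAGE_HEADER)
#define BITS_PER_HEAPBLOCK 2	/* all-visible bit + all-frozen bit */
#define HEAPBLOCKS_PER_BYTE (8 / BITS_PER_HEAPBLOCK)
#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)

#define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
#define HEAPBLK_TO_MAPBYTE(x)  (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
#define HEAPBLK_TO_OFFSET(x)   (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)

int
main(void)
{
	uint32_t	heapBlk = 123456;

	/* One VM page tracks HEAPBLOCKS_PER_PAGE (32672) heap blocks. */
	printf("heap block %u -> VM block %u, byte %u, bit offset %u\n",
		   heapBlk,
		   (unsigned) HEAPBLK_TO_MAPBLOCK(heapBlk),
		   (unsigned) HEAPBLK_TO_MAPBYTE(heapBlk),
		   (unsigned) HEAPBLK_TO_OFFSET(heapBlk));
	return 0;
}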
--- a/src/backend/access/rmgrdesc/heapdesc.c
+++ b/src/backend/access/rmgrdesc/heapdesc.c
@@ -16,6 +16,7 @@
 
 #include "access/heapam_xlog.h"
 #include "access/rmgrdesc_utils.h"
+#include "access/visibilitymapdefs.h"
 #include "storage/standbydefs.h"
 
 /*
@@ -354,6 +355,11 @@ heap2_desc(StringInfo buf, XLogReaderState *record)
 		appendStringInfo(buf, "ntuples: %d, flags: 0x%02X", xlrec->ntuples,
 						 xlrec->flags);
 
+		if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
+			appendStringInfo(buf, ", vm_flags: 0x%02X",
+							 VISIBILITYMAP_ALL_VISIBLE |
+							 VISIBILITYMAP_ALL_FROZEN);
+
 		if (XLogRecHasBlockData(record, 0) && !isinit)
 		{
 			appendStringInfoString(buf, ", offsets:");
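The byte this new desc line prints is fixed for these records: VISIBILITYMAP_ALL_VISIBLE and VISIBILITYMAP_ALL_FROZEN are 0x01 and 0x02 in access/visibilitymapdefs.h, so a frozen multi-insert always describes as vm_flags: 0x03. A trivial standalone check:

#include <stdio.h>

/* Values as defined in access/visibilitymapdefs.h */
#define VISIBILITYMAP_ALL_VISIBLE	0x01
#define VISIBILITYMAP_ALL_FROZEN	0x02

int
main(void)
{
	/* Matches the appendStringInfo() format in the hunk above. */
	printf(", vm_flags: 0x%02X\n",
		   VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);	/* 0x03 */
	return 0;
}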
--- a/src/include/access/visibilitymap.h
+++ b/src/include/access/visibilitymap.h
@@ -18,6 +18,7 @@
 #include "access/xlogdefs.h"
 #include "storage/block.h"
 #include "storage/buf.h"
+#include "storage/relfilelocator.h"
 #include "utils/relcache.h"
 
 /* Macros for visibilitymap test */
@@ -37,6 +38,9 @@ extern uint8 visibilitymap_set(Relation rel,
 							   Buffer vmBuf,
 							   TransactionId cutoff_xid,
 							   uint8 flags);
+extern uint8 visibilitymap_set_vmbits(BlockNumber heapBlk,
+									  Buffer vmBuf, uint8 flags,
+									  const RelFileLocator rlocator);
 extern uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf);
 extern void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen);
 extern BlockNumber visibilitymap_prepare_truncate(Relation rel,