diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c
index 001988bf00a..5d08c7377c5 100644
--- a/contrib/pgstattuple/pgstatapprox.c
+++ b/contrib/pgstattuple/pgstatapprox.c
@@ -87,7 +87,7 @@ statapprox_heap(Relation rel, output_type *stat)
* If the page has only visible tuples, then we can find out the free
* space from the FSM and move on.
*/
- if (visibilitymap_test(rel, blkno, &vmbuffer))
+ if (VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
{
freespace = GetRecordedFreeSpace(rel, blkno);
stat->tuple_len += BLCKSZ - freespace;
diff --git a/doc/src/sgml/storage.sgml b/doc/src/sgml/storage.sgml
index 164d08ce490..e2be43e63df 100644
--- a/doc/src/sgml/storage.sgml
+++ b/doc/src/sgml/storage.sgml
@@ -623,7 +623,8 @@ can be used to examine the information stored in free space maps.
Each heap relation has a Visibility Map
(VM) to keep track of which pages contain only tuples that are known to be
-visible to all active transactions. It's stored
+visible to all active transactions; it also keeps track of which pages contain
+only frozen tuples. It's stored
alongside the main relation data in a separate relation fork, named after the
filenode number of the relation, plus a _vm> suffix. For example,
if the filenode of a relation is 12345, the VM is stored in a file called
@@ -632,11 +633,12 @@ Note that indexes do not have VMs.
-The visibility map simply stores one bit per heap page. A set bit means
-that all tuples on the page are known to be visible to all transactions.
-This means that the page does not contain any tuples that need to be vacuumed.
+The visibility map stores two bits per heap page. The first bit, if set,
+indicates that the page is all-visible, or in other words that the page does
+not contain any tuples that need to be vacuumed.
This information can also be used by index-only scans> to answer
queries using only the index tuple.
+The second bit, if set, means that all tuples on the page have been frozen;
+it is only ever set on pages that are also all-visible.
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index f4437428cb3..8a64321fe49 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -6951,6 +6951,55 @@ ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status,
rel, NULL, XLTW_None, remaining);
}
+/*
+ * heap_tuple_needs_eventual_freeze
+ *
+ * Check to see whether any of the XID fields of a tuple (xmin, xmax, xvac)
+ * will eventually require freezing. Similar to heap_tuple_needs_freeze,
+ * but there's no cutoff, since we're trying to figure out whether freezing
+ * will ever be needed, not whether it's needed now.
+ */
+bool
+heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
+{
+ TransactionId xid;
+
+ /*
+ * If xmin is a normal transaction ID, this tuple is definitely not
+ * frozen.
+ */
+ xid = HeapTupleHeaderGetXmin(tuple);
+ if (TransactionIdIsNormal(xid))
+ return true;
+
+ /*
+ * If xmax is a valid xact or multixact, this tuple is also not frozen.
+ */
+ if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
+ {
+ MultiXactId multi;
+
+ multi = HeapTupleHeaderGetRawXmax(tuple);
+ if (MultiXactIdIsValid(multi))
+ return true;
+ }
+ else
+ {
+ xid = HeapTupleHeaderGetRawXmax(tuple);
+ if (TransactionIdIsNormal(xid))
+ return true;
+ }
+
+ if (tuple->t_infomask & HEAP_MOVED)
+ {
+ xid = HeapTupleHeaderGetXvac(tuple);
+ if (TransactionIdIsNormal(xid))
+ return true;
+ }
+
+ return false;
+}
+
/*
* heap_tuple_needs_freeze
*
@@ -7205,7 +7254,7 @@ log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid,
*/
XLogRecPtr
log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer,
- TransactionId cutoff_xid)
+ TransactionId cutoff_xid, uint8 vmflags)
{
xl_heap_visible xlrec;
XLogRecPtr recptr;
@@ -7215,6 +7264,7 @@ log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer,
Assert(BufferIsValid(vm_buffer));
xlrec.cutoff_xid = cutoff_xid;
+ xlrec.flags = vmflags;
XLogBeginInsert();
XLogRegisterData((char *) &xlrec, SizeOfHeapVisible);
@@ -7804,7 +7854,12 @@ heap_xlog_visible(XLogReaderState *record)
* the subsequent update won't be replayed to clear the flag.
*/
page = BufferGetPage(buffer);
- PageSetAllVisible(page);
+
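+ /*
+ * The record may carry either or both flags; for instance, when a page
+ * that is already all-visible is marked all-frozen, only the all-frozen
+ * flag is set, and PD_ALL_VISIBLE is already set on the heap page.
+ */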
+ if (xlrec->flags & VISIBILITYMAP_ALL_VISIBLE)
+ PageSetAllVisible(page);
+ if (xlrec->flags & VISIBILITYMAP_ALL_FROZEN)
+ PageSetAllFrozen(page);
+
MarkBufferDirty(buffer);
}
else if (action == BLK_RESTORED)
@@ -7856,7 +7911,7 @@ heap_xlog_visible(XLogReaderState *record)
*/
if (lsn > PageGetLSN(vmpage))
visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
- xlrec->cutoff_xid);
+ xlrec->cutoff_xid, xlrec->flags);
ReleaseBuffer(vmbuffer);
FreeFakeRelcacheEntry(reln);
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index fc28f3f8c5d..2e64fc3dfe8 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -15,39 +15,42 @@
* visibilitymap_pin - pin a map page for setting a bit
* visibilitymap_pin_ok - check whether correct map page is already pinned
* visibilitymap_set - set a bit in a previously pinned page
- * visibilitymap_test - test if a bit is set
+ * visibilitymap_get_status - get status of bits
* visibilitymap_count - count number of bits set in visibility map
* visibilitymap_truncate - truncate the visibility map
*
* NOTES
*
- * The visibility map is a bitmap with one bit per heap page. A set bit means
- * that all tuples on the page are known visible to all transactions, and
- * therefore the page doesn't need to be vacuumed. The map is conservative in
- * the sense that we make sure that whenever a bit is set, we know the
- * condition is true, but if a bit is not set, it might or might not be true.
+ * The visibility map is a bitmap with two bits (all-visible and all-frozen)
+ * per heap page. A set all-visible bit means that all tuples on the page are
+ * known visible to all transactions, and therefore the page doesn't need to
+ * be vacuumed. A set all-frozen bit means that all tuples on the page are
+ * completely frozen, and therefore the page doesn't need to be vacuumed even
+ * if a whole-table-scanning vacuum (e.g. an anti-wraparound vacuum) is
+ * required.
+ * The all-frozen bit must be set only when the page is already all-visible.
*
- * Clearing a visibility map bit is not separately WAL-logged. The callers
+ * The map is conservative in the sense that we make sure that whenever a bit
+ * is set, we know the condition is true, but if a bit is not set, it might or
+ * might not be true.
+ *
+ * Clearing both visibility map bits is not separately WAL-logged. The callers
* must make sure that whenever a bit is cleared, the bit is cleared on WAL
* replay of the updating operation as well.
*
* When we *set* a visibility map during VACUUM, we must write WAL. This may
* seem counterintuitive, since the bit is basically a hint: if it is clear,
- * it may still be the case that every tuple on the page is visible to all
- * transactions; we just don't know that for certain. The difficulty is that
- * there are two bits which are typically set together: the PD_ALL_VISIBLE bit
- * on the page itself, and the visibility map bit. If a crash occurs after the
- * visibility map page makes it to disk and before the updated heap page makes
- * it to disk, redo must set the bit on the heap page. Otherwise, the next
- * insert, update, or delete on the heap page will fail to realize that the
- * visibility map bit must be cleared, possibly causing index-only scans to
- * return wrong answers.
+ * it may still be the case that the page is all-visible or all-frozen; we
+ * just don't know that for certain. The difficulty is that
+ * there are two bits which are typically set together: the PD_ALL_VISIBLE
+ * or PD_ALL_FROZEN bit on the page itself, and the corresponding visibility
+ * map bit. If a crash occurs after the visibility map page makes it to disk
+ * and before the updated heap page makes it to disk, redo must set the bit on
+ * the heap page. Otherwise, the next insert, update, or delete on the heap
+ * page will fail to realize that the visibility map bit must be cleared,
+ * possibly causing index-only scans to return wrong answers.
*
* VACUUM will normally skip pages for which the visibility map bit is set;
* such pages can't contain any dead tuples and therefore don't need vacuuming.
- * The visibility map is not used for anti-wraparound vacuums, because
- * an anti-wraparound vacuum needs to freeze tuples and observe the latest xid
- * present in the table, even on pages that don't have any dead tuples.
*
* LOCKING
*
@@ -101,38 +104,50 @@
*/
#define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
-/* Number of bits allocated for each heap block. */
-#define BITS_PER_HEAPBLOCK 1
-
-/* Number of heap blocks we can represent in one byte. */
-#define HEAPBLOCKS_PER_BYTE 8
-
/* Number of heap blocks we can represent in one visibility map page. */
#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)
/* Mapping from heap block number to the right bit in the visibility map */
#define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
-#define HEAPBLK_TO_MAPBIT(x) ((x) % HEAPBLOCKS_PER_BYTE)
+#define HEAPBLK_TO_MAPBIT(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
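+
+/*
+ * With two bits per heap block, each map byte covers four heap blocks.  For
+ * example, heap block 5 maps to byte 1 of its map page; its all-visible bit
+ * is bit 2 of that byte and its all-frozen bit is bit 3.
+ */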
-/* table for fast counting of set bits */
-static const uint8 number_of_ones[256] = {
- 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
+/* tables for fast counting of set bits for visible and frozen */
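+/*
+ * Each table is indexed by the value of a map byte (four heap blocks at two
+ * bits apiece) and gives the number of heap blocks in that byte whose
+ * all-visible or all-frozen bit is set.  For example, for map byte 0x07
+ * (binary 00000111), the first heap block is all-visible and all-frozen and
+ * the second is all-visible only, so entry 7 is 2 in the visible table and
+ * 1 in the frozen table.
+ */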
+static const uint8 number_of_ones_for_visible[256] = {
+ 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
+ 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4
+};
+static const uint8 number_of_ones_for_frozen[256] = {
+ 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
+ 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
+ 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
+ 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
+ 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4
};
/* prototypes for internal routines */
@@ -141,7 +156,7 @@ static void vm_extend(Relation rel, BlockNumber nvmblocks);
/*
- * visibilitymap_clear - clear a bit in visibility map
+ * visibilitymap_clear - clear all bits for one page in the visibility map
*
* You must pass a buffer containing the correct map page to this function.
* Call visibilitymap_pin first to pin the right one. This function doesn't do
@@ -153,7 +168,7 @@ visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf)
BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
int mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
int mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
- uint8 mask = 1 << mapBit;
+ uint8 mask = VISIBILITYMAP_VALID_BITS << mapBit;
char *map;
#ifdef TRACE_VISIBILITYMAP
@@ -186,7 +201,7 @@ visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf)
* visibilitymap_set to actually set the bit.
*
* On entry, *buf should be InvalidBuffer or a valid buffer returned by
- * an earlier call to visibilitymap_pin or visibilitymap_test on the same
+ * an earlier call to visibilitymap_pin or visibilitymap_get_status on the same
* relation. On return, *buf is a valid buffer with the map page containing
* the bit for heapBlk.
*
@@ -212,7 +227,7 @@ visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *buf)
* visibilitymap_pin_ok - do we already have the correct page pinned?
*
* On entry, buf should be InvalidBuffer or a valid buffer returned by
- * an earlier call to visibilitymap_pin or visibilitymap_test on the same
+ * an earlier call to visibilitymap_pin or visibilitymap_get_status on the same
* relation. The return value indicates whether the buffer covers the
* given heapBlk.
*/
@@ -225,19 +240,22 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
}
/*
- * visibilitymap_set - set a bit on a previously pinned page
+ * visibilitymap_set - set bit(s) on a previously pinned page
*
* recptr is the LSN of the XLOG record we're replaying, if we're in recovery,
* or InvalidXLogRecPtr in normal running. The page LSN is advanced to the
* one provided; in normal running, we generate a new XLOG record and set the
* page LSN to that value. cutoff_xid is the largest xmin on the page being
* marked all-visible; it is needed for Hot Standby, and can be
- * InvalidTransactionId if the page contains no tuples.
+ * InvalidTransactionId if the page contains no tuples. It can also be set
+ * to InvalidTransactionId when a page that is already all-visible is being
+ * marked all-frozen.
*
- * Caller is expected to set the heap page's PD_ALL_VISIBLE bit before calling
- * this function. Except in recovery, caller should also pass the heap
- * buffer. When checksums are enabled and we're not in recovery, we must add
- * the heap buffer to the WAL chain to protect it from being torn.
+ * Caller is expected to set the heap page's PD_ALL_VISIBLE or PD_ALL_FROZEN
+ * bit before calling this function. Except in recovery, caller should also
+ * pass the heap buffer, along with flags indicating which bit(s) to set.
+ * When checksums are enabled and we're not in recovery, we must add the heap
+ * buffer to the WAL chain to protect it from being torn.
*
* You must pass a buffer containing the correct map page to this function.
* Call visibilitymap_pin first to pin the right one. This function doesn't do
@@ -245,13 +263,14 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
*/
void
visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
- XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid)
+ XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid,
+ uint8 flags)
{
BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
uint8 mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
Page page;
- char *map;
+ uint8 *map;
#ifdef TRACE_VISIBILITYMAP
elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk);
@@ -259,6 +278,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
Assert(InRecovery || XLogRecPtrIsInvalid(recptr));
Assert(InRecovery || BufferIsValid(heapBuf));
+ Assert(flags & VISIBILITYMAP_VALID_BITS);
/* Check that we have the right heap page pinned, if present */
if (BufferIsValid(heapBuf) && BufferGetBlockNumber(heapBuf) != heapBlk)
@@ -269,14 +289,14 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
elog(ERROR, "wrong VM buffer passed to visibilitymap_set");
page = BufferGetPage(vmBuf);
- map = PageGetContents(page);
+ map = (uint8 *) PageGetContents(page);
LockBuffer(vmBuf, BUFFER_LOCK_EXCLUSIVE);
- if (!(map[mapByte] & (1 << mapBit)))
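+ /*
+ * Only dirty the page and write WAL if the requested flags differ from
+ * the bits already set for this heap block.
+ */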
+ if (flags != ((map[mapByte] >> mapBit) & VISIBILITYMAP_VALID_BITS))
{
START_CRIT_SECTION();
- map[mapByte] |= (1 << mapBit);
+ map[mapByte] |= (flags << mapBit);
MarkBufferDirty(vmBuf);
if (RelationNeedsWAL(rel))
@@ -285,7 +305,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
{
Assert(!InRecovery);
recptr = log_heap_visible(rel->rd_node, heapBuf, vmBuf,
- cutoff_xid);
+ cutoff_xid, flags);
/*
* If data checksums are enabled (or wal_log_hints=on), we
@@ -295,8 +315,10 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
{
Page heapPage = BufferGetPage(heapBuf);
- /* caller is expected to set PD_ALL_VISIBLE first */
- Assert(PageIsAllVisible(heapPage));
+ /* Caller is expected to set page-level bits first. */
+ Assert((flags & VISIBILITYMAP_ALL_VISIBLE) == 0 || PageIsAllVisible(heapPage));
+ Assert((flags & VISIBILITYMAP_ALL_FROZEN) == 0 || PageIsAllFrozen(heapPage));
+
PageSetLSN(heapPage, recptr);
}
}
@@ -310,15 +332,17 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
}
/*
- * visibilitymap_test - test if a bit is set
+ * visibilitymap_get_status - get status of bits
*
- * Are all tuples on heapBlk visible to all, according to the visibility map?
+ * Are all tuples on heapBlk visible to all transactions, or all frozen,
+ * according to the visibility map?
*
* On entry, *buf should be InvalidBuffer or a valid buffer returned by an
- * earlier call to visibilitymap_pin or visibilitymap_test on the same
+ * earlier call to visibilitymap_pin or visibilitymap_get_status on the same
* relation. On return, *buf is a valid buffer with the map page containing
* the bit for heapBlk, or InvalidBuffer. The caller is responsible for
- * releasing *buf after it's done testing and setting bits.
+ * releasing *buf after it's done testing and setting bits. The caller can
+ * test the returned status against the VISIBILITYMAP_* flag bits of interest.
*
* NOTE: This function is typically called without a lock on the heap page,
* so somebody else could change the bit just after we look at it. In fact,
@@ -327,17 +351,16 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
* we might see the old value. It is the caller's responsibility to deal with
* all concurrency issues!
*/
-bool
-visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
+uint8
+visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *buf)
{
BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
uint8 mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
- bool result;
char *map;
#ifdef TRACE_VISIBILITYMAP
- elog(DEBUG1, "vm_test %s %d", RelationGetRelationName(rel), heapBlk);
+ elog(DEBUG1, "vm_get_status %s %d", RelationGetRelationName(rel), heapBlk);
#endif
/* Reuse the old pinned buffer if possible */
@@ -360,13 +383,11 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
map = PageGetContents(BufferGetPage(*buf));
/*
- * A single-bit read is atomic. There could be memory-ordering effects
+ * A single-byte read is atomic. There could be memory-ordering effects
* here, but for performance reasons we make it the caller's job to worry
* about that.
*/
- result = (map[mapByte] & (1 << mapBit)) ? true : false;
-
- return result;
+ return ((map[mapByte] >> mapBit) & VISIBILITYMAP_VALID_BITS);
}
/*
@@ -374,14 +395,20 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
*
* Note: we ignore the possibility of race conditions when the table is being
* extended concurrently with the call. New pages added to the table aren't
- * going to be marked all-visible, so they won't affect the result.
+ * going to be marked all-visible or all-frozen, so they won't affect the result.
*/
-BlockNumber
-visibilitymap_count(Relation rel)
+void
+visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
{
- BlockNumber result = 0;
BlockNumber mapBlock;
+ /* all_visible must be specified */
+ Assert(all_visible);
+
+ *all_visible = 0;
+ if (all_frozen)
+ *all_frozen = 0;
+
for (mapBlock = 0;; mapBlock++)
{
Buffer mapBuffer;
@@ -406,13 +433,13 @@ visibilitymap_count(Relation rel)
for (i = 0; i < MAPSIZE; i++)
{
- result += number_of_ones[map[i]];
+ *all_visible += number_of_ones_for_visible[map[i]];
+ if (all_frozen)
+ *all_frozen += number_of_ones_for_frozen[map[i]];
}
ReleaseBuffer(mapBuffer);
}
-
- return result;
}
/*
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 8898b55d360..31a1438d4aa 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -1920,7 +1920,7 @@ index_update_stats(Relation rel,
BlockNumber relallvisible;
if (rd_rel->relkind != RELKIND_INDEX)
- relallvisible = visibilitymap_count(rel);
+ visibilitymap_count(rel, &relallvisible, NULL);
else /* don't bother for indexes */
relallvisible = 0;
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 070df29bb2f..8a5f07c957c 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -569,14 +569,20 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
* inherited stats.
*/
if (!inh)
+ {
+ BlockNumber relallvisible;
+
+ visibilitymap_count(onerel, &relallvisible, NULL);
+
vac_update_relstats(onerel,
relpages,
totalrows,
- visibilitymap_count(onerel),
+ relallvisible,
hasindex,
InvalidTransactionId,
InvalidMultiXactId,
in_outer_xact);
+ }
/*
* Same for indexes. Vacuum always scans all indexes, so if we're part of
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 4f6f6e7782d..8f7b2486e00 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -157,7 +157,7 @@ static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
static int vac_cmp_itemptr(const void *left, const void *right);
static bool heap_page_is_all_visible(Relation rel, Buffer buf,
- TransactionId *visibility_cutoff_xid);
+ TransactionId *visibility_cutoff_xid, bool *all_frozen);
/*
@@ -295,7 +295,7 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
new_rel_tuples = vacrelstats->old_rel_tuples;
}
- new_rel_allvisible = visibilitymap_count(onerel);
+ visibilitymap_count(onerel, &new_rel_allvisible, NULL);
if (new_rel_allvisible > new_rel_pages)
new_rel_allvisible = new_rel_pages;
@@ -496,7 +496,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
* maintain next_not_all_visible_block anyway, so as to set up the
* all_visible_according_to_vm flag correctly for each page.
*
- * Note: The value returned by visibilitymap_test could be slightly
+ * Note: The value returned by visibilitymap_get_status could be slightly
* out-of-date, since we make this test before reading the corresponding
* heap page or locking the buffer. This is OK. If we mistakenly think
* that the page is all-visible when in fact the flag's just been cleared,
@@ -518,7 +518,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
next_not_all_visible_block < nblocks;
next_not_all_visible_block++)
{
- if (!visibilitymap_test(onerel, next_not_all_visible_block, &vmbuffer))
+ if (!VM_ALL_VISIBLE(onerel, next_not_all_visible_block, &vmbuffer))
break;
vacuum_delay_point();
}
@@ -540,6 +540,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
Size freespace;
bool all_visible_according_to_vm;
bool all_visible;
+ bool all_frozen = true; /* provided all_visible is also true */
bool has_dead_tuples;
TransactionId visibility_cutoff_xid = InvalidTransactionId;
@@ -554,8 +555,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
next_not_all_visible_block < nblocks;
next_not_all_visible_block++)
{
- if (!visibilitymap_test(onerel, next_not_all_visible_block,
- &vmbuffer))
+ if (!VM_ALL_VISIBLE(onerel, next_not_all_visible_block, &vmbuffer))
break;
vacuum_delay_point();
}
@@ -743,7 +743,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
empty_pages++;
freespace = PageGetHeapFreeSpace(page);
- /* empty pages are always all-visible */
+ /* empty pages are always all-visible and all-frozen */
if (!PageIsAllVisible(page))
{
START_CRIT_SECTION();
@@ -766,8 +766,10 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
log_newpage_buffer(buf, true);
PageSetAllVisible(page);
+ PageSetAllFrozen(page);
visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
- vmbuffer, InvalidTransactionId);
+ vmbuffer, InvalidTransactionId,
+ VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
END_CRIT_SECTION();
}
@@ -954,6 +956,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
if (heap_prepare_freeze_tuple(tuple.t_data, FreezeLimit,
MultiXactCutoff, &frozen[nfrozen]))
frozen[nfrozen++].offset = offnum;
+ else if (heap_tuple_needs_eventual_freeze(tuple.t_data))
+ all_frozen = false;
}
} /* scan along page */
@@ -1018,6 +1022,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
/* mark page all-visible, if appropriate */
if (all_visible && !all_visible_according_to_vm)
{
+ uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
+
/*
* It should never be the case that the visibility map page is set
* while the page-level bit is clear, but the reverse is allowed
@@ -1032,9 +1038,14 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
* rare cases after a crash, it is not worth optimizing.
*/
PageSetAllVisible(page);
+ if (all_frozen)
+ {
+ PageSetAllFrozen(page);
+ flags |= VISIBILITYMAP_ALL_FROZEN;
+ }
MarkBufferDirty(buf);
visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
- vmbuffer, visibility_cutoff_xid);
+ vmbuffer, visibility_cutoff_xid, flags);
}
/*
@@ -1045,7 +1056,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
* that something bad has happened.
*/
else if (all_visible_according_to_vm && !PageIsAllVisible(page)
- && visibilitymap_test(onerel, blkno, &vmbuffer))
+ && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer))
{
elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
relname, blkno);
@@ -1074,6 +1085,28 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
visibilitymap_clear(onerel, blkno, vmbuffer);
}
+ /*
+ * If the page is marked all-visible in the visibility map but not yet
+ * all-frozen, and we found every tuple on it to be frozen, mark it
+ * all-frozen as well. Note that all_frozen is only valid if all_visible
+ * is true, so we must check both.
+ */
+ else if (all_visible_according_to_vm && all_visible && all_frozen &&
+ !VM_ALL_FROZEN(onerel, blkno, &vmbuffer))
+ {
+ /* Page is marked all-visible but should also be marked all-frozen */
+ PageSetAllFrozen(page);
+ MarkBufferDirty(buf);
+
+ /*
+ * We can pass InvalidTransactionId as the cutoff XID here,
+ * because setting the all-frozen bit doesn't cause recovery
+ * conflicts.
+ */
+ visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
+ vmbuffer, InvalidTransactionId,
+ VISIBILITYMAP_ALL_FROZEN);
+ }
+
UnlockReleaseBuffer(buf);
/* Remember the location of the last page with nonremovable tuples */
@@ -1257,6 +1290,7 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
OffsetNumber unused[MaxOffsetNumber];
int uncnt = 0;
TransactionId visibility_cutoff_xid;
+ bool all_frozen;
START_CRIT_SECTION();
@@ -1308,19 +1342,34 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
* dirty, exclusively locked, and, if needed, a full page image has been
* emitted in the log_heap_clean() above.
*/
- if (heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid))
+ if (heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid,
+ &all_frozen))
+ {
PageSetAllVisible(page);
+ if (all_frozen)
+ PageSetAllFrozen(page);
+ }
/*
* All the changes to the heap page have been done. If the all-visible
- * flag is now set, also set the VM bit.
+ * flag is now set, also set the VM all-visible bit (and, if possible,
+ * the all-frozen bit) unless this has already been done.
*/
- if (PageIsAllVisible(page) &&
- !visibilitymap_test(onerel, blkno, vmbuffer))
+ if (PageIsAllVisible(page))
{
+ uint8 vm_status = visibilitymap_get_status(onerel, blkno, vmbuffer);
+ uint8 flags = 0;
+
+ /* Work out which VM bits still need to be set */
+ if ((vm_status & VISIBILITYMAP_ALL_VISIBLE) == 0)
+ flags |= VISIBILITYMAP_ALL_VISIBLE;
+ if ((vm_status & VISIBILITYMAP_ALL_FROZEN) == 0 && all_frozen)
+ flags |= VISIBILITYMAP_ALL_FROZEN;
+
Assert(BufferIsValid(*vmbuffer));
- visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr, *vmbuffer,
- visibility_cutoff_xid);
+ if (flags != 0)
+ visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr,
+ *vmbuffer, visibility_cutoff_xid, flags);
}
return tupindex;
@@ -1842,10 +1891,13 @@ vac_cmp_itemptr(const void *left, const void *right)
/*
* Check if every tuple in the given page is visible to all current and future
* transactions. Also return the visibility_cutoff_xid which is the highest
- * xmin amongst the visible tuples.
+ * xmin amongst the visible tuples. Set *all_frozen to true if every tuple
+ * on this page is frozen.
*/
static bool
-heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cutoff_xid)
+heap_page_is_all_visible(Relation rel, Buffer buf,
+ TransactionId *visibility_cutoff_xid,
+ bool *all_frozen)
{
Page page = BufferGetPage(buf);
BlockNumber blockno = BufferGetBlockNumber(buf);
@@ -1854,6 +1906,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut
bool all_visible = true;
*visibility_cutoff_xid = InvalidTransactionId;
+ *all_frozen = true;
/*
* This is a stripped down version of the line pointer scan in
@@ -1918,6 +1971,11 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut
/* Track newest xmin on page. */
if (TransactionIdFollows(xmin, *visibility_cutoff_xid))
*visibility_cutoff_xid = xmin;
+
+ /* Check whether this tuple is already frozen or not */
+ if (all_visible && *all_frozen &&
+ heap_tuple_needs_eventual_freeze(tuple.t_data))
+ *all_frozen = false;
}
break;
@@ -1934,5 +1992,14 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut
}
} /* scan along page */
+ /*
+ * We don't bother clearing *all_frozen when the page is discovered not
+ * to be all-visible, so do that now if necessary. The page might fail
+ * to be all-frozen for other reasons anyway, but if it's not all-visible,
+ * then it definitely isn't all-frozen.
+ */
+ if (!all_visible)
+ *all_frozen = false;
+
return all_visible;
}
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index 90afbdca652..4f6f91c8dba 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -85,9 +85,9 @@ IndexOnlyNext(IndexOnlyScanState *node)
* which all tuples are known visible to everybody. In any case,
* we'll use the index tuple not the heap tuple as the data source.
*
- * Note on Memory Ordering Effects: visibilitymap_test does not lock
- * the visibility map buffer, and therefore the result we read here
- * could be slightly stale. However, it can't be stale enough to
+ * Note on Memory Ordering Effects: visibilitymap_get_status does not
+ * lock the visibility map buffer, and therefore the result we read
+ * here could be slightly stale. However, it can't be stale enough to
* matter.
*
* We need to detect clearing a VM bit due to an insert right away,
@@ -114,9 +114,9 @@ IndexOnlyNext(IndexOnlyScanState *node)
* It's worth going through this complexity to avoid needing to lock
* the VM buffer, which could cause significant contention.
*/
- if (!visibilitymap_test(scandesc->heapRelation,
- ItemPointerGetBlockNumber(tid),
- &node->ioss_VMBuffer))
+ if (!VM_ALL_VISIBLE(scandesc->heapRelation,
+ ItemPointerGetBlockNumber(tid),
+ &node->ioss_VMBuffer))
{
/*
* Rats, we have to visit the heap to check visibility.
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index 3f5df9a2e96..206fa2d69e6 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -270,8 +270,10 @@ ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid, RelFileNode
* If we get passed InvalidTransactionId then we are a little surprised,
* but it is theoretically possible in normal running. It also happens
* when replaying already applied WAL records after a standby crash or
- * restart. If latestRemovedXid is invalid then there is no conflict. That
- * rule applies across all record types that suffer from this conflict.
+ * restart, or when replaying an XLOG_HEAP2_VISIBLE record that marks as
+ * frozen a page which was already all-visible. If latestRemovedXid is
+ * invalid then there is no conflict. That rule applies across all record
+ * types that suffer from this conflict.
*/
if (!TransactionIdIsValid(latestRemovedXid))
return;
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index a427df5eaa1..b3a595c67e9 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -170,6 +170,7 @@ extern bool heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
TransactionId cutoff_multi);
extern bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
MultiXactId cutoff_multi, Buffer buf);
+extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple);
extern Oid simple_heap_insert(Relation relation, HeapTuple tup);
extern void simple_heap_delete(Relation relation, ItemPointer tid);
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index f77489bb78d..ad30217cfbf 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -320,9 +320,10 @@ typedef struct xl_heap_freeze_page
typedef struct xl_heap_visible
{
TransactionId cutoff_xid;
+ uint8 flags; /* VISIBILITYMAP_* flags */
} xl_heap_visible;
-#define SizeOfHeapVisible (offsetof(xl_heap_visible, cutoff_xid) + sizeof(TransactionId))
+#define SizeOfHeapVisible (offsetof(xl_heap_visible, flags) + sizeof(uint8))
typedef struct xl_heap_new_cid
{
@@ -389,6 +390,6 @@ extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
extern void heap_execute_freeze_tuple(HeapTupleHeader tuple,
xl_heap_freeze_tuple *xlrec_tp);
extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer,
- Buffer vm_buffer, TransactionId cutoff_xid);
+ Buffer vm_buffer, TransactionId cutoff_xid, uint8 flags);
#endif /* HEAPAM_XLOG_H */
diff --git a/src/include/access/visibilitymap.h b/src/include/access/visibilitymap.h
index d447daff7a5..b8dc54c55d2 100644
--- a/src/include/access/visibilitymap.h
+++ b/src/include/access/visibilitymap.h
@@ -19,15 +19,30 @@
#include "storage/buf.h"
#include "utils/relcache.h"
+#define BITS_PER_HEAPBLOCK 2
+#define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)
+
+/* Flags for bit map */
+#define VISIBILITYMAP_ALL_VISIBLE 0x01
+#define VISIBILITYMAP_ALL_FROZEN 0x02
+#define VISIBILITYMAP_VALID_BITS 0x03 /* OR of all valid visibility map flag bits */
+
+/* Macros for visibilitymap test */
+#define VM_ALL_VISIBLE(r, b, v) \
+ ((visibilitymap_get_status((r), (b), (v)) & VISIBILITYMAP_ALL_VISIBLE) != 0)
+#define VM_ALL_FROZEN(r, b, v) \
+ ((visibilitymap_get_status((r), (b), (v)) & VISIBILITYMAP_ALL_FROZEN) != 0)
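+
+/*
+ * Since the all-frozen bit may be set only on pages that are already
+ * all-visible, VM_ALL_FROZEN should imply VM_ALL_VISIBLE.
+ */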
+
extern void visibilitymap_clear(Relation rel, BlockNumber heapBlk,
Buffer vmbuf);
extern void visibilitymap_pin(Relation rel, BlockNumber heapBlk,
Buffer *vmbuf);
extern bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf);
extern void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
- XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid);
-extern bool visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *vmbuf);
-extern BlockNumber visibilitymap_count(Relation rel);
+ XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid,
+ uint8 flags);
+extern uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf);
+extern void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen);
extern void visibilitymap_truncate(Relation rel, BlockNumber nheapblocks);
#endif /* VISIBILITYMAP_H */
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index aff12d353c3..6795834912a 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201602221
+#define CATALOG_VERSION_NO 201603011
#endif
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index 2ce3be765c0..0b023b3d853 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -178,8 +178,10 @@ typedef PageHeaderData *PageHeader;
* tuple? */
#define PD_ALL_VISIBLE 0x0004 /* all tuples on page are visible to
* everyone */
+#define PD_ALL_FROZEN 0x0008 /* all tuples on page are completely
+ * frozen */
-#define PD_VALID_FLAG_BITS 0x0007 /* OR of all valid pd_flags bits */
+#define PD_VALID_FLAG_BITS 0x000F /* OR of all valid pd_flags bits */
/*
* Page layout version number 0 is for pre-7.3 Postgres releases.
@@ -367,7 +369,12 @@ typedef PageHeaderData *PageHeader;
#define PageSetAllVisible(page) \
(((PageHeader) (page))->pd_flags |= PD_ALL_VISIBLE)
#define PageClearAllVisible(page) \
- (((PageHeader) (page))->pd_flags &= ~PD_ALL_VISIBLE)
+ (((PageHeader) (page))->pd_flags &= ~(PD_ALL_VISIBLE | PD_ALL_FROZEN))
+
+#define PageIsAllFrozen(page) \
+ (((PageHeader) (page))->pd_flags & PD_ALL_FROZEN)
+#define PageSetAllFrozen(page) \
+ (((PageHeader) (page))->pd_flags |= PD_ALL_FROZEN)
#define PageIsPrunable(page, oldestxmin) \
( \