mirror of
https://github.com/postgres/postgres.git
synced 2025-10-24 01:29:19 +03:00
Change the format of the VM fork to add a second bit per page.
The new bit indicates whether every tuple on the page is already frozen. It is cleared only when the all-visible bit is cleared, and it can be set only when we vacuum a page and find that every tuple on that page is both visible to every transaction and in no need of any future vacuuming. A future commit will use this new bit to optimize away full-table scans that would otherwise be triggered by XID wraparound considerations. A page which is merely all-visible must still be scanned in that case, but a page which is all-frozen need not be. This commit does not attempt that optimization, although that optimization is the goal here. It seems better to get the basic infrastructure in place first. Per discussion, it's very desirable for pg_upgrade to automatically migrate existing VM forks from the old format to the new format. That, too, will be handled in a follow-on patch. Masahiko Sawada, reviewed by Kyotaro Horiguchi, Fujii Masao, Amit Kapila, Simon Riggs, Andres Freund, and others, and substantially revised by me.
This commit is contained in:
@@ -87,7 +87,7 @@ statapprox_heap(Relation rel, output_type *stat)
|
|||||||
* If the page has only visible tuples, then we can find out the free
|
* If the page has only visible tuples, then we can find out the free
|
||||||
* space from the FSM and move on.
|
* space from the FSM and move on.
|
||||||
*/
|
*/
|
||||||
if (visibilitymap_test(rel, blkno, &vmbuffer))
|
if (VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
|
||||||
{
|
{
|
||||||
freespace = GetRecordedFreeSpace(rel, blkno);
|
freespace = GetRecordedFreeSpace(rel, blkno);
|
||||||
stat->tuple_len += BLCKSZ - freespace;
|
stat->tuple_len += BLCKSZ - freespace;
|
||||||
|
@@ -623,7 +623,8 @@ can be used to examine the information stored in free space maps.
|
|||||||
<para>
|
<para>
|
||||||
Each heap relation has a Visibility Map
|
Each heap relation has a Visibility Map
|
||||||
(VM) to keep track of which pages contain only tuples that are known to be
|
(VM) to keep track of which pages contain only tuples that are known to be
|
||||||
visible to all active transactions. It's stored
|
visible to all active transactions; it also keeps track of which pages contain
|
||||||
|
only frozen tuples.  It's stored
|
||||||
alongside the main relation data in a separate relation fork, named after the
|
alongside the main relation data in a separate relation fork, named after the
|
||||||
filenode number of the relation, plus a <literal>_vm</> suffix. For example,
|
filenode number of the relation, plus a <literal>_vm</> suffix. For example,
|
||||||
if the filenode of a relation is 12345, the VM is stored in a file called
|
if the filenode of a relation is 12345, the VM is stored in a file called
|
||||||
@@ -632,11 +633,12 @@ Note that indexes do not have VMs.
|
|||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
The visibility map simply stores one bit per heap page. A set bit means
|
The visibility map stores two bits per heap page. The first bit, if set,
|
||||||
that all tuples on the page are known to be visible to all transactions.
|
indicates that the page is all-visible, or in other words that the page does
|
||||||
This means that the page does not contain any tuples that need to be vacuumed.
|
not contain any tuples that need to be vacuumed.
|
||||||
This information can also be used by <firstterm>index-only scans</> to answer
|
This information can also be used by <firstterm>index-only scans</> to answer
|
||||||
queries using only the index tuple.
|
queries using only the index tuple.
|
||||||
|
The second bit, if set, means that all tuples on the page have been frozen.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
|
@@ -6951,6 +6951,55 @@ ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status,
|
|||||||
rel, NULL, XLTW_None, remaining);
|
rel, NULL, XLTW_None, remaining);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* heap_tuple_needs_eventual_freeze
|
||||||
|
*
|
||||||
|
* Check to see whether any of the XID fields of a tuple (xmin, xmax, xvac)
|
||||||
|
* will eventually require freezing. Similar to heap_tuple_needs_freeze,
|
||||||
|
* but there's no cutoff, since we're trying to figure out whether freezing
|
||||||
|
* will ever be needed, not whether it's needed now.
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
|
||||||
|
{
|
||||||
|
TransactionId xid;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If xmin is a normal transaction ID, this tuple is definitely not
|
||||||
|
* frozen.
|
||||||
|
*/
|
||||||
|
xid = HeapTupleHeaderGetXmin(tuple);
|
||||||
|
if (TransactionIdIsNormal(xid))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If xmax is a valid xact or multixact, this tuple is also not frozen.
|
||||||
|
*/
|
||||||
|
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
|
||||||
|
{
|
||||||
|
MultiXactId multi;
|
||||||
|
|
||||||
|
multi = HeapTupleHeaderGetRawXmax(tuple);
|
||||||
|
if (MultiXactIdIsValid(multi))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
xid = HeapTupleHeaderGetRawXmax(tuple);
|
||||||
|
if (TransactionIdIsNormal(xid))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tuple->t_infomask & HEAP_MOVED)
|
||||||
|
{
|
||||||
|
xid = HeapTupleHeaderGetXvac(tuple);
|
||||||
|
if (TransactionIdIsNormal(xid))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* heap_tuple_needs_freeze
|
* heap_tuple_needs_freeze
|
||||||
*
|
*
|
||||||
@@ -7205,7 +7254,7 @@ log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid,
|
|||||||
*/
|
*/
|
||||||
XLogRecPtr
|
XLogRecPtr
|
||||||
log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer,
|
log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer,
|
||||||
TransactionId cutoff_xid)
|
TransactionId cutoff_xid, uint8 vmflags)
|
||||||
{
|
{
|
||||||
xl_heap_visible xlrec;
|
xl_heap_visible xlrec;
|
||||||
XLogRecPtr recptr;
|
XLogRecPtr recptr;
|
||||||
@@ -7215,6 +7264,7 @@ log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer,
|
|||||||
Assert(BufferIsValid(vm_buffer));
|
Assert(BufferIsValid(vm_buffer));
|
||||||
|
|
||||||
xlrec.cutoff_xid = cutoff_xid;
|
xlrec.cutoff_xid = cutoff_xid;
|
||||||
|
xlrec.flags = vmflags;
|
||||||
XLogBeginInsert();
|
XLogBeginInsert();
|
||||||
XLogRegisterData((char *) &xlrec, SizeOfHeapVisible);
|
XLogRegisterData((char *) &xlrec, SizeOfHeapVisible);
|
||||||
|
|
||||||
@@ -7804,7 +7854,12 @@ heap_xlog_visible(XLogReaderState *record)
|
|||||||
* the subsequent update won't be replayed to clear the flag.
|
* the subsequent update won't be replayed to clear the flag.
|
||||||
*/
|
*/
|
||||||
page = BufferGetPage(buffer);
|
page = BufferGetPage(buffer);
|
||||||
PageSetAllVisible(page);
|
|
||||||
|
if (xlrec->flags & VISIBILITYMAP_ALL_VISIBLE)
|
||||||
|
PageSetAllVisible(page);
|
||||||
|
if (xlrec->flags & VISIBILITYMAP_ALL_FROZEN)
|
||||||
|
PageSetAllFrozen(page);
|
||||||
|
|
||||||
MarkBufferDirty(buffer);
|
MarkBufferDirty(buffer);
|
||||||
}
|
}
|
||||||
else if (action == BLK_RESTORED)
|
else if (action == BLK_RESTORED)
|
||||||
@@ -7856,7 +7911,7 @@ heap_xlog_visible(XLogReaderState *record)
|
|||||||
*/
|
*/
|
||||||
if (lsn > PageGetLSN(vmpage))
|
if (lsn > PageGetLSN(vmpage))
|
||||||
visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
|
visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
|
||||||
xlrec->cutoff_xid);
|
xlrec->cutoff_xid, xlrec->flags);
|
||||||
|
|
||||||
ReleaseBuffer(vmbuffer);
|
ReleaseBuffer(vmbuffer);
|
||||||
FreeFakeRelcacheEntry(reln);
|
FreeFakeRelcacheEntry(reln);
|
||||||
|
@@ -15,39 +15,42 @@
|
|||||||
* visibilitymap_pin - pin a map page for setting a bit
|
* visibilitymap_pin - pin a map page for setting a bit
|
||||||
* visibilitymap_pin_ok - check whether correct map page is already pinned
|
* visibilitymap_pin_ok - check whether correct map page is already pinned
|
||||||
* visibilitymap_set - set a bit in a previously pinned page
|
* visibilitymap_set - set a bit in a previously pinned page
|
||||||
* visibilitymap_test - test if a bit is set
|
* visibilitymap_get_status - get status of bits
|
||||||
* visibilitymap_count - count number of bits set in visibility map
|
* visibilitymap_count - count number of bits set in visibility map
|
||||||
* visibilitymap_truncate - truncate the visibility map
|
* visibilitymap_truncate - truncate the visibility map
|
||||||
*
|
*
|
||||||
* NOTES
|
* NOTES
|
||||||
*
|
*
|
||||||
* The visibility map is a bitmap with one bit per heap page. A set bit means
|
* The visibility map is a bitmap with two bits (all-visible and all-frozen)
|
||||||
* that all tuples on the page are known visible to all transactions, and
|
* per heap page. A set all-visible bit means that all tuples on the page are
|
||||||
* therefore the page doesn't need to be vacuumed. The map is conservative in
|
* known visible to all transactions, and therefore the page doesn't need to
|
||||||
* the sense that we make sure that whenever a bit is set, we know the
|
* be vacuumed. A set all-frozen bit means that all tuples on the page are
|
||||||
* condition is true, but if a bit is not set, it might or might not be true.
|
* completely frozen, and therefore the page doesn't need to be vacuumed even
|
||||||
|
* if a whole-table-scanning vacuum is required (e.g. an anti-wraparound vacuum).
|
||||||
|
* The all-frozen bit must be set only when the page is already all-visible.
|
||||||
*
|
*
|
||||||
* Clearing a visibility map bit is not separately WAL-logged. The callers
|
* The map is conservative in the sense that we make sure that whenever a bit
|
||||||
|
* is set, we know the condition is true, but if a bit is not set, it might or
|
||||||
|
* might not be true.
|
||||||
|
*
|
||||||
|
* Clearing both visibility map bits is not separately WAL-logged. The callers
|
||||||
* must make sure that whenever a bit is cleared, the bit is cleared on WAL
|
* must make sure that whenever a bit is cleared, the bit is cleared on WAL
|
||||||
* replay of the updating operation as well.
|
* replay of the updating operation as well.
|
||||||
*
|
*
|
||||||
* When we *set* a visibility map during VACUUM, we must write WAL. This may
|
* When we *set* a visibility map during VACUUM, we must write WAL. This may
|
||||||
* seem counterintuitive, since the bit is basically a hint: if it is clear,
|
* seem counterintuitive, since the bit is basically a hint: if it is clear,
|
||||||
* it may still be the case that every tuple on the page is visible to all
|
* it may still be the case that every tuple on the page is all-visible or
|
||||||
* transactions; we just don't know that for certain. The difficulty is that
|
* all-frozen; we just don't know that for certain.  The difficulty is that
|
||||||
* there are two bits which are typically set together: the PD_ALL_VISIBLE bit
|
* there are two bits which are typically set together: the PD_ALL_VISIBLE
|
||||||
* on the page itself, and the visibility map bit. If a crash occurs after the
|
* or PD_ALL_FROZEN bit on the page itself, and the corresponding visibility
|
||||||
* visibility map page makes it to disk and before the updated heap page makes
|
* map bit. If a crash occurs after the visibility map page makes it to disk
|
||||||
* it to disk, redo must set the bit on the heap page. Otherwise, the next
|
* and before the updated heap page makes it to disk, redo must set the bit on
|
||||||
* insert, update, or delete on the heap page will fail to realize that the
|
* the heap page. Otherwise, the next insert, update, or delete on the heap
|
||||||
* visibility map bit must be cleared, possibly causing index-only scans to
|
* page will fail to realize that the visibility map bit must be cleared,
|
||||||
* return wrong answers.
|
* possibly causing index-only scans to return wrong answers.
|
||||||
*
|
*
|
||||||
* VACUUM will normally skip pages for which the visibility map bit is set;
|
* VACUUM will normally skip pages for which the visibility map bit is set;
|
||||||
* such pages can't contain any dead tuples and therefore don't need vacuuming.
|
* such pages can't contain any dead tuples and therefore don't need vacuuming.
|
||||||
* The visibility map is not used for anti-wraparound vacuums, because
|
|
||||||
* an anti-wraparound vacuum needs to freeze tuples and observe the latest xid
|
|
||||||
* present in the table, even on pages that don't have any dead tuples.
|
|
||||||
*
|
*
|
||||||
* LOCKING
|
* LOCKING
|
||||||
*
|
*
|
||||||
@@ -101,38 +104,50 @@
|
|||||||
*/
|
*/
|
||||||
#define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
|
#define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
|
||||||
|
|
||||||
/* Number of bits allocated for each heap block. */
|
|
||||||
#define BITS_PER_HEAPBLOCK 1
|
|
||||||
|
|
||||||
/* Number of heap blocks we can represent in one byte. */
|
|
||||||
#define HEAPBLOCKS_PER_BYTE 8
|
|
||||||
|
|
||||||
/* Number of heap blocks we can represent in one visibility map page. */
|
/* Number of heap blocks we can represent in one visibility map page. */
|
||||||
#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)
|
#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)
|
||||||
|
|
||||||
/* Mapping from heap block number to the right bit in the visibility map */
|
/* Mapping from heap block number to the right bit in the visibility map */
|
||||||
#define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
|
#define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
|
||||||
#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
|
#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
|
||||||
#define HEAPBLK_TO_MAPBIT(x) ((x) % HEAPBLOCKS_PER_BYTE)
|
#define HEAPBLK_TO_MAPBIT(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
|
||||||
|
|
||||||
/* table for fast counting of set bits */
|
/* tables for fast counting of set bits for visible and frozen */
|
||||||
static const uint8 number_of_ones[256] = {
|
static const uint8 number_of_ones_for_visible[256] = {
|
||||||
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
|
0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
|
||||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
|
||||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
|
||||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
|
||||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
|
||||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
|
||||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
|
||||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
|
||||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
|
||||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
|
||||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
|
||||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
|
||||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
|
||||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
|
||||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
|
||||||
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
|
2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4
|
||||||
|
};
|
||||||
|
static const uint8 number_of_ones_for_frozen[256] = {
|
||||||
|
0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
|
||||||
|
0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
|
||||||
|
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
|
||||||
|
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
|
||||||
|
0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
|
||||||
|
0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
|
||||||
|
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
|
||||||
|
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
|
||||||
|
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
|
||||||
|
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
|
||||||
|
2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
|
||||||
|
2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
|
||||||
|
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
|
||||||
|
1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
|
||||||
|
2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
|
||||||
|
2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4
|
||||||
};
|
};
|
||||||
|
|
||||||
/* prototypes for internal routines */
|
/* prototypes for internal routines */
|
||||||
@@ -141,7 +156,7 @@ static void vm_extend(Relation rel, BlockNumber nvmblocks);
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* visibilitymap_clear - clear a bit in visibility map
|
* visibilitymap_clear - clear all bits for one heap page in visibility map
|
||||||
*
|
*
|
||||||
* You must pass a buffer containing the correct map page to this function.
|
* You must pass a buffer containing the correct map page to this function.
|
||||||
* Call visibilitymap_pin first to pin the right one. This function doesn't do
|
* Call visibilitymap_pin first to pin the right one. This function doesn't do
|
||||||
@@ -153,7 +168,7 @@ visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf)
|
|||||||
BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
|
BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
|
||||||
int mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
|
int mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
|
||||||
int mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
|
int mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
|
||||||
uint8 mask = 1 << mapBit;
|
uint8 mask = VISIBILITYMAP_VALID_BITS << mapBit;
|
||||||
char *map;
|
char *map;
|
||||||
|
|
||||||
#ifdef TRACE_VISIBILITYMAP
|
#ifdef TRACE_VISIBILITYMAP
|
||||||
@@ -186,7 +201,7 @@ visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf)
|
|||||||
* visibilitymap_set to actually set the bit.
|
* visibilitymap_set to actually set the bit.
|
||||||
*
|
*
|
||||||
* On entry, *buf should be InvalidBuffer or a valid buffer returned by
|
* On entry, *buf should be InvalidBuffer or a valid buffer returned by
|
||||||
* an earlier call to visibilitymap_pin or visibilitymap_test on the same
|
* an earlier call to visibilitymap_pin or visibilitymap_get_status on the same
|
||||||
* relation. On return, *buf is a valid buffer with the map page containing
|
* relation. On return, *buf is a valid buffer with the map page containing
|
||||||
* the bit for heapBlk.
|
* the bit for heapBlk.
|
||||||
*
|
*
|
||||||
@@ -212,7 +227,7 @@ visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *buf)
|
|||||||
* visibilitymap_pin_ok - do we already have the correct page pinned?
|
* visibilitymap_pin_ok - do we already have the correct page pinned?
|
||||||
*
|
*
|
||||||
* On entry, buf should be InvalidBuffer or a valid buffer returned by
|
* On entry, buf should be InvalidBuffer or a valid buffer returned by
|
||||||
* an earlier call to visibilitymap_pin or visibilitymap_test on the same
|
* an earlier call to visibilitymap_pin or visibilitymap_get_status on the same
|
||||||
* relation. The return value indicates whether the buffer covers the
|
* relation. The return value indicates whether the buffer covers the
|
||||||
* given heapBlk.
|
* given heapBlk.
|
||||||
*/
|
*/
|
||||||
@@ -225,19 +240,22 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* visibilitymap_set - set a bit on a previously pinned page
|
* visibilitymap_set - set bit(s) on a previously pinned page
|
||||||
*
|
*
|
||||||
* recptr is the LSN of the XLOG record we're replaying, if we're in recovery,
|
* recptr is the LSN of the XLOG record we're replaying, if we're in recovery,
|
||||||
* or InvalidXLogRecPtr in normal running. The page LSN is advanced to the
|
* or InvalidXLogRecPtr in normal running. The page LSN is advanced to the
|
||||||
* one provided; in normal running, we generate a new XLOG record and set the
|
* one provided; in normal running, we generate a new XLOG record and set the
|
||||||
* page LSN to that value. cutoff_xid is the largest xmin on the page being
|
* page LSN to that value. cutoff_xid is the largest xmin on the page being
|
||||||
* marked all-visible; it is needed for Hot Standby, and can be
|
* marked all-visible; it is needed for Hot Standby, and can be
|
||||||
* InvalidTransactionId if the page contains no tuples.
|
* InvalidTransactionId if the page contains no tuples. It can also be set
|
||||||
|
* to InvalidTransactionId when a page that is already all-visible is being
|
||||||
|
* marked all-frozen.
|
||||||
*
|
*
|
||||||
* Caller is expected to set the heap page's PD_ALL_VISIBLE bit before calling
|
* Caller is expected to set the heap page's PD_ALL_VISIBLE or PD_ALL_FROZEN
|
||||||
* this function. Except in recovery, caller should also pass the heap
|
* bit before calling this function. Except in recovery, caller should also
|
||||||
* buffer. When checksums are enabled and we're not in recovery, we must add
|
* pass the heap buffer and flags which indicate which bit(s) we want to set.
|
||||||
* the heap buffer to the WAL chain to protect it from being torn.
|
* When checksums are enabled and we're not in recovery, we must add the heap
|
||||||
|
* buffer to the WAL chain to protect it from being torn.
|
||||||
*
|
*
|
||||||
* You must pass a buffer containing the correct map page to this function.
|
* You must pass a buffer containing the correct map page to this function.
|
||||||
* Call visibilitymap_pin first to pin the right one. This function doesn't do
|
* Call visibilitymap_pin first to pin the right one. This function doesn't do
|
||||||
@@ -245,13 +263,14 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
|
|||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
|
visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
|
||||||
XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid)
|
XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid,
|
||||||
|
uint8 flags)
|
||||||
{
|
{
|
||||||
BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
|
BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
|
||||||
uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
|
uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
|
||||||
uint8 mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
|
uint8 mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
|
||||||
Page page;
|
Page page;
|
||||||
char *map;
|
uint8 *map;
|
||||||
|
|
||||||
#ifdef TRACE_VISIBILITYMAP
|
#ifdef TRACE_VISIBILITYMAP
|
||||||
elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk);
|
elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk);
|
||||||
@@ -259,6 +278,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
|
|||||||
|
|
||||||
Assert(InRecovery || XLogRecPtrIsInvalid(recptr));
|
Assert(InRecovery || XLogRecPtrIsInvalid(recptr));
|
||||||
Assert(InRecovery || BufferIsValid(heapBuf));
|
Assert(InRecovery || BufferIsValid(heapBuf));
|
||||||
|
Assert(flags & VISIBILITYMAP_VALID_BITS);
|
||||||
|
|
||||||
/* Check that we have the right heap page pinned, if present */
|
/* Check that we have the right heap page pinned, if present */
|
||||||
if (BufferIsValid(heapBuf) && BufferGetBlockNumber(heapBuf) != heapBlk)
|
if (BufferIsValid(heapBuf) && BufferGetBlockNumber(heapBuf) != heapBlk)
|
||||||
@@ -269,14 +289,14 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
|
|||||||
elog(ERROR, "wrong VM buffer passed to visibilitymap_set");
|
elog(ERROR, "wrong VM buffer passed to visibilitymap_set");
|
||||||
|
|
||||||
page = BufferGetPage(vmBuf);
|
page = BufferGetPage(vmBuf);
|
||||||
map = PageGetContents(page);
|
map = (uint8 *)PageGetContents(page);
|
||||||
LockBuffer(vmBuf, BUFFER_LOCK_EXCLUSIVE);
|
LockBuffer(vmBuf, BUFFER_LOCK_EXCLUSIVE);
|
||||||
|
|
||||||
if (!(map[mapByte] & (1 << mapBit)))
|
if (flags != (map[mapByte] >> mapBit & VISIBILITYMAP_VALID_BITS))
|
||||||
{
|
{
|
||||||
START_CRIT_SECTION();
|
START_CRIT_SECTION();
|
||||||
|
|
||||||
map[mapByte] |= (1 << mapBit);
|
map[mapByte] |= (flags << mapBit);
|
||||||
MarkBufferDirty(vmBuf);
|
MarkBufferDirty(vmBuf);
|
||||||
|
|
||||||
if (RelationNeedsWAL(rel))
|
if (RelationNeedsWAL(rel))
|
||||||
@@ -285,7 +305,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
|
|||||||
{
|
{
|
||||||
Assert(!InRecovery);
|
Assert(!InRecovery);
|
||||||
recptr = log_heap_visible(rel->rd_node, heapBuf, vmBuf,
|
recptr = log_heap_visible(rel->rd_node, heapBuf, vmBuf,
|
||||||
cutoff_xid);
|
cutoff_xid, flags);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If data checksums are enabled (or wal_log_hints=on), we
|
* If data checksums are enabled (or wal_log_hints=on), we
|
||||||
@@ -295,8 +315,10 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
|
|||||||
{
|
{
|
||||||
Page heapPage = BufferGetPage(heapBuf);
|
Page heapPage = BufferGetPage(heapBuf);
|
||||||
|
|
||||||
/* caller is expected to set PD_ALL_VISIBLE first */
|
/* Caller is expected to set page-level bits first. */
|
||||||
Assert(PageIsAllVisible(heapPage));
|
Assert((flags & VISIBILITYMAP_ALL_VISIBLE) == 0 || PageIsAllVisible(heapPage));
|
||||||
|
Assert((flags & VISIBILITYMAP_ALL_FROZEN) == 0 || PageIsAllFrozen(heapPage));
|
||||||
|
|
||||||
PageSetLSN(heapPage, recptr);
|
PageSetLSN(heapPage, recptr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -310,15 +332,17 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* visibilitymap_test - test if a bit is set
|
* visibilitymap_get_status - get status of bits
|
||||||
*
|
*
|
||||||
* Are all tuples on heapBlk visible to all, according to the visibility map?
|
* Are all tuples on heapBlk visible to all, or marked frozen, according
|
||||||
|
* to the visibility map?
|
||||||
*
|
*
|
||||||
* On entry, *buf should be InvalidBuffer or a valid buffer returned by an
|
* On entry, *buf should be InvalidBuffer or a valid buffer returned by an
|
||||||
* earlier call to visibilitymap_pin or visibilitymap_test on the same
|
* earlier call to visibilitymap_pin or visibilitymap_get_status on the same
|
||||||
* relation. On return, *buf is a valid buffer with the map page containing
|
* relation. On return, *buf is a valid buffer with the map page containing
|
||||||
* the bit for heapBlk, or InvalidBuffer. The caller is responsible for
|
* the bit for heapBlk, or InvalidBuffer. The caller is responsible for
|
||||||
* releasing *buf after it's done testing and setting bits.
|
* releasing *buf after it's done testing and setting bits.  The return value
|
||||||
|
* holds the VISIBILITYMAP_* status bits currently set for heapBlk.
|
||||||
*
|
*
|
||||||
* NOTE: This function is typically called without a lock on the heap page,
|
* NOTE: This function is typically called without a lock on the heap page,
|
||||||
* so somebody else could change the bit just after we look at it. In fact,
|
* so somebody else could change the bit just after we look at it. In fact,
|
||||||
@@ -327,17 +351,16 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
|
|||||||
* we might see the old value. It is the caller's responsibility to deal with
|
* we might see the old value. It is the caller's responsibility to deal with
|
||||||
* all concurrency issues!
|
* all concurrency issues!
|
||||||
*/
|
*/
|
||||||
bool
|
uint8
|
||||||
visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
|
visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *buf)
|
||||||
{
|
{
|
||||||
BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
|
BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
|
||||||
uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
|
uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
|
||||||
uint8 mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
|
uint8 mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
|
||||||
bool result;
|
|
||||||
char *map;
|
char *map;
|
||||||
|
|
||||||
#ifdef TRACE_VISIBILITYMAP
|
#ifdef TRACE_VISIBILITYMAP
|
||||||
elog(DEBUG1, "vm_test %s %d", RelationGetRelationName(rel), heapBlk);
|
elog(DEBUG1, "vm_get_status %s %d", RelationGetRelationName(rel), heapBlk);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Reuse the old pinned buffer if possible */
|
/* Reuse the old pinned buffer if possible */
|
||||||
@@ -360,13 +383,11 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
|
|||||||
map = PageGetContents(BufferGetPage(*buf));
|
map = PageGetContents(BufferGetPage(*buf));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A single-bit read is atomic. There could be memory-ordering effects
|
* A single byte read is atomic. There could be memory-ordering effects
|
||||||
* here, but for performance reasons we make it the caller's job to worry
|
* here, but for performance reasons we make it the caller's job to worry
|
||||||
* about that.
|
* about that.
|
||||||
*/
|
*/
|
||||||
result = (map[mapByte] & (1 << mapBit)) ? true : false;
|
return ((map[mapByte] >> mapBit) & VISIBILITYMAP_VALID_BITS);
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -374,14 +395,20 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
|
|||||||
*
|
*
|
||||||
* Note: we ignore the possibility of race conditions when the table is being
|
* Note: we ignore the possibility of race conditions when the table is being
|
||||||
* extended concurrently with the call. New pages added to the table aren't
|
* extended concurrently with the call. New pages added to the table aren't
|
||||||
* going to be marked all-visible, so they won't affect the result.
|
* going to be marked all-visible or all-frozen, so they won't affect the result.
|
||||||
*/
|
*/
|
||||||
BlockNumber
|
void
|
||||||
visibilitymap_count(Relation rel)
|
visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
|
||||||
{
|
{
|
||||||
BlockNumber result = 0;
|
|
||||||
BlockNumber mapBlock;
|
BlockNumber mapBlock;
|
||||||
|
|
||||||
|
/* all_visible must be specified */
|
||||||
|
Assert(all_visible);
|
||||||
|
|
||||||
|
*all_visible = 0;
|
||||||
|
if (all_frozen)
|
||||||
|
*all_frozen = 0;
|
||||||
|
|
||||||
for (mapBlock = 0;; mapBlock++)
|
for (mapBlock = 0;; mapBlock++)
|
||||||
{
|
{
|
||||||
Buffer mapBuffer;
|
Buffer mapBuffer;
|
||||||
@@ -406,13 +433,13 @@ visibilitymap_count(Relation rel)
|
|||||||
|
|
||||||
for (i = 0; i < MAPSIZE; i++)
|
for (i = 0; i < MAPSIZE; i++)
|
||||||
{
|
{
|
||||||
result += number_of_ones[map[i]];
|
*all_visible += number_of_ones_for_visible[map[i]];
|
||||||
|
if (all_frozen)
|
||||||
|
*all_frozen += number_of_ones_for_frozen[map[i]];
|
||||||
}
|
}
|
||||||
|
|
||||||
ReleaseBuffer(mapBuffer);
|
ReleaseBuffer(mapBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@@ -1920,7 +1920,7 @@ index_update_stats(Relation rel,
|
|||||||
BlockNumber relallvisible;
|
BlockNumber relallvisible;
|
||||||
|
|
||||||
if (rd_rel->relkind != RELKIND_INDEX)
|
if (rd_rel->relkind != RELKIND_INDEX)
|
||||||
relallvisible = visibilitymap_count(rel);
|
visibilitymap_count(rel, &relallvisible, NULL);
|
||||||
else /* don't bother for indexes */
|
else /* don't bother for indexes */
|
||||||
relallvisible = 0;
|
relallvisible = 0;
|
||||||
|
|
||||||
|
@@ -569,14 +569,20 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
|
|||||||
* inherited stats.
|
* inherited stats.
|
||||||
*/
|
*/
|
||||||
if (!inh)
|
if (!inh)
|
||||||
|
{
|
||||||
|
BlockNumber relallvisible;
|
||||||
|
|
||||||
|
visibilitymap_count(onerel, &relallvisible, NULL);
|
||||||
|
|
||||||
vac_update_relstats(onerel,
|
vac_update_relstats(onerel,
|
||||||
relpages,
|
relpages,
|
||||||
totalrows,
|
totalrows,
|
||||||
visibilitymap_count(onerel),
|
relallvisible,
|
||||||
hasindex,
|
hasindex,
|
||||||
InvalidTransactionId,
|
InvalidTransactionId,
|
||||||
InvalidMultiXactId,
|
InvalidMultiXactId,
|
||||||
in_outer_xact);
|
in_outer_xact);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Same for indexes. Vacuum always scans all indexes, so if we're part of
|
* Same for indexes. Vacuum always scans all indexes, so if we're part of
|
||||||
|
@@ -157,7 +157,7 @@ static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
|
|||||||
static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
|
static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
|
||||||
static int vac_cmp_itemptr(const void *left, const void *right);
|
static int vac_cmp_itemptr(const void *left, const void *right);
|
||||||
static bool heap_page_is_all_visible(Relation rel, Buffer buf,
|
static bool heap_page_is_all_visible(Relation rel, Buffer buf,
|
||||||
TransactionId *visibility_cutoff_xid);
|
TransactionId *visibility_cutoff_xid, bool *all_frozen);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -295,7 +295,7 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
|
|||||||
new_rel_tuples = vacrelstats->old_rel_tuples;
|
new_rel_tuples = vacrelstats->old_rel_tuples;
|
||||||
}
|
}
|
||||||
|
|
||||||
new_rel_allvisible = visibilitymap_count(onerel);
|
visibilitymap_count(onerel, &new_rel_allvisible, NULL);
|
||||||
if (new_rel_allvisible > new_rel_pages)
|
if (new_rel_allvisible > new_rel_pages)
|
||||||
new_rel_allvisible = new_rel_pages;
|
new_rel_allvisible = new_rel_pages;
|
||||||
|
|
||||||
@@ -496,7 +496,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
|
|||||||
* maintain next_not_all_visible_block anyway, so as to set up the
|
* maintain next_not_all_visible_block anyway, so as to set up the
|
||||||
* all_visible_according_to_vm flag correctly for each page.
|
* all_visible_according_to_vm flag correctly for each page.
|
||||||
*
|
*
|
||||||
* Note: The value returned by visibilitymap_test could be slightly
|
* Note: The value returned by visibilitymap_get_status could be slightly
|
||||||
* out-of-date, since we make this test before reading the corresponding
|
* out-of-date, since we make this test before reading the corresponding
|
||||||
* heap page or locking the buffer. This is OK. If we mistakenly think
|
* heap page or locking the buffer. This is OK. If we mistakenly think
|
||||||
* that the page is all-visible when in fact the flag's just been cleared,
|
* that the page is all-visible when in fact the flag's just been cleared,
|
||||||
@@ -518,7 +518,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
|
|||||||
next_not_all_visible_block < nblocks;
|
next_not_all_visible_block < nblocks;
|
||||||
next_not_all_visible_block++)
|
next_not_all_visible_block++)
|
||||||
{
|
{
|
||||||
if (!visibilitymap_test(onerel, next_not_all_visible_block, &vmbuffer))
|
if (!VM_ALL_VISIBLE(onerel, next_not_all_visible_block, &vmbuffer))
|
||||||
break;
|
break;
|
||||||
vacuum_delay_point();
|
vacuum_delay_point();
|
||||||
}
|
}
|
||||||
@@ -540,6 +540,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
|
|||||||
Size freespace;
|
Size freespace;
|
||||||
bool all_visible_according_to_vm;
|
bool all_visible_according_to_vm;
|
||||||
bool all_visible;
|
bool all_visible;
|
||||||
|
bool all_frozen = true; /* provided all_visible is also true */
|
||||||
bool has_dead_tuples;
|
bool has_dead_tuples;
|
||||||
TransactionId visibility_cutoff_xid = InvalidTransactionId;
|
TransactionId visibility_cutoff_xid = InvalidTransactionId;
|
||||||
|
|
||||||
@@ -554,8 +555,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
|
|||||||
next_not_all_visible_block < nblocks;
|
next_not_all_visible_block < nblocks;
|
||||||
next_not_all_visible_block++)
|
next_not_all_visible_block++)
|
||||||
{
|
{
|
||||||
if (!visibilitymap_test(onerel, next_not_all_visible_block,
|
if (!VM_ALL_VISIBLE(onerel, next_not_all_visible_block, &vmbuffer))
|
||||||
&vmbuffer))
|
|
||||||
break;
|
break;
|
||||||
vacuum_delay_point();
|
vacuum_delay_point();
|
||||||
}
|
}
|
||||||
@@ -743,7 +743,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
|
|||||||
empty_pages++;
|
empty_pages++;
|
||||||
freespace = PageGetHeapFreeSpace(page);
|
freespace = PageGetHeapFreeSpace(page);
|
||||||
|
|
||||||
/* empty pages are always all-visible */
|
/* empty pages are always all-visible and all-frozen */
|
||||||
if (!PageIsAllVisible(page))
|
if (!PageIsAllVisible(page))
|
||||||
{
|
{
|
||||||
START_CRIT_SECTION();
|
START_CRIT_SECTION();
|
||||||
@@ -766,8 +766,10 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
|
|||||||
log_newpage_buffer(buf, true);
|
log_newpage_buffer(buf, true);
|
||||||
|
|
||||||
PageSetAllVisible(page);
|
PageSetAllVisible(page);
|
||||||
|
PageSetAllFrozen(page);
|
||||||
visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
|
visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
|
||||||
vmbuffer, InvalidTransactionId);
|
vmbuffer, InvalidTransactionId,
|
||||||
|
VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
|
||||||
END_CRIT_SECTION();
|
END_CRIT_SECTION();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -954,6 +956,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
|
|||||||
if (heap_prepare_freeze_tuple(tuple.t_data, FreezeLimit,
|
if (heap_prepare_freeze_tuple(tuple.t_data, FreezeLimit,
|
||||||
MultiXactCutoff, &frozen[nfrozen]))
|
MultiXactCutoff, &frozen[nfrozen]))
|
||||||
frozen[nfrozen++].offset = offnum;
|
frozen[nfrozen++].offset = offnum;
|
||||||
|
else if (heap_tuple_needs_eventual_freeze(tuple.t_data))
|
||||||
|
all_frozen = false;
|
||||||
}
|
}
|
||||||
} /* scan along page */
|
} /* scan along page */
|
||||||
|
|
||||||
@@ -1018,6 +1022,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
|
|||||||
/* mark page all-visible, if appropriate */
|
/* mark page all-visible, if appropriate */
|
||||||
if (all_visible && !all_visible_according_to_vm)
|
if (all_visible && !all_visible_according_to_vm)
|
||||||
{
|
{
|
||||||
|
uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* It should never be the case that the visibility map page is set
|
* It should never be the case that the visibility map page is set
|
||||||
* while the page-level bit is clear, but the reverse is allowed
|
* while the page-level bit is clear, but the reverse is allowed
|
||||||
@@ -1032,9 +1038,14 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
|
|||||||
* rare cases after a crash, it is not worth optimizing.
|
* rare cases after a crash, it is not worth optimizing.
|
||||||
*/
|
*/
|
||||||
PageSetAllVisible(page);
|
PageSetAllVisible(page);
|
||||||
|
if (all_frozen)
|
||||||
|
{
|
||||||
|
PageSetAllFrozen(page);
|
||||||
|
flags |= VISIBILITYMAP_ALL_FROZEN;
|
||||||
|
}
|
||||||
MarkBufferDirty(buf);
|
MarkBufferDirty(buf);
|
||||||
visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
|
visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
|
||||||
vmbuffer, visibility_cutoff_xid);
|
vmbuffer, visibility_cutoff_xid, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -1045,7 +1056,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
|
|||||||
* that something bad has happened.
|
* that something bad has happened.
|
||||||
*/
|
*/
|
||||||
else if (all_visible_according_to_vm && !PageIsAllVisible(page)
|
else if (all_visible_according_to_vm && !PageIsAllVisible(page)
|
||||||
&& visibilitymap_test(onerel, blkno, &vmbuffer))
|
&& VM_ALL_VISIBLE(onerel, blkno, &vmbuffer))
|
||||||
{
|
{
|
||||||
elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
|
elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
|
||||||
relname, blkno);
|
relname, blkno);
|
||||||
@@ -1074,6 +1085,28 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
|
|||||||
visibilitymap_clear(onerel, blkno, vmbuffer);
|
visibilitymap_clear(onerel, blkno, vmbuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the page is marked as all-visible but not all-frozen, we should
|
||||||
|
* so mark it. Note that all_frozen is only valid if all_visible is
|
||||||
|
* true, so we must check both.
|
||||||
|
*/
|
||||||
|
else if (all_visible_according_to_vm && all_visible && all_frozen &&
|
||||||
|
!VM_ALL_FROZEN(onerel, blkno, &vmbuffer))
|
||||||
|
{
|
||||||
|
/* Page is marked all-visible but should be all-frozen */
|
||||||
|
PageSetAllFrozen(page);
|
||||||
|
MarkBufferDirty(buf);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We can pass InvalidTransactionId as the cutoff XID here,
|
||||||
|
* because setting the all-frozen bit doesn't cause recovery
|
||||||
|
* conflicts.
|
||||||
|
*/
|
||||||
|
visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
|
||||||
|
vmbuffer, InvalidTransactionId,
|
||||||
|
VISIBILITYMAP_ALL_FROZEN);
|
||||||
|
}
|
||||||
|
|
||||||
UnlockReleaseBuffer(buf);
|
UnlockReleaseBuffer(buf);
|
||||||
|
|
||||||
/* Remember the location of the last page with nonremovable tuples */
|
/* Remember the location of the last page with nonremovable tuples */
|
||||||
@@ -1257,6 +1290,7 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
|
|||||||
OffsetNumber unused[MaxOffsetNumber];
|
OffsetNumber unused[MaxOffsetNumber];
|
||||||
int uncnt = 0;
|
int uncnt = 0;
|
||||||
TransactionId visibility_cutoff_xid;
|
TransactionId visibility_cutoff_xid;
|
||||||
|
bool all_frozen;
|
||||||
|
|
||||||
START_CRIT_SECTION();
|
START_CRIT_SECTION();
|
||||||
|
|
||||||
@@ -1308,19 +1342,34 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
|
|||||||
* dirty, exclusively locked, and, if needed, a full page image has been
|
* dirty, exclusively locked, and, if needed, a full page image has been
|
||||||
* emitted in the log_heap_clean() above.
|
* emitted in the log_heap_clean() above.
|
||||||
*/
|
*/
|
||||||
if (heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid))
|
if (heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid,
|
||||||
|
&all_frozen))
|
||||||
|
{
|
||||||
PageSetAllVisible(page);
|
PageSetAllVisible(page);
|
||||||
|
if (all_frozen)
|
||||||
|
PageSetAllFrozen(page);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* All the changes to the heap page have been done. If the all-visible
|
* All the changes to the heap page have been done. If the all-visible
|
||||||
* flag is now set, also set the VM bit.
|
* flag is now set, also set the VM all-visible bit (and, if possible,
|
||||||
|
* the all-frozen bit) unless this has already been done previously.
|
||||||
*/
|
*/
|
||||||
if (PageIsAllVisible(page) &&
|
if (PageIsAllVisible(page))
|
||||||
!visibilitymap_test(onerel, blkno, vmbuffer))
|
|
||||||
{
|
{
|
||||||
|
uint8 vm_status = visibilitymap_get_status(onerel, blkno, vmbuffer);
|
||||||
|
uint8 flags = 0;
|
||||||
|
|
||||||
|
/* Work out which VM bits (all-visible and/or all-frozen) still need to be set */
|
||||||
|
if ((vm_status & VISIBILITYMAP_ALL_VISIBLE) == 0)
|
||||||
|
flags |= VISIBILITYMAP_ALL_VISIBLE;
|
||||||
|
if ((vm_status & VISIBILITYMAP_ALL_FROZEN) == 0 && all_frozen)
|
||||||
|
flags |= VISIBILITYMAP_ALL_FROZEN;
|
||||||
|
|
||||||
Assert(BufferIsValid(*vmbuffer));
|
Assert(BufferIsValid(*vmbuffer));
|
||||||
visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr, *vmbuffer,
|
if (flags != 0)
|
||||||
visibility_cutoff_xid);
|
visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr,
|
||||||
|
*vmbuffer, visibility_cutoff_xid, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
return tupindex;
|
return tupindex;
|
||||||
@@ -1842,10 +1891,13 @@ vac_cmp_itemptr(const void *left, const void *right)
|
|||||||
/*
|
/*
|
||||||
* Check if every tuple in the given page is visible to all current and future
|
* Check if every tuple in the given page is visible to all current and future
|
||||||
* transactions. Also return the visibility_cutoff_xid which is the highest
|
* transactions. Also return the visibility_cutoff_xid which is the highest
|
||||||
* xmin amongst the visible tuples.
|
* xmin amongst the visible tuples. Set *all_frozen to true if every tuple
|
||||||
|
* on this page is frozen.
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cutoff_xid)
|
heap_page_is_all_visible(Relation rel, Buffer buf,
|
||||||
|
TransactionId *visibility_cutoff_xid,
|
||||||
|
bool *all_frozen)
|
||||||
{
|
{
|
||||||
Page page = BufferGetPage(buf);
|
Page page = BufferGetPage(buf);
|
||||||
BlockNumber blockno = BufferGetBlockNumber(buf);
|
BlockNumber blockno = BufferGetBlockNumber(buf);
|
||||||
@@ -1854,6 +1906,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut
|
|||||||
bool all_visible = true;
|
bool all_visible = true;
|
||||||
|
|
||||||
*visibility_cutoff_xid = InvalidTransactionId;
|
*visibility_cutoff_xid = InvalidTransactionId;
|
||||||
|
*all_frozen = true;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is a stripped down version of the line pointer scan in
|
* This is a stripped down version of the line pointer scan in
|
||||||
@@ -1918,6 +1971,11 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut
|
|||||||
/* Track newest xmin on page. */
|
/* Track newest xmin on page. */
|
||||||
if (TransactionIdFollows(xmin, *visibility_cutoff_xid))
|
if (TransactionIdFollows(xmin, *visibility_cutoff_xid))
|
||||||
*visibility_cutoff_xid = xmin;
|
*visibility_cutoff_xid = xmin;
|
||||||
|
|
||||||
|
/* Check whether this tuple is already frozen or not */
|
||||||
|
if (all_visible && *all_frozen &&
|
||||||
|
heap_tuple_needs_eventual_freeze(tuple.t_data))
|
||||||
|
*all_frozen = false;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@@ -1934,5 +1992,14 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut
|
|||||||
}
|
}
|
||||||
} /* scan along page */
|
} /* scan along page */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We don't bother clearing *all_frozen when the page is discovered not
|
||||||
|
* to be all-visible, so do that now if necessary. The page might fail
|
||||||
|
* to be all-frozen for other reasons anyway, but if it's not all-visible,
|
||||||
|
* then it definitely isn't all-frozen.
|
||||||
|
*/
|
||||||
|
if (!all_visible)
|
||||||
|
*all_frozen = false;
|
||||||
|
|
||||||
return all_visible;
|
return all_visible;
|
||||||
}
|
}
|
||||||
|
@@ -85,9 +85,9 @@ IndexOnlyNext(IndexOnlyScanState *node)
|
|||||||
* which all tuples are known visible to everybody. In any case,
|
* which all tuples are known visible to everybody. In any case,
|
||||||
* we'll use the index tuple not the heap tuple as the data source.
|
* we'll use the index tuple not the heap tuple as the data source.
|
||||||
*
|
*
|
||||||
* Note on Memory Ordering Effects: visibilitymap_test does not lock
|
* Note on Memory Ordering Effects: visibilitymap_get_status does not
|
||||||
* the visibility map buffer, and therefore the result we read here
|
* lock the visibility map buffer, and therefore the result we read
|
||||||
* could be slightly stale. However, it can't be stale enough to
|
* here could be slightly stale. However, it can't be stale enough to
|
||||||
* matter.
|
* matter.
|
||||||
*
|
*
|
||||||
* We need to detect clearing a VM bit due to an insert right away,
|
* We need to detect clearing a VM bit due to an insert right away,
|
||||||
@@ -114,9 +114,9 @@ IndexOnlyNext(IndexOnlyScanState *node)
|
|||||||
* It's worth going through this complexity to avoid needing to lock
|
* It's worth going through this complexity to avoid needing to lock
|
||||||
* the VM buffer, which could cause significant contention.
|
* the VM buffer, which could cause significant contention.
|
||||||
*/
|
*/
|
||||||
if (!visibilitymap_test(scandesc->heapRelation,
|
if (!VM_ALL_VISIBLE(scandesc->heapRelation,
|
||||||
ItemPointerGetBlockNumber(tid),
|
ItemPointerGetBlockNumber(tid),
|
||||||
&node->ioss_VMBuffer))
|
&node->ioss_VMBuffer))
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Rats, we have to visit the heap to check visibility.
|
* Rats, we have to visit the heap to check visibility.
|
||||||
|
@@ -270,8 +270,10 @@ ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid, RelFileNode
|
|||||||
* If we get passed InvalidTransactionId then we are a little surprised,
|
* If we get passed InvalidTransactionId then we are a little surprised,
|
||||||
* but it is theoretically possible in normal running. It also happens
|
* but it is theoretically possible in normal running. It also happens
|
||||||
* when replaying already applied WAL records after a standby crash or
|
* when replaying already applied WAL records after a standby crash or
|
||||||
* restart. If latestRemovedXid is invalid then there is no conflict. That
|
* restart, or when replaying an XLOG_HEAP2_VISIBLE record that marks as
|
||||||
* rule applies across all record types that suffer from this conflict.
|
* frozen a page which was already all-visible. If latestRemovedXid is
|
||||||
|
* invalid then there is no conflict. That rule applies across all record
|
||||||
|
* types that suffer from this conflict.
|
||||||
*/
|
*/
|
||||||
if (!TransactionIdIsValid(latestRemovedXid))
|
if (!TransactionIdIsValid(latestRemovedXid))
|
||||||
return;
|
return;
|
||||||
|
@@ -170,6 +170,7 @@ extern bool heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
|
|||||||
TransactionId cutoff_multi);
|
TransactionId cutoff_multi);
|
||||||
extern bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
|
extern bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
|
||||||
MultiXactId cutoff_multi, Buffer buf);
|
MultiXactId cutoff_multi, Buffer buf);
|
||||||
|
extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple);
|
||||||
|
|
||||||
extern Oid simple_heap_insert(Relation relation, HeapTuple tup);
|
extern Oid simple_heap_insert(Relation relation, HeapTuple tup);
|
||||||
extern void simple_heap_delete(Relation relation, ItemPointer tid);
|
extern void simple_heap_delete(Relation relation, ItemPointer tid);
|
||||||
|
@@ -320,9 +320,10 @@ typedef struct xl_heap_freeze_page
|
|||||||
typedef struct xl_heap_visible
|
typedef struct xl_heap_visible
|
||||||
{
|
{
|
||||||
TransactionId cutoff_xid;
|
TransactionId cutoff_xid;
|
||||||
|
uint8 flags;
|
||||||
} xl_heap_visible;
|
} xl_heap_visible;
|
||||||
|
|
||||||
#define SizeOfHeapVisible (offsetof(xl_heap_visible, cutoff_xid) + sizeof(TransactionId))
|
#define SizeOfHeapVisible (offsetof(xl_heap_visible, flags) + sizeof(uint8))
|
||||||
|
|
||||||
typedef struct xl_heap_new_cid
|
typedef struct xl_heap_new_cid
|
||||||
{
|
{
|
||||||
@@ -389,6 +390,6 @@ extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
|
|||||||
extern void heap_execute_freeze_tuple(HeapTupleHeader tuple,
|
extern void heap_execute_freeze_tuple(HeapTupleHeader tuple,
|
||||||
xl_heap_freeze_tuple *xlrec_tp);
|
xl_heap_freeze_tuple *xlrec_tp);
|
||||||
extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer,
|
extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer,
|
||||||
Buffer vm_buffer, TransactionId cutoff_xid);
|
Buffer vm_buffer, TransactionId cutoff_xid, uint8 flags);
|
||||||
|
|
||||||
#endif /* HEAPAM_XLOG_H */
|
#endif /* HEAPAM_XLOG_H */
|
||||||
|
@@ -19,15 +19,30 @@
|
|||||||
#include "storage/buf.h"
|
#include "storage/buf.h"
|
||||||
#include "utils/relcache.h"
|
#include "utils/relcache.h"
|
||||||
|
|
||||||
|
#define BITS_PER_HEAPBLOCK 2
|
||||||
|
#define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)
|
||||||
|
|
||||||
|
/* Flags for bit map */
|
||||||
|
#define VISIBILITYMAP_ALL_VISIBLE 0x01
|
||||||
|
#define VISIBILITYMAP_ALL_FROZEN 0x02
|
||||||
|
#define VISIBILITYMAP_VALID_BITS 0x03 /* OR of all valid visibilitymap flags bits */
|
||||||
|
|
||||||
|
/* Macros for visibilitymap test */
|
||||||
|
#define VM_ALL_VISIBLE(r, b, v) \
|
||||||
|
((visibilitymap_get_status((r), (b), (v)) & VISIBILITYMAP_ALL_VISIBLE) != 0)
|
||||||
|
#define VM_ALL_FROZEN(r, b, v) \
|
||||||
|
((visibilitymap_get_status((r), (b), (v)) & VISIBILITYMAP_ALL_FROZEN) != 0)
|
||||||
|
|
||||||
extern void visibilitymap_clear(Relation rel, BlockNumber heapBlk,
|
extern void visibilitymap_clear(Relation rel, BlockNumber heapBlk,
|
||||||
Buffer vmbuf);
|
Buffer vmbuf);
|
||||||
extern void visibilitymap_pin(Relation rel, BlockNumber heapBlk,
|
extern void visibilitymap_pin(Relation rel, BlockNumber heapBlk,
|
||||||
Buffer *vmbuf);
|
Buffer *vmbuf);
|
||||||
extern bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf);
|
extern bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf);
|
||||||
extern void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
|
extern void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
|
||||||
XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid);
|
XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid,
|
||||||
extern bool visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *vmbuf);
|
uint8 flags);
|
||||||
extern BlockNumber visibilitymap_count(Relation rel);
|
extern uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf);
|
||||||
|
extern void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen);
|
||||||
extern void visibilitymap_truncate(Relation rel, BlockNumber nheapblocks);
|
extern void visibilitymap_truncate(Relation rel, BlockNumber nheapblocks);
|
||||||
|
|
||||||
#endif /* VISIBILITYMAP_H */
|
#endif /* VISIBILITYMAP_H */
|
||||||
|
@@ -53,6 +53,6 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/* yyyymmddN */
|
/* yyyymmddN */
|
||||||
#define CATALOG_VERSION_NO 201602221
|
#define CATALOG_VERSION_NO 201603011
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -178,8 +178,10 @@ typedef PageHeaderData *PageHeader;
|
|||||||
* tuple? */
|
* tuple? */
|
||||||
#define PD_ALL_VISIBLE 0x0004 /* all tuples on page are visible to
|
#define PD_ALL_VISIBLE 0x0004 /* all tuples on page are visible to
|
||||||
* everyone */
|
* everyone */
|
||||||
|
#define PD_ALL_FROZEN 0x0008 /* all tuples on page are completely
|
||||||
|
frozen */
|
||||||
|
|
||||||
#define PD_VALID_FLAG_BITS 0x0007 /* OR of all valid pd_flags bits */
|
#define PD_VALID_FLAG_BITS 0x000F /* OR of all valid pd_flags bits */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Page layout version number 0 is for pre-7.3 Postgres releases.
|
* Page layout version number 0 is for pre-7.3 Postgres releases.
|
||||||
@@ -367,7 +369,12 @@ typedef PageHeaderData *PageHeader;
|
|||||||
#define PageSetAllVisible(page) \
|
#define PageSetAllVisible(page) \
|
||||||
(((PageHeader) (page))->pd_flags |= PD_ALL_VISIBLE)
|
(((PageHeader) (page))->pd_flags |= PD_ALL_VISIBLE)
|
||||||
#define PageClearAllVisible(page) \
|
#define PageClearAllVisible(page) \
|
||||||
(((PageHeader) (page))->pd_flags &= ~PD_ALL_VISIBLE)
|
(((PageHeader) (page))->pd_flags &= ~(PD_ALL_VISIBLE | PD_ALL_FROZEN))
|
||||||
|
|
||||||
|
#define PageIsAllFrozen(page) \
|
||||||
|
(((PageHeader) (page))->pd_flags & PD_ALL_FROZEN)
|
||||||
|
#define PageSetAllFrozen(page) \
|
||||||
|
(((PageHeader) (page))->pd_flags |= PD_ALL_FROZEN)
|
||||||
|
|
||||||
#define PageIsPrunable(page, oldestxmin) \
|
#define PageIsPrunable(page, oldestxmin) \
|
||||||
( \
|
( \
|
||||||
|
Reference in New Issue
Block a user