1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-24 01:29:19 +03:00

HOT updates. When we update a tuple without changing any of its indexed

columns, and the new version can be stored on the same heap page, we no longer
generate extra index entries for the new version.  Instead, index searches
follow the HOT-chain links to ensure they find the correct tuple version.

In addition, this patch introduces the ability to "prune" dead tuples on a
per-page basis, without having to do a complete VACUUM pass to recover space.
VACUUM is still needed to clean up dead index entries, however.

Pavan Deolasee, with help from a bunch of other people.
This commit is contained in:
Tom Lane
2007-09-20 17:56:33 +00:00
parent bbf4fdc253
commit 282d2a03dd
65 changed files with 3517 additions and 514 deletions

View File

@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/htup.h,v 1.93 2007/04/06 04:21:43 tgl Exp $
* $PostgreSQL: pgsql/src/include/access/htup.h,v 1.94 2007/09/20 17:56:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -184,8 +184,12 @@ typedef HeapTupleHeaderData *HeapTupleHeader;
/*
* information stored in t_infomask2:
*/
#define HEAP_NATTS_MASK 0x7FF /* 11 bits for number of attributes */
/* bits 0xF800 are currently unused */
#define HEAP_NATTS_MASK 0x07FF /* 11 bits for number of attributes */
/* bits 0x3800 are available */
#define HEAP_HOT_UPDATED 0x4000 /* tuple was HOT-updated */
#define HEAP_ONLY_TUPLE 0x8000 /* this is heap-only tuple */
#define HEAP2_XACT_MASK 0xC000 /* visibility-related bits */
/*
* HeapTupleHeader accessor macros
@@ -201,7 +205,7 @@ typedef HeapTupleHeaderData *HeapTupleHeader;
#define HeapTupleHeaderSetXmin(tup, xid) \
( \
TransactionIdStore((xid), &(tup)->t_choice.t_heap.t_xmin) \
(tup)->t_choice.t_heap.t_xmin = (xid) \
)
#define HeapTupleHeaderGetXmax(tup) \
@@ -211,7 +215,7 @@ typedef HeapTupleHeaderData *HeapTupleHeader;
#define HeapTupleHeaderSetXmax(tup, xid) \
( \
TransactionIdStore((xid), &(tup)->t_choice.t_heap.t_xmax) \
(tup)->t_choice.t_heap.t_xmax = (xid) \
)
/*
@@ -255,7 +259,7 @@ do { \
#define HeapTupleHeaderSetXvac(tup, xid) \
do { \
Assert((tup)->t_infomask & HEAP_MOVED); \
TransactionIdStore((xid), &(tup)->t_choice.t_heap.t_field3.t_xvac); \
(tup)->t_choice.t_heap.t_field3.t_xvac = (xid); \
} while (0)
#define HeapTupleHeaderGetDatumLength(tup) \
@@ -298,6 +302,43 @@ do { \
*((Oid *) ((char *)(tup) + (tup)->t_hoff - sizeof(Oid))) = (oid); \
} while (0)
/*
* Note that we stop considering a tuple HOT-updated as soon as it is known
* aborted or the would-be updating transaction is known aborted. For best
* efficiency, check tuple visibility before using this macro, so that the
* INVALID bits will be as up to date as possible.
*/
#define HeapTupleHeaderIsHotUpdated(tup) \
( \
((tup)->t_infomask2 & HEAP_HOT_UPDATED) != 0 && \
((tup)->t_infomask & (HEAP_XMIN_INVALID | HEAP_XMAX_INVALID)) == 0 \
)
#define HeapTupleHeaderSetHotUpdated(tup) \
( \
(tup)->t_infomask2 |= HEAP_HOT_UPDATED \
)
#define HeapTupleHeaderClearHotUpdated(tup) \
( \
(tup)->t_infomask2 &= ~HEAP_HOT_UPDATED \
)
#define HeapTupleHeaderIsHeapOnly(tup) \
( \
(tup)->t_infomask2 & HEAP_ONLY_TUPLE \
)
#define HeapTupleHeaderSetHeapOnly(tup) \
( \
(tup)->t_infomask2 |= HEAP_ONLY_TUPLE \
)
#define HeapTupleHeaderClearHeapOnly(tup) \
( \
(tup)->t_infomask2 &= ~HEAP_ONLY_TUPLE \
)
#define HeapTupleHeaderGetNatts(tup) \
((tup)->t_infomask2 & HEAP_NATTS_MASK)
@@ -331,6 +372,11 @@ do { \
* fit on one heap page. (Note that indexes could have more, because they
* use a smaller tuple header.) We arrive at the divisor because each tuple
* must be maxaligned, and it must have an associated item pointer.
*
* Note: with HOT, there could theoretically be more line pointers (not actual
* tuples) than this on a heap page. However we constrain the number of line
* pointers to this anyway, to avoid excessive line-pointer bloat and not
* require increases in the size of work arrays.
*/
#define MaxHeapTuplesPerPage \
((int) ((BLCKSZ - offsetof(PageHeaderData, pd_linp)) / \
@@ -484,6 +530,24 @@ typedef HeapTupleData *HeapTuple;
#define HeapTupleHasExternal(tuple) \
(((tuple)->t_data->t_infomask & HEAP_HASEXTERNAL) != 0)
#define HeapTupleIsHotUpdated(tuple) \
HeapTupleHeaderIsHotUpdated((tuple)->t_data)
#define HeapTupleSetHotUpdated(tuple) \
HeapTupleHeaderSetHotUpdated((tuple)->t_data)
#define HeapTupleClearHotUpdated(tuple) \
HeapTupleHeaderClearHotUpdated((tuple)->t_data)
#define HeapTupleIsHeapOnly(tuple) \
HeapTupleHeaderIsHeapOnly((tuple)->t_data)
#define HeapTupleSetHeapOnly(tuple) \
HeapTupleHeaderSetHeapOnly((tuple)->t_data)
#define HeapTupleClearHeapOnly(tuple) \
HeapTupleHeaderClearHeapOnly((tuple)->t_data)
#define HeapTupleGetOid(tuple) \
HeapTupleHeaderGetOid((tuple)->t_data)
@@ -497,27 +561,30 @@ typedef HeapTupleData *HeapTuple;
* XLOG allows to store some information in high 4 bits of log
* record xl_info field. We use 3 for opcode and one for init bit.
*/
#define XLOG_HEAP_INSERT 0x00
#define XLOG_HEAP_DELETE 0x10
#define XLOG_HEAP_UPDATE 0x20
#define XLOG_HEAP_MOVE 0x30
#define XLOG_HEAP_CLEAN 0x40
#define XLOG_HEAP_NEWPAGE 0x50
#define XLOG_HEAP_LOCK 0x60
#define XLOG_HEAP_INPLACE 0x70
#define XLOG_HEAP_OPMASK 0x70
#define XLOG_HEAP_INSERT 0x00
#define XLOG_HEAP_DELETE 0x10
#define XLOG_HEAP_UPDATE 0x20
#define XLOG_HEAP_MOVE 0x30
#define XLOG_HEAP_HOT_UPDATE 0x40
#define XLOG_HEAP_NEWPAGE 0x50
#define XLOG_HEAP_LOCK 0x60
#define XLOG_HEAP_INPLACE 0x70
#define XLOG_HEAP_OPMASK 0x70
/*
* When we insert 1st item on new page in INSERT/UPDATE
* we can (and we do) restore entire page in redo
*/
#define XLOG_HEAP_INIT_PAGE 0x80
#define XLOG_HEAP_INIT_PAGE 0x80
/*
* We ran out of opcodes, so heapam.c now has a second RmgrId. These opcodes
* are associated with RM_HEAP2_ID, but are not logically different from
* the ones above associated with RM_HEAP_ID. We apply XLOG_HEAP_OPMASK,
* although currently XLOG_HEAP_INIT_PAGE is not used for any of these.
*/
#define XLOG_HEAP2_FREEZE 0x00
#define XLOG_HEAP2_FREEZE 0x00
#define XLOG_HEAP2_CLEAN 0x10
#define XLOG_HEAP2_CLEAN_MOVE 0x20
/*
* All what we need to find changed tuple
@@ -569,7 +636,7 @@ typedef struct xl_heap_insert
#define SizeOfHeapInsert (offsetof(xl_heap_insert, target) + SizeOfHeapTid)
/* This is what we need to know about update|move */
/* This is what we need to know about update|move|hot_update */
typedef struct xl_heap_update
{
xl_heaptid target; /* deleted tuple id */
@@ -580,15 +647,34 @@ typedef struct xl_heap_update
#define SizeOfHeapUpdate (offsetof(xl_heap_update, newtid) + SizeOfIptrData)
/* This is what we need to know about vacuum page cleanup */
/*
* This is what we need to know about vacuum page cleanup/redirect
*
* The array of OffsetNumbers following the fixed part of the record contains:
* * for each redirected item: the item offset, then the offset redirected to
* * for each now-dead item: the item offset
* * for each now-unused item: the item offset
* The total number of OffsetNumbers is therefore 2*nredirected+ndead+nunused.
* Note that nunused is not explicitly stored, but may be found by reference
* to the total record length.
*
* If the opcode is CLEAN_MOVE instead of CLEAN, then each redirection pair
* should be interpreted as physically moving the "to" item pointer to the
* "from" slot, rather than placing a redirection item in the "from" slot.
* The moved pointers should be replaced by LP_UNUSED items (there will not
* be explicit entries in the "now-unused" list for this). Also, the
* HEAP_ONLY bit in the moved tuples must be turned off.
*/
typedef struct xl_heap_clean
{
RelFileNode node;
BlockNumber block;
/* UNUSED OFFSET NUMBERS FOLLOW AT THE END */
uint16 nredirected;
uint16 ndead;
/* OFFSET NUMBERS FOLLOW */
} xl_heap_clean;
#define SizeOfHeapClean (offsetof(xl_heap_clean, block) + sizeof(BlockNumber))
#define SizeOfHeapClean (offsetof(xl_heap_clean, ndead) + sizeof(uint16))
/* This is for replacing a page's contents in toto */
/* NB: this is used for indexes as well as heaps */