mirror of
https://github.com/postgres/postgres.git
synced 2025-07-28 23:42:10 +03:00
Split index xlog headers from other private index headers.
The xlog-specific headers need to be included in both frontend code - specifically, pg_waldump - and the backend, but the remainder of the private headers for each index are only needed by the backend. By splitting the xlog stuff out into separate headers, pg_waldump pulls in fewer backend headers, which is a good thing. Patch by me, reviewed by Michael Paquier and Andres Freund, per a complaint from Dilip Kumar. Discussion: http://postgr.es/m/CA+TgmoZ=F=GkxV0YEv-A8tb+AEGy_Qa7GSiJ8deBKFATnzfEug@mail.gmail.com
This commit is contained in:
@ -15,6 +15,7 @@
|
|||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
#include "access/gin_private.h"
|
#include "access/gin_private.h"
|
||||||
|
#include "access/ginxlog.h"
|
||||||
#include "access/xloginsert.h"
|
#include "access/xloginsert.h"
|
||||||
#include "miscadmin.h"
|
#include "miscadmin.h"
|
||||||
#include "utils/memutils.h"
|
#include "utils/memutils.h"
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
#include "access/gin_private.h"
|
#include "access/gin_private.h"
|
||||||
|
#include "access/ginxlog.h"
|
||||||
#include "access/xloginsert.h"
|
#include "access/xloginsert.h"
|
||||||
#include "lib/ilist.h"
|
#include "lib/ilist.h"
|
||||||
#include "miscadmin.h"
|
#include "miscadmin.h"
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
#include "access/gin_private.h"
|
#include "access/gin_private.h"
|
||||||
|
#include "access/ginxlog.h"
|
||||||
#include "access/xloginsert.h"
|
#include "access/xloginsert.h"
|
||||||
#include "miscadmin.h"
|
#include "miscadmin.h"
|
||||||
#include "utils/rel.h"
|
#include "utils/rel.h"
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
#include "access/gin_private.h"
|
#include "access/gin_private.h"
|
||||||
|
#include "access/ginxlog.h"
|
||||||
#include "access/xloginsert.h"
|
#include "access/xloginsert.h"
|
||||||
#include "access/xlog.h"
|
#include "access/xlog.h"
|
||||||
#include "commands/vacuum.h"
|
#include "commands/vacuum.h"
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
#include "access/gin_private.h"
|
#include "access/gin_private.h"
|
||||||
|
#include "access/ginxlog.h"
|
||||||
#include "access/xloginsert.h"
|
#include "access/xloginsert.h"
|
||||||
#include "catalog/index.h"
|
#include "catalog/index.h"
|
||||||
#include "miscadmin.h"
|
#include "miscadmin.h"
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
#include "access/gin_private.h"
|
#include "access/gin_private.h"
|
||||||
|
#include "access/ginxlog.h"
|
||||||
#include "access/reloptions.h"
|
#include "access/reloptions.h"
|
||||||
#include "access/xloginsert.h"
|
#include "access/xloginsert.h"
|
||||||
#include "catalog/pg_collation.h"
|
#include "catalog/pg_collation.h"
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
#include "access/gin_private.h"
|
#include "access/gin_private.h"
|
||||||
|
#include "access/ginxlog.h"
|
||||||
#include "access/xloginsert.h"
|
#include "access/xloginsert.h"
|
||||||
#include "commands/vacuum.h"
|
#include "commands/vacuum.h"
|
||||||
#include "miscadmin.h"
|
#include "miscadmin.h"
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
|
|
||||||
#include "access/bufmask.h"
|
#include "access/bufmask.h"
|
||||||
#include "access/gin_private.h"
|
#include "access/gin_private.h"
|
||||||
|
#include "access/ginxlog.h"
|
||||||
#include "access/xlogutils.h"
|
#include "access/xlogutils.h"
|
||||||
#include "utils/memutils.h"
|
#include "utils/memutils.h"
|
||||||
|
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
|
|
||||||
#include "access/genam.h"
|
#include "access/genam.h"
|
||||||
#include "access/gist_private.h"
|
#include "access/gist_private.h"
|
||||||
|
#include "access/gistxlog.h"
|
||||||
#include "access/xloginsert.h"
|
#include "access/xloginsert.h"
|
||||||
#include "catalog/index.h"
|
#include "catalog/index.h"
|
||||||
#include "miscadmin.h"
|
#include "miscadmin.h"
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
|
|
||||||
#include "access/bufmask.h"
|
#include "access/bufmask.h"
|
||||||
#include "access/gist_private.h"
|
#include "access/gist_private.h"
|
||||||
|
#include "access/gistxlog.h"
|
||||||
#include "access/xloginsert.h"
|
#include "access/xloginsert.h"
|
||||||
#include "access/xlogutils.h"
|
#include "access/xlogutils.h"
|
||||||
#include "utils/memutils.h"
|
#include "utils/memutils.h"
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
|
|
||||||
#include "access/heapam.h"
|
#include "access/heapam.h"
|
||||||
#include "access/nbtree.h"
|
#include "access/nbtree.h"
|
||||||
|
#include "access/nbtxlog.h"
|
||||||
#include "access/transam.h"
|
#include "access/transam.h"
|
||||||
#include "access/xloginsert.h"
|
#include "access/xloginsert.h"
|
||||||
#include "miscadmin.h"
|
#include "miscadmin.h"
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
#include "access/nbtree.h"
|
#include "access/nbtree.h"
|
||||||
|
#include "access/nbtxlog.h"
|
||||||
#include "access/transam.h"
|
#include "access/transam.h"
|
||||||
#include "access/xlog.h"
|
#include "access/xlog.h"
|
||||||
#include "access/xloginsert.h"
|
#include "access/xloginsert.h"
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
#include "access/bufmask.h"
|
#include "access/bufmask.h"
|
||||||
#include "access/heapam_xlog.h"
|
#include "access/heapam_xlog.h"
|
||||||
#include "access/nbtree.h"
|
#include "access/nbtree.h"
|
||||||
|
#include "access/nbtxlog.h"
|
||||||
#include "access/transam.h"
|
#include "access/transam.h"
|
||||||
#include "access/xlog.h"
|
#include "access/xlog.h"
|
||||||
#include "access/xlogutils.h"
|
#include "access/xlogutils.h"
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
*/
|
*/
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
#include "access/gin_private.h"
|
#include "access/ginxlog.h"
|
||||||
#include "access/xlogutils.h"
|
#include "access/xlogutils.h"
|
||||||
#include "lib/stringinfo.h"
|
#include "lib/stringinfo.h"
|
||||||
#include "storage/relfilenode.h"
|
#include "storage/relfilenode.h"
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
*/
|
*/
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
#include "access/gist_private.h"
|
#include "access/gistxlog.h"
|
||||||
#include "lib/stringinfo.h"
|
#include "lib/stringinfo.h"
|
||||||
#include "storage/relfilenode.h"
|
#include "storage/relfilenode.h"
|
||||||
|
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
*/
|
*/
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
#include "access/nbtree.h"
|
#include "access/nbtxlog.h"
|
||||||
|
|
||||||
void
|
void
|
||||||
btree_desc(StringInfo buf, XLogReaderState *record)
|
btree_desc(StringInfo buf, XLogReaderState *record)
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
*/
|
*/
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
#include "access/spgist_private.h"
|
#include "access/spgxlog.h"
|
||||||
|
|
||||||
void
|
void
|
||||||
spg_desc(StringInfo buf, XLogReaderState *record)
|
spg_desc(StringInfo buf, XLogReaderState *record)
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
|
|
||||||
#include "access/genam.h"
|
#include "access/genam.h"
|
||||||
#include "access/spgist_private.h"
|
#include "access/spgist_private.h"
|
||||||
|
#include "access/spgxlog.h"
|
||||||
#include "access/xloginsert.h"
|
#include "access/xloginsert.h"
|
||||||
#include "miscadmin.h"
|
#include "miscadmin.h"
|
||||||
#include "storage/bufmgr.h"
|
#include "storage/bufmgr.h"
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
|
|
||||||
#include "access/genam.h"
|
#include "access/genam.h"
|
||||||
#include "access/spgist_private.h"
|
#include "access/spgist_private.h"
|
||||||
|
#include "access/spgxlog.h"
|
||||||
#include "access/xlog.h"
|
#include "access/xlog.h"
|
||||||
#include "access/xloginsert.h"
|
#include "access/xloginsert.h"
|
||||||
#include "catalog/index.h"
|
#include "catalog/index.h"
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
|
|
||||||
#include "access/genam.h"
|
#include "access/genam.h"
|
||||||
#include "access/spgist_private.h"
|
#include "access/spgist_private.h"
|
||||||
|
#include "access/spgxlog.h"
|
||||||
#include "access/transam.h"
|
#include "access/transam.h"
|
||||||
#include "access/xloginsert.h"
|
#include "access/xloginsert.h"
|
||||||
#include "catalog/storage_xlog.h"
|
#include "catalog/storage_xlog.h"
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
|
|
||||||
#include "access/bufmask.h"
|
#include "access/bufmask.h"
|
||||||
#include "access/spgist_private.h"
|
#include "access/spgist_private.h"
|
||||||
|
#include "access/spgxlog.h"
|
||||||
#include "access/transam.h"
|
#include "access/transam.h"
|
||||||
#include "access/xlog.h"
|
#include "access/xlog.h"
|
||||||
#include "access/xlogutils.h"
|
#include "access/xlogutils.h"
|
||||||
|
@ -9,15 +9,15 @@
|
|||||||
|
|
||||||
#include "access/clog.h"
|
#include "access/clog.h"
|
||||||
#include "access/commit_ts.h"
|
#include "access/commit_ts.h"
|
||||||
#include "access/gin.h"
|
#include "access/ginxlog.h"
|
||||||
#include "access/gist_private.h"
|
#include "access/gistxlog.h"
|
||||||
#include "access/generic_xlog.h"
|
#include "access/generic_xlog.h"
|
||||||
#include "access/hash_xlog.h"
|
#include "access/hash_xlog.h"
|
||||||
#include "access/heapam_xlog.h"
|
#include "access/heapam_xlog.h"
|
||||||
#include "access/brin_xlog.h"
|
#include "access/brin_xlog.h"
|
||||||
#include "access/multixact.h"
|
#include "access/multixact.h"
|
||||||
#include "access/nbtree.h"
|
#include "access/nbtxlog.h"
|
||||||
#include "access/spgist.h"
|
#include "access/spgxlog.h"
|
||||||
#include "access/xact.h"
|
#include "access/xact.h"
|
||||||
#include "access/xlog_internal.h"
|
#include "access/xlog_internal.h"
|
||||||
#include "catalog/storage_xlog.h"
|
#include "catalog/storage_xlog.h"
|
||||||
|
@ -12,14 +12,14 @@
|
|||||||
#include "access/clog.h"
|
#include "access/clog.h"
|
||||||
#include "access/commit_ts.h"
|
#include "access/commit_ts.h"
|
||||||
#include "access/generic_xlog.h"
|
#include "access/generic_xlog.h"
|
||||||
#include "access/gin.h"
|
#include "access/ginxlog.h"
|
||||||
#include "access/gist_private.h"
|
#include "access/gistxlog.h"
|
||||||
#include "access/hash_xlog.h"
|
#include "access/hash_xlog.h"
|
||||||
#include "access/heapam_xlog.h"
|
#include "access/heapam_xlog.h"
|
||||||
#include "access/multixact.h"
|
#include "access/multixact.h"
|
||||||
#include "access/nbtree.h"
|
#include "access/nbtxlog.h"
|
||||||
#include "access/rmgr.h"
|
#include "access/rmgr.h"
|
||||||
#include "access/spgist.h"
|
#include "access/spgxlog.h"
|
||||||
#include "access/xact.h"
|
#include "access/xact.h"
|
||||||
#include "access/xlog_internal.h"
|
#include "access/xlog_internal.h"
|
||||||
#include "catalog/storage_xlog.h"
|
#include "catalog/storage_xlog.h"
|
||||||
|
@ -73,12 +73,4 @@ extern int gin_pending_list_limit;
|
|||||||
extern void ginGetStats(Relation index, GinStatsData *stats);
|
extern void ginGetStats(Relation index, GinStatsData *stats);
|
||||||
extern void ginUpdateStats(Relation index, const GinStatsData *stats);
|
extern void ginUpdateStats(Relation index, const GinStatsData *stats);
|
||||||
|
|
||||||
/* ginxlog.c */
|
|
||||||
extern void gin_redo(XLogReaderState *record);
|
|
||||||
extern void gin_desc(StringInfo buf, XLogReaderState *record);
|
|
||||||
extern const char *gin_identify(uint8 info);
|
|
||||||
extern void gin_xlog_startup(void);
|
|
||||||
extern void gin_xlog_cleanup(void);
|
|
||||||
extern void gin_mask(char *pagedata, BlockNumber blkno);
|
|
||||||
|
|
||||||
#endif /* GIN_H */
|
#endif /* GIN_H */
|
||||||
|
@ -12,309 +12,12 @@
|
|||||||
|
|
||||||
#include "access/amapi.h"
|
#include "access/amapi.h"
|
||||||
#include "access/gin.h"
|
#include "access/gin.h"
|
||||||
|
#include "access/ginblock.h"
|
||||||
#include "access/itup.h"
|
#include "access/itup.h"
|
||||||
#include "fmgr.h"
|
#include "fmgr.h"
|
||||||
#include "storage/bufmgr.h"
|
#include "storage/bufmgr.h"
|
||||||
#include "lib/rbtree.h"
|
#include "lib/rbtree.h"
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Page opaque data in an inverted index page.
|
|
||||||
*
|
|
||||||
* Note: GIN does not include a page ID word as do the other index types.
|
|
||||||
* This is OK because the opaque data is only 8 bytes and so can be reliably
|
|
||||||
* distinguished by size. Revisit this if the size ever increases.
|
|
||||||
* Further note: as of 9.2, SP-GiST also uses 8-byte special space, as does
|
|
||||||
* BRIN as of 9.5. This is still OK, as long as GIN isn't using all of the
|
|
||||||
* high-order bits in its flags word, because that way the flags word cannot
|
|
||||||
* match the page IDs used by SP-GiST and BRIN.
|
|
||||||
*/
|
|
||||||
typedef struct GinPageOpaqueData
|
|
||||||
{
|
|
||||||
BlockNumber rightlink; /* next page if any */
|
|
||||||
OffsetNumber maxoff; /* number of PostingItems on GIN_DATA &
|
|
||||||
* ~GIN_LEAF page. On GIN_LIST page, number of
|
|
||||||
* heap tuples. */
|
|
||||||
uint16 flags; /* see bit definitions below */
|
|
||||||
} GinPageOpaqueData;
|
|
||||||
|
|
||||||
typedef GinPageOpaqueData *GinPageOpaque;
|
|
||||||
|
|
||||||
#define GIN_DATA (1 << 0)
|
|
||||||
#define GIN_LEAF (1 << 1)
|
|
||||||
#define GIN_DELETED (1 << 2)
|
|
||||||
#define GIN_META (1 << 3)
|
|
||||||
#define GIN_LIST (1 << 4)
|
|
||||||
#define GIN_LIST_FULLROW (1 << 5) /* makes sense only on GIN_LIST page */
|
|
||||||
#define GIN_INCOMPLETE_SPLIT (1 << 6) /* page was split, but parent not
|
|
||||||
* updated */
|
|
||||||
#define GIN_COMPRESSED (1 << 7)
|
|
||||||
|
|
||||||
/* Page numbers of fixed-location pages */
|
|
||||||
#define GIN_METAPAGE_BLKNO (0)
|
|
||||||
#define GIN_ROOT_BLKNO (1)
|
|
||||||
|
|
||||||
typedef struct GinMetaPageData
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Pointers to head and tail of pending list, which consists of GIN_LIST
|
|
||||||
* pages. These store fast-inserted entries that haven't yet been moved
|
|
||||||
* into the regular GIN structure.
|
|
||||||
*/
|
|
||||||
BlockNumber head;
|
|
||||||
BlockNumber tail;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Free space in bytes in the pending list's tail page.
|
|
||||||
*/
|
|
||||||
uint32 tailFreeSize;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We store both number of pages and number of heap tuples that are in the
|
|
||||||
* pending list.
|
|
||||||
*/
|
|
||||||
BlockNumber nPendingPages;
|
|
||||||
int64 nPendingHeapTuples;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Statistics for planner use (accurate as of last VACUUM)
|
|
||||||
*/
|
|
||||||
BlockNumber nTotalPages;
|
|
||||||
BlockNumber nEntryPages;
|
|
||||||
BlockNumber nDataPages;
|
|
||||||
int64 nEntries;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* GIN version number (ideally this should have been at the front, but too
|
|
||||||
* late now. Don't move it!)
|
|
||||||
*
|
|
||||||
* Currently 2 (for indexes initialized in 9.4 or later)
|
|
||||||
*
|
|
||||||
* Version 1 (indexes initialized in version 9.1, 9.2 or 9.3) is
|
|
||||||
* compatible, but may contain uncompressed posting tree (leaf) pages and
|
|
||||||
* posting lists. They will be converted to compressed format when
|
|
||||||
* modified.
|
|
||||||
*
|
|
||||||
* Version 0 (indexes initialized in 9.0 or before) is compatible but may
|
|
||||||
* be missing null entries, including both null keys and placeholders.
|
|
||||||
* Reject full-index-scan attempts on such indexes.
|
|
||||||
*/
|
|
||||||
int32 ginVersion;
|
|
||||||
} GinMetaPageData;
|
|
||||||
|
|
||||||
#define GIN_CURRENT_VERSION 2
|
|
||||||
|
|
||||||
#define GinPageGetMeta(p) \
|
|
||||||
((GinMetaPageData *) PageGetContents(p))
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Macros for accessing a GIN index page's opaque data
|
|
||||||
*/
|
|
||||||
#define GinPageGetOpaque(page) ( (GinPageOpaque) PageGetSpecialPointer(page) )
|
|
||||||
|
|
||||||
#define GinPageIsLeaf(page) ( (GinPageGetOpaque(page)->flags & GIN_LEAF) != 0 )
|
|
||||||
#define GinPageSetLeaf(page) ( GinPageGetOpaque(page)->flags |= GIN_LEAF )
|
|
||||||
#define GinPageSetNonLeaf(page) ( GinPageGetOpaque(page)->flags &= ~GIN_LEAF )
|
|
||||||
#define GinPageIsData(page) ( (GinPageGetOpaque(page)->flags & GIN_DATA) != 0 )
|
|
||||||
#define GinPageSetData(page) ( GinPageGetOpaque(page)->flags |= GIN_DATA )
|
|
||||||
#define GinPageIsList(page) ( (GinPageGetOpaque(page)->flags & GIN_LIST) != 0 )
|
|
||||||
#define GinPageSetList(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST )
|
|
||||||
#define GinPageHasFullRow(page) ( (GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW) != 0 )
|
|
||||||
#define GinPageSetFullRow(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST_FULLROW )
|
|
||||||
#define GinPageIsCompressed(page) ( (GinPageGetOpaque(page)->flags & GIN_COMPRESSED) != 0 )
|
|
||||||
#define GinPageSetCompressed(page) ( GinPageGetOpaque(page)->flags |= GIN_COMPRESSED )
|
|
||||||
|
|
||||||
#define GinPageIsDeleted(page) ( (GinPageGetOpaque(page)->flags & GIN_DELETED) != 0 )
|
|
||||||
#define GinPageSetDeleted(page) ( GinPageGetOpaque(page)->flags |= GIN_DELETED)
|
|
||||||
#define GinPageSetNonDeleted(page) ( GinPageGetOpaque(page)->flags &= ~GIN_DELETED)
|
|
||||||
#define GinPageIsIncompleteSplit(page) ( (GinPageGetOpaque(page)->flags & GIN_INCOMPLETE_SPLIT) != 0 )
|
|
||||||
|
|
||||||
#define GinPageRightMost(page) ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We use our own ItemPointerGet(BlockNumber|OffsetNumber)
|
|
||||||
* to avoid Asserts, since sometimes the ip_posid isn't "valid"
|
|
||||||
*/
|
|
||||||
#define GinItemPointerGetBlockNumber(pointer) \
|
|
||||||
BlockIdGetBlockNumber(&(pointer)->ip_blkid)
|
|
||||||
|
|
||||||
#define GinItemPointerGetOffsetNumber(pointer) \
|
|
||||||
((pointer)->ip_posid)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Special-case item pointer values needed by the GIN search logic.
|
|
||||||
* MIN: sorts less than any valid item pointer
|
|
||||||
* MAX: sorts greater than any valid item pointer
|
|
||||||
* LOSSY PAGE: indicates a whole heap page, sorts after normal item
|
|
||||||
* pointers for that page
|
|
||||||
* Note that these are all distinguishable from an "invalid" item pointer
|
|
||||||
* (which is InvalidBlockNumber/0) as well as from all normal item
|
|
||||||
* pointers (which have item numbers in the range 1..MaxHeapTuplesPerPage).
|
|
||||||
*/
|
|
||||||
#define ItemPointerSetMin(p) \
|
|
||||||
ItemPointerSet((p), (BlockNumber)0, (OffsetNumber)0)
|
|
||||||
#define ItemPointerIsMin(p) \
|
|
||||||
(GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0 && \
|
|
||||||
GinItemPointerGetBlockNumber(p) == (BlockNumber)0)
|
|
||||||
#define ItemPointerSetMax(p) \
|
|
||||||
ItemPointerSet((p), InvalidBlockNumber, (OffsetNumber)0xffff)
|
|
||||||
#define ItemPointerIsMax(p) \
|
|
||||||
(GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \
|
|
||||||
GinItemPointerGetBlockNumber(p) == InvalidBlockNumber)
|
|
||||||
#define ItemPointerSetLossyPage(p, b) \
|
|
||||||
ItemPointerSet((p), (b), (OffsetNumber)0xffff)
|
|
||||||
#define ItemPointerIsLossyPage(p) \
|
|
||||||
(GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \
|
|
||||||
GinItemPointerGetBlockNumber(p) != InvalidBlockNumber)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Posting item in a non-leaf posting-tree page
|
|
||||||
*/
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
/* We use BlockIdData not BlockNumber to avoid padding space wastage */
|
|
||||||
BlockIdData child_blkno;
|
|
||||||
ItemPointerData key;
|
|
||||||
} PostingItem;
|
|
||||||
|
|
||||||
#define PostingItemGetBlockNumber(pointer) \
|
|
||||||
BlockIdGetBlockNumber(&(pointer)->child_blkno)
|
|
||||||
|
|
||||||
#define PostingItemSetBlockNumber(pointer, blockNumber) \
|
|
||||||
BlockIdSet(&((pointer)->child_blkno), (blockNumber))
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Category codes to distinguish placeholder nulls from ordinary NULL keys.
|
|
||||||
* Note that the datatype size and the first two code values are chosen to be
|
|
||||||
* compatible with the usual usage of bool isNull flags.
|
|
||||||
*
|
|
||||||
* GIN_CAT_EMPTY_QUERY is never stored in the index; and notice that it is
|
|
||||||
* chosen to sort before not after regular key values.
|
|
||||||
*/
|
|
||||||
typedef signed char GinNullCategory;
|
|
||||||
|
|
||||||
#define GIN_CAT_NORM_KEY 0 /* normal, non-null key value */
|
|
||||||
#define GIN_CAT_NULL_KEY 1 /* null key value */
|
|
||||||
#define GIN_CAT_EMPTY_ITEM 2 /* placeholder for zero-key item */
|
|
||||||
#define GIN_CAT_NULL_ITEM 3 /* placeholder for null item */
|
|
||||||
#define GIN_CAT_EMPTY_QUERY (-1) /* placeholder for full-scan query */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Access macros for null category byte in entry tuples
|
|
||||||
*/
|
|
||||||
#define GinCategoryOffset(itup,ginstate) \
|
|
||||||
(IndexInfoFindDataOffset((itup)->t_info) + \
|
|
||||||
((ginstate)->oneCol ? 0 : sizeof(int16)))
|
|
||||||
#define GinGetNullCategory(itup,ginstate) \
|
|
||||||
(*((GinNullCategory *) ((char*)(itup) + GinCategoryOffset(itup,ginstate))))
|
|
||||||
#define GinSetNullCategory(itup,ginstate,c) \
|
|
||||||
(*((GinNullCategory *) ((char*)(itup) + GinCategoryOffset(itup,ginstate))) = (c))
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Access macros for leaf-page entry tuples (see discussion in README)
|
|
||||||
*/
|
|
||||||
#define GinGetNPosting(itup) GinItemPointerGetOffsetNumber(&(itup)->t_tid)
|
|
||||||
#define GinSetNPosting(itup,n) ItemPointerSetOffsetNumber(&(itup)->t_tid,n)
|
|
||||||
#define GIN_TREE_POSTING ((OffsetNumber)0xffff)
|
|
||||||
#define GinIsPostingTree(itup) (GinGetNPosting(itup) == GIN_TREE_POSTING)
|
|
||||||
#define GinSetPostingTree(itup, blkno) ( GinSetNPosting((itup),GIN_TREE_POSTING), ItemPointerSetBlockNumber(&(itup)->t_tid, blkno) )
|
|
||||||
#define GinGetPostingTree(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid)
|
|
||||||
|
|
||||||
#define GIN_ITUP_COMPRESSED (1U << 31)
|
|
||||||
#define GinGetPostingOffset(itup) (GinItemPointerGetBlockNumber(&(itup)->t_tid) & (~GIN_ITUP_COMPRESSED))
|
|
||||||
#define GinSetPostingOffset(itup,n) ItemPointerSetBlockNumber(&(itup)->t_tid,(n)|GIN_ITUP_COMPRESSED)
|
|
||||||
#define GinGetPosting(itup) ((Pointer) ((char*)(itup) + GinGetPostingOffset(itup)))
|
|
||||||
#define GinItupIsCompressed(itup) ((GinItemPointerGetBlockNumber(&(itup)->t_tid) & GIN_ITUP_COMPRESSED) != 0)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Maximum size of an item on entry tree page. Make sure that we fit at least
|
|
||||||
* three items on each page. (On regular B-tree indexes, we must fit at least
|
|
||||||
* three items: two data items and the "high key". In GIN entry tree, we don't
|
|
||||||
* currently store the high key explicitly, we just use the rightmost item on
|
|
||||||
* the page, so it would actually be enough to fit two items.)
|
|
||||||
*/
|
|
||||||
#define GinMaxItemSize \
|
|
||||||
Min(INDEX_SIZE_MASK, \
|
|
||||||
MAXALIGN_DOWN(((BLCKSZ - \
|
|
||||||
MAXALIGN(SizeOfPageHeaderData + 3 * sizeof(ItemIdData)) - \
|
|
||||||
MAXALIGN(sizeof(GinPageOpaqueData))) / 3)))
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Access macros for non-leaf entry tuples
|
|
||||||
*/
|
|
||||||
#define GinGetDownlink(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid)
|
|
||||||
#define GinSetDownlink(itup,blkno) ItemPointerSet(&(itup)->t_tid, blkno, InvalidOffsetNumber)
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Data (posting tree) pages
|
|
||||||
*
|
|
||||||
* Posting tree pages don't store regular tuples. Non-leaf pages contain
|
|
||||||
* PostingItems, which are pairs of ItemPointers and child block numbers.
|
|
||||||
* Leaf pages contain GinPostingLists and an uncompressed array of item
|
|
||||||
* pointers.
|
|
||||||
*
|
|
||||||
* In a leaf page, the compressed posting lists are stored after the regular
|
|
||||||
* page header, one after each other. Although we don't store regular tuples,
|
|
||||||
* pd_lower is used to indicate the end of the posting lists. After that, free
|
|
||||||
* space follows. This layout is compatible with the "standard" heap and
|
|
||||||
* index page layout described in bufpage.h, so that we can e.g. set buffer_std
|
|
||||||
* when writing WAL records.
|
|
||||||
*
|
|
||||||
* In the special space is the GinPageOpaque struct.
|
|
||||||
*/
|
|
||||||
#define GinDataLeafPageGetPostingList(page) \
|
|
||||||
(GinPostingList *) ((PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData))))
|
|
||||||
#define GinDataLeafPageGetPostingListSize(page) \
|
|
||||||
(((PageHeader) page)->pd_lower - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(ItemPointerData)))
|
|
||||||
|
|
||||||
#define GinDataLeafPageIsEmpty(page) \
|
|
||||||
(GinPageIsCompressed(page) ? (GinDataLeafPageGetPostingListSize(page) == 0) : (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber))
|
|
||||||
|
|
||||||
#define GinDataLeafPageGetFreeSpace(page) PageGetExactFreeSpace(page)
|
|
||||||
|
|
||||||
#define GinDataPageGetRightBound(page) ((ItemPointer) PageGetContents(page))
|
|
||||||
/*
|
|
||||||
* Pointer to the data portion of a posting tree page. For internal pages,
|
|
||||||
* that's the beginning of the array of PostingItems. For compressed leaf
|
|
||||||
* pages, the first compressed posting list. For uncompressed (pre-9.4) leaf
|
|
||||||
* pages, it's the beginning of the ItemPointer array.
|
|
||||||
*/
|
|
||||||
#define GinDataPageGetData(page) \
|
|
||||||
(PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData)))
|
|
||||||
/* non-leaf pages contain PostingItems */
|
|
||||||
#define GinDataPageGetPostingItem(page, i) \
|
|
||||||
((PostingItem *) (GinDataPageGetData(page) + ((i)-1) * sizeof(PostingItem)))
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Note: there is no GinDataPageGetDataSize macro, because before version
|
|
||||||
* 9.4, we didn't set pd_lower on data pages. There can be pages in the index
|
|
||||||
* that were binary-upgraded from earlier versions and still have an invalid
|
|
||||||
* pd_lower, so we cannot trust it in general. Compressed posting tree leaf
|
|
||||||
* pages are new in 9.4, however, so we can trust them; see
|
|
||||||
* GinDataLeafPageGetPostingListSize.
|
|
||||||
*/
|
|
||||||
#define GinDataPageSetDataSize(page, size) \
|
|
||||||
{ \
|
|
||||||
Assert(size <= GinDataPageMaxDataSize); \
|
|
||||||
((PageHeader) page)->pd_lower = (size) + MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(ItemPointerData)); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define GinNonLeafDataPageGetFreeSpace(page) \
|
|
||||||
(GinDataPageMaxDataSize - \
|
|
||||||
GinPageGetOpaque(page)->maxoff * sizeof(PostingItem))
|
|
||||||
|
|
||||||
#define GinDataPageMaxDataSize \
|
|
||||||
(BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \
|
|
||||||
- MAXALIGN(sizeof(ItemPointerData)) \
|
|
||||||
- MAXALIGN(sizeof(GinPageOpaqueData)))
|
|
||||||
|
|
||||||
/*
|
|
||||||
* List pages
|
|
||||||
*/
|
|
||||||
#define GinListPageSize \
|
|
||||||
( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GinPageOpaqueData)) )
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Storage type for GIN's reloptions
|
* Storage type for GIN's reloptions
|
||||||
*/
|
*/
|
||||||
@ -380,216 +83,6 @@ typedef struct GinState
|
|||||||
} GinState;
|
} GinState;
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* A compressed posting list.
|
|
||||||
*
|
|
||||||
* Note: This requires 2-byte alignment.
|
|
||||||
*/
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
ItemPointerData first; /* first item in this posting list (unpacked) */
|
|
||||||
uint16 nbytes; /* number of bytes that follow */
|
|
||||||
unsigned char bytes[FLEXIBLE_ARRAY_MEMBER]; /* varbyte encoded items */
|
|
||||||
} GinPostingList;
|
|
||||||
|
|
||||||
#define SizeOfGinPostingList(plist) (offsetof(GinPostingList, bytes) + SHORTALIGN((plist)->nbytes) )
|
|
||||||
#define GinNextPostingListSegment(cur) ((GinPostingList *) (((char *) (cur)) + SizeOfGinPostingList((cur))))
|
|
||||||
|
|
||||||
|
|
||||||
/* XLog stuff */
|
|
||||||
|
|
||||||
#define XLOG_GIN_CREATE_INDEX 0x00
|
|
||||||
|
|
||||||
#define XLOG_GIN_CREATE_PTREE 0x10
|
|
||||||
|
|
||||||
typedef struct ginxlogCreatePostingTree
|
|
||||||
{
|
|
||||||
uint32 size;
|
|
||||||
/* A compressed posting list follows */
|
|
||||||
} ginxlogCreatePostingTree;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The format of the insertion record varies depending on the page type.
|
|
||||||
* ginxlogInsert is the common part between all variants.
|
|
||||||
*
|
|
||||||
* Backup Blk 0: target page
|
|
||||||
* Backup Blk 1: left child, if this insertion finishes an incomplete split
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define XLOG_GIN_INSERT 0x20
|
|
||||||
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
uint16 flags; /* GIN_INSERT_ISLEAF and/or GIN_INSERT_ISDATA */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* FOLLOWS:
|
|
||||||
*
|
|
||||||
* 1. if not leaf page, block numbers of the left and right child pages
|
|
||||||
* whose split this insertion finishes, as BlockIdData[2] (beware of
|
|
||||||
* adding fields in this struct that would make them not 16-bit aligned)
|
|
||||||
*
|
|
||||||
* 2. a ginxlogInsertEntry or ginxlogRecompressDataLeaf struct, depending
|
|
||||||
* on tree type.
|
|
||||||
*
|
|
||||||
* NB: the below structs are only 16-bit aligned when appended to a
|
|
||||||
* ginxlogInsert struct! Beware of adding fields to them that require
|
|
||||||
* stricter alignment.
|
|
||||||
*/
|
|
||||||
} ginxlogInsert;
|
|
||||||
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
OffsetNumber offset;
|
|
||||||
bool isDelete;
|
|
||||||
IndexTupleData tuple; /* variable length */
|
|
||||||
} ginxlogInsertEntry;
|
|
||||||
|
|
||||||
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
uint16 nactions;
|
|
||||||
|
|
||||||
/* Variable number of 'actions' follow */
|
|
||||||
} ginxlogRecompressDataLeaf;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Note: this struct is currently not used in code, and only acts as
|
|
||||||
* documentation. The WAL record format is as specified here, but the code
|
|
||||||
* uses straight access through a Pointer and memcpy to read/write these.
|
|
||||||
*/
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
uint8 segno; /* segment this action applies to */
|
|
||||||
char type; /* action type (see below) */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Action-specific data follows. For INSERT and REPLACE actions that is a
|
|
||||||
* GinPostingList struct. For ADDITEMS, a uint16 for the number of items
|
|
||||||
* added, followed by the items themselves as ItemPointers. DELETE actions
|
|
||||||
* have no further data.
|
|
||||||
*/
|
|
||||||
} ginxlogSegmentAction;
|
|
||||||
|
|
||||||
/* Action types */
|
|
||||||
#define GIN_SEGMENT_UNMODIFIED 0 /* no action (not used in WAL records) */
|
|
||||||
#define GIN_SEGMENT_DELETE 1 /* a whole segment is removed */
|
|
||||||
#define GIN_SEGMENT_INSERT 2 /* a whole segment is added */
|
|
||||||
#define GIN_SEGMENT_REPLACE 3 /* a segment is replaced */
|
|
||||||
#define GIN_SEGMENT_ADDITEMS 4 /* items are added to existing segment */
|
|
||||||
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
OffsetNumber offset;
|
|
||||||
PostingItem newitem;
|
|
||||||
} ginxlogInsertDataInternal;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Backup Blk 0: new left page (= original page, if not root split)
|
|
||||||
* Backup Blk 1: new right page
|
|
||||||
* Backup Blk 2: original page / new root page, if root split
|
|
||||||
* Backup Blk 3: left child, if this insertion completes an earlier split
|
|
||||||
*/
|
|
||||||
#define XLOG_GIN_SPLIT 0x30
|
|
||||||
|
|
||||||
typedef struct ginxlogSplit
|
|
||||||
{
|
|
||||||
RelFileNode node;
|
|
||||||
BlockNumber rrlink; /* right link, or root's blocknumber if root
|
|
||||||
* split */
|
|
||||||
BlockNumber leftChildBlkno; /* valid on a non-leaf split */
|
|
||||||
BlockNumber rightChildBlkno;
|
|
||||||
uint16 flags; /* see below */
|
|
||||||
} ginxlogSplit;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Flags used in ginxlogInsert and ginxlogSplit records
|
|
||||||
*/
|
|
||||||
#define GIN_INSERT_ISDATA 0x01 /* for both insert and split records */
|
|
||||||
#define GIN_INSERT_ISLEAF 0x02 /* ditto */
|
|
||||||
#define GIN_SPLIT_ROOT 0x04 /* only for split records */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Vacuum simply WAL-logs the whole page, when anything is modified. This
|
|
||||||
* is functionally identical to heap_newpage records, but is kept separate for
|
|
||||||
* debugging purposes. (When inspecting the WAL stream, it's easier to see
|
|
||||||
* what's going on when GIN vacuum records are marked as such, not as heap
|
|
||||||
* records.) This is currently only used for entry tree leaf pages.
|
|
||||||
*/
|
|
||||||
#define XLOG_GIN_VACUUM_PAGE 0x40
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Vacuuming posting tree leaf page is WAL-logged like recompression caused
|
|
||||||
* by insertion.
|
|
||||||
*/
|
|
||||||
#define XLOG_GIN_VACUUM_DATA_LEAF_PAGE 0x90
|
|
||||||
|
|
||||||
typedef struct ginxlogVacuumDataLeafPage
|
|
||||||
{
|
|
||||||
ginxlogRecompressDataLeaf data;
|
|
||||||
} ginxlogVacuumDataLeafPage;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Backup Blk 0: deleted page
|
|
||||||
* Backup Blk 1: parent
|
|
||||||
* Backup Blk 2: left sibling
|
|
||||||
*/
|
|
||||||
#define XLOG_GIN_DELETE_PAGE 0x50
|
|
||||||
|
|
||||||
typedef struct ginxlogDeletePage
|
|
||||||
{
|
|
||||||
OffsetNumber parentOffset;
|
|
||||||
BlockNumber rightLink;
|
|
||||||
} ginxlogDeletePage;
|
|
||||||
|
|
||||||
#define XLOG_GIN_UPDATE_META_PAGE 0x60
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Backup Blk 0: metapage
|
|
||||||
* Backup Blk 1: tail page
|
|
||||||
*/
|
|
||||||
typedef struct ginxlogUpdateMeta
|
|
||||||
{
|
|
||||||
RelFileNode node;
|
|
||||||
GinMetaPageData metadata;
|
|
||||||
BlockNumber prevTail;
|
|
||||||
BlockNumber newRightlink;
|
|
||||||
int32 ntuples; /* if ntuples > 0 then metadata.tail was
|
|
||||||
* updated with that many tuples; else new sub
|
|
||||||
* list was inserted */
|
|
||||||
/* array of inserted tuples follows */
|
|
||||||
} ginxlogUpdateMeta;
|
|
||||||
|
|
||||||
#define XLOG_GIN_INSERT_LISTPAGE 0x70
|
|
||||||
|
|
||||||
typedef struct ginxlogInsertListPage
|
|
||||||
{
|
|
||||||
BlockNumber rightlink;
|
|
||||||
int32 ntuples;
|
|
||||||
/* array of inserted tuples follows */
|
|
||||||
} ginxlogInsertListPage;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Backup Blk 0: metapage
|
|
||||||
* Backup Blk 1 to (ndeleted + 1): deleted pages
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define XLOG_GIN_DELETE_LISTPAGE 0x80
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The WAL record for deleting list pages must contain a block reference to
|
|
||||||
* all the deleted pages, so the number of pages that can be deleted in one
|
|
||||||
* record is limited by XLR_MAX_BLOCK_ID. (block_id 0 is used for the
|
|
||||||
* metapage.)
|
|
||||||
*/
|
|
||||||
#define GIN_NDELETE_AT_ONCE Min(16, XLR_MAX_BLOCK_ID - 1)
|
|
||||||
typedef struct ginxlogDeleteListPages
|
|
||||||
{
|
|
||||||
GinMetaPageData metadata;
|
|
||||||
int32 ndeleted;
|
|
||||||
} ginxlogDeleteListPages;
|
|
||||||
|
|
||||||
|
|
||||||
/* ginutil.c */
|
/* ginutil.c */
|
||||||
extern bytea *ginoptions(Datum reloptions, bool validate);
|
extern bytea *ginoptions(Datum reloptions, bool validate);
|
||||||
extern void initGinState(GinState *state, Relation index);
|
extern void initGinState(GinState *state, Relation index);
|
||||||
|
329
src/include/access/ginblock.h
Normal file
329
src/include/access/ginblock.h
Normal file
@ -0,0 +1,329 @@
|
|||||||
|
/*--------------------------------------------------------------------------
|
||||||
|
* ginblock.h
|
||||||
|
* details of structures stored in GIN index blocks
|
||||||
|
*
|
||||||
|
* Copyright (c) 2006-2017, PostgreSQL Global Development Group
|
||||||
|
*
|
||||||
|
* src/include/access/ginblock.h
|
||||||
|
*--------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#ifndef GINBLOCK_H
|
||||||
|
#define GINBLOCK_H
|
||||||
|
|
||||||
|
#include "storage/block.h"
|
||||||
|
#include "storage/itemptr.h"
|
||||||
|
#include "storage/off.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Page opaque data in an inverted index page.
|
||||||
|
*
|
||||||
|
* Note: GIN does not include a page ID word as do the other index types.
|
||||||
|
* This is OK because the opaque data is only 8 bytes and so can be reliably
|
||||||
|
* distinguished by size. Revisit this if the size ever increases.
|
||||||
|
* Further note: as of 9.2, SP-GiST also uses 8-byte special space, as does
|
||||||
|
* BRIN as of 9.5. This is still OK, as long as GIN isn't using all of the
|
||||||
|
* high-order bits in its flags word, because that way the flags word cannot
|
||||||
|
* match the page IDs used by SP-GiST and BRIN.
|
||||||
|
*/
|
||||||
|
typedef struct GinPageOpaqueData
|
||||||
|
{
|
||||||
|
BlockNumber rightlink; /* next page if any */
|
||||||
|
OffsetNumber maxoff; /* number of PostingItems on GIN_DATA &
|
||||||
|
* ~GIN_LEAF page. On GIN_LIST page, number of
|
||||||
|
* heap tuples. */
|
||||||
|
uint16 flags; /* see bit definitions below */
|
||||||
|
} GinPageOpaqueData;
|
||||||
|
|
||||||
|
typedef GinPageOpaqueData *GinPageOpaque;
|
||||||
|
|
||||||
|
#define GIN_DATA (1 << 0)
|
||||||
|
#define GIN_LEAF (1 << 1)
|
||||||
|
#define GIN_DELETED (1 << 2)
|
||||||
|
#define GIN_META (1 << 3)
|
||||||
|
#define GIN_LIST (1 << 4)
|
||||||
|
#define GIN_LIST_FULLROW (1 << 5) /* makes sense only on GIN_LIST page */
|
||||||
|
#define GIN_INCOMPLETE_SPLIT (1 << 6) /* page was split, but parent not
|
||||||
|
* updated */
|
||||||
|
#define GIN_COMPRESSED (1 << 7)
|
||||||
|
|
||||||
|
/* Page numbers of fixed-location pages */
|
||||||
|
#define GIN_METAPAGE_BLKNO (0)
|
||||||
|
#define GIN_ROOT_BLKNO (1)
|
||||||
|
|
||||||
|
typedef struct GinMetaPageData
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Pointers to head and tail of pending list, which consists of GIN_LIST
|
||||||
|
* pages. These store fast-inserted entries that haven't yet been moved
|
||||||
|
* into the regular GIN structure.
|
||||||
|
*/
|
||||||
|
BlockNumber head;
|
||||||
|
BlockNumber tail;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Free space in bytes in the pending list's tail page.
|
||||||
|
*/
|
||||||
|
uint32 tailFreeSize;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We store both number of pages and number of heap tuples that are in the
|
||||||
|
* pending list.
|
||||||
|
*/
|
||||||
|
BlockNumber nPendingPages;
|
||||||
|
int64 nPendingHeapTuples;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Statistics for planner use (accurate as of last VACUUM)
|
||||||
|
*/
|
||||||
|
BlockNumber nTotalPages;
|
||||||
|
BlockNumber nEntryPages;
|
||||||
|
BlockNumber nDataPages;
|
||||||
|
int64 nEntries;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* GIN version number (ideally this should have been at the front, but too
|
||||||
|
* late now. Don't move it!)
|
||||||
|
*
|
||||||
|
* Currently 2 (for indexes initialized in 9.4 or later)
|
||||||
|
*
|
||||||
|
* Version 1 (indexes initialized in version 9.1, 9.2 or 9.3), is
|
||||||
|
* compatible, but may contain uncompressed posting tree (leaf) pages and
|
||||||
|
* posting lists. They will be converted to compressed format when
|
||||||
|
* modified.
|
||||||
|
*
|
||||||
|
* Version 0 (indexes initialized in 9.0 or before) is compatible but may
|
||||||
|
* be missing null entries, including both null keys and placeholders.
|
||||||
|
* Reject full-index-scan attempts on such indexes.
|
||||||
|
*/
|
||||||
|
int32 ginVersion;
|
||||||
|
} GinMetaPageData;
|
||||||
|
|
||||||
|
#define GIN_CURRENT_VERSION 2
|
||||||
|
|
||||||
|
#define GinPageGetMeta(p) \
|
||||||
|
((GinMetaPageData *) PageGetContents(p))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Macros for accessing a GIN index page's opaque data
|
||||||
|
*/
|
||||||
|
#define GinPageGetOpaque(page) ( (GinPageOpaque) PageGetSpecialPointer(page) )
|
||||||
|
|
||||||
|
#define GinPageIsLeaf(page) ( (GinPageGetOpaque(page)->flags & GIN_LEAF) != 0 )
|
||||||
|
#define GinPageSetLeaf(page) ( GinPageGetOpaque(page)->flags |= GIN_LEAF )
|
||||||
|
#define GinPageSetNonLeaf(page) ( GinPageGetOpaque(page)->flags &= ~GIN_LEAF )
|
||||||
|
#define GinPageIsData(page) ( (GinPageGetOpaque(page)->flags & GIN_DATA) != 0 )
|
||||||
|
#define GinPageSetData(page) ( GinPageGetOpaque(page)->flags |= GIN_DATA )
|
||||||
|
#define GinPageIsList(page) ( (GinPageGetOpaque(page)->flags & GIN_LIST) != 0 )
|
||||||
|
#define GinPageSetList(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST )
|
||||||
|
#define GinPageHasFullRow(page) ( (GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW) != 0 )
|
||||||
|
#define GinPageSetFullRow(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST_FULLROW )
|
||||||
|
#define GinPageIsCompressed(page) ( (GinPageGetOpaque(page)->flags & GIN_COMPRESSED) != 0 )
|
||||||
|
#define GinPageSetCompressed(page) ( GinPageGetOpaque(page)->flags |= GIN_COMPRESSED )
|
||||||
|
|
||||||
|
#define GinPageIsDeleted(page) ( (GinPageGetOpaque(page)->flags & GIN_DELETED) != 0 )
|
||||||
|
#define GinPageSetDeleted(page) ( GinPageGetOpaque(page)->flags |= GIN_DELETED)
|
||||||
|
#define GinPageSetNonDeleted(page) ( GinPageGetOpaque(page)->flags &= ~GIN_DELETED)
|
||||||
|
#define GinPageIsIncompleteSplit(page) ( (GinPageGetOpaque(page)->flags & GIN_INCOMPLETE_SPLIT) != 0 )
|
||||||
|
|
||||||
|
#define GinPageRightMost(page) ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We use our own ItemPointerGet(BlockNumber|OffsetNumber)
|
||||||
|
* to avoid Asserts, since sometimes the ip_posid isn't "valid"
|
||||||
|
*/
|
||||||
|
#define GinItemPointerGetBlockNumber(pointer) \
|
||||||
|
BlockIdGetBlockNumber(&(pointer)->ip_blkid)
|
||||||
|
|
||||||
|
#define GinItemPointerGetOffsetNumber(pointer) \
|
||||||
|
((pointer)->ip_posid)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Special-case item pointer values needed by the GIN search logic.
|
||||||
|
* MIN: sorts less than any valid item pointer
|
||||||
|
* MAX: sorts greater than any valid item pointer
|
||||||
|
* LOSSY PAGE: indicates a whole heap page, sorts after normal item
|
||||||
|
* pointers for that page
|
||||||
|
* Note that these are all distinguishable from an "invalid" item pointer
|
||||||
|
* (which is InvalidBlockNumber/0) as well as from all normal item
|
||||||
|
* pointers (which have item numbers in the range 1..MaxHeapTuplesPerPage).
|
||||||
|
*/
|
||||||
|
#define ItemPointerSetMin(p) \
|
||||||
|
ItemPointerSet((p), (BlockNumber)0, (OffsetNumber)0)
|
||||||
|
#define ItemPointerIsMin(p) \
|
||||||
|
(GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0 && \
|
||||||
|
GinItemPointerGetBlockNumber(p) == (BlockNumber)0)
|
||||||
|
#define ItemPointerSetMax(p) \
|
||||||
|
ItemPointerSet((p), InvalidBlockNumber, (OffsetNumber)0xffff)
|
||||||
|
#define ItemPointerIsMax(p) \
|
||||||
|
(GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \
|
||||||
|
GinItemPointerGetBlockNumber(p) == InvalidBlockNumber)
|
||||||
|
#define ItemPointerSetLossyPage(p, b) \
|
||||||
|
ItemPointerSet((p), (b), (OffsetNumber)0xffff)
|
||||||
|
#define ItemPointerIsLossyPage(p) \
|
||||||
|
(GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \
|
||||||
|
GinItemPointerGetBlockNumber(p) != InvalidBlockNumber)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Posting item in a non-leaf posting-tree page
|
||||||
|
*/
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
/* We use BlockIdData not BlockNumber to avoid padding space wastage */
|
||||||
|
BlockIdData child_blkno;
|
||||||
|
ItemPointerData key;
|
||||||
|
} PostingItem;
|
||||||
|
|
||||||
|
#define PostingItemGetBlockNumber(pointer) \
|
||||||
|
BlockIdGetBlockNumber(&(pointer)->child_blkno)
|
||||||
|
|
||||||
|
#define PostingItemSetBlockNumber(pointer, blockNumber) \
|
||||||
|
BlockIdSet(&((pointer)->child_blkno), (blockNumber))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Category codes to distinguish placeholder nulls from ordinary NULL keys.
|
||||||
|
* Note that the datatype size and the first two code values are chosen to be
|
||||||
|
* compatible with the usual usage of bool isNull flags.
|
||||||
|
*
|
||||||
|
* GIN_CAT_EMPTY_QUERY is never stored in the index; and notice that it is
|
||||||
|
* chosen to sort before not after regular key values.
|
||||||
|
*/
|
||||||
|
typedef signed char GinNullCategory;
|
||||||
|
|
||||||
|
#define GIN_CAT_NORM_KEY 0 /* normal, non-null key value */
|
||||||
|
#define GIN_CAT_NULL_KEY 1 /* null key value */
|
||||||
|
#define GIN_CAT_EMPTY_ITEM 2 /* placeholder for zero-key item */
|
||||||
|
#define GIN_CAT_NULL_ITEM 3 /* placeholder for null item */
|
||||||
|
#define GIN_CAT_EMPTY_QUERY (-1) /* placeholder for full-scan query */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Access macros for null category byte in entry tuples
|
||||||
|
*/
|
||||||
|
#define GinCategoryOffset(itup,ginstate) \
|
||||||
|
(IndexInfoFindDataOffset((itup)->t_info) + \
|
||||||
|
((ginstate)->oneCol ? 0 : sizeof(int16)))
|
||||||
|
#define GinGetNullCategory(itup,ginstate) \
|
||||||
|
(*((GinNullCategory *) ((char*)(itup) + GinCategoryOffset(itup,ginstate))))
|
||||||
|
#define GinSetNullCategory(itup,ginstate,c) \
|
||||||
|
(*((GinNullCategory *) ((char*)(itup) + GinCategoryOffset(itup,ginstate))) = (c))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Access macros for leaf-page entry tuples (see discussion in README)
|
||||||
|
*/
|
||||||
|
#define GinGetNPosting(itup) GinItemPointerGetOffsetNumber(&(itup)->t_tid)
|
||||||
|
#define GinSetNPosting(itup,n) ItemPointerSetOffsetNumber(&(itup)->t_tid,n)
|
||||||
|
#define GIN_TREE_POSTING ((OffsetNumber)0xffff)
|
||||||
|
#define GinIsPostingTree(itup) (GinGetNPosting(itup) == GIN_TREE_POSTING)
|
||||||
|
#define GinSetPostingTree(itup, blkno) ( GinSetNPosting((itup),GIN_TREE_POSTING), ItemPointerSetBlockNumber(&(itup)->t_tid, blkno) )
|
||||||
|
#define GinGetPostingTree(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid)
|
||||||
|
|
||||||
|
#define GIN_ITUP_COMPRESSED (1U << 31)
|
||||||
|
#define GinGetPostingOffset(itup) (GinItemPointerGetBlockNumber(&(itup)->t_tid) & (~GIN_ITUP_COMPRESSED))
|
||||||
|
#define GinSetPostingOffset(itup,n) ItemPointerSetBlockNumber(&(itup)->t_tid,(n)|GIN_ITUP_COMPRESSED)
|
||||||
|
#define GinGetPosting(itup) ((Pointer) ((char*)(itup) + GinGetPostingOffset(itup)))
|
||||||
|
#define GinItupIsCompressed(itup) ((GinItemPointerGetBlockNumber(&(itup)->t_tid) & GIN_ITUP_COMPRESSED) != 0)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Maximum size of an item on entry tree page. Make sure that we fit at least
|
||||||
|
* three items on each page. (On regular B-tree indexes, we must fit at least
|
||||||
|
* three items: two data items and the "high key". In GIN entry tree, we don't
|
||||||
|
* currently store the high key explicitly, we just use the rightmost item on
|
||||||
|
* the page, so it would actually be enough to fit two items.)
|
||||||
|
*/
|
||||||
|
#define GinMaxItemSize \
|
||||||
|
Min(INDEX_SIZE_MASK, \
|
||||||
|
MAXALIGN_DOWN(((BLCKSZ - \
|
||||||
|
MAXALIGN(SizeOfPageHeaderData + 3 * sizeof(ItemIdData)) - \
|
||||||
|
MAXALIGN(sizeof(GinPageOpaqueData))) / 3)))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Access macros for non-leaf entry tuples
|
||||||
|
*/
|
||||||
|
#define GinGetDownlink(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid)
|
||||||
|
#define GinSetDownlink(itup,blkno) ItemPointerSet(&(itup)->t_tid, blkno, InvalidOffsetNumber)
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Data (posting tree) pages
|
||||||
|
*
|
||||||
|
* Posting tree pages don't store regular tuples. Non-leaf pages contain
|
||||||
|
* PostingItems, which are pairs of ItemPointers and child block numbers.
|
||||||
|
* Leaf pages contain GinPostingLists and an uncompressed array of item
|
||||||
|
* pointers.
|
||||||
|
*
|
||||||
|
* In a leaf page, the compressed posting lists are stored after the regular
|
||||||
|
* page header, one after each other. Although we don't store regular tuples,
|
||||||
|
* pd_lower is used to indicate the end of the posting lists. After that, free
|
||||||
|
* space follows. This layout is compatible with the "standard" heap and
|
||||||
|
* index page layout described in bufpage.h, so that we can e.g. set buffer_std
|
||||||
|
* when writing WAL records.
|
||||||
|
*
|
||||||
|
* In the special space is the GinPageOpaque struct.
|
||||||
|
*/
|
||||||
|
/*
 * Pointer to the first GinPostingList on a posting tree leaf page: the page
 * contents, skipping the MAXALIGN'd ItemPointerData stored at their start.
 *
 * The whole expansion is parenthesized so that the cast still applies to the
 * complete address when the macro is followed by a postfix operator, e.g.
 * GinDataLeafPageGetPostingList(page)->nbytes.
 */
#define GinDataLeafPageGetPostingList(page) \
	((GinPostingList *) (PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData))))
|
||||||
|
/*
 * Number of bytes occupied by the posting lists on a leaf page: pd_lower
 * minus the MAXALIGN'd page header and the MAXALIGN'd ItemPointerData that
 * precede the posting lists.
 *
 * The argument is parenthesized so that any pointer expression, not just a
 * plain identifier, is cast to PageHeader as a whole.
 */
#define GinDataLeafPageGetPostingListSize(page) \
	(((PageHeader) (page))->pd_lower - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(ItemPointerData)))
|
||||||
|
|
||||||
|
#define GinDataLeafPageIsEmpty(page) \
|
||||||
|
(GinPageIsCompressed(page) ? (GinDataLeafPageGetPostingListSize(page) == 0) : (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber))
|
||||||
|
|
||||||
|
#define GinDataLeafPageGetFreeSpace(page) PageGetExactFreeSpace(page)
|
||||||
|
|
||||||
|
#define GinDataPageGetRightBound(page) ((ItemPointer) PageGetContents(page))
|
||||||
|
/*
|
||||||
|
* Pointer to the data portion of a posting tree page. For internal pages,
|
||||||
|
* that's the beginning of the array of PostingItems. For compressed leaf
|
||||||
|
* pages, the first compressed posting list. For uncompressed (pre-9.4) leaf
|
||||||
|
* pages, it's the beginning of the ItemPointer array.
|
||||||
|
*/
|
||||||
|
#define GinDataPageGetData(page) \
|
||||||
|
(PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData)))
|
||||||
|
/* non-leaf pages contain PostingItems */
|
||||||
|
#define GinDataPageGetPostingItem(page, i) \
|
||||||
|
((PostingItem *) (GinDataPageGetData(page) + ((i)-1) * sizeof(PostingItem)))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note: there is no GinDataPageGetDataSize macro, because before version
|
||||||
|
* 9.4, we didn't set pd_lower on data pages. There can be pages in the index
|
||||||
|
* that were binary-upgraded from earlier versions and still have an invalid
|
||||||
|
* pd_lower, so we cannot trust it in general. Compressed posting tree leaf
|
||||||
|
* pages are new in 9.4, however, so we can trust them; see
|
||||||
|
* GinDataLeafPageGetPostingListSize.
|
||||||
|
*/
|
||||||
|
/*
 * Set pd_lower of a data page so that it reflects 'size' bytes of data
 * following the MAXALIGN'd page header and the MAXALIGN'd ItemPointerData.
 *
 * Wrapped in do { ... } while (0) so the macro behaves as a single statement
 * (safe in an unbraced if/else).  Both arguments are parenthesized; note that
 * 'size' appears twice in the expansion, so it must be free of side effects.
 */
#define GinDataPageSetDataSize(page, size) \
	do { \
		Assert((size) <= GinDataPageMaxDataSize); \
		((PageHeader) (page))->pd_lower = (size) + MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(ItemPointerData)); \
	} while (0)
|
||||||
|
|
||||||
|
#define GinNonLeafDataPageGetFreeSpace(page) \
|
||||||
|
(GinDataPageMaxDataSize - \
|
||||||
|
GinPageGetOpaque(page)->maxoff * sizeof(PostingItem))
|
||||||
|
|
||||||
|
#define GinDataPageMaxDataSize \
|
||||||
|
(BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \
|
||||||
|
- MAXALIGN(sizeof(ItemPointerData)) \
|
||||||
|
- MAXALIGN(sizeof(GinPageOpaqueData)))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* List pages
|
||||||
|
*/
|
||||||
|
#define GinListPageSize \
|
||||||
|
( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GinPageOpaqueData)) )
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A compressed posting list.
|
||||||
|
*
|
||||||
|
* Note: This requires 2-byte alignment.
|
||||||
|
*/
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
ItemPointerData first; /* first item in this posting list (unpacked) */
|
||||||
|
uint16 nbytes; /* number of bytes that follow */
|
||||||
|
unsigned char bytes[FLEXIBLE_ARRAY_MEMBER]; /* varbyte encoded items */
|
||||||
|
} GinPostingList;
|
||||||
|
|
||||||
|
#define SizeOfGinPostingList(plist) (offsetof(GinPostingList, bytes) + SHORTALIGN((plist)->nbytes) )
|
||||||
|
#define GinNextPostingListSegment(cur) ((GinPostingList *) (((char *) (cur)) + SizeOfGinPostingList((cur))))
|
||||||
|
|
||||||
|
#endif /* GINBLOCK_H */
|
217
src/include/access/ginxlog.h
Normal file
217
src/include/access/ginxlog.h
Normal file
@ -0,0 +1,217 @@
|
|||||||
|
/*--------------------------------------------------------------------------
|
||||||
|
* ginxlog.h
|
||||||
|
* header file for postgres inverted index xlog implementation.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2006-2017, PostgreSQL Global Development Group
|
||||||
|
*
|
||||||
|
* src/include/access/ginxlog.h
|
||||||
|
*--------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#ifndef GINXLOG_H
|
||||||
|
#define GINXLOG_H
|
||||||
|
|
||||||
|
#include "access/ginblock.h"
|
||||||
|
#include "access/itup.h"
|
||||||
|
#include "access/xlogreader.h"
|
||||||
|
#include "lib/stringinfo.h"
|
||||||
|
#include "storage/off.h"
|
||||||
|
|
||||||
|
#define XLOG_GIN_CREATE_INDEX 0x00
|
||||||
|
|
||||||
|
#define XLOG_GIN_CREATE_PTREE 0x10
|
||||||
|
|
||||||
|
typedef struct ginxlogCreatePostingTree
|
||||||
|
{
|
||||||
|
uint32 size;
|
||||||
|
/* A compressed posting list follows */
|
||||||
|
} ginxlogCreatePostingTree;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The format of the insertion record varies depending on the page type.
|
||||||
|
* ginxlogInsert is the common part between all variants.
|
||||||
|
*
|
||||||
|
* Backup Blk 0: target page
|
||||||
|
* Backup Blk 1: left child, if this insertion finishes an incomplete split
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define XLOG_GIN_INSERT 0x20
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
uint16 flags; /* GIN_INSERT_ISLEAF and/or GIN_INSERT_ISDATA */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* FOLLOWS:
|
||||||
|
*
|
||||||
|
* 1. if not leaf page, block numbers of the left and right child pages
|
||||||
|
* whose split this insertion finishes, as BlockIdData[2] (beware of
|
||||||
|
* adding fields in this struct that would make them not 16-bit aligned)
|
||||||
|
*
|
||||||
|
* 2. a ginxlogInsertEntry or ginxlogRecompressDataLeaf struct, depending
|
||||||
|
* on tree type.
|
||||||
|
*
|
||||||
|
* NB: the below structs are only 16-bit aligned when appended to a
|
||||||
|
* ginxlogInsert struct! Beware of adding fields to them that require
|
||||||
|
* stricter alignment.
|
||||||
|
*/
|
||||||
|
} ginxlogInsert;
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
OffsetNumber offset;
|
||||||
|
bool isDelete;
|
||||||
|
IndexTupleData tuple; /* variable length */
|
||||||
|
} ginxlogInsertEntry;
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
uint16 nactions;
|
||||||
|
|
||||||
|
/* Variable number of 'actions' follow */
|
||||||
|
} ginxlogRecompressDataLeaf;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note: this struct is currently not used in code, and only acts as
|
||||||
|
* documentation. The WAL record format is as specified here, but the code
|
||||||
|
* uses straight access through a Pointer and memcpy to read/write these.
|
||||||
|
*/
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
uint8 segno; /* segment this action applies to */
|
||||||
|
char type; /* action type (see below) */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Action-specific data follows. For INSERT and REPLACE actions that is a
|
||||||
|
* GinPostingList struct. For ADDITEMS, a uint16 for the number of items
|
||||||
|
* added, followed by the items themselves as ItemPointers. DELETE actions
|
||||||
|
* have no further data.
|
||||||
|
*/
|
||||||
|
} ginxlogSegmentAction;
|
||||||
|
|
||||||
|
/* Action types */
|
||||||
|
#define GIN_SEGMENT_UNMODIFIED 0 /* no action (not used in WAL records) */
|
||||||
|
#define GIN_SEGMENT_DELETE 1 /* a whole segment is removed */
|
||||||
|
#define GIN_SEGMENT_INSERT 2 /* a whole segment is added */
|
||||||
|
#define GIN_SEGMENT_REPLACE 3 /* a segment is replaced */
|
||||||
|
#define GIN_SEGMENT_ADDITEMS 4 /* items are added to existing segment */
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
OffsetNumber offset;
|
||||||
|
PostingItem newitem;
|
||||||
|
} ginxlogInsertDataInternal;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Backup Blk 0: new left page (= original page, if not root split)
|
||||||
|
* Backup Blk 1: new right page
|
||||||
|
* Backup Blk 2: original page / new root page, if root split
|
||||||
|
* Backup Blk 3: left child, if this insertion completes an earlier split
|
||||||
|
*/
|
||||||
|
#define XLOG_GIN_SPLIT 0x30
|
||||||
|
|
||||||
|
typedef struct ginxlogSplit
|
||||||
|
{
|
||||||
|
RelFileNode node;
|
||||||
|
BlockNumber rrlink; /* right link, or root's blocknumber if root
|
||||||
|
* split */
|
||||||
|
BlockNumber leftChildBlkno; /* valid on a non-leaf split */
|
||||||
|
BlockNumber rightChildBlkno;
|
||||||
|
uint16 flags; /* see below */
|
||||||
|
} ginxlogSplit;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Flags used in ginxlogInsert and ginxlogSplit records
|
||||||
|
*/
|
||||||
|
#define GIN_INSERT_ISDATA 0x01 /* for both insert and split records */
|
||||||
|
#define GIN_INSERT_ISLEAF 0x02 /* ditto */
|
||||||
|
#define GIN_SPLIT_ROOT 0x04 /* only for split records */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Vacuum simply WAL-logs the whole page, when anything is modified. This
|
||||||
|
* is functionally identical to heap_newpage records, but is kept separate for
|
||||||
|
* debugging purposes. (When inspecting the WAL stream, it's easier to see
|
||||||
|
* what's going on when GIN vacuum records are marked as such, not as heap
|
||||||
|
* records.) This is currently only used for entry tree leaf pages.
|
||||||
|
*/
|
||||||
|
#define XLOG_GIN_VACUUM_PAGE 0x40
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Vacuuming posting tree leaf page is WAL-logged like recompression caused
|
||||||
|
* by insertion.
|
||||||
|
*/
|
||||||
|
#define XLOG_GIN_VACUUM_DATA_LEAF_PAGE 0x90
|
||||||
|
|
||||||
|
typedef struct ginxlogVacuumDataLeafPage
|
||||||
|
{
|
||||||
|
ginxlogRecompressDataLeaf data;
|
||||||
|
} ginxlogVacuumDataLeafPage;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Backup Blk 0: deleted page
|
||||||
|
* Backup Blk 1: parent
|
||||||
|
* Backup Blk 2: left sibling
|
||||||
|
*/
|
||||||
|
#define XLOG_GIN_DELETE_PAGE 0x50
|
||||||
|
|
||||||
|
typedef struct ginxlogDeletePage
|
||||||
|
{
|
||||||
|
OffsetNumber parentOffset;
|
||||||
|
BlockNumber rightLink;
|
||||||
|
} ginxlogDeletePage;
|
||||||
|
|
||||||
|
#define XLOG_GIN_UPDATE_META_PAGE 0x60
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Backup Blk 0: metapage
|
||||||
|
* Backup Blk 1: tail page
|
||||||
|
*/
|
||||||
|
typedef struct ginxlogUpdateMeta
|
||||||
|
{
|
||||||
|
RelFileNode node;
|
||||||
|
GinMetaPageData metadata;
|
||||||
|
BlockNumber prevTail;
|
||||||
|
BlockNumber newRightlink;
|
||||||
|
int32 ntuples; /* if ntuples > 0 then metadata.tail was
|
||||||
|
* updated with that many tuples; else new sub
|
||||||
|
* list was inserted */
|
||||||
|
/* array of inserted tuples follows */
|
||||||
|
} ginxlogUpdateMeta;
|
||||||
|
|
||||||
|
#define XLOG_GIN_INSERT_LISTPAGE 0x70
|
||||||
|
|
||||||
|
typedef struct ginxlogInsertListPage
|
||||||
|
{
|
||||||
|
BlockNumber rightlink;
|
||||||
|
int32 ntuples;
|
||||||
|
/* array of inserted tuples follows */
|
||||||
|
} ginxlogInsertListPage;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Backup Blk 0: metapage
|
||||||
|
* Backup Blk 1 to (ndeleted + 1): deleted pages
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define XLOG_GIN_DELETE_LISTPAGE 0x80
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The WAL record for deleting list pages must contain a block reference to
|
||||||
|
* all the deleted pages, so the number of pages that can be deleted in one
|
||||||
|
* record is limited by XLR_MAX_BLOCK_ID. (block_id 0 is used for the
|
||||||
|
* metapage.)
|
||||||
|
*/
|
||||||
|
#define GIN_NDELETE_AT_ONCE Min(16, XLR_MAX_BLOCK_ID - 1)
|
||||||
|
typedef struct ginxlogDeleteListPages
|
||||||
|
{
|
||||||
|
GinMetaPageData metadata;
|
||||||
|
int32 ndeleted;
|
||||||
|
} ginxlogDeleteListPages;
|
||||||
|
|
||||||
|
extern void gin_redo(XLogReaderState *record);
|
||||||
|
extern void gin_desc(StringInfo buf, XLogReaderState *record);
|
||||||
|
extern const char *gin_identify(uint8 info);
|
||||||
|
extern void gin_xlog_startup(void);
|
||||||
|
extern void gin_xlog_cleanup(void);
|
||||||
|
extern void gin_mask(char *pagedata, BlockNumber blkno);
|
||||||
|
|
||||||
|
#endif /* GINXLOG_H */
|
@ -17,7 +17,6 @@
|
|||||||
#include "access/amapi.h"
|
#include "access/amapi.h"
|
||||||
#include "access/gist.h"
|
#include "access/gist.h"
|
||||||
#include "access/itup.h"
|
#include "access/itup.h"
|
||||||
#include "access/xlogreader.h"
|
|
||||||
#include "fmgr.h"
|
#include "fmgr.h"
|
||||||
#include "lib/pairingheap.h"
|
#include "lib/pairingheap.h"
|
||||||
#include "storage/bufmgr.h"
|
#include "storage/bufmgr.h"
|
||||||
@ -177,51 +176,7 @@ typedef struct GISTScanOpaqueData
|
|||||||
|
|
||||||
typedef GISTScanOpaqueData *GISTScanOpaque;
|
typedef GISTScanOpaqueData *GISTScanOpaque;
|
||||||
|
|
||||||
|
/* despite the name, gistxlogPage is not part of any xlog record */
|
||||||
/* XLog stuff */
|
|
||||||
|
|
||||||
#define XLOG_GIST_PAGE_UPDATE 0x00
|
|
||||||
/* #define XLOG_GIST_NEW_ROOT 0x20 */ /* not used anymore */
|
|
||||||
#define XLOG_GIST_PAGE_SPLIT 0x30
|
|
||||||
/* #define XLOG_GIST_INSERT_COMPLETE 0x40 */ /* not used anymore */
|
|
||||||
#define XLOG_GIST_CREATE_INDEX 0x50
|
|
||||||
/* #define XLOG_GIST_PAGE_DELETE 0x60 */ /* not used anymore */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Backup Blk 0: updated page.
|
|
||||||
* Backup Blk 1: If this operation completes a page split, by inserting a
|
|
||||||
* downlink for the split page, the left half of the split
|
|
||||||
*/
|
|
||||||
typedef struct gistxlogPageUpdate
|
|
||||||
{
|
|
||||||
/* number of deleted offsets */
|
|
||||||
uint16 ntodelete;
|
|
||||||
uint16 ntoinsert;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* In payload of blk 0 : 1. todelete OffsetNumbers 2. tuples to insert
|
|
||||||
*/
|
|
||||||
} gistxlogPageUpdate;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Backup Blk 0: If this operation completes a page split, by inserting a
|
|
||||||
* downlink for the split page, the left half of the split
|
|
||||||
* Backup Blk 1 - npage: split pages (1 is the original page)
|
|
||||||
*/
|
|
||||||
typedef struct gistxlogPageSplit
|
|
||||||
{
|
|
||||||
BlockNumber origrlink; /* rightlink of the page before split */
|
|
||||||
GistNSN orignsn; /* NSN of the page before split */
|
|
||||||
bool origleaf; /* was the split page a leaf page? */
|
|
||||||
|
|
||||||
uint16 npage; /* # of pages in the split */
|
|
||||||
bool markfollowright; /* set F_FOLLOW_RIGHT flags */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Followed by: 1. a gistxlogPage and an array of IndexTupleData per page
|
|
||||||
*/
|
|
||||||
} gistxlogPageSplit;
|
|
||||||
|
|
||||||
typedef struct gistxlogPage
|
typedef struct gistxlogPage
|
||||||
{
|
{
|
||||||
BlockNumber blkno;
|
BlockNumber blkno;
|
||||||
@ -454,14 +409,6 @@ extern bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
|
|||||||
extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
|
extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
|
||||||
int len, GISTSTATE *giststate);
|
int len, GISTSTATE *giststate);
|
||||||
|
|
||||||
/* gistxlog.c */
|
|
||||||
extern void gist_redo(XLogReaderState *record);
|
|
||||||
extern void gist_desc(StringInfo buf, XLogReaderState *record);
|
|
||||||
extern const char *gist_identify(uint8 info);
|
|
||||||
extern void gist_xlog_startup(void);
|
|
||||||
extern void gist_xlog_cleanup(void);
|
|
||||||
extern void gist_mask(char *pagedata, BlockNumber blkno);
|
|
||||||
|
|
||||||
extern XLogRecPtr gistXLogUpdate(Buffer buffer,
|
extern XLogRecPtr gistXLogUpdate(Buffer buffer,
|
||||||
OffsetNumber *todelete, int ntodelete,
|
OffsetNumber *todelete, int ntodelete,
|
||||||
IndexTuple *itup, int ntup,
|
IndexTuple *itup, int ntup,
|
||||||
|
69
src/include/access/gistxlog.h
Normal file
69
src/include/access/gistxlog.h
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* gistxlog.h
|
||||||
|
* gist xlog routines
|
||||||
|
*
|
||||||
|
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
|
||||||
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
|
*
|
||||||
|
* src/include/access/gistxlog.h
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#ifndef GIST_XLOG_H
|
||||||
|
#define GIST_XLOG_H
|
||||||
|
|
||||||
|
#include "access/gist.h"
|
||||||
|
#include "access/xlogreader.h"
|
||||||
|
#include "lib/stringinfo.h"
|
||||||
|
|
||||||
|
#define XLOG_GIST_PAGE_UPDATE 0x00
|
||||||
|
/* #define XLOG_GIST_NEW_ROOT 0x20 */ /* not used anymore */
|
||||||
|
#define XLOG_GIST_PAGE_SPLIT 0x30
|
||||||
|
/* #define XLOG_GIST_INSERT_COMPLETE 0x40 */ /* not used anymore */
|
||||||
|
#define XLOG_GIST_CREATE_INDEX 0x50
|
||||||
|
/* #define XLOG_GIST_PAGE_DELETE 0x60 */ /* not used anymore */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Backup Blk 0: updated page.
|
||||||
|
* Backup Blk 1: If this operation completes a page split, by inserting a
|
||||||
|
* downlink for the split page, the left half of the split
|
||||||
|
*/
|
||||||
|
typedef struct gistxlogPageUpdate
|
||||||
|
{
|
||||||
|
/* number of deleted offsets */
|
||||||
|
uint16 ntodelete;
|
||||||
|
uint16 ntoinsert;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In payload of blk 0 : 1. todelete OffsetNumbers 2. tuples to insert
|
||||||
|
*/
|
||||||
|
} gistxlogPageUpdate;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Backup Blk 0: If this operation completes a page split, by inserting a
|
||||||
|
* downlink for the split page, the left half of the split
|
||||||
|
* Backup Blk 1 - npage: split pages (1 is the original page)
|
||||||
|
*/
|
||||||
|
typedef struct gistxlogPageSplit
|
||||||
|
{
|
||||||
|
BlockNumber origrlink; /* rightlink of the page before split */
|
||||||
|
GistNSN orignsn; /* NSN of the page before split */
|
||||||
|
bool origleaf; /* was the split page a leaf page? */
|
||||||
|
|
||||||
|
uint16 npage; /* # of pages in the split */
|
||||||
|
bool markfollowright; /* set F_FOLLOW_RIGHT flags */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* follow: 1. gistxlogPage and array of IndexTupleData per page
|
||||||
|
*/
|
||||||
|
} gistxlogPageSplit;
|
||||||
|
|
||||||
|
extern void gist_redo(XLogReaderState *record);
|
||||||
|
extern void gist_desc(StringInfo buf, XLogReaderState *record);
|
||||||
|
extern const char *gist_identify(uint8 info);
|
||||||
|
extern void gist_xlog_startup(void);
|
||||||
|
extern void gist_xlog_cleanup(void);
|
||||||
|
extern void gist_mask(char *pagedata, BlockNumber blkno);
|
||||||
|
|
||||||
|
#endif
|
@ -14,8 +14,8 @@
|
|||||||
#ifndef HASH_XLOG_H
|
#ifndef HASH_XLOG_H
|
||||||
#define HASH_XLOG_H
|
#define HASH_XLOG_H
|
||||||
|
|
||||||
#include "access/hash.h"
|
|
||||||
#include "access/xlogreader.h"
|
#include "access/xlogreader.h"
|
||||||
|
#include "lib/stringinfo.h"
|
||||||
|
|
||||||
|
|
||||||
extern void hash_redo(XLogReaderState *record);
|
extern void hash_redo(XLogReaderState *record);
|
||||||
|
@ -204,232 +204,6 @@ typedef struct BTMetaPageData
|
|||||||
#define P_FIRSTKEY ((OffsetNumber) 2)
|
#define P_FIRSTKEY ((OffsetNumber) 2)
|
||||||
#define P_FIRSTDATAKEY(opaque) (P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY)
|
#define P_FIRSTDATAKEY(opaque) (P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY)
|
||||||
|
|
||||||
/*
|
|
||||||
* XLOG records for btree operations
|
|
||||||
*
|
|
||||||
* XLOG allows to store some information in high 4 bits of log
|
|
||||||
* record xl_info field
|
|
||||||
*/
|
|
||||||
#define XLOG_BTREE_INSERT_LEAF 0x00 /* add index tuple without split */
|
|
||||||
#define XLOG_BTREE_INSERT_UPPER 0x10 /* same, on a non-leaf page */
|
|
||||||
#define XLOG_BTREE_INSERT_META 0x20 /* same, plus update metapage */
|
|
||||||
#define XLOG_BTREE_SPLIT_L 0x30 /* add index tuple with split */
|
|
||||||
#define XLOG_BTREE_SPLIT_R 0x40 /* as above, new item on right */
|
|
||||||
#define XLOG_BTREE_SPLIT_L_ROOT 0x50 /* add tuple with split of root */
|
|
||||||
#define XLOG_BTREE_SPLIT_R_ROOT 0x60 /* as above, new item on right */
|
|
||||||
#define XLOG_BTREE_DELETE 0x70 /* delete leaf index tuples for a page */
|
|
||||||
#define XLOG_BTREE_UNLINK_PAGE 0x80 /* delete a half-dead page */
|
|
||||||
#define XLOG_BTREE_UNLINK_PAGE_META 0x90 /* same, and update metapage */
|
|
||||||
#define XLOG_BTREE_NEWROOT 0xA0 /* new root page */
|
|
||||||
#define XLOG_BTREE_MARK_PAGE_HALFDEAD 0xB0 /* mark a leaf as half-dead */
|
|
||||||
#define XLOG_BTREE_VACUUM 0xC0 /* delete entries on a page during
|
|
||||||
* vacuum */
|
|
||||||
#define XLOG_BTREE_REUSE_PAGE 0xD0 /* old page is about to be reused from
|
|
||||||
* FSM */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* All that we need to regenerate the meta-data page
|
|
||||||
*/
|
|
||||||
typedef struct xl_btree_metadata
|
|
||||||
{
|
|
||||||
BlockNumber root;
|
|
||||||
uint32 level;
|
|
||||||
BlockNumber fastroot;
|
|
||||||
uint32 fastlevel;
|
|
||||||
} xl_btree_metadata;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This is what we need to know about simple (without split) insert.
|
|
||||||
*
|
|
||||||
* This data record is used for INSERT_LEAF, INSERT_UPPER, INSERT_META.
|
|
||||||
* Note that INSERT_META implies it's not a leaf page.
|
|
||||||
*
|
|
||||||
* Backup Blk 0: original page (data contains the inserted tuple)
|
|
||||||
* Backup Blk 1: child's left sibling, if INSERT_UPPER or INSERT_META
|
|
||||||
* Backup Blk 2: xl_btree_metadata, if INSERT_META
|
|
||||||
*/
|
|
||||||
typedef struct xl_btree_insert
|
|
||||||
{
|
|
||||||
OffsetNumber offnum;
|
|
||||||
} xl_btree_insert;
|
|
||||||
|
|
||||||
#define SizeOfBtreeInsert (offsetof(xl_btree_insert, offnum) + sizeof(OffsetNumber))
|
|
||||||
|
|
||||||
/*
|
|
||||||
* On insert with split, we save all the items going into the right sibling
|
|
||||||
* so that we can restore it completely from the log record. This way takes
|
|
||||||
* less xlog space than the normal approach, because if we did it standardly,
|
|
||||||
* XLogInsert would almost always think the right page is new and store its
|
|
||||||
* whole page image. The left page, however, is handled in the normal
|
|
||||||
* incremental-update fashion.
|
|
||||||
*
|
|
||||||
* Note: the four XLOG_BTREE_SPLIT xl_info codes all use this data record.
|
|
||||||
* The _L and _R variants indicate whether the inserted tuple went into the
|
|
||||||
* left or right split page (and thus, whether newitemoff and the new item
|
|
||||||
* are stored or not). The _ROOT variants indicate that we are splitting
|
|
||||||
* the root page, and thus that a newroot record rather than an insert or
|
|
||||||
* split record should follow. Note that a split record never carries a
|
|
||||||
* metapage update --- we'll do that in the parent-level update.
|
|
||||||
*
|
|
||||||
* Backup Blk 0: original page / new left page
|
|
||||||
*
|
|
||||||
* The left page's data portion contains the new item, if it's the _L variant.
|
|
||||||
* (In the _R variants, the new item is one of the right page's tuples.)
|
|
||||||
* If level > 0, an IndexTuple representing the HIKEY of the left page
|
|
||||||
* follows. We don't need this on leaf pages, because it's the same as the
|
|
||||||
* leftmost key in the new right page.
|
|
||||||
*
|
|
||||||
* Backup Blk 1: new right page
|
|
||||||
*
|
|
||||||
* The right page's data portion contains the right page's tuples in the
|
|
||||||
* form used by _bt_restore_page.
|
|
||||||
*
|
|
||||||
* Backup Blk 2: next block (orig page's rightlink), if any
|
|
||||||
* Backup Blk 3: child's left sibling, if non-leaf split
|
|
||||||
*/
|
|
||||||
typedef struct xl_btree_split
|
|
||||||
{
|
|
||||||
uint32 level; /* tree level of page being split */
|
|
||||||
OffsetNumber firstright; /* first item moved to right page */
|
|
||||||
OffsetNumber newitemoff; /* new item's offset (if placed on left page) */
|
|
||||||
} xl_btree_split;
|
|
||||||
|
|
||||||
#define SizeOfBtreeSplit (offsetof(xl_btree_split, newitemoff) + sizeof(OffsetNumber))
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This is what we need to know about delete of individual leaf index tuples.
|
|
||||||
* The WAL record can represent deletion of any number of index tuples on a
|
|
||||||
* single index page when *not* executed by VACUUM.
|
|
||||||
*
|
|
||||||
* Backup Blk 0: index page
|
|
||||||
*/
|
|
||||||
typedef struct xl_btree_delete
|
|
||||||
{
|
|
||||||
RelFileNode hnode; /* RelFileNode of the heap the index currently
|
|
||||||
* points at */
|
|
||||||
int nitems;
|
|
||||||
|
|
||||||
/* TARGET OFFSET NUMBERS FOLLOW AT THE END */
|
|
||||||
} xl_btree_delete;
|
|
||||||
|
|
||||||
#define SizeOfBtreeDelete (offsetof(xl_btree_delete, nitems) + sizeof(int))
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This is what we need to know about page reuse within btree.
|
|
||||||
*/
|
|
||||||
typedef struct xl_btree_reuse_page
|
|
||||||
{
|
|
||||||
RelFileNode node;
|
|
||||||
BlockNumber block;
|
|
||||||
TransactionId latestRemovedXid;
|
|
||||||
} xl_btree_reuse_page;
|
|
||||||
|
|
||||||
#define SizeOfBtreeReusePage (sizeof(xl_btree_reuse_page))
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This is what we need to know about vacuum of individual leaf index tuples.
|
|
||||||
* The WAL record can represent deletion of any number of index tuples on a
|
|
||||||
* single index page when executed by VACUUM.
|
|
||||||
*
|
|
||||||
* For MVCC scans, lastBlockVacuumed will be set to InvalidBlockNumber.
|
|
||||||
* For a non-MVCC index scans there is an additional correctness requirement
|
|
||||||
* for applying these changes during recovery, which is that we must do one
|
|
||||||
* of these two things for every block in the index:
|
|
||||||
* * lock the block for cleanup and apply any required changes
|
|
||||||
* * EnsureBlockUnpinned()
|
|
||||||
* The purpose of this is to ensure that no index scans started before we
|
|
||||||
* finish scanning the index are still running by the time we begin to remove
|
|
||||||
* heap tuples.
|
|
||||||
*
|
|
||||||
* Any changes to any one block are registered on just one WAL record. All
|
|
||||||
* blocks that we need to run EnsureBlockUnpinned() are listed as a block range
|
|
||||||
* starting from the last block vacuumed through until this one. Individual
|
|
||||||
* block numbers aren't given.
|
|
||||||
*
|
|
||||||
* Note that the *last* WAL record in any vacuum of an index is allowed to
|
|
||||||
* have a zero length array of offsets. Earlier records must have at least one.
|
|
||||||
*/
|
|
||||||
typedef struct xl_btree_vacuum
|
|
||||||
{
|
|
||||||
BlockNumber lastBlockVacuumed;
|
|
||||||
|
|
||||||
/* TARGET OFFSET NUMBERS FOLLOW */
|
|
||||||
} xl_btree_vacuum;
|
|
||||||
|
|
||||||
#define SizeOfBtreeVacuum (offsetof(xl_btree_vacuum, lastBlockVacuumed) + sizeof(BlockNumber))
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This is what we need to know about marking an empty branch for deletion.
|
|
||||||
* The target identifies the tuple removed from the parent page (note that we
|
|
||||||
* remove this tuple's downlink and the *following* tuple's key). Note that
|
|
||||||
* the leaf page is empty, so we don't need to store its content --- it is
|
|
||||||
* just reinitialized during recovery using the rest of the fields.
|
|
||||||
*
|
|
||||||
* Backup Blk 0: leaf block
|
|
||||||
* Backup Blk 1: top parent
|
|
||||||
*/
|
|
||||||
typedef struct xl_btree_mark_page_halfdead
|
|
||||||
{
|
|
||||||
OffsetNumber poffset; /* deleted tuple id in parent page */
|
|
||||||
|
|
||||||
/* information needed to recreate the leaf page: */
|
|
||||||
BlockNumber leafblk; /* leaf block ultimately being deleted */
|
|
||||||
BlockNumber leftblk; /* leaf block's left sibling, if any */
|
|
||||||
BlockNumber rightblk; /* leaf block's right sibling */
|
|
||||||
BlockNumber topparent; /* topmost internal page in the branch */
|
|
||||||
} xl_btree_mark_page_halfdead;
|
|
||||||
|
|
||||||
#define SizeOfBtreeMarkPageHalfDead (offsetof(xl_btree_mark_page_halfdead, topparent) + sizeof(BlockNumber))
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This is what we need to know about deletion of a btree page. Note we do
|
|
||||||
* not store any content for the deleted page --- it is just rewritten as empty
|
|
||||||
* during recovery, apart from resetting the btpo.xact.
|
|
||||||
*
|
|
||||||
* Backup Blk 0: target block being deleted
|
|
||||||
* Backup Blk 1: target block's left sibling, if any
|
|
||||||
* Backup Blk 2: target block's right sibling
|
|
||||||
* Backup Blk 3: leaf block (if different from target)
|
|
||||||
* Backup Blk 4: metapage (if rightsib becomes new fast root)
|
|
||||||
*/
|
|
||||||
typedef struct xl_btree_unlink_page
|
|
||||||
{
|
|
||||||
BlockNumber leftsib; /* target block's left sibling, if any */
|
|
||||||
BlockNumber rightsib; /* target block's right sibling */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Information needed to recreate the leaf page, when target is an
|
|
||||||
* internal page.
|
|
||||||
*/
|
|
||||||
BlockNumber leafleftsib;
|
|
||||||
BlockNumber leafrightsib;
|
|
||||||
BlockNumber topparent; /* next child down in the branch */
|
|
||||||
|
|
||||||
TransactionId btpo_xact; /* value of btpo.xact for use in recovery */
|
|
||||||
/* xl_btree_metadata FOLLOWS IF XLOG_BTREE_UNLINK_PAGE_META */
|
|
||||||
} xl_btree_unlink_page;
|
|
||||||
|
|
||||||
#define SizeOfBtreeUnlinkPage (offsetof(xl_btree_unlink_page, btpo_xact) + sizeof(TransactionId))
|
|
||||||
|
|
||||||
/*
|
|
||||||
* New root log record. There are zero tuples if this is to establish an
|
|
||||||
* empty root, or two if it is the result of splitting an old root.
|
|
||||||
*
|
|
||||||
* Note that although this implies rewriting the metadata page, we don't need
|
|
||||||
* an xl_btree_metadata record --- the rootblk and level are sufficient.
|
|
||||||
*
|
|
||||||
* Backup Blk 0: new root page (2 tuples as payload, if splitting old root)
|
|
||||||
* Backup Blk 1: left child (if splitting an old root)
|
|
||||||
* Backup Blk 2: metapage
|
|
||||||
*/
|
|
||||||
typedef struct xl_btree_newroot
|
|
||||||
{
|
|
||||||
BlockNumber rootblk; /* location of new root (redundant with blk 0) */
|
|
||||||
uint32 level; /* its tree level */
|
|
||||||
} xl_btree_newroot;
|
|
||||||
|
|
||||||
#define SizeOfBtreeNewroot (offsetof(xl_btree_newroot, level) + sizeof(uint32))
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Operator strategy numbers for B-tree have been moved to access/stratnum.h,
|
* Operator strategy numbers for B-tree have been moved to access/stratnum.h,
|
||||||
@ -769,12 +543,4 @@ extern void _bt_spool(BTSpool *btspool, ItemPointer self,
|
|||||||
Datum *values, bool *isnull);
|
Datum *values, bool *isnull);
|
||||||
extern void _bt_leafbuild(BTSpool *btspool, BTSpool *spool2);
|
extern void _bt_leafbuild(BTSpool *btspool, BTSpool *spool2);
|
||||||
|
|
||||||
/*
|
|
||||||
* prototypes for functions in nbtxlog.c
|
|
||||||
*/
|
|
||||||
extern void btree_redo(XLogReaderState *record);
|
|
||||||
extern void btree_desc(StringInfo buf, XLogReaderState *record);
|
|
||||||
extern const char *btree_identify(uint8 info);
|
|
||||||
extern void btree_mask(char *pagedata, BlockNumber blkno);
|
|
||||||
|
|
||||||
#endif /* NBTREE_H */
|
#endif /* NBTREE_H */
|
||||||
|
255
src/include/access/nbtxlog.h
Normal file
255
src/include/access/nbtxlog.h
Normal file
@ -0,0 +1,255 @@
|
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* nbtxlog.h
|
||||||
|
* header file for postgres btree xlog routines
|
||||||
|
*
|
||||||
|
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
|
||||||
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
|
*
|
||||||
|
* src/include/access/nbtxlog.h
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#ifndef NBTXLOG_H
|
||||||
|
#define NBTXLOG_H
|
||||||
|
|
||||||
|
#include "access/xlogreader.h"
|
||||||
|
#include "lib/stringinfo.h"
|
||||||
|
#include "storage/off.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* XLOG records for btree operations
|
||||||
|
*
|
||||||
|
* XLOG allows us to store some information in the high 4 bits of the log
|
||||||
|
* record xl_info field
|
||||||
|
*/
|
||||||
|
#define XLOG_BTREE_INSERT_LEAF 0x00 /* add index tuple without split */
|
||||||
|
#define XLOG_BTREE_INSERT_UPPER 0x10 /* same, on a non-leaf page */
|
||||||
|
#define XLOG_BTREE_INSERT_META 0x20 /* same, plus update metapage */
|
||||||
|
#define XLOG_BTREE_SPLIT_L 0x30 /* add index tuple with split */
|
||||||
|
#define XLOG_BTREE_SPLIT_R 0x40 /* as above, new item on right */
|
||||||
|
#define XLOG_BTREE_SPLIT_L_ROOT 0x50 /* add tuple with split of root */
|
||||||
|
#define XLOG_BTREE_SPLIT_R_ROOT 0x60 /* as above, new item on right */
|
||||||
|
#define XLOG_BTREE_DELETE 0x70 /* delete leaf index tuples for a page */
|
||||||
|
#define XLOG_BTREE_UNLINK_PAGE 0x80 /* delete a half-dead page */
|
||||||
|
#define XLOG_BTREE_UNLINK_PAGE_META 0x90 /* same, and update metapage */
|
||||||
|
#define XLOG_BTREE_NEWROOT 0xA0 /* new root page */
|
||||||
|
#define XLOG_BTREE_MARK_PAGE_HALFDEAD 0xB0 /* mark a leaf as half-dead */
|
||||||
|
#define XLOG_BTREE_VACUUM 0xC0 /* delete entries on a page during
|
||||||
|
* vacuum */
|
||||||
|
#define XLOG_BTREE_REUSE_PAGE 0xD0 /* old page is about to be reused from
|
||||||
|
* FSM */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* All that we need to regenerate the meta-data page
|
||||||
|
*/
|
||||||
|
typedef struct xl_btree_metadata
|
||||||
|
{
|
||||||
|
BlockNumber root;
|
||||||
|
uint32 level;
|
||||||
|
BlockNumber fastroot;
|
||||||
|
uint32 fastlevel;
|
||||||
|
} xl_btree_metadata;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is what we need to know about simple (without split) insert.
|
||||||
|
*
|
||||||
|
* This data record is used for INSERT_LEAF, INSERT_UPPER, INSERT_META.
|
||||||
|
* Note that INSERT_META implies it's not a leaf page.
|
||||||
|
*
|
||||||
|
* Backup Blk 0: original page (data contains the inserted tuple)
|
||||||
|
* Backup Blk 1: child's left sibling, if INSERT_UPPER or INSERT_META
|
||||||
|
* Backup Blk 2: xl_btree_metadata, if INSERT_META
|
||||||
|
*/
|
||||||
|
typedef struct xl_btree_insert
|
||||||
|
{
|
||||||
|
OffsetNumber offnum;
|
||||||
|
} xl_btree_insert;
|
||||||
|
|
||||||
|
#define SizeOfBtreeInsert (offsetof(xl_btree_insert, offnum) + sizeof(OffsetNumber))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* On insert with split, we save all the items going into the right sibling
|
||||||
|
* so that we can restore it completely from the log record. This way takes
|
||||||
|
* less xlog space than the normal approach, because if we did it standardly,
|
||||||
|
* XLogInsert would almost always think the right page is new and store its
|
||||||
|
* whole page image. The left page, however, is handled in the normal
|
||||||
|
* incremental-update fashion.
|
||||||
|
*
|
||||||
|
* Note: the four XLOG_BTREE_SPLIT xl_info codes all use this data record.
|
||||||
|
* The _L and _R variants indicate whether the inserted tuple went into the
|
||||||
|
* left or right split page (and thus, whether newitemoff and the new item
|
||||||
|
* are stored or not). The _ROOT variants indicate that we are splitting
|
||||||
|
* the root page, and thus that a newroot record rather than an insert or
|
||||||
|
* split record should follow. Note that a split record never carries a
|
||||||
|
* metapage update --- we'll do that in the parent-level update.
|
||||||
|
*
|
||||||
|
* Backup Blk 0: original page / new left page
|
||||||
|
*
|
||||||
|
* The left page's data portion contains the new item, if it's the _L variant.
|
||||||
|
* (In the _R variants, the new item is one of the right page's tuples.)
|
||||||
|
* If level > 0, an IndexTuple representing the HIKEY of the left page
|
||||||
|
* follows. We don't need this on leaf pages, because it's the same as the
|
||||||
|
* leftmost key in the new right page.
|
||||||
|
*
|
||||||
|
* Backup Blk 1: new right page
|
||||||
|
*
|
||||||
|
* The right page's data portion contains the right page's tuples in the
|
||||||
|
* form used by _bt_restore_page.
|
||||||
|
*
|
||||||
|
* Backup Blk 2: next block (orig page's rightlink), if any
|
||||||
|
* Backup Blk 3: child's left sibling, if non-leaf split
|
||||||
|
*/
|
||||||
|
typedef struct xl_btree_split
|
||||||
|
{
|
||||||
|
uint32 level; /* tree level of page being split */
|
||||||
|
OffsetNumber firstright; /* first item moved to right page */
|
||||||
|
OffsetNumber newitemoff; /* new item's offset (if placed on left page) */
|
||||||
|
} xl_btree_split;
|
||||||
|
|
||||||
|
#define SizeOfBtreeSplit (offsetof(xl_btree_split, newitemoff) + sizeof(OffsetNumber))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is what we need to know about delete of individual leaf index tuples.
|
||||||
|
* The WAL record can represent deletion of any number of index tuples on a
|
||||||
|
* single index page when *not* executed by VACUUM.
|
||||||
|
*
|
||||||
|
* Backup Blk 0: index page
|
||||||
|
*/
|
||||||
|
typedef struct xl_btree_delete
|
||||||
|
{
|
||||||
|
RelFileNode hnode; /* RelFileNode of the heap the index currently
|
||||||
|
* points at */
|
||||||
|
int nitems;
|
||||||
|
|
||||||
|
/* TARGET OFFSET NUMBERS FOLLOW AT THE END */
|
||||||
|
} xl_btree_delete;
|
||||||
|
|
||||||
|
#define SizeOfBtreeDelete (offsetof(xl_btree_delete, nitems) + sizeof(int))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is what we need to know about page reuse within btree.
|
||||||
|
*/
|
||||||
|
typedef struct xl_btree_reuse_page
|
||||||
|
{
|
||||||
|
RelFileNode node;
|
||||||
|
BlockNumber block;
|
||||||
|
TransactionId latestRemovedXid;
|
||||||
|
} xl_btree_reuse_page;
|
||||||
|
|
||||||
|
#define SizeOfBtreeReusePage (sizeof(xl_btree_reuse_page))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is what we need to know about vacuum of individual leaf index tuples.
|
||||||
|
* The WAL record can represent deletion of any number of index tuples on a
|
||||||
|
* single index page when executed by VACUUM.
|
||||||
|
*
|
||||||
|
* For MVCC scans, lastBlockVacuumed will be set to InvalidBlockNumber.
|
||||||
|
* For non-MVCC index scans there is an additional correctness requirement
|
||||||
|
* for applying these changes during recovery, which is that we must do one
|
||||||
|
* of these two things for every block in the index:
|
||||||
|
* * lock the block for cleanup and apply any required changes
|
||||||
|
* * EnsureBlockUnpinned()
|
||||||
|
* The purpose of this is to ensure that no index scans started before we
|
||||||
|
* finish scanning the index are still running by the time we begin to remove
|
||||||
|
* heap tuples.
|
||||||
|
*
|
||||||
|
* Any changes to any one block are registered on just one WAL record. All
|
||||||
|
* blocks that we need to run EnsureBlockUnpinned() on are listed as a block range
|
||||||
|
* starting from the last block vacuumed through until this one. Individual
|
||||||
|
* block numbers aren't given.
|
||||||
|
*
|
||||||
|
* Note that the *last* WAL record in any vacuum of an index is allowed to
|
||||||
|
* have a zero length array of offsets. Earlier records must have at least one.
|
||||||
|
*/
|
||||||
|
typedef struct xl_btree_vacuum
|
||||||
|
{
|
||||||
|
BlockNumber lastBlockVacuumed;
|
||||||
|
|
||||||
|
/* TARGET OFFSET NUMBERS FOLLOW */
|
||||||
|
} xl_btree_vacuum;
|
||||||
|
|
||||||
|
#define SizeOfBtreeVacuum (offsetof(xl_btree_vacuum, lastBlockVacuumed) + sizeof(BlockNumber))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is what we need to know about marking an empty branch for deletion.
|
||||||
|
* The target identifies the tuple removed from the parent page (note that we
|
||||||
|
* remove this tuple's downlink and the *following* tuple's key). Note that
|
||||||
|
* the leaf page is empty, so we don't need to store its content --- it is
|
||||||
|
* just reinitialized during recovery using the rest of the fields.
|
||||||
|
*
|
||||||
|
* Backup Blk 0: leaf block
|
||||||
|
* Backup Blk 1: top parent
|
||||||
|
*/
|
||||||
|
typedef struct xl_btree_mark_page_halfdead
|
||||||
|
{
|
||||||
|
OffsetNumber poffset; /* deleted tuple id in parent page */
|
||||||
|
|
||||||
|
/* information needed to recreate the leaf page: */
|
||||||
|
BlockNumber leafblk; /* leaf block ultimately being deleted */
|
||||||
|
BlockNumber leftblk; /* leaf block's left sibling, if any */
|
||||||
|
BlockNumber rightblk; /* leaf block's right sibling */
|
||||||
|
BlockNumber topparent; /* topmost internal page in the branch */
|
||||||
|
} xl_btree_mark_page_halfdead;
|
||||||
|
|
||||||
|
#define SizeOfBtreeMarkPageHalfDead (offsetof(xl_btree_mark_page_halfdead, topparent) + sizeof(BlockNumber))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is what we need to know about deletion of a btree page. Note we do
|
||||||
|
* not store any content for the deleted page --- it is just rewritten as empty
|
||||||
|
* during recovery, apart from resetting the btpo.xact.
|
||||||
|
*
|
||||||
|
* Backup Blk 0: target block being deleted
|
||||||
|
* Backup Blk 1: target block's left sibling, if any
|
||||||
|
* Backup Blk 2: target block's right sibling
|
||||||
|
* Backup Blk 3: leaf block (if different from target)
|
||||||
|
* Backup Blk 4: metapage (if rightsib becomes new fast root)
|
||||||
|
*/
|
||||||
|
typedef struct xl_btree_unlink_page
|
||||||
|
{
|
||||||
|
BlockNumber leftsib; /* target block's left sibling, if any */
|
||||||
|
BlockNumber rightsib; /* target block's right sibling */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Information needed to recreate the leaf page, when target is an
|
||||||
|
* internal page.
|
||||||
|
*/
|
||||||
|
BlockNumber leafleftsib;
|
||||||
|
BlockNumber leafrightsib;
|
||||||
|
BlockNumber topparent; /* next child down in the branch */
|
||||||
|
|
||||||
|
TransactionId btpo_xact; /* value of btpo.xact for use in recovery */
|
||||||
|
/* xl_btree_metadata FOLLOWS IF XLOG_BTREE_UNLINK_PAGE_META */
|
||||||
|
} xl_btree_unlink_page;
|
||||||
|
|
||||||
|
#define SizeOfBtreeUnlinkPage (offsetof(xl_btree_unlink_page, btpo_xact) + sizeof(TransactionId))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* New root log record. There are zero tuples if this is to establish an
|
||||||
|
* empty root, or two if it is the result of splitting an old root.
|
||||||
|
*
|
||||||
|
* Note that although this implies rewriting the metadata page, we don't need
|
||||||
|
* an xl_btree_metadata record --- the rootblk and level are sufficient.
|
||||||
|
*
|
||||||
|
* Backup Blk 0: new root page (2 tuples as payload, if splitting old root)
|
||||||
|
* Backup Blk 1: left child (if splitting an old root)
|
||||||
|
* Backup Blk 2: metapage
|
||||||
|
*/
|
||||||
|
typedef struct xl_btree_newroot
|
||||||
|
{
|
||||||
|
BlockNumber rootblk; /* location of new root (redundant with blk 0) */
|
||||||
|
uint32 level; /* its tree level */
|
||||||
|
} xl_btree_newroot;
|
||||||
|
|
||||||
|
#define SizeOfBtreeNewroot (offsetof(xl_btree_newroot, level) + sizeof(uint32))
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* prototypes for functions in nbtxlog.c
|
||||||
|
*/
|
||||||
|
extern void btree_redo(XLogReaderState *record);
|
||||||
|
extern void btree_desc(StringInfo buf, XLogReaderState *record);
|
||||||
|
extern const char *btree_identify(uint8 info);
|
||||||
|
extern void btree_mask(char *pagedata, BlockNumber blkno);
|
||||||
|
|
||||||
|
#endif /* NBTXLOG_H */
|
@ -214,12 +214,4 @@ extern IndexBulkDeleteResult *spgvacuumcleanup(IndexVacuumInfo *info,
|
|||||||
/* spgvalidate.c */
|
/* spgvalidate.c */
|
||||||
extern bool spgvalidate(Oid opclassoid);
|
extern bool spgvalidate(Oid opclassoid);
|
||||||
|
|
||||||
/* spgxlog.c */
|
|
||||||
extern void spg_redo(XLogReaderState *record);
|
|
||||||
extern void spg_desc(StringInfo buf, XLogReaderState *record);
|
|
||||||
extern const char *spg_identify(uint8 info);
|
|
||||||
extern void spg_xlog_startup(void);
|
|
||||||
extern void spg_xlog_cleanup(void);
|
|
||||||
extern void spg_mask(char *pagedata, BlockNumber blkno);
|
|
||||||
|
|
||||||
#endif /* SPGIST_H */
|
#endif /* SPGIST_H */
|
||||||
|
@ -354,242 +354,12 @@ typedef SpGistDeadTupleData *SpGistDeadTuple;
|
|||||||
* XLOG stuff
|
* XLOG stuff
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* XLOG record types for SPGiST */
|
|
||||||
#define XLOG_SPGIST_CREATE_INDEX 0x00
|
|
||||||
#define XLOG_SPGIST_ADD_LEAF 0x10
|
|
||||||
#define XLOG_SPGIST_MOVE_LEAFS 0x20
|
|
||||||
#define XLOG_SPGIST_ADD_NODE 0x30
|
|
||||||
#define XLOG_SPGIST_SPLIT_TUPLE 0x40
|
|
||||||
#define XLOG_SPGIST_PICKSPLIT 0x50
|
|
||||||
#define XLOG_SPGIST_VACUUM_LEAF 0x60
|
|
||||||
#define XLOG_SPGIST_VACUUM_ROOT 0x70
|
|
||||||
#define XLOG_SPGIST_VACUUM_REDIRECT 0x80
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Some redo functions need an SpGistState, although only a few of its fields
|
|
||||||
* need to be valid. spgxlogState carries the required info in xlog records.
|
|
||||||
* (See fillFakeState in spgxlog.c for more comments.)
|
|
||||||
*/
|
|
||||||
typedef struct spgxlogState
|
|
||||||
{
|
|
||||||
TransactionId myXid;
|
|
||||||
bool isBuild;
|
|
||||||
} spgxlogState;
|
|
||||||
|
|
||||||
#define STORE_STATE(s, d) \
|
#define STORE_STATE(s, d) \
|
||||||
do { \
|
do { \
|
||||||
(d).myXid = (s)->myXid; \
|
(d).myXid = (s)->myXid; \
|
||||||
(d).isBuild = (s)->isBuild; \
|
(d).isBuild = (s)->isBuild; \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
/*
|
|
||||||
* Backup Blk 0: destination page for leaf tuple
|
|
||||||
* Backup Blk 1: parent page (if any)
|
|
||||||
*/
|
|
||||||
typedef struct spgxlogAddLeaf
|
|
||||||
{
|
|
||||||
bool newPage; /* init dest page? */
|
|
||||||
bool storesNulls; /* page is in the nulls tree? */
|
|
||||||
OffsetNumber offnumLeaf; /* offset where leaf tuple gets placed */
|
|
||||||
OffsetNumber offnumHeadLeaf; /* offset of head tuple in chain, if any */
|
|
||||||
|
|
||||||
OffsetNumber offnumParent; /* where the parent downlink is, if any */
|
|
||||||
uint16 nodeI;
|
|
||||||
|
|
||||||
/* new leaf tuple follows (unaligned!) */
|
|
||||||
} spgxlogAddLeaf;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Backup Blk 0: source leaf page
|
|
||||||
* Backup Blk 1: destination leaf page
|
|
||||||
* Backup Blk 2: parent page
|
|
||||||
*/
|
|
||||||
typedef struct spgxlogMoveLeafs
|
|
||||||
{
|
|
||||||
uint16 nMoves; /* number of tuples moved from source page */
|
|
||||||
bool newPage; /* init dest page? */
|
|
||||||
bool replaceDead; /* are we replacing a DEAD source tuple? */
|
|
||||||
bool storesNulls; /* pages are in the nulls tree? */
|
|
||||||
|
|
||||||
/* where the parent downlink is */
|
|
||||||
OffsetNumber offnumParent;
|
|
||||||
uint16 nodeI;
|
|
||||||
|
|
||||||
spgxlogState stateSrc;
|
|
||||||
|
|
||||||
/*----------
|
|
||||||
* data follows:
|
|
||||||
* array of deleted tuple numbers, length nMoves
|
|
||||||
* array of inserted tuple numbers, length nMoves + 1 or 1
|
|
||||||
* list of leaf tuples, length nMoves + 1 or 1 (unaligned!)
|
|
||||||
*
|
|
||||||
* Note: if replaceDead is true then there is only one inserted tuple
|
|
||||||
* number and only one leaf tuple in the data, because we are not copying
|
|
||||||
* the dead tuple from the source
|
|
||||||
*----------
|
|
||||||
*/
|
|
||||||
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER];
|
|
||||||
} spgxlogMoveLeafs;
|
|
||||||
|
|
||||||
/* Size of the fixed part of spgxlogMoveLeafs: byte offset of its trailing offsets[] member. */
#define SizeOfSpgxlogMoveLeafs offsetof(spgxlogMoveLeafs, offsets)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* xlog record data describing an updated inner tuple, which is either
* overwritten in place or placed on a new page.
*
* Backup Blk 0: original page
|
|
||||||
* Backup Blk 1: where new tuple goes, if not same place
|
|
||||||
* Backup Blk 2: where parent downlink is, if updated and different from
|
|
||||||
* the old and new
|
|
||||||
*/
|
|
||||||
typedef struct spgxlogAddNode
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Offset of the original inner tuple, in the original page (on backup
|
|
||||||
* block 0).
|
|
||||||
*/
|
|
||||||
OffsetNumber offnum;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Offset of the new tuple, on the new page (on backup block 1). Invalid,
|
|
||||||
* if we overwrote the old tuple in the original page.
|
|
||||||
*/
|
|
||||||
OffsetNumber offnumNew;
|
|
||||||
bool newPage; /* init new page? */
|
|
||||||
|
|
||||||
/*----
|
|
||||||
* Where is the parent downlink? parentBlk indicates which page it's on,
|
|
||||||
* and offnumParent is the offset within the page. The possible values for
|
|
||||||
* parentBlk are:
|
|
||||||
*
|
|
||||||
* 0: parent == original page
|
|
||||||
* 1: parent == new page
|
|
||||||
* 2: parent == different page (blk ref 2)
|
|
||||||
* -1: parent not updated
|
|
||||||
*----
|
|
||||||
*/
|
|
||||||
int8 parentBlk; /* signed, because -1 is a valid value (see above) */
|
|
||||||
OffsetNumber offnumParent; /* offset within the parent page */
|
|
||||||
|
|
||||||
uint16 nodeI; /* NOTE(review): presumably the node number within the parent tuple -- confirm */
|
|
||||||
|
|
||||||
spgxlogState stateSrc; /* spgxlogState carried in the record */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* updated inner tuple follows (unaligned!)
|
|
||||||
*/
|
|
||||||
} spgxlogAddNode;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* xlog record data describing the split of an inner tuple into a prefix
* tuple and a postfix tuple.
*
* Backup Blk 0: where the prefix tuple goes
|
|
||||||
* Backup Blk 1: where the postfix tuple goes (if different page)
|
|
||||||
*/
|
|
||||||
typedef struct spgxlogSplitTuple
|
|
||||||
{
|
|
||||||
/* where the prefix tuple goes */
|
|
||||||
OffsetNumber offnumPrefix;
|
|
||||||
|
|
||||||
/* where the postfix tuple goes */
|
|
||||||
OffsetNumber offnumPostfix;
|
|
||||||
bool newPage; /* need to init that page? */
|
|
||||||
bool postfixBlkSame; /* was postfix tuple put on same page as
|
|
||||||
* prefix? */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* new prefix inner tuple follows, then new postfix inner tuple (both are
|
|
||||||
* unaligned!)
|
|
||||||
*/
|
|
||||||
} spgxlogSplitTuple;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* xlog record data for a picksplit: tuples are deleted from the Src page,
* tuples are inserted on Src and/or Dest, and a new inner tuple is stored.
*
* Buffer references in the rdata array are:
|
|
||||||
* Backup Blk 0: Src page (only if not root)
|
|
||||||
* Backup Blk 1: Dest page (if used)
|
|
||||||
* Backup Blk 2: Inner page
|
|
||||||
* Backup Blk 3: Parent page (if any, and different from Inner)
|
|
||||||
*/
|
|
||||||
typedef struct spgxlogPickSplit
|
|
||||||
{
|
|
||||||
bool isRootSplit; /* NOTE(review): presumably true when the page being split is the root -- confirm */
|
|
||||||
|
|
||||||
uint16 nDelete; /* n to delete from Src */
|
|
||||||
uint16 nInsert; /* n to insert on Src and/or Dest */
|
|
||||||
bool initSrc; /* re-init the Src page? */
|
|
||||||
bool initDest; /* re-init the Dest page? */
|
|
||||||
|
|
||||||
/* where to put new inner tuple */
|
|
||||||
OffsetNumber offnumInner;
|
|
||||||
bool initInner; /* re-init the Inner page? */
|
|
||||||
|
|
||||||
bool storesNulls; /* pages are in the nulls tree? */
|
|
||||||
|
|
||||||
/* where the parent downlink is, if any */
|
|
||||||
bool innerIsParent; /* is parent the same as inner page? */
|
|
||||||
OffsetNumber offnumParent;
|
|
||||||
uint16 nodeI; /* NOTE(review): presumably the node number within the parent tuple -- confirm */
|
|
||||||
|
|
||||||
spgxlogState stateSrc; /* spgxlogState carried in the record */
|
|
||||||
|
|
||||||
/*----------
|
|
||||||
* data follows:
|
|
||||||
* array of deleted tuple numbers, length nDelete
|
|
||||||
* array of inserted tuple numbers, length nInsert
|
|
||||||
* array of page selector bytes for inserted tuples, length nInsert
|
|
||||||
* new inner tuple (unaligned!)
|
|
||||||
* list of leaf tuples, length nInsert (unaligned!)
|
|
||||||
*----------
|
|
||||||
*/
|
|
||||||
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]; /* start of the variable-length data described above */
|
|
||||||
} spgxlogPickSplit;
|
|
||||||
|
|
||||||
/* Size of the fixed part of spgxlogPickSplit: byte offset of its trailing offsets[] member. */
#define SizeOfSpgxlogPickSplit offsetof(spgxlogPickSplit, offsets)
|
|
||||||
|
|
||||||
/*
* xlog record data listing per-tuple changes to a leaf page: tuples that
* become DEAD or PLACEHOLDER, tuples that are moved, and chain links that
* are updated. (NOTE(review): the struct name suggests this is written by
* vacuum of a leaf page -- confirm against the code that emits it.)
*/
typedef struct spgxlogVacuumLeaf
|
|
||||||
{
|
|
||||||
uint16 nDead; /* number of tuples to become DEAD */
|
|
||||||
uint16 nPlaceholder; /* number of tuples to become PLACEHOLDER */
|
|
||||||
uint16 nMove; /* number of tuples to move */
|
|
||||||
uint16 nChain; /* number of tuples to re-chain */
|
|
||||||
|
|
||||||
spgxlogState stateSrc; /* spgxlogState carried in the record */
|
|
||||||
|
|
||||||
/*----------
|
|
||||||
* data follows:
|
|
||||||
* tuple numbers to become DEAD
|
|
||||||
* tuple numbers to become PLACEHOLDER
|
|
||||||
* tuple numbers to move from (and replace with PLACEHOLDER)
|
|
||||||
* tuple numbers to move to (replacing what is there)
|
|
||||||
* tuple numbers to update nextOffset links of
|
|
||||||
* tuple numbers to insert in nextOffset links
|
|
||||||
*----------
|
|
||||||
*/
|
|
||||||
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]; /* start of the variable-length data described above */
|
|
||||||
} spgxlogVacuumLeaf;
|
|
||||||
|
|
||||||
/* Size of the fixed part of spgxlogVacuumLeaf: byte offset of its trailing offsets[] member. */
#define SizeOfSpgxlogVacuumLeaf offsetof(spgxlogVacuumLeaf, offsets)
|
|
||||||
|
|
||||||
/* xlog record data listing the tuples to delete from a root page that is also a leaf. */
typedef struct spgxlogVacuumRoot
|
|
||||||
{
|
|
||||||
/* vacuum a root page when it is also a leaf */
|
|
||||||
uint16 nDelete; /* number of tuples to delete */
|
|
||||||
|
|
||||||
spgxlogState stateSrc; /* spgxlogState carried in the record */
|
|
||||||
|
|
||||||
/* offsets of tuples to delete follow */
|
|
||||||
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER];
|
|
||||||
} spgxlogVacuumRoot;
|
|
||||||
|
|
||||||
/* Size of the fixed part of spgxlogVacuumRoot: byte offset of its trailing offsets[] member. */
#define SizeOfSpgxlogVacuumRoot offsetof(spgxlogVacuumRoot, offsets)
|
|
||||||
|
|
||||||
/*
* xlog record data listing redirect tuples to turn into placeholders, plus
* the first placeholder tuple to remove.
*/
typedef struct spgxlogVacuumRedirect
|
|
||||||
{
|
|
||||||
uint16 nToPlaceholder; /* number of redirects to make placeholders */
|
|
||||||
OffsetNumber firstPlaceholder; /* first placeholder tuple to remove */
|
|
||||||
TransactionId newestRedirectXid; /* newest XID of removed redirects */
|
|
||||||
|
|
||||||
/* offsets of redirect tuples to make placeholders follow */
|
|
||||||
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER];
|
|
||||||
} spgxlogVacuumRedirect;
|
|
||||||
|
|
||||||
/* Size of the fixed part of spgxlogVacuumRedirect: byte offset of its trailing offsets[] member. */
#define SizeOfSpgxlogVacuumRedirect offsetof(spgxlogVacuumRedirect, offsets)
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The "flags" argument for SpGistGetBuffer should be either GBUF_LEAF to
|
* The "flags" argument for SpGistGetBuffer should be either GBUF_LEAF to
|
||||||
* get a leaf page, or GBUF_INNER_PARITY(blockNumber) to get an inner
|
* get a leaf page, or GBUF_INNER_PARITY(blockNumber) to get an inner
|
||||||
|
257
src/include/access/spgxlog.h
Normal file
257
src/include/access/spgxlog.h
Normal file
@ -0,0 +1,257 @@
|
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* spgxlog.h
|
||||||
|
* xlog declarations for SP-GiST access method.
|
||||||
|
*
|
||||||
|
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
|
||||||
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
|
*
|
||||||
|
* src/include/access/spgxlog.h
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#ifndef SPGXLOG_H
|
||||||
|
#define SPGXLOG_H
|
||||||
|
|
||||||
|
#include "access/xlogreader.h"
|
||||||
|
#include "lib/stringinfo.h"
|
||||||
|
#include "storage/off.h"
|
||||||
|
|
||||||
|
/* XLOG record types for SPGiST */
/*
* The values are consecutive multiples of 0x10, so the low four bits of
* each constant are zero. (NOTE(review): presumably these occupy the
* resource-manager bits of the record's info byte -- confirm.)
*/
|
||||||
|
#define XLOG_SPGIST_CREATE_INDEX 0x00
|
||||||
|
#define XLOG_SPGIST_ADD_LEAF 0x10
|
||||||
|
#define XLOG_SPGIST_MOVE_LEAFS 0x20
|
||||||
|
#define XLOG_SPGIST_ADD_NODE 0x30
|
||||||
|
#define XLOG_SPGIST_SPLIT_TUPLE 0x40
|
||||||
|
#define XLOG_SPGIST_PICKSPLIT 0x50
|
||||||
|
#define XLOG_SPGIST_VACUUM_LEAF 0x60
|
||||||
|
#define XLOG_SPGIST_VACUUM_ROOT 0x70
|
||||||
|
#define XLOG_SPGIST_VACUUM_REDIRECT 0x80
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Some redo functions need an SpGistState, although only a few of its fields
|
||||||
|
* need to be valid. spgxlogState carries the required info in xlog records.
|
||||||
|
* (See fillFakeState in spgxlog.c for more comments.)
|
||||||
|
*/
|
||||||
|
typedef struct spgxlogState
|
||||||
|
{
|
||||||
|
TransactionId myXid; /* NOTE(review): presumably the XID of the transaction that wrote the record -- confirm */
|
||||||
|
bool isBuild; /* NOTE(review): presumably true while the index is being built -- confirm */
|
||||||
|
} spgxlogState;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* xlog record data describing the placement of one new leaf tuple.
*
* Backup Blk 0: destination page for leaf tuple
|
||||||
|
* Backup Blk 1: parent page (if any)
|
||||||
|
*/
|
||||||
|
typedef struct spgxlogAddLeaf
|
||||||
|
{
|
||||||
|
bool newPage; /* init dest page? */
|
||||||
|
bool storesNulls; /* page is in the nulls tree? */
|
||||||
|
OffsetNumber offnumLeaf; /* offset where leaf tuple gets placed */
|
||||||
|
OffsetNumber offnumHeadLeaf; /* offset of head tuple in chain, if any */
|
||||||
|
|
||||||
|
OffsetNumber offnumParent; /* where the parent downlink is, if any */
|
||||||
|
uint16 nodeI; /* NOTE(review): presumably the node number within the parent tuple -- confirm */
|
||||||
|
|
||||||
|
/* new leaf tuple follows (unaligned!) */
|
||||||
|
} spgxlogAddLeaf;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* xlog record data describing leaf tuples moved from a source leaf page
* to a destination leaf page.
*
* Backup Blk 0: source leaf page
|
||||||
|
* Backup Blk 1: destination leaf page
|
||||||
|
* Backup Blk 2: parent page
|
||||||
|
*/
|
||||||
|
typedef struct spgxlogMoveLeafs
|
||||||
|
{
|
||||||
|
uint16 nMoves; /* number of tuples moved from source page */
|
||||||
|
bool newPage; /* init dest page? */
|
||||||
|
bool replaceDead; /* are we replacing a DEAD source tuple? */
|
||||||
|
bool storesNulls; /* pages are in the nulls tree? */
|
||||||
|
|
||||||
|
/* where the parent downlink is */
|
||||||
|
OffsetNumber offnumParent;
|
||||||
|
uint16 nodeI; /* NOTE(review): presumably the node number within the parent tuple -- confirm */
|
||||||
|
|
||||||
|
spgxlogState stateSrc; /* spgxlogState carried in the record */
|
||||||
|
|
||||||
|
/*----------
|
||||||
|
* data follows:
|
||||||
|
* array of deleted tuple numbers, length nMoves
|
||||||
|
* array of inserted tuple numbers, length nMoves + 1 or 1
|
||||||
|
* list of leaf tuples, length nMoves + 1 or 1 (unaligned!)
|
||||||
|
*
|
||||||
|
* Note: if replaceDead is true then there is only one inserted tuple
|
||||||
|
* number and only one leaf tuple in the data, because we are not copying
|
||||||
|
* the dead tuple from the source
|
||||||
|
*----------
|
||||||
|
*/
|
||||||
|
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]; /* start of the variable-length data described above */
|
||||||
|
} spgxlogMoveLeafs;
|
||||||
|
|
||||||
|
/* Size of the fixed part of spgxlogMoveLeafs: byte offset of its trailing offsets[] member. */
#define SizeOfSpgxlogMoveLeafs offsetof(spgxlogMoveLeafs, offsets)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* xlog record data describing an updated inner tuple, which is either
* overwritten in place or placed on a new page.
*
* Backup Blk 0: original page
|
||||||
|
* Backup Blk 1: where new tuple goes, if not same place
|
||||||
|
* Backup Blk 2: where parent downlink is, if updated and different from
|
||||||
|
* the old and new
|
||||||
|
*/
|
||||||
|
typedef struct spgxlogAddNode
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Offset of the original inner tuple, in the original page (on backup
|
||||||
|
* block 0).
|
||||||
|
*/
|
||||||
|
OffsetNumber offnum;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Offset of the new tuple, on the new page (on backup block 1). Invalid,
|
||||||
|
* if we overwrote the old tuple in the original page.
|
||||||
|
*/
|
||||||
|
OffsetNumber offnumNew;
|
||||||
|
bool newPage; /* init new page? */
|
||||||
|
|
||||||
|
/*----
|
||||||
|
* Where is the parent downlink? parentBlk indicates which page it's on,
|
||||||
|
* and offnumParent is the offset within the page. The possible values for
|
||||||
|
* parentBlk are:
|
||||||
|
*
|
||||||
|
* 0: parent == original page
|
||||||
|
* 1: parent == new page
|
||||||
|
* 2: parent == different page (blk ref 2)
|
||||||
|
* -1: parent not updated
|
||||||
|
*----
|
||||||
|
*/
|
||||||
|
int8 parentBlk; /* signed, because -1 is a valid value (see above) */
|
||||||
|
OffsetNumber offnumParent; /* offset within the parent page */
|
||||||
|
|
||||||
|
uint16 nodeI; /* NOTE(review): presumably the node number within the parent tuple -- confirm */
|
||||||
|
|
||||||
|
spgxlogState stateSrc; /* spgxlogState carried in the record */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* updated inner tuple follows (unaligned!)
|
||||||
|
*/
|
||||||
|
} spgxlogAddNode;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* xlog record data describing the split of an inner tuple into a prefix
* tuple and a postfix tuple.
*
* Backup Blk 0: where the prefix tuple goes
|
||||||
|
* Backup Blk 1: where the postfix tuple goes (if different page)
|
||||||
|
*/
|
||||||
|
typedef struct spgxlogSplitTuple
|
||||||
|
{
|
||||||
|
/* where the prefix tuple goes */
|
||||||
|
OffsetNumber offnumPrefix;
|
||||||
|
|
||||||
|
/* where the postfix tuple goes */
|
||||||
|
OffsetNumber offnumPostfix;
|
||||||
|
bool newPage; /* need to init that page? */
|
||||||
|
bool postfixBlkSame; /* was postfix tuple put on same page as
|
||||||
|
* prefix? */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* new prefix inner tuple follows, then new postfix inner tuple (both are
|
||||||
|
* unaligned!)
|
||||||
|
*/
|
||||||
|
} spgxlogSplitTuple;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* xlog record data for a picksplit: tuples are deleted from the Src page,
* tuples are inserted on Src and/or Dest, and a new inner tuple is stored.
*
* Buffer references in the rdata array are:
|
||||||
|
* Backup Blk 0: Src page (only if not root)
|
||||||
|
* Backup Blk 1: Dest page (if used)
|
||||||
|
* Backup Blk 2: Inner page
|
||||||
|
* Backup Blk 3: Parent page (if any, and different from Inner)
|
||||||
|
*/
|
||||||
|
typedef struct spgxlogPickSplit
|
||||||
|
{
|
||||||
|
bool isRootSplit; /* NOTE(review): presumably true when the page being split is the root -- confirm */
|
||||||
|
|
||||||
|
uint16 nDelete; /* n to delete from Src */
|
||||||
|
uint16 nInsert; /* n to insert on Src and/or Dest */
|
||||||
|
bool initSrc; /* re-init the Src page? */
|
||||||
|
bool initDest; /* re-init the Dest page? */
|
||||||
|
|
||||||
|
/* where to put new inner tuple */
|
||||||
|
OffsetNumber offnumInner;
|
||||||
|
bool initInner; /* re-init the Inner page? */
|
||||||
|
|
||||||
|
bool storesNulls; /* pages are in the nulls tree? */
|
||||||
|
|
||||||
|
/* where the parent downlink is, if any */
|
||||||
|
bool innerIsParent; /* is parent the same as inner page? */
|
||||||
|
OffsetNumber offnumParent;
|
||||||
|
uint16 nodeI; /* NOTE(review): presumably the node number within the parent tuple -- confirm */
|
||||||
|
|
||||||
|
spgxlogState stateSrc; /* spgxlogState carried in the record */
|
||||||
|
|
||||||
|
/*----------
|
||||||
|
* data follows:
|
||||||
|
* array of deleted tuple numbers, length nDelete
|
||||||
|
* array of inserted tuple numbers, length nInsert
|
||||||
|
* array of page selector bytes for inserted tuples, length nInsert
|
||||||
|
* new inner tuple (unaligned!)
|
||||||
|
* list of leaf tuples, length nInsert (unaligned!)
|
||||||
|
*----------
|
||||||
|
*/
|
||||||
|
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]; /* start of the variable-length data described above */
|
||||||
|
} spgxlogPickSplit;
|
||||||
|
|
||||||
|
/* Size of the fixed part of spgxlogPickSplit: byte offset of its trailing offsets[] member. */
#define SizeOfSpgxlogPickSplit offsetof(spgxlogPickSplit, offsets)
|
||||||
|
|
||||||
|
/*
* xlog record data listing per-tuple changes to a leaf page: tuples that
* become DEAD or PLACEHOLDER, tuples that are moved, and chain links that
* are updated. (NOTE(review): the struct name suggests this is written by
* vacuum of a leaf page -- confirm against the code that emits it.)
*/
typedef struct spgxlogVacuumLeaf
|
||||||
|
{
|
||||||
|
uint16 nDead; /* number of tuples to become DEAD */
|
||||||
|
uint16 nPlaceholder; /* number of tuples to become PLACEHOLDER */
|
||||||
|
uint16 nMove; /* number of tuples to move */
|
||||||
|
uint16 nChain; /* number of tuples to re-chain */
|
||||||
|
|
||||||
|
spgxlogState stateSrc; /* spgxlogState carried in the record */
|
||||||
|
|
||||||
|
/*----------
|
||||||
|
* data follows:
|
||||||
|
* tuple numbers to become DEAD
|
||||||
|
* tuple numbers to become PLACEHOLDER
|
||||||
|
* tuple numbers to move from (and replace with PLACEHOLDER)
|
||||||
|
* tuple numbers to move to (replacing what is there)
|
||||||
|
* tuple numbers to update nextOffset links of
|
||||||
|
* tuple numbers to insert in nextOffset links
|
||||||
|
*----------
|
||||||
|
*/
|
||||||
|
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]; /* start of the variable-length data described above */
|
||||||
|
} spgxlogVacuumLeaf;
|
||||||
|
|
||||||
|
/* Size of the fixed part of spgxlogVacuumLeaf: byte offset of its trailing offsets[] member. */
#define SizeOfSpgxlogVacuumLeaf offsetof(spgxlogVacuumLeaf, offsets)
|
||||||
|
|
||||||
|
/* xlog record data listing the tuples to delete from a root page that is also a leaf. */
typedef struct spgxlogVacuumRoot
|
||||||
|
{
|
||||||
|
/* vacuum a root page when it is also a leaf */
|
||||||
|
uint16 nDelete; /* number of tuples to delete */
|
||||||
|
|
||||||
|
spgxlogState stateSrc; /* spgxlogState carried in the record */
|
||||||
|
|
||||||
|
/* offsets of tuples to delete follow */
|
||||||
|
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER];
|
||||||
|
} spgxlogVacuumRoot;
|
||||||
|
|
||||||
|
/* Size of the fixed part of spgxlogVacuumRoot: byte offset of its trailing offsets[] member. */
#define SizeOfSpgxlogVacuumRoot offsetof(spgxlogVacuumRoot, offsets)
|
||||||
|
|
||||||
|
/*
* xlog record data listing redirect tuples to turn into placeholders, plus
* the first placeholder tuple to remove.
*/
typedef struct spgxlogVacuumRedirect
|
||||||
|
{
|
||||||
|
uint16 nToPlaceholder; /* number of redirects to make placeholders */
|
||||||
|
OffsetNumber firstPlaceholder; /* first placeholder tuple to remove */
|
||||||
|
TransactionId newestRedirectXid; /* newest XID of removed redirects */
|
||||||
|
|
||||||
|
/* offsets of redirect tuples to make placeholders follow */
|
||||||
|
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER];
|
||||||
|
} spgxlogVacuumRedirect;
|
||||||
|
|
||||||
|
/* Size of the fixed part of spgxlogVacuumRedirect: byte offset of its trailing offsets[] member. */
#define SizeOfSpgxlogVacuumRedirect offsetof(spgxlogVacuumRedirect, offsets)
|
||||||
|
|
||||||
|
/*
* Functions exported for SP-GiST xlog handling. The definitions are not in
* this header. NOTE(review): by their names these look like the resource
* manager callbacks (redo, describe, identify, startup, cleanup, mask) --
* confirm against the rmgr table.
*/
extern void spg_redo(XLogReaderState *record);
|
||||||
|
extern void spg_desc(StringInfo buf, XLogReaderState *record);
|
||||||
|
extern const char *spg_identify(uint8 info);
|
||||||
|
extern void spg_xlog_startup(void);
|
||||||
|
extern void spg_xlog_cleanup(void);
|
||||||
|
extern void spg_mask(char *pagedata, BlockNumber blkno);
|
||||||
|
|
||||||
|
#endif /* SPGXLOG_H */
|
Reference in New Issue
Block a user