1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Split index xlog headers from other private index headers.

The xlog-specific headers need to be included in both frontend code -
specifically, pg_waldump - and the backend, but the remainder of the
private headers for each index are only needed by the backend.  By
splitting the xlog stuff out into separate headers, pg_waldump pulls
in fewer backend headers, which is a good thing.

Patch by me, reviewed by Michael Paquier and Andres Freund, per a
complaint from Dilip Kumar.

Discussion: http://postgr.es/m/CA+TgmoZ=F=GkxV0YEv-A8tb+AEGy_Qa7GSiJ8deBKFATnzfEug@mail.gmail.com
This commit is contained in:
Robert Haas
2017-02-14 15:37:59 -05:00
parent fb47544d0c
commit 8da9a22636
35 changed files with 1159 additions and 1055 deletions

View File

@ -12,309 +12,12 @@
#include "access/amapi.h"
#include "access/gin.h"
#include "access/ginblock.h"
#include "access/itup.h"
#include "fmgr.h"
#include "storage/bufmgr.h"
#include "lib/rbtree.h"
/*
* Page opaque data in an inverted index page.
*
* Note: GIN does not include a page ID word as do the other index types.
* This is OK because the opaque data is only 8 bytes and so can be reliably
* distinguished by size. Revisit this if the size ever increases.
* Further note: as of 9.2, SP-GiST also uses 8-byte special space, as does
* BRIN as of 9.5. This is still OK, as long as GIN isn't using all of the
* high-order bits in its flags word, because that way the flags word cannot
* match the page IDs used by SP-GiST and BRIN.
*/
typedef struct GinPageOpaqueData
{
BlockNumber rightlink; /* next page if any */
OffsetNumber maxoff; /* number of PostingItems on GIN_DATA &
* ~GIN_LEAF page. On GIN_LIST page, number of
* heap tuples. */
uint16 flags; /* see bit definitions below */
} GinPageOpaqueData;
typedef GinPageOpaqueData *GinPageOpaque;
#define GIN_DATA (1 << 0)
#define GIN_LEAF (1 << 1)
#define GIN_DELETED (1 << 2)
#define GIN_META (1 << 3)
#define GIN_LIST (1 << 4)
#define GIN_LIST_FULLROW (1 << 5) /* makes sense only on GIN_LIST page */
#define GIN_INCOMPLETE_SPLIT (1 << 6) /* page was split, but parent not
* updated */
#define GIN_COMPRESSED (1 << 7)
/* Page numbers of fixed-location pages */
#define GIN_METAPAGE_BLKNO (0)
#define GIN_ROOT_BLKNO (1)
typedef struct GinMetaPageData
{
/*
* Pointers to head and tail of pending list, which consists of GIN_LIST
* pages. These store fast-inserted entries that haven't yet been moved
* into the regular GIN structure.
*/
BlockNumber head;
BlockNumber tail;
/*
* Free space in bytes in the pending list's tail page.
*/
uint32 tailFreeSize;
/*
* We store both number of pages and number of heap tuples that are in the
* pending list.
*/
BlockNumber nPendingPages;
int64 nPendingHeapTuples;
/*
* Statistics for planner use (accurate as of last VACUUM)
*/
BlockNumber nTotalPages;
BlockNumber nEntryPages;
BlockNumber nDataPages;
int64 nEntries;
/*
* GIN version number (ideally this should have been at the front, but too
* late now. Don't move it!)
*
* Currently 2 (for indexes initialized in 9.4 or later)
*
* Version 1 (indexes initialized in version 9.1, 9.2 or 9.3), is
* compatible, but may contain uncompressed posting tree (leaf) pages and
* posting lists. They will be converted to compressed format when
* modified.
*
* Version 0 (indexes initialized in 9.0 or before) is compatible but may
* be missing null entries, including both null keys and placeholders.
* Reject full-index-scan attempts on such indexes.
*/
int32 ginVersion;
} GinMetaPageData;
#define GIN_CURRENT_VERSION 2
#define GinPageGetMeta(p) \
((GinMetaPageData *) PageGetContents(p))
/*
* Macros for accessing a GIN index page's opaque data
*/
#define GinPageGetOpaque(page) ( (GinPageOpaque) PageGetSpecialPointer(page) )
#define GinPageIsLeaf(page) ( (GinPageGetOpaque(page)->flags & GIN_LEAF) != 0 )
#define GinPageSetLeaf(page) ( GinPageGetOpaque(page)->flags |= GIN_LEAF )
#define GinPageSetNonLeaf(page) ( GinPageGetOpaque(page)->flags &= ~GIN_LEAF )
#define GinPageIsData(page) ( (GinPageGetOpaque(page)->flags & GIN_DATA) != 0 )
#define GinPageSetData(page) ( GinPageGetOpaque(page)->flags |= GIN_DATA )
#define GinPageIsList(page) ( (GinPageGetOpaque(page)->flags & GIN_LIST) != 0 )
#define GinPageSetList(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST )
#define GinPageHasFullRow(page) ( (GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW) != 0 )
#define GinPageSetFullRow(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST_FULLROW )
#define GinPageIsCompressed(page) ( (GinPageGetOpaque(page)->flags & GIN_COMPRESSED) != 0 )
#define GinPageSetCompressed(page) ( GinPageGetOpaque(page)->flags |= GIN_COMPRESSED )
#define GinPageIsDeleted(page) ( (GinPageGetOpaque(page)->flags & GIN_DELETED) != 0 )
#define GinPageSetDeleted(page) ( GinPageGetOpaque(page)->flags |= GIN_DELETED)
#define GinPageSetNonDeleted(page) ( GinPageGetOpaque(page)->flags &= ~GIN_DELETED)
#define GinPageIsIncompleteSplit(page) ( (GinPageGetOpaque(page)->flags & GIN_INCOMPLETE_SPLIT) != 0 )
#define GinPageRightMost(page) ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber)
/*
* We use our own ItemPointerGet(BlockNumber|OffsetNumber)
* to avoid Asserts, since sometimes the ip_posid isn't "valid"
*/
#define GinItemPointerGetBlockNumber(pointer) \
BlockIdGetBlockNumber(&(pointer)->ip_blkid)
#define GinItemPointerGetOffsetNumber(pointer) \
((pointer)->ip_posid)
/*
* Special-case item pointer values needed by the GIN search logic.
* MIN: sorts less than any valid item pointer
* MAX: sorts greater than any valid item pointer
* LOSSY PAGE: indicates a whole heap page, sorts after normal item
* pointers for that page
* Note that these are all distinguishable from an "invalid" item pointer
* (which is InvalidBlockNumber/0) as well as from all normal item
* pointers (which have item numbers in the range 1..MaxHeapTuplesPerPage).
*/
#define ItemPointerSetMin(p) \
ItemPointerSet((p), (BlockNumber)0, (OffsetNumber)0)
#define ItemPointerIsMin(p) \
(GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0 && \
GinItemPointerGetBlockNumber(p) == (BlockNumber)0)
#define ItemPointerSetMax(p) \
ItemPointerSet((p), InvalidBlockNumber, (OffsetNumber)0xffff)
#define ItemPointerIsMax(p) \
(GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \
GinItemPointerGetBlockNumber(p) == InvalidBlockNumber)
#define ItemPointerSetLossyPage(p, b) \
ItemPointerSet((p), (b), (OffsetNumber)0xffff)
#define ItemPointerIsLossyPage(p) \
(GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \
GinItemPointerGetBlockNumber(p) != InvalidBlockNumber)
/*
* Posting item in a non-leaf posting-tree page
*/
typedef struct
{
/* We use BlockIdData not BlockNumber to avoid padding space wastage */
BlockIdData child_blkno;
ItemPointerData key;
} PostingItem;
#define PostingItemGetBlockNumber(pointer) \
BlockIdGetBlockNumber(&(pointer)->child_blkno)
#define PostingItemSetBlockNumber(pointer, blockNumber) \
BlockIdSet(&((pointer)->child_blkno), (blockNumber))
/*
* Category codes to distinguish placeholder nulls from ordinary NULL keys.
* Note that the datatype size and the first two code values are chosen to be
* compatible with the usual usage of bool isNull flags.
*
* GIN_CAT_EMPTY_QUERY is never stored in the index; and notice that it is
* chosen to sort before not after regular key values.
*/
typedef signed char GinNullCategory;
#define GIN_CAT_NORM_KEY 0 /* normal, non-null key value */
#define GIN_CAT_NULL_KEY 1 /* null key value */
#define GIN_CAT_EMPTY_ITEM 2 /* placeholder for zero-key item */
#define GIN_CAT_NULL_ITEM 3 /* placeholder for null item */
#define GIN_CAT_EMPTY_QUERY (-1) /* placeholder for full-scan query */
/*
* Access macros for null category byte in entry tuples
*/
#define GinCategoryOffset(itup,ginstate) \
(IndexInfoFindDataOffset((itup)->t_info) + \
((ginstate)->oneCol ? 0 : sizeof(int16)))
#define GinGetNullCategory(itup,ginstate) \
(*((GinNullCategory *) ((char*)(itup) + GinCategoryOffset(itup,ginstate))))
#define GinSetNullCategory(itup,ginstate,c) \
(*((GinNullCategory *) ((char*)(itup) + GinCategoryOffset(itup,ginstate))) = (c))
/*
* Access macros for leaf-page entry tuples (see discussion in README)
*/
#define GinGetNPosting(itup) GinItemPointerGetOffsetNumber(&(itup)->t_tid)
#define GinSetNPosting(itup,n) ItemPointerSetOffsetNumber(&(itup)->t_tid,n)
#define GIN_TREE_POSTING ((OffsetNumber)0xffff)
#define GinIsPostingTree(itup) (GinGetNPosting(itup) == GIN_TREE_POSTING)
#define GinSetPostingTree(itup, blkno) ( GinSetNPosting((itup),GIN_TREE_POSTING), ItemPointerSetBlockNumber(&(itup)->t_tid, blkno) )
#define GinGetPostingTree(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid)
#define GIN_ITUP_COMPRESSED (1U << 31)
#define GinGetPostingOffset(itup) (GinItemPointerGetBlockNumber(&(itup)->t_tid) & (~GIN_ITUP_COMPRESSED))
#define GinSetPostingOffset(itup,n) ItemPointerSetBlockNumber(&(itup)->t_tid,(n)|GIN_ITUP_COMPRESSED)
#define GinGetPosting(itup) ((Pointer) ((char*)(itup) + GinGetPostingOffset(itup)))
#define GinItupIsCompressed(itup) ((GinItemPointerGetBlockNumber(&(itup)->t_tid) & GIN_ITUP_COMPRESSED) != 0)
/*
* Maximum size of an item on entry tree page. Make sure that we fit at least
* three items on each page. (On regular B-tree indexes, we must fit at least
* three items: two data items and the "high key". In GIN entry tree, we don't
* currently store the high key explicitly, we just use the rightmost item on
* the page, so it would actually be enough to fit two items.)
*/
#define GinMaxItemSize \
Min(INDEX_SIZE_MASK, \
MAXALIGN_DOWN(((BLCKSZ - \
MAXALIGN(SizeOfPageHeaderData + 3 * sizeof(ItemIdData)) - \
MAXALIGN(sizeof(GinPageOpaqueData))) / 3)))
/*
* Access macros for non-leaf entry tuples
*/
#define GinGetDownlink(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid)
#define GinSetDownlink(itup,blkno) ItemPointerSet(&(itup)->t_tid, blkno, InvalidOffsetNumber)
/*
* Data (posting tree) pages
*
* Posting tree pages don't store regular tuples. Non-leaf pages contain
* PostingItems, which are pairs of ItemPointers and child block numbers.
* Leaf pages contain GinPostingLists and an uncompressed array of item
* pointers.
*
* In a leaf page, the compressed posting lists are stored after the regular
* page header, one after each other. Although we don't store regular tuples,
* pd_lower is used to indicate the end of the posting lists. After that, free
* space follows. This layout is compatible with the "standard" heap and
* index page layout described in bufpage.h, so that we can e.g set buffer_std
* when writing WAL records.
*
* In the special space is the GinPageOpaque struct.
*/
#define GinDataLeafPageGetPostingList(page) \
(GinPostingList *) ((PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData))))
#define GinDataLeafPageGetPostingListSize(page) \
(((PageHeader) page)->pd_lower - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(ItemPointerData)))
#define GinDataLeafPageIsEmpty(page) \
(GinPageIsCompressed(page) ? (GinDataLeafPageGetPostingListSize(page) == 0) : (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber))
#define GinDataLeafPageGetFreeSpace(page) PageGetExactFreeSpace(page)
#define GinDataPageGetRightBound(page) ((ItemPointer) PageGetContents(page))
/*
* Pointer to the data portion of a posting tree page. For internal pages,
* that's the beginning of the array of PostingItems. For compressed leaf
* pages, the first compressed posting list. For uncompressed (pre-9.4) leaf
* pages, it's the beginning of the ItemPointer array.
*/
#define GinDataPageGetData(page) \
(PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData)))
/* non-leaf pages contain PostingItems */
#define GinDataPageGetPostingItem(page, i) \
((PostingItem *) (GinDataPageGetData(page) + ((i)-1) * sizeof(PostingItem)))
/*
* Note: there is no GinDataPageGetDataSize macro, because before version
* 9.4, we didn't set pd_lower on data pages. There can be pages in the index
* that were binary-upgraded from earlier versions and still have an invalid
* pd_lower, so we cannot trust it in general. Compressed posting tree leaf
* pages are new in 9.4, however, so we can trust them; see
* GinDataLeafPageGetPostingListSize.
*/
#define GinDataPageSetDataSize(page, size) \
{ \
Assert(size <= GinDataPageMaxDataSize); \
((PageHeader) page)->pd_lower = (size) + MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(ItemPointerData)); \
}
#define GinNonLeafDataPageGetFreeSpace(page) \
(GinDataPageMaxDataSize - \
GinPageGetOpaque(page)->maxoff * sizeof(PostingItem))
#define GinDataPageMaxDataSize \
(BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \
- MAXALIGN(sizeof(ItemPointerData)) \
- MAXALIGN(sizeof(GinPageOpaqueData)))
/*
* List pages
*/
#define GinListPageSize \
( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GinPageOpaqueData)) )
/*
* Storage type for GIN's reloptions
*/
@ -380,216 +83,6 @@ typedef struct GinState
} GinState;
/*
* A compressed posting list.
*
* Note: This requires 2-byte alignment.
*/
typedef struct
{
ItemPointerData first; /* first item in this posting list (unpacked) */
uint16 nbytes; /* number of bytes that follow */
unsigned char bytes[FLEXIBLE_ARRAY_MEMBER]; /* varbyte encoded items */
} GinPostingList;
#define SizeOfGinPostingList(plist) (offsetof(GinPostingList, bytes) + SHORTALIGN((plist)->nbytes) )
#define GinNextPostingListSegment(cur) ((GinPostingList *) (((char *) (cur)) + SizeOfGinPostingList((cur))))
/* XLog stuff */
#define XLOG_GIN_CREATE_INDEX 0x00
#define XLOG_GIN_CREATE_PTREE 0x10
typedef struct ginxlogCreatePostingTree
{
uint32 size;
/* A compressed posting list follows */
} ginxlogCreatePostingTree;
/*
* The format of the insertion record varies depending on the page type.
* ginxlogInsert is the common part between all variants.
*
* Backup Blk 0: target page
* Backup Blk 1: left child, if this insertion finishes an incomplete split
*/
#define XLOG_GIN_INSERT 0x20
typedef struct
{
uint16 flags; /* GIN_INSERT_ISLEAF and/or GIN_INSERT_ISDATA */
/*
* FOLLOWS:
*
* 1. if not leaf page, block numbers of the left and right child pages
* whose split this insertion finishes, as BlockIdData[2] (beware of
* adding fields in this struct that would make them not 16-bit aligned)
*
* 2. a ginxlogInsertEntry or ginxlogRecompressDataLeaf struct, depending
* on tree type.
*
* NB: the below structs are only 16-bit aligned when appended to a
* ginxlogInsert struct! Beware of adding fields to them that require
* stricter alignment.
*/
} ginxlogInsert;
typedef struct
{
OffsetNumber offset;
bool isDelete;
IndexTupleData tuple; /* variable length */
} ginxlogInsertEntry;
typedef struct
{
uint16 nactions;
/* Variable number of 'actions' follow */
} ginxlogRecompressDataLeaf;
/*
* Note: this struct is currently not used in code, and only acts as
* documentation. The WAL record format is as specified here, but the code
* uses straight access through a Pointer and memcpy to read/write these.
*/
typedef struct
{
uint8 segno; /* segment this action applies to */
char type; /* action type (see below) */
/*
* Action-specific data follows. For INSERT and REPLACE actions that is a
* GinPostingList struct. For ADDITEMS, a uint16 for the number of items
* added, followed by the items themselves as ItemPointers. DELETE actions
* have no further data.
*/
} ginxlogSegmentAction;
/* Action types */
#define GIN_SEGMENT_UNMODIFIED 0 /* no action (not used in WAL records) */
#define GIN_SEGMENT_DELETE 1 /* a whole segment is removed */
#define GIN_SEGMENT_INSERT 2 /* a whole segment is added */
#define GIN_SEGMENT_REPLACE 3 /* a segment is replaced */
#define GIN_SEGMENT_ADDITEMS 4 /* items are added to existing segment */
typedef struct
{
OffsetNumber offset;
PostingItem newitem;
} ginxlogInsertDataInternal;
/*
* Backup Blk 0: new left page (= original page, if not root split)
* Backup Blk 1: new right page
* Backup Blk 2: original page / new root page, if root split
* Backup Blk 3: left child, if this insertion completes an earlier split
*/
#define XLOG_GIN_SPLIT 0x30
typedef struct ginxlogSplit
{
RelFileNode node;
BlockNumber rrlink; /* right link, or root's blocknumber if root
* split */
BlockNumber leftChildBlkno; /* valid on a non-leaf split */
BlockNumber rightChildBlkno;
uint16 flags; /* see below */
} ginxlogSplit;
/*
* Flags used in ginxlogInsert and ginxlogSplit records
*/
#define GIN_INSERT_ISDATA 0x01 /* for both insert and split records */
#define GIN_INSERT_ISLEAF 0x02 /* ditto */
#define GIN_SPLIT_ROOT 0x04 /* only for split records */
/*
* Vacuum simply WAL-logs the whole page, when anything is modified. This
* is functionally identical to heap_newpage records, but is kept separate for
* debugging purposes. (When inspecting the WAL stream, it's easier to see
* what's going on when GIN vacuum records are marked as such, not as heap
* records.) This is currently only used for entry tree leaf pages.
*/
#define XLOG_GIN_VACUUM_PAGE 0x40
/*
* Vacuuming posting tree leaf page is WAL-logged like recompression caused
* by insertion.
*/
#define XLOG_GIN_VACUUM_DATA_LEAF_PAGE 0x90
typedef struct ginxlogVacuumDataLeafPage
{
ginxlogRecompressDataLeaf data;
} ginxlogVacuumDataLeafPage;
/*
* Backup Blk 0: deleted page
* Backup Blk 1: parent
* Backup Blk 2: left sibling
*/
#define XLOG_GIN_DELETE_PAGE 0x50
typedef struct ginxlogDeletePage
{
OffsetNumber parentOffset;
BlockNumber rightLink;
} ginxlogDeletePage;
#define XLOG_GIN_UPDATE_META_PAGE 0x60
/*
* Backup Blk 0: metapage
* Backup Blk 1: tail page
*/
typedef struct ginxlogUpdateMeta
{
RelFileNode node;
GinMetaPageData metadata;
BlockNumber prevTail;
BlockNumber newRightlink;
int32 ntuples; /* if ntuples > 0 then metadata.tail was
* updated with that many tuples; else new sub
* list was inserted */
/* array of inserted tuples follows */
} ginxlogUpdateMeta;
#define XLOG_GIN_INSERT_LISTPAGE 0x70
typedef struct ginxlogInsertListPage
{
BlockNumber rightlink;
int32 ntuples;
/* array of inserted tuples follows */
} ginxlogInsertListPage;
/*
* Backup Blk 0: metapage
* Backup Blk 1 to (ndeleted + 1): deleted pages
*/
#define XLOG_GIN_DELETE_LISTPAGE 0x80
/*
* The WAL record for deleting list pages must contain a block reference to
* all the deleted pages, so the number of pages that can be deleted in one
* record is limited by XLR_MAX_BLOCK_ID. (block_id 0 is used for the
* metapage.)
*/
#define GIN_NDELETE_AT_ONCE Min(16, XLR_MAX_BLOCK_ID - 1)
typedef struct ginxlogDeleteListPages
{
GinMetaPageData metadata;
int32 ndeleted;
} ginxlogDeleteListPages;
/* ginutil.c */
extern bytea *ginoptions(Datum reloptions, bool validate);
extern void initGinState(GinState *state, Relation index);