mirror of
https://github.com/postgres/postgres.git
synced 2025-04-21 12:05:57 +03:00
While we don't need multiple iterators at the moment, the interface is nicer and less dangerous this way. Aleksander Alekseev, with some changes by me.
985 lines
33 KiB
C
985 lines
33 KiB
C
/*--------------------------------------------------------------------------
|
|
* gin_private.h
|
|
* header file for postgres inverted index access method implementation.
|
|
*
|
|
* Copyright (c) 2006-2016, PostgreSQL Global Development Group
|
|
*
|
|
* src/include/access/gin_private.h
|
|
*--------------------------------------------------------------------------
|
|
*/
|
|
#ifndef GIN_PRIVATE_H
|
|
#define GIN_PRIVATE_H
|
|
|
|
#include "access/amapi.h"
|
|
#include "access/gin.h"
|
|
#include "access/itup.h"
|
|
#include "fmgr.h"
|
|
#include "storage/bufmgr.h"
|
|
#include "lib/rbtree.h"
|
|
|
|
|
|
/*
|
|
* Page opaque data in an inverted index page.
|
|
*
|
|
* Note: GIN does not include a page ID word as do the other index types.
|
|
* This is OK because the opaque data is only 8 bytes and so can be reliably
|
|
* distinguished by size. Revisit this if the size ever increases.
|
|
* Further note: as of 9.2, SP-GiST also uses 8-byte special space, as does
|
|
* BRIN as of 9.5. This is still OK, as long as GIN isn't using all of the
|
|
* high-order bits in its flags word, because that way the flags word cannot
|
|
* match the page IDs used by SP-GiST and BRIN.
|
|
*/
|
|
typedef struct GinPageOpaqueData
|
|
{
|
|
BlockNumber rightlink; /* next page if any */
|
|
OffsetNumber maxoff; /* number of PostingItems on GIN_DATA &
|
|
* ~GIN_LEAF page. On GIN_LIST page, number of
|
|
* heap tuples. */
|
|
uint16 flags; /* see bit definitions below */
|
|
} GinPageOpaqueData;
|
|
|
|
typedef GinPageOpaqueData *GinPageOpaque;
|
|
|
|
#define GIN_DATA (1 << 0)
|
|
#define GIN_LEAF (1 << 1)
|
|
#define GIN_DELETED (1 << 2)
|
|
#define GIN_META (1 << 3)
|
|
#define GIN_LIST (1 << 4)
|
|
#define GIN_LIST_FULLROW (1 << 5) /* makes sense only on GIN_LIST page */
|
|
#define GIN_INCOMPLETE_SPLIT (1 << 6) /* page was split, but parent not
|
|
* updated */
|
|
#define GIN_COMPRESSED (1 << 7)
|
|
|
|
/* Page numbers of fixed-location pages */
|
|
#define GIN_METAPAGE_BLKNO (0)
|
|
#define GIN_ROOT_BLKNO (1)
|
|
|
|
typedef struct GinMetaPageData
|
|
{
|
|
/*
|
|
* Pointers to head and tail of pending list, which consists of GIN_LIST
|
|
* pages. These store fast-inserted entries that haven't yet been moved
|
|
* into the regular GIN structure.
|
|
*/
|
|
BlockNumber head;
|
|
BlockNumber tail;
|
|
|
|
/*
|
|
* Free space in bytes in the pending list's tail page.
|
|
*/
|
|
uint32 tailFreeSize;
|
|
|
|
/*
|
|
* We store both number of pages and number of heap tuples that are in the
|
|
* pending list.
|
|
*/
|
|
BlockNumber nPendingPages;
|
|
int64 nPendingHeapTuples;
|
|
|
|
/*
|
|
* Statistics for planner use (accurate as of last VACUUM)
|
|
*/
|
|
BlockNumber nTotalPages;
|
|
BlockNumber nEntryPages;
|
|
BlockNumber nDataPages;
|
|
int64 nEntries;
|
|
|
|
/*
|
|
* GIN version number (ideally this should have been at the front, but too
|
|
* late now. Don't move it!)
|
|
*
|
|
* Currently 2 (for indexes initialized in 9.4 or later)
|
|
*
|
|
* Version 1 (indexes initialized in version 9.1, 9.2 or 9.3), is
|
|
* compatible, but may contain uncompressed posting tree (leaf) pages and
|
|
* posting lists. They will be converted to compressed format when
|
|
* modified.
|
|
*
|
|
* Version 0 (indexes initialized in 9.0 or before) is compatible but may
|
|
* be missing null entries, including both null keys and placeholders.
|
|
* Reject full-index-scan attempts on such indexes.
|
|
*/
|
|
int32 ginVersion;
|
|
} GinMetaPageData;
|
|
|
|
#define GIN_CURRENT_VERSION 2
|
|
|
|
#define GinPageGetMeta(p) \
|
|
((GinMetaPageData *) PageGetContents(p))
|
|
|
|
/*
|
|
* Macros for accessing a GIN index page's opaque data
|
|
*/
|
|
#define GinPageGetOpaque(page) ( (GinPageOpaque) PageGetSpecialPointer(page) )
|
|
|
|
#define GinPageIsLeaf(page) ( (GinPageGetOpaque(page)->flags & GIN_LEAF) != 0 )
|
|
#define GinPageSetLeaf(page) ( GinPageGetOpaque(page)->flags |= GIN_LEAF )
|
|
#define GinPageSetNonLeaf(page) ( GinPageGetOpaque(page)->flags &= ~GIN_LEAF )
|
|
#define GinPageIsData(page) ( (GinPageGetOpaque(page)->flags & GIN_DATA) != 0 )
|
|
#define GinPageSetData(page) ( GinPageGetOpaque(page)->flags |= GIN_DATA )
|
|
#define GinPageIsList(page) ( (GinPageGetOpaque(page)->flags & GIN_LIST) != 0 )
|
|
#define GinPageSetList(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST )
|
|
#define GinPageHasFullRow(page) ( (GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW) != 0 )
|
|
#define GinPageSetFullRow(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST_FULLROW )
|
|
#define GinPageIsCompressed(page) ( (GinPageGetOpaque(page)->flags & GIN_COMPRESSED) != 0 )
|
|
#define GinPageSetCompressed(page) ( GinPageGetOpaque(page)->flags |= GIN_COMPRESSED )
|
|
|
|
#define GinPageIsDeleted(page) ( (GinPageGetOpaque(page)->flags & GIN_DELETED) != 0 )
|
|
#define GinPageSetDeleted(page) ( GinPageGetOpaque(page)->flags |= GIN_DELETED)
|
|
#define GinPageSetNonDeleted(page) ( GinPageGetOpaque(page)->flags &= ~GIN_DELETED)
|
|
#define GinPageIsIncompleteSplit(page) ( (GinPageGetOpaque(page)->flags & GIN_INCOMPLETE_SPLIT) != 0 )
|
|
|
|
#define GinPageRightMost(page) ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber)
|
|
|
|
/*
|
|
* We use our own ItemPointerGet(BlockNumber|OffsetNumber)
|
|
* to avoid Asserts, since sometimes the ip_posid isn't "valid"
|
|
*/
|
|
#define GinItemPointerGetBlockNumber(pointer) \
|
|
BlockIdGetBlockNumber(&(pointer)->ip_blkid)
|
|
|
|
#define GinItemPointerGetOffsetNumber(pointer) \
|
|
((pointer)->ip_posid)
|
|
|
|
/*
|
|
* Special-case item pointer values needed by the GIN search logic.
|
|
* MIN: sorts less than any valid item pointer
|
|
* MAX: sorts greater than any valid item pointer
|
|
* LOSSY PAGE: indicates a whole heap page, sorts after normal item
|
|
* pointers for that page
|
|
* Note that these are all distinguishable from an "invalid" item pointer
|
|
* (which is InvalidBlockNumber/0) as well as from all normal item
|
|
* pointers (which have item numbers in the range 1..MaxHeapTuplesPerPage).
|
|
*/
|
|
#define ItemPointerSetMin(p) \
|
|
ItemPointerSet((p), (BlockNumber)0, (OffsetNumber)0)
|
|
#define ItemPointerIsMin(p) \
|
|
(GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0 && \
|
|
GinItemPointerGetBlockNumber(p) == (BlockNumber)0)
|
|
#define ItemPointerSetMax(p) \
|
|
ItemPointerSet((p), InvalidBlockNumber, (OffsetNumber)0xffff)
|
|
#define ItemPointerIsMax(p) \
|
|
(GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \
|
|
GinItemPointerGetBlockNumber(p) == InvalidBlockNumber)
|
|
#define ItemPointerSetLossyPage(p, b) \
|
|
ItemPointerSet((p), (b), (OffsetNumber)0xffff)
|
|
#define ItemPointerIsLossyPage(p) \
|
|
(GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \
|
|
GinItemPointerGetBlockNumber(p) != InvalidBlockNumber)
|
|
|
|
/*
|
|
* Posting item in a non-leaf posting-tree page
|
|
*/
|
|
typedef struct
|
|
{
|
|
/* We use BlockIdData not BlockNumber to avoid padding space wastage */
|
|
BlockIdData child_blkno;
|
|
ItemPointerData key;
|
|
} PostingItem;
|
|
|
|
#define PostingItemGetBlockNumber(pointer) \
|
|
BlockIdGetBlockNumber(&(pointer)->child_blkno)
|
|
|
|
#define PostingItemSetBlockNumber(pointer, blockNumber) \
|
|
BlockIdSet(&((pointer)->child_blkno), (blockNumber))
|
|
|
|
/*
|
|
* Category codes to distinguish placeholder nulls from ordinary NULL keys.
|
|
* Note that the datatype size and the first two code values are chosen to be
|
|
* compatible with the usual usage of bool isNull flags.
|
|
*
|
|
* GIN_CAT_EMPTY_QUERY is never stored in the index; and notice that it is
|
|
* chosen to sort before not after regular key values.
|
|
*/
|
|
typedef signed char GinNullCategory;
|
|
|
|
#define GIN_CAT_NORM_KEY 0 /* normal, non-null key value */
|
|
#define GIN_CAT_NULL_KEY 1 /* null key value */
|
|
#define GIN_CAT_EMPTY_ITEM 2 /* placeholder for zero-key item */
|
|
#define GIN_CAT_NULL_ITEM 3 /* placeholder for null item */
|
|
#define GIN_CAT_EMPTY_QUERY (-1) /* placeholder for full-scan query */
|
|
|
|
/*
|
|
* Access macros for null category byte in entry tuples
|
|
*/
|
|
#define GinCategoryOffset(itup,ginstate) \
|
|
(IndexInfoFindDataOffset((itup)->t_info) + \
|
|
((ginstate)->oneCol ? 0 : sizeof(int16)))
|
|
#define GinGetNullCategory(itup,ginstate) \
|
|
(*((GinNullCategory *) ((char*)(itup) + GinCategoryOffset(itup,ginstate))))
|
|
#define GinSetNullCategory(itup,ginstate,c) \
|
|
(*((GinNullCategory *) ((char*)(itup) + GinCategoryOffset(itup,ginstate))) = (c))
|
|
|
|
/*
|
|
* Access macros for leaf-page entry tuples (see discussion in README)
|
|
*/
|
|
#define GinGetNPosting(itup) GinItemPointerGetOffsetNumber(&(itup)->t_tid)
|
|
#define GinSetNPosting(itup,n) ItemPointerSetOffsetNumber(&(itup)->t_tid,n)
|
|
#define GIN_TREE_POSTING ((OffsetNumber)0xffff)
|
|
#define GinIsPostingTree(itup) (GinGetNPosting(itup) == GIN_TREE_POSTING)
|
|
#define GinSetPostingTree(itup, blkno) ( GinSetNPosting((itup),GIN_TREE_POSTING), ItemPointerSetBlockNumber(&(itup)->t_tid, blkno) )
|
|
#define GinGetPostingTree(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid)
|
|
|
|
#define GIN_ITUP_COMPRESSED (1U << 31)
|
|
#define GinGetPostingOffset(itup) (GinItemPointerGetBlockNumber(&(itup)->t_tid) & (~GIN_ITUP_COMPRESSED))
|
|
#define GinSetPostingOffset(itup,n) ItemPointerSetBlockNumber(&(itup)->t_tid,(n)|GIN_ITUP_COMPRESSED)
|
|
#define GinGetPosting(itup) ((Pointer) ((char*)(itup) + GinGetPostingOffset(itup)))
|
|
#define GinItupIsCompressed(itup) ((GinItemPointerGetBlockNumber(&(itup)->t_tid) & GIN_ITUP_COMPRESSED) != 0)
|
|
|
|
/*
|
|
* Maximum size of an item on entry tree page. Make sure that we fit at least
|
|
* three items on each page. (On regular B-tree indexes, we must fit at least
|
|
* three items: two data items and the "high key". In GIN entry tree, we don't
|
|
* currently store the high key explicitly, we just use the rightmost item on
|
|
* the page, so it would actually be enough to fit two items.)
|
|
*/
|
|
#define GinMaxItemSize \
|
|
Min(INDEX_SIZE_MASK, \
|
|
MAXALIGN_DOWN(((BLCKSZ - \
|
|
MAXALIGN(SizeOfPageHeaderData + 3 * sizeof(ItemIdData)) - \
|
|
MAXALIGN(sizeof(GinPageOpaqueData))) / 3)))
|
|
|
|
/*
|
|
* Access macros for non-leaf entry tuples
|
|
*/
|
|
#define GinGetDownlink(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid)
|
|
#define GinSetDownlink(itup,blkno) ItemPointerSet(&(itup)->t_tid, blkno, InvalidOffsetNumber)
|
|
|
|
|
|
/*
|
|
* Data (posting tree) pages
|
|
*
|
|
* Posting tree pages don't store regular tuples. Non-leaf pages contain
|
|
* PostingItems, which are pairs of ItemPointers and child block numbers.
|
|
* Leaf pages contain GinPostingLists and an uncompressed array of item
|
|
* pointers.
|
|
*
|
|
* In a leaf page, the compressed posting lists are stored after the regular
|
|
* page header, one after each other. Although we don't store regular tuples,
|
|
* pd_lower is used to indicate the end of the posting lists. After that, free
|
|
* space follows. This layout is compatible with the "standard" heap and
|
|
* index page layout described in bufpage.h, so that we can e.g set buffer_std
|
|
* when writing WAL records.
|
|
*
|
|
* In the special space is the GinPageOpaque struct.
|
|
*/
|
|
#define GinDataLeafPageGetPostingList(page) \
|
|
(GinPostingList *) ((PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData))))
|
|
#define GinDataLeafPageGetPostingListSize(page) \
|
|
(((PageHeader) page)->pd_lower - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(ItemPointerData)))
|
|
|
|
#define GinDataLeafPageIsEmpty(page) \
|
|
(GinPageIsCompressed(page) ? (GinDataLeafPageGetPostingListSize(page) == 0) : (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber))
|
|
|
|
#define GinDataLeafPageGetFreeSpace(page) PageGetExactFreeSpace(page)
|
|
|
|
#define GinDataPageGetRightBound(page) ((ItemPointer) PageGetContents(page))
|
|
/*
|
|
* Pointer to the data portion of a posting tree page. For internal pages,
|
|
* that's the beginning of the array of PostingItems. For compressed leaf
|
|
* pages, the first compressed posting list. For uncompressed (pre-9.4) leaf
|
|
* pages, it's the beginning of the ItemPointer array.
|
|
*/
|
|
#define GinDataPageGetData(page) \
|
|
(PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData)))
|
|
/* non-leaf pages contain PostingItems */
|
|
#define GinDataPageGetPostingItem(page, i) \
|
|
((PostingItem *) (GinDataPageGetData(page) + ((i)-1) * sizeof(PostingItem)))
|
|
|
|
/*
|
|
* Note: there is no GinDataPageGetDataSize macro, because before version
|
|
* 9.4, we didn't set pd_lower on data pages. There can be pages in the index
|
|
* that were binary-upgraded from earlier versions and still have an invalid
|
|
* pd_lower, so we cannot trust it in general. Compressed posting tree leaf
|
|
* pages are new in 9.4, however, so we can trust them; see
|
|
* GinDataLeafPageGetPostingListSize.
|
|
*/
|
|
#define GinDataPageSetDataSize(page, size) \
|
|
{ \
|
|
Assert(size <= GinDataPageMaxDataSize); \
|
|
((PageHeader) page)->pd_lower = (size) + MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(ItemPointerData)); \
|
|
}
|
|
|
|
#define GinNonLeafDataPageGetFreeSpace(page) \
|
|
(GinDataPageMaxDataSize - \
|
|
GinPageGetOpaque(page)->maxoff * sizeof(PostingItem))
|
|
|
|
#define GinDataPageMaxDataSize \
|
|
(BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \
|
|
- MAXALIGN(sizeof(ItemPointerData)) \
|
|
- MAXALIGN(sizeof(GinPageOpaqueData)))
|
|
|
|
/*
|
|
* List pages
|
|
*/
|
|
#define GinListPageSize \
|
|
( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GinPageOpaqueData)) )
|
|
|
|
/*
|
|
* Storage type for GIN's reloptions
|
|
*/
|
|
typedef struct GinOptions
|
|
{
|
|
int32 vl_len_; /* varlena header (do not touch directly!) */
|
|
bool useFastUpdate; /* use fast updates? */
|
|
int pendingListCleanupSize; /* maximum size of pending list */
|
|
} GinOptions;
|
|
|
|
#define GIN_DEFAULT_USE_FASTUPDATE true
|
|
#define GinGetUseFastUpdate(relation) \
|
|
((relation)->rd_options ? \
|
|
((GinOptions *) (relation)->rd_options)->useFastUpdate : GIN_DEFAULT_USE_FASTUPDATE)
|
|
#define GinGetPendingListCleanupSize(relation) \
|
|
((relation)->rd_options && \
|
|
((GinOptions *) (relation)->rd_options)->pendingListCleanupSize != -1 ? \
|
|
((GinOptions *) (relation)->rd_options)->pendingListCleanupSize : \
|
|
gin_pending_list_limit)
|
|
|
|
|
|
/* Macros for buffer lock/unlock operations */
|
|
#define GIN_UNLOCK BUFFER_LOCK_UNLOCK
|
|
#define GIN_SHARE BUFFER_LOCK_SHARE
|
|
#define GIN_EXCLUSIVE BUFFER_LOCK_EXCLUSIVE
|
|
|
|
|
|
/*
|
|
* GinState: working data structure describing the index being worked on
|
|
*/
|
|
typedef struct GinState
|
|
{
|
|
Relation index;
|
|
bool oneCol; /* true if single-column index */
|
|
|
|
/*
|
|
* origTupDesc is the nominal tuple descriptor of the index, ie, the i'th
|
|
* attribute shows the key type (not the input data type!) of the i'th
|
|
* index column. In a single-column index this describes the actual leaf
|
|
* index tuples. In a multi-column index, the actual leaf tuples contain
|
|
* a smallint column number followed by a key datum of the appropriate
|
|
* type for that column. We set up tupdesc[i] to describe the actual
|
|
* rowtype of the index tuples for the i'th column, ie, (int2, keytype).
|
|
* Note that in any case, leaf tuples contain more data than is known to
|
|
* the TupleDesc; see access/gin/README for details.
|
|
*/
|
|
TupleDesc origTupdesc;
|
|
TupleDesc tupdesc[INDEX_MAX_KEYS];
|
|
|
|
/*
|
|
* Per-index-column opclass support functions
|
|
*/
|
|
FmgrInfo compareFn[INDEX_MAX_KEYS];
|
|
FmgrInfo extractValueFn[INDEX_MAX_KEYS];
|
|
FmgrInfo extractQueryFn[INDEX_MAX_KEYS];
|
|
FmgrInfo consistentFn[INDEX_MAX_KEYS];
|
|
FmgrInfo triConsistentFn[INDEX_MAX_KEYS];
|
|
FmgrInfo comparePartialFn[INDEX_MAX_KEYS]; /* optional method */
|
|
/* canPartialMatch[i] is true if comparePartialFn[i] is valid */
|
|
bool canPartialMatch[INDEX_MAX_KEYS];
|
|
/* Collations to pass to the support functions */
|
|
Oid supportCollation[INDEX_MAX_KEYS];
|
|
} GinState;
|
|
|
|
|
|
/*
|
|
* A compressed posting list.
|
|
*
|
|
* Note: This requires 2-byte alignment.
|
|
*/
|
|
typedef struct
|
|
{
|
|
ItemPointerData first; /* first item in this posting list (unpacked) */
|
|
uint16 nbytes; /* number of bytes that follow */
|
|
unsigned char bytes[FLEXIBLE_ARRAY_MEMBER]; /* varbyte encoded items */
|
|
} GinPostingList;
|
|
|
|
#define SizeOfGinPostingList(plist) (offsetof(GinPostingList, bytes) + SHORTALIGN((plist)->nbytes) )
|
|
#define GinNextPostingListSegment(cur) ((GinPostingList *) (((char *) (cur)) + SizeOfGinPostingList((cur))))
|
|
|
|
|
|
/* XLog stuff */
|
|
|
|
#define XLOG_GIN_CREATE_INDEX 0x00
|
|
|
|
#define XLOG_GIN_CREATE_PTREE 0x10
|
|
|
|
typedef struct ginxlogCreatePostingTree
|
|
{
|
|
uint32 size;
|
|
/* A compressed posting list follows */
|
|
} ginxlogCreatePostingTree;
|
|
|
|
/*
|
|
* The format of the insertion record varies depending on the page type.
|
|
* ginxlogInsert is the common part between all variants.
|
|
*
|
|
* Backup Blk 0: target page
|
|
* Backup Blk 1: left child, if this insertion finishes an incomplete split
|
|
*/
|
|
|
|
#define XLOG_GIN_INSERT 0x20
|
|
|
|
typedef struct
|
|
{
|
|
uint16 flags; /* GIN_INSERT_ISLEAF and/or GIN_INSERT_ISDATA */
|
|
|
|
/*
|
|
* FOLLOWS:
|
|
*
|
|
* 1. if not leaf page, block numbers of the left and right child pages
|
|
* whose split this insertion finishes, as BlockIdData[2] (beware of
|
|
* adding fields in this struct that would make them not 16-bit aligned)
|
|
*
|
|
* 2. a ginxlogInsertEntry or ginxlogRecompressDataLeaf struct, depending
|
|
* on tree type.
|
|
*
|
|
* NB: the below structs are only 16-bit aligned when appended to a
|
|
* ginxlogInsert struct! Beware of adding fields to them that require
|
|
* stricter alignment.
|
|
*/
|
|
} ginxlogInsert;
|
|
|
|
typedef struct
|
|
{
|
|
OffsetNumber offset;
|
|
bool isDelete;
|
|
IndexTupleData tuple; /* variable length */
|
|
} ginxlogInsertEntry;
|
|
|
|
|
|
typedef struct
|
|
{
|
|
uint16 nactions;
|
|
|
|
/* Variable number of 'actions' follow */
|
|
} ginxlogRecompressDataLeaf;
|
|
|
|
/*
|
|
* Note: this struct is currently not used in code, and only acts as
|
|
* documentation. The WAL record format is as specified here, but the code
|
|
* uses straight access through a Pointer and memcpy to read/write these.
|
|
*/
|
|
typedef struct
|
|
{
|
|
uint8 segno; /* segment this action applies to */
|
|
char type; /* action type (see below) */
|
|
|
|
/*
|
|
* Action-specific data follows. For INSERT and REPLACE actions that is a
|
|
* GinPostingList struct. For ADDITEMS, a uint16 for the number of items
|
|
* added, followed by the items themselves as ItemPointers. DELETE actions
|
|
* have no further data.
|
|
*/
|
|
} ginxlogSegmentAction;
|
|
|
|
/* Action types */
|
|
#define GIN_SEGMENT_UNMODIFIED 0 /* no action (not used in WAL records) */
|
|
#define GIN_SEGMENT_DELETE 1 /* a whole segment is removed */
|
|
#define GIN_SEGMENT_INSERT 2 /* a whole segment is added */
|
|
#define GIN_SEGMENT_REPLACE 3 /* a segment is replaced */
|
|
#define GIN_SEGMENT_ADDITEMS 4 /* items are added to existing segment */
|
|
|
|
typedef struct
|
|
{
|
|
OffsetNumber offset;
|
|
PostingItem newitem;
|
|
} ginxlogInsertDataInternal;
|
|
|
|
/*
|
|
* Backup Blk 0: new left page (= original page, if not root split)
|
|
* Backup Blk 1: new right page
|
|
* Backup Blk 2: original page / new root page, if root split
|
|
* Backup Blk 3: left child, if this insertion completes an earlier split
|
|
*/
|
|
#define XLOG_GIN_SPLIT 0x30
|
|
|
|
typedef struct ginxlogSplit
|
|
{
|
|
RelFileNode node;
|
|
BlockNumber rrlink; /* right link, or root's blocknumber if root
|
|
* split */
|
|
BlockNumber leftChildBlkno; /* valid on a non-leaf split */
|
|
BlockNumber rightChildBlkno;
|
|
uint16 flags; /* see below */
|
|
} ginxlogSplit;
|
|
|
|
/*
|
|
* Flags used in ginxlogInsert and ginxlogSplit records
|
|
*/
|
|
#define GIN_INSERT_ISDATA 0x01 /* for both insert and split records */
|
|
#define GIN_INSERT_ISLEAF 0x02 /* ditto */
|
|
#define GIN_SPLIT_ROOT 0x04 /* only for split records */
|
|
|
|
/*
|
|
* Vacuum simply WAL-logs the whole page, when anything is modified. This
|
|
* is functionally identical to heap_newpage records, but is kept separate for
|
|
* debugging purposes. (When inspecting the WAL stream, it's easier to see
|
|
* what's going on when GIN vacuum records are marked as such, not as heap
|
|
* records.) This is currently only used for entry tree leaf pages.
|
|
*/
|
|
#define XLOG_GIN_VACUUM_PAGE 0x40
|
|
|
|
/*
|
|
* Vacuuming posting tree leaf page is WAL-logged like recompression caused
|
|
* by insertion.
|
|
*/
|
|
#define XLOG_GIN_VACUUM_DATA_LEAF_PAGE 0x90
|
|
|
|
typedef struct ginxlogVacuumDataLeafPage
|
|
{
|
|
ginxlogRecompressDataLeaf data;
|
|
} ginxlogVacuumDataLeafPage;
|
|
|
|
/*
|
|
* Backup Blk 0: deleted page
|
|
* Backup Blk 1: parent
|
|
* Backup Blk 2: left sibling
|
|
*/
|
|
#define XLOG_GIN_DELETE_PAGE 0x50
|
|
|
|
typedef struct ginxlogDeletePage
|
|
{
|
|
OffsetNumber parentOffset;
|
|
BlockNumber rightLink;
|
|
} ginxlogDeletePage;
|
|
|
|
#define XLOG_GIN_UPDATE_META_PAGE 0x60
|
|
|
|
/*
|
|
* Backup Blk 0: metapage
|
|
* Backup Blk 1: tail page
|
|
*/
|
|
typedef struct ginxlogUpdateMeta
|
|
{
|
|
RelFileNode node;
|
|
GinMetaPageData metadata;
|
|
BlockNumber prevTail;
|
|
BlockNumber newRightlink;
|
|
int32 ntuples; /* if ntuples > 0 then metadata.tail was
|
|
* updated with that many tuples; else new sub
|
|
* list was inserted */
|
|
/* array of inserted tuples follows */
|
|
} ginxlogUpdateMeta;
|
|
|
|
#define XLOG_GIN_INSERT_LISTPAGE 0x70
|
|
|
|
typedef struct ginxlogInsertListPage
|
|
{
|
|
BlockNumber rightlink;
|
|
int32 ntuples;
|
|
/* array of inserted tuples follows */
|
|
} ginxlogInsertListPage;
|
|
|
|
/*
|
|
* Backup Blk 0: metapage
|
|
* Backup Blk 1 to (ndeleted + 1): deleted pages
|
|
*/
|
|
|
|
#define XLOG_GIN_DELETE_LISTPAGE 0x80
|
|
|
|
/*
|
|
* The WAL record for deleting list pages must contain a block reference to
|
|
* all the deleted pages, so the number of pages that can be deleted in one
|
|
* record is limited by XLR_MAX_BLOCK_ID. (block_id 0 is used for the
|
|
* metapage.)
|
|
*/
|
|
#define GIN_NDELETE_AT_ONCE Min(16, XLR_MAX_BLOCK_ID - 1)
|
|
typedef struct ginxlogDeleteListPages
|
|
{
|
|
GinMetaPageData metadata;
|
|
int32 ndeleted;
|
|
} ginxlogDeleteListPages;
|
|
|
|
|
|
/* ginutil.c */
|
|
extern Datum ginhandler(PG_FUNCTION_ARGS);
|
|
extern bytea *ginoptions(Datum reloptions, bool validate);
|
|
extern void initGinState(GinState *state, Relation index);
|
|
extern Buffer GinNewBuffer(Relation index);
|
|
extern void GinInitBuffer(Buffer b, uint32 f);
|
|
extern void GinInitPage(Page page, uint32 f, Size pageSize);
|
|
extern void GinInitMetabuffer(Buffer b);
|
|
extern int ginCompareEntries(GinState *ginstate, OffsetNumber attnum,
|
|
Datum a, GinNullCategory categorya,
|
|
Datum b, GinNullCategory categoryb);
|
|
extern int ginCompareAttEntries(GinState *ginstate,
|
|
OffsetNumber attnuma, Datum a, GinNullCategory categorya,
|
|
OffsetNumber attnumb, Datum b, GinNullCategory categoryb);
|
|
extern Datum *ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
|
|
Datum value, bool isNull,
|
|
int32 *nentries, GinNullCategory **categories);
|
|
|
|
extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
|
|
extern Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple,
|
|
GinNullCategory *category);
|
|
|
|
/* gininsert.c */
|
|
extern IndexBuildResult *ginbuild(Relation heap, Relation index,
|
|
struct IndexInfo *indexInfo);
|
|
extern void ginbuildempty(Relation index);
|
|
extern bool gininsert(Relation index, Datum *values, bool *isnull,
|
|
ItemPointer ht_ctid, Relation heapRel,
|
|
IndexUniqueCheck checkUnique);
|
|
extern void ginEntryInsert(GinState *ginstate,
|
|
OffsetNumber attnum, Datum key, GinNullCategory category,
|
|
ItemPointerData *items, uint32 nitem,
|
|
GinStatsData *buildStats);
|
|
|
|
/* ginbtree.c */
|
|
|
|
typedef struct GinBtreeStack
|
|
{
|
|
BlockNumber blkno;
|
|
Buffer buffer;
|
|
OffsetNumber off;
|
|
ItemPointerData iptr;
|
|
/* predictNumber contains predicted number of pages on current level */
|
|
uint32 predictNumber;
|
|
struct GinBtreeStack *parent;
|
|
} GinBtreeStack;
|
|
|
|
typedef struct GinBtreeData *GinBtree;
|
|
|
|
/* Return codes for GinBtreeData.beginPlaceToPage method */
|
|
typedef enum
|
|
{
|
|
GPTP_NO_WORK,
|
|
GPTP_INSERT,
|
|
GPTP_SPLIT
|
|
} GinPlaceToPageRC;
|
|
|
|
typedef struct GinBtreeData
|
|
{
|
|
/* search methods */
|
|
BlockNumber (*findChildPage) (GinBtree, GinBtreeStack *);
|
|
BlockNumber (*getLeftMostChild) (GinBtree, Page);
|
|
bool (*isMoveRight) (GinBtree, Page);
|
|
bool (*findItem) (GinBtree, GinBtreeStack *);
|
|
|
|
/* insert methods */
|
|
OffsetNumber (*findChildPtr) (GinBtree, Page, BlockNumber, OffsetNumber);
|
|
GinPlaceToPageRC (*beginPlaceToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, void **, Page *, Page *);
|
|
void (*execPlaceToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, void *);
|
|
void *(*prepareDownlink) (GinBtree, Buffer);
|
|
void (*fillRoot) (GinBtree, Page, BlockNumber, Page, BlockNumber, Page);
|
|
|
|
bool isData;
|
|
|
|
Relation index;
|
|
BlockNumber rootBlkno;
|
|
GinState *ginstate; /* not valid in a data scan */
|
|
bool fullScan;
|
|
bool isBuild;
|
|
|
|
/* Search key for Entry tree */
|
|
OffsetNumber entryAttnum;
|
|
Datum entryKey;
|
|
GinNullCategory entryCategory;
|
|
|
|
/* Search key for data tree (posting tree) */
|
|
ItemPointerData itemptr;
|
|
} GinBtreeData;
|
|
|
|
/* This represents a tuple to be inserted to entry tree. */
|
|
typedef struct
|
|
{
|
|
IndexTuple entry; /* tuple to insert */
|
|
bool isDelete; /* delete old tuple at same offset? */
|
|
} GinBtreeEntryInsertData;
|
|
|
|
/*
|
|
* This represents an itempointer, or many itempointers, to be inserted to
|
|
* a data (posting tree) leaf page
|
|
*/
|
|
typedef struct
|
|
{
|
|
ItemPointerData *items;
|
|
uint32 nitem;
|
|
uint32 curitem;
|
|
} GinBtreeDataLeafInsertData;
|
|
|
|
/*
|
|
* For internal data (posting tree) pages, the insertion payload is a
|
|
* PostingItem
|
|
*/
|
|
|
|
extern GinBtreeStack *ginFindLeafPage(GinBtree btree, bool searchMode, Snapshot snapshot);
|
|
extern Buffer ginStepRight(Buffer buffer, Relation index, int lockmode);
|
|
extern void freeGinBtreeStack(GinBtreeStack *stack);
|
|
extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack,
|
|
void *insertdata, GinStatsData *buildStats);
|
|
|
|
/* ginentrypage.c */
|
|
extern IndexTuple GinFormTuple(GinState *ginstate,
|
|
OffsetNumber attnum, Datum key, GinNullCategory category,
|
|
Pointer data, Size dataSize, int nipd, bool errorTooBig);
|
|
extern void ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum,
|
|
Datum key, GinNullCategory category,
|
|
GinState *ginstate);
|
|
extern void ginEntryFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage);
|
|
extern ItemPointer ginReadTuple(GinState *ginstate, OffsetNumber attnum,
|
|
IndexTuple itup, int *nitems);
|
|
|
|
/* gindatapage.c */
|
|
extern ItemPointer GinDataLeafPageGetItems(Page page, int *nitems, ItemPointerData advancePast);
|
|
extern int GinDataLeafPageGetItemsToTbm(Page page, TIDBitmap *tbm);
|
|
extern BlockNumber createPostingTree(Relation index,
|
|
ItemPointerData *items, uint32 nitems,
|
|
GinStatsData *buildStats);
|
|
extern void GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset);
|
|
extern void GinPageDeletePostingItem(Page page, OffsetNumber offset);
|
|
extern void ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
|
|
ItemPointerData *items, uint32 nitem,
|
|
GinStatsData *buildStats);
|
|
extern GinBtreeStack *ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno, Snapshot snapshot);
|
|
extern void ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage);
|
|
extern void ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno);
|
|
|
|
/*
|
|
* This is declared in ginvacuum.c, but is passed between ginVacuumItemPointers
|
|
* and ginVacuumPostingTreeLeaf and as an opaque struct, so we need a forward
|
|
* declaration for it.
|
|
*/
|
|
typedef struct GinVacuumState GinVacuumState;
|
|
|
|
extern void ginVacuumPostingTreeLeaf(Relation rel, Buffer buf, GinVacuumState *gvs);
|
|
|
|
/* ginscan.c */
|
|
|
|
/*
|
|
* GinScanKeyData describes a single GIN index qualifier expression.
|
|
*
|
|
* From each qual expression, we extract one or more specific index search
|
|
* conditions, which are represented by GinScanEntryData. It's quite
|
|
* possible for identical search conditions to be requested by more than
|
|
* one qual expression, in which case we merge such conditions to have just
|
|
* one unique GinScanEntry --- this is particularly important for efficiency
|
|
* when dealing with full-index-scan entries. So there can be multiple
|
|
* GinScanKeyData.scanEntry pointers to the same GinScanEntryData.
|
|
*
|
|
* In each GinScanKeyData, nentries is the true number of entries, while
|
|
* nuserentries is the number that extractQueryFn returned (which is what
|
|
* we report to consistentFn). The "user" entries must come first.
|
|
*/
|
|
typedef struct GinScanKeyData *GinScanKey;
|
|
|
|
typedef struct GinScanEntryData *GinScanEntry;
|
|
|
|
typedef struct GinScanKeyData
|
|
{
|
|
/* Real number of entries in scanEntry[] (always > 0) */
|
|
uint32 nentries;
|
|
/* Number of entries that extractQueryFn and consistentFn know about */
|
|
uint32 nuserentries;
|
|
|
|
/* array of GinScanEntry pointers, one per extracted search condition */
|
|
GinScanEntry *scanEntry;
|
|
|
|
/*
|
|
* At least one of the entries in requiredEntries must be present for a
|
|
* tuple to match the overall qual.
|
|
*
|
|
* additionalEntries contains entries that are needed by the consistent
|
|
* function to decide if an item matches, but are not sufficient to
|
|
* satisfy the qual without entries from requiredEntries.
|
|
*/
|
|
GinScanEntry *requiredEntries;
|
|
int nrequired;
|
|
GinScanEntry *additionalEntries;
|
|
int nadditional;
|
|
|
|
/* array of check flags, reported to consistentFn */
|
|
bool *entryRes;
|
|
bool (*boolConsistentFn) (GinScanKey key);
|
|
GinTernaryValue (*triConsistentFn) (GinScanKey key);
|
|
FmgrInfo *consistentFmgrInfo;
|
|
FmgrInfo *triConsistentFmgrInfo;
|
|
Oid collation;
|
|
|
|
/* other data needed for calling consistentFn */
|
|
Datum query;
|
|
/* NB: these three arrays have only nuserentries elements! */
|
|
Datum *queryValues;
|
|
GinNullCategory *queryCategories;
|
|
Pointer *extra_data;
|
|
StrategyNumber strategy;
|
|
int32 searchMode;
|
|
OffsetNumber attnum;
|
|
|
|
/*
|
|
* Match status data. curItem is the TID most recently tested (could be a
|
|
* lossy-page pointer). curItemMatches is TRUE if it passes the
|
|
* consistentFn test; if so, recheckCurItem is the recheck flag.
|
|
* isFinished means that all the input entry streams are finished, so this
|
|
* key cannot succeed for any later TIDs.
|
|
*/
|
|
ItemPointerData curItem;
|
|
bool curItemMatches;
|
|
bool recheckCurItem;
|
|
bool isFinished;
|
|
} GinScanKeyData;
|
|
|
|
typedef struct GinScanEntryData
|
|
{
|
|
/* query key and other information from extractQueryFn */
|
|
Datum queryKey;
|
|
GinNullCategory queryCategory;
|
|
bool isPartialMatch;
|
|
Pointer extra_data;
|
|
StrategyNumber strategy;
|
|
int32 searchMode;
|
|
OffsetNumber attnum;
|
|
|
|
/* Current page in posting tree */
|
|
Buffer buffer;
|
|
|
|
/* current ItemPointer to heap */
|
|
ItemPointerData curItem;
|
|
|
|
/* for a partial-match or full-scan query, we accumulate all TIDs here */
|
|
TIDBitmap *matchBitmap;
|
|
TBMIterator *matchIterator;
|
|
TBMIterateResult *matchResult;
|
|
|
|
/* used for Posting list and one page in Posting tree */
|
|
ItemPointerData *list;
|
|
int nlist;
|
|
OffsetNumber offset;
|
|
|
|
bool isFinished;
|
|
bool reduceResult;
|
|
uint32 predictNumberResult;
|
|
GinBtreeData btree;
|
|
} GinScanEntryData;
|
|
|
|
typedef struct GinScanOpaqueData
|
|
{
|
|
MemoryContext tempCtx;
|
|
GinState ginstate;
|
|
|
|
GinScanKey keys; /* one per scan qualifier expr */
|
|
uint32 nkeys;
|
|
|
|
GinScanEntry *entries; /* one per index search condition */
|
|
uint32 totalentries;
|
|
uint32 allocentries; /* allocated length of entries[] */
|
|
|
|
MemoryContext keyCtx; /* used to hold key and entry data */
|
|
|
|
bool isVoidRes; /* true if query is unsatisfiable */
|
|
} GinScanOpaqueData;
|
|
|
|
typedef GinScanOpaqueData *GinScanOpaque;
|
|
|
|
extern IndexScanDesc ginbeginscan(Relation rel, int nkeys, int norderbys);
|
|
extern void ginendscan(IndexScanDesc scan);
|
|
extern void ginrescan(IndexScanDesc scan, ScanKey key, int nscankeys,
|
|
ScanKey orderbys, int norderbys);
|
|
extern void ginNewScanKey(IndexScanDesc scan);
|
|
extern void ginFreeScanKeys(GinScanOpaque so);
|
|
|
|
/* ginget.c */
|
|
extern int64 gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm);
|
|
|
|
/* ginfast.c */
|
|
extern Datum gin_clean_pending_list(PG_FUNCTION_ARGS);
|
|
|
|
/* ginlogic.c */
|
|
extern void ginInitConsistentFunction(GinState *ginstate, GinScanKey key);
|
|
|
|
/* ginvacuum.c */
|
|
extern IndexBulkDeleteResult *ginbulkdelete(IndexVacuumInfo *info,
|
|
IndexBulkDeleteResult *stats,
|
|
IndexBulkDeleteCallback callback,
|
|
void *callback_state);
|
|
extern IndexBulkDeleteResult *ginvacuumcleanup(IndexVacuumInfo *info,
|
|
IndexBulkDeleteResult *stats);
|
|
extern ItemPointer ginVacuumItemPointers(GinVacuumState *gvs,
|
|
ItemPointerData *items, int nitem, int *nremaining);
|
|
|
|
/* ginvalidate.c */
|
|
extern bool ginvalidate(Oid opclassoid);
|
|
|
|
/* ginbulk.c */
|
|
typedef struct GinEntryAccumulator
|
|
{
|
|
RBNode rbnode;
|
|
Datum key;
|
|
GinNullCategory category;
|
|
OffsetNumber attnum;
|
|
bool shouldSort;
|
|
ItemPointerData *list;
|
|
uint32 maxcount; /* allocated size of list[] */
|
|
uint32 count; /* current number of list[] entries */
|
|
} GinEntryAccumulator;
|
|
|
|
typedef struct
|
|
{
|
|
GinState *ginstate;
|
|
Size allocatedMemory;
|
|
GinEntryAccumulator *entryallocator;
|
|
uint32 eas_used;
|
|
RBTree *tree;
|
|
RBTreeIterator tree_walk;
|
|
} BuildAccumulator;
|
|
|
|
extern void ginInitBA(BuildAccumulator *accum);
|
|
extern void ginInsertBAEntries(BuildAccumulator *accum,
|
|
ItemPointer heapptr, OffsetNumber attnum,
|
|
Datum *entries, GinNullCategory *categories,
|
|
int32 nentries);
|
|
extern void ginBeginBAScan(BuildAccumulator *accum);
|
|
extern ItemPointerData *ginGetBAEntry(BuildAccumulator *accum,
|
|
OffsetNumber *attnum, Datum *key, GinNullCategory *category,
|
|
uint32 *n);
|
|
|
|
/* ginfast.c */
|
|
|
|
typedef struct GinTupleCollector
|
|
{
|
|
IndexTuple *tuples;
|
|
uint32 ntuples;
|
|
uint32 lentuples;
|
|
uint32 sumsize;
|
|
} GinTupleCollector;
|
|
|
|
extern void ginHeapTupleFastInsert(GinState *ginstate,
|
|
GinTupleCollector *collector);
|
|
extern void ginHeapTupleFastCollect(GinState *ginstate,
|
|
GinTupleCollector *collector,
|
|
OffsetNumber attnum, Datum value, bool isNull,
|
|
ItemPointer ht_ctid);
|
|
extern void ginInsertCleanup(GinState *ginstate, bool full_clean,
|
|
bool fill_fsm, IndexBulkDeleteResult *stats);
|
|
|
|
/* ginpostinglist.c */
|
|
|
|
extern GinPostingList *ginCompressPostingList(const ItemPointer ptrs, int nptrs,
|
|
int maxsize, int *nwritten);
|
|
extern int ginPostingListDecodeAllSegmentsToTbm(GinPostingList *ptr, int totalsize, TIDBitmap *tbm);
|
|
|
|
extern ItemPointer ginPostingListDecodeAllSegments(GinPostingList *ptr, int len, int *ndecoded);
|
|
extern ItemPointer ginPostingListDecode(GinPostingList *ptr, int *ndecoded);
|
|
extern ItemPointer ginMergeItemPointers(ItemPointerData *a, uint32 na,
|
|
ItemPointerData *b, uint32 nb,
|
|
int *nmerged);
|
|
|
|
/*
|
|
* Merging the results of several gin scans compares item pointers a lot,
|
|
* so we want this to be inlined.
|
|
*/
|
|
static inline int
|
|
ginCompareItemPointers(ItemPointer a, ItemPointer b)
|
|
{
|
|
uint64 ia = (uint64) a->ip_blkid.bi_hi << 32 | (uint64) a->ip_blkid.bi_lo << 16 | a->ip_posid;
|
|
uint64 ib = (uint64) b->ip_blkid.bi_hi << 32 | (uint64) b->ip_blkid.bi_lo << 16 | b->ip_posid;
|
|
|
|
if (ia == ib)
|
|
return 0;
|
|
else if (ia > ib)
|
|
return 1;
|
|
else
|
|
return -1;
|
|
}
|
|
|
|
#endif /* GIN_PRIVATE_H */
|