mirror of
https://github.com/postgres/postgres.git
synced 2025-04-22 23:02:54 +03:00
Per my recent proposal(s). Null key datums can now be returned by extractValue and extractQuery functions, and will be stored in the index. Also, placeholder entries are made for indexable items that are NULL or contain no keys according to extractValue. This means that the index is now always complete, having at least one entry for every indexed heap TID, and so we can get rid of the prohibition on full-index scans. A full-index scan is implemented much the same way as partial-match scans were already: we build a bitmap representing all the TIDs found in the index, and then drive the results off that. Also, introduce a concept of a "search mode" that can be requested by extractQuery when the operator requires matching to empty items (this is just as cheap as matching to a single key) or requires a full index scan (which is not so cheap, but it sure beats failing or giving wrong answers). The behavior remains backward compatible for opclasses that don't return any null keys or request a non-default search mode. Using these features, we can now make the GIN index opclass for anyarray behave in a way that matches the actual anyarray operators for &&, <@, @>, and = ... which it failed to do before in assorted corner cases. This commit fixes the core GIN code and ginarrayprocs.c, updates the documentation, and adds some simple regression test cases for the new behaviors using the array operators. The tsearch and contrib GIN opclass support functions still need to be looked over and probably fixed. Another thing I intend to fix separately is that this is pretty inefficient for cases where more than one scan condition needs a full-index search: we'll run duplicate GinScanEntrys, each one of which builds a large bitmap. There is some existing logic to merge duplicate GinScanEntrys but it needs refactoring to make it work for entries belonging to different scan keys. 
Note that most of gin.h has been split out into a new file gin_private.h, so that gin.h doesn't export anything that's not supposed to be used by GIN opclasses or the rest of the backend. I did quite a bit of other code beautification work as well, mostly fixing comments and choosing more appropriate names for things.
476 lines
11 KiB
C
476 lines
11 KiB
C
/*-------------------------------------------------------------------------
 *
 * ginbtree.c
 *	  page utility routines for the postgres inverted index access method.
 *
 *
 * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *			src/backend/access/gin/ginbtree.c
 *-------------------------------------------------------------------------
 */
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "access/gin_private.h"
|
|
#include "miscadmin.h"
|
|
#include "storage/bufmgr.h"
|
|
#include "utils/rel.h"
|
|
|
|
/*
|
|
* Locks buffer by needed method for search.
|
|
*/
|
|
static int
|
|
ginTraverseLock(Buffer buffer, bool searchMode)
|
|
{
|
|
Page page;
|
|
int access = GIN_SHARE;
|
|
|
|
LockBuffer(buffer, GIN_SHARE);
|
|
page = BufferGetPage(buffer);
|
|
if (GinPageIsLeaf(page))
|
|
{
|
|
if (searchMode == FALSE)
|
|
{
|
|
/* we should relock our page */
|
|
LockBuffer(buffer, GIN_UNLOCK);
|
|
LockBuffer(buffer, GIN_EXCLUSIVE);
|
|
|
|
/* But root can become non-leaf during relock */
|
|
if (!GinPageIsLeaf(page))
|
|
{
|
|
/* restore old lock type (very rare) */
|
|
LockBuffer(buffer, GIN_UNLOCK);
|
|
LockBuffer(buffer, GIN_SHARE);
|
|
}
|
|
else
|
|
access = GIN_EXCLUSIVE;
|
|
}
|
|
}
|
|
|
|
return access;
|
|
}
|
|
|
|
GinBtreeStack *
|
|
ginPrepareFindLeafPage(GinBtree btree, BlockNumber blkno)
|
|
{
|
|
GinBtreeStack *stack = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
|
|
|
|
stack->blkno = blkno;
|
|
stack->buffer = ReadBuffer(btree->index, stack->blkno);
|
|
stack->parent = NULL;
|
|
stack->predictNumber = 1;
|
|
|
|
ginTraverseLock(stack->buffer, btree->searchMode);
|
|
|
|
return stack;
|
|
}
|
|
|
|
/*
|
|
* Locates leaf page contained tuple
|
|
*/
|
|
GinBtreeStack *
|
|
ginFindLeafPage(GinBtree btree, GinBtreeStack *stack)
|
|
{
|
|
bool isfirst = TRUE;
|
|
BlockNumber rootBlkno;
|
|
|
|
if (!stack)
|
|
stack = ginPrepareFindLeafPage(btree, GIN_ROOT_BLKNO);
|
|
rootBlkno = stack->blkno;
|
|
|
|
for (;;)
|
|
{
|
|
Page page;
|
|
BlockNumber child;
|
|
int access = GIN_SHARE;
|
|
|
|
stack->off = InvalidOffsetNumber;
|
|
|
|
page = BufferGetPage(stack->buffer);
|
|
|
|
if (isfirst)
|
|
{
|
|
if (GinPageIsLeaf(page) && !btree->searchMode)
|
|
access = GIN_EXCLUSIVE;
|
|
isfirst = FALSE;
|
|
}
|
|
else
|
|
access = ginTraverseLock(stack->buffer, btree->searchMode);
|
|
|
|
/*
|
|
* ok, page is correctly locked, we should check to move right ..,
|
|
* root never has a right link, so small optimization
|
|
*/
|
|
while (btree->fullScan == FALSE && stack->blkno != rootBlkno &&
|
|
btree->isMoveRight(btree, page))
|
|
{
|
|
BlockNumber rightlink = GinPageGetOpaque(page)->rightlink;
|
|
|
|
if (rightlink == InvalidBlockNumber)
|
|
/* rightmost page */
|
|
break;
|
|
|
|
stack->blkno = rightlink;
|
|
LockBuffer(stack->buffer, GIN_UNLOCK);
|
|
stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
|
|
LockBuffer(stack->buffer, access);
|
|
page = BufferGetPage(stack->buffer);
|
|
}
|
|
|
|
if (GinPageIsLeaf(page)) /* we found, return locked page */
|
|
return stack;
|
|
|
|
/* now we have correct buffer, try to find child */
|
|
child = btree->findChildPage(btree, stack);
|
|
|
|
LockBuffer(stack->buffer, GIN_UNLOCK);
|
|
Assert(child != InvalidBlockNumber);
|
|
Assert(stack->blkno != child);
|
|
|
|
if (btree->searchMode)
|
|
{
|
|
/* in search mode we may forget path to leaf */
|
|
stack->blkno = child;
|
|
stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
|
|
}
|
|
else
|
|
{
|
|
GinBtreeStack *ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
|
|
|
|
ptr->parent = stack;
|
|
stack = ptr;
|
|
stack->blkno = child;
|
|
stack->buffer = ReadBuffer(btree->index, stack->blkno);
|
|
stack->predictNumber = 1;
|
|
}
|
|
}
|
|
|
|
/* keep compiler happy */
|
|
return NULL;
|
|
}
|
|
|
|
void
|
|
freeGinBtreeStack(GinBtreeStack *stack)
|
|
{
|
|
while (stack)
|
|
{
|
|
GinBtreeStack *tmp = stack->parent;
|
|
|
|
if (stack->buffer != InvalidBuffer)
|
|
ReleaseBuffer(stack->buffer);
|
|
|
|
pfree(stack);
|
|
stack = tmp;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Try to find parent for current stack position, returns correct
|
|
* parent and child's offset in stack->parent.
|
|
* Function should never release root page to prevent conflicts
|
|
* with vacuum process
|
|
*/
|
|
void
|
|
ginFindParents(GinBtree btree, GinBtreeStack *stack,
|
|
BlockNumber rootBlkno)
|
|
{
|
|
|
|
Page page;
|
|
Buffer buffer;
|
|
BlockNumber blkno,
|
|
leftmostBlkno;
|
|
OffsetNumber offset;
|
|
GinBtreeStack *root = stack->parent;
|
|
GinBtreeStack *ptr;
|
|
|
|
if (!root)
|
|
{
|
|
/* XLog mode... */
|
|
root = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
|
|
root->blkno = rootBlkno;
|
|
root->buffer = ReadBuffer(btree->index, rootBlkno);
|
|
LockBuffer(root->buffer, GIN_EXCLUSIVE);
|
|
root->parent = NULL;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* find root, we should not release root page until update is
|
|
* finished!!
|
|
*/
|
|
while (root->parent)
|
|
{
|
|
ReleaseBuffer(root->buffer);
|
|
root = root->parent;
|
|
}
|
|
|
|
Assert(root->blkno == rootBlkno);
|
|
Assert(BufferGetBlockNumber(root->buffer) == rootBlkno);
|
|
LockBuffer(root->buffer, GIN_EXCLUSIVE);
|
|
}
|
|
root->off = InvalidOffsetNumber;
|
|
|
|
page = BufferGetPage(root->buffer);
|
|
Assert(!GinPageIsLeaf(page));
|
|
|
|
/* check trivial case */
|
|
if ((root->off = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) != InvalidOffsetNumber)
|
|
{
|
|
stack->parent = root;
|
|
return;
|
|
}
|
|
|
|
leftmostBlkno = blkno = btree->getLeftMostPage(btree, page);
|
|
LockBuffer(root->buffer, GIN_UNLOCK);
|
|
Assert(blkno != InvalidBlockNumber);
|
|
|
|
for (;;)
|
|
{
|
|
buffer = ReadBuffer(btree->index, blkno);
|
|
LockBuffer(buffer, GIN_EXCLUSIVE);
|
|
page = BufferGetPage(buffer);
|
|
if (GinPageIsLeaf(page))
|
|
elog(ERROR, "Lost path");
|
|
|
|
leftmostBlkno = btree->getLeftMostPage(btree, page);
|
|
|
|
while ((offset = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) == InvalidOffsetNumber)
|
|
{
|
|
blkno = GinPageGetOpaque(page)->rightlink;
|
|
LockBuffer(buffer, GIN_UNLOCK);
|
|
ReleaseBuffer(buffer);
|
|
if (blkno == InvalidBlockNumber)
|
|
break;
|
|
buffer = ReadBuffer(btree->index, blkno);
|
|
LockBuffer(buffer, GIN_EXCLUSIVE);
|
|
page = BufferGetPage(buffer);
|
|
}
|
|
|
|
if (blkno != InvalidBlockNumber)
|
|
{
|
|
ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
|
|
ptr->blkno = blkno;
|
|
ptr->buffer = buffer;
|
|
ptr->parent = root; /* it's may be wrong, but in next call we will
|
|
* correct */
|
|
ptr->off = offset;
|
|
stack->parent = ptr;
|
|
return;
|
|
}
|
|
|
|
blkno = leftmostBlkno;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Insert value (stored in GinBtree) to tree described by stack
|
|
*
|
|
* During an index build, buildStats is non-null and the counters
|
|
* it contains should be incremented as needed.
|
|
*
|
|
* NB: the passed-in stack is freed, as though by freeGinBtreeStack.
|
|
*/
|
|
void
|
|
ginInsertValue(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats)
|
|
{
|
|
GinBtreeStack *parent = stack;
|
|
BlockNumber rootBlkno = InvalidBuffer;
|
|
Page page,
|
|
rpage,
|
|
lpage;
|
|
|
|
/* remember root BlockNumber */
|
|
while (parent)
|
|
{
|
|
rootBlkno = parent->blkno;
|
|
parent = parent->parent;
|
|
}
|
|
|
|
while (stack)
|
|
{
|
|
XLogRecData *rdata;
|
|
BlockNumber savedRightLink;
|
|
|
|
page = BufferGetPage(stack->buffer);
|
|
savedRightLink = GinPageGetOpaque(page)->rightlink;
|
|
|
|
if (btree->isEnoughSpace(btree, stack->buffer, stack->off))
|
|
{
|
|
START_CRIT_SECTION();
|
|
btree->placeToPage(btree, stack->buffer, stack->off, &rdata);
|
|
|
|
MarkBufferDirty(stack->buffer);
|
|
|
|
if (RelationNeedsWAL(btree->index))
|
|
{
|
|
XLogRecPtr recptr;
|
|
|
|
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata);
|
|
PageSetLSN(page, recptr);
|
|
PageSetTLI(page, ThisTimeLineID);
|
|
}
|
|
|
|
LockBuffer(stack->buffer, GIN_UNLOCK);
|
|
END_CRIT_SECTION();
|
|
|
|
freeGinBtreeStack(stack);
|
|
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
Buffer rbuffer = GinNewBuffer(btree->index);
|
|
Page newlpage;
|
|
|
|
/*
|
|
* newlpage is a pointer to memory page, it doesn't associate with
|
|
* buffer, stack->buffer should be untouched
|
|
*/
|
|
newlpage = btree->splitPage(btree, stack->buffer, rbuffer, stack->off, &rdata);
|
|
|
|
((ginxlogSplit *) (rdata->data))->rootBlkno = rootBlkno;
|
|
|
|
/* During index build, count the newly-split page */
|
|
if (buildStats)
|
|
{
|
|
if (btree->isData)
|
|
buildStats->nDataPages++;
|
|
else
|
|
buildStats->nEntryPages++;
|
|
}
|
|
|
|
parent = stack->parent;
|
|
|
|
if (parent == NULL)
|
|
{
|
|
/*
|
|
* split root, so we need to allocate new left page and place
|
|
* pointer on root to left and right page
|
|
*/
|
|
Buffer lbuffer = GinNewBuffer(btree->index);
|
|
|
|
((ginxlogSplit *) (rdata->data))->isRootSplit = TRUE;
|
|
((ginxlogSplit *) (rdata->data))->rrlink = InvalidBlockNumber;
|
|
|
|
page = BufferGetPage(stack->buffer);
|
|
lpage = BufferGetPage(lbuffer);
|
|
rpage = BufferGetPage(rbuffer);
|
|
|
|
GinPageGetOpaque(rpage)->rightlink = InvalidBlockNumber;
|
|
GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
|
|
((ginxlogSplit *) (rdata->data))->lblkno = BufferGetBlockNumber(lbuffer);
|
|
|
|
START_CRIT_SECTION();
|
|
|
|
GinInitBuffer(stack->buffer, GinPageGetOpaque(newlpage)->flags & ~GIN_LEAF);
|
|
PageRestoreTempPage(newlpage, lpage);
|
|
btree->fillRoot(btree, stack->buffer, lbuffer, rbuffer);
|
|
|
|
MarkBufferDirty(rbuffer);
|
|
MarkBufferDirty(lbuffer);
|
|
MarkBufferDirty(stack->buffer);
|
|
|
|
if (RelationNeedsWAL(btree->index))
|
|
{
|
|
XLogRecPtr recptr;
|
|
|
|
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);
|
|
PageSetLSN(page, recptr);
|
|
PageSetTLI(page, ThisTimeLineID);
|
|
PageSetLSN(lpage, recptr);
|
|
PageSetTLI(lpage, ThisTimeLineID);
|
|
PageSetLSN(rpage, recptr);
|
|
PageSetTLI(rpage, ThisTimeLineID);
|
|
}
|
|
|
|
UnlockReleaseBuffer(rbuffer);
|
|
UnlockReleaseBuffer(lbuffer);
|
|
LockBuffer(stack->buffer, GIN_UNLOCK);
|
|
END_CRIT_SECTION();
|
|
|
|
freeGinBtreeStack(stack);
|
|
|
|
/* During index build, count the newly-added root page */
|
|
if (buildStats)
|
|
{
|
|
if (btree->isData)
|
|
buildStats->nDataPages++;
|
|
else
|
|
buildStats->nEntryPages++;
|
|
}
|
|
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
/* split non-root page */
|
|
((ginxlogSplit *) (rdata->data))->isRootSplit = FALSE;
|
|
((ginxlogSplit *) (rdata->data))->rrlink = savedRightLink;
|
|
|
|
lpage = BufferGetPage(stack->buffer);
|
|
rpage = BufferGetPage(rbuffer);
|
|
|
|
GinPageGetOpaque(rpage)->rightlink = savedRightLink;
|
|
GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
|
|
|
|
START_CRIT_SECTION();
|
|
PageRestoreTempPage(newlpage, lpage);
|
|
|
|
MarkBufferDirty(rbuffer);
|
|
MarkBufferDirty(stack->buffer);
|
|
|
|
if (RelationNeedsWAL(btree->index))
|
|
{
|
|
XLogRecPtr recptr;
|
|
|
|
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);
|
|
PageSetLSN(lpage, recptr);
|
|
PageSetTLI(lpage, ThisTimeLineID);
|
|
PageSetLSN(rpage, recptr);
|
|
PageSetTLI(rpage, ThisTimeLineID);
|
|
}
|
|
UnlockReleaseBuffer(rbuffer);
|
|
END_CRIT_SECTION();
|
|
}
|
|
}
|
|
|
|
btree->isDelete = FALSE;
|
|
|
|
/* search parent to lock */
|
|
LockBuffer(parent->buffer, GIN_EXCLUSIVE);
|
|
|
|
/* move right if it's needed */
|
|
page = BufferGetPage(parent->buffer);
|
|
while ((parent->off = btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber)
|
|
{
|
|
BlockNumber rightlink = GinPageGetOpaque(page)->rightlink;
|
|
|
|
LockBuffer(parent->buffer, GIN_UNLOCK);
|
|
|
|
if (rightlink == InvalidBlockNumber)
|
|
{
|
|
/*
|
|
* rightmost page, but we don't find parent, we should use
|
|
* plain search...
|
|
*/
|
|
ginFindParents(btree, stack, rootBlkno);
|
|
parent = stack->parent;
|
|
page = BufferGetPage(parent->buffer);
|
|
break;
|
|
}
|
|
|
|
parent->blkno = rightlink;
|
|
parent->buffer = ReleaseAndReadBuffer(parent->buffer, btree->index, parent->blkno);
|
|
LockBuffer(parent->buffer, GIN_EXCLUSIVE);
|
|
page = BufferGetPage(parent->buffer);
|
|
}
|
|
|
|
UnlockReleaseBuffer(stack->buffer);
|
|
pfree(stack);
|
|
stack = parent;
|
|
}
|
|
}
|