mirror of
https://github.com/postgres/postgres.git
synced 2025-06-29 10:41:53 +03:00
Although the key-combining code claimed to work correctly if its input contained both lossy and exact pointers for a single page in a single TID stream, in fact this did not work, and could not work without pretty fundamental redesign. Modify keyGetItem so that it will not return such a stream, by handling lossy-pointer cases a bit more explicitly than we did before. Per followup investigation of a gripe from Artur Dabrowski. An example of a query that failed given his data set is select count(*) from search_tab where (to_tsvector('german', keywords ) @@ to_tsquery('german', 'ee:* | dd:*')) and (to_tsvector('german', keywords ) @@ to_tsquery('german', 'aa:*')); Back-patch to 8.4 where the lossy pointer code was introduced.
1417 lines
38 KiB
C
1417 lines
38 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* ginget.c
|
|
* fetch tuples from a GIN scan.
|
|
*
|
|
*
|
|
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
* IDENTIFICATION
|
|
* $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.27.2.3 2010/08/01 19:16:55 tgl Exp $
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "access/gin.h"
|
|
#include "access/relscan.h"
|
|
#include "catalog/index.h"
|
|
#include "miscadmin.h"
|
|
#include "storage/bufmgr.h"
|
|
#include "utils/datum.h"
|
|
#include "utils/memutils.h"
|
|
|
|
|
|
typedef struct pendingPosition
|
|
{
|
|
Buffer pendingBuffer;
|
|
OffsetNumber firstOffset;
|
|
OffsetNumber lastOffset;
|
|
ItemPointerData item;
|
|
bool *hasMatchKey;
|
|
} pendingPosition;
|
|
|
|
|
|
/*
|
|
* Tries to refind previously taken ItemPointer on page.
|
|
*/
|
|
static bool
|
|
findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
|
|
{
|
|
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
|
|
int res;
|
|
|
|
if (GinPageGetOpaque(page)->flags & GIN_DELETED)
|
|
/* page was deleted by concurrent vacuum */
|
|
return false;
|
|
|
|
/*
|
|
* scan page to find equal or first greater value
|
|
*/
|
|
for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++)
|
|
{
|
|
res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
|
|
|
|
if (res <= 0)
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Goes to the next page if current offset is outside of bounds
|
|
*/
|
|
static bool
|
|
moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack)
|
|
{
|
|
Page page = BufferGetPage(stack->buffer);
|
|
|
|
if (stack->off > PageGetMaxOffsetNumber(page))
|
|
{
|
|
/*
|
|
* We scanned the whole page, so we should take right page
|
|
*/
|
|
stack->blkno = GinPageGetOpaque(page)->rightlink;
|
|
|
|
if (GinPageRightMost(page))
|
|
return false; /* no more pages */
|
|
|
|
LockBuffer(stack->buffer, GIN_UNLOCK);
|
|
stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
|
|
LockBuffer(stack->buffer, GIN_SHARE);
|
|
stack->off = FirstOffsetNumber;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Does fullscan of posting tree and saves ItemPointers
|
|
* in scanEntry->partialMatch TIDBitmap
|
|
*/
|
|
static void
|
|
scanForItems(Relation index, GinScanEntry scanEntry, BlockNumber rootPostingTree)
|
|
{
|
|
GinPostingTreeScan *gdi;
|
|
Buffer buffer;
|
|
Page page;
|
|
BlockNumber blkno;
|
|
|
|
gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);
|
|
|
|
buffer = scanBeginPostingTree(gdi);
|
|
IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
|
|
|
|
freeGinBtreeStack(gdi->stack);
|
|
pfree(gdi);
|
|
|
|
/*
|
|
* Goes through all leaves
|
|
*/
|
|
for (;;)
|
|
{
|
|
page = BufferGetPage(buffer);
|
|
|
|
if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0 && GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber)
|
|
{
|
|
tbm_add_tuples(scanEntry->partialMatch,
|
|
(ItemPointer) GinDataPageGetItem(page, FirstOffsetNumber),
|
|
GinPageGetOpaque(page)->maxoff, false);
|
|
scanEntry->predictNumberResult += GinPageGetOpaque(page)->maxoff;
|
|
}
|
|
|
|
blkno = GinPageGetOpaque(page)->rightlink;
|
|
if (GinPageRightMost(page))
|
|
{
|
|
UnlockReleaseBuffer(buffer);
|
|
return; /* no more pages */
|
|
}
|
|
|
|
LockBuffer(buffer, GIN_UNLOCK);
|
|
buffer = ReleaseAndReadBuffer(buffer, index, blkno);
|
|
LockBuffer(buffer, GIN_SHARE);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Collects all ItemPointer into the TIDBitmap struct
|
|
* for entries partially matched to search entry.
|
|
*
|
|
* Returns true if done, false if it's needed to restart scan from scratch
|
|
*/
|
|
static bool
|
|
computePartialMatchList(GinBtreeData *btree, GinBtreeStack *stack, GinScanEntry scanEntry)
|
|
{
|
|
Page page;
|
|
IndexTuple itup;
|
|
Datum idatum;
|
|
int32 cmp;
|
|
|
|
scanEntry->partialMatch = tbm_create(work_mem * 1024L);
|
|
|
|
for (;;)
|
|
{
|
|
/*
|
|
* stack->off points to the interested entry, buffer is already locked
|
|
*/
|
|
if (moveRightIfItNeeded(btree, stack) == false)
|
|
return true;
|
|
|
|
page = BufferGetPage(stack->buffer);
|
|
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
|
|
|
|
/*
|
|
* If tuple stores another attribute then stop scan
|
|
*/
|
|
if (gintuple_get_attrnum(btree->ginstate, itup) != scanEntry->attnum)
|
|
return true;
|
|
|
|
idatum = gin_index_getattr(btree->ginstate, itup);
|
|
|
|
|
|
/*----------
|
|
* Check of partial match.
|
|
* case cmp == 0 => match
|
|
* case cmp > 0 => not match and finish scan
|
|
* case cmp < 0 => not match and continue scan
|
|
*----------
|
|
*/
|
|
cmp = DatumGetInt32(FunctionCall4(&btree->ginstate->comparePartialFn[scanEntry->attnum - 1],
|
|
scanEntry->entry,
|
|
idatum,
|
|
UInt16GetDatum(scanEntry->strategy),
|
|
PointerGetDatum(scanEntry->extra_data)));
|
|
|
|
if (cmp > 0)
|
|
return true;
|
|
else if (cmp < 0)
|
|
{
|
|
stack->off++;
|
|
continue;
|
|
}
|
|
|
|
if (GinIsPostingTree(itup))
|
|
{
|
|
BlockNumber rootPostingTree = GinGetPostingTree(itup);
|
|
Datum newDatum,
|
|
savedDatum = datumCopy(
|
|
idatum,
|
|
btree->ginstate->origTupdesc->attrs[scanEntry->attnum - 1]->attbyval,
|
|
btree->ginstate->origTupdesc->attrs[scanEntry->attnum - 1]->attlen
|
|
);
|
|
|
|
/*
|
|
* We should unlock current page (but not unpin) during tree scan
|
|
* to prevent deadlock with vacuum processes.
|
|
*
|
|
* We save current entry value (savedDatum) to be able to refind
|
|
* our tuple after re-locking
|
|
*/
|
|
LockBuffer(stack->buffer, GIN_UNLOCK);
|
|
scanForItems(btree->index, scanEntry, rootPostingTree);
|
|
|
|
/*
|
|
* We lock again the entry page and while it was unlocked insert
|
|
* might occured, so we need to refind our position
|
|
*/
|
|
LockBuffer(stack->buffer, GIN_SHARE);
|
|
page = BufferGetPage(stack->buffer);
|
|
if (!GinPageIsLeaf(page))
|
|
{
|
|
/*
|
|
* Root page becomes non-leaf while we unlock it. We will
|
|
* start again, this situation doesn't cause often - root can
|
|
* became a non-leaf only one per life of index.
|
|
*/
|
|
|
|
return false;
|
|
}
|
|
|
|
for (;;)
|
|
{
|
|
if (moveRightIfItNeeded(btree, stack) == false)
|
|
elog(ERROR, "lost saved point in index"); /* must not happen !!! */
|
|
|
|
page = BufferGetPage(stack->buffer);
|
|
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
|
|
newDatum = gin_index_getattr(btree->ginstate, itup);
|
|
|
|
if (gintuple_get_attrnum(btree->ginstate, itup) != scanEntry->attnum)
|
|
elog(ERROR, "lost saved point in index"); /* must not happen !!! */
|
|
|
|
if (compareEntries(btree->ginstate, scanEntry->attnum, newDatum, savedDatum) == 0)
|
|
{
|
|
/* Found! */
|
|
if (btree->ginstate->origTupdesc->attrs[scanEntry->attnum - 1]->attbyval == false)
|
|
pfree(DatumGetPointer(savedDatum));
|
|
break;
|
|
}
|
|
|
|
stack->off++;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
tbm_add_tuples(scanEntry->partialMatch, GinGetPosting(itup), GinGetNPosting(itup), false);
|
|
scanEntry->predictNumberResult += GinGetNPosting(itup);
|
|
}
|
|
|
|
/*
|
|
* Ok, we save ItemPointers, go to the next entry
|
|
*/
|
|
stack->off++;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Start* functions setup beginning state of searches: finds correct buffer and pins it.
|
|
*/
|
|
static void
|
|
startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
|
|
{
|
|
GinBtreeData btreeEntry;
|
|
GinBtreeStack *stackEntry;
|
|
Page page;
|
|
bool needUnlock = TRUE;
|
|
|
|
entry->buffer = InvalidBuffer;
|
|
entry->offset = InvalidOffsetNumber;
|
|
entry->list = NULL;
|
|
entry->nlist = 0;
|
|
entry->partialMatch = NULL;
|
|
entry->partialMatchResult = NULL;
|
|
entry->reduceResult = FALSE;
|
|
entry->predictNumberResult = 0;
|
|
|
|
if (entry->master != NULL)
|
|
{
|
|
entry->isFinished = entry->master->isFinished;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* we should find entry, and begin scan of posting tree or just store
|
|
* posting list in memory
|
|
*/
|
|
|
|
prepareEntryScan(&btreeEntry, index, entry->attnum, entry->entry, ginstate);
|
|
btreeEntry.searchMode = TRUE;
|
|
stackEntry = ginFindLeafPage(&btreeEntry, NULL);
|
|
page = BufferGetPage(stackEntry->buffer);
|
|
|
|
entry->isFinished = TRUE;
|
|
|
|
if (entry->isPartialMatch)
|
|
{
|
|
/*
|
|
* btreeEntry.findItem points to the first equal or greater value than
|
|
* needed. So we will scan further and collect all ItemPointers
|
|
*/
|
|
btreeEntry.findItem(&btreeEntry, stackEntry);
|
|
if (computePartialMatchList(&btreeEntry, stackEntry, entry) == false)
|
|
{
|
|
/*
|
|
* GIN tree was seriously restructured, so we will cleanup all
|
|
* found data and rescan. See comments near 'return false' in
|
|
* computePartialMatchList()
|
|
*/
|
|
if (entry->partialMatch)
|
|
{
|
|
if (entry->partialMatchIterator)
|
|
tbm_end_iterate(entry->partialMatchIterator);
|
|
entry->partialMatchIterator = NULL;
|
|
tbm_free(entry->partialMatch);
|
|
entry->partialMatch = NULL;
|
|
}
|
|
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
|
|
freeGinBtreeStack(stackEntry);
|
|
|
|
startScanEntry(index, ginstate, entry);
|
|
return;
|
|
}
|
|
|
|
if (entry->partialMatch && !tbm_is_empty(entry->partialMatch))
|
|
{
|
|
entry->partialMatchIterator = tbm_begin_iterate(entry->partialMatch);
|
|
entry->isFinished = FALSE;
|
|
}
|
|
}
|
|
else if (btreeEntry.findItem(&btreeEntry, stackEntry))
|
|
{
|
|
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
|
|
|
|
if (GinIsPostingTree(itup))
|
|
{
|
|
BlockNumber rootPostingTree = GinGetPostingTree(itup);
|
|
GinPostingTreeScan *gdi;
|
|
Page page;
|
|
|
|
/*
|
|
* We should unlock entry page before make deal with posting tree
|
|
* to prevent deadlocks with vacuum processes. Because entry is
|
|
* never deleted from page and posting tree is never reduced to
|
|
* the posting list, we can unlock page after getting BlockNumber
|
|
* of root of posting tree.
|
|
*/
|
|
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
|
|
needUnlock = FALSE;
|
|
gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);
|
|
|
|
entry->buffer = scanBeginPostingTree(gdi);
|
|
|
|
/*
|
|
* We keep buffer pinned because we need to prevent deletion of
|
|
* page during scan. See GIN's vacuum implementation. RefCount is
|
|
* increased to keep buffer pinned after freeGinBtreeStack() call.
|
|
*/
|
|
IncrBufferRefCount(entry->buffer);
|
|
|
|
page = BufferGetPage(entry->buffer);
|
|
entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff;
|
|
|
|
/*
|
|
* Keep page content in memory to prevent durable page locking
|
|
*/
|
|
entry->list = (ItemPointerData *) palloc(BLCKSZ);
|
|
entry->nlist = GinPageGetOpaque(page)->maxoff;
|
|
memcpy(entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
|
|
GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData));
|
|
|
|
LockBuffer(entry->buffer, GIN_UNLOCK);
|
|
freeGinBtreeStack(gdi->stack);
|
|
pfree(gdi);
|
|
entry->isFinished = FALSE;
|
|
}
|
|
else if (GinGetNPosting(itup) > 0)
|
|
{
|
|
entry->nlist = GinGetNPosting(itup);
|
|
entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist);
|
|
memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist);
|
|
entry->isFinished = FALSE;
|
|
}
|
|
}
|
|
|
|
if (needUnlock)
|
|
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
|
|
freeGinBtreeStack(stackEntry);
|
|
}
|
|
|
|
static void
|
|
startScanKey(Relation index, GinState *ginstate, GinScanKey key)
|
|
{
|
|
uint32 i;
|
|
|
|
if (!key->firstCall)
|
|
return;
|
|
|
|
for (i = 0; i < key->nentries; i++)
|
|
startScanEntry(index, ginstate, key->scanEntry + i);
|
|
|
|
key->isFinished = FALSE;
|
|
key->firstCall = FALSE;
|
|
|
|
if (GinFuzzySearchLimit > 0)
|
|
{
|
|
/*
|
|
* If all of keys more than threshold we will try to reduce result, we
|
|
* hope (and only hope, for intersection operation of array our
|
|
* supposition isn't true), that total result will not more than
|
|
* minimal predictNumberResult.
|
|
*/
|
|
|
|
for (i = 0; i < key->nentries; i++)
|
|
if (key->scanEntry[i].predictNumberResult <= key->nentries * GinFuzzySearchLimit)
|
|
return;
|
|
|
|
for (i = 0; i < key->nentries; i++)
|
|
if (key->scanEntry[i].predictNumberResult > key->nentries * GinFuzzySearchLimit)
|
|
{
|
|
key->scanEntry[i].predictNumberResult /= key->nentries;
|
|
key->scanEntry[i].reduceResult = TRUE;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
startScan(IndexScanDesc scan)
|
|
{
|
|
uint32 i;
|
|
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
|
|
|
for (i = 0; i < so->nkeys; i++)
|
|
startScanKey(scan->indexRelation, &so->ginstate, so->keys + i);
|
|
}
|
|
|
|
/*
|
|
* Gets next ItemPointer from PostingTree. Note, that we copy
|
|
* page into GinScanEntry->list array and unlock page, but keep it pinned
|
|
* to prevent interference with vacuum
|
|
*/
|
|
static void
|
|
entryGetNextItem(Relation index, GinScanEntry entry)
|
|
{
|
|
Page page;
|
|
BlockNumber blkno;
|
|
|
|
for (;;)
|
|
{
|
|
if (entry->offset < entry->nlist)
|
|
{
|
|
entry->curItem = entry->list[entry->offset++];
|
|
return;
|
|
}
|
|
|
|
LockBuffer(entry->buffer, GIN_SHARE);
|
|
page = BufferGetPage(entry->buffer);
|
|
for (;;)
|
|
{
|
|
/*
|
|
* It's needed to go by right link. During that we should refind
|
|
* first ItemPointer greater that stored
|
|
*/
|
|
|
|
blkno = GinPageGetOpaque(page)->rightlink;
|
|
|
|
LockBuffer(entry->buffer, GIN_UNLOCK);
|
|
if (blkno == InvalidBlockNumber)
|
|
{
|
|
ReleaseBuffer(entry->buffer);
|
|
ItemPointerSetInvalid(&entry->curItem);
|
|
entry->buffer = InvalidBuffer;
|
|
entry->isFinished = TRUE;
|
|
return;
|
|
}
|
|
|
|
entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno);
|
|
LockBuffer(entry->buffer, GIN_SHARE);
|
|
page = BufferGetPage(entry->buffer);
|
|
|
|
entry->offset = InvalidOffsetNumber;
|
|
if (!ItemPointerIsValid(&entry->curItem) ||
|
|
findItemInPage(page, &entry->curItem, &entry->offset))
|
|
{
|
|
/*
|
|
* Found position equal to or greater than stored
|
|
*/
|
|
entry->nlist = GinPageGetOpaque(page)->maxoff;
|
|
memcpy(entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
|
|
GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData));
|
|
|
|
LockBuffer(entry->buffer, GIN_UNLOCK);
|
|
|
|
if (!ItemPointerIsValid(&entry->curItem) ||
|
|
compareItemPointers(&entry->curItem,
|
|
entry->list + entry->offset - 1) == 0)
|
|
{
|
|
/*
|
|
* First pages are deleted or empty, or we found exact
|
|
* position, so break inner loop and continue outer one.
|
|
*/
|
|
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* Find greater than entry->curItem position, store it.
|
|
*/
|
|
entry->curItem = entry->list[entry->offset - 1];
|
|
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* convenience function for invoking a key's consistentFn */
|
|
static inline bool
|
|
callConsistentFn(GinState *ginstate, GinScanKey key)
|
|
{
|
|
/*
|
|
* Initialize recheckCurItem in case the consistentFn doesn't know it
|
|
* should set it. The safe assumption in that case is to force recheck.
|
|
*/
|
|
key->recheckCurItem = true;
|
|
|
|
return DatumGetBool(FunctionCall6(&ginstate->consistentFn[key->attnum - 1],
|
|
PointerGetDatum(key->entryRes),
|
|
UInt16GetDatum(key->strategy),
|
|
key->query,
|
|
UInt32GetDatum(key->nentries),
|
|
PointerGetDatum(key->extra_data),
|
|
PointerGetDatum(&key->recheckCurItem)));
|
|
}
|
|
|
|
#define gin_rand() (((double) random()) / ((double) MAX_RANDOM_VALUE))
|
|
#define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
|
|
|
|
/*
|
|
* Sets entry->curItem to next heap item pointer for one entry of one scan key,
|
|
* or sets entry->isFinished to TRUE if there are no more.
|
|
*
|
|
* Item pointers must be returned in ascending order.
|
|
*
|
|
* Note: this can return a "lossy page" item pointer, indicating that the
|
|
* entry potentially matches all items on that heap page. However, it is
|
|
* not allowed to return both a lossy page pointer and exact (regular)
|
|
* item pointers for the same page. (Doing so would break the key-combination
|
|
* logic in keyGetItem and scanGetItem; see comment in scanGetItem.) In the
|
|
* current implementation this is guaranteed by the behavior of tidbitmaps.
|
|
*/
|
|
static void
|
|
entryGetItem(Relation index, GinScanEntry entry)
|
|
{
|
|
Assert(!entry->isFinished);
|
|
|
|
if (entry->master)
|
|
{
|
|
entry->isFinished = entry->master->isFinished;
|
|
entry->curItem = entry->master->curItem;
|
|
}
|
|
else if (entry->partialMatch)
|
|
{
|
|
do
|
|
{
|
|
if (entry->partialMatchResult == NULL ||
|
|
entry->offset >= entry->partialMatchResult->ntuples)
|
|
{
|
|
entry->partialMatchResult = tbm_iterate(entry->partialMatchIterator);
|
|
|
|
if (entry->partialMatchResult == NULL)
|
|
{
|
|
ItemPointerSetInvalid(&entry->curItem);
|
|
tbm_end_iterate(entry->partialMatchIterator);
|
|
entry->partialMatchIterator = NULL;
|
|
entry->isFinished = TRUE;
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* reset counter to the beginning of
|
|
* entry->partialMatchResult. Note: entry->offset is still
|
|
* greater than partialMatchResult->ntuples if
|
|
* partialMatchResult is lossy. So, on next call we will get
|
|
* next result from TIDBitmap.
|
|
*/
|
|
entry->offset = 0;
|
|
}
|
|
|
|
if (entry->partialMatchResult->ntuples < 0)
|
|
{
|
|
/*
|
|
* lossy result, so we need to check the whole page
|
|
*/
|
|
ItemPointerSetLossyPage(&entry->curItem,
|
|
entry->partialMatchResult->blockno);
|
|
|
|
/*
|
|
* We might as well fall out of the loop; we could not
|
|
* estimate number of results on this page to support correct
|
|
* reducing of result even if it's enabled
|
|
*/
|
|
break;
|
|
}
|
|
|
|
ItemPointerSet(&entry->curItem,
|
|
entry->partialMatchResult->blockno,
|
|
entry->partialMatchResult->offsets[entry->offset]);
|
|
entry->offset++;
|
|
} while (entry->reduceResult == TRUE && dropItem(entry));
|
|
}
|
|
else if (!BufferIsValid(entry->buffer))
|
|
{
|
|
entry->offset++;
|
|
if (entry->offset <= entry->nlist)
|
|
entry->curItem = entry->list[entry->offset - 1];
|
|
else
|
|
{
|
|
ItemPointerSetInvalid(&entry->curItem);
|
|
entry->isFinished = TRUE;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
do
|
|
{
|
|
entryGetNextItem(index, entry);
|
|
} while (entry->isFinished == FALSE &&
|
|
entry->reduceResult == TRUE &&
|
|
dropItem(entry));
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Sets key->curItem to next heap item pointer for one scan key, advancing
|
|
* past any item pointers <= advancePast.
|
|
* Sets key->isFinished to TRUE if there are no more.
|
|
*
|
|
* On success, key->recheckCurItem is set true iff recheck is needed for this
|
|
* item pointer (including the case where the item pointer is a lossy page
|
|
* pointer).
|
|
*
|
|
* Item pointers must be returned in ascending order.
|
|
*
|
|
* Note: this can return a "lossy page" item pointer, indicating that the
|
|
* key potentially matches all items on that heap page. However, it is
|
|
* not allowed to return both a lossy page pointer and exact (regular)
|
|
* item pointers for the same page. (Doing so would break the key-combination
|
|
* logic in scanGetItem.)
|
|
*/
|
|
static void
|
|
keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx,
|
|
GinScanKey key, ItemPointer advancePast)
|
|
{
|
|
ItemPointerData myAdvancePast = *advancePast;
|
|
ItemPointerData curPageLossy;
|
|
uint32 i;
|
|
uint32 lossyEntry;
|
|
bool haveLossyEntry;
|
|
GinScanEntry entry;
|
|
bool res;
|
|
MemoryContext oldCtx;
|
|
|
|
Assert(!key->isFinished);
|
|
|
|
do
|
|
{
|
|
/*
|
|
* Advance any entries that are <= myAdvancePast. In particular,
|
|
* since entry->curItem was initialized with ItemPointerSetMin, this
|
|
* ensures we fetch the first item for each entry on the first call.
|
|
* Then set key->curItem to the minimum of the valid entry curItems.
|
|
*
|
|
* Note: a lossy-page entry is encoded by a ItemPointer with max value
|
|
* for offset (0xffff), so that it will sort after any exact entries
|
|
* for the same page. So we'll prefer to return exact pointers not
|
|
* lossy pointers, which is good. Also, when we advance past an exact
|
|
* entry after processing it, we will not advance past lossy entries
|
|
* for the same page in other keys, which is NECESSARY for correct
|
|
* results (since we might have additional entries for the same page
|
|
* in the first key).
|
|
*/
|
|
ItemPointerSetMax(&key->curItem);
|
|
|
|
for (i = 0; i < key->nentries; i++)
|
|
{
|
|
entry = key->scanEntry + i;
|
|
|
|
while (entry->isFinished == FALSE &&
|
|
compareItemPointers(&entry->curItem, &myAdvancePast) <= 0)
|
|
entryGetItem(index, entry);
|
|
|
|
if (entry->isFinished == FALSE &&
|
|
compareItemPointers(&entry->curItem, &key->curItem) < 0)
|
|
key->curItem = entry->curItem;
|
|
}
|
|
|
|
if (ItemPointerIsMax(&key->curItem))
|
|
{
|
|
/* all entries are finished */
|
|
key->isFinished = TRUE;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Now key->curItem contains first ItemPointer after previous result.
|
|
* Advance myAdvancePast to this value, so that if the consistentFn
|
|
* rejects the entry and we loop around again, we will advance to the
|
|
* next available item pointer.
|
|
*/
|
|
myAdvancePast = key->curItem;
|
|
|
|
/*
|
|
* Lossy-page entries pose a problem, since we don't know the correct
|
|
* entryRes state to pass to the consistentFn, and we also don't know
|
|
* what its combining logic will be (could be AND, OR, or even NOT).
|
|
* If the logic is OR then the consistentFn might succeed for all
|
|
* items in the lossy page even when none of the other entries match.
|
|
*
|
|
* If we have a single lossy-page entry then we check to see if the
|
|
* consistentFn will succeed with only that entry TRUE. If so,
|
|
* we return a lossy-page pointer to indicate that the whole heap
|
|
* page must be checked. (On the next call, we'll advance past all
|
|
* regular and lossy entries for this page before resuming search,
|
|
* thus ensuring that we never return both regular and lossy pointers
|
|
* for the same page.)
|
|
*
|
|
* This idea could be generalized to more than one lossy-page entry,
|
|
* but ideally lossy-page entries should be infrequent so it would
|
|
* seldom be the case that we have more than one at once. So it
|
|
* doesn't seem worth the extra complexity to optimize that case.
|
|
* If we do find more than one, we just punt and return a lossy-page
|
|
* pointer always.
|
|
*
|
|
* Note that only lossy-page entries pointing to the current item's
|
|
* page should trigger this processing; we might have future lossy
|
|
* pages in the entry array, but they aren't relevant yet.
|
|
*/
|
|
ItemPointerSetLossyPage(&curPageLossy,
|
|
GinItemPointerGetBlockNumber(&key->curItem));
|
|
|
|
lossyEntry = 0;
|
|
haveLossyEntry = false;
|
|
for (i = 0; i < key->nentries; i++)
|
|
{
|
|
entry = key->scanEntry + i;
|
|
if (entry->isFinished == FALSE &&
|
|
compareItemPointers(&entry->curItem, &curPageLossy) == 0)
|
|
{
|
|
if (haveLossyEntry)
|
|
{
|
|
/* Multiple lossy entries, punt */
|
|
key->curItem = curPageLossy;
|
|
key->recheckCurItem = true;
|
|
return;
|
|
}
|
|
lossyEntry = i;
|
|
haveLossyEntry = true;
|
|
}
|
|
}
|
|
|
|
/* prepare for calling consistentFn in temp context */
|
|
oldCtx = MemoryContextSwitchTo(tempCtx);
|
|
|
|
if (haveLossyEntry)
|
|
{
|
|
/* Single lossy-page entry, so see if whole page matches */
|
|
memset(key->entryRes, FALSE, key->nentries);
|
|
key->entryRes[lossyEntry] = TRUE;
|
|
|
|
if (callConsistentFn(ginstate, key))
|
|
{
|
|
/* Yes, so clean up ... */
|
|
MemoryContextSwitchTo(oldCtx);
|
|
MemoryContextReset(tempCtx);
|
|
|
|
/* and return lossy pointer for whole page */
|
|
key->curItem = curPageLossy;
|
|
key->recheckCurItem = true;
|
|
return;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* At this point we know that we don't need to return a lossy
|
|
* whole-page pointer, but we might have matches for individual exact
|
|
* item pointers, possibly in combination with a lossy pointer. Our
|
|
* strategy if there's a lossy pointer is to try the consistentFn both
|
|
* ways and return a hit if it accepts either one (forcing the hit to
|
|
* be marked lossy so it will be rechecked).
|
|
*
|
|
* Prepare entryRes array to be passed to consistentFn.
|
|
*
|
|
* (If key->nentries == 1 then the consistentFn should always succeed,
|
|
* but we must call it anyway to find out the recheck status.)
|
|
*/
|
|
for (i = 0; i < key->nentries; i++)
|
|
{
|
|
entry = key->scanEntry + i;
|
|
if (entry->isFinished == FALSE &&
|
|
compareItemPointers(&entry->curItem, &key->curItem) == 0)
|
|
key->entryRes[i] = TRUE;
|
|
else
|
|
key->entryRes[i] = FALSE;
|
|
}
|
|
if (haveLossyEntry)
|
|
key->entryRes[lossyEntry] = TRUE;
|
|
|
|
res = callConsistentFn(ginstate, key);
|
|
|
|
if (!res && haveLossyEntry)
|
|
{
|
|
/* try the other way for the lossy item */
|
|
key->entryRes[lossyEntry] = FALSE;
|
|
|
|
res = callConsistentFn(ginstate, key);
|
|
}
|
|
|
|
/* clean up after consistentFn calls */
|
|
MemoryContextSwitchTo(oldCtx);
|
|
MemoryContextReset(tempCtx);
|
|
|
|
/* If we matched a lossy entry, force recheckCurItem = true */
|
|
if (haveLossyEntry)
|
|
key->recheckCurItem = true;
|
|
} while (!res);
|
|
}
|
|
|
|
|
|
/*
|
|
* Get ItemPointer of next heap row to be checked from pending list.
|
|
* Returns false if there are no more. On pages with several rows
|
|
* it returns each row separately, on page with part of heap row returns
|
|
* per page data. pos->firstOffset and pos->lastOffset points
|
|
* fraction of tuples for current heap row.
|
|
*
|
|
* The pendingBuffer is presumed pinned and share-locked on entry, and is
|
|
* pinned and share-locked on success exit. On failure exit it's released.
|
|
*/
|
|
static bool
|
|
scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
|
|
{
|
|
OffsetNumber maxoff;
|
|
Page page;
|
|
IndexTuple itup;
|
|
|
|
ItemPointerSetInvalid(&pos->item);
|
|
for (;;)
|
|
{
|
|
page = BufferGetPage(pos->pendingBuffer);
|
|
|
|
maxoff = PageGetMaxOffsetNumber(page);
|
|
if (pos->firstOffset > maxoff)
|
|
{
|
|
BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
|
|
|
|
if (blkno == InvalidBlockNumber)
|
|
{
|
|
UnlockReleaseBuffer(pos->pendingBuffer);
|
|
pos->pendingBuffer = InvalidBuffer;
|
|
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* Here we must prevent deletion of next page by insertcleanup
|
|
* process, which may be trying to obtain exclusive lock on
|
|
* current page. So, we lock next page before releasing the
|
|
* current one
|
|
*/
|
|
Buffer tmpbuf = ReadBuffer(scan->indexRelation, blkno);
|
|
|
|
LockBuffer(tmpbuf, GIN_SHARE);
|
|
UnlockReleaseBuffer(pos->pendingBuffer);
|
|
|
|
pos->pendingBuffer = tmpbuf;
|
|
pos->firstOffset = FirstOffsetNumber;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
|
|
pos->item = itup->t_tid;
|
|
if (GinPageHasFullRow(page))
|
|
{
|
|
/*
|
|
* find itempointer to the next row
|
|
*/
|
|
for (pos->lastOffset = pos->firstOffset + 1; pos->lastOffset <= maxoff; pos->lastOffset++)
|
|
{
|
|
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
|
|
if (!ItemPointerEquals(&pos->item, &itup->t_tid))
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* All itempointers are the same on this page
|
|
*/
|
|
pos->lastOffset = maxoff + 1;
|
|
}
|
|
|
|
/*
|
|
* Now pos->firstOffset points to the first tuple of current heap
|
|
* row, pos->lastOffset points to the first tuple of second heap
|
|
* row (or to the end of page)
|
|
*/
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Scan page from current tuple (off) up till the first of:
|
|
* - match is found (then returns true)
|
|
* - no later match is possible
|
|
* - tuple's attribute number is not equal to entry's attrnum
|
|
* - reach end of page
|
|
*/
|
|
static bool
|
|
matchPartialInPendingList(GinState *ginstate, Page page,
|
|
OffsetNumber off, OffsetNumber maxoff,
|
|
Datum value, OffsetNumber attrnum,
|
|
Datum *datum, bool *datumExtracted,
|
|
StrategyNumber strategy,
|
|
Pointer extra_data)
|
|
{
|
|
IndexTuple itup;
|
|
int32 cmp;
|
|
|
|
while (off < maxoff)
|
|
{
|
|
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
|
|
if (attrnum != gintuple_get_attrnum(ginstate, itup))
|
|
return false;
|
|
|
|
if (datumExtracted[off - 1] == false)
|
|
{
|
|
datum[off - 1] = gin_index_getattr(ginstate, itup);
|
|
datumExtracted[off - 1] = true;
|
|
}
|
|
|
|
/*----------
|
|
* Check partial match.
|
|
* case cmp == 0 => match
|
|
* case cmp > 0 => not match and end scan (no later match possible)
|
|
* case cmp < 0 => not match and continue scan
|
|
*----------
|
|
*/
|
|
cmp = DatumGetInt32(FunctionCall4(&ginstate->comparePartialFn[attrnum - 1],
|
|
value,
|
|
datum[off - 1],
|
|
UInt16GetDatum(strategy),
|
|
PointerGetDatum(extra_data)));
|
|
if (cmp == 0)
|
|
return true;
|
|
else if (cmp > 0)
|
|
return false;
|
|
|
|
off++;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static bool
|
|
hasAllMatchingKeys(GinScanOpaque so, pendingPosition *pos)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < so->nkeys; i++)
|
|
if (pos->hasMatchKey[i] == false)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Sets entryRes array for each key by looking at
|
|
* every entry per indexed value (heap's row) in pending list.
|
|
* returns true if at least one of datum was matched by key's entry
|
|
*
|
|
* The pendingBuffer is presumed pinned and share-locked on entry.
|
|
*/
|
|
static bool
|
|
collectDatumForItem(IndexScanDesc scan, pendingPosition *pos)
|
|
{
|
|
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
|
OffsetNumber attrnum;
|
|
Page page;
|
|
IndexTuple itup;
|
|
int i,
|
|
j;
|
|
|
|
/*
|
|
* Reset entryRes
|
|
*/
|
|
for (i = 0; i < so->nkeys; i++)
|
|
{
|
|
GinScanKey key = so->keys + i;
|
|
|
|
memset(key->entryRes, FALSE, key->nentries);
|
|
}
|
|
memset(pos->hasMatchKey, FALSE, so->nkeys);
|
|
|
|
for (;;)
|
|
{
|
|
Datum datum[BLCKSZ / sizeof(IndexTupleData)];
|
|
bool datumExtracted[BLCKSZ / sizeof(IndexTupleData)];
|
|
|
|
Assert(pos->lastOffset > pos->firstOffset);
|
|
memset(datumExtracted + pos->firstOffset - 1, 0, sizeof(bool) * (pos->lastOffset - pos->firstOffset));
|
|
|
|
page = BufferGetPage(pos->pendingBuffer);
|
|
|
|
for (i = 0; i < so->nkeys; i++)
|
|
{
|
|
GinScanKey key = so->keys + i;
|
|
|
|
for (j = 0; j < key->nentries; j++)
|
|
{
|
|
OffsetNumber StopLow = pos->firstOffset,
|
|
StopHigh = pos->lastOffset,
|
|
StopMiddle;
|
|
GinScanEntry entry = key->scanEntry + j;
|
|
|
|
/* already true - do not extra work */
|
|
if (key->entryRes[j])
|
|
continue;
|
|
|
|
/*
|
|
* Interested tuples are from pos->firstOffset to
|
|
* pos->lastOffset and they are ordered by (attnum, Datum) as
|
|
* it's done in entry tree So we could use binary search to
|
|
* prevent linear scanning
|
|
*/
|
|
while (StopLow < StopHigh)
|
|
{
|
|
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
|
|
|
|
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
|
|
attrnum = gintuple_get_attrnum(&so->ginstate, itup);
|
|
|
|
if (key->attnum < attrnum)
|
|
StopHigh = StopMiddle;
|
|
else if (key->attnum > attrnum)
|
|
StopLow = StopMiddle + 1;
|
|
else
|
|
{
|
|
int res;
|
|
|
|
if (datumExtracted[StopMiddle - 1] == false)
|
|
{
|
|
datum[StopMiddle - 1] = gin_index_getattr(&so->ginstate, itup);
|
|
datumExtracted[StopMiddle - 1] = true;
|
|
}
|
|
res = compareEntries(&so->ginstate,
|
|
entry->attnum,
|
|
entry->entry,
|
|
datum[StopMiddle - 1]);
|
|
|
|
if (res == 0)
|
|
{
|
|
/*
|
|
* The exact match causes, so we just scan from
|
|
* current position to find a partial match. See
|
|
* comment above about tuple's ordering.
|
|
*/
|
|
if (entry->isPartialMatch)
|
|
key->entryRes[j] =
|
|
matchPartialInPendingList(&so->ginstate,
|
|
page, StopMiddle,
|
|
pos->lastOffset,
|
|
entry->entry,
|
|
entry->attnum,
|
|
datum,
|
|
datumExtracted,
|
|
entry->strategy,
|
|
entry->extra_data);
|
|
else
|
|
key->entryRes[j] = true;
|
|
break;
|
|
}
|
|
else if (res < 0)
|
|
StopHigh = StopMiddle;
|
|
else
|
|
StopLow = StopMiddle + 1;
|
|
}
|
|
}
|
|
|
|
if (StopLow >= StopHigh && entry->isPartialMatch)
|
|
{
|
|
/*
|
|
* The exact match wasn't found, so we need to start scan
|
|
* from first tuple greater then current entry See comment
|
|
* above about tuple's ordering.
|
|
*/
|
|
key->entryRes[j] =
|
|
matchPartialInPendingList(&so->ginstate,
|
|
page, StopHigh,
|
|
pos->lastOffset,
|
|
entry->entry,
|
|
entry->attnum,
|
|
datum,
|
|
datumExtracted,
|
|
entry->strategy,
|
|
entry->extra_data);
|
|
}
|
|
|
|
pos->hasMatchKey[i] |= key->entryRes[j];
|
|
}
|
|
}
|
|
|
|
pos->firstOffset = pos->lastOffset;
|
|
|
|
if (GinPageHasFullRow(page))
|
|
{
|
|
/*
|
|
* We scan all values from one tuple, go to next one
|
|
*/
|
|
|
|
return hasAllMatchingKeys(so, pos);
|
|
}
|
|
else
|
|
{
|
|
ItemPointerData item = pos->item;
|
|
|
|
/*
|
|
* need to get next portion of tuples of row containing on several
|
|
* pages
|
|
*/
|
|
|
|
if (scanGetCandidate(scan, pos) == false || !ItemPointerEquals(&pos->item, &item))
|
|
elog(ERROR, "Could not process tuple"); /* XXX should not be
|
|
* here ! */
|
|
}
|
|
}
|
|
|
|
return hasAllMatchingKeys(so, pos);
|
|
}
|
|
|
|
/*
|
|
* Collect all matched rows from pending list in bitmap
|
|
*/
|
|
static void
|
|
scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
|
|
{
|
|
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
|
MemoryContext oldCtx;
|
|
bool recheck,
|
|
match;
|
|
int i;
|
|
pendingPosition pos;
|
|
Buffer metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
|
|
BlockNumber blkno;
|
|
|
|
*ntids = 0;
|
|
|
|
LockBuffer(metabuffer, GIN_SHARE);
|
|
blkno = GinPageGetMeta(BufferGetPage(metabuffer))->head;
|
|
|
|
/*
|
|
* fetch head of list before unlocking metapage. head page must be pinned
|
|
* to prevent deletion by vacuum process
|
|
*/
|
|
if (blkno == InvalidBlockNumber)
|
|
{
|
|
/* No pending list, so proceed with normal scan */
|
|
UnlockReleaseBuffer(metabuffer);
|
|
return;
|
|
}
|
|
|
|
pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
|
|
LockBuffer(pos.pendingBuffer, GIN_SHARE);
|
|
pos.firstOffset = FirstOffsetNumber;
|
|
UnlockReleaseBuffer(metabuffer);
|
|
pos.hasMatchKey = palloc(sizeof(bool) * so->nkeys);
|
|
|
|
/*
|
|
* loop for each heap row. scanGetCandidate returns full row or row's
|
|
* tuples from first page.
|
|
*/
|
|
while (scanGetCandidate(scan, &pos))
|
|
{
|
|
/*
|
|
* Check entries in tuple and setup entryRes array If tuples of heap's
|
|
* row are placed on several pages collectDatumForItem will read all
|
|
* of that pages.
|
|
*/
|
|
if (!collectDatumForItem(scan, &pos))
|
|
continue;
|
|
|
|
/*
|
|
* Matching of entries of one row is finished, so check row using
|
|
* consistent functions.
|
|
*/
|
|
oldCtx = MemoryContextSwitchTo(so->tempCtx);
|
|
recheck = false;
|
|
match = true;
|
|
|
|
for (i = 0; i < so->nkeys; i++)
|
|
{
|
|
GinScanKey key = so->keys + i;
|
|
|
|
if (!callConsistentFn(&so->ginstate, key))
|
|
{
|
|
match = false;
|
|
break;
|
|
}
|
|
recheck |= key->recheckCurItem;
|
|
}
|
|
|
|
MemoryContextSwitchTo(oldCtx);
|
|
MemoryContextReset(so->tempCtx);
|
|
|
|
if (match)
|
|
{
|
|
tbm_add_tuples(tbm, &pos.item, 1, recheck);
|
|
(*ntids)++;
|
|
}
|
|
}
|
|
|
|
pfree(pos.hasMatchKey);
|
|
}
|
|
|
|
/*
|
|
* Get next heap item pointer (after advancePast) from scan.
|
|
* Returns true if anything found.
|
|
* On success, *item and *recheck are set.
|
|
*
|
|
* Note: this is very nearly the same logic as in keyGetItem(), except
|
|
* that we know the keys are to be combined with AND logic, whereas in
|
|
* keyGetItem() the combination logic is known only to the consistentFn.
|
|
*/
|
|
static bool
|
|
scanGetItem(IndexScanDesc scan, ItemPointer advancePast,
|
|
ItemPointerData *item, bool *recheck)
|
|
{
|
|
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
|
ItemPointerData myAdvancePast = *advancePast;
|
|
uint32 i;
|
|
bool match;
|
|
|
|
for (;;)
|
|
{
|
|
/*
|
|
* Advance any keys that are <= myAdvancePast. In particular,
|
|
* since key->curItem was initialized with ItemPointerSetMin, this
|
|
* ensures we fetch the first item for each key on the first call.
|
|
* Then set *item to the minimum of the key curItems.
|
|
*
|
|
* Note: a lossy-page entry is encoded by a ItemPointer with max value
|
|
* for offset (0xffff), so that it will sort after any exact entries
|
|
* for the same page. So we'll prefer to return exact pointers not
|
|
* lossy pointers, which is good. Also, when we advance past an exact
|
|
* entry after processing it, we will not advance past lossy entries
|
|
* for the same page in other keys, which is NECESSARY for correct
|
|
* results (since we might have additional entries for the same page
|
|
* in the first key).
|
|
*/
|
|
ItemPointerSetMax(item);
|
|
|
|
for (i = 0; i < so->nkeys; i++)
|
|
{
|
|
GinScanKey key = so->keys + i;
|
|
|
|
while (key->isFinished == FALSE &&
|
|
compareItemPointers(&key->curItem, &myAdvancePast) <= 0)
|
|
keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx,
|
|
key, &myAdvancePast);
|
|
|
|
if (key->isFinished)
|
|
return FALSE; /* finished one of keys */
|
|
|
|
if (compareItemPointers(&key->curItem, item) < 0)
|
|
*item = key->curItem;
|
|
}
|
|
|
|
Assert(!ItemPointerIsMax(item));
|
|
|
|
/*----------
|
|
* Now *item contains first ItemPointer after previous result.
|
|
*
|
|
* The item is a valid hit only if all the keys returned either
|
|
* that exact TID, or a lossy reference to the same page.
|
|
*
|
|
* This logic works only if a keyGetItem stream can never contain both
|
|
* exact and lossy pointers for the same page. Else we could have a
|
|
* case like
|
|
*
|
|
* stream 1 stream 2
|
|
* ... ...
|
|
* 42/6 42/7
|
|
* 50/1 42/0xffff
|
|
* ... ...
|
|
*
|
|
* We would conclude that 42/6 is not a match and advance stream 1,
|
|
* thus never detecting the match to the lossy pointer in stream 2.
|
|
* (keyGetItem has a similar problem versus entryGetItem.)
|
|
*----------
|
|
*/
|
|
match = true;
|
|
for (i = 0; i < so->nkeys; i++)
|
|
{
|
|
GinScanKey key = so->keys + i;
|
|
|
|
if (compareItemPointers(item, &key->curItem) == 0)
|
|
continue;
|
|
if (ItemPointerIsLossyPage(&key->curItem) &&
|
|
GinItemPointerGetBlockNumber(&key->curItem) ==
|
|
GinItemPointerGetBlockNumber(item))
|
|
continue;
|
|
match = false;
|
|
break;
|
|
}
|
|
|
|
if (match)
|
|
break;
|
|
|
|
/*
|
|
* No hit. Update myAdvancePast to this TID, so that on the next
|
|
* pass we'll move to the next possible entry.
|
|
*/
|
|
myAdvancePast = *item;
|
|
}
|
|
|
|
/*
|
|
* We must return recheck = true if any of the keys are marked recheck.
|
|
*/
|
|
*recheck = false;
|
|
for (i = 0; i < so->nkeys; i++)
|
|
{
|
|
GinScanKey key = so->keys + i;
|
|
|
|
if (key->recheckCurItem)
|
|
{
|
|
*recheck = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
#define GinIsNewKey(s) ( ((GinScanOpaque) scan->opaque)->keys == NULL )
|
|
#define GinIsVoidRes(s) ( ((GinScanOpaque) scan->opaque)->isVoidRes == true )
|
|
|
|
Datum
|
|
gingetbitmap(PG_FUNCTION_ARGS)
|
|
{
|
|
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
|
|
TIDBitmap *tbm = (TIDBitmap *) PG_GETARG_POINTER(1);
|
|
int64 ntids;
|
|
ItemPointerData iptr;
|
|
bool recheck;
|
|
|
|
if (GinIsNewKey(scan))
|
|
newScanKey(scan);
|
|
|
|
if (GinIsVoidRes(scan))
|
|
PG_RETURN_INT64(0);
|
|
|
|
ntids = 0;
|
|
|
|
/*
|
|
* First, scan the pending list and collect any matching entries into the
|
|
* bitmap. After we scan a pending item, some other backend could post it
|
|
* into the main index, and so we might visit it a second time during the
|
|
* main scan. This is okay because we'll just re-set the same bit in the
|
|
* bitmap. (The possibility of duplicate visits is a major reason why GIN
|
|
* can't support the amgettuple API, however.) Note that it would not do
|
|
* to scan the main index before the pending list, since concurrent
|
|
* cleanup could then make us miss entries entirely.
|
|
*/
|
|
scanPendingInsert(scan, tbm, &ntids);
|
|
|
|
/*
|
|
* Now scan the main index.
|
|
*/
|
|
startScan(scan);
|
|
|
|
ItemPointerSetMin(&iptr);
|
|
|
|
for (;;)
|
|
{
|
|
CHECK_FOR_INTERRUPTS();
|
|
|
|
if (!scanGetItem(scan, &iptr, &iptr, &recheck))
|
|
break;
|
|
|
|
if (ItemPointerIsLossyPage(&iptr))
|
|
tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
|
|
else
|
|
tbm_add_tuples(tbm, &iptr, 1, recheck);
|
|
ntids++;
|
|
}
|
|
|
|
PG_RETURN_INT64(ntids);
|
|
}
|