mirror of
https://github.com/postgres/postgres.git
synced 2025-05-08 07:21:33 +03:00
Further optimize GIN multi-key searches.
When skipping over some items in a posting tree, re-find the new location by descending the tree from root, rather than walking the right links. This can save a lot of I/O. Heavily modified from Alexander Korotkov's fast scan patch.
This commit is contained in:
parent
8440897b38
commit
626a120656
src
@ -1639,16 +1639,15 @@ ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
|
||||
* Starts a new scan on a posting tree.
|
||||
*/
|
||||
GinBtreeStack *
|
||||
ginScanBeginPostingTree(Relation index, BlockNumber rootBlkno)
|
||||
ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno)
|
||||
{
|
||||
GinBtreeData btree;
|
||||
GinBtreeStack *stack;
|
||||
|
||||
ginPrepareDataScan(&btree, index, rootBlkno);
|
||||
ginPrepareDataScan(btree, index, rootBlkno);
|
||||
|
||||
btree.fullScan = TRUE;
|
||||
btree->fullScan = TRUE;
|
||||
|
||||
stack = ginFindLeafPage(&btree, TRUE);
|
||||
stack = ginFindLeafPage(btree, TRUE);
|
||||
|
||||
return stack;
|
||||
}
|
||||
|
@ -99,12 +99,13 @@ static void
|
||||
scanPostingTree(Relation index, GinScanEntry scanEntry,
|
||||
BlockNumber rootPostingTree)
|
||||
{
|
||||
GinBtreeData btree;
|
||||
GinBtreeStack *stack;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
|
||||
/* Descend to the leftmost leaf page */
|
||||
stack = ginScanBeginPostingTree(index, rootPostingTree);
|
||||
stack = ginScanBeginPostingTree(&btree, index, rootPostingTree);
|
||||
buffer = stack->buffer;
|
||||
IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
|
||||
|
||||
@ -412,7 +413,8 @@ restartScanEntry:
|
||||
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
|
||||
needUnlock = FALSE;
|
||||
|
||||
stack = ginScanBeginPostingTree(ginstate->index, rootPostingTree);
|
||||
stack = ginScanBeginPostingTree(&entry->btree, ginstate->index,
|
||||
rootPostingTree);
|
||||
entry->buffer = stack->buffer;
|
||||
|
||||
/*
|
||||
@ -506,8 +508,60 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
|
||||
{
|
||||
Page page;
|
||||
int i;
|
||||
bool stepright;
|
||||
|
||||
if (!BufferIsValid(entry->buffer))
|
||||
{
|
||||
entry->isFinished = true;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* We have two strategies for finding the correct page: step right from
|
||||
* the current page, or descend the tree again from the root. If
|
||||
* advancePast equals the current item, the next matching item should be
|
||||
* on the next page, so we step right. Otherwise, descend from root.
|
||||
*/
|
||||
if (ginCompareItemPointers(&entry->curItem, &advancePast) == 0)
|
||||
{
|
||||
stepright = true;
|
||||
LockBuffer(entry->buffer, GIN_SHARE);
|
||||
}
|
||||
else
|
||||
{
|
||||
GinBtreeStack *stack;
|
||||
|
||||
ReleaseBuffer(entry->buffer);
|
||||
|
||||
/*
|
||||
* Set the search key, and find the correct leaf page.
|
||||
*/
|
||||
if (ItemPointerIsLossyPage(&advancePast))
|
||||
{
|
||||
ItemPointerSet(&entry->btree.itemptr,
|
||||
GinItemPointerGetBlockNumber(&advancePast) + 1,
|
||||
FirstOffsetNumber);
|
||||
}
|
||||
else
|
||||
{
|
||||
entry->btree.itemptr = advancePast;
|
||||
entry->btree.itemptr.ip_posid++;
|
||||
}
|
||||
entry->btree.fullScan = false;
|
||||
stack = ginFindLeafPage(&entry->btree, true);
|
||||
|
||||
/* we don't need the stack, just the buffer. */
|
||||
entry->buffer = stack->buffer;
|
||||
IncrBufferRefCount(entry->buffer);
|
||||
freeGinBtreeStack(stack);
|
||||
stepright = false;
|
||||
}
|
||||
|
||||
elog(DEBUG2, "entryLoadMoreItems, %u/%u, skip: %d",
|
||||
GinItemPointerGetBlockNumber(&advancePast),
|
||||
GinItemPointerGetOffsetNumber(&advancePast),
|
||||
!stepright);
|
||||
|
||||
LockBuffer(entry->buffer, GIN_SHARE);
|
||||
page = BufferGetPage(entry->buffer);
|
||||
for (;;)
|
||||
{
|
||||
@ -519,30 +573,34 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
|
||||
entry->nlist = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* We've processed all the entries on this page. If it was the last
|
||||
* page in the tree, we're done.
|
||||
*/
|
||||
if (GinPageRightMost(page))
|
||||
if (stepright)
|
||||
{
|
||||
UnlockReleaseBuffer(entry->buffer);
|
||||
entry->buffer = InvalidBuffer;
|
||||
entry->isFinished = TRUE;
|
||||
return;
|
||||
/*
|
||||
* We've processed all the entries on this page. If it was the last
|
||||
* page in the tree, we're done.
|
||||
*/
|
||||
if (GinPageRightMost(page))
|
||||
{
|
||||
UnlockReleaseBuffer(entry->buffer);
|
||||
entry->buffer = InvalidBuffer;
|
||||
entry->isFinished = TRUE;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Step to next page, following the right link, then find the first
|
||||
* ItemPointer greater than advancePast.
|
||||
*/
|
||||
entry->buffer = ginStepRight(entry->buffer,
|
||||
ginstate->index,
|
||||
GIN_SHARE);
|
||||
page = BufferGetPage(entry->buffer);
|
||||
}
|
||||
stepright = true;
|
||||
|
||||
if (GinPageGetOpaque(page)->flags & GIN_DELETED)
|
||||
continue; /* page was deleted by concurrent vacuum */
|
||||
|
||||
/*
|
||||
* Step to next page, following the right link, then find the first
|
||||
* ItemPointer greater than advancePast.
|
||||
*/
|
||||
entry->buffer = ginStepRight(entry->buffer,
|
||||
ginstate->index,
|
||||
GIN_SHARE);
|
||||
page = BufferGetPage(entry->buffer);
|
||||
|
||||
/*
|
||||
* The first item > advancePast might not be on this page, but
|
||||
* somewhere to the right, if the page was split, or a non-match from
|
||||
@ -566,8 +624,16 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
|
||||
{
|
||||
if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0)
|
||||
{
|
||||
LockBuffer(entry->buffer, GIN_UNLOCK);
|
||||
entry->offset = i;
|
||||
|
||||
if (GinPageRightMost(page))
|
||||
{
|
||||
/* after processing the copied items, we're done. */
|
||||
UnlockReleaseBuffer(entry->buffer);
|
||||
entry->buffer = InvalidBuffer;
|
||||
}
|
||||
else
|
||||
LockBuffer(entry->buffer, GIN_UNLOCK);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -677,7 +743,10 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
|
||||
}
|
||||
else if (!BufferIsValid(entry->buffer))
|
||||
{
|
||||
/* A posting list from an entry tuple */
|
||||
/*
|
||||
* A posting list from an entry tuple, or the last page of a posting
|
||||
* tree.
|
||||
*/
|
||||
do
|
||||
{
|
||||
if (entry->offset >= entry->nlist)
|
||||
|
@ -702,7 +702,7 @@ extern void GinPageDeletePostingItem(Page page, OffsetNumber offset);
|
||||
extern void ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
|
||||
ItemPointerData *items, uint32 nitem,
|
||||
GinStatsData *buildStats);
|
||||
extern GinBtreeStack *ginScanBeginPostingTree(Relation index, BlockNumber rootBlkno);
|
||||
extern GinBtreeStack *ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno);
|
||||
extern void ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage);
|
||||
extern void ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno);
|
||||
|
||||
@ -802,6 +802,7 @@ typedef struct GinScanEntryData
|
||||
bool isFinished;
|
||||
bool reduceResult;
|
||||
uint32 predictNumberResult;
|
||||
GinBtreeData btree;
|
||||
} GinScanEntryData;
|
||||
|
||||
typedef struct GinScanOpaqueData
|
||||
|
Loading…
x
Reference in New Issue
Block a user