1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-07 00:36:50 +03:00

Predicate locking in GIN index

Predicate locks are used on per page basis only if fastupdate = off, in
opposite case predicate lock on pending list will effectively lock whole index,
to reduce locking overhead, just lock a relation. Entry and posting trees are
essentially B-tree, so locks are acquired on leaf pages only.

Author: Shubham Barai with some editorization by me and Dmitry Ivanov
Review by: Alexander Korotkov, Dmitry Ivanov, Fedor Sigaev
Discussion: https://www.postgresql.org/message-id/flat/CALxAEPt5sWW+EwTaKUGFL5_XFcZ0MuGBcyJ70oqbWqr42YKR8Q@mail.gmail.com
This commit is contained in:
Teodor Sigaev
2018-03-30 14:23:17 +03:00
parent 019fa576ca
commit 43d1ed60fd
11 changed files with 1054 additions and 18 deletions

View File

@ -17,6 +17,7 @@
#include "access/gin_private.h"
#include "access/ginxlog.h"
#include "access/xloginsert.h"
#include "storage/predicate.h"
#include "miscadmin.h"
#include "utils/memutils.h"
#include "utils/rel.h"
@ -515,6 +516,19 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
btree->fillRoot(btree, newrootpg,
BufferGetBlockNumber(lbuffer), newlpage,
BufferGetBlockNumber(rbuffer), newrpage);
if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
{
PredicateLockPageSplit(btree->index,
BufferGetBlockNumber(stack->buffer),
BufferGetBlockNumber(lbuffer));
PredicateLockPageSplit(btree->index,
BufferGetBlockNumber(stack->buffer),
BufferGetBlockNumber(rbuffer));
}
}
else
{
@ -524,6 +538,14 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
GinPageGetOpaque(newrpage)->rightlink = savedRightLink;
GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT;
GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
{
PredicateLockPageSplit(btree->index,
BufferGetBlockNumber(stack->buffer),
BufferGetBlockNumber(rbuffer));
}
}
/*

View File

@ -19,6 +19,7 @@
#include "access/xloginsert.h"
#include "lib/ilist.h"
#include "miscadmin.h"
#include "storage/predicate.h"
#include "utils/rel.h"
/*
@ -1759,7 +1760,7 @@ leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining)
*/
BlockNumber
createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
GinStatsData *buildStats)
GinStatsData *buildStats, Buffer entrybuffer)
{
BlockNumber blkno;
Buffer buffer;
@ -1810,6 +1811,12 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
page = BufferGetPage(buffer);
blkno = BufferGetBlockNumber(buffer);
/*
* Copy a predicate lock from entry tree leaf (containing posting list)
* to posting tree.
*/
PredicateLockPageSplit(index, BufferGetBlockNumber(entrybuffer), blkno);
START_CRIT_SECTION();
PageRestoreTempPage(tmppage, page);
@ -1904,6 +1911,7 @@ ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
btree.itemptr = insertdata.items[insertdata.curitem];
stack = ginFindLeafPage(&btree, false, NULL);
GinCheckForSerializableConflictIn(btree.index, NULL, stack->buffer);
ginInsertValue(&btree, stack, &insertdata, buildStats);
}
}

View File

@ -17,8 +17,10 @@
#include "access/gin_private.h"
#include "access/relscan.h"
#include "miscadmin.h"
#include "storage/predicate.h"
#include "utils/datum.h"
#include "utils/memutils.h"
#include "utils/rel.h"
/* GUC parameter */
int GinFuzzySearchLimit = 0;
@ -33,11 +35,25 @@ typedef struct pendingPosition
} pendingPosition;
/*
* Place predicate lock on GIN page if needed.
*/
static void
GinPredicateLockPage(Relation index, BlockNumber blkno, Snapshot snapshot)
{
/*
* When fast update is on then no need in locking pages, because we
* anyway need to lock the whole index.
*/
if (!GinGetUseFastUpdate(index))
PredicateLockPage(index, blkno, snapshot);
}
/*
* Goes to the next page if current offset is outside of bounds
*/
static bool
moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack)
moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack, Snapshot snapshot)
{
Page page = BufferGetPage(stack->buffer);
@ -52,6 +68,7 @@ moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack)
stack->buffer = ginStepRight(stack->buffer, btree->index, GIN_SHARE);
stack->blkno = BufferGetBlockNumber(stack->buffer);
stack->off = FirstOffsetNumber;
GinPredicateLockPage(btree->index, stack->blkno, snapshot);
}
return true;
@ -73,6 +90,7 @@ scanPostingTree(Relation index, GinScanEntry scanEntry,
/* Descend to the leftmost leaf page */
stack = ginScanBeginPostingTree(&btree, index, rootPostingTree, snapshot);
buffer = stack->buffer;
IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
freeGinBtreeStack(stack);
@ -82,6 +100,11 @@ scanPostingTree(Relation index, GinScanEntry scanEntry,
*/
for (;;)
{
/*
* Predicate lock each leaf page in posting tree
*/
GinPredicateLockPage(index, BufferGetBlockNumber(buffer), snapshot);
page = BufferGetPage(buffer);
if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0)
{
@ -131,6 +154,12 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
attnum = scanEntry->attnum;
attr = TupleDescAttr(btree->ginstate->origTupdesc, attnum - 1);
/*
* Predicate lock entry leaf page, following pages will be locked by
* moveRightIfItNeeded()
*/
GinPredicateLockPage(btree->index, stack->buffer, snapshot);
for (;;)
{
Page page;
@ -141,7 +170,7 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
/*
* stack->off points to the interested entry, buffer is already locked
*/
if (moveRightIfItNeeded(btree, stack) == false)
if (moveRightIfItNeeded(btree, stack, snapshot) == false)
return true;
page = BufferGetPage(stack->buffer);
@ -250,7 +279,7 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
Datum newDatum;
GinNullCategory newCategory;
if (moveRightIfItNeeded(btree, stack) == false)
if (moveRightIfItNeeded(btree, stack, snapshot) == false)
elog(ERROR, "lost saved point in index"); /* must not happen !!! */
page = BufferGetPage(stack->buffer);
@ -323,6 +352,7 @@ restartScanEntry:
ginstate);
stackEntry = ginFindLeafPage(&btreeEntry, true, snapshot);
page = BufferGetPage(stackEntry->buffer);
/* ginFindLeafPage() will have already checked snapshot age. */
needUnlock = true;
@ -370,6 +400,10 @@ restartScanEntry:
{
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
/* Predicate lock visited entry leaf page */
GinPredicateLockPage(ginstate->index,
BufferGetBlockNumber(stackEntry->buffer), snapshot);
if (GinIsPostingTree(itup))
{
BlockNumber rootPostingTree = GinGetPostingTree(itup);
@ -391,6 +425,12 @@ restartScanEntry:
rootPostingTree, snapshot);
entry->buffer = stack->buffer;
/*
* Predicate lock visited posting tree page, following pages
* will be locked by moveRightIfItNeeded or entryLoadMoreItems
*/
GinPredicateLockPage(ginstate->index, BufferGetBlockNumber(entry->buffer), snapshot);
/*
* We keep buffer pinned because we need to prevent deletion of
* page during scan. See GIN's vacuum implementation. RefCount is
@ -493,7 +533,7 @@ startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key)
for (i = 0; i < key->nentries - 1; i++)
{
/* Pass all entries <= i as FALSE, and the rest as MAYBE */
/* Pass all entries <= i as false, and the rest as MAYBE */
for (j = 0; j <= i; j++)
key->entryRes[entryIndexes[j]] = GIN_FALSE;
for (j = i + 1; j < key->nentries; j++)
@ -633,6 +673,8 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
entry->btree.fullScan = false;
stack = ginFindLeafPage(&entry->btree, true, snapshot);
GinPredicateLockPage(ginstate->index, BufferGetBlockNumber(stack->buffer), snapshot);
/* we don't need the stack, just the buffer. */
entry->buffer = stack->buffer;
IncrBufferRefCount(entry->buffer);
@ -677,6 +719,10 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
entry->buffer = ginStepRight(entry->buffer,
ginstate->index,
GIN_SHARE);
GinPredicateLockPage(ginstate->index, BufferGetBlockNumber(entry->buffer), snapshot);
page = BufferGetPage(entry->buffer);
}
stepright = true;
@ -1038,8 +1084,8 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
* lossy page even when none of the other entries match.
*
* Our strategy is to call the tri-state consistent function, with the
* lossy-page entries set to MAYBE, and all the other entries FALSE. If it
* returns FALSE, none of the lossy items alone are enough for a match, so
* lossy-page entries set to MAYBE, and all the other entries false. If it
* returns false, none of the lossy items alone are enough for a match, so
* we don't need to return a lossy-page pointer. Otherwise, return a
* lossy-page pointer to indicate that the whole heap page must be
* checked. (On subsequent calls, we'll do nothing until minItem is past
@ -1700,7 +1746,8 @@ collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
}
/*
* Collect all matched rows from pending list into bitmap
* Collect all matched rows from pending list into bitmap. Also function
* takes PendingLockRelation if it's needed.
*/
static void
scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
@ -1730,9 +1777,24 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
{
/* No pending list, so proceed with normal scan */
UnlockReleaseBuffer(metabuffer);
/*
* If fast update is enabled, we acquire a predicate lock on the entire
* relation as fast update postpones the insertion of tuples into index
* structure due to which we can't detect rw conflicts.
*/
if (GinGetUseFastUpdate(scan->indexRelation))
PredicateLockRelation(scan->indexRelation, scan->xs_snapshot);
return;
}
/*
* Pending list is not empty, we need to lock the index doesn't despite on
* fastupdate state
*/
PredicateLockRelation(scan->indexRelation, scan->xs_snapshot);
pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
LockBuffer(pos.pendingBuffer, GIN_SHARE);
pos.firstOffset = FirstOffsetNumber;

View File

@ -22,6 +22,7 @@
#include "storage/bufmgr.h"
#include "storage/smgr.h"
#include "storage/indexfsm.h"
#include "storage/predicate.h"
#include "utils/memutils.h"
#include "utils/rel.h"
@ -48,7 +49,7 @@ static IndexTuple
addItemPointersToLeafTuple(GinState *ginstate,
IndexTuple old,
ItemPointerData *items, uint32 nitem,
GinStatsData *buildStats)
GinStatsData *buildStats, Buffer buffer)
{
OffsetNumber attnum;
Datum key;
@ -99,7 +100,8 @@ addItemPointersToLeafTuple(GinState *ginstate,
postingRoot = createPostingTree(ginstate->index,
oldItems,
oldNPosting,
buildStats);
buildStats,
buffer);
/* Now insert the TIDs-to-be-added into the posting tree */
ginInsertItemPointers(ginstate->index, postingRoot,
@ -127,7 +129,7 @@ static IndexTuple
buildFreshLeafTuple(GinState *ginstate,
OffsetNumber attnum, Datum key, GinNullCategory category,
ItemPointerData *items, uint32 nitem,
GinStatsData *buildStats)
GinStatsData *buildStats, Buffer buffer)
{
IndexTuple res = NULL;
GinPostingList *compressedList;
@ -157,7 +159,7 @@ buildFreshLeafTuple(GinState *ginstate,
* Initialize a new posting tree with the TIDs.
*/
postingRoot = createPostingTree(ginstate->index, items, nitem,
buildStats);
buildStats, buffer);
/* And save the root link in the result tuple */
GinSetPostingTree(res, postingRoot);
@ -217,17 +219,19 @@ ginEntryInsert(GinState *ginstate,
return;
}
GinCheckForSerializableConflictIn(btree.index, NULL, stack->buffer);
/* modify an existing leaf entry */
itup = addItemPointersToLeafTuple(ginstate, itup,
items, nitem, buildStats);
items, nitem, buildStats, stack->buffer);
insertdata.isDelete = true;
}
else
{
GinCheckForSerializableConflictIn(btree.index, NULL, stack->buffer);
/* no match, so construct a new leaf entry */
itup = buildFreshLeafTuple(ginstate, attnum, key, category,
items, nitem, buildStats);
items, nitem, buildStats, stack->buffer);
}
/* Insert the new or modified leaf tuple */
@ -513,6 +517,18 @@ gininsert(Relation index, Datum *values, bool *isnull,
memset(&collector, 0, sizeof(GinTupleCollector));
/*
* With fastupdate on each scan and each insert begin with access to
* pending list, so it effectively lock entire index. In this case
* we aquire predicate lock and check for conflicts over index relation,
* and hope that it will reduce locking overhead.
*
* Do not use GinCheckForSerializableConflictIn() here, because
* it will do nothing (it does actual work only with fastupdate off).
* Check for conflicts for entire index.
*/
CheckForSerializableConflictIn(index, NULL, InvalidBuffer);
for (i = 0; i < ginstate->origTupdesc->natts; i++)
ginHeapTupleFastCollect(ginstate, &collector,
(OffsetNumber) (i + 1),
@ -523,6 +539,16 @@ gininsert(Relation index, Datum *values, bool *isnull,
}
else
{
GinStatsData stats;
/*
* Fastupdate is off but if pending list isn't empty then we need to
* check conflicts with PredicateLockRelation in scanPendingInsert().
*/
ginGetStats(index, &stats);
if (stats.nPendingPages > 0)
CheckForSerializableConflictIn(index, NULL, InvalidBuffer);
for (i = 0; i < ginstate->origTupdesc->natts; i++)
ginHeapTupleInsert(ginstate, (OffsetNumber) (i + 1),
values[i], isnull[i],

View File

@ -23,6 +23,7 @@
#include "miscadmin.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
#include "storage/predicate.h"
#include "utils/builtins.h"
#include "utils/index_selfuncs.h"
#include "utils/typcache.h"
@ -49,7 +50,7 @@ ginhandler(PG_FUNCTION_ARGS)
amroutine->amsearchnulls = false;
amroutine->amstorage = true;
amroutine->amclusterable = false;
amroutine->ampredlocks = false;
amroutine->ampredlocks = true;
amroutine->amcanparallel = false;
amroutine->amkeytype = InvalidOid;
@ -716,3 +717,10 @@ ginUpdateStats(Relation index, const GinStatsData *stats)
END_CRIT_SECTION();
}
void
GinCheckForSerializableConflictIn(Relation relation, HeapTuple tuple, Buffer buffer)
{
if (!GinGetUseFastUpdate(relation))
CheckForSerializableConflictIn(relation, tuple, buffer);
}

View File

@ -22,6 +22,7 @@
#include "postmaster/autovacuum.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
#include "storage/predicate.h"
#include "utils/memutils.h"
struct GinVacuumState
@ -153,11 +154,18 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
LockBuffer(lBuffer, GIN_EXCLUSIVE);
page = BufferGetPage(dBuffer);
rightlink = GinPageGetOpaque(page)->rightlink;
/*
* Any insert which would have gone on the leaf block will now go to its
* right sibling.
*/
PredicateLockPageCombine(gvs->index, deleteBlkno, rightlink);
START_CRIT_SECTION();
/* Unlink the page by changing left sibling's rightlink */
page = BufferGetPage(dBuffer);
rightlink = GinPageGetOpaque(page)->rightlink;
page = BufferGetPage(lBuffer);
GinPageGetOpaque(page)->rightlink = rightlink;