mirror of
https://github.com/postgres/postgres.git
synced 2025-04-21 12:05:57 +03:00
Reduce page locking in GIN vacuum
GIN vacuum during cleaning posting tree can lock this whole tree for a long time with by holding LockBufferForCleanup() on root. Patch changes it with two ways: first, cleanup lock will be taken only if there is an empty page (which should be deleted) and, second, it tries to lock only subtree, not the whole posting tree. Author: Andrey Borodin with minor editorization by me Reviewed-by: Jeff Davis, me https://commitfest.postgresql.org/13/896/
This commit is contained in:
parent
73561013e5
commit
218f51584d
@ -314,10 +314,17 @@ deleted.
|
||||
The previous paragraph's reasoning only applies to searches, and only to
|
||||
posting trees. To protect from inserters following a downlink to a deleted
|
||||
page, vacuum simply locks out all concurrent insertions to the posting tree,
|
||||
by holding a super-exclusive lock on the posting tree root. Inserters hold a
|
||||
pin on the root page, but searches do not, so while new searches cannot begin
|
||||
while root page is locked, any already-in-progress scans can continue
|
||||
concurrently with vacuum. In the entry tree, we never delete pages.
|
||||
by holding a super-exclusive lock on the parent page of subtree with deletable
|
||||
pages. Inserters hold a pin on the root page, but searches do not, so while
|
||||
new searches cannot begin while root page is locked, any already-in-progress
|
||||
scans can continue concurrently with vacuum in corresponding subtree of
|
||||
posting tree. To exclude interference with readers vacuum takes exclusive
|
||||
locks in a depth-first scan in left-to-right order of page tuples. Leftmost
|
||||
page is never deleted. Thus before deleting any page we obtain exclusive
|
||||
lock on any left page, effectively excluding deadlock with any reader, despite
|
||||
taking parent lock before current and left lock after current. We take left
|
||||
lock not for a concurrency reasons, but rather in need to mark page dirty.
|
||||
In the entry tree, we never delete pages.
|
||||
|
||||
(This is quite different from the mechanism the btree indexam uses to make
|
||||
page-deletions safe; it stamps the deleted pages with an XID and keeps the
|
||||
|
@ -31,7 +31,7 @@ static void ginFinishSplit(GinBtree btree, GinBtreeStack *stack,
|
||||
/*
|
||||
* Lock buffer by needed method for search.
|
||||
*/
|
||||
static int
|
||||
int
|
||||
ginTraverseLock(Buffer buffer, bool searchMode)
|
||||
{
|
||||
Page page;
|
||||
|
@ -109,75 +109,17 @@ xlogVacuumPage(Relation index, Buffer buffer)
|
||||
PageSetLSN(page, recptr);
|
||||
}
|
||||
|
||||
static bool
|
||||
ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, Buffer *rootBuffer)
|
||||
|
||||
typedef struct DataPageDeleteStack
|
||||
{
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
bool hasVoidPage = FALSE;
|
||||
MemoryContext oldCxt;
|
||||
struct DataPageDeleteStack *child;
|
||||
struct DataPageDeleteStack *parent;
|
||||
|
||||
buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
|
||||
RBM_NORMAL, gvs->strategy);
|
||||
page = BufferGetPage(buffer);
|
||||
BlockNumber blkno; /* current block number */
|
||||
BlockNumber leftBlkno; /* rightest non-deleted page on left */
|
||||
bool isRoot;
|
||||
} DataPageDeleteStack;
|
||||
|
||||
/*
|
||||
* We should be sure that we don't concurrent with inserts, insert process
|
||||
* never release root page until end (but it can unlock it and lock
|
||||
* again). New scan can't start but previously started ones work
|
||||
* concurrently.
|
||||
*/
|
||||
if (isRoot)
|
||||
LockBufferForCleanup(buffer);
|
||||
else
|
||||
LockBuffer(buffer, GIN_EXCLUSIVE);
|
||||
|
||||
Assert(GinPageIsData(page));
|
||||
|
||||
if (GinPageIsLeaf(page))
|
||||
{
|
||||
oldCxt = MemoryContextSwitchTo(gvs->tmpCxt);
|
||||
ginVacuumPostingTreeLeaf(gvs->index, buffer, gvs);
|
||||
MemoryContextSwitchTo(oldCxt);
|
||||
MemoryContextReset(gvs->tmpCxt);
|
||||
|
||||
/* if root is a leaf page, we don't desire further processing */
|
||||
if (!isRoot && !hasVoidPage && GinDataLeafPageIsEmpty(page))
|
||||
hasVoidPage = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
OffsetNumber i;
|
||||
bool isChildHasVoid = FALSE;
|
||||
|
||||
for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++)
|
||||
{
|
||||
PostingItem *pitem = GinDataPageGetPostingItem(page, i);
|
||||
|
||||
if (ginVacuumPostingTreeLeaves(gvs, PostingItemGetBlockNumber(pitem), FALSE, NULL))
|
||||
isChildHasVoid = TRUE;
|
||||
}
|
||||
|
||||
if (isChildHasVoid)
|
||||
hasVoidPage = TRUE;
|
||||
}
|
||||
|
||||
/*
|
||||
* if we have root and there are empty pages in tree, then we don't
|
||||
* release lock to go further processing and guarantee that tree is unused
|
||||
*/
|
||||
if (!(isRoot && hasVoidPage))
|
||||
{
|
||||
UnlockReleaseBuffer(buffer);
|
||||
}
|
||||
else
|
||||
{
|
||||
Assert(rootBuffer);
|
||||
*rootBuffer = buffer;
|
||||
}
|
||||
|
||||
return hasVoidPage;
|
||||
}
|
||||
|
||||
/*
|
||||
* Delete a posting tree page.
|
||||
@ -194,8 +136,13 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
|
||||
BlockNumber rightlink;
|
||||
|
||||
/*
|
||||
* Lock the pages in the same order as an insertion would, to avoid
|
||||
* deadlocks: left, then right, then parent.
|
||||
* This function MUST be called only if someone of parent pages hold
|
||||
* exclusive cleanup lock. This guarantees that no insertions currently
|
||||
* happen in this subtree. Caller also acquire Exclusive lock on deletable
|
||||
* page and is acquiring and releasing exclusive lock on left page before.
|
||||
* Left page was locked and released. Then parent and this page are locked.
|
||||
* We acquire left page lock here only to mark page dirty after changing
|
||||
* right pointer.
|
||||
*/
|
||||
lBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, leftBlkno,
|
||||
RBM_NORMAL, gvs->strategy);
|
||||
@ -205,10 +152,6 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
|
||||
RBM_NORMAL, gvs->strategy);
|
||||
|
||||
LockBuffer(lBuffer, GIN_EXCLUSIVE);
|
||||
LockBuffer(dBuffer, GIN_EXCLUSIVE);
|
||||
if (!isParentRoot) /* parent is already locked by
|
||||
* LockBufferForCleanup() */
|
||||
LockBuffer(pBuffer, GIN_EXCLUSIVE);
|
||||
|
||||
START_CRIT_SECTION();
|
||||
|
||||
@ -272,26 +215,15 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
|
||||
PageSetLSN(BufferGetPage(lBuffer), recptr);
|
||||
}
|
||||
|
||||
if (!isParentRoot)
|
||||
LockBuffer(pBuffer, GIN_UNLOCK);
|
||||
ReleaseBuffer(pBuffer);
|
||||
UnlockReleaseBuffer(lBuffer);
|
||||
UnlockReleaseBuffer(dBuffer);
|
||||
ReleaseBuffer(dBuffer);
|
||||
|
||||
END_CRIT_SECTION();
|
||||
|
||||
gvs->result->pages_deleted++;
|
||||
}
|
||||
|
||||
typedef struct DataPageDeleteStack
|
||||
{
|
||||
struct DataPageDeleteStack *child;
|
||||
struct DataPageDeleteStack *parent;
|
||||
|
||||
BlockNumber blkno; /* current block number */
|
||||
BlockNumber leftBlkno; /* rightest non-deleted page on left */
|
||||
bool isRoot;
|
||||
} DataPageDeleteStack;
|
||||
|
||||
/*
|
||||
* scans posting tree and deletes empty pages
|
||||
@ -325,6 +257,10 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
|
||||
|
||||
buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
|
||||
RBM_NORMAL, gvs->strategy);
|
||||
|
||||
if(!isRoot)
|
||||
LockBuffer(buffer, GIN_EXCLUSIVE);
|
||||
|
||||
page = BufferGetPage(buffer);
|
||||
|
||||
Assert(GinPageIsData(page));
|
||||
@ -359,6 +295,9 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
|
||||
}
|
||||
}
|
||||
|
||||
if(!isRoot)
|
||||
LockBuffer(buffer, GIN_UNLOCK);
|
||||
|
||||
ReleaseBuffer(buffer);
|
||||
|
||||
if (!meDelete)
|
||||
@ -367,37 +306,124 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
|
||||
return meDelete;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Scan through posting tree, delete empty tuples from leaf pages.
|
||||
* Also, this function collects empty subtrees (with all empty leafs).
|
||||
* For parents of these subtrees CleanUp lock is taken, then we call
|
||||
* ScanToDelete. This is done for every inner page, which points to
|
||||
* empty subtree.
|
||||
*/
|
||||
static bool
|
||||
ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot)
|
||||
{
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
bool hasVoidPage = FALSE;
|
||||
MemoryContext oldCxt;
|
||||
|
||||
buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
|
||||
RBM_NORMAL, gvs->strategy);
|
||||
page = BufferGetPage(buffer);
|
||||
|
||||
ginTraverseLock(buffer,false);
|
||||
|
||||
Assert(GinPageIsData(page));
|
||||
|
||||
if (GinPageIsLeaf(page))
|
||||
{
|
||||
oldCxt = MemoryContextSwitchTo(gvs->tmpCxt);
|
||||
ginVacuumPostingTreeLeaf(gvs->index, buffer, gvs);
|
||||
MemoryContextSwitchTo(oldCxt);
|
||||
MemoryContextReset(gvs->tmpCxt);
|
||||
|
||||
/* if root is a leaf page, we don't desire further processing */
|
||||
if (GinDataLeafPageIsEmpty(page))
|
||||
hasVoidPage = TRUE;
|
||||
|
||||
UnlockReleaseBuffer(buffer);
|
||||
|
||||
return hasVoidPage;
|
||||
}
|
||||
else
|
||||
{
|
||||
OffsetNumber i;
|
||||
bool hasEmptyChild = FALSE;
|
||||
bool hasNonEmptyChild = FALSE;
|
||||
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
|
||||
BlockNumber* children = palloc(sizeof(BlockNumber) * (maxoff + 1));
|
||||
|
||||
/*
|
||||
* Read all children BlockNumbers.
|
||||
* Not sure it is safe if there are many concurrent vacuums.
|
||||
*/
|
||||
|
||||
for (i = FirstOffsetNumber; i <= maxoff; i++)
|
||||
{
|
||||
PostingItem *pitem = GinDataPageGetPostingItem(page, i);
|
||||
|
||||
children[i] = PostingItemGetBlockNumber(pitem);
|
||||
}
|
||||
|
||||
UnlockReleaseBuffer(buffer);
|
||||
|
||||
for (i = FirstOffsetNumber; i <= maxoff; i++)
|
||||
{
|
||||
if (ginVacuumPostingTreeLeaves(gvs, children[i], FALSE))
|
||||
hasEmptyChild = TRUE;
|
||||
else
|
||||
hasNonEmptyChild = TRUE;
|
||||
}
|
||||
|
||||
pfree(children);
|
||||
|
||||
vacuum_delay_point();
|
||||
|
||||
/*
|
||||
* All subtree is empty - just return TRUE to indicate that parent must
|
||||
* do a cleanup. Unless we are ROOT an there is way to go upper.
|
||||
*/
|
||||
|
||||
if(hasEmptyChild && !hasNonEmptyChild && !isRoot)
|
||||
return TRUE;
|
||||
|
||||
if(hasEmptyChild)
|
||||
{
|
||||
DataPageDeleteStack root,
|
||||
*ptr,
|
||||
*tmp;
|
||||
|
||||
buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
|
||||
RBM_NORMAL, gvs->strategy);
|
||||
LockBufferForCleanup(buffer);
|
||||
|
||||
memset(&root, 0, sizeof(DataPageDeleteStack));
|
||||
root.leftBlkno = InvalidBlockNumber;
|
||||
root.isRoot = TRUE;
|
||||
|
||||
ginScanToDelete(gvs, blkno, TRUE, &root, InvalidOffsetNumber);
|
||||
|
||||
ptr = root.child;
|
||||
|
||||
while (ptr)
|
||||
{
|
||||
tmp = ptr->child;
|
||||
pfree(ptr);
|
||||
ptr = tmp;
|
||||
}
|
||||
|
||||
UnlockReleaseBuffer(buffer);
|
||||
}
|
||||
|
||||
/* Here we have deleted all empty subtrees */
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
ginVacuumPostingTree(GinVacuumState *gvs, BlockNumber rootBlkno)
|
||||
{
|
||||
Buffer rootBuffer = InvalidBuffer;
|
||||
DataPageDeleteStack root,
|
||||
*ptr,
|
||||
*tmp;
|
||||
|
||||
if (ginVacuumPostingTreeLeaves(gvs, rootBlkno, TRUE, &rootBuffer) == FALSE)
|
||||
{
|
||||
Assert(rootBuffer == InvalidBuffer);
|
||||
return;
|
||||
}
|
||||
|
||||
memset(&root, 0, sizeof(DataPageDeleteStack));
|
||||
root.leftBlkno = InvalidBlockNumber;
|
||||
root.isRoot = TRUE;
|
||||
|
||||
vacuum_delay_point();
|
||||
|
||||
ginScanToDelete(gvs, rootBlkno, TRUE, &root, InvalidOffsetNumber);
|
||||
|
||||
ptr = root.child;
|
||||
while (ptr)
|
||||
{
|
||||
tmp = ptr->child;
|
||||
pfree(ptr);
|
||||
ptr = tmp;
|
||||
}
|
||||
|
||||
UnlockReleaseBuffer(rootBuffer);
|
||||
ginVacuumPostingTreeLeaves(gvs, rootBlkno, TRUE);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -471,4 +471,6 @@ ginCompareItemPointers(ItemPointer a, ItemPointer b)
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int ginTraverseLock(Buffer buffer, bool searchMode);
|
||||
|
||||
#endif /* GIN_PRIVATE_H */
|
||||
|
Loading…
x
Reference in New Issue
Block a user