mirror of
https://github.com/postgres/postgres.git
synced 2025-04-21 12:05:57 +03:00
Reduce page locking in GIN vacuum
While cleaning a posting tree, GIN vacuum can lock the whole tree for a long time by holding LockBufferForCleanup() on the root. This patch changes that in two ways: first, the cleanup lock is taken only if there is an empty page (which should be deleted); second, it tries to lock only the affected subtree, not the whole posting tree. Author: Andrey Borodin, with minor editing by me. Reviewed-by: Jeff Davis, me. https://commitfest.postgresql.org/13/896/
This commit is contained in:
parent
73561013e5
commit
218f51584d
@ -314,10 +314,17 @@ deleted.
|
|||||||
The previous paragraph's reasoning only applies to searches, and only to
|
The previous paragraph's reasoning only applies to searches, and only to
|
||||||
posting trees. To protect from inserters following a downlink to a deleted
|
posting trees. To protect from inserters following a downlink to a deleted
|
||||||
page, vacuum simply locks out all concurrent insertions to the posting tree,
|
page, vacuum simply locks out all concurrent insertions to the posting tree,
|
||||||
by holding a super-exclusive lock on the posting tree root. Inserters hold a
|
by holding a super-exclusive lock on the parent page of the subtree with deletable
|
||||||
pin on the root page, but searches do not, so while new searches cannot begin
|
pages. Inserters hold a pin on the root page, but searches do not, so while
|
||||||
while root page is locked, any already-in-progress scans can continue
|
new searches cannot begin while root page is locked, any already-in-progress
|
||||||
concurrently with vacuum. In the entry tree, we never delete pages.
|
scans can continue concurrently with vacuum in the corresponding subtree of the
|
||||||
|
posting tree. To exclude interference with readers, vacuum takes exclusive
|
||||||
|
locks in a depth-first scan in left-to-right order of page tuples. The leftmost
|
||||||
|
page is never deleted. Thus before deleting any page we obtain exclusive
|
||||||
|
lock on any left page, effectively excluding deadlock with any reader, despite
|
||||||
|
taking parent lock before current and left lock after current. We take left
|
||||||
|
lock not for concurrency reasons, but rather because we need to mark the page dirty.
|
||||||
|
In the entry tree, we never delete pages.
|
||||||
|
|
||||||
(This is quite different from the mechanism the btree indexam uses to make
|
(This is quite different from the mechanism the btree indexam uses to make
|
||||||
page-deletions safe; it stamps the deleted pages with an XID and keeps the
|
page-deletions safe; it stamps the deleted pages with an XID and keeps the
|
||||||
|
@ -31,7 +31,7 @@ static void ginFinishSplit(GinBtree btree, GinBtreeStack *stack,
|
|||||||
/*
|
/*
|
||||||
* Lock buffer by needed method for search.
|
* Lock buffer by needed method for search.
|
||||||
*/
|
*/
|
||||||
static int
|
int
|
||||||
ginTraverseLock(Buffer buffer, bool searchMode)
|
ginTraverseLock(Buffer buffer, bool searchMode)
|
||||||
{
|
{
|
||||||
Page page;
|
Page page;
|
||||||
|
@ -109,75 +109,17 @@ xlogVacuumPage(Relation index, Buffer buffer)
|
|||||||
PageSetLSN(page, recptr);
|
PageSetLSN(page, recptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
|
||||||
ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, Buffer *rootBuffer)
|
typedef struct DataPageDeleteStack
|
||||||
{
|
{
|
||||||
Buffer buffer;
|
struct DataPageDeleteStack *child;
|
||||||
Page page;
|
struct DataPageDeleteStack *parent;
|
||||||
bool hasVoidPage = FALSE;
|
|
||||||
MemoryContext oldCxt;
|
|
||||||
|
|
||||||
buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
|
BlockNumber blkno; /* current block number */
|
||||||
RBM_NORMAL, gvs->strategy);
|
BlockNumber leftBlkno; /* rightest non-deleted page on left */
|
||||||
page = BufferGetPage(buffer);
|
bool isRoot;
|
||||||
|
} DataPageDeleteStack;
|
||||||
|
|
||||||
/*
|
|
||||||
* We should be sure that we don't concurrent with inserts, insert process
|
|
||||||
* never release root page until end (but it can unlock it and lock
|
|
||||||
* again). New scan can't start but previously started ones work
|
|
||||||
* concurrently.
|
|
||||||
*/
|
|
||||||
if (isRoot)
|
|
||||||
LockBufferForCleanup(buffer);
|
|
||||||
else
|
|
||||||
LockBuffer(buffer, GIN_EXCLUSIVE);
|
|
||||||
|
|
||||||
Assert(GinPageIsData(page));
|
|
||||||
|
|
||||||
if (GinPageIsLeaf(page))
|
|
||||||
{
|
|
||||||
oldCxt = MemoryContextSwitchTo(gvs->tmpCxt);
|
|
||||||
ginVacuumPostingTreeLeaf(gvs->index, buffer, gvs);
|
|
||||||
MemoryContextSwitchTo(oldCxt);
|
|
||||||
MemoryContextReset(gvs->tmpCxt);
|
|
||||||
|
|
||||||
/* if root is a leaf page, we don't desire further processing */
|
|
||||||
if (!isRoot && !hasVoidPage && GinDataLeafPageIsEmpty(page))
|
|
||||||
hasVoidPage = TRUE;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
OffsetNumber i;
|
|
||||||
bool isChildHasVoid = FALSE;
|
|
||||||
|
|
||||||
for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++)
|
|
||||||
{
|
|
||||||
PostingItem *pitem = GinDataPageGetPostingItem(page, i);
|
|
||||||
|
|
||||||
if (ginVacuumPostingTreeLeaves(gvs, PostingItemGetBlockNumber(pitem), FALSE, NULL))
|
|
||||||
isChildHasVoid = TRUE;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isChildHasVoid)
|
|
||||||
hasVoidPage = TRUE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* if we have root and there are empty pages in tree, then we don't
|
|
||||||
* release lock to go further processing and guarantee that tree is unused
|
|
||||||
*/
|
|
||||||
if (!(isRoot && hasVoidPage))
|
|
||||||
{
|
|
||||||
UnlockReleaseBuffer(buffer);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
Assert(rootBuffer);
|
|
||||||
*rootBuffer = buffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
return hasVoidPage;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Delete a posting tree page.
|
* Delete a posting tree page.
|
||||||
@ -194,8 +136,13 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
|
|||||||
BlockNumber rightlink;
|
BlockNumber rightlink;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Lock the pages in the same order as an insertion would, to avoid
|
* This function MUST be called only if one of the parent pages holds an
|
||||||
* deadlocks: left, then right, then parent.
|
* exclusive cleanup lock. This guarantees that no insertions currently
|
||||||
|
* happen in this subtree. The caller also acquires an exclusive lock on the deletable
|
||||||
|
* page, having acquired and released an exclusive lock on the left page beforehand.
|
||||||
|
* Left page was locked and released. Then parent and this page are locked.
|
||||||
|
* We acquire the left page lock here only to mark the page dirty after changing its
|
||||||
|
* right pointer.
|
||||||
*/
|
*/
|
||||||
lBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, leftBlkno,
|
lBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, leftBlkno,
|
||||||
RBM_NORMAL, gvs->strategy);
|
RBM_NORMAL, gvs->strategy);
|
||||||
@ -205,10 +152,6 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
|
|||||||
RBM_NORMAL, gvs->strategy);
|
RBM_NORMAL, gvs->strategy);
|
||||||
|
|
||||||
LockBuffer(lBuffer, GIN_EXCLUSIVE);
|
LockBuffer(lBuffer, GIN_EXCLUSIVE);
|
||||||
LockBuffer(dBuffer, GIN_EXCLUSIVE);
|
|
||||||
if (!isParentRoot) /* parent is already locked by
|
|
||||||
* LockBufferForCleanup() */
|
|
||||||
LockBuffer(pBuffer, GIN_EXCLUSIVE);
|
|
||||||
|
|
||||||
START_CRIT_SECTION();
|
START_CRIT_SECTION();
|
||||||
|
|
||||||
@ -272,26 +215,15 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
|
|||||||
PageSetLSN(BufferGetPage(lBuffer), recptr);
|
PageSetLSN(BufferGetPage(lBuffer), recptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!isParentRoot)
|
|
||||||
LockBuffer(pBuffer, GIN_UNLOCK);
|
|
||||||
ReleaseBuffer(pBuffer);
|
ReleaseBuffer(pBuffer);
|
||||||
UnlockReleaseBuffer(lBuffer);
|
UnlockReleaseBuffer(lBuffer);
|
||||||
UnlockReleaseBuffer(dBuffer);
|
ReleaseBuffer(dBuffer);
|
||||||
|
|
||||||
END_CRIT_SECTION();
|
END_CRIT_SECTION();
|
||||||
|
|
||||||
gvs->result->pages_deleted++;
|
gvs->result->pages_deleted++;
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef struct DataPageDeleteStack
|
|
||||||
{
|
|
||||||
struct DataPageDeleteStack *child;
|
|
||||||
struct DataPageDeleteStack *parent;
|
|
||||||
|
|
||||||
BlockNumber blkno; /* current block number */
|
|
||||||
BlockNumber leftBlkno; /* rightest non-deleted page on left */
|
|
||||||
bool isRoot;
|
|
||||||
} DataPageDeleteStack;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* scans posting tree and deletes empty pages
|
* scans posting tree and deletes empty pages
|
||||||
@ -325,6 +257,10 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
|
|||||||
|
|
||||||
buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
|
buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
|
||||||
RBM_NORMAL, gvs->strategy);
|
RBM_NORMAL, gvs->strategy);
|
||||||
|
|
||||||
|
if(!isRoot)
|
||||||
|
LockBuffer(buffer, GIN_EXCLUSIVE);
|
||||||
|
|
||||||
page = BufferGetPage(buffer);
|
page = BufferGetPage(buffer);
|
||||||
|
|
||||||
Assert(GinPageIsData(page));
|
Assert(GinPageIsData(page));
|
||||||
@ -359,6 +295,9 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(!isRoot)
|
||||||
|
LockBuffer(buffer, GIN_UNLOCK);
|
||||||
|
|
||||||
ReleaseBuffer(buffer);
|
ReleaseBuffer(buffer);
|
||||||
|
|
||||||
if (!meDelete)
|
if (!meDelete)
|
||||||
@ -367,37 +306,124 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
|
|||||||
return meDelete;
|
return meDelete;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Scan through posting tree, delete empty tuples from leaf pages.
|
||||||
|
* Also, this function collects empty subtrees (those whose leaves are all empty).
|
||||||
|
* For parents of these subtrees a cleanup lock is taken, then we call
|
||||||
|
* ginScanToDelete. This is done for every inner page that points to an
|
||||||
|
* empty subtree.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot)
|
||||||
|
{
|
||||||
|
Buffer buffer;
|
||||||
|
Page page;
|
||||||
|
bool hasVoidPage = FALSE;
|
||||||
|
MemoryContext oldCxt;
|
||||||
|
|
||||||
|
buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
|
||||||
|
RBM_NORMAL, gvs->strategy);
|
||||||
|
page = BufferGetPage(buffer);
|
||||||
|
|
||||||
|
ginTraverseLock(buffer,false);
|
||||||
|
|
||||||
|
Assert(GinPageIsData(page));
|
||||||
|
|
||||||
|
if (GinPageIsLeaf(page))
|
||||||
|
{
|
||||||
|
oldCxt = MemoryContextSwitchTo(gvs->tmpCxt);
|
||||||
|
ginVacuumPostingTreeLeaf(gvs->index, buffer, gvs);
|
||||||
|
MemoryContextSwitchTo(oldCxt);
|
||||||
|
MemoryContextReset(gvs->tmpCxt);
|
||||||
|
|
||||||
|
/* if root is a leaf page, we don't desire further processing */
|
||||||
|
if (GinDataLeafPageIsEmpty(page))
|
||||||
|
hasVoidPage = TRUE;
|
||||||
|
|
||||||
|
UnlockReleaseBuffer(buffer);
|
||||||
|
|
||||||
|
return hasVoidPage;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
OffsetNumber i;
|
||||||
|
bool hasEmptyChild = FALSE;
|
||||||
|
bool hasNonEmptyChild = FALSE;
|
||||||
|
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
|
||||||
|
BlockNumber* children = palloc(sizeof(BlockNumber) * (maxoff + 1));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Read all children BlockNumbers.
|
||||||
|
* Not sure it is safe if there are many concurrent vacuums.
|
||||||
|
*/
|
||||||
|
|
||||||
|
for (i = FirstOffsetNumber; i <= maxoff; i++)
|
||||||
|
{
|
||||||
|
PostingItem *pitem = GinDataPageGetPostingItem(page, i);
|
||||||
|
|
||||||
|
children[i] = PostingItemGetBlockNumber(pitem);
|
||||||
|
}
|
||||||
|
|
||||||
|
UnlockReleaseBuffer(buffer);
|
||||||
|
|
||||||
|
for (i = FirstOffsetNumber; i <= maxoff; i++)
|
||||||
|
{
|
||||||
|
if (ginVacuumPostingTreeLeaves(gvs, children[i], FALSE))
|
||||||
|
hasEmptyChild = TRUE;
|
||||||
|
else
|
||||||
|
hasNonEmptyChild = TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
pfree(children);
|
||||||
|
|
||||||
|
vacuum_delay_point();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The whole subtree is empty - just return TRUE to indicate that parent must
|
||||||
|
* do a cleanup. Unless we are the root, in which case there is no parent to go up to.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if(hasEmptyChild && !hasNonEmptyChild && !isRoot)
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
if(hasEmptyChild)
|
||||||
|
{
|
||||||
|
DataPageDeleteStack root,
|
||||||
|
*ptr,
|
||||||
|
*tmp;
|
||||||
|
|
||||||
|
buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
|
||||||
|
RBM_NORMAL, gvs->strategy);
|
||||||
|
LockBufferForCleanup(buffer);
|
||||||
|
|
||||||
|
memset(&root, 0, sizeof(DataPageDeleteStack));
|
||||||
|
root.leftBlkno = InvalidBlockNumber;
|
||||||
|
root.isRoot = TRUE;
|
||||||
|
|
||||||
|
ginScanToDelete(gvs, blkno, TRUE, &root, InvalidOffsetNumber);
|
||||||
|
|
||||||
|
ptr = root.child;
|
||||||
|
|
||||||
|
while (ptr)
|
||||||
|
{
|
||||||
|
tmp = ptr->child;
|
||||||
|
pfree(ptr);
|
||||||
|
ptr = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
UnlockReleaseBuffer(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we have deleted all empty subtrees */
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
ginVacuumPostingTree(GinVacuumState *gvs, BlockNumber rootBlkno)
|
ginVacuumPostingTree(GinVacuumState *gvs, BlockNumber rootBlkno)
|
||||||
{
|
{
|
||||||
Buffer rootBuffer = InvalidBuffer;
|
ginVacuumPostingTreeLeaves(gvs, rootBlkno, TRUE);
|
||||||
DataPageDeleteStack root,
|
|
||||||
*ptr,
|
|
||||||
*tmp;
|
|
||||||
|
|
||||||
if (ginVacuumPostingTreeLeaves(gvs, rootBlkno, TRUE, &rootBuffer) == FALSE)
|
|
||||||
{
|
|
||||||
Assert(rootBuffer == InvalidBuffer);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
memset(&root, 0, sizeof(DataPageDeleteStack));
|
|
||||||
root.leftBlkno = InvalidBlockNumber;
|
|
||||||
root.isRoot = TRUE;
|
|
||||||
|
|
||||||
vacuum_delay_point();
|
|
||||||
|
|
||||||
ginScanToDelete(gvs, rootBlkno, TRUE, &root, InvalidOffsetNumber);
|
|
||||||
|
|
||||||
ptr = root.child;
|
|
||||||
while (ptr)
|
|
||||||
{
|
|
||||||
tmp = ptr->child;
|
|
||||||
pfree(ptr);
|
|
||||||
ptr = tmp;
|
|
||||||
}
|
|
||||||
|
|
||||||
UnlockReleaseBuffer(rootBuffer);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -471,4 +471,6 @@ ginCompareItemPointers(ItemPointer a, ItemPointer b)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern int ginTraverseLock(Buffer buffer, bool searchMode);
|
||||||
|
|
||||||
#endif /* GIN_PRIVATE_H */
|
#endif /* GIN_PRIVATE_H */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user