diff --git a/src/backend/access/gin/README b/src/backend/access/gin/README index 990b5ffa581..fade0cbb617 100644 --- a/src/backend/access/gin/README +++ b/src/backend/access/gin/README @@ -314,17 +314,10 @@ deleted. The previous paragraph's reasoning only applies to searches, and only to posting trees. To protect from inserters following a downlink to a deleted page, vacuum simply locks out all concurrent insertions to the posting tree, -by holding a super-exclusive lock on the parent page of subtree with deletable -pages. Inserters hold a pin on the root page, but searches do not, so while -new searches cannot begin while root page is locked, any already-in-progress -scans can continue concurrently with vacuum in corresponding subtree of -posting tree. To exclude interference with readers vacuum takes exclusive -locks in a depth-first scan in left-to-right order of page tuples. Leftmost -page is never deleted. Thus before deleting any page we obtain exclusive -lock on any left page, effectively excluding deadlock with any reader, despite -taking parent lock before current and left lock after current. We take left -lock not for a concurrency reasons, but rather in need to mark page dirty. -In the entry tree, we never delete pages. +by holding a super-exclusive lock on the posting tree root. Inserters hold a +pin on the root page, but searches do not, so while new searches cannot begin +while root page is locked, any already-in-progress scans can continue +concurrently with vacuum. In the entry tree, we never delete pages. (This is quite different from the mechanism the btree indexam uses to make page-deletions safe; it stamps the deleted pages with an XID and keeps the diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c index d3e4a177670..2292a045be2 100644 --- a/src/backend/access/gin/ginvacuum.c +++ b/src/backend/access/gin/ginvacuum.c @@ -308,122 +308,112 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, /* - * Scan through posting tree, delete empty tuples from leaf pages. - * Also, this function collects empty subtrees (with all empty leafs). - * For parents of these subtrees CleanUp lock is taken, then we call - * ScanToDelete. This is done for every inner page, which points to - * empty subtree. + * Scan through posting tree leafs, delete empty tuples. Returns true if there + * is at least one empty page. */ static bool -ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot) +ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno) { Buffer buffer; Page page; bool hasVoidPage = FALSE; MemoryContext oldCxt; - buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno, - RBM_NORMAL, gvs->strategy); - page = BufferGetPage(buffer); + /* Find leftmost leaf page of posting tree and lock it in exclusive mode */ + while (true) + { + PostingItem *pitem; - ginTraverseLock(buffer, false); + buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno, + RBM_NORMAL, gvs->strategy); + LockBuffer(buffer, GIN_SHARE); + page = BufferGetPage(buffer); - Assert(GinPageIsData(page)); + Assert(GinPageIsData(page)); - if (GinPageIsLeaf(page)) + if (GinPageIsLeaf(page)) + { + LockBuffer(buffer, GIN_UNLOCK); + LockBuffer(buffer, GIN_EXCLUSIVE); + break; + } + + Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber); + + pitem = GinDataPageGetPostingItem(page, FirstOffsetNumber); + blkno = PostingItemGetBlockNumber(pitem); + Assert(blkno != InvalidBlockNumber); + + UnlockReleaseBuffer(buffer); + } + + /* Iterate all posting tree leaves using rightlinks and vacuum them */ + while (true) { oldCxt = MemoryContextSwitchTo(gvs->tmpCxt); ginVacuumPostingTreeLeaf(gvs->index, buffer, gvs); MemoryContextSwitchTo(oldCxt); MemoryContextReset(gvs->tmpCxt); - /* if root is a leaf page, we don't desire further processing */ if (GinDataLeafPageIsEmpty(page)) hasVoidPage = TRUE; - UnlockReleaseBuffer(buffer); - - return hasVoidPage; - } - else - { - OffsetNumber i; - bool hasEmptyChild = FALSE; - bool hasNonEmptyChild = FALSE; - OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; - BlockNumber *children = palloc(sizeof(BlockNumber) * (maxoff + 1)); - - /* - * Read all children BlockNumbers. Not sure it is safe if there are - * many concurrent vacuums. - */ - - for (i = FirstOffsetNumber; i <= maxoff; i++) - { - PostingItem *pitem = GinDataPageGetPostingItem(page, i); - - children[i] = PostingItemGetBlockNumber(pitem); - } + blkno = GinPageGetOpaque(page)->rightlink; UnlockReleaseBuffer(buffer); - for (i = FirstOffsetNumber; i <= maxoff; i++) - { - if (ginVacuumPostingTreeLeaves(gvs, children[i], FALSE)) - hasEmptyChild = TRUE; - else - hasNonEmptyChild = TRUE; - } + if (blkno == InvalidBlockNumber) + break; - pfree(children); - - vacuum_delay_point(); - - /* - * All subtree is empty - just return TRUE to indicate that parent - * must do a cleanup. Unless we are ROOT an there is way to go upper. - */ - - if (hasEmptyChild && !hasNonEmptyChild && !isRoot) - return TRUE; - - if (hasEmptyChild) - { - DataPageDeleteStack root, - *ptr, - *tmp; - - buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno, - RBM_NORMAL, gvs->strategy); - LockBufferForCleanup(buffer); - - memset(&root, 0, sizeof(DataPageDeleteStack)); - root.leftBlkno = InvalidBlockNumber; - root.isRoot = TRUE; - - ginScanToDelete(gvs, blkno, TRUE, &root, InvalidOffsetNumber); - - ptr = root.child; - - while (ptr) - { - tmp = ptr->child; - pfree(ptr); - ptr = tmp; - } - - UnlockReleaseBuffer(buffer); - } - - /* Here we have deleted all empty subtrees */ - return FALSE; + buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno, + RBM_NORMAL, gvs->strategy); + LockBuffer(buffer, GIN_EXCLUSIVE); + page = BufferGetPage(buffer); } + + return hasVoidPage; } static void ginVacuumPostingTree(GinVacuumState *gvs, BlockNumber rootBlkno) { - ginVacuumPostingTreeLeaves(gvs, rootBlkno, TRUE); + if (ginVacuumPostingTreeLeaves(gvs, rootBlkno)) + { + /* + * There is at least one empty page. So we have to rescan the tree + * deleting empty pages. + */ + Buffer buffer; + DataPageDeleteStack root, + *ptr, + *tmp; + + buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, rootBlkno, + RBM_NORMAL, gvs->strategy); + + /* + * Lock posting tree root for cleanup to ensure there are no concurrent + * inserts. + */ + LockBufferForCleanup(buffer); + + memset(&root, 0, sizeof(DataPageDeleteStack)); + root.leftBlkno = InvalidBlockNumber; + root.isRoot = true; + + ginScanToDelete(gvs, rootBlkno, true, &root, InvalidOffsetNumber); + + ptr = root.child; + + while (ptr) + { + tmp = ptr->child; + pfree(ptr); + ptr = tmp; + } + + UnlockReleaseBuffer(buffer); + } } /*