diff --git a/src/backend/access/gin/README b/src/backend/access/gin/README index fade0cbb617..d551df1166c 100644 --- a/src/backend/access/gin/README +++ b/src/backend/access/gin/README @@ -304,12 +304,10 @@ the lock on next page has been acquired. The downlink is more tricky. A search descending the tree must release the lock on the parent page before locking the child, or it could deadlock with a concurrent split of the child page; a page split locks the parent, while -already holding a lock on the child page. However, posting trees are only -fully searched from left to right, starting from the leftmost leaf. (The -tree-structure is only needed by insertions, to quickly find the correct -insert location). So as long as we don't delete the leftmost page on each -level, a search can never follow a downlink to page that's about to be -deleted. +already holding a lock on the child page. So, deleted page cannot be reclaimed +immediately. Instead, we have to wait for every transaction, which might wait +to reference this page, to finish. Corresponding processes must observe that +the page is marked deleted and recover accordingly. The previous paragraph's reasoning only applies to searches, and only to posting trees. To protect from inserters following a downlink to a deleted diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index d9146488c44..637caf9fdcf 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -274,12 +274,7 @@ GinNewBuffer(Relation index) */ if (ConditionalLockBuffer(buffer)) { - Page page = BufferGetPage(buffer); - - if (PageIsNew(page)) - return buffer; /* OK to use, if never initialized */ - - if (GinPageIsDeleted(page)) + if (GinPageIsRecyclable(BufferGetPage(buffer))) return buffer; /* OK to use */ LockBuffer(buffer, GIN_UNLOCK); diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c index 3d23cebf628..58f0d03fd1e 100644 --- a/src/backend/access/gin/ginvacuum.c +++ b/src/backend/access/gin/ginvacuum.c @@ -215,6 +215,9 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn page = BufferGetPage(dBuffer); rightlink = GinPageGetOpaque(page)->rightlink; + /* For deleted page remember last xid which could knew its address */ + GinPageSetDeleteXid(page, ReadNewTransactionId()); + page = BufferGetPage(lBuffer); GinPageGetOpaque(page)->rightlink = rightlink; @@ -262,6 +265,7 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn data.parentOffset = myoff; data.rightLink = GinPageGetOpaque(page)->rightlink; + data.deleteXid = GinPageGetDeleteXid(page); XLogRegisterData((char *) &data, sizeof(ginxlogDeletePage)); @@ -708,7 +712,7 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) LockBuffer(buffer, GIN_SHARE); page = (Page) BufferGetPage(buffer); - if (PageIsNew(page) || GinPageIsDeleted(page)) + if (GinPageIsRecyclable(page)) { Assert(blkno != GIN_ROOT_BLKNO); RecordFreeIndexPage(index, blkno); diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c index 19ab1b61dce..bfe88827e4c 100644 --- a/src/backend/access/gin/ginxlog.c +++ b/src/backend/access/gin/ginxlog.c @@ -529,6 +529,7 @@ ginRedoDeletePage(XLogReaderState *record) page = BufferGetPage(dbuffer); Assert(GinPageIsData(page)); GinPageGetOpaque(page)->flags = GIN_DELETED; + GinPageSetDeleteXid(page, data->deleteXid); PageSetLSN(page, lsn); MarkBufferDirty(dbuffer); } diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h index 0b6006f0ad4..8ee17b72a59 100644 --- a/src/include/access/gin_private.h +++ b/src/include/access/gin_private.h @@ -13,6 +13,7 @@ #include "access/amapi.h" #include "access/gin.h" #include "access/itup.h" +#include "access/transam.h" #include "fmgr.h" #include "storage/bufmgr.h" #include "lib/rbtree.h" @@ -131,6 +132,15 @@ typedef struct GinMetaPageData #define GinPageRightMost(page) ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber) +/* + * We should reclaim deleted page only once every transaction started before + * its deletion is over. + */ +#define GinPageGetDeleteXid(page) ( ((PageHeader) (page))->pd_prune_xid ) +#define GinPageSetDeleteXid(page, xid) ( ((PageHeader) (page))->pd_prune_xid = xid) +#define GinPageIsRecyclable(page) ( PageIsNew(page) || (GinPageIsDeleted(page) \ + && TransactionIdPrecedes(GinPageGetDeleteXid(page), RecentGlobalXmin))) + /* * We use our own ItemPointerGet(BlockNumber|OffsetNumber) * to avoid Asserts, since sometimes the ip_posid isn't "valid" @@ -540,6 +550,7 @@ typedef struct ginxlogDeletePage { OffsetNumber parentOffset; BlockNumber rightLink; + TransactionId deleteXid; /* last Xid which could see this page in scan */ } ginxlogDeletePage; #define XLOG_GIN_UPDATE_META_PAGE 0x60