diff --git a/src/backend/access/gin/README b/src/backend/access/gin/README index fade0cbb617..d551df1166c 100644 --- a/src/backend/access/gin/README +++ b/src/backend/access/gin/README @@ -304,12 +304,10 @@ the lock on next page has been acquired. The downlink is more tricky. A search descending the tree must release the lock on the parent page before locking the child, or it could deadlock with a concurrent split of the child page; a page split locks the parent, while -already holding a lock on the child page. However, posting trees are only -fully searched from left to right, starting from the leftmost leaf. (The -tree-structure is only needed by insertions, to quickly find the correct -insert location). So as long as we don't delete the leftmost page on each -level, a search can never follow a downlink to page that's about to be -deleted. +already holding a lock on the child page. So, deleted page cannot be reclaimed +immediately. Instead, we have to wait for every transaction, which might wait +to reference this page, to finish. Corresponding processes must observe that +the page is marked deleted and recover accordingly. The previous paragraph's reasoning only applies to searches, and only to posting trees. To protect from inserters following a downlink to a deleted diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index 3ca0b68434b..40f11de9d70 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -227,12 +227,7 @@ GinNewBuffer(Relation index) */ if (ConditionalLockBuffer(buffer)) { - Page page = BufferGetPage(buffer); - - if (PageIsNew(page)) - return buffer; /* OK to use, if never initialized */ - - if (GinPageIsDeleted(page)) + if (GinPageIsRecyclable(BufferGetPage(buffer))) return buffer; /* OK to use */ LockBuffer(buffer, GIN_UNLOCK); diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c index cc440d93547..aed60cb7ae5 100644 --- a/src/backend/access/gin/ginvacuum.c +++ b/src/backend/access/gin/ginvacuum.c @@ -261,6 +261,9 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn page = BufferGetPage(dBuffer); rightlink = GinPageGetOpaque(page)->rightlink; + /* For deleted page remember last xid which could knew its address */ + GinPageSetDeleteXid(page, ReadNewTransactionId()); + page = BufferGetPage(lBuffer); GinPageGetOpaque(page)->rightlink = rightlink; @@ -300,6 +303,7 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn data.parentOffset = myoff; data.leftBlkno = leftBlkno; data.rightLink = GinPageGetOpaque(page)->rightlink; + data.deleteXid = GinPageGetDeleteXid(page); /* * We can't pass buffer_std = TRUE, because we didn't set pd_lower on @@ -777,7 +781,7 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) LockBuffer(buffer, GIN_SHARE); page = (Page) BufferGetPage(buffer); - if (PageIsNew(page) || GinPageIsDeleted(page)) + if (GinPageIsRecyclable(page)) { Assert(blkno != GIN_ROOT_BLKNO); RecordFreeIndexPage(index, blkno); diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h index e042f5ca1fd..6b1ada5d006 100644 --- a/src/include/access/gin_private.h +++ b/src/include/access/gin_private.h @@ -13,9 +13,11 @@ #include "access/genam.h" #include "access/gin.h" #include "access/itup.h" +#include "access/transam.h" #include "fmgr.h" #include "storage/bufmgr.h" #include "utils/rbtree.h" +#include "utils/snapmgr.h" /* @@ -131,6 +133,15 @@ typedef struct GinMetaPageData #define GinPageRightMost(page) ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber) +/* + * We should reclaim deleted page only once every transaction started before + * its deletion is over. + */ +#define GinPageGetDeleteXid(page) ( ((PageHeader) (page))->pd_prune_xid ) +#define GinPageSetDeleteXid(page, xid) ( ((PageHeader) (page))->pd_prune_xid = xid) +#define GinPageIsRecyclable(page) ( PageIsNew(page) || (GinPageIsDeleted(page) \ + && TransactionIdPrecedes(GinPageGetDeleteXid(page), RecentGlobalDataXmin))) + /* * We use our own ItemPointerGet(BlockNumber|OffsetNumber) * to avoid Asserts, since sometimes the ip_posid isn't "valid" @@ -572,6 +583,7 @@ typedef struct ginxlogDeletePage OffsetNumber parentOffset; BlockNumber leftBlkno; BlockNumber rightLink; + TransactionId deleteXid; /* last Xid which could see this page in scan */ } ginxlogDeletePage; #define XLOG_GIN_UPDATE_META_PAGE 0x60