diff --git a/src/backend/access/gin/README b/src/backend/access/gin/README index fade0cbb617..d551df1166c 100644 --- a/src/backend/access/gin/README +++ b/src/backend/access/gin/README @@ -304,12 +304,10 @@ the lock on next page has been acquired. The downlink is more tricky. A search descending the tree must release the lock on the parent page before locking the child, or it could deadlock with a concurrent split of the child page; a page split locks the parent, while -already holding a lock on the child page. However, posting trees are only -fully searched from left to right, starting from the leftmost leaf. (The -tree-structure is only needed by insertions, to quickly find the correct -insert location). So as long as we don't delete the leftmost page on each -level, a search can never follow a downlink to page that's about to be -deleted. +already holding a lock on the child page. So, a deleted page cannot be +reclaimed immediately. Instead, we have to wait until every transaction that +might still reference this page has finished. Such processes must observe +that the page is marked deleted and recover accordingly. The previous paragraph's reasoning only applies to searches, and only to posting trees. 
To protect from inserters following a downlink to a deleted diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index 91e4a8cf700..3e82a13edff 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -305,12 +305,7 @@ GinNewBuffer(Relation index) */ if (ConditionalLockBuffer(buffer)) { - Page page = BufferGetPage(buffer); - - if (PageIsNew(page)) - return buffer; /* OK to use, if never initialized */ - - if (GinPageIsDeleted(page)) + if (GinPageIsRecyclable(BufferGetPage(buffer))) return buffer; /* OK to use */ LockBuffer(buffer, GIN_UNLOCK); diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c index 2292a045be2..6e25524d74c 100644 --- a/src/backend/access/gin/ginvacuum.c +++ b/src/backend/access/gin/ginvacuum.c @@ -159,6 +159,9 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn page = BufferGetPage(dBuffer); rightlink = GinPageGetOpaque(page)->rightlink; + /* For a deleted page, remember the last xid which could know its address */ + GinPageSetDeleteXid(page, ReadNewTransactionId()); + page = BufferGetPage(lBuffer); GinPageGetOpaque(page)->rightlink = rightlink; @@ -206,6 +209,7 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn data.parentOffset = myoff; data.rightLink = GinPageGetOpaque(page)->rightlink; + data.deleteXid = GinPageGetDeleteXid(page); XLogRegisterData((char *) &data, sizeof(ginxlogDeletePage)); @@ -725,7 +729,7 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) LockBuffer(buffer, GIN_SHARE); page = (Page) BufferGetPage(buffer); - if (PageIsNew(page) || GinPageIsDeleted(page)) + if (GinPageIsRecyclable(page)) { Assert(blkno != GIN_ROOT_BLKNO); RecordFreeIndexPage(index, blkno); diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c index 37bdcc40fac..291f22ef77f 100644 --- a/src/backend/access/gin/ginxlog.c +++ b/src/backend/access/gin/ginxlog.c @@ 
-531,6 +531,7 @@ ginRedoDeletePage(XLogReaderState *record) page = BufferGetPage(dbuffer); Assert(GinPageIsData(page)); GinPageGetOpaque(page)->flags = GIN_DELETED; + GinPageSetDeleteXid(page, data->deleteXid); PageSetLSN(page, lsn); MarkBufferDirty(dbuffer); } diff --git a/src/include/access/ginblock.h b/src/include/access/ginblock.h index 114370c7d71..ce031580e16 100644 --- a/src/include/access/ginblock.h +++ b/src/include/access/ginblock.h @@ -10,6 +10,7 @@ #ifndef GINBLOCK_H #define GINBLOCK_H +#include "access/transam.h" #include "storage/block.h" #include "storage/itemptr.h" #include "storage/off.h" @@ -127,6 +128,15 @@ typedef struct GinMetaPageData #define GinPageRightMost(page) ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber) +/* + * We should reclaim a deleted page only once every transaction that started + * before its deletion is over. + */ +#define GinPageGetDeleteXid(page) ( ((PageHeader) (page))->pd_prune_xid ) +#define GinPageSetDeleteXid(page, xid) ( ((PageHeader) (page))->pd_prune_xid = xid) +#define GinPageIsRecyclable(page) ( PageIsNew(page) || (GinPageIsDeleted(page) \ + && TransactionIdPrecedes(GinPageGetDeleteXid(page), RecentGlobalXmin))) + /* * We use our own ItemPointerGet(BlockNumber|OffsetNumber) * to avoid Asserts, since sometimes the ip_posid isn't "valid" diff --git a/src/include/access/ginxlog.h b/src/include/access/ginxlog.h index 42e0ae90c3c..a86735b7ed8 100644 --- a/src/include/access/ginxlog.h +++ b/src/include/access/ginxlog.h @@ -158,6 +158,7 @@ typedef struct ginxlogDeletePage { OffsetNumber parentOffset; BlockNumber rightLink; + TransactionId deleteXid; /* last Xid which could see this page in scan */ } ginxlogDeletePage; #define XLOG_GIN_UPDATE_META_PAGE 0x60