diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 891ad3822e1..6ddbf34382a 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -34,7 +34,8 @@ static bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack, bool unlockbuf, bool unlockleftchild); static void gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack, GISTSTATE *giststate, List *splitinfo, bool releasebuf); -static void gistvacuumpage(Relation rel, Page page, Buffer buffer); +static void gistvacuumpage(Relation rel, Page page, Buffer buffer, + Relation heapRel); #define ROTATEDIST(d) do { \ @@ -161,7 +162,7 @@ gistinsert(Relation r, Datum *values, bool *isnull, values, isnull, true /* size is currently bogus */ ); itup->t_tid = *ht_ctid; - gistdoinsert(r, itup, 0, giststate); + gistdoinsert(r, itup, 0, giststate, heapRel); /* cleanup */ MemoryContextSwitchTo(oldCxt); @@ -207,7 +208,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, BlockNumber *newblkno, Buffer leftchildbuf, List **splitinfo, - bool markfollowright) + bool markfollowright, + Relation heapRel) { BlockNumber blkno = BufferGetBlockNumber(buffer); Page page = BufferGetPage(buffer); @@ -248,7 +250,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, */ if (is_split && GistPageIsLeaf(page) && GistPageHasGarbage(page)) { - gistvacuumpage(rel, page, buffer); + gistvacuumpage(rel, page, buffer, heapRel); is_split = gistnospace(page, itup, ntup, oldoffnum, freespace); } @@ -524,7 +526,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, recptr = gistXLogUpdate(buffer, deloffs, ndeloffs, itup, ntup, - leftchildbuf); + leftchildbuf, NULL); PageSetLSN(page, recptr); } @@ -572,7 +574,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, * so it does not bother releasing palloc'd allocations. */ void -gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate) +gistdoinsert(Relation r, IndexTuple itup, Size freespace, + GISTSTATE *giststate, Relation heapRel) { ItemId iid; IndexTuple idxtuple; @@ -584,6 +587,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate) memset(&state, 0, sizeof(GISTInsertState)); state.freespace = freespace; state.r = r; + state.heapRel = heapRel; /* Start from the root */ firststack.blkno = GIST_ROOT_BLKNO; @@ -1194,7 +1198,8 @@ gistinserttuples(GISTInsertState *state, GISTInsertStack *stack, oldoffnum, NULL, leftchild, &splitinfo, - true); + true, + state->heapRel); /* * Before recursing up in case the page was split, release locks on the @@ -1493,7 +1498,7 @@ freeGISTstate(GISTSTATE *giststate) * Function assumes that buffer is exclusively locked. */ static void -gistvacuumpage(Relation rel, Page page, Buffer buffer) +gistvacuumpage(Relation rel, Page page, Buffer buffer, Relation heapRel) { OffsetNumber deletable[MaxIndexTuplesPerPage]; int ndeletable = 0; @@ -1541,7 +1546,8 @@ gistvacuumpage(Relation rel, Page page, Buffer buffer) recptr = gistXLogUpdate(buffer, deletable, ndeletable, - NULL, 0, InvalidBuffer); + NULL, 0, InvalidBuffer, + &heapRel->rd_node); PageSetLSN(page, recptr); } diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c index 4e43a6932a4..23a086a792f 100644 --- a/src/backend/access/gist/gistbuild.c +++ b/src/backend/access/gist/gistbuild.c @@ -55,6 +55,7 @@ typedef enum typedef struct { Relation indexrel; + Relation heaprel; GISTSTATE *giststate; int64 indtuples; /* number of tuples indexed */ @@ -121,6 +122,7 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo) int fillfactor; buildstate.indexrel = index; + buildstate.heaprel = heap; if (index->rd_options) { /* Get buffering mode from the options string */ @@ -483,7 +485,7 @@ gistBuildCallback(Relation index, * locked, we call gistdoinsert directly. */ gistdoinsert(index, itup, buildstate->freespace, - buildstate->giststate); + buildstate->giststate, buildstate->heaprel); } /* Update tuple count and total size. */ @@ -689,7 +691,8 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level, itup, ntup, oldoffnum, &placed_to_blk, InvalidBuffer, &splitinfo, - false); + false, + buildstate->heaprel); /* * If this is a root split, update the root path item kept in memory. This diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c index 404d10b3c11..3b4cebb1de2 100644 --- a/src/backend/access/gist/gistvacuum.c +++ b/src/backend/access/gist/gistvacuum.c @@ -224,7 +224,8 @@ gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, recptr = gistXLogUpdate(buffer, todelete, ntodelete, - NULL, 0, InvalidBuffer); + NULL, 0, InvalidBuffer, + NULL); PageSetLSN(page, recptr); } else diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c index 01c7ef7ea67..5425b45a143 100644 --- a/src/backend/access/gist/gistxlog.c +++ b/src/backend/access/gist/gistxlog.c @@ -14,8 +14,12 @@ #include "postgres.h" #include "access/gist_private.h" +#include "access/heapam_xlog.h" +#include "access/transam.h" #include "access/xloginsert.h" #include "access/xlogutils.h" +#include "miscadmin.h" +#include "storage/procarray.h" #include "utils/memutils.h" static MemoryContext opCtx; /* working memory for operations */ @@ -58,6 +62,155 @@ gistRedoClearFollowRight(XLogReaderState *record, uint8 block_id) UnlockReleaseBuffer(buffer); } +/* + * Get the latestRemovedXid from the heap pages pointed at by the index + * tuples being deleted. See also btree_xlog_delete_get_latestRemovedXid, + * on which this function is based. + */ +static TransactionId +gistRedoPageUpdateRecordGetLatestRemovedXid(XLogReaderState *record) +{ + gistxlogPageUpdate *xlrec = (gistxlogPageUpdate *) XLogRecGetData(record); + OffsetNumber *todelete; + Buffer ibuffer, + hbuffer; + Page ipage, + hpage; + RelFileNode rnode, + *hnode; + BlockNumber blkno; + ItemId iitemid, + hitemid; + IndexTuple itup; + HeapTupleHeader htuphdr; + BlockNumber hblkno; + OffsetNumber hoffnum; + TransactionId latestRemovedXid = InvalidTransactionId; + int i; + + /* + * If there's nothing running on the standby we don't need to derive a + * full latestRemovedXid value, so use a fast path out of here. This + * returns InvalidTransactionId, and so will conflict with all HS + * transactions; but since we just worked out that that's zero people, + * it's OK. + * + * XXX There is a race condition here, which is that a new backend might + * start just after we look. If so, it cannot need to conflict, but this + * coding will result in throwing a conflict anyway. + */ + if (CountDBBackends(InvalidOid) == 0) + return latestRemovedXid; + + /* + * In what follows, we have to examine the previous state of the index + * page, as well as the heap page(s) it points to. This is only valid if + * WAL replay has reached a consistent database state; which means that + * the preceding check is not just an optimization, but is *necessary*. We + * won't have let in any user sessions before we reach consistency. + */ + if (!reachedConsistency) + elog(PANIC, "gistRedoDeleteRecordGetLatestRemovedXid: cannot operate with inconsistent data"); + + /* + * Get index page. If the DB is consistent, this should not fail, nor + * should any of the heap page fetches below. If one does, we return + * InvalidTransactionId to cancel all HS transactions. That's probably + * overkill, but it's safe, and certainly better than panicking here. + */ + XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno); + ibuffer = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno, RBM_NORMAL); + if (!BufferIsValid(ibuffer)) + return InvalidTransactionId; + LockBuffer(ibuffer, BUFFER_LOCK_EXCLUSIVE); + ipage = (Page) BufferGetPage(ibuffer); + + /* + * Loop through the deleted index items to obtain the TransactionId from + * the heap items they point to. + */ + hnode = (RelFileNode *) ((char *) xlrec + sizeof(gistxlogPageUpdate)); + todelete = (OffsetNumber *) ((char *) hnode + sizeof(RelFileNode)); + + for (i = 0; i < xlrec->ntodelete; i++) + { + /* + * Identify the index tuple about to be deleted + */ + iitemid = PageGetItemId(ipage, todelete[i]); + itup = (IndexTuple) PageGetItem(ipage, iitemid); + + /* + * Locate the heap page that the index tuple points at + */ + hblkno = ItemPointerGetBlockNumber(&(itup->t_tid)); + hbuffer = XLogReadBufferExtended(*hnode, MAIN_FORKNUM, hblkno, RBM_NORMAL); + if (!BufferIsValid(hbuffer)) + { + UnlockReleaseBuffer(ibuffer); + return InvalidTransactionId; + } + LockBuffer(hbuffer, BUFFER_LOCK_SHARE); + hpage = (Page) BufferGetPage(hbuffer); + + /* + * Look up the heap tuple header that the index tuple points at by + * using the heap node supplied with the xlrec. We can't use + * heap_fetch, since it uses ReadBuffer rather than XLogReadBuffer. + * Note that we are not looking at tuple data here, just headers. + */ + hoffnum = ItemPointerGetOffsetNumber(&(itup->t_tid)); + hitemid = PageGetItemId(hpage, hoffnum); + + /* + * Follow any redirections until we find something useful. + */ + while (ItemIdIsRedirected(hitemid)) + { + hoffnum = ItemIdGetRedirect(hitemid); + hitemid = PageGetItemId(hpage, hoffnum); + CHECK_FOR_INTERRUPTS(); + } + + /* + * If the heap item has storage, then read the header and use that to + * set latestRemovedXid. + * + * Some LP_DEAD items may not be accessible, so we ignore them. + */ + if (ItemIdHasStorage(hitemid)) + { + htuphdr = (HeapTupleHeader) PageGetItem(hpage, hitemid); + + HeapTupleHeaderAdvanceLatestRemovedXid(htuphdr, &latestRemovedXid); + } + else if (ItemIdIsDead(hitemid)) + { + /* + * Conjecture: if hitemid is dead then it had xids before the xids + * marked on LP_NORMAL items. So we just ignore this item and move + * onto the next, for the purposes of calculating + * latestRemovedxids. + */ + } + else + Assert(!ItemIdIsUsed(hitemid)); + + UnlockReleaseBuffer(hbuffer); + } + + UnlockReleaseBuffer(ibuffer); + + /* + * If all heap tuples were LP_DEAD then we will be returning + * InvalidTransactionId here, which avoids conflicts. This matches + * existing logic which assumes that LP_DEAD tuples must already be older + * than the latestRemovedXid on the cleanup record that set them as + * LP_DEAD, hence must already have generated a conflict. + */ + return latestRemovedXid; +} + /* * redo any page update (except page split) */ @@ -69,6 +222,34 @@ gistRedoPageUpdateRecord(XLogReaderState *record) Buffer buffer; Page page; + /* + * If we have any conflict processing to do, it must happen before we + * update the page. + * + * Support for conflict processing in GiST has been backpatched. This is + * why we have to use tricky way of saving WAL-compatibility between minor + * versions. Information required for conflict processing is just + * appended to data of XLOG_GIST_PAGE_UPDATE record. So, PostgreSQL + * version, which doesn't know about conflict processing, will just ignore + * that. + * + * GiST delete records can conflict with standby queries. You might think + * that vacuum records would conflict as well, but we've handled that + * already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid + * cleaned by the vacuum of the heap and so we can resolve any conflicts + * just once when that arrives. After that we know that no conflicts + * exist from individual gist vacuum records on that index. + */ + if (InHotStandby && XLogRecGetDataLen(record) > sizeof(gistxlogPageUpdate)) + { + TransactionId latestRemovedXid = gistRedoPageUpdateRecordGetLatestRemovedXid(record); + RelFileNode rnode; + + XLogRecGetBlockTag(record, 0, &rnode, NULL, NULL); + + ResolveRecoveryConflictWithSnapshot(latestRemovedXid, rnode); + } + if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { char *begin; @@ -390,7 +571,7 @@ XLogRecPtr gistXLogUpdate(Buffer buffer, OffsetNumber *todelete, int ntodelete, IndexTuple *itup, int ituplen, - Buffer leftchildbuf) + Buffer leftchildbuf, RelFileNode *hnode) { gistxlogPageUpdate xlrec; int i; @@ -402,6 +583,16 @@ gistXLogUpdate(Buffer buffer, XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(gistxlogPageUpdate)); + /* + * Append the information required for standby conflict processing if it + * is provided by caller. + */ + if (hnode) + { + XLogRegisterData((char *) hnode, sizeof(RelFileNode)); + XLogRegisterData((char *) todelete, sizeof(OffsetNumber) * ntodelete); + } + XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); XLogRegisterBufData(0, (char *) todelete, sizeof(OffsetNumber) * ntodelete); diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index 78e87a60771..dcd7bd54662 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -285,6 +285,7 @@ typedef struct GistSplitVector typedef struct { Relation r; + Relation heapRel; Size freespace; /* free space to be left */ GISTInsertStack *stack; @@ -434,7 +435,8 @@ extern void freeGISTstate(GISTSTATE *giststate); extern void gistdoinsert(Relation r, IndexTuple itup, Size freespace, - GISTSTATE *GISTstate); + GISTSTATE *GISTstate, + Relation heapRel); /* A List of these is returned from gistplacetopage() in *splitinfo */ typedef struct @@ -449,7 +451,8 @@ extern bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, OffsetNumber oldoffnum, BlockNumber *newblkno, Buffer leftchildbuf, List **splitinfo, - bool markleftchild); + bool markleftchild, + Relation heapRel); extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *giststate); @@ -464,7 +467,7 @@ extern void gist_xlog_cleanup(void); extern XLogRecPtr gistXLogUpdate(Buffer buffer, OffsetNumber *todelete, int ntodelete, IndexTuple *itup, int ntup, - Buffer leftchild); + Buffer leftchild, RelFileNode *hnode); extern XLogRecPtr gistXLogSplit(bool page_is_leaf, SplitedPageLayout *dist,