mirror of
https://github.com/postgres/postgres.git
synced 2025-07-28 23:42:10 +03:00
Add the "snapshot too old" feature
This feature is controlled by a new old_snapshot_threshold GUC. A value of -1 disables the feature, and that is the default. A value of 0 is intended only for testing. Any value above 0 is the number of minutes a snapshot can age before pruning and vacuum are allowed to remove dead tuples which the snapshot would otherwise protect. The xmin associated with a transaction ID does still protect dead tuples. A connection which is using an "old" snapshot does not get an error unless it accesses a page modified recently enough that it might not be able to produce accurate results. This is similar to the Oracle feature, and we use the same SQLSTATE and error message for compatibility.
This commit is contained in:
@ -135,7 +135,7 @@ brininsert(Relation idxRel, Datum *values, bool *nulls,
|
||||
MemoryContext tupcxt = NULL;
|
||||
MemoryContext oldcxt = NULL;
|
||||
|
||||
revmap = brinRevmapInitialize(idxRel, &pagesPerRange);
|
||||
revmap = brinRevmapInitialize(idxRel, &pagesPerRange, NULL);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
@ -152,7 +152,7 @@ brininsert(Relation idxRel, Datum *values, bool *nulls,
|
||||
/* normalize the block number to be the first block in the range */
|
||||
heapBlk = (heapBlk / pagesPerRange) * pagesPerRange;
|
||||
brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, NULL,
|
||||
BUFFER_LOCK_SHARE);
|
||||
BUFFER_LOCK_SHARE, NULL);
|
||||
|
||||
/* if range is unsummarized, there's nothing to do */
|
||||
if (!brtup)
|
||||
@ -285,7 +285,8 @@ brinbeginscan(Relation r, int nkeys, int norderbys)
|
||||
scan = RelationGetIndexScan(r, nkeys, norderbys);
|
||||
|
||||
opaque = (BrinOpaque *) palloc(sizeof(BrinOpaque));
|
||||
opaque->bo_rmAccess = brinRevmapInitialize(r, &opaque->bo_pagesPerRange);
|
||||
opaque->bo_rmAccess = brinRevmapInitialize(r, &opaque->bo_pagesPerRange,
|
||||
scan->xs_snapshot);
|
||||
opaque->bo_bdesc = brin_build_desc(r);
|
||||
scan->opaque = opaque;
|
||||
|
||||
@ -368,7 +369,8 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
|
||||
MemoryContextResetAndDeleteChildren(perRangeCxt);
|
||||
|
||||
tup = brinGetTupleForHeapBlock(opaque->bo_rmAccess, heapBlk, &buf,
|
||||
&off, &size, BUFFER_LOCK_SHARE);
|
||||
&off, &size, BUFFER_LOCK_SHARE,
|
||||
scan->xs_snapshot);
|
||||
if (tup)
|
||||
{
|
||||
tup = brin_copy_tuple(tup, size);
|
||||
@ -647,7 +649,7 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
|
||||
/*
|
||||
* Initialize our state, including the deformed tuple state.
|
||||
*/
|
||||
revmap = brinRevmapInitialize(index, &pagesPerRange);
|
||||
revmap = brinRevmapInitialize(index, &pagesPerRange, NULL);
|
||||
state = initialize_brin_buildstate(index, revmap, pagesPerRange);
|
||||
|
||||
/*
|
||||
@ -1045,7 +1047,8 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
|
||||
* the same.)
|
||||
*/
|
||||
phtup = brinGetTupleForHeapBlock(state->bs_rmAccess, heapBlk, &phbuf,
|
||||
&offset, &phsz, BUFFER_LOCK_SHARE);
|
||||
&offset, &phsz, BUFFER_LOCK_SHARE,
|
||||
NULL);
|
||||
/* the placeholder tuple must exist */
|
||||
if (phtup == NULL)
|
||||
elog(ERROR, "missing placeholder tuple");
|
||||
@ -1080,7 +1083,7 @@ brinsummarize(Relation index, Relation heapRel, double *numSummarized,
|
||||
BlockNumber pagesPerRange;
|
||||
Buffer buf;
|
||||
|
||||
revmap = brinRevmapInitialize(index, &pagesPerRange);
|
||||
revmap = brinRevmapInitialize(index, &pagesPerRange, NULL);
|
||||
|
||||
/*
|
||||
* Scan the revmap to find unsummarized items.
|
||||
@ -1095,7 +1098,7 @@ brinsummarize(Relation index, Relation heapRel, double *numSummarized,
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
|
||||
tup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, NULL,
|
||||
BUFFER_LOCK_SHARE);
|
||||
BUFFER_LOCK_SHARE, NULL);
|
||||
if (tup == NULL)
|
||||
{
|
||||
/* no revmap entry for this heap range. Summarize it. */
|
||||
|
@ -68,7 +68,8 @@ static void revmap_physical_extend(BrinRevmap *revmap);
|
||||
* brinRevmapTerminate when caller is done with it.
|
||||
*/
|
||||
BrinRevmap *
|
||||
brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange)
|
||||
brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange,
|
||||
Snapshot snapshot)
|
||||
{
|
||||
BrinRevmap *revmap;
|
||||
Buffer meta;
|
||||
@ -77,7 +78,7 @@ brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange)
|
||||
|
||||
meta = ReadBuffer(idxrel, BRIN_METAPAGE_BLKNO);
|
||||
LockBuffer(meta, BUFFER_LOCK_SHARE);
|
||||
page = BufferGetPage(meta, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(meta, snapshot, idxrel, BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
metadata = (BrinMetaPageData *) PageGetContents(page);
|
||||
|
||||
revmap = palloc(sizeof(BrinRevmap));
|
||||
@ -187,7 +188,8 @@ brinSetHeapBlockItemptr(Buffer buf, BlockNumber pagesPerRange,
|
||||
*/
|
||||
BrinTuple *
|
||||
brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk,
|
||||
Buffer *buf, OffsetNumber *off, Size *size, int mode)
|
||||
Buffer *buf, OffsetNumber *off, Size *size, int mode,
|
||||
Snapshot snapshot)
|
||||
{
|
||||
Relation idxRel = revmap->rm_irel;
|
||||
BlockNumber mapBlk;
|
||||
@ -264,7 +266,8 @@ brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk,
|
||||
*buf = ReadBuffer(idxRel, blk);
|
||||
}
|
||||
LockBuffer(*buf, mode);
|
||||
page = BufferGetPage(*buf, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(*buf, snapshot, idxRel,
|
||||
BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
|
||||
/* If we land on a revmap page, start over */
|
||||
if (BRIN_IS_REGULAR_PAGE(page))
|
||||
|
@ -71,7 +71,7 @@ ginTraverseLock(Buffer buffer, bool searchMode)
|
||||
* is share-locked, and stack->parent is NULL.
|
||||
*/
|
||||
GinBtreeStack *
|
||||
ginFindLeafPage(GinBtree btree, bool searchMode)
|
||||
ginFindLeafPage(GinBtree btree, bool searchMode, Snapshot snapshot)
|
||||
{
|
||||
GinBtreeStack *stack;
|
||||
|
||||
@ -89,7 +89,8 @@ ginFindLeafPage(GinBtree btree, bool searchMode)
|
||||
|
||||
stack->off = InvalidOffsetNumber;
|
||||
|
||||
page = BufferGetPage(stack->buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(stack->buffer, snapshot, btree->index,
|
||||
BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
|
||||
access = ginTraverseLock(stack->buffer, searchMode);
|
||||
|
||||
@ -115,8 +116,8 @@ ginFindLeafPage(GinBtree btree, bool searchMode)
|
||||
|
||||
stack->buffer = ginStepRight(stack->buffer, btree->index, access);
|
||||
stack->blkno = rightlink;
|
||||
page = BufferGetPage(stack->buffer, NULL, NULL,
|
||||
BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(stack->buffer, snapshot, btree->index,
|
||||
BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
|
||||
if (!searchMode && GinPageIsIncompleteSplit(page))
|
||||
ginFinishSplit(btree, stack, false, NULL);
|
||||
|
@ -1820,7 +1820,7 @@ ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
|
||||
{
|
||||
/* search for the leaf page where the first item should go to */
|
||||
btree.itemptr = insertdata.items[insertdata.curitem];
|
||||
stack = ginFindLeafPage(&btree, false);
|
||||
stack = ginFindLeafPage(&btree, false, NULL);
|
||||
|
||||
ginInsertValue(&btree, stack, &insertdata, buildStats);
|
||||
}
|
||||
@ -1830,7 +1830,8 @@ ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
|
||||
* Starts a new scan on a posting tree.
|
||||
*/
|
||||
GinBtreeStack *
|
||||
ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno)
|
||||
ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno,
|
||||
Snapshot snapshot)
|
||||
{
|
||||
GinBtreeStack *stack;
|
||||
|
||||
@ -1838,7 +1839,7 @@ ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno)
|
||||
|
||||
btree->fullScan = TRUE;
|
||||
|
||||
stack = ginFindLeafPage(btree, TRUE);
|
||||
stack = ginFindLeafPage(btree, TRUE, snapshot);
|
||||
|
||||
return stack;
|
||||
}
|
||||
|
@ -73,7 +73,7 @@ scanPostingTree(Relation index, GinScanEntry scanEntry,
|
||||
Page page;
|
||||
|
||||
/* Descend to the leftmost leaf page */
|
||||
stack = ginScanBeginPostingTree(&btree, index, rootPostingTree);
|
||||
stack = ginScanBeginPostingTree(&btree, index, rootPostingTree, snapshot);
|
||||
buffer = stack->buffer;
|
||||
IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
|
||||
|
||||
@ -146,7 +146,8 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
|
||||
if (moveRightIfItNeeded(btree, stack) == false)
|
||||
return true;
|
||||
|
||||
page = BufferGetPage(stack->buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(stack->buffer, snapshot, btree->index,
|
||||
BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
|
||||
|
||||
/*
|
||||
@ -320,7 +321,7 @@ restartScanEntry:
|
||||
ginPrepareEntryScan(&btreeEntry, entry->attnum,
|
||||
entry->queryKey, entry->queryCategory,
|
||||
ginstate);
|
||||
stackEntry = ginFindLeafPage(&btreeEntry, true);
|
||||
stackEntry = ginFindLeafPage(&btreeEntry, true, snapshot);
|
||||
page = BufferGetPage(stackEntry->buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
needUnlock = TRUE;
|
||||
|
||||
@ -385,7 +386,7 @@ restartScanEntry:
|
||||
needUnlock = FALSE;
|
||||
|
||||
stack = ginScanBeginPostingTree(&entry->btree, ginstate->index,
|
||||
rootPostingTree);
|
||||
rootPostingTree, snapshot);
|
||||
entry->buffer = stack->buffer;
|
||||
|
||||
/*
|
||||
@ -627,7 +628,7 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
|
||||
entry->btree.itemptr.ip_posid++;
|
||||
}
|
||||
entry->btree.fullScan = false;
|
||||
stack = ginFindLeafPage(&entry->btree, true);
|
||||
stack = ginFindLeafPage(&entry->btree, true, snapshot);
|
||||
|
||||
/* we don't need the stack, just the buffer. */
|
||||
entry->buffer = stack->buffer;
|
||||
@ -1335,8 +1336,8 @@ scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
|
||||
ItemPointerSetInvalid(&pos->item);
|
||||
for (;;)
|
||||
{
|
||||
page = BufferGetPage(pos->pendingBuffer, NULL,
|
||||
NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(pos->pendingBuffer, scan->xs_snapshot,
|
||||
scan->indexRelation, BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
|
||||
maxoff = PageGetMaxOffsetNumber(page);
|
||||
if (pos->firstOffset > maxoff)
|
||||
@ -1516,8 +1517,8 @@ collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
|
||||
memset(datumExtracted + pos->firstOffset - 1, 0,
|
||||
sizeof(bool) * (pos->lastOffset - pos->firstOffset));
|
||||
|
||||
page = BufferGetPage(pos->pendingBuffer, NULL,
|
||||
NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(pos->pendingBuffer, scan->xs_snapshot,
|
||||
scan->indexRelation, BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
|
||||
for (i = 0; i < so->nkeys; i++)
|
||||
{
|
||||
@ -1710,7 +1711,8 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
|
||||
*ntids = 0;
|
||||
|
||||
LockBuffer(metabuffer, GIN_SHARE);
|
||||
page = BufferGetPage(metabuffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(metabuffer, scan->xs_snapshot, scan->indexRelation,
|
||||
BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
blkno = GinPageGetMeta(page)->head;
|
||||
|
||||
/*
|
||||
|
@ -192,7 +192,7 @@ ginEntryInsert(GinState *ginstate,
|
||||
|
||||
ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
|
||||
|
||||
stack = ginFindLeafPage(&btree, false);
|
||||
stack = ginFindLeafPage(&btree, false, NULL);
|
||||
page = BufferGetPage(stack->buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
|
||||
if (btree.findItem(&btree, stack))
|
||||
|
@ -336,7 +336,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
|
||||
buffer = ReadBuffer(scan->indexRelation, pageItem->blkno);
|
||||
LockBuffer(buffer, GIST_SHARE);
|
||||
gistcheckpage(scan->indexRelation, buffer);
|
||||
page = BufferGetPage(buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(buffer, scan->xs_snapshot, r, BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
opaque = GistPageGetOpaque(page);
|
||||
|
||||
/*
|
||||
|
@ -278,7 +278,8 @@ hashgettuple(IndexScanDesc scan, ScanDirection dir)
|
||||
|
||||
buf = so->hashso_curbuf;
|
||||
Assert(BufferIsValid(buf));
|
||||
page = BufferGetPage(buf, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(buf, scan->xs_snapshot, rel,
|
||||
BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
maxoffnum = PageGetMaxOffsetNumber(page);
|
||||
for (offnum = ItemPointerGetOffsetNumber(current);
|
||||
offnum <= maxoffnum;
|
||||
|
@ -188,8 +188,8 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
|
||||
|
||||
/* Read the metapage */
|
||||
metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
|
||||
page = BufferGetPage(metabuf, NULL, NULL,
|
||||
BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(metabuf, scan->xs_snapshot, rel,
|
||||
BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
metap = HashPageGetMeta(page);
|
||||
|
||||
/*
|
||||
@ -242,8 +242,8 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
|
||||
|
||||
/* Fetch the primary bucket page for the bucket */
|
||||
buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE);
|
||||
page = BufferGetPage(buf, NULL, NULL,
|
||||
BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(buf, scan->xs_snapshot, rel,
|
||||
BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
opaque = (HashPageOpaque) PageGetSpecialPointer(page);
|
||||
Assert(opaque->hasho_bucket == bucket);
|
||||
|
||||
@ -350,6 +350,7 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
|
||||
_hash_readnext(rel, &buf, &page, &opaque);
|
||||
if (BufferIsValid(buf))
|
||||
{
|
||||
TestForOldSnapshot(scan->xs_snapshot, rel, page);
|
||||
maxoff = PageGetMaxOffsetNumber(page);
|
||||
offnum = _hash_binsearch(page, so->hashso_sk_hash);
|
||||
}
|
||||
@ -391,6 +392,7 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
|
||||
_hash_readprev(rel, &buf, &page, &opaque);
|
||||
if (BufferIsValid(buf))
|
||||
{
|
||||
TestForOldSnapshot(scan->xs_snapshot, rel, page);
|
||||
maxoff = PageGetMaxOffsetNumber(page);
|
||||
offnum = _hash_binsearch_last(page, so->hashso_sk_hash);
|
||||
}
|
||||
|
@ -394,7 +394,8 @@ heapgetpage(HeapScanDesc scan, BlockNumber page)
|
||||
*/
|
||||
LockBuffer(buffer, BUFFER_LOCK_SHARE);
|
||||
|
||||
dp = BufferGetPage(buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
dp = BufferGetPage(buffer, snapshot, scan->rs_rd,
|
||||
BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
lines = PageGetMaxOffsetNumber(dp);
|
||||
ntup = 0;
|
||||
|
||||
@ -537,7 +538,7 @@ heapgettup(HeapScanDesc scan,
|
||||
|
||||
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
|
||||
|
||||
dp = BufferGetPage(scan->rs_cbuf, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
dp = BufferGetPage(scan->rs_cbuf, snapshot, scan->rs_rd, BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
lines = PageGetMaxOffsetNumber(dp);
|
||||
/* page and lineoff now reference the physically next tid */
|
||||
|
||||
@ -582,7 +583,8 @@ heapgettup(HeapScanDesc scan,
|
||||
|
||||
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
|
||||
|
||||
dp = BufferGetPage(scan->rs_cbuf, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
dp = BufferGetPage(scan->rs_cbuf, snapshot, scan->rs_rd,
|
||||
BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
lines = PageGetMaxOffsetNumber(dp);
|
||||
|
||||
if (!scan->rs_inited)
|
||||
@ -616,7 +618,8 @@ heapgettup(HeapScanDesc scan,
|
||||
heapgetpage(scan, page);
|
||||
|
||||
/* Since the tuple was previously fetched, needn't lock page here */
|
||||
dp = BufferGetPage(scan->rs_cbuf, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
dp = BufferGetPage(scan->rs_cbuf, snapshot, scan->rs_rd,
|
||||
BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
lineoff = ItemPointerGetOffsetNumber(&(tuple->t_self));
|
||||
lpp = PageGetItemId(dp, lineoff);
|
||||
Assert(ItemIdIsNormal(lpp));
|
||||
@ -745,7 +748,8 @@ heapgettup(HeapScanDesc scan,
|
||||
|
||||
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
|
||||
|
||||
dp = BufferGetPage(scan->rs_cbuf, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
dp = BufferGetPage(scan->rs_cbuf, snapshot, scan->rs_rd,
|
||||
BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
lines = PageGetMaxOffsetNumber((Page) dp);
|
||||
linesleft = lines;
|
||||
if (backward)
|
||||
@ -832,7 +836,8 @@ heapgettup_pagemode(HeapScanDesc scan,
|
||||
lineindex = scan->rs_cindex + 1;
|
||||
}
|
||||
|
||||
dp = BufferGetPage(scan->rs_cbuf, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
dp = BufferGetPage(scan->rs_cbuf, scan->rs_snapshot, scan->rs_rd,
|
||||
BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
lines = scan->rs_ntuples;
|
||||
/* page and lineindex now reference the next visible tid */
|
||||
|
||||
@ -875,7 +880,8 @@ heapgettup_pagemode(HeapScanDesc scan,
|
||||
page = scan->rs_cblock; /* current page */
|
||||
}
|
||||
|
||||
dp = BufferGetPage(scan->rs_cbuf, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
dp = BufferGetPage(scan->rs_cbuf, scan->rs_snapshot, scan->rs_rd,
|
||||
BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
lines = scan->rs_ntuples;
|
||||
|
||||
if (!scan->rs_inited)
|
||||
@ -908,7 +914,8 @@ heapgettup_pagemode(HeapScanDesc scan,
|
||||
heapgetpage(scan, page);
|
||||
|
||||
/* Since the tuple was previously fetched, needn't lock page here */
|
||||
dp = BufferGetPage(scan->rs_cbuf, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
dp = BufferGetPage(scan->rs_cbuf, scan->rs_snapshot, scan->rs_rd,
|
||||
BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
lineoff = ItemPointerGetOffsetNumber(&(tuple->t_self));
|
||||
lpp = PageGetItemId(dp, lineoff);
|
||||
Assert(ItemIdIsNormal(lpp));
|
||||
@ -1027,7 +1034,8 @@ heapgettup_pagemode(HeapScanDesc scan,
|
||||
|
||||
heapgetpage(scan, page);
|
||||
|
||||
dp = BufferGetPage(scan->rs_cbuf, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
dp = BufferGetPage(scan->rs_cbuf, scan->rs_snapshot, scan->rs_rd,
|
||||
BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
lines = scan->rs_ntuples;
|
||||
linesleft = lines;
|
||||
if (backward)
|
||||
@ -1871,7 +1879,7 @@ heap_fetch(Relation relation,
|
||||
* Need share lock on buffer to examine tuple commit status.
|
||||
*/
|
||||
LockBuffer(buffer, BUFFER_LOCK_SHARE);
|
||||
page = BufferGetPage(buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(buffer, snapshot, relation, BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
|
||||
/*
|
||||
* We'd better check for out-of-range offnum in case of VACUUM since the
|
||||
@ -2200,7 +2208,8 @@ heap_get_latest_tid(Relation relation,
|
||||
*/
|
||||
buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
|
||||
LockBuffer(buffer, BUFFER_LOCK_SHARE);
|
||||
page = BufferGetPage(buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(buffer, snapshot, relation,
|
||||
BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
|
||||
/*
|
||||
* Check for bogus item number. This is not treated as an error
|
||||
|
@ -92,12 +92,21 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
|
||||
* need to use the horizon that includes slots, otherwise the data-only
|
||||
* horizon can be used. Note that the toast relation of user defined
|
||||
* relations are *not* considered catalog relations.
|
||||
*
|
||||
* It is OK to apply the old snapshot limit before acquiring the cleanup
|
||||
* lock because the worst that can happen is that we are not quite as
|
||||
* aggressive about the cleanup (by however many transaction IDs are
|
||||
* consumed between this point and acquiring the lock). This allows us to
|
||||
* save significant overhead in the case where the page is found not to be
|
||||
* prunable.
|
||||
*/
|
||||
if (IsCatalogRelation(relation) ||
|
||||
RelationIsAccessibleInLogicalDecoding(relation))
|
||||
OldestXmin = RecentGlobalXmin;
|
||||
else
|
||||
OldestXmin = RecentGlobalDataXmin;
|
||||
OldestXmin =
|
||||
TransactionIdLimitedForOldSnapshots(RecentGlobalDataXmin,
|
||||
relation);
|
||||
|
||||
Assert(TransactionIdIsValid(OldestXmin));
|
||||
|
||||
|
@ -119,7 +119,7 @@ _bt_doinsert(Relation rel, IndexTuple itup,
|
||||
|
||||
top:
|
||||
/* find the first page containing this key */
|
||||
stack = _bt_search(rel, natts, itup_scankey, false, &buf, BT_WRITE);
|
||||
stack = _bt_search(rel, natts, itup_scankey, false, &buf, BT_WRITE, NULL);
|
||||
|
||||
offset = InvalidOffsetNumber;
|
||||
|
||||
@ -135,7 +135,7 @@ top:
|
||||
* precise description.
|
||||
*/
|
||||
buf = _bt_moveright(rel, buf, natts, itup_scankey, false,
|
||||
true, stack, BT_WRITE);
|
||||
true, stack, BT_WRITE, NULL);
|
||||
|
||||
/*
|
||||
* If we're not allowing duplicates, make sure the key isn't already in
|
||||
@ -1682,7 +1682,8 @@ _bt_insert_parent(Relation rel,
|
||||
elog(DEBUG2, "concurrent ROOT page split");
|
||||
lpageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
/* Find the leftmost page at the next level up */
|
||||
pbuf = _bt_get_endpoint(rel, lpageop->btpo.level + 1, false);
|
||||
pbuf = _bt_get_endpoint(rel, lpageop->btpo.level + 1, false,
|
||||
NULL);
|
||||
/* Set up a phony stack entry pointing there */
|
||||
stack = &fakestack;
|
||||
stack->bts_blkno = BufferGetBlockNumber(pbuf);
|
||||
|
@ -1255,7 +1255,7 @@ _bt_pagedel(Relation rel, Buffer buf)
|
||||
itup_scankey = _bt_mkscankey(rel, targetkey);
|
||||
/* find the leftmost leaf page containing this key */
|
||||
stack = _bt_search(rel, rel->rd_rel->relnatts, itup_scankey,
|
||||
false, &lbuf, BT_READ);
|
||||
false, &lbuf, BT_READ, NULL);
|
||||
/* don't need a pin on the page */
|
||||
_bt_relbuf(rel, lbuf);
|
||||
|
||||
|
@ -79,6 +79,10 @@ _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp)
|
||||
* address of the leaf-page buffer, which is read-locked and pinned.
|
||||
* No locks are held on the parent pages, however!
|
||||
*
|
||||
* If the snapshot parameter is not NULL, "old snapshot" checking will take
|
||||
* place during the descent through the tree. This is not needed when
|
||||
* positioning for an insert or delete, so NULL is used for those cases.
|
||||
*
|
||||
* NOTE that the returned buffer is read-locked regardless of the access
|
||||
* parameter. However, access = BT_WRITE will allow an empty root page
|
||||
* to be created and returned. When access = BT_READ, an empty index
|
||||
@ -87,7 +91,7 @@ _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp)
|
||||
*/
|
||||
BTStack
|
||||
_bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey,
|
||||
Buffer *bufP, int access)
|
||||
Buffer *bufP, int access, Snapshot snapshot)
|
||||
{
|
||||
BTStack stack_in = NULL;
|
||||
|
||||
@ -126,7 +130,7 @@ _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey,
|
||||
*/
|
||||
*bufP = _bt_moveright(rel, *bufP, keysz, scankey, nextkey,
|
||||
(access == BT_WRITE), stack_in,
|
||||
BT_READ);
|
||||
BT_READ, snapshot);
|
||||
|
||||
/* if this is a leaf page, we're done */
|
||||
page = BufferGetPage(*bufP, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
@ -199,6 +203,10 @@ _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey,
|
||||
* On entry, we have the buffer pinned and a lock of the type specified by
|
||||
* 'access'. If we move right, we release the buffer and lock and acquire
|
||||
* the same on the right sibling. Return value is the buffer we stop at.
|
||||
*
|
||||
* If the snapshot parameter is not NULL, "old snapshot" checking will take
|
||||
* place during the descent through the tree. This is not needed when
|
||||
* positioning for an insert or delete, so NULL is used for those cases.
|
||||
*/
|
||||
Buffer
|
||||
_bt_moveright(Relation rel,
|
||||
@ -208,7 +216,8 @@ _bt_moveright(Relation rel,
|
||||
bool nextkey,
|
||||
bool forupdate,
|
||||
BTStack stack,
|
||||
int access)
|
||||
int access,
|
||||
Snapshot snapshot)
|
||||
{
|
||||
Page page;
|
||||
BTPageOpaque opaque;
|
||||
@ -233,7 +242,7 @@ _bt_moveright(Relation rel,
|
||||
|
||||
for (;;)
|
||||
{
|
||||
page = BufferGetPage(buf, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(buf, snapshot, rel, BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
if (P_RIGHTMOST(opaque))
|
||||
@ -972,7 +981,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
* Use the manufactured insertion scan key to descend the tree and
|
||||
* position ourselves on the target leaf page.
|
||||
*/
|
||||
stack = _bt_search(rel, keysCount, scankeys, nextkey, &buf, BT_READ);
|
||||
stack = _bt_search(rel, keysCount, scankeys, nextkey, &buf, BT_READ,
|
||||
scan->xs_snapshot);
|
||||
|
||||
/* don't need to keep the stack around... */
|
||||
_bt_freestack(stack);
|
||||
@ -1337,8 +1347,8 @@ _bt_steppage(IndexScanDesc scan, ScanDirection dir)
|
||||
/* step right one page */
|
||||
so->currPos.buf = _bt_getbuf(rel, blkno, BT_READ);
|
||||
/* check for deleted page */
|
||||
page = BufferGetPage(so->currPos.buf, NULL, NULL,
|
||||
BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(so->currPos.buf, scan->xs_snapshot, rel,
|
||||
BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
if (!P_IGNORE(opaque))
|
||||
{
|
||||
@ -1412,8 +1422,8 @@ _bt_steppage(IndexScanDesc scan, ScanDirection dir)
|
||||
* it's not half-dead and contains matching tuples. Else loop back
|
||||
* and do it all again.
|
||||
*/
|
||||
page = BufferGetPage(so->currPos.buf, NULL, NULL,
|
||||
BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(so->currPos.buf, scan->xs_snapshot, rel,
|
||||
BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
if (!P_IGNORE(opaque))
|
||||
{
|
||||
@ -1476,7 +1486,7 @@ _bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot)
|
||||
/* check for interrupts while we're not holding any buffer lock */
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
buf = _bt_getbuf(rel, blkno, BT_READ);
|
||||
page = BufferGetPage(buf, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(buf, snapshot, rel, BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
/*
|
||||
@ -1502,14 +1512,14 @@ _bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot)
|
||||
break;
|
||||
blkno = opaque->btpo_next;
|
||||
buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
|
||||
page = BufferGetPage(buf, NULL, NULL,
|
||||
BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(buf, snapshot, rel,
|
||||
BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
}
|
||||
|
||||
/* Return to the original page to see what's up */
|
||||
buf = _bt_relandgetbuf(rel, buf, obknum, BT_READ);
|
||||
page = BufferGetPage(buf, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(buf, snapshot, rel, BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
if (P_ISDELETED(opaque))
|
||||
{
|
||||
@ -1526,8 +1536,8 @@ _bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot)
|
||||
RelationGetRelationName(rel));
|
||||
blkno = opaque->btpo_next;
|
||||
buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
|
||||
page = BufferGetPage(buf, NULL, NULL,
|
||||
BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(buf, snapshot, rel,
|
||||
BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
if (!P_ISDELETED(opaque))
|
||||
break;
|
||||
@ -1564,7 +1574,8 @@ _bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot)
|
||||
* The returned buffer is pinned and read-locked.
|
||||
*/
|
||||
Buffer
|
||||
_bt_get_endpoint(Relation rel, uint32 level, bool rightmost)
|
||||
_bt_get_endpoint(Relation rel, uint32 level, bool rightmost,
|
||||
Snapshot snapshot)
|
||||
{
|
||||
Buffer buf;
|
||||
Page page;
|
||||
@ -1586,7 +1597,7 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost)
|
||||
if (!BufferIsValid(buf))
|
||||
return InvalidBuffer;
|
||||
|
||||
page = BufferGetPage(buf, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(buf, snapshot, rel, BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
for (;;)
|
||||
@ -1605,8 +1616,8 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost)
|
||||
elog(ERROR, "fell off the end of index \"%s\"",
|
||||
RelationGetRelationName(rel));
|
||||
buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
|
||||
page = BufferGetPage(buf, NULL, NULL,
|
||||
BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(buf, snapshot, rel,
|
||||
BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
}
|
||||
|
||||
@ -1659,7 +1670,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
|
||||
* version of _bt_search(). We don't maintain a stack since we know we
|
||||
* won't need it.
|
||||
*/
|
||||
buf = _bt_get_endpoint(rel, 0, ScanDirectionIsBackward(dir));
|
||||
buf = _bt_get_endpoint(rel, 0, ScanDirectionIsBackward(dir), scan->xs_snapshot);
|
||||
|
||||
if (!BufferIsValid(buf))
|
||||
{
|
||||
|
@ -341,7 +341,7 @@ redirect:
|
||||
}
|
||||
/* else new pointer points to the same page, no work needed */
|
||||
|
||||
page = BufferGetPage(buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
|
||||
page = BufferGetPage(buffer, snapshot, index, BGP_TEST_FOR_OLD_SNAPSHOT);
|
||||
|
||||
isnull = SpGistPageStoresNulls(page) ? true : false;
|
||||
|
||||
|
Reference in New Issue
Block a user