
Remove the "snapshot too old" feature.

Remove the old_snapshot_threshold setting and mechanism for producing
the error "snapshot too old", originally added by commit 848ef42b.
Unfortunately it had a number of known problems in terms of correctness
and performance, mostly reported by Andres in the course of his work on
snapshot scalability.  We agreed to remove it, after a long period
without an active plan to fix it.

This is certainly a desirable feature, and someone might propose a new
or improved implementation in the future.

Reported-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/CACG%3DezYV%2BEvO135fLRdVn-ZusfVsTY6cH1OZqWtezuEYH6ciQA%40mail.gmail.com
Discussion: https://postgr.es/m/20200401064008.qob7bfnnbu4w5cw4%40alap3.anarazel.de
Discussion: https://postgr.es/m/CA%2BTgmoY%3Daqf0zjTD%2B3dUWYkgMiNDegDLFjo%2B6ze%3DWtpik%2B3XqA%40mail.gmail.com
Author: Thomas Munro
Date: 2023-09-05 18:26:12 +12:00
Parent: aa0d350456
Commit: f691f5b80a
50 changed files with 21 additions and 1425 deletions

View File

@@ -79,7 +79,6 @@ brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange,
meta = ReadBuffer(idxrel, BRIN_METAPAGE_BLKNO);
LockBuffer(meta, BUFFER_LOCK_SHARE);
page = BufferGetPage(meta);
TestForOldSnapshot(snapshot, idxrel, page);
metadata = (BrinMetaPageData *) PageGetContents(page);
revmap = palloc(sizeof(BrinRevmap));
@@ -277,7 +276,6 @@ brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk,
}
LockBuffer(*buf, mode);
page = BufferGetPage(*buf);
TestForOldSnapshot(snapshot, idxRel, page);
/* If we land on a revmap page, start over */
if (BRIN_IS_REGULAR_PAGE(page))
@@ -372,11 +370,6 @@ brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk)
LockBuffer(regBuf, BUFFER_LOCK_EXCLUSIVE);
regPg = BufferGetPage(regBuf);
/*
* We're only removing data, not reading it, so there's no need to
* TestForOldSnapshot here.
*/
/* if this is no longer a regular page, tell caller to start over */
if (!BRIN_IS_REGULAR_PAGE(regPg))
{

View File

@@ -100,7 +100,6 @@ ginFindLeafPage(GinBtree btree, bool searchMode,
stack->off = InvalidOffsetNumber;
page = BufferGetPage(stack->buffer);
TestForOldSnapshot(snapshot, btree->index, page);
access = ginTraverseLock(stack->buffer, searchMode);
@@ -127,7 +126,6 @@ ginFindLeafPage(GinBtree btree, bool searchMode,
stack->buffer = ginStepRight(stack->buffer, btree->index, access);
stack->blkno = rightlink;
page = BufferGetPage(stack->buffer);
TestForOldSnapshot(snapshot, btree->index, page);
if (!searchMode && GinPageIsIncompleteSplit(page))
ginFinishSplit(btree, stack, false, NULL);

View File

@@ -158,7 +158,6 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
return true;
page = BufferGetPage(stack->buffer);
TestForOldSnapshot(snapshot, btree->index, page);
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
/*
@@ -1460,7 +1459,6 @@ scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
for (;;)
{
page = BufferGetPage(pos->pendingBuffer);
TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
maxoff = PageGetMaxOffsetNumber(page);
if (pos->firstOffset > maxoff)
@@ -1641,7 +1639,6 @@ collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
sizeof(bool) * (pos->lastOffset - pos->firstOffset));
page = BufferGetPage(pos->pendingBuffer);
TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
for (i = 0; i < so->nkeys; i++)
{
@@ -1844,7 +1841,6 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
LockBuffer(metabuffer, GIN_SHARE);
page = BufferGetPage(metabuffer);
TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
blkno = GinPageGetMeta(page)->head;
/*

View File

@@ -346,7 +346,6 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
PredicateLockPage(r, BufferGetBlockNumber(buffer), scan->xs_snapshot);
gistcheckpage(scan->indexRelation, buffer);
page = BufferGetPage(buffer);
TestForOldSnapshot(scan->xs_snapshot, r, page);
opaque = GistPageGetOpaque(page);
/*

View File

@@ -71,7 +71,6 @@ _hash_next(IndexScanDesc scan, ScanDirection dir)
if (BlockNumberIsValid(blkno))
{
buf = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(buf));
if (!_hash_readpage(scan, &buf, dir))
end_of_scan = true;
}
@@ -91,7 +90,6 @@ _hash_next(IndexScanDesc scan, ScanDirection dir)
{
buf = _hash_getbuf(rel, blkno, HASH_READ,
LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(buf));
/*
* We always maintain the pin on bucket page for whole scan
@@ -186,7 +184,6 @@ _hash_readnext(IndexScanDesc scan,
if (block_found)
{
*pagep = BufferGetPage(*bufp);
TestForOldSnapshot(scan->xs_snapshot, rel, *pagep);
*opaquep = HashPageGetOpaque(*pagep);
}
}
@@ -232,7 +229,6 @@ _hash_readprev(IndexScanDesc scan,
*bufp = _hash_getbuf(rel, blkno, HASH_READ,
LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
*pagep = BufferGetPage(*bufp);
TestForOldSnapshot(scan->xs_snapshot, rel, *pagep);
*opaquep = HashPageGetOpaque(*pagep);
/*
@@ -351,7 +347,6 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_READ, NULL);
PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot);
page = BufferGetPage(buf);
TestForOldSnapshot(scan->xs_snapshot, rel, page);
opaque = HashPageGetOpaque(page);
bucket = opaque->hasho_bucket;
@@ -387,7 +382,6 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
old_buf = _hash_getbuf(rel, old_blkno, HASH_READ, LH_BUCKET_PAGE);
TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(old_buf));
/*
* remember the split bucket buffer so as to use it later for

View File

@@ -425,7 +425,6 @@ heapgetpage(TableScanDesc sscan, BlockNumber block)
LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer);
TestForOldSnapshot(snapshot, scan->rs_base.rs_rd, page);
lines = PageGetMaxOffsetNumber(page);
ntup = 0;
@@ -565,8 +564,6 @@ heapgettup_start_page(HeapScanDesc scan, ScanDirection dir, int *linesleft,
/* Caller is responsible for ensuring buffer is locked if needed */
page = BufferGetPage(scan->rs_cbuf);
TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page);
*linesleft = PageGetMaxOffsetNumber(page) - FirstOffsetNumber + 1;
if (ScanDirectionIsForward(dir))
@@ -598,8 +595,6 @@ heapgettup_continue_page(HeapScanDesc scan, ScanDirection dir, int *linesleft,
/* Caller is responsible for ensuring buffer is locked if needed */
page = BufferGetPage(scan->rs_cbuf);
TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page);
if (ScanDirectionIsForward(dir))
{
*lineoff = OffsetNumberNext(scan->rs_coffset);
@@ -864,7 +859,6 @@ heapgettup_pagemode(HeapScanDesc scan,
/* continue from previously returned page/tuple */
block = scan->rs_cblock; /* current page */
page = BufferGetPage(scan->rs_cbuf);
TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page);
lineindex = scan->rs_cindex + dir;
if (ScanDirectionIsForward(dir))
@@ -884,7 +878,6 @@ heapgettup_pagemode(HeapScanDesc scan,
{
heapgetpage((TableScanDesc) scan, block);
page = BufferGetPage(scan->rs_cbuf);
TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page);
linesleft = scan->rs_ntuples;
lineindex = ScanDirectionIsForward(dir) ? 0 : linesleft - 1;
@@ -1372,7 +1365,6 @@ heap_fetch(Relation relation,
*/
LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer);
TestForOldSnapshot(snapshot, relation, page);
/*
* We'd better check for out-of-range offnum in case of VACUUM since the
@@ -1663,7 +1655,6 @@ heap_get_latest_tid(TableScanDesc sscan,
buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer);
TestForOldSnapshot(snapshot, relation, page);
/*
* Check for bogus item number. This is not treated as an error

View File

@@ -36,18 +36,6 @@ typedef struct
/* tuple visibility test, initialized for the relation */
GlobalVisState *vistest;
/*
* Thresholds set by TransactionIdLimitedForOldSnapshots() if they have
* been computed (done on demand, and only if
* OldSnapshotThresholdActive()). The first time a tuple is about to be
* removed based on the limited horizon, old_snap_used is set to true, and
* SetOldSnapshotThresholdTimestamp() is called. See
* heap_prune_satisfies_vacuum().
*/
TimestampTz old_snap_ts;
TransactionId old_snap_xmin;
bool old_snap_used;
TransactionId new_prune_xid; /* new prune hint value for page */
TransactionId snapshotConflictHorizon; /* latest xid removed */
int nredirected; /* numbers of entries in arrays below */
@@ -110,8 +98,6 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
Page page = BufferGetPage(buffer);
TransactionId prune_xid;
GlobalVisState *vistest;
TransactionId limited_xmin = InvalidTransactionId;
TimestampTz limited_ts = 0;
Size minfree;
/*
@@ -122,15 +108,6 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
if (RecoveryInProgress())
return;
/*
* XXX: Magic to keep old_snapshot_threshold tests appear "working". They
* currently are broken, and discussion of what to do about them is
* ongoing. See
* https://www.postgresql.org/message-id/20200403001235.e6jfdll3gh2ygbuc%40alap3.anarazel.de
*/
if (old_snapshot_threshold == 0)
SnapshotTooOldMagicForTest();
/*
* First check whether there's any chance there's something to prune,
* determining the appropriate horizon is a waste if there's no prune_xid
@@ -143,35 +120,11 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
/*
* Check whether prune_xid indicates that there may be dead rows that can
* be cleaned up.
*
* It is OK to check the old snapshot limit before acquiring the cleanup
* lock because the worst that can happen is that we are not quite as
* aggressive about the cleanup (by however many transaction IDs are
* consumed between this point and acquiring the lock). This allows us to
* save significant overhead in the case where the page is found not to be
* prunable.
*
* Even if old_snapshot_threshold is set, we first check whether the page
* can be pruned without. Both because
* TransactionIdLimitedForOldSnapshots() is not cheap, and because not
* unnecessarily relying on old_snapshot_threshold avoids causing
* conflicts.
*/
vistest = GlobalVisTestFor(relation);
if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
{
if (!OldSnapshotThresholdActive())
return;
if (!TransactionIdLimitedForOldSnapshots(GlobalVisTestNonRemovableHorizon(vistest),
relation,
&limited_xmin, &limited_ts))
return;
if (!TransactionIdPrecedes(prune_xid, limited_xmin))
return;
}
return;
/*
* We prune when a previous UPDATE failed to find enough space on the page
@@ -205,8 +158,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
int ndeleted,
nnewlpdead;
ndeleted = heap_page_prune(relation, buffer, vistest, limited_xmin,
limited_ts, &nnewlpdead, NULL);
ndeleted = heap_page_prune(relation, buffer, vistest,
&nnewlpdead, NULL);
/*
* Report the number of tuples reclaimed to pgstats. This is
@@ -249,9 +202,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
*
* vistest is used to distinguish whether tuples are DEAD or RECENTLY_DEAD
* (see heap_prune_satisfies_vacuum and
* HeapTupleSatisfiesVacuum). old_snap_xmin / old_snap_ts need to
* either have been set by TransactionIdLimitedForOldSnapshots, or
* InvalidTransactionId/0 respectively.
* HeapTupleSatisfiesVacuum).
*
* Sets *nnewlpdead for caller, indicating the number of items that were
* newly set LP_DEAD during prune operation.
@@ -264,8 +215,6 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
int
heap_page_prune(Relation relation, Buffer buffer,
GlobalVisState *vistest,
TransactionId old_snap_xmin,
TimestampTz old_snap_ts,
int *nnewlpdead,
OffsetNumber *off_loc)
{
@@ -291,9 +240,6 @@ heap_page_prune(Relation relation, Buffer buffer,
prstate.new_prune_xid = InvalidTransactionId;
prstate.rel = relation;
prstate.vistest = vistest;
prstate.old_snap_xmin = old_snap_xmin;
prstate.old_snap_ts = old_snap_ts;
prstate.old_snap_used = false;
prstate.snapshotConflictHorizon = InvalidTransactionId;
prstate.nredirected = prstate.ndead = prstate.nunused = 0;
memset(prstate.marked, 0, sizeof(prstate.marked));
@@ -481,19 +427,6 @@ heap_page_prune(Relation relation, Buffer buffer,
/*
* Perform visibility checks for heap pruning.
*
* This is more complicated than just using GlobalVisTestIsRemovableXid()
* because of old_snapshot_threshold. We only want to increase the threshold
* that triggers errors for old snapshots when we actually decide to remove a
* row based on the limited horizon.
*
* Due to its cost we also only want to call
* TransactionIdLimitedForOldSnapshots() if necessary, i.e. we might not have
* done so in heap_page_prune_opt() if pd_prune_xid was old enough. But we
* still want to be able to remove rows that are too new to be removed
* according to prstate->vistest, but that can be removed based on
* old_snapshot_threshold. So we call TransactionIdLimitedForOldSnapshots() on
* demand in here, if appropriate.
*/
static HTSV_Result
heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
@@ -506,53 +439,8 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
if (res != HEAPTUPLE_RECENTLY_DEAD)
return res;
/*
* If we are already relying on the limited xmin, there is no need to
* delay doing so anymore.
*/
if (prstate->old_snap_used)
{
Assert(TransactionIdIsValid(prstate->old_snap_xmin));
if (TransactionIdPrecedes(dead_after, prstate->old_snap_xmin))
res = HEAPTUPLE_DEAD;
return res;
}
/*
* First check if GlobalVisTestIsRemovableXid() is sufficient to find the
* row dead. If not, and old_snapshot_threshold is enabled, try to use the
* lowered horizon.
*/
if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after))
res = HEAPTUPLE_DEAD;
else if (OldSnapshotThresholdActive())
{
/* haven't determined limited horizon yet, requests */
if (!TransactionIdIsValid(prstate->old_snap_xmin))
{
TransactionId horizon =
GlobalVisTestNonRemovableHorizon(prstate->vistest);
TransactionIdLimitedForOldSnapshots(horizon, prstate->rel,
&prstate->old_snap_xmin,
&prstate->old_snap_ts);
}
if (TransactionIdIsValid(prstate->old_snap_xmin) &&
TransactionIdPrecedes(dead_after, prstate->old_snap_xmin))
{
/*
* About to remove row based on snapshot_too_old. Need to raise
* the threshold so problematic accesses would error.
*/
Assert(!prstate->old_snap_used);
SetOldSnapshotThresholdTimestamp(prstate->old_snap_ts,
prstate->old_snap_xmin);
prstate->old_snap_used = true;
res = HEAPTUPLE_DEAD;
}
}
return res;
}
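A restated sketch of the pruning-time decision that the hunk above deletes, derived only from the deleted lines themselves (editorial summary, not code from the commit):

/*
 * heap_prune_satisfies_vacuum(), before this commit:
 *   1. Run the visibility check; any result other than HEAPTUPLE_RECENTLY_DEAD
 *      is final.
 *   2. If the limited "old snapshot" horizon was already relied on
 *      (old_snap_used), just compare dead_after with old_snap_xmin.
 *   3. Otherwise try GlobalVisTestIsRemovableXid(); only if that fails and
 *      OldSnapshotThresholdActive(), compute the limited horizon on demand
 *      with TransactionIdLimitedForOldSnapshots(), and call
 *      SetOldSnapshotThresholdTimestamp() before removing a row on that
 *      basis, so that later reads by old snapshots raise "snapshot too old".
 * After this commit, only the GlobalVisTestIsRemovableXid() check remains.
 */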

View File

@@ -1588,7 +1588,7 @@ retry:
* that were deleted from indexes.
*/
tuples_deleted = heap_page_prune(rel, buf, vacrel->vistest,
InvalidTransactionId, 0, &nnewlpdead,
&nnewlpdead,
&vacrel->offnum);
/*
@@ -2875,8 +2875,7 @@ should_attempt_truncation(LVRelState *vacrel)
{
BlockNumber possibly_freeable;
if (!vacrel->do_rel_truncate || VacuumFailsafeActive ||
old_snapshot_threshold >= 0)
if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
return false;
possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;

View File

@@ -278,7 +278,6 @@ _bt_moveright(Relation rel,
for (;;)
{
page = BufferGetPage(buf);
TestForOldSnapshot(snapshot, rel, page);
opaque = BTPageGetOpaque(page);
if (P_RIGHTMOST(opaque))
@@ -2029,7 +2028,6 @@ _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir)
/* step right one page */
so->currPos.buf = _bt_getbuf(rel, blkno, BT_READ);
page = BufferGetPage(so->currPos.buf);
TestForOldSnapshot(scan->xs_snapshot, rel, page);
opaque = BTPageGetOpaque(page);
/* check for deleted page */
if (!P_IGNORE(opaque))
@@ -2132,7 +2130,6 @@ _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir)
* and do it all again.
*/
page = BufferGetPage(so->currPos.buf);
TestForOldSnapshot(scan->xs_snapshot, rel, page);
opaque = BTPageGetOpaque(page);
if (!P_IGNORE(opaque))
{
@@ -2238,7 +2235,6 @@ _bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot)
CHECK_FOR_INTERRUPTS();
buf = _bt_getbuf(rel, blkno, BT_READ);
page = BufferGetPage(buf);
TestForOldSnapshot(snapshot, rel, page);
opaque = BTPageGetOpaque(page);
/*
@@ -2265,14 +2261,12 @@ _bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot)
blkno = opaque->btpo_next;
buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
page = BufferGetPage(buf);
TestForOldSnapshot(snapshot, rel, page);
opaque = BTPageGetOpaque(page);
}
/* Return to the original page to see what's up */
buf = _bt_relandgetbuf(rel, buf, obknum, BT_READ);
page = BufferGetPage(buf);
TestForOldSnapshot(snapshot, rel, page);
opaque = BTPageGetOpaque(page);
if (P_ISDELETED(opaque))
{
@@ -2290,7 +2284,6 @@ _bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot)
blkno = opaque->btpo_next;
buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
page = BufferGetPage(buf);
TestForOldSnapshot(snapshot, rel, page);
opaque = BTPageGetOpaque(page);
if (!P_ISDELETED(opaque))
break;
@@ -2351,7 +2344,6 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost,
return InvalidBuffer;
page = BufferGetPage(buf);
TestForOldSnapshot(snapshot, rel, page);
opaque = BTPageGetOpaque(page);
for (;;)
@@ -2371,7 +2363,6 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost,
RelationGetRelationName(rel));
buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
page = BufferGetPage(buf);
TestForOldSnapshot(snapshot, rel, page);
opaque = BTPageGetOpaque(page);
}

View File

@@ -862,7 +862,6 @@ redirect:
/* else new pointer points to the same page, no work needed */
page = BufferGetPage(buffer);
TestForOldSnapshot(snapshot, index, page);
isnull = SpGistPageStoresNulls(page) ? true : false;

View File

@@ -3048,12 +3048,11 @@ index_build(Relation heapRelation,
/*
* If we found any potentially broken HOT chains, mark the index as not
* being usable until the current transaction is below the event horizon.
* See src/backend/access/heap/README.HOT for discussion. Also set this
* if early pruning/vacuuming is enabled for the heap relation. While it
* might become safe to use the index earlier based on actual cleanup
* activity and other active transactions, the test for that would be much
* more complex and would require some form of blocking, so keep it simple
* and fast by just using the current transaction.
* See src/backend/access/heap/README.HOT for discussion. While it might
* become safe to use the index earlier based on actual cleanup activity
* and other active transactions, the test for that would be much more
* complex and would require some form of blocking, so keep it simple and
* fast by just using the current transaction.
*
* However, when reindexing an existing index, we should do nothing here.
* Any HOT chains that are broken with respect to the index must predate
@@ -3065,7 +3064,7 @@ index_build(Relation heapRelation,
*
* We also need not set indcheckxmin during a concurrent index build,
* because we won't set indisvalid true until all transactions that care
* about the broken HOT chains or early pruning/vacuuming are gone.
* about the broken HOT chains are gone.
*
* Therefore, this code path can only be taken during non-concurrent
* CREATE INDEX. Thus the fact that heap_update will set the pg_index
@@ -3074,7 +3073,7 @@ index_build(Relation heapRelation,
* about any concurrent readers of the tuple; no other transaction can see
* it yet.
*/
if ((indexInfo->ii_BrokenHotChain || EarlyPruningEnabled(heapRelation)) &&
if (indexInfo->ii_BrokenHotChain &&
!isreindex &&
!indexInfo->ii_Concurrent)
{
@@ -3759,11 +3758,6 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
* reindexing pg_index itself, we must not try to update tuples in it.
* pg_index's indexes should always have these flags in their clean state,
* so that won't happen.
*
* If early pruning/vacuuming is enabled for the heap relation, the
* usability horizon must be advanced to the current transaction on every
* build or rebuild. pg_index is OK in this regard because catalog tables
* are not subject to early cleanup.
*/
if (!skipped_constraint)
{
@@ -3771,7 +3765,6 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
HeapTuple indexTuple;
Form_pg_index indexForm;
bool index_bad;
bool early_pruning_enabled = EarlyPruningEnabled(heapRelation);
pg_index = table_open(IndexRelationId, RowExclusiveLock);
@@ -3785,12 +3778,11 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
!indexForm->indisready ||
!indexForm->indislive);
if (index_bad ||
(indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain) ||
early_pruning_enabled)
(indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain))
{
if (!indexInfo->ii_BrokenHotChain && !early_pruning_enabled)
if (!indexInfo->ii_BrokenHotChain)
indexForm->indcheckxmin = false;
else if (index_bad || early_pruning_enabled)
else if (index_bad)
indexForm->indcheckxmin = true;
indexForm->indisvalid = true;
indexForm->indisready = true;

View File

@@ -1110,25 +1110,6 @@ vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
*/
cutoffs->OldestXmin = GetOldestNonRemovableTransactionId(rel);
if (OldSnapshotThresholdActive())
{
TransactionId limit_xmin;
TimestampTz limit_ts;
if (TransactionIdLimitedForOldSnapshots(cutoffs->OldestXmin, rel,
&limit_xmin, &limit_ts))
{
/*
* TODO: We should only set the threshold if we are pruning on the
* basis of the increased limits. Not as crucial here as it is
* for opportunistic pruning (which often happens at a much higher
* frequency), but would still be a significant improvement.
*/
SetOldSnapshotThresholdTimestamp(limit_ts, limit_xmin);
cutoffs->OldestXmin = limit_xmin;
}
}
Assert(TransactionIdIsNormal(cutoffs->OldestXmin));
/* Acquire OldestMxact */

View File

@@ -5575,20 +5575,3 @@ IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_context)
wb_context->nr_pending = 0;
}
/*
* Implement slower/larger portions of TestForOldSnapshot
*
* Smaller/faster portions are put inline, but the entire set of logic is too
* big for that.
*/
void
TestForOldSnapshot_impl(Snapshot snapshot, Relation relation)
{
if (RelationAllowsEarlyPruning(relation)
&& (snapshot)->whenTaken < GetOldSnapshotThresholdTimestamp())
ereport(ERROR,
(errcode(ERRCODE_SNAPSHOT_TOO_OLD),
errmsg("snapshot too old")));
}
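For context, the fast-path half of this check lived as a static inline TestForOldSnapshot() in bufmgr.h, which this commit also deletes; a rough paraphrase from memory of the pre-removal header, not quoted from this diff:

static inline void
TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page)
{
	/*
	 * Cheap inline tests only; fall through to the out-of-line slow path
	 * when the feature is enabled and the page has been modified after the
	 * snapshot was taken.
	 */
	if (old_snapshot_threshold >= 0 &&
		snapshot != NULL &&
		snapshot->snapshot_type == SNAPSHOT_MVCC &&
		!XLogRecPtrIsInvalid(snapshot->lsn) &&
		PageGetLSN(page) > snapshot->lsn)
		TestForOldSnapshot_impl(snapshot, relation);	/* may ereport "snapshot too old" */
}

With old_snapshot_threshold gone the condition can never be true, which is why the index and heap AM hunks earlier in this commit simply drop their TestForOldSnapshot() calls.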

View File

@@ -138,7 +138,6 @@ CalculateShmemSize(int *num_semaphores)
size = add_size(size, WalRcvShmemSize());
size = add_size(size, PgArchShmemSize());
size = add_size(size, ApplyLauncherShmemSize());
size = add_size(size, SnapMgrShmemSize());
size = add_size(size, BTreeShmemSize());
size = add_size(size, SyncScanShmemSize());
size = add_size(size, AsyncShmemSize());
@@ -298,7 +297,6 @@ CreateSharedMemoryAndSemaphores(void)
/*
* Set up other modules that need some shared memory space
*/
SnapMgrInit();
BTreeShmemInit();
SyncScanShmemInit();
AsyncShmemInit();

View File

@@ -2066,34 +2066,6 @@ GetMaxSnapshotSubxidCount(void)
return TOTAL_MAX_CACHED_SUBXIDS;
}
/*
* Initialize old_snapshot_threshold specific parts of a newly build snapshot.
*/
static void
GetSnapshotDataInitOldSnapshot(Snapshot snapshot)
{
if (!OldSnapshotThresholdActive())
{
/*
* If not using "snapshot too old" feature, fill related fields with
* dummy values that don't require any locking.
*/
snapshot->lsn = InvalidXLogRecPtr;
snapshot->whenTaken = 0;
}
else
{
/*
* Capture the current time and WAL stream location in case this
* snapshot becomes old enough to need to fall back on the special
* "old snapshot" logic.
*/
snapshot->lsn = GetXLogInsertRecPtr();
snapshot->whenTaken = GetSnapshotCurrentTimestamp();
MaintainOldSnapshotTimeMapping(snapshot->whenTaken, snapshot->xmin);
}
}
/*
* Helper function for GetSnapshotData() that checks if the bulk of the
* visibility information in the snapshot is still valid. If so, it updates
@@ -2147,8 +2119,8 @@ GetSnapshotDataReuse(Snapshot snapshot)
snapshot->active_count = 0;
snapshot->regd_count = 0;
snapshot->copied = false;
GetSnapshotDataInitOldSnapshot(snapshot);
snapshot->lsn = InvalidXLogRecPtr;
snapshot->whenTaken = 0;
return true;
}
@@ -2529,8 +2501,8 @@ GetSnapshotData(Snapshot snapshot)
snapshot->active_count = 0;
snapshot->regd_count = 0;
snapshot->copied = false;
GetSnapshotDataInitOldSnapshot(snapshot);
snapshot->lsn = InvalidXLogRecPtr;
snapshot->whenTaken = 0;
return snapshot;
}

View File

@@ -47,7 +47,7 @@ CommitTsSLRULock 38
CommitTsLock 39
ReplicationOriginLock 40
MultiXactTruncationLock 41
OldSnapshotTimeMapLock 42
# 42 was OldSnapshotTimeMapLock
LogicalRepWorkerLock 43
XactTruncationLock 44
# 45 was XactTruncationLock until removal of BackendRandomLock

View File

@@ -312,7 +312,6 @@ WAIT_EVENT_DOCONLY CommitTsSLRU "Waiting to access the commit timestamp SLRU cac
WAIT_EVENT_DOCONLY CommitTs "Waiting to read or update the last value set for a transaction commit timestamp."
WAIT_EVENT_DOCONLY ReplicationOrigin "Waiting to create, drop or use a replication origin."
WAIT_EVENT_DOCONLY MultiXactTruncation "Waiting to read or truncate multixact information."
WAIT_EVENT_DOCONLY OldSnapshotTimeMap "Waiting to read or update old snapshot control information."
WAIT_EVENT_DOCONLY LogicalRepWorker "Waiting to read or update the state of logical replication workers."
WAIT_EVENT_DOCONLY XactTruncation "Waiting to execute <function>pg_xact_status</function> or update the oldest transaction ID available to it."
WAIT_EVENT_DOCONLY WrapLimitsVacuum "Waiting to update limits on transaction id and multixact consumption."

View File

@@ -439,10 +439,6 @@ Section: Class 58 - System Error (errors external to PostgreSQL itself)
58P01 E ERRCODE_UNDEFINED_FILE undefined_file
58P02 E ERRCODE_DUPLICATE_FILE duplicate_file
Section: Class 72 - Snapshot Failure
# (class borrowed from Oracle)
72000 E ERRCODE_SNAPSHOT_TOO_OLD snapshot_too_old
Section: Class F0 - Configuration File Error
# (PostgreSQL-specific error class)

View File

@@ -3282,17 +3282,6 @@ struct config_int ConfigureNamesInt[] =
check_autovacuum_work_mem, NULL, NULL
},
{
{"old_snapshot_threshold", PGC_POSTMASTER, RESOURCES_ASYNCHRONOUS,
gettext_noop("Time before a snapshot is too old to read pages changed after the snapshot was taken."),
gettext_noop("A value of -1 disables this feature."),
GUC_UNIT_MIN
},
&old_snapshot_threshold,
-1, -1, MINS_PER_HOUR * HOURS_PER_DAY * 60,
NULL, NULL, NULL
},
{
{"tcp_keepalives_idle", PGC_USERSET, CONN_AUTH_TCP,
gettext_noop("Time between issuing TCP keepalives."),

View File

@@ -197,8 +197,6 @@
#max_parallel_workers = 8 # maximum number of max_worker_processes that
# can be used in parallel operations
#parallel_leader_participation = on
#old_snapshot_threshold = -1 # 1min-60d; -1 disables; 0 is immediate
# (change requires restart)
#------------------------------------------------------------------------------

View File

@@ -65,7 +65,6 @@
#include "storage/spin.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/old_snapshot.h"
#include "utils/rel.h"
#include "utils/resowner_private.h"
#include "utils/snapmgr.h"
@@ -73,14 +72,6 @@
#include "utils/timestamp.h"
/*
* GUC parameters
*/
int old_snapshot_threshold; /* number of minutes, -1 disables */
volatile OldSnapshotControlData *oldSnapshotControl;
/*
* CurrentSnapshot points to the only snapshot taken in transaction-snapshot
* mode, and to the latest one taken in a read-committed transaction.
@@ -170,7 +161,6 @@ typedef struct ExportedSnapshot
static List *exportedSnapshots = NIL;
/* Prototypes for local functions */
static TimestampTz AlignTimestampToMinuteBoundary(TimestampTz ts);
static Snapshot CopySnapshot(Snapshot snapshot);
static void FreeSnapshot(Snapshot snapshot);
static void SnapshotResetXmin(void);
@@ -194,50 +184,6 @@ typedef struct SerializedSnapshotData
XLogRecPtr lsn;
} SerializedSnapshotData;
Size
SnapMgrShmemSize(void)
{
Size size;
size = offsetof(OldSnapshotControlData, xid_by_minute);
if (old_snapshot_threshold > 0)
size = add_size(size, mul_size(sizeof(TransactionId),
OLD_SNAPSHOT_TIME_MAP_ENTRIES));
return size;
}
/*
* Initialize for managing old snapshot detection.
*/
void
SnapMgrInit(void)
{
bool found;
/*
* Create or attach to the OldSnapshotControlData structure.
*/
oldSnapshotControl = (volatile OldSnapshotControlData *)
ShmemInitStruct("OldSnapshotControlData",
SnapMgrShmemSize(), &found);
if (!found)
{
SpinLockInit(&oldSnapshotControl->mutex_current);
oldSnapshotControl->current_timestamp = 0;
SpinLockInit(&oldSnapshotControl->mutex_latest_xmin);
oldSnapshotControl->latest_xmin = InvalidTransactionId;
oldSnapshotControl->next_map_update = 0;
SpinLockInit(&oldSnapshotControl->mutex_threshold);
oldSnapshotControl->threshold_timestamp = 0;
oldSnapshotControl->threshold_xid = InvalidTransactionId;
oldSnapshotControl->head_offset = 0;
oldSnapshotControl->head_timestamp = 0;
oldSnapshotControl->count_used = 0;
}
}
/*
* GetTransactionSnapshot
* Get the appropriate snapshot for a new query in a transaction.
@@ -1656,420 +1602,6 @@ HaveRegisteredOrActiveSnapshot(void)
}
/*
* Return a timestamp that is exactly on a minute boundary.
*
* If the argument is already aligned, return that value, otherwise move to
* the next minute boundary following the given time.
*/
static TimestampTz
AlignTimestampToMinuteBoundary(TimestampTz ts)
{
TimestampTz retval = ts + (USECS_PER_MINUTE - 1);
return retval - (retval % USECS_PER_MINUTE);
}
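A quick sanity sketch of the rounding rule above (editorial example; TimestampTz counts microseconds, so USECS_PER_MINUTE is one minute's worth):

	Assert(AlignTimestampToMinuteBoundary(0) == 0);					/* already aligned */
	Assert(AlignTimestampToMinuteBoundary(1) == USECS_PER_MINUTE);	/* rounds up */
	Assert(AlignTimestampToMinuteBoundary(USECS_PER_MINUTE) == USECS_PER_MINUTE);

Adding USECS_PER_MINUTE - 1 and then truncating to a multiple of USECS_PER_MINUTE is the usual "round up to the next boundary" idiom.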
/*
* Get current timestamp for snapshots
*
* This is basically GetCurrentTimestamp(), but with a guarantee that
* the result never moves backward.
*/
TimestampTz
GetSnapshotCurrentTimestamp(void)
{
TimestampTz now = GetCurrentTimestamp();
/*
* Don't let time move backward; if it hasn't advanced, use the old value.
*/
SpinLockAcquire(&oldSnapshotControl->mutex_current);
if (now <= oldSnapshotControl->current_timestamp)
now = oldSnapshotControl->current_timestamp;
else
oldSnapshotControl->current_timestamp = now;
SpinLockRelease(&oldSnapshotControl->mutex_current);
return now;
}
/*
* Get timestamp through which vacuum may have processed based on last stored
* value for threshold_timestamp.
*
* XXX: So far, we never trust that a 64-bit value can be read atomically; if
* that ever changes, we could get rid of the spinlock here.
*/
TimestampTz
GetOldSnapshotThresholdTimestamp(void)
{
TimestampTz threshold_timestamp;
SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
threshold_timestamp = oldSnapshotControl->threshold_timestamp;
SpinLockRelease(&oldSnapshotControl->mutex_threshold);
return threshold_timestamp;
}
void
SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit)
{
SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
Assert(oldSnapshotControl->threshold_timestamp <= ts);
Assert(TransactionIdPrecedesOrEquals(oldSnapshotControl->threshold_xid, xlimit));
oldSnapshotControl->threshold_timestamp = ts;
oldSnapshotControl->threshold_xid = xlimit;
SpinLockRelease(&oldSnapshotControl->mutex_threshold);
}
/*
* XXX: Magic to keep old_snapshot_threshold tests appear "working". They
* currently are broken, and discussion of what to do about them is
* ongoing. See
* https://www.postgresql.org/message-id/20200403001235.e6jfdll3gh2ygbuc%40alap3.anarazel.de
*/
void
SnapshotTooOldMagicForTest(void)
{
TimestampTz ts = GetSnapshotCurrentTimestamp();
Assert(old_snapshot_threshold == 0);
ts -= 5 * USECS_PER_SEC;
SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
oldSnapshotControl->threshold_timestamp = ts;
SpinLockRelease(&oldSnapshotControl->mutex_threshold);
}
/*
* If there is a valid mapping for the timestamp, set *xlimitp to
* that. Returns whether there is such a mapping.
*/
static bool
GetOldSnapshotFromTimeMapping(TimestampTz ts, TransactionId *xlimitp)
{
bool in_mapping = false;
Assert(ts == AlignTimestampToMinuteBoundary(ts));
LWLockAcquire(OldSnapshotTimeMapLock, LW_SHARED);
if (oldSnapshotControl->count_used > 0
&& ts >= oldSnapshotControl->head_timestamp)
{
int offset;
offset = ((ts - oldSnapshotControl->head_timestamp)
/ USECS_PER_MINUTE);
if (offset > oldSnapshotControl->count_used - 1)
offset = oldSnapshotControl->count_used - 1;
offset = (oldSnapshotControl->head_offset + offset)
% OLD_SNAPSHOT_TIME_MAP_ENTRIES;
*xlimitp = oldSnapshotControl->xid_by_minute[offset];
in_mapping = true;
}
LWLockRelease(OldSnapshotTimeMapLock);
return in_mapping;
}
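A worked lookup for the function above, with hypothetical values: if head_timestamp = T, head_offset = 7 and count_used = 4 (buckets for T through T+3 minutes), then a request for ts = T + 10 minutes computes offset = 10, clamps it to count_used - 1 = 3, and returns xid_by_minute[(7 + 3) % OLD_SNAPSHOT_TIME_MAP_ENTRIES], i.e. the newest entry's xid.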
/*
* TransactionIdLimitedForOldSnapshots
*
* Apply old snapshot limit. This is intended to be called for page pruning
* and table vacuuming, to allow old_snapshot_threshold to override the normal
* global xmin value. Actual testing for snapshot too old will be based on
* whether a snapshot timestamp is prior to the threshold timestamp set in
* this function.
*
* If the limited horizon allows a cleanup action that otherwise would not be
* possible, SetOldSnapshotThresholdTimestamp(*limit_ts, *limit_xid) needs to
* be called before that cleanup action.
*/
bool
TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
Relation relation,
TransactionId *limit_xid,
TimestampTz *limit_ts)
{
TimestampTz ts;
TransactionId xlimit = recentXmin;
TransactionId latest_xmin;
TimestampTz next_map_update_ts;
TransactionId threshold_timestamp;
TransactionId threshold_xid;
Assert(TransactionIdIsNormal(recentXmin));
Assert(OldSnapshotThresholdActive());
Assert(limit_ts != NULL && limit_xid != NULL);
/*
* TestForOldSnapshot() assumes early pruning advances the page LSN, so we
* can't prune early when skipping WAL.
*/
if (!RelationAllowsEarlyPruning(relation) || !RelationNeedsWAL(relation))
return false;
ts = GetSnapshotCurrentTimestamp();
SpinLockAcquire(&oldSnapshotControl->mutex_latest_xmin);
latest_xmin = oldSnapshotControl->latest_xmin;
next_map_update_ts = oldSnapshotControl->next_map_update;
SpinLockRelease(&oldSnapshotControl->mutex_latest_xmin);
/*
* Zero threshold always overrides to latest xmin, if valid. Without some
* heuristic it will find its own snapshot too old on, for example, a
* simple UPDATE -- which would make it useless for most testing, but
* there is no principled way to ensure that it doesn't fail in this way.
* Use a five-second delay to try to get useful testing behavior, but this
* may need adjustment.
*/
if (old_snapshot_threshold == 0)
{
if (TransactionIdPrecedes(latest_xmin, MyProc->xmin)
&& TransactionIdFollows(latest_xmin, xlimit))
xlimit = latest_xmin;
ts -= 5 * USECS_PER_SEC;
}
else
{
ts = AlignTimestampToMinuteBoundary(ts)
- (old_snapshot_threshold * USECS_PER_MINUTE);
/* Check for fast exit without LW locking. */
SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
threshold_timestamp = oldSnapshotControl->threshold_timestamp;
threshold_xid = oldSnapshotControl->threshold_xid;
SpinLockRelease(&oldSnapshotControl->mutex_threshold);
if (ts == threshold_timestamp)
{
/*
* Current timestamp is in same bucket as the last limit that was
* applied. Reuse.
*/
xlimit = threshold_xid;
}
else if (ts == next_map_update_ts)
{
/*
* FIXME: This branch is super iffy - but that should probably
* fixed separately.
*/
xlimit = latest_xmin;
}
else if (GetOldSnapshotFromTimeMapping(ts, &xlimit))
{
}
/*
* Failsafe protection against vacuuming work of active transaction.
*
* This is not an assertion because we avoid the spinlock for
* performance, leaving open the possibility that xlimit could advance
* and be more current; but it seems prudent to apply this limit. It
* might make pruning a tiny bit less aggressive than it could be, but
* protects against data loss bugs.
*/
if (TransactionIdIsNormal(latest_xmin)
&& TransactionIdPrecedes(latest_xmin, xlimit))
xlimit = latest_xmin;
}
if (TransactionIdIsValid(xlimit) &&
TransactionIdFollowsOrEquals(xlimit, recentXmin))
{
*limit_ts = ts;
*limit_xid = xlimit;
return true;
}
return false;
}
/*
* Take care of the circular buffer that maps time to xid.
*/
void
MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
{
TimestampTz ts;
TransactionId latest_xmin;
TimestampTz update_ts;
bool map_update_required = false;
/* Never call this function when old snapshot checking is disabled. */
Assert(old_snapshot_threshold >= 0);
ts = AlignTimestampToMinuteBoundary(whenTaken);
/*
* Keep track of the latest xmin seen by any process. Update mapping with
* a new value when we have crossed a bucket boundary.
*/
SpinLockAcquire(&oldSnapshotControl->mutex_latest_xmin);
latest_xmin = oldSnapshotControl->latest_xmin;
update_ts = oldSnapshotControl->next_map_update;
if (ts > update_ts)
{
oldSnapshotControl->next_map_update = ts;
map_update_required = true;
}
if (TransactionIdFollows(xmin, latest_xmin))
oldSnapshotControl->latest_xmin = xmin;
SpinLockRelease(&oldSnapshotControl->mutex_latest_xmin);
/* We only needed to update the most recent xmin value. */
if (!map_update_required)
return;
/* No further tracking needed for 0 (used for testing). */
if (old_snapshot_threshold == 0)
return;
/*
* We don't want to do something stupid with unusual values, but we don't
* want to litter the log with warnings or break otherwise normal
* processing for this feature; so if something seems unreasonable, just
* log at DEBUG level and return without doing anything.
*/
if (whenTaken < 0)
{
elog(DEBUG1,
"MaintainOldSnapshotTimeMapping called with negative whenTaken = %ld",
(long) whenTaken);
return;
}
if (!TransactionIdIsNormal(xmin))
{
elog(DEBUG1,
"MaintainOldSnapshotTimeMapping called with xmin = %lu",
(unsigned long) xmin);
return;
}
LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
Assert(oldSnapshotControl->head_offset >= 0);
Assert(oldSnapshotControl->head_offset < OLD_SNAPSHOT_TIME_MAP_ENTRIES);
Assert((oldSnapshotControl->head_timestamp % USECS_PER_MINUTE) == 0);
Assert(oldSnapshotControl->count_used >= 0);
Assert(oldSnapshotControl->count_used <= OLD_SNAPSHOT_TIME_MAP_ENTRIES);
if (oldSnapshotControl->count_used == 0)
{
/* set up first entry for empty mapping */
oldSnapshotControl->head_offset = 0;
oldSnapshotControl->head_timestamp = ts;
oldSnapshotControl->count_used = 1;
oldSnapshotControl->xid_by_minute[0] = xmin;
}
else if (ts < oldSnapshotControl->head_timestamp)
{
/* old ts; log it at DEBUG */
LWLockRelease(OldSnapshotTimeMapLock);
elog(DEBUG1,
"MaintainOldSnapshotTimeMapping called with old whenTaken = %ld",
(long) whenTaken);
return;
}
else if (ts <= (oldSnapshotControl->head_timestamp +
((oldSnapshotControl->count_used - 1)
* USECS_PER_MINUTE)))
{
/* existing mapping; advance xid if possible */
int bucket = (oldSnapshotControl->head_offset
+ ((ts - oldSnapshotControl->head_timestamp)
/ USECS_PER_MINUTE))
% OLD_SNAPSHOT_TIME_MAP_ENTRIES;
if (TransactionIdPrecedes(oldSnapshotControl->xid_by_minute[bucket], xmin))
oldSnapshotControl->xid_by_minute[bucket] = xmin;
}
else
{
/* We need a new bucket, but it might not be the very next one. */
int distance_to_new_tail;
int distance_to_current_tail;
int advance;
/*
* Our goal is for the new "tail" of the mapping, that is, the entry
* which is newest and thus furthest from the "head" entry, to
* correspond to "ts". Since there's one entry per minute, the
* distance between the current head and the new tail is just the
* number of minutes of difference between ts and the current
* head_timestamp.
*
* The distance from the current head to the current tail is one less
* than the number of entries in the mapping, because the entry at the
* head_offset is for 0 minutes after head_timestamp.
*
* The difference between these two values is the number of minutes by
* which we need to advance the mapping, either adding new entries or
* rotating old ones out.
*/
distance_to_new_tail =
(ts - oldSnapshotControl->head_timestamp) / USECS_PER_MINUTE;
distance_to_current_tail =
oldSnapshotControl->count_used - 1;
advance = distance_to_new_tail - distance_to_current_tail;
Assert(advance > 0);
if (advance >= OLD_SNAPSHOT_TIME_MAP_ENTRIES)
{
/* Advance is so far that all old data is junk; start over. */
oldSnapshotControl->head_offset = 0;
oldSnapshotControl->count_used = 1;
oldSnapshotControl->xid_by_minute[0] = xmin;
oldSnapshotControl->head_timestamp = ts;
}
else
{
/* Store the new value in one or more buckets. */
int i;
for (i = 0; i < advance; i++)
{
if (oldSnapshotControl->count_used == OLD_SNAPSHOT_TIME_MAP_ENTRIES)
{
/* Map full and new value replaces old head. */
int old_head = oldSnapshotControl->head_offset;
if (old_head == (OLD_SNAPSHOT_TIME_MAP_ENTRIES - 1))
oldSnapshotControl->head_offset = 0;
else
oldSnapshotControl->head_offset = old_head + 1;
oldSnapshotControl->xid_by_minute[old_head] = xmin;
oldSnapshotControl->head_timestamp += USECS_PER_MINUTE;
}
else
{
/* Extend map to unused entry. */
int new_tail = (oldSnapshotControl->head_offset
+ oldSnapshotControl->count_used)
% OLD_SNAPSHOT_TIME_MAP_ENTRIES;
oldSnapshotControl->count_used++;
oldSnapshotControl->xid_by_minute[new_tail] = xmin;
}
}
}
}
LWLockRelease(OldSnapshotTimeMapLock);
}
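A numeric illustration of the advance computation above (hypothetical values, one bucket per minute): with head_timestamp = 12:00 and count_used = 3 the map covers 12:00 through 12:02; a new ts of 12:05 gives distance_to_new_tail = 5, distance_to_current_tail = 2 and advance = 3, so the loop adds (or, if the map is full, rotates in) buckets for 12:03, 12:04 and 12:05 and stores xmin in the last of them.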
/*
* Setup a snapshot that replaces normal catalog snapshots that allows catalog
* access to behave just like it did at a certain point in the past.