mirror of
https://github.com/postgres/postgres.git
synced 2025-11-04 20:11:56 +03:00
Remove the "snapshot too old" feature.
Remove the old_snapshot_threshold setting and mechanism for producing
the error "snapshot too old", originally added by commit 848ef42b.
Unfortunately it had a number of known problems in terms of correctness
and performance, mostly reported by Andres in the course of his work on
snapshot scalability. We agreed to remove it, after a long period
without an active plan to fix it.
This is certainly a desirable feature, and someone might propose a new
or improved implementation in the future.
Reported-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/CACG%3DezYV%2BEvO135fLRdVn-ZusfVsTY6cH1OZqWtezuEYH6ciQA%40mail.gmail.com
Discussion: https://postgr.es/m/20200401064008.qob7bfnnbu4w5cw4%40alap3.anarazel.de
Discussion: https://postgr.es/m/CA%2BTgmoY%3Daqf0zjTD%2B3dUWYkgMiNDegDLFjo%2B6ze%3DWtpik%2B3XqA%40mail.gmail.com
This commit is contained in:
@@ -65,7 +65,6 @@
|
||||
#include "storage/spin.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "utils/old_snapshot.h"
|
||||
#include "utils/rel.h"
|
||||
#include "utils/resowner_private.h"
|
||||
#include "utils/snapmgr.h"
|
||||
@@ -73,14 +72,6 @@
|
||||
#include "utils/timestamp.h"
|
||||
|
||||
|
||||
/*
|
||||
* GUC parameters
|
||||
*/
|
||||
int old_snapshot_threshold; /* number of minutes, -1 disables, 0 is a special mode used for testing */
|
||||
|
||||
volatile OldSnapshotControlData *oldSnapshotControl; /* shared-memory state, created or attached by SnapMgrInit() */
|
||||
|
||||
|
||||
/*
|
||||
* CurrentSnapshot points to the only snapshot taken in transaction-snapshot
|
||||
* mode, and to the latest one taken in a read-committed transaction.
|
||||
@@ -170,7 +161,6 @@ typedef struct ExportedSnapshot
|
||||
static List *exportedSnapshots = NIL;
|
||||
|
||||
/* Prototypes for local functions */
|
||||
static TimestampTz AlignTimestampToMinuteBoundary(TimestampTz ts);
|
||||
static Snapshot CopySnapshot(Snapshot snapshot);
|
||||
static void FreeSnapshot(Snapshot snapshot);
|
||||
static void SnapshotResetXmin(void);
|
||||
@@ -194,50 +184,6 @@ typedef struct SerializedSnapshotData
|
||||
XLogRecPtr lsn;
|
||||
} SerializedSnapshotData;
|
||||
|
||||
Size
|
||||
SnapMgrShmemSize(void)
|
||||
{
|
||||
Size size;
|
||||
|
||||
size = offsetof(OldSnapshotControlData, xid_by_minute);
|
||||
if (old_snapshot_threshold > 0)
|
||||
size = add_size(size, mul_size(sizeof(TransactionId),
|
||||
OLD_SNAPSHOT_TIME_MAP_ENTRIES));
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize for managing old snapshot detection.
|
||||
*/
|
||||
void
|
||||
SnapMgrInit(void)
|
||||
{
|
||||
bool found;
|
||||
|
||||
/*
|
||||
* Create or attach to the OldSnapshotControlData structure.
|
||||
*/
|
||||
oldSnapshotControl = (volatile OldSnapshotControlData *)
|
||||
ShmemInitStruct("OldSnapshotControlData",
|
||||
SnapMgrShmemSize(), &found);
|
||||
|
||||
if (!found)
|
||||
{
|
||||
SpinLockInit(&oldSnapshotControl->mutex_current);
|
||||
oldSnapshotControl->current_timestamp = 0;
|
||||
SpinLockInit(&oldSnapshotControl->mutex_latest_xmin);
|
||||
oldSnapshotControl->latest_xmin = InvalidTransactionId;
|
||||
oldSnapshotControl->next_map_update = 0;
|
||||
SpinLockInit(&oldSnapshotControl->mutex_threshold);
|
||||
oldSnapshotControl->threshold_timestamp = 0;
|
||||
oldSnapshotControl->threshold_xid = InvalidTransactionId;
|
||||
oldSnapshotControl->head_offset = 0;
|
||||
oldSnapshotControl->head_timestamp = 0;
|
||||
oldSnapshotControl->count_used = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* GetTransactionSnapshot
|
||||
* Get the appropriate snapshot for a new query in a transaction.
|
||||
@@ -1656,420 +1602,6 @@ HaveRegisteredOrActiveSnapshot(void)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Return a timestamp that is exactly on a minute boundary.
|
||||
*
|
||||
* If the argument is already aligned, return that value, otherwise move to
|
||||
* the next minute boundary following the given time.
|
||||
*/
|
||||
static TimestampTz
|
||||
AlignTimestampToMinuteBoundary(TimestampTz ts)
|
||||
{
|
||||
TimestampTz retval = ts + (USECS_PER_MINUTE - 1);
|
||||
|
||||
return retval - (retval % USECS_PER_MINUTE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get current timestamp for snapshots
|
||||
*
|
||||
* This is basically GetCurrentTimestamp(), but with a guarantee that
|
||||
* the result never moves backward.
|
||||
*/
|
||||
TimestampTz
|
||||
GetSnapshotCurrentTimestamp(void)
|
||||
{
|
||||
TimestampTz now = GetCurrentTimestamp();
|
||||
|
||||
/*
|
||||
* Don't let time move backward; if it hasn't advanced, use the old value.
|
||||
*/
|
||||
SpinLockAcquire(&oldSnapshotControl->mutex_current);
|
||||
if (now <= oldSnapshotControl->current_timestamp)
|
||||
now = oldSnapshotControl->current_timestamp;
|
||||
else
|
||||
oldSnapshotControl->current_timestamp = now;
|
||||
SpinLockRelease(&oldSnapshotControl->mutex_current);
|
||||
|
||||
return now;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get timestamp through which vacuum may have processed based on last stored
|
||||
* value for threshold_timestamp.
|
||||
*
|
||||
* XXX: So far, we never trust that a 64-bit value can be read atomically; if
|
||||
* that ever changes, we could get rid of the spinlock here.
|
||||
*/
|
||||
TimestampTz
|
||||
GetOldSnapshotThresholdTimestamp(void)
|
||||
{
|
||||
TimestampTz threshold_timestamp;
|
||||
|
||||
SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
|
||||
threshold_timestamp = oldSnapshotControl->threshold_timestamp;
|
||||
SpinLockRelease(&oldSnapshotControl->mutex_threshold);
|
||||
|
||||
return threshold_timestamp;
|
||||
}
|
||||
|
||||
void
|
||||
SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit)
|
||||
{
|
||||
SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
|
||||
Assert(oldSnapshotControl->threshold_timestamp <= ts);
|
||||
Assert(TransactionIdPrecedesOrEquals(oldSnapshotControl->threshold_xid, xlimit));
|
||||
oldSnapshotControl->threshold_timestamp = ts;
|
||||
oldSnapshotControl->threshold_xid = xlimit;
|
||||
SpinLockRelease(&oldSnapshotControl->mutex_threshold);
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX: Magic to keep old_snapshot_threshold tests appear "working". They
|
||||
* currently are broken, and discussion of what to do about them is
|
||||
* ongoing. See
|
||||
* https://www.postgresql.org/message-id/20200403001235.e6jfdll3gh2ygbuc%40alap3.anarazel.de
|
||||
*/
|
||||
void
|
||||
SnapshotTooOldMagicForTest(void)
|
||||
{
|
||||
TimestampTz ts = GetSnapshotCurrentTimestamp();
|
||||
|
||||
Assert(old_snapshot_threshold == 0);
|
||||
|
||||
ts -= 5 * USECS_PER_SEC;
|
||||
|
||||
SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
|
||||
oldSnapshotControl->threshold_timestamp = ts;
|
||||
SpinLockRelease(&oldSnapshotControl->mutex_threshold);
|
||||
}
|
||||
|
||||
/*
|
||||
* If there is a valid mapping for the timestamp, set *xlimitp to
|
||||
* that. Returns whether there is such a mapping.
|
||||
*/
|
||||
static bool
|
||||
GetOldSnapshotFromTimeMapping(TimestampTz ts, TransactionId *xlimitp)
|
||||
{
|
||||
bool in_mapping = false;
|
||||
|
||||
Assert(ts == AlignTimestampToMinuteBoundary(ts));
|
||||
|
||||
LWLockAcquire(OldSnapshotTimeMapLock, LW_SHARED);
|
||||
|
||||
if (oldSnapshotControl->count_used > 0
|
||||
&& ts >= oldSnapshotControl->head_timestamp)
|
||||
{
|
||||
int offset;
|
||||
|
||||
offset = ((ts - oldSnapshotControl->head_timestamp)
|
||||
/ USECS_PER_MINUTE);
|
||||
if (offset > oldSnapshotControl->count_used - 1)
|
||||
offset = oldSnapshotControl->count_used - 1;
|
||||
offset = (oldSnapshotControl->head_offset + offset)
|
||||
% OLD_SNAPSHOT_TIME_MAP_ENTRIES;
|
||||
|
||||
*xlimitp = oldSnapshotControl->xid_by_minute[offset];
|
||||
|
||||
in_mapping = true;
|
||||
}
|
||||
|
||||
LWLockRelease(OldSnapshotTimeMapLock);
|
||||
|
||||
return in_mapping;
|
||||
}
|
||||
|
||||
/*
|
||||
* TransactionIdLimitedForOldSnapshots
|
||||
*
|
||||
* Apply old snapshot limit. This is intended to be called for page pruning
|
||||
* and table vacuuming, to allow old_snapshot_threshold to override the normal
|
||||
* global xmin value. Actual testing for snapshot too old will be based on
|
||||
* whether a snapshot timestamp is prior to the threshold timestamp set in
|
||||
* this function.
|
||||
*
|
||||
* If the limited horizon allows a cleanup action that otherwise would not be
|
||||
* possible, SetOldSnapshotThresholdTimestamp(*limit_ts, *limit_xid) needs to
|
||||
* be called before that cleanup action.
|
||||
*/
|
||||
bool
|
||||
TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
|
||||
Relation relation,
|
||||
TransactionId *limit_xid,
|
||||
TimestampTz *limit_ts)
|
||||
{
|
||||
TimestampTz ts;
|
||||
TransactionId xlimit = recentXmin;
|
||||
TransactionId latest_xmin;
|
||||
TimestampTz next_map_update_ts;
|
||||
TransactionId threshold_timestamp;
|
||||
TransactionId threshold_xid;
|
||||
|
||||
Assert(TransactionIdIsNormal(recentXmin));
|
||||
Assert(OldSnapshotThresholdActive());
|
||||
Assert(limit_ts != NULL && limit_xid != NULL);
|
||||
|
||||
/*
|
||||
* TestForOldSnapshot() assumes early pruning advances the page LSN, so we
|
||||
* can't prune early when skipping WAL.
|
||||
*/
|
||||
if (!RelationAllowsEarlyPruning(relation) || !RelationNeedsWAL(relation))
|
||||
return false;
|
||||
|
||||
ts = GetSnapshotCurrentTimestamp();
|
||||
|
||||
SpinLockAcquire(&oldSnapshotControl->mutex_latest_xmin);
|
||||
latest_xmin = oldSnapshotControl->latest_xmin;
|
||||
next_map_update_ts = oldSnapshotControl->next_map_update;
|
||||
SpinLockRelease(&oldSnapshotControl->mutex_latest_xmin);
|
||||
|
||||
/*
|
||||
* Zero threshold always overrides to latest xmin, if valid. Without some
|
||||
* heuristic it will find its own snapshot too old on, for example, a
|
||||
* simple UPDATE -- which would make it useless for most testing, but
|
||||
* there is no principled way to ensure that it doesn't fail in this way.
|
||||
* Use a five-second delay to try to get useful testing behavior, but this
|
||||
* may need adjustment.
|
||||
*/
|
||||
if (old_snapshot_threshold == 0)
|
||||
{
|
||||
if (TransactionIdPrecedes(latest_xmin, MyProc->xmin)
|
||||
&& TransactionIdFollows(latest_xmin, xlimit))
|
||||
xlimit = latest_xmin;
|
||||
|
||||
ts -= 5 * USECS_PER_SEC;
|
||||
}
|
||||
else
|
||||
{
|
||||
ts = AlignTimestampToMinuteBoundary(ts)
|
||||
- (old_snapshot_threshold * USECS_PER_MINUTE);
|
||||
|
||||
/* Check for fast exit without LW locking. */
|
||||
SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
|
||||
threshold_timestamp = oldSnapshotControl->threshold_timestamp;
|
||||
threshold_xid = oldSnapshotControl->threshold_xid;
|
||||
SpinLockRelease(&oldSnapshotControl->mutex_threshold);
|
||||
|
||||
if (ts == threshold_timestamp)
|
||||
{
|
||||
/*
|
||||
* Current timestamp is in same bucket as the last limit that was
|
||||
* applied. Reuse.
|
||||
*/
|
||||
xlimit = threshold_xid;
|
||||
}
|
||||
else if (ts == next_map_update_ts)
|
||||
{
|
||||
/*
|
||||
* FIXME: This branch is super iffy - but that should probably
|
||||
* fixed separately.
|
||||
*/
|
||||
xlimit = latest_xmin;
|
||||
}
|
||||
else if (GetOldSnapshotFromTimeMapping(ts, &xlimit))
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
* Failsafe protection against vacuuming work of active transaction.
|
||||
*
|
||||
* This is not an assertion because we avoid the spinlock for
|
||||
* performance, leaving open the possibility that xlimit could advance
|
||||
* and be more current; but it seems prudent to apply this limit. It
|
||||
* might make pruning a tiny bit less aggressive than it could be, but
|
||||
* protects against data loss bugs.
|
||||
*/
|
||||
if (TransactionIdIsNormal(latest_xmin)
|
||||
&& TransactionIdPrecedes(latest_xmin, xlimit))
|
||||
xlimit = latest_xmin;
|
||||
}
|
||||
|
||||
if (TransactionIdIsValid(xlimit) &&
|
||||
TransactionIdFollowsOrEquals(xlimit, recentXmin))
|
||||
{
|
||||
*limit_ts = ts;
|
||||
*limit_xid = xlimit;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Take care of the circular buffer that maps time to xid.
|
||||
*/
|
||||
void
|
||||
MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
|
||||
{
|
||||
TimestampTz ts;
|
||||
TransactionId latest_xmin;
|
||||
TimestampTz update_ts;
|
||||
bool map_update_required = false;
|
||||
|
||||
/* Never call this function when old snapshot checking is disabled. */
|
||||
Assert(old_snapshot_threshold >= 0);
|
||||
|
||||
ts = AlignTimestampToMinuteBoundary(whenTaken);
|
||||
|
||||
/*
|
||||
* Keep track of the latest xmin seen by any process. Update mapping with
|
||||
* a new value when we have crossed a bucket boundary.
|
||||
*/
|
||||
SpinLockAcquire(&oldSnapshotControl->mutex_latest_xmin);
|
||||
latest_xmin = oldSnapshotControl->latest_xmin;
|
||||
update_ts = oldSnapshotControl->next_map_update;
|
||||
if (ts > update_ts)
|
||||
{
|
||||
oldSnapshotControl->next_map_update = ts;
|
||||
map_update_required = true;
|
||||
}
|
||||
if (TransactionIdFollows(xmin, latest_xmin))
|
||||
oldSnapshotControl->latest_xmin = xmin;
|
||||
SpinLockRelease(&oldSnapshotControl->mutex_latest_xmin);
|
||||
|
||||
/* We only needed to update the most recent xmin value. */
|
||||
if (!map_update_required)
|
||||
return;
|
||||
|
||||
/* No further tracking needed for 0 (used for testing). */
|
||||
if (old_snapshot_threshold == 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* We don't want to do something stupid with unusual values, but we don't
|
||||
* want to litter the log with warnings or break otherwise normal
|
||||
* processing for this feature; so if something seems unreasonable, just
|
||||
* log at DEBUG level and return without doing anything.
|
||||
*/
|
||||
if (whenTaken < 0)
|
||||
{
|
||||
elog(DEBUG1,
|
||||
"MaintainOldSnapshotTimeMapping called with negative whenTaken = %ld",
|
||||
(long) whenTaken);
|
||||
return;
|
||||
}
|
||||
if (!TransactionIdIsNormal(xmin))
|
||||
{
|
||||
elog(DEBUG1,
|
||||
"MaintainOldSnapshotTimeMapping called with xmin = %lu",
|
||||
(unsigned long) xmin);
|
||||
return;
|
||||
}
|
||||
|
||||
LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
|
||||
|
||||
Assert(oldSnapshotControl->head_offset >= 0);
|
||||
Assert(oldSnapshotControl->head_offset < OLD_SNAPSHOT_TIME_MAP_ENTRIES);
|
||||
Assert((oldSnapshotControl->head_timestamp % USECS_PER_MINUTE) == 0);
|
||||
Assert(oldSnapshotControl->count_used >= 0);
|
||||
Assert(oldSnapshotControl->count_used <= OLD_SNAPSHOT_TIME_MAP_ENTRIES);
|
||||
|
||||
if (oldSnapshotControl->count_used == 0)
|
||||
{
|
||||
/* set up first entry for empty mapping */
|
||||
oldSnapshotControl->head_offset = 0;
|
||||
oldSnapshotControl->head_timestamp = ts;
|
||||
oldSnapshotControl->count_used = 1;
|
||||
oldSnapshotControl->xid_by_minute[0] = xmin;
|
||||
}
|
||||
else if (ts < oldSnapshotControl->head_timestamp)
|
||||
{
|
||||
/* old ts; log it at DEBUG */
|
||||
LWLockRelease(OldSnapshotTimeMapLock);
|
||||
elog(DEBUG1,
|
||||
"MaintainOldSnapshotTimeMapping called with old whenTaken = %ld",
|
||||
(long) whenTaken);
|
||||
return;
|
||||
}
|
||||
else if (ts <= (oldSnapshotControl->head_timestamp +
|
||||
((oldSnapshotControl->count_used - 1)
|
||||
* USECS_PER_MINUTE)))
|
||||
{
|
||||
/* existing mapping; advance xid if possible */
|
||||
int bucket = (oldSnapshotControl->head_offset
|
||||
+ ((ts - oldSnapshotControl->head_timestamp)
|
||||
/ USECS_PER_MINUTE))
|
||||
% OLD_SNAPSHOT_TIME_MAP_ENTRIES;
|
||||
|
||||
if (TransactionIdPrecedes(oldSnapshotControl->xid_by_minute[bucket], xmin))
|
||||
oldSnapshotControl->xid_by_minute[bucket] = xmin;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* We need a new bucket, but it might not be the very next one. */
|
||||
int distance_to_new_tail;
|
||||
int distance_to_current_tail;
|
||||
int advance;
|
||||
|
||||
/*
|
||||
* Our goal is for the new "tail" of the mapping, that is, the entry
|
||||
* which is newest and thus furthest from the "head" entry, to
|
||||
* correspond to "ts". Since there's one entry per minute, the
|
||||
* distance between the current head and the new tail is just the
|
||||
* number of minutes of difference between ts and the current
|
||||
* head_timestamp.
|
||||
*
|
||||
* The distance from the current head to the current tail is one less
|
||||
* than the number of entries in the mapping, because the entry at the
|
||||
* head_offset is for 0 minutes after head_timestamp.
|
||||
*
|
||||
* The difference between these two values is the number of minutes by
|
||||
* which we need to advance the mapping, either adding new entries or
|
||||
* rotating old ones out.
|
||||
*/
|
||||
distance_to_new_tail =
|
||||
(ts - oldSnapshotControl->head_timestamp) / USECS_PER_MINUTE;
|
||||
distance_to_current_tail =
|
||||
oldSnapshotControl->count_used - 1;
|
||||
advance = distance_to_new_tail - distance_to_current_tail;
|
||||
Assert(advance > 0);
|
||||
|
||||
if (advance >= OLD_SNAPSHOT_TIME_MAP_ENTRIES)
|
||||
{
|
||||
/* Advance is so far that all old data is junk; start over. */
|
||||
oldSnapshotControl->head_offset = 0;
|
||||
oldSnapshotControl->count_used = 1;
|
||||
oldSnapshotControl->xid_by_minute[0] = xmin;
|
||||
oldSnapshotControl->head_timestamp = ts;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Store the new value in one or more buckets. */
|
||||
int i;
|
||||
|
||||
for (i = 0; i < advance; i++)
|
||||
{
|
||||
if (oldSnapshotControl->count_used == OLD_SNAPSHOT_TIME_MAP_ENTRIES)
|
||||
{
|
||||
/* Map full and new value replaces old head. */
|
||||
int old_head = oldSnapshotControl->head_offset;
|
||||
|
||||
if (old_head == (OLD_SNAPSHOT_TIME_MAP_ENTRIES - 1))
|
||||
oldSnapshotControl->head_offset = 0;
|
||||
else
|
||||
oldSnapshotControl->head_offset = old_head + 1;
|
||||
oldSnapshotControl->xid_by_minute[old_head] = xmin;
|
||||
oldSnapshotControl->head_timestamp += USECS_PER_MINUTE;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Extend map to unused entry. */
|
||||
int new_tail = (oldSnapshotControl->head_offset
|
||||
+ oldSnapshotControl->count_used)
|
||||
% OLD_SNAPSHOT_TIME_MAP_ENTRIES;
|
||||
|
||||
oldSnapshotControl->count_used++;
|
||||
oldSnapshotControl->xid_by_minute[new_tail] = xmin;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LWLockRelease(OldSnapshotTimeMapLock);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Setup a snapshot that replaces normal catalog snapshots that allows catalog
|
||||
* access to behave just like it did at a certain point in the past.
|
||||
|
||||
Reference in New Issue
Block a user