mirror of
https://github.com/postgres/postgres.git
synced 2025-07-27 12:41:57 +03:00
Improve documentation about reasoning behind the order of operations
in GetSnapshotData, GetNewTransactionId, CommitTransaction, AbortTransaction, etc. Correct race condition in transaction status testing in HeapTupleSatisfiesVacuum --- this wasn't important for old VACUUM with exclusive lock on its table, but it sure is important now. All per pghackers discussion 7/11/01 and 7/12/01.
This commit is contained in:
@ -6,7 +6,7 @@
|
|||||||
* Copyright (c) 2000, PostgreSQL Global Development Group
|
* Copyright (c) 2000, PostgreSQL Global Development Group
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.41 2001/07/12 04:11:13 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.42 2001/07/16 22:43:33 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -60,8 +60,16 @@ GetNewTransactionId(TransactionId *xid)
|
|||||||
* XXX by storing xid into MyProc without acquiring SInvalLock, we are
|
* XXX by storing xid into MyProc without acquiring SInvalLock, we are
|
||||||
* relying on fetch/store of an xid to be atomic, else other backends
|
* relying on fetch/store of an xid to be atomic, else other backends
|
||||||
* might see a partially-set xid here. But holding both locks at once
|
* might see a partially-set xid here. But holding both locks at once
|
||||||
* would be a nasty concurrency hit (and at this writing, could cause a
|
* would be a nasty concurrency hit (and in fact could cause a deadlock
|
||||||
* deadlock against GetSnapshotData). So for now, assume atomicity.
|
* against GetSnapshotData). So for now, assume atomicity. Note that
|
||||||
|
* readers of PROC xid field should be careful to fetch the value only
|
||||||
|
* once, rather than assume they can read it multiple times and get the
|
||||||
|
* same answer each time.
|
||||||
|
*
|
||||||
|
* A solution to the atomic-store problem would be to give each PROC
|
||||||
|
* its own spinlock used only for fetching/storing that PROC's xid.
|
||||||
|
* (SInvalLock would then mean primarily that PROCs couldn't be added/
|
||||||
|
* removed while holding the lock.)
|
||||||
*/
|
*/
|
||||||
if (MyProc != (PROC *) NULL)
|
if (MyProc != (PROC *) NULL)
|
||||||
MyProc->xid = *xid;
|
MyProc->xid = *xid;
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.107 2001/07/15 22:48:16 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.108 2001/07/16 22:43:33 tgl Exp $
|
||||||
*
|
*
|
||||||
* NOTES
|
* NOTES
|
||||||
* Transaction aborts can now occur two ways:
|
* Transaction aborts can now occur two ways:
|
||||||
@ -1018,16 +1018,21 @@ CommitTransaction(void)
|
|||||||
|
|
||||||
CloseSequences();
|
CloseSequences();
|
||||||
AtEOXact_portals();
|
AtEOXact_portals();
|
||||||
|
|
||||||
|
/* Here is where we really truly commit. */
|
||||||
RecordTransactionCommit();
|
RecordTransactionCommit();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Let others know about no transaction in progress by me. Note that
|
* Let others know about no transaction in progress by me. Note that
|
||||||
* this must be done _before_ releasing locks we hold and
|
* this must be done _before_ releasing locks we hold and _after_
|
||||||
|
* RecordTransactionCommit.
|
||||||
|
*
|
||||||
* SpinAcquire(SInvalLock) is required: UPDATE with xid 0 is blocked
|
* SpinAcquire(SInvalLock) is required: UPDATE with xid 0 is blocked
|
||||||
* by xid 1's UPDATE, xid 1 is doing commit while xid 2 gets snapshot -
|
* by xid 1's UPDATE, xid 1 is doing commit while xid 2 gets snapshot -
|
||||||
* if xid 2's GetSnapshotData sees xid 1 as running then it must see
|
* if xid 2's GetSnapshotData sees xid 1 as running then it must see
|
||||||
* xid 0 as running as well or it will see two tuple versions - one
|
* xid 0 as running as well or it will see two tuple versions - one
|
||||||
* deleted by xid 1 and one inserted by xid 0.
|
* deleted by xid 1 and one inserted by xid 0. See notes in
|
||||||
|
* GetSnapshotData.
|
||||||
*/
|
*/
|
||||||
if (MyProc != (PROC *) NULL)
|
if (MyProc != (PROC *) NULL)
|
||||||
{
|
{
|
||||||
@ -1038,6 +1043,12 @@ CommitTransaction(void)
|
|||||||
SpinRelease(SInvalLock);
|
SpinRelease(SInvalLock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is all post-commit cleanup. Note that if an error is raised
|
||||||
|
* here, it's too late to abort the transaction. This should be just
|
||||||
|
* noncritical resource releasing.
|
||||||
|
*/
|
||||||
|
|
||||||
RelationPurgeLocalRelation(true);
|
RelationPurgeLocalRelation(true);
|
||||||
AtEOXact_temp_relations(true);
|
AtEOXact_temp_relations(true);
|
||||||
smgrDoPendingDeletes(true);
|
smgrDoPendingDeletes(true);
|
||||||
@ -1080,23 +1091,6 @@ AbortTransaction(void)
|
|||||||
/* Prevent cancel/die interrupt while cleaning up */
|
/* Prevent cancel/die interrupt while cleaning up */
|
||||||
HOLD_INTERRUPTS();
|
HOLD_INTERRUPTS();
|
||||||
|
|
||||||
/*
|
|
||||||
* Let others to know about no transaction in progress - vadim
|
|
||||||
* 11/26/96
|
|
||||||
*
|
|
||||||
* XXX it'd be nice to acquire SInvalLock for this, but too much risk of
|
|
||||||
* lockup: what if we were holding SInvalLock when we elog'd? Net effect
|
|
||||||
* is that we are relying on fetch/store of an xid to be atomic, else
|
|
||||||
* other backends might see a partially-zeroed xid here. Would it be
|
|
||||||
* safe to release spins before we reset xid/xmin? But see also
|
|
||||||
* GetNewTransactionId, which does the same thing.
|
|
||||||
*/
|
|
||||||
if (MyProc != (PROC *) NULL)
|
|
||||||
{
|
|
||||||
MyProc->xid = InvalidTransactionId;
|
|
||||||
MyProc->xmin = InvalidTransactionId;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Release any spinlocks or buffer context locks we might be holding
|
* Release any spinlocks or buffer context locks we might be holding
|
||||||
* as quickly as possible. (Real locks, however, must be held till we
|
* as quickly as possible. (Real locks, however, must be held till we
|
||||||
@ -1143,10 +1137,23 @@ AbortTransaction(void)
|
|||||||
AtAbort_Notify();
|
AtAbort_Notify();
|
||||||
CloseSequences();
|
CloseSequences();
|
||||||
AtEOXact_portals();
|
AtEOXact_portals();
|
||||||
|
|
||||||
|
/* Advertise the fact that we aborted in pg_log. */
|
||||||
RecordTransactionAbort();
|
RecordTransactionAbort();
|
||||||
|
|
||||||
/* Count transaction abort in statistics collector */
|
/*
|
||||||
pgstat_count_xact_rollback();
|
* Let others know about no transaction in progress by me. Note that
|
||||||
|
* this must be done _before_ releasing locks we hold and _after_
|
||||||
|
* RecordTransactionAbort.
|
||||||
|
*/
|
||||||
|
if (MyProc != (PROC *) NULL)
|
||||||
|
{
|
||||||
|
/* Lock SInvalLock because that's what GetSnapshotData uses. */
|
||||||
|
SpinAcquire(SInvalLock);
|
||||||
|
MyProc->xid = InvalidTransactionId;
|
||||||
|
MyProc->xmin = InvalidTransactionId;
|
||||||
|
SpinRelease(SInvalLock);
|
||||||
|
}
|
||||||
|
|
||||||
RelationPurgeLocalRelation(false);
|
RelationPurgeLocalRelation(false);
|
||||||
AtEOXact_temp_relations(false);
|
AtEOXact_temp_relations(false);
|
||||||
@ -1165,6 +1172,9 @@ AbortTransaction(void)
|
|||||||
|
|
||||||
SharedBufferChanged = false;/* safest place to do it */
|
SharedBufferChanged = false;/* safest place to do it */
|
||||||
|
|
||||||
|
/* Count transaction abort in statistics collector */
|
||||||
|
pgstat_count_xact_rollback();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* State remains TRANS_ABORT until CleanupTransaction().
|
* State remains TRANS_ABORT until CleanupTransaction().
|
||||||
*/
|
*/
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.36 2001/07/12 04:11:13 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.37 2001/07/16 22:43:34 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -193,8 +193,10 @@ TransactionIdIsInProgress(TransactionId xid)
|
|||||||
if (pOffset != INVALID_OFFSET)
|
if (pOffset != INVALID_OFFSET)
|
||||||
{
|
{
|
||||||
PROC *proc = (PROC *) MAKE_PTR(pOffset);
|
PROC *proc = (PROC *) MAKE_PTR(pOffset);
|
||||||
|
/* Fetch xid just once - see GetNewTransactionId */
|
||||||
|
TransactionId pxid = proc->xid;
|
||||||
|
|
||||||
if (TransactionIdEquals(proc->xid, xid))
|
if (TransactionIdEquals(pxid, xid))
|
||||||
{
|
{
|
||||||
result = true;
|
result = true;
|
||||||
break;
|
break;
|
||||||
@ -236,9 +238,9 @@ GetXmaxRecent(TransactionId *XmaxRecent)
|
|||||||
if (pOffset != INVALID_OFFSET)
|
if (pOffset != INVALID_OFFSET)
|
||||||
{
|
{
|
||||||
PROC *proc = (PROC *) MAKE_PTR(pOffset);
|
PROC *proc = (PROC *) MAKE_PTR(pOffset);
|
||||||
TransactionId xid;
|
/* Fetch xid just once - see GetNewTransactionId */
|
||||||
|
TransactionId xid = proc->xid;
|
||||||
|
|
||||||
xid = proc->xid;
|
|
||||||
if (! TransactionIdIsSpecial(xid))
|
if (! TransactionIdIsSpecial(xid))
|
||||||
{
|
{
|
||||||
if (TransactionIdPrecedes(xid, result))
|
if (TransactionIdPrecedes(xid, result))
|
||||||
@ -256,8 +258,19 @@ GetXmaxRecent(TransactionId *XmaxRecent)
|
|||||||
*XmaxRecent = result;
|
*XmaxRecent = result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*----------
|
||||||
* GetSnapshotData -- returns information about running transactions.
|
* GetSnapshotData -- returns information about running transactions.
|
||||||
|
*
|
||||||
|
* The returned snapshot includes xmin (lowest still-running xact ID),
|
||||||
|
* xmax (next xact ID to be assigned), and a list of running xact IDs
|
||||||
|
* in the range xmin <= xid < xmax. It is used as follows:
|
||||||
|
* All xact IDs < xmin are considered finished.
|
||||||
|
* All xact IDs >= xmax are considered still running.
|
||||||
|
* For an xact ID xmin <= xid < xmax, consult list to see whether
|
||||||
|
* it is considered running or not.
|
||||||
|
* This ensures that the set of transactions seen as "running" by the
|
||||||
|
* current xact will not change after it takes the snapshot.
|
||||||
|
*----------
|
||||||
*/
|
*/
|
||||||
Snapshot
|
Snapshot
|
||||||
GetSnapshotData(bool serializable)
|
GetSnapshotData(bool serializable)
|
||||||
@ -287,12 +300,33 @@ GetSnapshotData(bool serializable)
|
|||||||
elog(ERROR, "Memory exhausted in GetSnapshotData");
|
elog(ERROR, "Memory exhausted in GetSnapshotData");
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*--------------------
|
||||||
* Unfortunately, we have to call ReadNewTransactionId() after
|
* Unfortunately, we have to call ReadNewTransactionId() after acquiring
|
||||||
* acquiring SInvalLock above. It's not good because
|
* SInvalLock above. It's not good because ReadNewTransactionId() does
|
||||||
* ReadNewTransactionId() does SpinAcquire(XidGenLockId) but
|
* SpinAcquire(XidGenLockId), but *necessary*. We need to be sure that
|
||||||
* _necessary_.
|
* no transactions exit the set of currently-running transactions
|
||||||
|
* between the time we fetch xmax and the time we finish building our
|
||||||
|
* snapshot. Otherwise we could have a situation like this:
|
||||||
|
*
|
||||||
|
* 1. Tx Old is running (in Read Committed mode).
|
||||||
|
* 2. Tx S reads new transaction ID into xmax, then
|
||||||
|
* is swapped out before acquiring SInvalLock.
|
||||||
|
* 3. Tx New gets new transaction ID (>= S's xmax),
|
||||||
|
* makes changes and commits.
|
||||||
|
* 4. Tx Old changes some row R changed by Tx New and commits.
|
||||||
|
* 5. Tx S finishes getting its snapshot data. It sees Tx Old as
|
||||||
|
* done, but sees Tx New as still running (since New >= xmax).
|
||||||
|
*
|
||||||
|
* Now S will see R changed by both Tx Old and Tx New, *but* does not
|
||||||
|
* see other changes made by Tx New. If S is supposed to be in
|
||||||
|
* Serializable mode, this is wrong.
|
||||||
|
*
|
||||||
|
* By locking SInvalLock before we read xmax, we ensure that Tx Old
|
||||||
|
* cannot exit the set of running transactions seen by Tx S. Therefore
|
||||||
|
* both Old and New will be seen as still running => no inconsistency.
|
||||||
|
*--------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
ReadNewTransactionId(&(snapshot->xmax));
|
ReadNewTransactionId(&(snapshot->xmax));
|
||||||
|
|
||||||
for (index = 0; index < segP->lastBackend; index++)
|
for (index = 0; index < segP->lastBackend; index++)
|
||||||
@ -302,6 +336,7 @@ GetSnapshotData(bool serializable)
|
|||||||
if (pOffset != INVALID_OFFSET)
|
if (pOffset != INVALID_OFFSET)
|
||||||
{
|
{
|
||||||
PROC *proc = (PROC *) MAKE_PTR(pOffset);
|
PROC *proc = (PROC *) MAKE_PTR(pOffset);
|
||||||
|
/* Fetch xid just once - see GetNewTransactionId */
|
||||||
TransactionId xid = proc->xid;
|
TransactionId xid = proc->xid;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -325,11 +360,12 @@ GetSnapshotData(bool serializable)
|
|||||||
|
|
||||||
if (serializable)
|
if (serializable)
|
||||||
MyProc->xmin = snapshot->xmin;
|
MyProc->xmin = snapshot->xmin;
|
||||||
/* Serializable snapshot must be computed before any other... */
|
|
||||||
Assert(MyProc->xmin != InvalidTransactionId);
|
|
||||||
|
|
||||||
SpinRelease(SInvalLock);
|
SpinRelease(SInvalLock);
|
||||||
|
|
||||||
|
/* Serializable snapshot must be computed before any other... */
|
||||||
|
Assert(MyProc->xmin != InvalidTransactionId);
|
||||||
|
|
||||||
snapshot->xcnt = count;
|
snapshot->xcnt = count;
|
||||||
return snapshot;
|
return snapshot;
|
||||||
}
|
}
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.38 2001/07/12 04:11:13 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.39 2001/07/16 22:43:34 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -610,6 +610,13 @@ HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId XmaxRecent)
|
|||||||
*
|
*
|
||||||
* If the inserting transaction aborted, then the tuple was never visible
|
* If the inserting transaction aborted, then the tuple was never visible
|
||||||
* to any other transaction, so we can delete it immediately.
|
* to any other transaction, so we can delete it immediately.
|
||||||
|
*
|
||||||
|
* NOTE: must check TransactionIdIsInProgress (which looks in shared mem)
|
||||||
|
* before TransactionIdDidCommit/TransactionIdDidAbort (which look in
|
||||||
|
* pg_log). Otherwise we have a race condition where we might decide
|
||||||
|
* that a just-committed transaction crashed, because none of the tests
|
||||||
|
* succeed. xact.c is careful to record commit/abort in pg_log before
|
||||||
|
* it unsets MyProc->xid in shared memory.
|
||||||
*/
|
*/
|
||||||
if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
|
if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
|
||||||
{
|
{
|
||||||
@ -636,19 +643,19 @@ HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId XmaxRecent)
|
|||||||
}
|
}
|
||||||
tuple->t_infomask |= HEAP_XMIN_COMMITTED;
|
tuple->t_infomask |= HEAP_XMIN_COMMITTED;
|
||||||
}
|
}
|
||||||
|
else if (TransactionIdIsInProgress(tuple->t_xmin))
|
||||||
|
return HEAPTUPLE_INSERT_IN_PROGRESS;
|
||||||
|
else if (TransactionIdDidCommit(tuple->t_xmin))
|
||||||
|
tuple->t_infomask |= HEAP_XMIN_COMMITTED;
|
||||||
else if (TransactionIdDidAbort(tuple->t_xmin))
|
else if (TransactionIdDidAbort(tuple->t_xmin))
|
||||||
{
|
{
|
||||||
tuple->t_infomask |= HEAP_XMIN_INVALID;
|
tuple->t_infomask |= HEAP_XMIN_INVALID;
|
||||||
return HEAPTUPLE_DEAD;
|
return HEAPTUPLE_DEAD;
|
||||||
}
|
}
|
||||||
else if (TransactionIdDidCommit(tuple->t_xmin))
|
|
||||||
tuple->t_infomask |= HEAP_XMIN_COMMITTED;
|
|
||||||
else if (TransactionIdIsInProgress(tuple->t_xmin))
|
|
||||||
return HEAPTUPLE_INSERT_IN_PROGRESS;
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Not Aborted, Not Committed, Not in Progress -
|
* Not in Progress, Not Committed, Not Aborted -
|
||||||
* so it's from a crashed process. - vadim 11/26/96
|
* so it's from a crashed process. - vadim 11/26/96
|
||||||
*/
|
*/
|
||||||
tuple->t_infomask |= HEAP_XMIN_INVALID;
|
tuple->t_infomask |= HEAP_XMIN_INVALID;
|
||||||
@ -667,19 +674,19 @@ HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId XmaxRecent)
|
|||||||
|
|
||||||
if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
|
if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
|
||||||
{
|
{
|
||||||
if (TransactionIdDidAbort(tuple->t_xmax))
|
if (TransactionIdIsInProgress(tuple->t_xmax))
|
||||||
|
return HEAPTUPLE_DELETE_IN_PROGRESS;
|
||||||
|
else if (TransactionIdDidCommit(tuple->t_xmax))
|
||||||
|
tuple->t_infomask |= HEAP_XMAX_COMMITTED;
|
||||||
|
else if (TransactionIdDidAbort(tuple->t_xmax))
|
||||||
{
|
{
|
||||||
tuple->t_infomask |= HEAP_XMAX_INVALID;
|
tuple->t_infomask |= HEAP_XMAX_INVALID;
|
||||||
return HEAPTUPLE_LIVE;
|
return HEAPTUPLE_LIVE;
|
||||||
}
|
}
|
||||||
else if (TransactionIdDidCommit(tuple->t_xmax))
|
|
||||||
tuple->t_infomask |= HEAP_XMAX_COMMITTED;
|
|
||||||
else if (TransactionIdIsInProgress(tuple->t_xmax))
|
|
||||||
return HEAPTUPLE_DELETE_IN_PROGRESS;
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Not Aborted, Not Committed, Not in Progress -
|
* Not in Progress, Not Committed, Not Aborted -
|
||||||
* so it's from a crashed process. - vadim 06/02/97
|
* so it's from a crashed process. - vadim 06/02/97
|
||||||
*/
|
*/
|
||||||
tuple->t_infomask |= HEAP_XMAX_INVALID;
|
tuple->t_infomask |= HEAP_XMAX_INVALID;
|
||||||
|
Reference in New Issue
Block a user