1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-24 01:29:19 +03:00

Fix Hot-Standby initialization of clog and subtrans.

These bugs can cause data loss on standbys started with hot_standby=on at
the moment they start to accept read-only queries, by marking committed
transactions as uncommitted. The likelihood of such corruptions is small
unless the primary has a high transaction rate.

5a031a5556 fixed bugs in HS's startup logic
by maintaining less state until at least STANDBY_SNAPSHOT_PENDING state
was reached, missing the fact that both clog and subtrans are written to
before that. This only failed to fail in common cases because the usage
of ExtendCLOG in procarray.c was superfluous since clog extensions are
actually WAL logged.

f44eedc3f0f347a856eea8590730769125964597/I then tried to fix the missing
extensions of pg_subtrans due to the former commit's changes - which are
not WAL logged - by performing the extensions when switching to a state >
STANDBY_INITIALIZED and not performing xid assignments before that -
again missing the fact that ExtendCLOG is unnecessary - but screwed up
twice: Once because latestObservedXid wasn't updated anymore in that
state due to the earlier commit and once by having an off-by-one error in
the loop performing extensions. This means that whenever a
CLOG_XACTS_PER_PAGE (32768 with default settings) boundary was crossed
between the start of the checkpoint recovery started from and the first
xl_running_xact record, old transactions' commit bits in pg_clog could be
overwritten if they started and committed in that window.

Fix this mess by not performing ExtendCLOG() in HS at all anymore since
it's unneeded and evidently dangerous and by performing subtrans
extensions even before reaching STANDBY_SNAPSHOT_PENDING.

Analysis and patch by Andres Freund. Reported by Christophe Pettus.
Backpatch down to 9.0, like the previous commit that caused this.
This commit is contained in:
Heikki Linnakangas
2013-11-22 14:38:59 +02:00
parent e3a02a3926
commit fc8e54f399
2 changed files with 42 additions and 30 deletions

View File

@@ -593,7 +593,7 @@ ExtendCLOG(TransactionId newestXact)
LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
/* Zero the page and make an XLOG entry about it */ /* Zero the page and make an XLOG entry about it */
ZeroCLOGPage(pageno, !InRecovery); ZeroCLOGPage(pageno, true);
LWLockRelease(CLogControlLock); LWLockRelease(CLogControlLock);
} }

View File

@@ -439,7 +439,7 @@ ProcArrayClearTransaction(PGPROC *proc)
* ProcArrayInitRecovery -- initialize recovery xid mgmt environment * ProcArrayInitRecovery -- initialize recovery xid mgmt environment
* *
* Remember up to where the startup process initialized the CLOG and subtrans * Remember up to where the startup process initialized the CLOG and subtrans
* so we can ensure its initialized gaplessly up to the point where necessary * so we can ensure it's initialized gaplessly up to the point where necessary
* while in recovery. * while in recovery.
*/ */
void void
@@ -449,9 +449,10 @@ ProcArrayInitRecovery(TransactionId initializedUptoXID)
Assert(TransactionIdIsNormal(initializedUptoXID)); Assert(TransactionIdIsNormal(initializedUptoXID));
/* /*
* we set latestObservedXid to the xid SUBTRANS has been initialized upto * we set latestObservedXid to the xid SUBTRANS has been initialized upto,
* so we can extend it from that point onwards when we reach a consistent * so we can extend it from that point onwards in
* state in ProcArrayApplyRecoveryInfo(). * RecordKnownAssignedTransactionIds, and when we get consistent in
* ProcArrayApplyRecoveryInfo().
*/ */
latestObservedXid = initializedUptoXID; latestObservedXid = initializedUptoXID;
TransactionIdRetreat(latestObservedXid); TransactionIdRetreat(latestObservedXid);
@@ -620,17 +621,23 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
pfree(xids); pfree(xids);
/* /*
* latestObservedXid is set to the the point where SUBTRANS was started up * latestObservedXid is at least set to the the point where SUBTRANS was
* to, initialize subtrans from thereon, up to nextXid - 1. * started up to (c.f. ProcArrayInitRecovery()) or to the biggest xid
* RecordKnownAssignedTransactionIds() was called for. Initialize
* subtrans from thereon, up to nextXid - 1.
*
* We need to duplicate parts of RecordKnownAssignedTransactionId() here,
* because we've just added xids to the known assigned xids machinery that
* haven't gone through RecordKnownAssignedTransactionId().
*/ */
Assert(TransactionIdIsNormal(latestObservedXid)); Assert(TransactionIdIsNormal(latestObservedXid));
TransactionIdAdvance(latestObservedXid);
while (TransactionIdPrecedes(latestObservedXid, running->nextXid)) while (TransactionIdPrecedes(latestObservedXid, running->nextXid))
{ {
ExtendCLOG(latestObservedXid);
ExtendSUBTRANS(latestObservedXid); ExtendSUBTRANS(latestObservedXid);
TransactionIdAdvance(latestObservedXid); TransactionIdAdvance(latestObservedXid);
} }
TransactionIdRetreat(latestObservedXid); /* = running->nextXid - 1 */
/* ---------- /* ----------
* Now we've got the running xids we need to set the global values that * Now we've got the running xids we need to set the global values that
@@ -704,10 +711,6 @@ ProcArrayApplyXidAssignment(TransactionId topxid,
Assert(standbyState >= STANDBY_INITIALIZED); Assert(standbyState >= STANDBY_INITIALIZED);
/* can't do anything useful unless we have more state setup */
if (standbyState == STANDBY_INITIALIZED)
return;
max_xid = TransactionIdLatest(topxid, nsubxids, subxids); max_xid = TransactionIdLatest(topxid, nsubxids, subxids);
/* /*
@@ -734,6 +737,10 @@ ProcArrayApplyXidAssignment(TransactionId topxid,
for (i = 0; i < nsubxids; i++) for (i = 0; i < nsubxids; i++)
SubTransSetParent(subxids[i], topxid, false); SubTransSetParent(subxids[i], topxid, false);
/* KnownAssignedXids isn't maintained yet, so we're done for now */
if (standbyState == STANDBY_INITIALIZED)
return;
/* /*
* Uses same locking as transaction commit * Uses same locking as transaction commit
*/ */
@@ -2454,18 +2461,11 @@ RecordKnownAssignedTransactionIds(TransactionId xid)
{ {
Assert(standbyState >= STANDBY_INITIALIZED); Assert(standbyState >= STANDBY_INITIALIZED);
Assert(TransactionIdIsValid(xid)); Assert(TransactionIdIsValid(xid));
Assert(TransactionIdIsValid(latestObservedXid));
elog(trace_recovery(DEBUG4), "record known xact %u latestObservedXid %u", elog(trace_recovery(DEBUG4), "record known xact %u latestObservedXid %u",
xid, latestObservedXid); xid, latestObservedXid);
/*
* If the KnownAssignedXids machinery isn't up yet, do nothing.
*/
if (standbyState <= STANDBY_INITIALIZED)
return;
Assert(TransactionIdIsValid(latestObservedXid));
/* /*
* When a newly observed xid arrives, it is frequently the case that it is * When a newly observed xid arrives, it is frequently the case that it is
* *not* the next xid in sequence. When this occurs, we must treat the * *not* the next xid in sequence. When this occurs, we must treat the
@@ -2476,22 +2476,34 @@ RecordKnownAssignedTransactionIds(TransactionId xid)
TransactionId next_expected_xid; TransactionId next_expected_xid;
/* /*
* Extend clog and subtrans like we do in GetNewTransactionId() during * Extend subtrans like we do in GetNewTransactionId() during normal
* normal operation using individual extend steps. Typical case * operation using individual extend steps. Note that we do not need
* requires almost no activity. * to extend clog since its extensions are WAL logged.
*
* This part has to be done regardless of standbyState since we
* immediately start assigning subtransactions to their toplevel
* transactions.
*/ */
next_expected_xid = latestObservedXid; next_expected_xid = latestObservedXid;
TransactionIdAdvance(next_expected_xid); while (TransactionIdPrecedes(next_expected_xid, xid))
while (TransactionIdPrecedesOrEquals(next_expected_xid, xid))
{ {
ExtendCLOG(next_expected_xid);
ExtendSUBTRANS(next_expected_xid);
TransactionIdAdvance(next_expected_xid); TransactionIdAdvance(next_expected_xid);
ExtendSUBTRANS(next_expected_xid);
}
Assert(next_expected_xid == xid);
/*
* If the KnownAssignedXids machinery isn't up yet, there's nothing
* more to do since we don't track assigned xids yet.
*/
if (standbyState <= STANDBY_INITIALIZED)
{
latestObservedXid = xid;
return;
} }
/* /*
* Add the new xids onto the KnownAssignedXids array. * Add (latestObservedXid, xid] onto the KnownAssignedXids array.
*/ */
next_expected_xid = latestObservedXid; next_expected_xid = latestObservedXid;
TransactionIdAdvance(next_expected_xid); TransactionIdAdvance(next_expected_xid);