1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-14 08:21:07 +03:00

Fix race condition in reading commit timestamps

If a user requests the commit timestamp for a transaction old enough
that its data is concurrently being truncated away by vacuum at just the
right time, they would receive an ugly internal file-not-found error
message from slru.c rather than the expected NULL return value.

In a primary server, the window for the race is very small: the lookup
has to occur exactly between the two calls by vacuum, and there's not a
lot that happens between them (mostly just a multixact truncate).  In a
standby server, however, the window is larger because the truncation is
executed as soon as the WAL record for it is replayed, but the advance
of the oldest-Xid is not executed until the next checkpoint record.

To fix in the primary, simply reverse the order of operations in
vac_truncate_clog.  To fix in the standby, augment the WAL truncation
record so that the standby is aware of the new oldest-XID value and can
apply the update immediately.  WAL version bumped because of this.

No backpatch, because of the low importance of the bug and its rarity.

Author: Craig Ringer
Reviewed-By: Petr Jelínek, Peter Eisentraut
Discussion: https://postgr.es/m/CAMsr+YFhVtRQT1VAwC+WGbbxZZRzNou=N9Ed-FrCqkwQ8H8oJQ@mail.gmail.com
This commit is contained in:
Alvaro Herrera
2017-01-19 18:23:09 -03:00
parent 8b0fec93ec
commit 8eace46d34
5 changed files with 34 additions and 13 deletions

View File

@@ -113,7 +113,7 @@ static bool CommitTsPagePrecedes(int page1, int page2);
 static void ActivateCommitTs(void);
 static void DeactivateCommitTs(void);
 static void WriteZeroPageXlogRec(int pageno);
-static void WriteTruncateXlogRec(int pageno);
+static void WriteTruncateXlogRec(int pageno, TransactionId oldestXid);
 static void WriteSetTimestampXlogRec(TransactionId mainxid, int nsubxids,
 						 TransactionId *subxids, TimestampTz timestamp,
 						 RepOriginId nodeid);
@@ -824,7 +824,7 @@ TruncateCommitTs(TransactionId oldestXact)
 		return; /* nothing to remove */
 
 	/* Write XLOG record */
-	WriteTruncateXlogRec(cutoffPage);
+	WriteTruncateXlogRec(cutoffPage, oldestXact);
 
 	/* Now we can remove the old CommitTs segment(s) */
 	SimpleLruTruncate(CommitTsCtl, cutoffPage);
@@ -910,10 +910,15 @@ WriteZeroPageXlogRec(int pageno)
  * Write a TRUNCATE xlog record
  */
 static void
-WriteTruncateXlogRec(int pageno)
+WriteTruncateXlogRec(int pageno, TransactionId oldestXid)
 {
+	xl_commit_ts_truncate xlrec;
+
+	xlrec.pageno = pageno;
+	xlrec.oldestXid = oldestXid;
+
 	XLogBeginInsert();
-	XLogRegisterData((char *) (&pageno), sizeof(int));
+	XLogRegisterData((char *) (&xlrec), SizeOfCommitTsTruncate);
 	(void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_TRUNCATE);
 }
 
@@ -967,17 +972,17 @@ commit_ts_redo(XLogReaderState *record)
 	}
 	else if (info == COMMIT_TS_TRUNCATE)
 	{
-		int pageno;
+		xl_commit_ts_truncate *trunc = (xl_commit_ts_truncate *) XLogRecGetData(record);
 
-		memcpy(&pageno, XLogRecGetData(record), sizeof(int));
+		AdvanceOldestCommitTsXid(trunc->oldestXid);
 
 		/*
 		 * During XLOG replay, latest_page_number isn't set up yet; insert a
 		 * suitable value to bypass the sanity test in SimpleLruTruncate.
 		 */
-		CommitTsCtl->shared->latest_page_number = pageno;
+		CommitTsCtl->shared->latest_page_number = trunc->pageno;
 
-		SimpleLruTruncate(CommitTsCtl, pageno);
+		SimpleLruTruncate(CommitTsCtl, trunc->pageno);
 	}
 	else if (info == COMMIT_TS_SETTS)
 	{