1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-27 23:21:58 +03:00

Fix bug in verifying TLI (timeline ID) in WAL page header during recovery.

Previously ValidXLOGHeader() could not properly handle the case where
we re-read a WAL segment after reading its subsequent segment, which has
a larger TLI. This case can happen, for example, when a WAL record is split
across two segments having different TLIs. In this case, since the segment
we're re-reading has a smaller TLI than the subsequent segment we've
already read, ValidXLOGHeader() reported an error "out-of-sequence TLI"
even though the TLI sequence was valid (i.e., the TLI doesn't go backwards
across successive WAL pages and segments).

This issue was fixed by commit 7fcbf6a405
in 9.3 and later, though there is no mention of the bug fix in its commit log.
It changed the WAL check code so that it verifies the TLI only for pages that
are later than the last remembered LSN. This patch applies the same change to
9.2, where the issue still existed.

Author: Takayuki Tsunakawa and Amit Kapila
Reviewed-By: Robert Haas
Discussion: https://postgr.es/m/0A3221C70F24FB45833433255569204D1F5E15E5@G01JPEXMBYT05
This commit is contained in:
Fujii Masao
2017-01-25 07:02:25 +09:00
parent dbaa621cb7
commit 38bec18056

View File

@ -617,8 +617,8 @@ static uint32 readRecordBufSize = 0;
/* State information for XLOG reading */ /* State information for XLOG reading */
static XLogRecPtr ReadRecPtr; /* start of last record read */ static XLogRecPtr ReadRecPtr; /* start of last record read */
static XLogRecPtr EndRecPtr; /* end+1 of last record read */ static XLogRecPtr EndRecPtr; /* end+1 of last record read */
static TimeLineID lastPageTLI = 0; static XLogRecPtr latestPagePtr; /* start of last page read */
static TimeLineID lastSegmentTLI = 0; static TimeLineID latestPageTLI = 0;
static XLogRecPtr minRecoveryPoint; /* local copy of static XLogRecPtr minRecoveryPoint; /* local copy of
* ControlFile->minRecoveryPoint */ * ControlFile->minRecoveryPoint */
@ -706,7 +706,7 @@ static void CleanupBackupHistory(void);
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force); static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force);
static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt); static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt);
static void CheckRecoveryConsistency(void); static void CheckRecoveryConsistency(void);
static bool ValidXLOGHeader(XLogPageHeader hdr, int emode, bool segmentonly); static bool ValidXLOGHeader(XLogPageHeader hdr, int emode);
static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt); static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt);
static List *readTimeLineHistory(TimeLineID targetTLI); static List *readTimeLineHistory(TimeLineID targetTLI);
static bool existsTimeLineHistory(TimeLineID probeTLI); static bool existsTimeLineHistory(TimeLineID probeTLI);
@ -4021,14 +4021,6 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
(errmsg("invalid record offset at %X/%X", (errmsg("invalid record offset at %X/%X",
RecPtr->xlogid, RecPtr->xrecoff))); RecPtr->xlogid, RecPtr->xrecoff)));
/*
* Since we are going to a random position in WAL, forget any prior
* state about what timeline we were in, and allow it to be any
* timeline in expectedTLIs. We also set a flag to allow curFileTLI
* to go backwards (but we can't reset that variable right here, since
* we might not change files at all).
*/
lastPageTLI = lastSegmentTLI = 0; /* see comment in ValidXLOGHeader */
randAccess = true; /* allow curFileTLI to go backwards too */ randAccess = true; /* allow curFileTLI to go backwards too */
} }
@ -4346,7 +4338,7 @@ next_record_is_invalid:
* ReadRecord. It's not intended for use from anywhere else. * ReadRecord. It's not intended for use from anywhere else.
*/ */
static bool static bool
ValidXLOGHeader(XLogPageHeader hdr, int emode, bool segmentonly) ValidXLOGHeader(XLogPageHeader hdr, int emode)
{ {
XLogRecPtr recaddr; XLogRecPtr recaddr;
@ -4440,31 +4432,25 @@ ValidXLOGHeader(XLogPageHeader hdr, int emode, bool segmentonly)
* immediate parent's TLI, we should never see TLI go backwards across * immediate parent's TLI, we should never see TLI go backwards across
* successive pages of a consistent WAL sequence. * successive pages of a consistent WAL sequence.
* *
* Of course this check should only be applied when advancing sequentially * Sometimes we re-read a segment that's already been (partially) read.
* across pages; therefore ReadRecord resets lastPageTLI and * This can happen when we read WAL segments from parent's TLI during
* lastSegmentTLI to zero when going to a random page. * archive recovery, refer XLogFileReadAnyTLI. So we only verify TLIs
* * for pages that are later than the last remembered LSN.
* Sometimes we re-open a segment that's already been partially replayed.
* In that case we cannot perform the normal TLI check: if there is a
* timeline switch within the segment, the first page has a smaller TLI
* than later pages following the timeline switch, and we might've read
* them already. As a weaker test, we still check that it's not smaller
* than the TLI we last saw at the beginning of a segment. Pass
* segmentonly = true when re-validating the first page like that, and the
* page you're actually interested in comes later.
*/ */
if (hdr->xlp_tli < (segmentonly ? lastSegmentTLI : lastPageTLI)) if (XLByteLT(latestPagePtr, recaddr))
{ {
ereport(emode_for_corrupt_record(emode, recaddr), if (hdr->xlp_tli < latestPageTLI)
(errmsg("out-of-sequence timeline ID %u (after %u) in log file %u, segment %u, offset %u", {
hdr->xlp_tli, ereport(emode_for_corrupt_record(emode, recaddr),
segmentonly ? lastSegmentTLI : lastPageTLI, (errmsg("out-of-sequence timeline ID %u (after %u) in log file %u, segment %u, offset %u",
readId, readSeg, readOff))); hdr->xlp_tli,
return false; latestPageTLI,
readId, readSeg, readOff)));
return false;
}
} }
lastPageTLI = hdr->xlp_tli; latestPagePtr = recaddr;
if (readOff == 0) latestPageTLI = hdr->xlp_tli;
lastSegmentTLI = hdr->xlp_tli;
return true; return true;
} }
@ -10927,7 +10913,7 @@ retry:
readId, readSeg, readOff))); readId, readSeg, readOff)));
goto next_record_is_invalid; goto next_record_is_invalid;
} }
if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode, true)) if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode))
goto next_record_is_invalid; goto next_record_is_invalid;
} }
@ -10949,7 +10935,7 @@ retry:
readId, readSeg, readOff))); readId, readSeg, readOff)));
goto next_record_is_invalid; goto next_record_is_invalid;
} }
if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode, false)) if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode))
goto next_record_is_invalid; goto next_record_is_invalid;
Assert(targetId == readId); Assert(targetId == readId);