1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-08 11:42:09 +03:00

Fix scenario where streaming standby gets stuck at a continuation record.

If a continuation record is split so that its first half has already been
removed from the master, and is only present in pg_wal, and there is a
recycled WAL segment in the standby server that looks like it would
contain the second half, recovery would get stuck. The code in
XLogPageRead() incorrectly started streaming at the beginning of the
WAL record, even if we had already read the first page.

Backpatch to 9.4. In principle, older versions have the same problem, but
without replication slots, there was no straightforward mechanism to
prevent the master from recycling old WAL that was still needed by standby.
Without such a mechanism, I think it's reasonable to assume that there's
enough slack in how many old segments are kept around to not run into this,
or you have a WAL archive.

Reported by Jonathon Nelson. Analysis and patch by Kyotaro HORIGUCHI, with
some extra comments by me.

Discussion: https://www.postgresql.org/message-id/CACJqAM3xVz0JY1XFDKPP%2BJoJAjoGx%3DGNuOAshEDWCext7BFvCQ%40mail.gmail.com
This commit is contained in:
Heikki Linnakangas
2018-05-05 01:34:53 +03:00
parent d2599ecfcc
commit 0668719801
3 changed files with 62 additions and 13 deletions

View File

@ -27,8 +27,6 @@
static bool allocate_recordbuf(XLogReaderState *state, uint32 reclength);
static bool ValidXLogPageHeader(XLogReaderState *state, XLogRecPtr recptr,
XLogPageHeader hdr);
static bool ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
XLogRecPtr PrevRecPtr, XLogRecord *record, bool randAccess);
static bool ValidXLogRecord(XLogReaderState *state, XLogRecord *record,
@ -533,7 +531,6 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
*/
if (targetSegNo != state->readSegNo && targetPageOff != 0)
{
XLogPageHeader hdr;
XLogRecPtr targetSegmentPtr = pageptr - targetPageOff;
readLen = state->read_page(state, targetSegmentPtr, XLOG_BLCKSZ,
@ -545,9 +542,8 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
/* we can be sure to have enough WAL available, we scrolled back */
Assert(readLen == XLOG_BLCKSZ);
hdr = (XLogPageHeader) state->readBuf;
if (!ValidXLogPageHeader(state, targetSegmentPtr, hdr))
if (!XLogReaderValidatePageHeader(state, targetSegmentPtr,
state->readBuf))
goto err;
}
@ -584,7 +580,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
/*
* Now that we know we have the full header, validate it.
*/
if (!ValidXLogPageHeader(state, pageptr, hdr))
if (!XLogReaderValidatePageHeader(state, pageptr, (char *) hdr))
goto err;
/* update read state information */
@ -709,15 +705,19 @@ ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr)
}
/*
* Validate a page header
* Validate a page header.
*
* Check if 'phdr' is valid as the header of the XLog page at position
* 'recptr'.
*/
static bool
ValidXLogPageHeader(XLogReaderState *state, XLogRecPtr recptr,
XLogPageHeader hdr)
bool
XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
char *phdr)
{
XLogRecPtr recaddr;
XLogSegNo segno;
int32 offset;
XLogPageHeader hdr = (XLogPageHeader) phdr;
Assert((recptr % XLOG_BLCKSZ) == 0);
@ -805,6 +805,11 @@ ValidXLogPageHeader(XLogReaderState *state, XLogRecPtr recptr,
return false;
}
/*
* Check that the address on the page agrees with what we expected.
* This check typically fails when an old WAL segment is recycled,
* and hasn't yet been overwritten with new data yet.
*/
if (hdr->xlp_pageaddr != recaddr)
{
char fname[MAXFNAMELEN];