1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-12 05:01:15 +03:00

Refactor WAL file-reading code into WALRead()

XLogReader, walsender and pg_waldump all had their own routines to read
data from WAL files to memory, with slightly different approaches
according to the particular conditions of each environment.  There's a
lot of commonality, so we can refactor that into a single routine
WALRead in XLogReader, and move the differences to a separate (simpler)
callback that just opens the next WAL-segment.  This results in a
clearer (ahem) code flow.

The error reporting needs are covered by filling in a new error-info
struct, WALReadError, and it's the caller's responsibility to act on it.
The backend has WALReadRaiseError() to do so.

We no longer ever need to seek in this interface; switch to using
pg_pread().

Author: Antonin Houska, with contributions from Álvaro Herrera
Reviewed-by: Michaël Paquier, Kyotaro Horiguchi
Discussion: https://postgr.es/m/14984.1554998742@spoje.net
This commit is contained in:
Alvaro Herrera
2019-11-25 15:04:54 -03:00
parent 5883f5fe27
commit 0dc8ead463
6 changed files with 387 additions and 433 deletions

View File

@@ -639,128 +639,6 @@ XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum,
forget_invalid_pages(rnode, forkNum, nblocks);
}
/*
 * Read 'count' bytes from WAL into 'buf', starting at location 'startptr'
 * in timeline 'tli'.
 *
 * 'segsize' must be equal to wal_segment_size (this is asserted).  The
 * requested range may cross segment boundaries; the loop below switches to
 * the next segment file whenever the current read position leaves the
 * segment that is currently open.
 *
 * Will open, and keep open, one WAL segment stored in the static file
 * descriptor 'sendFile'. This means if XLogRead is used once, there will
 * always be one descriptor left open until the process ends, but never
 * more than one.
 *
 * Failure to open, seek in, or read a segment is reported with
 * ereport(ERROR).  A read() that returns zero bytes (unexpected end of
 * file) is reported separately from a read() that fails: errno is not set
 * in the end-of-file case, so formatting it with %m would print an
 * unrelated or stale OS error.
 *
 * XXX This is very similar to pg_waldump's XLogDumpXLogRead and to XLogRead
 * in walsender.c but for small differences (such as lack of elog() in
 * frontend).  Probably these should be merged at some point.
 */
static void
XLogRead(char *buf, int segsize, TimeLineID tli, XLogRecPtr startptr,
		 Size count)
{
	char	   *p;
	XLogRecPtr	recptr;
	Size		nbytes;

	/* state maintained across calls */
	static int	sendFile = -1;
	static XLogSegNo sendSegNo = 0;
	static TimeLineID sendTLI = 0;
	static uint32 sendOff = 0;

	Assert(segsize == wal_segment_size);

	p = buf;
	recptr = startptr;
	nbytes = count;

	while (nbytes > 0)
	{
		uint32		startoff;
		int			segbytes;
		int			readbytes;

		/* byte offset of the current read position within its segment */
		startoff = XLogSegmentOffset(recptr, segsize);

		/* Do we need to switch to a different xlog segment? */
		if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo, segsize) ||
			sendTLI != tli)
		{
			char		path[MAXPGPATH];

			if (sendFile >= 0)
				close(sendFile);

			XLByteToSeg(recptr, sendSegNo, segsize);

			XLogFilePath(path, tli, sendSegNo, segsize);

			sendFile = BasicOpenFile(path, O_RDONLY | PG_BINARY);

			if (sendFile < 0)
			{
				if (errno == ENOENT)
					ereport(ERROR,
							(errcode_for_file_access(),
							 errmsg("requested WAL segment %s has already been removed",
									path)));
				else
					ereport(ERROR,
							(errcode_for_file_access(),
							 errmsg("could not open file \"%s\": %m",
									path)));
			}

			/* a freshly opened file is positioned at its start */
			sendOff = 0;
			sendTLI = tli;
		}

		/* Need to seek in the file? */
		if (sendOff != startoff)
		{
			if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0)
			{
				char		path[MAXPGPATH];
				int			save_errno = errno;

				/* building the path must not clobber the lseek() errno */
				XLogFilePath(path, tli, sendSegNo, segsize);
				errno = save_errno;
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not seek in log segment %s to offset %u: %m",
								path, startoff)));
			}
			sendOff = startoff;
		}

		/* How many bytes are within this segment? */
		if (nbytes > (segsize - startoff))
			segbytes = segsize - startoff;
		else
			segbytes = nbytes;

		pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
		readbytes = read(sendFile, p, segbytes);
		pgstat_report_wait_end();

		if (readbytes <= 0)
		{
			char		path[MAXPGPATH];
			int			save_errno = errno;

			XLogFilePath(path, tli, sendSegNo, segsize);

			if (readbytes < 0)
			{
				/* read() failed: errno describes the failure */
				errno = save_errno;
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not read from log segment %s, offset %u, length %lu: %m",
								path, sendOff, (unsigned long) segbytes)));
			}
			else
			{
				/*
				 * read() returned 0: the file ended before the requested
				 * offset.  errno is meaningless here, so do not use %m.
				 */
				ereport(ERROR,
						(errcode(ERRCODE_DATA_CORRUPTED),
						 errmsg("could not read from log segment %s, offset %u: read %d of %zu",
								path, sendOff, readbytes, (Size) segbytes)));
			}
		}

		/* Update state for read; a short read simply loops again */
		recptr += readbytes;

		sendOff += readbytes;
		nbytes -= readbytes;
		p += readbytes;
	}
}
/*
* Determine which timeline to read an xlog page from and set the
* XLogReaderState's currTLI to that timeline ID.
@@ -802,8 +680,8 @@ XLogRead(char *buf, int segsize, TimeLineID tli, XLogRecPtr startptr,
void
XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, uint32 wantLength)
{
const XLogRecPtr lastReadPage = state->seg.ws_segno *
state->segcxt.ws_segsize + state->seg.ws_off;
const XLogRecPtr lastReadPage = (state->seg.ws_segno *
state->segcxt.ws_segsize + state->segoff);
Assert(wantPage != InvalidXLogRecPtr && wantPage % XLOG_BLCKSZ == 0);
Assert(wantLength <= XLOG_BLCKSZ);
@@ -896,6 +774,34 @@ XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, uint32 wa
}
}
/*
 * openSegment callback for WALRead.
 *
 * Builds the path of WAL segment 'nextSegNo' on timeline '*tli_p', using the
 * segment size stored in 'segcxt', and opens that file read-only.
 *
 * Returns the open file descriptor.  If the file cannot be opened, an ERROR
 * is raised; a missing file (ENOENT) gets its own "already been removed"
 * message.
 */
static int
wal_segment_open(XLogSegNo nextSegNo, WALSegmentContext *segcxt,
				 TimeLineID *tli_p)
{
	char		segpath[MAXPGPATH];
	int			segfd;

	XLogFilePath(segpath, *tli_p, nextSegNo, segcxt->ws_segsize);

	segfd = BasicOpenFile(segpath, O_RDONLY | PG_BINARY);
	if (segfd < 0)
	{
		/* errno is still the one left behind by BasicOpenFile() */
		if (errno != ENOENT)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not open file \"%s\": %m",
							segpath)));
		else
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("requested WAL segment %s has already been removed",
							segpath)));

		return -1;				/* keep compiler quiet */
	}

	return segfd;
}
/*
* read_page callback for reading local xlog files
*
@@ -913,7 +819,9 @@ read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
{
XLogRecPtr read_upto,
loc;
TimeLineID tli;
int count;
WALReadError errinfo;
loc = targetPagePtr + reqLen;
@@ -932,7 +840,7 @@ read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
read_upto = GetFlushRecPtr();
else
read_upto = GetXLogReplayRecPtr(&ThisTimeLineID);
state->seg.ws_tli = ThisTimeLineID;
tli = ThisTimeLineID;
/*
* Check which timeline to get the record from.
@@ -982,14 +890,14 @@ read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
read_upto = state->currTLIValidUntil;
/*
* Setting ws_tli to our wanted record's TLI is slightly wrong;
* the page might begin on an older timeline if it contains a
* timeline switch, since its xlog segment will have been copied
* from the prior timeline. This is pretty harmless though, as
* nothing cares so long as the timeline doesn't go backwards. We
* should read the page header instead; FIXME someday.
* Setting tli to our wanted record's TLI is slightly wrong; the
* page might begin on an older timeline if it contains a timeline
* switch, since its xlog segment will have been copied from the
* prior timeline. This is pretty harmless though, as nothing
* cares so long as the timeline doesn't go backwards. We should
* read the page header instead; FIXME someday.
*/
state->seg.ws_tli = state->currTLI;
tli = state->currTLI;
/* No need to wait on a historical timeline */
break;
@@ -1020,9 +928,38 @@ read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
* as 'count', read the whole page anyway. It's guaranteed to be
* zero-padded up to the page boundary if it's incomplete.
*/
XLogRead(cur_page, state->segcxt.ws_segsize, state->seg.ws_tli, targetPagePtr,
XLOG_BLCKSZ);
if (!WALRead(cur_page, targetPagePtr, XLOG_BLCKSZ, tli, &state->seg,
&state->segcxt, wal_segment_open, &errinfo))
WALReadRaiseError(&errinfo);
/* number of valid bytes in the buffer */
return count;
}
/*
 * Backend-specific convenience code to handle read errors encountered by
 * WALRead().
 *
 * 'errinfo' is the error-info struct filled in by the failed read.  Two
 * cases are reported, both as ERROR:
 *	- wre_read < 0: the saved wre_errno is restored into errno so that %m
 *	  in the message describes it;
 *	- wre_read == 0: nothing was read; reported as data corruption together
 *	  with the requested length.
 * Nothing is raised when wre_read is positive.
 */
void
WALReadRaiseError(WALReadError *errinfo)
{
	WALOpenSegment *failed_seg = &errinfo->wre_seg;
	char	   *segname = XLogFileNameP(failed_seg->ws_tli,
										failed_seg->ws_segno);

	/* A positive byte count is not handled as an error here. */
	if (errinfo->wre_read > 0)
		return;

	if (errinfo->wre_read == 0)
	{
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("could not read from log segment %s, offset %u: read %d of %zu",
						segname, errinfo->wre_off, errinfo->wre_read,
						(Size) errinfo->wre_req)));
	}
	else
	{
		/* restore errno only now, so nothing above can overwrite it */
		errno = errinfo->wre_errno;
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not read from log segment %s, offset %u: %m",
						segname, errinfo->wre_off)));
	}
}