1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-08 11:42:09 +03:00

Refactor WAL file-reading code into WALRead()

XLogReader, walsender and pg_waldump all had their own routines to read
data from WAL files to memory, with slightly different approaches
according to the particular conditions of each environment.  There's a
lot of commonality, so we can refactor that into a single routine
WALRead in XLogReader, and move the differences to a separate (simpler)
callback that just opens the next WAL-segment.  This results in a
clearer (ahem) code flow.

The error reporting needs are covered by filling in a new error-info
struct, WALReadError, and it's the caller's responsibility to act on it.
The backend has WALReadRaiseError() to do so.

We no longer ever need to seek in this interface; switch to using
pg_pread().

Author: Antonin Houska, with contributions from Álvaro Herrera
Reviewed-by: Michaël Paquier, Kyotaro Horiguchi
Discussion: https://postgr.es/m/14984.1554998742@spoje.net
This commit is contained in:
Alvaro Herrera
2019-11-25 15:04:54 -03:00
parent 5883f5fe27
commit 0dc8ead463
6 changed files with 387 additions and 433 deletions

View File

@ -17,6 +17,8 @@
*/
#include "postgres.h"
#include <unistd.h>
#include "access/transam.h"
#include "access/xlog_internal.h"
#include "access/xlogreader.h"
@ -27,6 +29,7 @@
#ifndef FRONTEND
#include "miscadmin.h"
#include "pgstat.h"
#include "utils/memutils.h"
#endif
@ -208,7 +211,6 @@ WALOpenSegmentInit(WALOpenSegment *seg, WALSegmentContext *segcxt,
{
seg->ws_file = -1;
seg->ws_segno = 0;
seg->ws_off = 0;
seg->ws_tli = 0;
segcxt->ws_segsize = segsize;
@ -295,8 +297,7 @@ XLogReadRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg)
* byte to cover the whole record header, or at least the part of it that
* fits on the same page.
*/
readOff = ReadPageInternal(state,
targetPagePtr,
readOff = ReadPageInternal(state, targetPagePtr,
Min(targetRecOff + SizeOfXLogRecord, XLOG_BLCKSZ));
if (readOff < 0)
goto err;
@ -556,7 +557,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
/* check whether we have all the requested data already */
if (targetSegNo == state->seg.ws_segno &&
targetPageOff == state->seg.ws_off && reqLen <= state->readLen)
targetPageOff == state->segoff && reqLen <= state->readLen)
return state->readLen;
/*
@ -627,7 +628,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
/* update read state information */
state->seg.ws_segno = targetSegNo;
state->seg.ws_off = targetPageOff;
state->segoff = targetPageOff;
state->readLen = readLen;
return readLen;
@ -644,7 +645,7 @@ static void
XLogReaderInvalReadState(XLogReaderState *state)
{
state->seg.ws_segno = 0;
state->seg.ws_off = 0;
state->segoff = 0;
state->readLen = 0;
}
@ -1015,6 +1016,99 @@ out:
#endif /* FRONTEND */
/*
 * Read 'count' bytes into 'buf', starting at location 'startptr', from WAL
 * fetched from timeline 'tli'.
 *
 * 'seg/segcxt' identify the last segment used.  'openSegment' is a callback
 * to open the next segment, if necessary.  'seg' is updated in place whenever
 * a different segment has to be opened, so a subsequent call can keep using
 * the same open file.
 *
 * The request may span several segments; each loop iteration reads at most
 * up to the end of the current segment, and short reads are simply continued
 * from where they stopped.
 *
 * Returns true if succeeded, false if an error occurs, in which case
 * 'errinfo' receives error details: the errno value left by the failing
 * read, the number of bytes requested and the read's return value, the
 * offset within the segment, and a copy of the segment descriptor.  It is
 * the caller's responsibility to act on that information.
 *
 * XXX probably this should be improved to suck data directly from the
 * WAL buffers when possible.
 */
bool
WALRead(char *buf, XLogRecPtr startptr, Size count, TimeLineID tli,
		WALOpenSegment *seg, WALSegmentContext *segcxt,
		WALSegmentOpen openSegment, WALReadError *errinfo)
{
	char	   *p;
	XLogRecPtr	recptr;
	Size		nbytes;

	p = buf;
	recptr = startptr;
	nbytes = count;

	while (nbytes > 0)
	{
		uint32		startoff;
		int			segbytes;
		int			readbytes;

		startoff = XLogSegmentOffset(recptr, segcxt->ws_segsize);

		/*
		 * If the data we want is not in a segment we have open, close what we
		 * have (if anything) and open the next one, using the caller's
		 * provided openSegment callback.
		 */
		if (seg->ws_file < 0 ||
			!XLByteInSeg(recptr, seg->ws_segno, segcxt->ws_segsize) ||
			tli != seg->ws_tli)
		{
			XLogSegNo	nextSegNo;

			if (seg->ws_file >= 0)
			{
				close(seg->ws_file);

				/*
				 * Forget the descriptor immediately.  Should the callback
				 * below not return normally, *seg must not be left pointing
				 * at a closed (and possibly recycled) file descriptor that
				 * later cleanup code could close a second time.
				 */
				seg->ws_file = -1;
			}

			XLByteToSeg(recptr, nextSegNo, segcxt->ws_segsize);
			seg->ws_file = openSegment(nextSegNo, segcxt, &tli);

			/* Update the current segment info. */
			seg->ws_tli = tli;
			seg->ws_segno = nextSegNo;
		}

		/* How many bytes are within this segment? */
		if (nbytes > (segcxt->ws_segsize - startoff))
			segbytes = segcxt->ws_segsize - startoff;
		else
			segbytes = nbytes;

#ifndef FRONTEND
		pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
#endif

		/* Reset errno first; eases reporting non-errno-affecting errors */
		errno = 0;
		readbytes = pg_pread(seg->ws_file, p, segbytes, (off_t) startoff);

#ifndef FRONTEND
		pgstat_report_wait_end();
#endif

		/* Both a failed read and a zero-length read are reported as errors */
		if (readbytes <= 0)
		{
			errinfo->wre_errno = errno;
			errinfo->wre_req = segbytes;
			errinfo->wre_read = readbytes;
			errinfo->wre_off = startoff;
			errinfo->wre_seg = *seg;
			return false;
		}

		/* Update state for read */
		recptr += readbytes;
		nbytes -= readbytes;
		p += readbytes;
	}

	return true;
}
/* ----------------------------------------
* Functions for decoding the data and block references in a record.
* ----------------------------------------