mirror of
https://github.com/postgres/postgres.git
synced 2025-07-31 22:04:40 +03:00
Revert recovery prefetching feature.
This set of commits has some bugs with known fixes, but at this late stage in the release cycle it seems best to revert and resubmit next time, along with some new automated test coverage for this whole area.
Commits reverted:
dc88460c: Doc: Review for "Optionally prefetch referenced data in recovery."
1d257577: Optionally prefetch referenced data in recovery.
f003d9f8: Add circular WAL decoding buffer.
323cbe7c: Remove read_page callback from XLogReader.
Remove the new GUC group WAL_RECOVERY recently added by a55a9847, as the corresponding section of config.sgml is now reverted.
Discussion: https://postgr.es/m/CAOuzzgrn7iKnFRsB4MHp3UisEQAGgZMbk_ViTN4HV4-Ksq8zCg%40mail.gmail.com
This commit is contained in:
@ -39,7 +39,6 @@
|
||||
#endif
|
||||
|
||||
#include "access/xlogrecord.h"
|
||||
#include "storage/buf.h"
|
||||
|
||||
/* WALOpenSegment represents a WAL segment being read. */
|
||||
typedef struct WALOpenSegment
|
||||
@ -57,17 +56,65 @@ typedef struct WALSegmentContext
|
||||
} WALSegmentContext;
|
||||
|
||||
typedef struct XLogReaderState XLogReaderState;
|
||||
typedef struct XLogFindNextRecordState XLogFindNextRecordState;
|
||||
|
||||
/* Function type definition for the segment cleanup callback */
|
||||
typedef void (*WALSegmentCleanupCB) (XLogReaderState *xlogreader);
|
||||
|
||||
/* Function type definition for the open/close callbacks for WALRead() */
|
||||
/* Function type definitions for various xlogreader interactions */
|
||||
typedef int (*XLogPageReadCB) (XLogReaderState *xlogreader,
|
||||
XLogRecPtr targetPagePtr,
|
||||
int reqLen,
|
||||
XLogRecPtr targetRecPtr,
|
||||
char *readBuf);
|
||||
typedef void (*WALSegmentOpenCB) (XLogReaderState *xlogreader,
|
||||
XLogSegNo nextSegNo,
|
||||
TimeLineID *tli_p);
|
||||
typedef void (*WALSegmentCloseCB) (XLogReaderState *xlogreader);
|
||||
|
||||
typedef struct XLogReaderRoutine
|
||||
{
|
||||
/*
|
||||
* Data input callback
|
||||
*
|
||||
* This callback shall read at least reqLen valid bytes of the xlog page
|
||||
* starting at targetPagePtr, and store them in readBuf. The callback
|
||||
* shall return the number of bytes read (never more than XLOG_BLCKSZ), or
|
||||
* -1 on failure. The callback shall sleep, if necessary, to wait for the
|
||||
* requested bytes to become available. The callback will not be invoked
|
||||
* again for the same page unless more than the returned number of bytes
|
||||
* are needed.
|
||||
*
|
||||
* targetRecPtr is the position of the WAL record we're reading. Usually
|
||||
* it is equal to targetPagePtr + reqLen, but sometimes xlogreader needs
|
||||
* to read and verify the page or segment header, before it reads the
|
||||
* actual WAL record it's interested in. In that case, targetRecPtr can
|
||||
* be used to determine which timeline to read the page from.
|
||||
*
|
||||
* The callback shall set ->seg.ws_tli to the TLI of the file the page was
|
||||
* read from.
|
||||
*/
|
||||
XLogPageReadCB page_read;
|
||||
|
||||
/*
|
||||
* Callback to open the specified WAL segment for reading. ->seg.ws_file
|
||||
* shall be set to the file descriptor of the opened segment. In case of
|
||||
* failure, an error shall be raised by the callback and it shall not
|
||||
* return.
|
||||
*
|
||||
* "nextSegNo" is the number of the segment to be opened.
|
||||
*
|
||||
* "tli_p" is an input/output argument. WALRead() uses it to pass the
|
||||
* timeline in which the new segment should be found, but the callback can
|
||||
* use it to return the TLI that it actually opened.
|
||||
*/
|
||||
WALSegmentOpenCB segment_open;
|
||||
|
||||
/*
|
||||
* WAL segment close callback. ->seg.ws_file shall be set to a negative
|
||||
* number.
|
||||
*/
|
||||
WALSegmentCloseCB segment_close;
|
||||
} XLogReaderRoutine;
|
||||
|
||||
#define XL_ROUTINE(...) &(XLogReaderRoutine){__VA_ARGS__}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/* Is this block ref in use? */
|
||||
@ -78,9 +125,6 @@ typedef struct
|
||||
ForkNumber forknum;
|
||||
BlockNumber blkno;
|
||||
|
||||
/* Workspace for remembering last known buffer holding this block. */
|
||||
Buffer recent_buffer;
|
||||
|
||||
/* copy of the fork_flags field from the XLogRecordBlockHeader */
|
||||
uint8 flags;
|
||||
|
||||
@ -100,61 +144,12 @@ typedef struct
|
||||
uint16 data_bufsz;
|
||||
} DecodedBkpBlock;
|
||||
|
||||
/* Return code from XLogReadRecord */
|
||||
typedef enum XLogReadRecordResult
|
||||
{
|
||||
XLREAD_SUCCESS, /* record is successfully read */
|
||||
XLREAD_NEED_DATA, /* need more data. see XLogReadRecord. */
|
||||
XLREAD_FULL, /* cannot hold more data while reading ahead */
|
||||
XLREAD_FAIL /* failed during reading a record */
|
||||
} XLogReadRecordResult;
|
||||
|
||||
/*
|
||||
* internal state of XLogReadRecord
|
||||
*
|
||||
* XLogReadState runs a state machine while reading a record. These states
|
||||
* are not seen outside the function. Each state may repeat several times
|
||||
* exiting to ask the caller for new data. See the comment of XLogReadRecord
|
||||
* for details.
|
||||
*/
|
||||
typedef enum XLogReadRecordState
|
||||
{
|
||||
XLREAD_NEXT_RECORD,
|
||||
XLREAD_TOT_LEN,
|
||||
XLREAD_FIRST_FRAGMENT,
|
||||
XLREAD_CONTINUATION
|
||||
} XLogReadRecordState;
|
||||
|
||||
/*
|
||||
* The decoded contents of a record. This occupies a contiguous region of
|
||||
* memory, with main_data and blocks[n].data pointing to memory after the
|
||||
* members declared here.
|
||||
*/
|
||||
typedef struct DecodedXLogRecord
|
||||
{
|
||||
/* Private member used for resource management. */
|
||||
size_t size; /* total size of decoded record */
|
||||
bool oversized; /* outside the regular decode buffer? */
|
||||
struct DecodedXLogRecord *next; /* decoded record queue link */
|
||||
|
||||
/* Public members. */
|
||||
XLogRecPtr lsn; /* location */
|
||||
XLogRecPtr next_lsn; /* location of next record */
|
||||
XLogRecord header; /* header */
|
||||
RepOriginId record_origin;
|
||||
TransactionId toplevel_xid; /* XID of top-level transaction */
|
||||
char *main_data; /* record's main data portion */
|
||||
uint32 main_data_len; /* main data portion's length */
|
||||
int max_block_id; /* highest block_id in use (-1 if none) */
|
||||
DecodedBkpBlock blocks[FLEXIBLE_ARRAY_MEMBER];
|
||||
} DecodedXLogRecord;
|
||||
|
||||
struct XLogReaderState
|
||||
{
|
||||
/*
|
||||
* Operational callbacks
|
||||
*/
|
||||
WALSegmentCleanupCB cleanup_cb;
|
||||
XLogReaderRoutine routine;
|
||||
|
||||
/* ----------------------------------------
|
||||
* Public parameters
|
||||
@ -167,33 +162,19 @@ struct XLogReaderState
|
||||
*/
|
||||
uint64 system_identifier;
|
||||
|
||||
/*
|
||||
* Opaque data for callbacks to use. Not used by XLogReader.
|
||||
*/
|
||||
void *private_data;
|
||||
|
||||
/*
|
||||
* Start and end point of last record read. EndRecPtr is also used as the
|
||||
* position to read next. Calling XLogBeginRead() sets EndRecPtr to the
|
||||
* starting position and ReadRecPtr to invalid.
|
||||
*
|
||||
* Start and end point of last record returned by XLogReadRecord(). These
|
||||
* are also available as record->lsn and record->next_lsn.
|
||||
*/
|
||||
XLogRecPtr ReadRecPtr; /* start of last record read or being read */
|
||||
XLogRecPtr ReadRecPtr; /* start of last record read */
|
||||
XLogRecPtr EndRecPtr; /* end+1 of last record read */
|
||||
|
||||
/* ----------------------------------------
|
||||
* Communication with page reader
|
||||
* readBuf is XLOG_BLCKSZ bytes, valid up to at least reqLen bytes.
|
||||
* ----------------------------------------
|
||||
*/
|
||||
/* variables the clients of xlogreader can examine */
|
||||
XLogRecPtr readPagePtr; /* page pointer to read */
|
||||
int32 reqLen; /* bytes requested to the caller */
|
||||
char *readBuf; /* buffer to store data */
|
||||
bool page_verified; /* is the page header on the buffer verified? */
|
||||
bool record_verified;/* is the current record header verified? */
|
||||
|
||||
/* variables set by the client of xlogreader */
|
||||
int32 readLen; /* actual bytes copied into readBuf by client,
|
||||
* which should be >= reqLen. Client should
|
||||
* use XLogReaderSetInputData() to set. */
|
||||
|
||||
/* ----------------------------------------
|
||||
* Decoded representation of current record
|
||||
@ -201,17 +182,21 @@ struct XLogReaderState
|
||||
* Use XLogRecGet* functions to investigate the record; these fields
|
||||
* should not be accessed directly.
|
||||
* ----------------------------------------
|
||||
* Start and end point of the last record read and decoded by
|
||||
* XLogReadRecordInternal(). NextRecPtr is also used as the position to
|
||||
* decode next. Calling XLogBeginRead() sets NextRecPtr and EndRecPtr to
|
||||
* the requested starting position.
|
||||
*/
|
||||
XLogRecPtr DecodeRecPtr; /* start of last record decoded */
|
||||
XLogRecPtr NextRecPtr; /* end+1 of last record decoded */
|
||||
XLogRecPtr PrevRecPtr; /* start of previous record decoded */
|
||||
XLogRecord *decoded_record; /* currently decoded record */
|
||||
|
||||
/* Last record returned by XLogReadRecord(). */
|
||||
DecodedXLogRecord *record;
|
||||
char *main_data; /* record's main data portion */
|
||||
uint32 main_data_len; /* main data portion's length */
|
||||
uint32 main_data_bufsz; /* allocated size of the buffer */
|
||||
|
||||
RepOriginId record_origin;
|
||||
|
||||
TransactionId toplevel_xid; /* XID of top-level transaction */
|
||||
|
||||
/* information about blocks referenced by the record. */
|
||||
DecodedBkpBlock blocks[XLR_MAX_BLOCK_ID + 1];
|
||||
|
||||
int max_block_id; /* highest block_id in use (-1 if none) */
|
||||
|
||||
/* ----------------------------------------
|
||||
* private/internal state
|
||||
@ -219,24 +204,11 @@ struct XLogReaderState
|
||||
*/
|
||||
|
||||
/*
|
||||
* Buffer for decoded records. This is a circular buffer, though
|
||||
* individual records can't be split in the middle, so some space is often
|
||||
* wasted at the end. Oversized records that don't fit in this space are
|
||||
* allocated separately.
|
||||
* Buffer for currently read page (XLOG_BLCKSZ bytes, valid up to at least
|
||||
* readLen bytes)
|
||||
*/
|
||||
char *decode_buffer;
|
||||
size_t decode_buffer_size;
|
||||
bool free_decode_buffer; /* need to free? */
|
||||
char *decode_buffer_head; /* write head */
|
||||
char *decode_buffer_tail; /* read head */
|
||||
|
||||
/*
|
||||
* Queue of records that have been decoded. This is a linked list that
|
||||
* usually consists of consecutive records in decode_buffer, but may also
|
||||
* contain oversized records allocated with palloc().
|
||||
*/
|
||||
DecodedXLogRecord *decode_queue_head; /* newest decoded record */
|
||||
DecodedXLogRecord *decode_queue_tail; /* oldest decoded record */
|
||||
char *readBuf;
|
||||
uint32 readLen;
|
||||
|
||||
/* last read XLOG position for data currently in readBuf */
|
||||
WALSegmentContext segcxt;
|
||||
@ -250,6 +222,8 @@ struct XLogReaderState
|
||||
XLogRecPtr latestPagePtr;
|
||||
TimeLineID latestPageTLI;
|
||||
|
||||
/* beginning of the WAL record being read. */
|
||||
XLogRecPtr currRecPtr;
|
||||
/* timeline to read it from, 0 if a lookup is required */
|
||||
TimeLineID currTLI;
|
||||
|
||||
@ -276,70 +250,29 @@ struct XLogReaderState
|
||||
char *readRecordBuf;
|
||||
uint32 readRecordBufSize;
|
||||
|
||||
/*
|
||||
* XLogReadRecordInternal() state
|
||||
*/
|
||||
XLogReadRecordState readRecordState; /* state machine state */
|
||||
int recordGotLen; /* amount of current record that has already
|
||||
* been read */
|
||||
int recordRemainLen; /* length of current record that remains */
|
||||
XLogRecPtr recordContRecPtr; /* where the current record continues */
|
||||
|
||||
DecodedXLogRecord *decoding; /* record currently being decoded */
|
||||
|
||||
/* Buffer to hold error message */
|
||||
char *errormsg_buf;
|
||||
bool errormsg_deferred;
|
||||
};
|
||||
|
||||
struct XLogFindNextRecordState
|
||||
{
|
||||
XLogReaderState *reader_state;
|
||||
XLogRecPtr targetRecPtr;
|
||||
XLogRecPtr currRecPtr;
|
||||
};
|
||||
|
||||
/* Report that data is available for decoding. */
|
||||
static inline void
|
||||
XLogReaderSetInputData(XLogReaderState *state, int32 len)
|
||||
{
|
||||
state->readLen = len;
|
||||
}
|
||||
|
||||
/* Get a new XLogReader */
|
||||
extern XLogReaderState *XLogReaderAllocate(int wal_segment_size,
|
||||
const char *waldir,
|
||||
WALSegmentCleanupCB cleanup_cb);
|
||||
XLogReaderRoutine *routine,
|
||||
void *private_data);
|
||||
extern XLogReaderRoutine *LocalXLogReaderRoutine(void);
|
||||
|
||||
/* Free an XLogReader */
|
||||
extern void XLogReaderFree(XLogReaderState *state);
|
||||
|
||||
/* Optionally provide a circular decoding buffer to allow readahead. */
|
||||
extern void XLogReaderSetDecodeBuffer(XLogReaderState *state,
|
||||
void *buffer,
|
||||
size_t size);
|
||||
|
||||
/* Position the XLogReader to given record */
|
||||
extern void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr);
|
||||
#ifdef FRONTEND
|
||||
extern XLogFindNextRecordState *InitXLogFindNextRecord(XLogReaderState *reader_state, XLogRecPtr start_ptr);
|
||||
extern bool XLogFindNextRecord(XLogFindNextRecordState *state);
|
||||
extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr);
|
||||
#endif /* FRONTEND */
|
||||
|
||||
/* Read the next record's header. Returns NULL on end-of-WAL or failure. */
|
||||
extern XLogReadRecordResult XLogReadRecord(XLogReaderState *state,
|
||||
XLogRecord **record,
|
||||
char **errormsg);
|
||||
|
||||
/* Read the next decoded record. Returns NULL on end-of-WAL or failure. */
|
||||
extern XLogReadRecordResult XLogNextRecord(XLogReaderState *state,
|
||||
DecodedXLogRecord **record,
|
||||
char **errormsg);
|
||||
|
||||
/* Try to read ahead, if there is space in the decoding buffer. */
|
||||
extern XLogReadRecordResult XLogReadAhead(XLogReaderState *state,
|
||||
DecodedXLogRecord **record,
|
||||
char **errormsg);
|
||||
/* Read the next XLog record. Returns NULL on end-of-WAL or failure */
|
||||
extern struct XLogRecord *XLogReadRecord(XLogReaderState *state,
|
||||
char **errormsg);
|
||||
|
||||
/* Validate a page */
|
||||
extern bool XLogReaderValidatePageHeader(XLogReaderState *state,
|
||||
@ -359,38 +292,30 @@ typedef struct WALReadError
|
||||
} WALReadError;
|
||||
|
||||
extern bool WALRead(XLogReaderState *state,
|
||||
WALSegmentOpenCB segopenfn, WALSegmentCloseCB sgclosefn,
|
||||
char *buf, XLogRecPtr startptr, Size count,
|
||||
TimeLineID tli, WALReadError *errinfo);
|
||||
|
||||
/* Functions for decoding an XLogRecord */
|
||||
|
||||
extern size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len);
|
||||
extern bool DecodeXLogRecord(XLogReaderState *state,
|
||||
DecodedXLogRecord *decoded,
|
||||
XLogRecord *record,
|
||||
XLogRecPtr lsn,
|
||||
extern bool DecodeXLogRecord(XLogReaderState *state, XLogRecord *record,
|
||||
char **errmsg);
|
||||
|
||||
#define XLogRecGetTotalLen(decoder) ((decoder)->record->header.xl_tot_len)
|
||||
#define XLogRecGetPrev(decoder) ((decoder)->record->header.xl_prev)
|
||||
#define XLogRecGetInfo(decoder) ((decoder)->record->header.xl_info)
|
||||
#define XLogRecGetRmid(decoder) ((decoder)->record->header.xl_rmid)
|
||||
#define XLogRecGetXid(decoder) ((decoder)->record->header.xl_xid)
|
||||
#define XLogRecGetOrigin(decoder) ((decoder)->record->record_origin)
|
||||
#define XLogRecGetTopXid(decoder) ((decoder)->record->toplevel_xid)
|
||||
#define XLogRecGetData(decoder) ((decoder)->record->main_data)
|
||||
#define XLogRecGetDataLen(decoder) ((decoder)->record->main_data_len)
|
||||
#define XLogRecHasAnyBlockRefs(decoder) ((decoder)->record->max_block_id >= 0)
|
||||
#define XLogRecMaxBlockId(decoder) ((decoder)->record->max_block_id)
|
||||
#define XLogRecGetBlock(decoder, i) (&(decoder)->record->blocks[(i)])
|
||||
#define XLogRecGetTotalLen(decoder) ((decoder)->decoded_record->xl_tot_len)
|
||||
#define XLogRecGetPrev(decoder) ((decoder)->decoded_record->xl_prev)
|
||||
#define XLogRecGetInfo(decoder) ((decoder)->decoded_record->xl_info)
|
||||
#define XLogRecGetRmid(decoder) ((decoder)->decoded_record->xl_rmid)
|
||||
#define XLogRecGetXid(decoder) ((decoder)->decoded_record->xl_xid)
|
||||
#define XLogRecGetOrigin(decoder) ((decoder)->record_origin)
|
||||
#define XLogRecGetTopXid(decoder) ((decoder)->toplevel_xid)
|
||||
#define XLogRecGetData(decoder) ((decoder)->main_data)
|
||||
#define XLogRecGetDataLen(decoder) ((decoder)->main_data_len)
|
||||
#define XLogRecHasAnyBlockRefs(decoder) ((decoder)->max_block_id >= 0)
|
||||
#define XLogRecHasBlockRef(decoder, block_id) \
|
||||
((decoder)->record->max_block_id >= (block_id) && \
|
||||
(decoder)->record->blocks[block_id].in_use)
|
||||
((decoder)->blocks[block_id].in_use)
|
||||
#define XLogRecHasBlockImage(decoder, block_id) \
|
||||
((decoder)->record->blocks[block_id].has_image)
|
||||
((decoder)->blocks[block_id].has_image)
|
||||
#define XLogRecBlockImageApply(decoder, block_id) \
|
||||
((decoder)->record->blocks[block_id].apply_image)
|
||||
((decoder)->blocks[block_id].apply_image)
|
||||
|
||||
#ifndef FRONTEND
|
||||
extern FullTransactionId XLogRecGetFullXid(XLogReaderState *record);
|
||||
@ -401,8 +326,5 @@ extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *
|
||||
extern bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
|
||||
RelFileNode *rnode, ForkNumber *forknum,
|
||||
BlockNumber *blknum);
|
||||
extern bool XLogRecGetRecentBuffer(XLogReaderState *record, uint8 block_id,
|
||||
RelFileNode *rnode, ForkNumber *forknum,
|
||||
BlockNumber *blknum, Buffer *recent_buffer);
|
||||
|
||||
#endif /* XLOGREADER_H */
|
||||
|
Reference in New Issue
Block a user