mirror of
https://github.com/postgres/postgres.git
synced 2025-10-24 01:29:19 +03:00
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and block(s) in a standardized format. That makes it easier to write tools that need that information, like pg_rewind, prefetching the blocks to speed up recovery, etc. There's a whole new API for building WAL records, replacing the XLogRecData chains used previously. The new API consists of XLogRegister* functions, which are called for each buffer and chunk of data that is added to the record. The new API also gives more control over when a full-page image is written, by passing flags to the XLogRegisterBuffer function. This also simplifies the XLogReadBufferForRedo() calls. The function can dig the relation and block number from the WAL record, so they no longer need to be passed as arguments. For the convenience of redo routines, XLogReader now dissects each WAL record after reading it, copying the main data part and the per-block data into MAXALIGNed buffers. The data chunks are not aligned within the WAL record, but the redo routines can assume that the pointers returned by XLogRecGet* functions are. Redo routines are now passed the XLogReaderState, which contains the record in the already-dissected format, instead of the plain XLogRecord. The new record format also makes the fixed size XLogRecord header smaller, by removing the xl_len field. The length of the "main data" portion is now stored at the end of the WAL record, and there's a separate header after XLogRecord for it. The alignment padding at the end of XLogRecord is also removed. This compensates for the fact that the new format would otherwise be more bulky than the old format. Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera, Fujii Masao.
This commit is contained in:
@@ -14,7 +14,7 @@
|
||||
#ifndef BRIN_XLOG_H
|
||||
#define BRIN_XLOG_H
|
||||
|
||||
#include "access/xlogrecord.h"
|
||||
#include "access/xlogreader.h"
|
||||
#include "lib/stringinfo.h"
|
||||
#include "storage/bufpage.h"
|
||||
#include "storage/itemptr.h"
|
||||
@@ -42,59 +42,82 @@
|
||||
*/
|
||||
#define XLOG_BRIN_INIT_PAGE 0x80
|
||||
|
||||
/* This is what we need to know about a BRIN index create */
|
||||
/*
|
||||
* This is what we need to know about a BRIN index create.
|
||||
*
|
||||
* Backup block 0: metapage
|
||||
*/
|
||||
typedef struct xl_brin_createidx
|
||||
{
|
||||
BlockNumber pagesPerRange;
|
||||
RelFileNode node;
|
||||
uint16 version;
|
||||
} xl_brin_createidx;
|
||||
#define SizeOfBrinCreateIdx (offsetof(xl_brin_createidx, version) + sizeof(uint16))
|
||||
|
||||
/*
|
||||
* This is what we need to know about a BRIN tuple insert
|
||||
*
|
||||
* Backup block 0: main page, block data is the new BrinTuple.
|
||||
* Backup block 1: revmap page
|
||||
*/
|
||||
typedef struct xl_brin_insert
|
||||
{
|
||||
RelFileNode node;
|
||||
BlockNumber heapBlk;
|
||||
|
||||
/* extra information needed to update the revmap */
|
||||
BlockNumber revmapBlk;
|
||||
BlockNumber pagesPerRange;
|
||||
|
||||
uint16 tuplen;
|
||||
ItemPointerData tid;
|
||||
/* tuple data follows at end of struct */
|
||||
/* offset number in the main page to insert the tuple to. */
|
||||
OffsetNumber offnum;
|
||||
} xl_brin_insert;
|
||||
|
||||
#define SizeOfBrinInsert (offsetof(xl_brin_insert, tid) + sizeof(ItemPointerData))
|
||||
#define SizeOfBrinInsert (offsetof(xl_brin_insert, offnum) + sizeof(OffsetNumber))
|
||||
|
||||
/*
|
||||
* A cross-page update is the same as an insert, but also store the old tid.
|
||||
* A cross-page update is the same as an insert, but also stores information
|
||||
* about the old tuple.
|
||||
*
|
||||
* Like in xl_brin_insert:
|
||||
* Backup block 0: new page, block data includes the new BrinTuple.
|
||||
* Backup block 1: revmap page
|
||||
*
|
||||
* And in addition:
|
||||
* Backup block 2: old page
|
||||
*/
|
||||
typedef struct xl_brin_update
|
||||
{
|
||||
ItemPointerData oldtid;
|
||||
/* offset number of old tuple on old page */
|
||||
OffsetNumber oldOffnum;
|
||||
|
||||
xl_brin_insert insert;
|
||||
} xl_brin_update;
|
||||
|
||||
#define SizeOfBrinUpdate (offsetof(xl_brin_update, insert) + SizeOfBrinInsert)
|
||||
|
||||
/* This is what we need to know about a BRIN tuple samepage update */
|
||||
/*
|
||||
* This is what we need to know about a BRIN tuple samepage update
|
||||
*
|
||||
* Backup block 0: updated page, with new BrinTuple as block data
|
||||
*/
|
||||
typedef struct xl_brin_samepage_update
|
||||
{
|
||||
RelFileNode node;
|
||||
ItemPointerData tid;
|
||||
/* tuple data follows at end of struct */
|
||||
OffsetNumber offnum;
|
||||
} xl_brin_samepage_update;
|
||||
|
||||
#define SizeOfBrinSamepageUpdate (offsetof(xl_brin_samepage_update, tid) + sizeof(ItemPointerData))
|
||||
#define SizeOfBrinSamepageUpdate (sizeof(OffsetNumber))
|
||||
|
||||
/* This is what we need to know about a revmap extension */
|
||||
/*
|
||||
* This is what we need to know about a revmap extension
|
||||
*
|
||||
* Backup block 0: metapage
|
||||
* Backup block 1: new revmap page
|
||||
*/
|
||||
typedef struct xl_brin_revmap_extend
|
||||
{
|
||||
RelFileNode node;
|
||||
/*
|
||||
* XXX: This is actually redundant - the block number is stored as part of
|
||||
* backup block 1.
|
||||
*/
|
||||
BlockNumber targetBlk;
|
||||
} xl_brin_revmap_extend;
|
||||
|
||||
@@ -102,8 +125,8 @@ typedef struct xl_brin_revmap_extend
|
||||
sizeof(BlockNumber))
|
||||
|
||||
|
||||
extern void brin_desc(StringInfo buf, XLogRecord *record);
|
||||
extern void brin_redo(XLogRecPtr lsn, XLogRecord *record);
|
||||
extern void brin_redo(XLogReaderState *record);
|
||||
extern void brin_desc(StringInfo buf, XLogReaderState *record);
|
||||
extern const char *brin_identify(uint8 info);
|
||||
|
||||
#endif /* BRIN_XLOG_H */
|
||||
|
||||
Reference in New Issue
Block a user