mirror of
https://github.com/postgres/postgres.git
synced 2025-10-24 01:29:19 +03:00
Add WAL consistency checking facility.
When the new GUC wal_consistency_checking is set to a non-empty value, it triggers recording of additional full-page images, which are compared on the standby against the results of applying the WAL record (without regard to those full-page images). Allowable differences such as hints are masked out, and the resulting pages are compared; any difference results in a FATAL error on the standby. Kuntal Ghosh, based on earlier patches by Michael Paquier and Heikki Linnakangas. Extensively reviewed and revised by Michael Paquier and by me, with additional reviews and comments from Amit Kapila, Álvaro Herrera, Simon Riggs, and Peter Eisentraut.
This commit is contained in:
@@ -128,5 +128,6 @@ typedef struct xl_brin_revmap_extend
|
||||
extern void brin_redo(XLogReaderState *record);
|
||||
extern void brin_desc(StringInfo buf, XLogReaderState *record);
|
||||
extern const char *brin_identify(uint8 info);
|
||||
extern void brin_mask(char *pagedata, BlockNumber blkno);
|
||||
|
||||
#endif /* BRIN_XLOG_H */
|
||||
|
||||
33
src/include/access/bufmask.h
Normal file
33
src/include/access/bufmask.h
Normal file
@@ -0,0 +1,33 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* bufmask.h
|
||||
* Definitions for buffer masking routines, used to mask certain bits
|
||||
* in a page which can be different when the WAL is generated
|
||||
* and when the WAL is applied. This is really the job of each
|
||||
* individual rmgr, but we make things easier by providing some
|
||||
* common routines to handle cases which occur in multiple rmgrs.
|
||||
*
|
||||
* Portions Copyright (c) 2016, PostgreSQL Global Development Group
|
||||
*
|
||||
* src/include/access/bufmask.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef BUFMASK_H
|
||||
#define BUFMASK_H
|
||||
|
||||
/*
 * NOTE(review): this header pulls in "postgres.h" itself; presumably every
 * including .c file already includes it first -- confirm whether this
 * include is needed here.
 */
#include "postgres.h"
|
||||
#include "storage/block.h"
|
||||
#include "storage/bufmgr.h"
|
||||
|
||||
/* Marker used to mask pages consistently */
|
||||
#define MASK_MARKER 0
|
||||
|
||||
/*
 * Common masking routines shared by several rmgrs (see the file header).
 * Each takes the page to be masked.  NOTE(review): judging by the names
 * alone, these presumably cover the page LSN, page-level hint bits, unused
 * space, line-pointer flags and the whole page content respectively --
 * confirm against the implementations, which are not visible here.
 */
extern void mask_page_lsn(Page page);
|
||||
extern void mask_page_hint_bits(Page page);
|
||||
extern void mask_unused_space(Page page);
|
||||
extern void mask_lp_flags(Page page);
|
||||
extern void mask_page_content(Page page);
|
||||
|
||||
#endif   /* BUFMASK_H */
|
||||
@@ -40,5 +40,6 @@ extern void GenericXLogAbort(GenericXLogState *state);
|
||||
extern void generic_redo(XLogReaderState *record);
|
||||
extern const char *generic_identify(uint8 info);
|
||||
extern void generic_desc(StringInfo buf, XLogReaderState *record);
|
||||
extern void generic_mask(char *pagedata, BlockNumber blkno);
|
||||
|
||||
#endif /* GENERIC_XLOG_H */
|
||||
|
||||
@@ -79,5 +79,6 @@ extern void gin_desc(StringInfo buf, XLogReaderState *record);
|
||||
extern const char *gin_identify(uint8 info);
|
||||
extern void gin_xlog_startup(void);
|
||||
extern void gin_xlog_cleanup(void);
|
||||
extern void gin_mask(char *pagedata, BlockNumber blkno);
|
||||
|
||||
#endif /* GIN_H */
|
||||
|
||||
@@ -459,6 +459,7 @@ extern void gist_desc(StringInfo buf, XLogReaderState *record);
|
||||
extern const char *gist_identify(uint8 info);
|
||||
extern void gist_xlog_startup(void);
|
||||
extern void gist_xlog_cleanup(void);
|
||||
extern void gist_mask(char *pagedata, BlockNumber blkno);
|
||||
|
||||
extern XLogRecPtr gistXLogUpdate(Buffer buffer,
|
||||
OffsetNumber *todelete, int ntodelete,
|
||||
|
||||
@@ -373,6 +373,7 @@ extern void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple,
|
||||
extern void heap_redo(XLogReaderState *record);
|
||||
extern void heap_desc(StringInfo buf, XLogReaderState *record);
|
||||
extern const char *heap_identify(uint8 info);
|
||||
extern void heap_mask(char *pagedata, BlockNumber blkno);
|
||||
extern void heap2_redo(XLogReaderState *record);
|
||||
extern void heap2_desc(StringInfo buf, XLogReaderState *record);
|
||||
extern const char *heap2_identify(uint8 info);
|
||||
|
||||
@@ -774,5 +774,6 @@ extern void _bt_leafbuild(BTSpool *btspool, BTSpool *spool2);
|
||||
extern void btree_redo(XLogReaderState *record);
|
||||
extern void btree_desc(StringInfo buf, XLogReaderState *record);
|
||||
extern const char *btree_identify(uint8 info);
|
||||
extern void btree_mask(char *pagedata, BlockNumber blkno);
|
||||
|
||||
#endif /* NBTREE_H */
|
||||
|
||||
@@ -19,7 +19,7 @@ typedef uint8 RmgrId;
|
||||
* Note: RM_MAX_ID must fit in RmgrId; widening that type will affect the XLOG
|
||||
* file format.
|
||||
*/
|
||||
#define PG_RMGR(symname,name,redo,desc,identify,startup,cleanup) \
|
||||
#define PG_RMGR(symname,name,redo,desc,identify,startup,cleanup,mask) \
|
||||
symname,
|
||||
|
||||
typedef enum RmgrIds
|
||||
|
||||
@@ -25,25 +25,25 @@
|
||||
*/
|
||||
|
||||
/* symbol name, textual name, redo, desc, identify, startup, cleanup, mask */
|
||||
PG_RMGR(RM_XLOG_ID, "XLOG", xlog_redo, xlog_desc, xlog_identify, NULL, NULL)
|
||||
PG_RMGR(RM_XACT_ID, "Transaction", xact_redo, xact_desc, xact_identify, NULL, NULL)
|
||||
PG_RMGR(RM_SMGR_ID, "Storage", smgr_redo, smgr_desc, smgr_identify, NULL, NULL)
|
||||
PG_RMGR(RM_CLOG_ID, "CLOG", clog_redo, clog_desc, clog_identify, NULL, NULL)
|
||||
PG_RMGR(RM_DBASE_ID, "Database", dbase_redo, dbase_desc, dbase_identify, NULL, NULL)
|
||||
PG_RMGR(RM_TBLSPC_ID, "Tablespace", tblspc_redo, tblspc_desc, tblspc_identify, NULL, NULL)
|
||||
PG_RMGR(RM_MULTIXACT_ID, "MultiXact", multixact_redo, multixact_desc, multixact_identify, NULL, NULL)
|
||||
PG_RMGR(RM_RELMAP_ID, "RelMap", relmap_redo, relmap_desc, relmap_identify, NULL, NULL)
|
||||
PG_RMGR(RM_STANDBY_ID, "Standby", standby_redo, standby_desc, standby_identify, NULL, NULL)
|
||||
PG_RMGR(RM_HEAP2_ID, "Heap2", heap2_redo, heap2_desc, heap2_identify, NULL, NULL)
|
||||
PG_RMGR(RM_HEAP_ID, "Heap", heap_redo, heap_desc, heap_identify, NULL, NULL)
|
||||
PG_RMGR(RM_BTREE_ID, "Btree", btree_redo, btree_desc, btree_identify, NULL, NULL)
|
||||
PG_RMGR(RM_HASH_ID, "Hash", hash_redo, hash_desc, hash_identify, NULL, NULL)
|
||||
PG_RMGR(RM_GIN_ID, "Gin", gin_redo, gin_desc, gin_identify, gin_xlog_startup, gin_xlog_cleanup)
|
||||
PG_RMGR(RM_GIST_ID, "Gist", gist_redo, gist_desc, gist_identify, gist_xlog_startup, gist_xlog_cleanup)
|
||||
PG_RMGR(RM_SEQ_ID, "Sequence", seq_redo, seq_desc, seq_identify, NULL, NULL)
|
||||
PG_RMGR(RM_SPGIST_ID, "SPGist", spg_redo, spg_desc, spg_identify, spg_xlog_startup, spg_xlog_cleanup)
|
||||
PG_RMGR(RM_BRIN_ID, "BRIN", brin_redo, brin_desc, brin_identify, NULL, NULL)
|
||||
PG_RMGR(RM_COMMIT_TS_ID, "CommitTs", commit_ts_redo, commit_ts_desc, commit_ts_identify, NULL, NULL)
|
||||
PG_RMGR(RM_REPLORIGIN_ID, "ReplicationOrigin", replorigin_redo, replorigin_desc, replorigin_identify, NULL, NULL)
|
||||
PG_RMGR(RM_GENERIC_ID, "Generic", generic_redo, generic_desc, generic_identify, NULL, NULL)
|
||||
PG_RMGR(RM_LOGICALMSG_ID, "LogicalMessage", logicalmsg_redo, logicalmsg_desc, logicalmsg_identify, NULL, NULL)
|
||||
PG_RMGR(RM_XLOG_ID, "XLOG", xlog_redo, xlog_desc, xlog_identify, NULL, NULL, NULL)
|
||||
PG_RMGR(RM_XACT_ID, "Transaction", xact_redo, xact_desc, xact_identify, NULL, NULL, NULL)
|
||||
PG_RMGR(RM_SMGR_ID, "Storage", smgr_redo, smgr_desc, smgr_identify, NULL, NULL, NULL)
|
||||
PG_RMGR(RM_CLOG_ID, "CLOG", clog_redo, clog_desc, clog_identify, NULL, NULL, NULL)
|
||||
PG_RMGR(RM_DBASE_ID, "Database", dbase_redo, dbase_desc, dbase_identify, NULL, NULL, NULL)
|
||||
PG_RMGR(RM_TBLSPC_ID, "Tablespace", tblspc_redo, tblspc_desc, tblspc_identify, NULL, NULL, NULL)
|
||||
PG_RMGR(RM_MULTIXACT_ID, "MultiXact", multixact_redo, multixact_desc, multixact_identify, NULL, NULL, NULL)
|
||||
PG_RMGR(RM_RELMAP_ID, "RelMap", relmap_redo, relmap_desc, relmap_identify, NULL, NULL, NULL)
|
||||
PG_RMGR(RM_STANDBY_ID, "Standby", standby_redo, standby_desc, standby_identify, NULL, NULL, NULL)
|
||||
PG_RMGR(RM_HEAP2_ID, "Heap2", heap2_redo, heap2_desc, heap2_identify, NULL, NULL, heap_mask)
|
||||
PG_RMGR(RM_HEAP_ID, "Heap", heap_redo, heap_desc, heap_identify, NULL, NULL, heap_mask)
|
||||
PG_RMGR(RM_BTREE_ID, "Btree", btree_redo, btree_desc, btree_identify, NULL, NULL, btree_mask)
|
||||
PG_RMGR(RM_HASH_ID, "Hash", hash_redo, hash_desc, hash_identify, NULL, NULL, NULL)
|
||||
PG_RMGR(RM_GIN_ID, "Gin", gin_redo, gin_desc, gin_identify, gin_xlog_startup, gin_xlog_cleanup, gin_mask)
|
||||
PG_RMGR(RM_GIST_ID, "Gist", gist_redo, gist_desc, gist_identify, gist_xlog_startup, gist_xlog_cleanup, gist_mask)
|
||||
PG_RMGR(RM_SEQ_ID, "Sequence", seq_redo, seq_desc, seq_identify, NULL, NULL, seq_mask)
|
||||
PG_RMGR(RM_SPGIST_ID, "SPGist", spg_redo, spg_desc, spg_identify, spg_xlog_startup, spg_xlog_cleanup, spg_mask)
|
||||
PG_RMGR(RM_BRIN_ID, "BRIN", brin_redo, brin_desc, brin_identify, NULL, NULL, brin_mask)
|
||||
PG_RMGR(RM_COMMIT_TS_ID, "CommitTs", commit_ts_redo, commit_ts_desc, commit_ts_identify, NULL, NULL, NULL)
|
||||
PG_RMGR(RM_REPLORIGIN_ID, "ReplicationOrigin", replorigin_redo, replorigin_desc, replorigin_identify, NULL, NULL, NULL)
|
||||
PG_RMGR(RM_GENERIC_ID, "Generic", generic_redo, generic_desc, generic_identify, NULL, NULL, generic_mask)
|
||||
PG_RMGR(RM_LOGICALMSG_ID, "LogicalMessage", logicalmsg_redo, logicalmsg_desc, logicalmsg_identify, NULL, NULL, NULL)
|
||||
|
||||
@@ -219,5 +219,6 @@ extern void spg_desc(StringInfo buf, XLogReaderState *record);
|
||||
extern const char *spg_identify(uint8 info);
|
||||
extern void spg_xlog_startup(void);
|
||||
extern void spg_xlog_cleanup(void);
|
||||
extern void spg_mask(char *pagedata, BlockNumber blkno);
|
||||
|
||||
#endif /* SPGIST_H */
|
||||
|
||||
@@ -105,6 +105,8 @@ extern bool EnableHotStandby;
|
||||
extern bool fullPageWrites;
|
||||
extern bool wal_log_hints;
|
||||
extern bool wal_compression;
|
||||
extern bool *wal_consistency_checking;
|
||||
extern char *wal_consistency_checking_string;
|
||||
extern bool log_checkpoints;
|
||||
|
||||
extern int CheckPointSegments;
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
/*
|
||||
* Each page of XLOG file has a header like this:
|
||||
*/
|
||||
#define XLOG_PAGE_MAGIC 0xD094 /* can be used as WAL version indicator */
|
||||
#define XLOG_PAGE_MAGIC 0xD095 /* can be used as WAL version indicator */
|
||||
|
||||
typedef struct XLogPageHeaderData
|
||||
{
|
||||
@@ -266,6 +266,9 @@ typedef enum
|
||||
* "VACUUM". rm_desc can then be called to obtain additional detail for the
|
||||
* record, if available (e.g. the last block).
|
||||
*
|
||||
* rm_mask takes as input a page modified by the resource manager and masks
|
||||
* out bits that shouldn't be flagged by wal_consistency_checking.
|
||||
*
|
||||
* RmgrTable[] is indexed by RmgrId values (see rmgrlist.h).
|
||||
*/
|
||||
typedef struct RmgrData
|
||||
@@ -276,6 +279,7 @@ typedef struct RmgrData
|
||||
const char *(*rm_identify) (uint8 info);
|
||||
void (*rm_startup) (void);
|
||||
void (*rm_cleanup) (void);
|
||||
void (*rm_mask) (char *pagedata, BlockNumber blkno);
|
||||
} RmgrData;
|
||||
|
||||
extern const RmgrData RmgrTable[];
|
||||
|
||||
@@ -51,7 +51,8 @@ typedef struct
|
||||
uint8 flags;
|
||||
|
||||
/* Information on full-page image, if any */
|
||||
bool has_image;
|
||||
bool has_image; /* has image, even for consistency checking */
|
||||
bool apply_image; /* has image that should be restored */
|
||||
char *bkp_image;
|
||||
uint16 hole_offset;
|
||||
uint16 hole_length;
|
||||
@@ -205,6 +206,8 @@ extern bool DecodeXLogRecord(XLogReaderState *state, XLogRecord *record,
|
||||
((decoder)->blocks[block_id].in_use)
|
||||
#define XLogRecHasBlockImage(decoder, block_id) \
|
||||
((decoder)->blocks[block_id].has_image)
|
||||
#define XLogRecBlockImageApply(decoder, block_id) \
|
||||
((decoder)->blocks[block_id].apply_image)
|
||||
|
||||
extern bool RestoreBlockImage(XLogReaderState *recoder, uint8 block_id, char *dst);
|
||||
extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len);
|
||||
|
||||
@@ -56,8 +56,8 @@ typedef struct XLogRecord
|
||||
|
||||
/*
|
||||
* The high 4 bits in xl_info may be used freely by rmgr. The
|
||||
* XLR_SPECIAL_REL_UPDATE bit can be passed by XLogInsert caller. The rest
|
||||
* are set internally by XLogInsert.
|
||||
* XLR_SPECIAL_REL_UPDATE and XLR_CHECK_CONSISTENCY bits can be passed by
|
||||
* XLogInsert caller. The rest are set internally by XLogInsert.
|
||||
*/
|
||||
#define XLR_INFO_MASK 0x0F
|
||||
#define XLR_RMGR_INFO_MASK 0xF0
|
||||
@@ -70,6 +70,15 @@ typedef struct XLogRecord
|
||||
*/
|
||||
#define XLR_SPECIAL_REL_UPDATE 0x01
|
||||
|
||||
/*
|
||||
* Enforces consistency checks of replayed WAL at recovery. If enabled,
|
||||
* each record will log a full-page write for each block modified by the
|
||||
* record and will reuse it afterwards for consistency checks. The caller
|
||||
* of XLogInsert can use this value if necessary, but if
|
||||
* wal_consistency_checking is enabled for a rmgr this is set unconditionally.
|
||||
*/
|
||||
#define XLR_CHECK_CONSISTENCY 0x02
|
||||
|
||||
/*
|
||||
* Header info for block data appended to an XLOG record.
|
||||
*
|
||||
@@ -137,6 +146,7 @@ typedef struct XLogRecordBlockImageHeader
|
||||
/* Information stored in bimg_info */
|
||||
#define BKPIMAGE_HAS_HOLE 0x01 /* page image has "hole" */
|
||||
#define BKPIMAGE_IS_COMPRESSED 0x02 /* page image is compressed */
|
||||
#define BKPIMAGE_APPLY 0x04 /* page image should be restored during replay */
|
||||
|
||||
/*
|
||||
* Extra header information used when page image has "hole" and
|
||||
|
||||
Reference in New Issue
Block a user