mirror of
https://github.com/postgres/postgres.git
synced 2025-10-24 01:29:19 +03:00
Introduce replication progress tracking infrastructure.
When implementing a replication solution on top of logical decoding, two
related problems exist:
* How to safely keep track of replication progress
* How to change replication behavior, based on the origin of a row;
e.g. to avoid loops in bi-directional replication setups
The solution to these problems, as implemented here, consists of
three parts:
1) 'replication origins', which identify nodes in a replication setup.
2) 'replication progress tracking', which remembers, for each
replication origin, how far replay has progressed in an efficient and
crash-safe manner.
3) The ability to filter out changes performed at the behest of a
replication origin during logical decoding; this allows complex
replication topologies, e.g. by filtering out all replayed changes.
Most of this could also be implemented in "userspace", e.g. by inserting
additional rows containing origin information, but that ends up being much
less efficient and more complicated. We don't want to require various
replication solutions to reimplement logic for this independently. The
infrastructure is intended to be generic enough to be reusable.
This infrastructure also replaces the 'nodeid' infrastructure of commit
timestamps. It is intended to provide all the former capabilities,
except that there are only 2^16 different origins; but now they integrate
with logical decoding. Additionally more functionality is accessible via
SQL. Since the commit timestamp infrastructure has also been introduced
in 9.5 (commit 73c986add) changing the API is not a problem.
For now the number of origins for which the replication progress can be
tracked simultaneously is determined by the max_replication_slots
GUC. That GUC is not a perfect match to configure this, but there
doesn't seem to be sufficient reason to introduce a separate new one.
Bumps both catversion and WAL page magic.
Author: Andres Freund, with contributions from Petr Jelinek and Craig Ringer
Reviewed-By: Heikki Linnakangas, Petr Jelinek, Robert Haas, Steve Singer
Discussion: 20150216002155.GI15326@awork2.anarazel.de,
20140923182422.GA15776@alap3.anarazel.de,
20131114172632.GE7522@alap2.anarazel.de
This commit is contained in:
@@ -13,6 +13,7 @@
|
||||
|
||||
#include "access/xlog.h"
|
||||
#include "datatype/timestamp.h"
|
||||
#include "replication/origin.h"
|
||||
#include "utils/guc.h"
|
||||
|
||||
|
||||
@@ -21,18 +22,13 @@ extern PGDLLIMPORT bool track_commit_timestamp;
|
||||
extern bool check_track_commit_timestamp(bool *newval, void **extra,
|
||||
GucSource source);
|
||||
|
||||
typedef uint32 CommitTsNodeId;
|
||||
#define InvalidCommitTsNodeId 0
|
||||
|
||||
extern void CommitTsSetDefaultNodeId(CommitTsNodeId nodeid);
|
||||
extern CommitTsNodeId CommitTsGetDefaultNodeId(void);
|
||||
extern void TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids,
|
||||
TransactionId *subxids, TimestampTz timestamp,
|
||||
CommitTsNodeId nodeid, bool do_xlog);
|
||||
RepOriginId nodeid, bool do_xlog);
|
||||
extern bool TransactionIdGetCommitTsData(TransactionId xid,
|
||||
TimestampTz *ts, CommitTsNodeId *nodeid);
|
||||
TimestampTz *ts, RepOriginId *nodeid);
|
||||
extern TransactionId GetLatestCommitTsData(TimestampTz *ts,
|
||||
CommitTsNodeId *nodeid);
|
||||
RepOriginId *nodeid);
|
||||
|
||||
extern Size CommitTsShmemBuffers(void);
|
||||
extern Size CommitTsShmemSize(void);
|
||||
@@ -58,7 +54,7 @@ extern void AdvanceOldestCommitTs(TransactionId oldestXact);
|
||||
typedef struct xl_commit_ts_set
|
||||
{
|
||||
TimestampTz timestamp;
|
||||
CommitTsNodeId nodeid;
|
||||
RepOriginId nodeid;
|
||||
TransactionId mainxid;
|
||||
/* subxact Xids follow */
|
||||
} xl_commit_ts_set;
|
||||
|
||||
@@ -44,3 +44,4 @@ PG_RMGR(RM_SEQ_ID, "Sequence", seq_redo, seq_desc, seq_identify, NULL, NULL)
|
||||
PG_RMGR(RM_SPGIST_ID, "SPGist", spg_redo, spg_desc, spg_identify, spg_xlog_startup, spg_xlog_cleanup)
|
||||
PG_RMGR(RM_BRIN_ID, "BRIN", brin_redo, brin_desc, brin_identify, NULL, NULL)
|
||||
PG_RMGR(RM_COMMIT_TS_ID, "CommitTs", commit_ts_redo, commit_ts_desc, commit_ts_identify, NULL, NULL)
|
||||
PG_RMGR(RM_REPLORIGIN_ID, "ReplicationOrigin", replorigin_redo, replorigin_desc, replorigin_identify, NULL, NULL)
|
||||
|
||||
@@ -131,6 +131,7 @@ typedef void (*SubXactCallback) (SubXactEvent event, SubTransactionId mySubid,
|
||||
#define XACT_XINFO_HAS_RELFILENODES (1U << 2)
|
||||
#define XACT_XINFO_HAS_INVALS (1U << 3)
|
||||
#define XACT_XINFO_HAS_TWOPHASE (1U << 4)
|
||||
#define XACT_XINFO_HAS_ORIGIN (1U << 5)
|
||||
|
||||
/*
|
||||
 * Also stored in xinfo, these indicate a variety of additional actions that
|
||||
@@ -217,6 +218,12 @@ typedef struct xl_xact_twophase
|
||||
} xl_xact_twophase;
|
||||
#define MinSizeOfXactInvals offsetof(xl_xact_invals, msgs)
|
||||
|
||||
/*
 * Replication-origin part of a commit record.
 *
 * According to the layout notes inside xl_xact_commit, this struct follows
 * in the record when the XINFO_HAS_ORIGIN flag (XACT_XINFO_HAS_ORIGIN) is
 * set.  The same two values are also carried in xl_xact_parsed_commit.
 */
typedef struct xl_xact_origin
|
||||
{
|
||||
XLogRecPtr origin_lsn; /* presumably this commit's LSN on the origin node -- TODO confirm */
|
||||
TimestampTz origin_timestamp; /* presumably the commit time on the origin node -- TODO confirm */
|
||||
} xl_xact_origin;
|
||||
|
||||
typedef struct xl_xact_commit
|
||||
{
|
||||
TimestampTz xact_time; /* time of commit */
|
||||
@@ -227,6 +234,7 @@ typedef struct xl_xact_commit
|
||||
/* xl_xact_relfilenodes follows if XINFO_HAS_RELFILENODES */
|
||||
/* xl_xact_invals follows if XINFO_HAS_INVALS */
|
||||
/* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */
|
||||
/* xl_xact_origin follows if XINFO_HAS_ORIGIN */
|
||||
} xl_xact_commit;
|
||||
#define MinSizeOfXactCommit (offsetof(xl_xact_commit, xact_time) + sizeof(TimestampTz))
|
||||
|
||||
@@ -267,6 +275,9 @@ typedef struct xl_xact_parsed_commit
|
||||
SharedInvalidationMessage *msgs;
|
||||
|
||||
TransactionId twophase_xid; /* only for 2PC */
|
||||
|
||||
XLogRecPtr origin_lsn;
|
||||
TimestampTz origin_timestamp;
|
||||
} xl_xact_parsed_commit;
|
||||
|
||||
typedef struct xl_xact_parsed_abort
|
||||
|
||||
@@ -85,6 +85,7 @@ typedef enum
|
||||
} RecoveryTargetType;
|
||||
|
||||
extern XLogRecPtr XactLastRecEnd;
|
||||
extern PGDLLIMPORT XLogRecPtr XactLastCommitEnd;
|
||||
|
||||
extern bool reachedConsistency;
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
/*
|
||||
* Each page of XLOG file has a header like this:
|
||||
*/
|
||||
#define XLOG_PAGE_MAGIC 0xD083 /* can be used as WAL version indicator */
|
||||
#define XLOG_PAGE_MAGIC 0xD085 /* can be used as WAL version indicator */
|
||||
|
||||
typedef struct XLogPageHeaderData
|
||||
{
|
||||
|
||||
@@ -44,6 +44,12 @@ typedef uint64 XLogSegNo;
|
||||
*/
|
||||
typedef uint32 TimeLineID;
|
||||
|
||||
/*
|
||||
* Replication origin id - this is located in this file to avoid having to
|
||||
* include origin.h in a bunch of xlog related places.
|
||||
*/
|
||||
typedef uint16 RepOriginId;
|
||||
|
||||
/*
|
||||
* Because O_DIRECT bypasses the kernel buffers, and because we never
|
||||
* read those buffers except during crash recovery or if wal_level != minimal,
|
||||
|
||||
@@ -39,6 +39,7 @@
|
||||
|
||||
/* prototypes for public functions in xloginsert.c: */
|
||||
extern void XLogBeginInsert(void);
|
||||
extern void XLogIncludeOrigin(void);
|
||||
extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info);
|
||||
extern void XLogEnsureRecordSpace(int nbuffers, int ndatas);
|
||||
extern void XLogRegisterData(char *data, int len);
|
||||
|
||||
@@ -127,6 +127,8 @@ struct XLogReaderState
|
||||
uint32 main_data_len; /* main data portion's length */
|
||||
uint32 main_data_bufsz; /* allocated size of the buffer */
|
||||
|
||||
RepOriginId record_origin;
|
||||
|
||||
/* information about blocks referenced by the record. */
|
||||
DecodedBkpBlock blocks[XLR_MAX_BLOCK_ID + 1];
|
||||
|
||||
@@ -186,6 +188,7 @@ extern bool DecodeXLogRecord(XLogReaderState *state, XLogRecord *record,
|
||||
#define XLogRecGetInfo(decoder) ((decoder)->decoded_record->xl_info)
|
||||
#define XLogRecGetRmid(decoder) ((decoder)->decoded_record->xl_rmid)
|
||||
#define XLogRecGetXid(decoder) ((decoder)->decoded_record->xl_xid)
|
||||
#define XLogRecGetOrigin(decoder) ((decoder)->record_origin)
|
||||
#define XLogRecGetData(decoder) ((decoder)->main_data)
|
||||
#define XLogRecGetDataLen(decoder) ((decoder)->main_data_len)
|
||||
#define XLogRecHasAnyBlockRefs(decoder) ((decoder)->max_block_id >= 0)
|
||||
|
||||
@@ -212,5 +212,6 @@ typedef struct XLogRecordDataHeaderLong
|
||||
|
||||
#define XLR_BLOCK_ID_DATA_SHORT 255
|
||||
#define XLR_BLOCK_ID_DATA_LONG 254
|
||||
#define XLR_BLOCK_ID_ORIGIN 253
|
||||
|
||||
#endif /* XLOGRECORD_H */
|
||||
|
||||
Reference in New Issue
Block a user