mirror of
https://github.com/postgres/postgres.git
synced 2025-09-02 04:21:28 +03:00
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating if the timeline in the primary doesn't exactly match the standby's. The situation where it doesn't match is when you have a master and two standbys, and you promote one of the standbys to become the new master. Promoting bumps up the timeline ID, and after that bump, the other standby would refuse to continue. There's significantly more timeline-related logic in streaming replication now. First of all, when a standby connects to the primary, it will ask the primary for any timeline history files that are missing from the standby. The missing files are sent using a new replication command, TIMELINE_HISTORY, and stored in the standby's pg_xlog directory. Using the timeline history files, the standby can follow the latest timeline present in the primary (recovery_target_timeline='latest'), just as it can follow new timelines appearing in an archive directory. START_REPLICATION now takes a TIMELINE parameter, to specify exactly which timeline to stream WAL from. This allows the standby to request the primary to send over WAL that precedes the promotion. The replication protocol is changed slightly (in a backwards-compatible way, although there's little hope of streaming replication working across major versions anyway), to allow replication to stop when the end of the timeline is reached, putting the walsender back into accepting a replication command. Many thanks to Amit Kapila for testing and reviewing various versions of this patch.
This commit is contained in:
@@ -34,6 +34,7 @@ extern bool existsTimeLineHistory(TimeLineID probeTLI);
|
||||
extern TimeLineID findNewestTimeLine(TimeLineID startTLI);
|
||||
extern void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
|
||||
XLogRecPtr switchpoint, char *reason);
|
||||
extern void writeTimeLineHistoryFile(TimeLineID tli, char *content, int size);
|
||||
extern bool tliInHistory(TimeLineID tli, List *expectedTLIs);
|
||||
extern TimeLineID tliOfPointInHistory(XLogRecPtr ptr, List *history);
|
||||
extern XLogRecPtr tliSwitchPoint(TimeLineID tli, List *history);
|
||||
|
@@ -283,8 +283,8 @@ extern bool RecoveryInProgress(void);
|
||||
extern bool HotStandbyActive(void);
|
||||
extern bool XLogInsertAllowed(void);
|
||||
extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream);
|
||||
extern XLogRecPtr GetXLogReplayRecPtr(TimeLineID *targetTLI);
|
||||
extern XLogRecPtr GetStandbyFlushRecPtr(TimeLineID *targetTLI);
|
||||
extern XLogRecPtr GetXLogReplayRecPtr(void);
|
||||
extern XLogRecPtr GetStandbyFlushRecPtr(void);
|
||||
extern XLogRecPtr GetXLogInsertRecPtr(void);
|
||||
extern XLogRecPtr GetXLogWriteRecPtr(void);
|
||||
extern bool RecoveryIsPaused(void);
|
||||
|
@@ -407,6 +407,7 @@ typedef enum NodeTag
|
||||
T_IdentifySystemCmd,
|
||||
T_BaseBackupCmd,
|
||||
T_StartReplicationCmd,
|
||||
T_TimeLineHistoryCmd,
|
||||
|
||||
/*
|
||||
* TAGS FOR RANDOM OTHER STUFF
|
||||
|
@@ -46,7 +46,19 @@ typedef struct BaseBackupCmd
|
||||
/*
 * Node carrying the arguments of a START_REPLICATION command.
 *
 * NOTE(review): the field descriptions below are taken from the commit
 * description and the field names; confirm against the replication grammar.
 */
typedef struct StartReplicationCmd
|
||||
{
|
||||
NodeTag type;
|
||||
TimeLineID timeline;	/* timeline to stream WAL from (TIMELINE parameter) */
|
||||
XLogRecPtr startpoint;	/* presumably the WAL position to start streaming at */
|
||||
} StartReplicationCmd;
|
||||
|
||||
|
||||
/* ----------------------
|
||||
* TIMELINE_HISTORY command
|
||||
* ----------------------
|
||||
*/
|
||||
/*
 * Node carrying the argument of a TIMELINE_HISTORY command.
 */
typedef struct TimeLineHistoryCmd
|
||||
{
|
||||
NodeTag type;
|
||||
TimeLineID timeline;	/* presumably the timeline whose history file is requested -- confirm */
|
||||
} TimeLineHistoryCmd;
|
||||
|
||||
#endif /* REPLNODES_H */
|
||||
|
@@ -14,6 +14,7 @@
|
||||
|
||||
#include "access/xlog.h"
|
||||
#include "access/xlogdefs.h"
|
||||
#include "storage/latch.h"
|
||||
#include "storage/spin.h"
|
||||
#include "pgtime.h"
|
||||
|
||||
@@ -40,7 +41,9 @@ typedef enum
|
||||
WALRCV_STOPPED, /* stopped and mustn't start up again */
|
||||
WALRCV_STARTING, /* launched, but the process hasn't
|
||||
* initialized yet */
|
||||
WALRCV_RUNNING, /* walreceiver is running */
|
||||
WALRCV_STREAMING, /* walreceiver is streaming */
|
||||
WALRCV_WAITING, /* stopped streaming, waiting for orders */
|
||||
WALRCV_RESTARTING, /* asked to restart streaming */
|
||||
WALRCV_STOPPING /* requested to stop, but still running */
|
||||
} WalRcvState;
|
||||
|
||||
@@ -57,19 +60,23 @@ typedef struct
|
||||
pg_time_t startTime;
|
||||
|
||||
/*
|
||||
* receiveStart is the first byte position that will be received. When
|
||||
* startup process starts the walreceiver, it sets receiveStart to the
|
||||
* point where it wants the streaming to begin.
|
||||
* receiveStart and receiveStartTLI indicate the first byte position
|
||||
* and timeline that will be received. When startup process starts the
|
||||
* walreceiver, it sets these to the point where it wants the streaming
|
||||
* to begin.
|
||||
*/
|
||||
XLogRecPtr receiveStart;
|
||||
TimeLineID receiveStartTLI;
|
||||
|
||||
/*
|
||||
* receivedUpto-1 is the last byte position that has already been
|
||||
* received. At the first startup of walreceiver, receivedUpto is set to
|
||||
* receiveStart. After that, walreceiver updates this whenever it flushes
|
||||
* the received WAL to disk.
|
||||
* received, and receivedTLI is the timeline it came from. At the first
|
||||
* startup of walreceiver, these are set to receiveStart and
|
||||
* receiveStartTLI. After that, walreceiver updates these whenever it
|
||||
* flushes the received WAL to disk.
|
||||
*/
|
||||
XLogRecPtr receivedUpto;
|
||||
TimeLineID receivedTLI;
|
||||
|
||||
/*
|
||||
* latestChunkStart is the starting byte position of the current "batch"
|
||||
@@ -97,16 +104,34 @@ typedef struct
|
||||
char conninfo[MAXCONNINFO];
|
||||
|
||||
slock_t mutex; /* locks shared variables shown above */
|
||||
|
||||
/*
|
||||
* Latch used by startup process to wake up walreceiver after telling it
|
||||
* where to start streaming (after setting receiveStart and
|
||||
* receiveStartTLI).
|
||||
*/
|
||||
Latch latch;
|
||||
} WalRcvData;
|
||||
|
||||
extern WalRcvData *WalRcv;
|
||||
|
||||
/* libpqwalreceiver hooks */
|
||||
typedef bool (*walrcv_connect_type) (char *conninfo, XLogRecPtr startpoint);
|
||||
typedef void (*walrcv_connect_type) (char *conninfo);
|
||||
extern PGDLLIMPORT walrcv_connect_type walrcv_connect;
|
||||
|
||||
typedef bool (*walrcv_receive_type) (int timeout, unsigned char *type,
|
||||
char **buffer, int *len);
|
||||
typedef void (*walrcv_identify_system_type) (TimeLineID *primary_tli);
|
||||
extern PGDLLIMPORT walrcv_identify_system_type walrcv_identify_system;
|
||||
|
||||
typedef void (*walrcv_readtimelinehistoryfile_type) (TimeLineID tli, char **filename, char **content, int *size);
|
||||
extern PGDLLIMPORT walrcv_readtimelinehistoryfile_type walrcv_readtimelinehistoryfile;
|
||||
|
||||
typedef bool (*walrcv_startstreaming_type) (TimeLineID tli, XLogRecPtr startpoint);
|
||||
extern PGDLLIMPORT walrcv_startstreaming_type walrcv_startstreaming;
|
||||
|
||||
typedef void (*walrcv_endstreaming_type) (void);
|
||||
extern PGDLLIMPORT walrcv_endstreaming_type walrcv_endstreaming;
|
||||
|
||||
typedef int (*walrcv_receive_type) (int timeout, char **buffer);
|
||||
extern PGDLLIMPORT walrcv_receive_type walrcv_receive;
|
||||
|
||||
typedef void (*walrcv_send_type) (const char *buffer, int nbytes);
|
||||
@@ -122,9 +147,10 @@ extern void WalReceiverMain(void) __attribute__((noreturn));
|
||||
extern Size WalRcvShmemSize(void);
|
||||
extern void WalRcvShmemInit(void);
|
||||
extern void ShutdownWalRcv(void);
|
||||
extern bool WalRcvInProgress(void);
|
||||
extern void RequestXLogStreaming(XLogRecPtr recptr, const char *conninfo);
|
||||
extern XLogRecPtr GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart);
|
||||
extern bool WalRcvStreaming(void);
|
||||
extern bool WalRcvRunning(void);
|
||||
extern void RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo);
|
||||
extern XLogRecPtr GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI);
|
||||
extern int GetReplicationApplyDelay(void);
|
||||
extern int GetReplicationTransferLatency(void);
|
||||
|
||||
|
@@ -19,7 +19,6 @@
|
||||
/* global state */
|
||||
extern bool am_walsender;
|
||||
extern bool am_cascading_walsender;
|
||||
extern volatile sig_atomic_t walsender_ready_to_stop;
|
||||
extern bool wake_wal_senders;
|
||||
|
||||
/* user-settable parameters */
|
||||
|
@@ -95,7 +95,7 @@ extern WalSndCtlData *WalSndCtl;
|
||||
|
||||
|
||||
extern void WalSndSetState(WalSndState state);
|
||||
extern void XLogRead(char *buf, XLogRecPtr startptr, Size count);
|
||||
extern void XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count);
|
||||
|
||||
/*
|
||||
* Internal functions for parsing the replication grammar, in repl_gram.y and
|
||||
|
Reference in New Issue
Block a user