mirror of
https://github.com/postgres/postgres.git
synced 2025-11-01 21:31:19 +03:00
Currently, for large transactions, the publisher sends the data in multiple streams (changes divided into chunks depending upon logical_decoding_work_mem), and then on the subscriber-side, the apply worker writes the changes into temporary files and once it receives the commit, it reads from those files and applies the entire transaction. To improve the performance of such transactions, we can instead allow them to be applied via parallel workers. In this approach, we assign a new parallel apply worker (if available) as soon as the xact's first stream is received and the leader apply worker will send changes to this new worker via shared memory. The parallel apply worker will directly apply the change instead of writing it to temporary files. However, if the leader apply worker times out while attempting to send a message to the parallel apply worker, it will switch to "partial serialize" mode - in this mode, the leader serializes all remaining changes to a file and notifies the parallel apply workers to read and apply them at the end of the transaction. We use a non-blocking way to send the messages from the leader apply worker to the parallel apply worker to avoid deadlocks. We keep this parallel apply worker assigned till the transaction commit is received and also wait for the worker to finish at commit. This preserves commit ordering and avoids writing to and reading from files in most cases. We still need to spill if there is no worker available. This patch also extends the SUBSCRIPTION 'streaming' parameter so that the user can control whether to apply the streaming transaction in a parallel apply worker or spill the change to disk. The user can set the streaming parameter to 'on/off', or 'parallel'. The parameter value 'parallel' means the streaming will be applied via a parallel apply worker, if available. The parameter value 'on' means the streaming transaction will be spilled to disk. The default value is 'off' (same as current behaviour).
In addition, the patch extends the logical replication STREAM_ABORT message so that abort_lsn and abort_time can also be sent which can be used to update the replication origin in parallel apply worker when the streaming transaction is aborted. Because this message extension is needed to support parallel streaming, parallel streaming is not supported for publications on servers < PG16. Author: Hou Zhijie, Wang wei, Amit Kapila with design inputs from Sawada Masahiko Reviewed-by: Sawada Masahiko, Peter Smith, Dilip Kumar, Shi yu, Kuroda Hayato, Shveta Mallik Discussion: https://postgr.es/m/CAA4eK1+wyN6zpaHUkCLorEWNx75MG0xhMwcFhvjqm2KURZEAGw@mail.gmail.com
74 lines
2.4 KiB
C
74 lines
2.4 KiB
C
/*-------------------------------------------------------------------------
 * origin.h
 *	   Exports from replication/logical/origin.c
 *
 * Copyright (c) 2013-2023, PostgreSQL Global Development Group
 *
 * src/include/replication/origin.h
 *-------------------------------------------------------------------------
 */
|
|
#ifndef PG_ORIGIN_H
|
|
#define PG_ORIGIN_H
|
|
|
|
#include "access/xlog.h"
|
|
#include "access/xlogdefs.h"
|
|
#include "access/xlogreader.h"
|
|
#include "catalog/pg_replication_origin.h"
|
|
|
|
typedef struct xl_replorigin_set
|
|
{
|
|
XLogRecPtr remote_lsn;
|
|
RepOriginId node_id;
|
|
bool force;
|
|
} xl_replorigin_set;
|
|
|
|
typedef struct xl_replorigin_drop
|
|
{
|
|
RepOriginId node_id;
|
|
} xl_replorigin_drop;
|
|
|
|
/*
 * Record-type codes for replication origin records.
 *
 * NOTE(review): both values leave the low four bits clear, which matches the
 * usual layout of a WAL record's rmgr-specific info bits -- confirm against
 * the uint8 "info" consumed by replorigin_identify().
 */
#define XLOG_REPLORIGIN_SET		0x00
#define XLOG_REPLORIGIN_DROP	0x10
|
|
|
|
/*
 * Reserved RepOriginId values.
 *
 * InvalidRepOriginId (0) is the "no origin" value.  DoNotReplicateId takes
 * the largest 16-bit value; NOTE(review): the name suggests changes tagged
 * with it are not to be replicated -- confirm against its users.
 */
#define InvalidRepOriginId 0
#define DoNotReplicateId PG_UINT16_MAX
|
|
|
|
extern PGDLLIMPORT RepOriginId replorigin_session_origin;
|
|
extern PGDLLIMPORT XLogRecPtr replorigin_session_origin_lsn;
|
|
extern PGDLLIMPORT TimestampTz replorigin_session_origin_timestamp;
|
|
|
|
/* API for querying & manipulating replication origins */
|
|
extern RepOriginId replorigin_by_name(const char *roname, bool missing_ok);
|
|
extern RepOriginId replorigin_create(const char *roname);
|
|
extern void replorigin_drop_by_name(const char *name, bool missing_ok, bool nowait);
|
|
extern bool replorigin_by_oid(RepOriginId roident, bool missing_ok,
|
|
char **roname);
|
|
|
|
/* API for querying & manipulating replication progress tracking */
|
|
extern void replorigin_advance(RepOriginId node,
|
|
XLogRecPtr remote_commit,
|
|
XLogRecPtr local_commit,
|
|
bool go_backward, bool wal_log);
|
|
extern XLogRecPtr replorigin_get_progress(RepOriginId node, bool flush);
|
|
|
|
extern void replorigin_session_advance(XLogRecPtr remote_commit,
|
|
XLogRecPtr local_commit);
|
|
extern void replorigin_session_setup(RepOriginId node, int acquired_by);
|
|
extern void replorigin_session_reset(void);
|
|
extern XLogRecPtr replorigin_session_get_progress(bool flush);
|
|
|
|
/*
 * Checkpoint/Startup integration
 *
 * Parameterless entry points.  NOTE(review): presumably called from the
 * checkpoint and server-startup paths respectively -- confirm at the call
 * sites.
 */
extern void CheckPointReplicationOrigin(void);
extern void StartupReplicationOrigin(void);
|
|
|
|
/* WAL logging */
|
|
extern void replorigin_redo(XLogReaderState *record);
|
|
extern void replorigin_desc(StringInfo buf, XLogReaderState *record);
|
|
extern const char *replorigin_identify(uint8 info);
|
|
|
|
/* shared memory allocation */
|
|
extern Size ReplicationOriginShmemSize(void);
|
|
extern void ReplicationOriginShmemInit(void);
|
|
|
|
#endif /* PG_ORIGIN_H */
|