1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-09 06:21:09 +03:00

Perform apply of large transactions by parallel workers.

Currently, for large transactions, the publisher sends the data in
multiple streams (changes divided into chunks depending upon
logical_decoding_work_mem), and then on the subscriber-side, the apply
worker writes the changes into temporary files and once it receives the
commit, it reads from those files and applies the entire transaction. To
improve the performance of such transactions, we can instead allow them to
be applied via parallel workers.

In this approach, we assign a new parallel apply worker (if available) as
soon as the xact's first stream is received and the leader apply worker
will send changes to this new worker via shared memory. The parallel apply
worker will directly apply the change instead of writing it to temporary
files. However, if the leader apply worker times out while attempting to
send a message to the parallel apply worker, it will switch to
"partial serialize" mode -  in this mode, the leader serializes all
remaining changes to a file and notifies the parallel apply workers to
read and apply them at the end of the transaction. We use a non-blocking
way to send the messages from the leader apply worker to the parallel
apply to avoid deadlocks. We keep this parallel apply assigned till the
transaction commit is received and also wait for the worker to finish at
commit. This preserves commit ordering and avoid writing to and reading
from files in most cases. We still need to spill if there is no worker
available.

This patch also extends the SUBSCRIPTION 'streaming' parameter so that the
user can control whether to apply the streaming transaction in a parallel
apply worker or spill the change to disk. The user can set the streaming
parameter to 'on/off', or 'parallel'. The parameter value 'parallel' means
the streaming will be applied via a parallel apply worker, if available.
The parameter value 'on' means the streaming transaction will be spilled
to disk. The default value is 'off' (same as current behaviour).

In addition, the patch extends the logical replication STREAM_ABORT
message so that abort_lsn and abort_time can also be sent which can be
used to update the replication origin in parallel apply worker when the
streaming transaction is aborted. Because this message extension is needed
to support parallel streaming, parallel streaming is not supported for
publications on servers < PG16.

Author: Hou Zhijie, Wang wei, Amit Kapila with design inputs from Sawada Masahiko
Reviewed-by: Sawada Masahiko, Peter Smith, Dilip Kumar, Shi yu, Kuroda Hayato, Shveta Mallik
Discussion: https://postgr.es/m/CAA4eK1+wyN6zpaHUkCLorEWNx75MG0xhMwcFhvjqm2KURZEAGw@mail.gmail.com
This commit is contained in:
Amit Kapila
2023-01-09 07:00:39 +05:30
parent 5687e7810f
commit 216a784829
58 changed files with 4497 additions and 745 deletions

View File

@@ -443,9 +443,9 @@ libpqrcv_startstreaming(WalReceiverConn *conn,
appendStringInfo(&cmd, "proto_version '%u'",
options->proto.logical.proto_version);
if (options->proto.logical.streaming &&
PQserverVersion(conn->streamConn) >= 140000)
appendStringInfoString(&cmd, ", streaming 'on'");
if (options->proto.logical.streaming_str)
appendStringInfo(&cmd, ", streaming '%s'",
options->proto.logical.streaming_str);
if (options->proto.logical.twophase &&
PQserverVersion(conn->streamConn) >= 150000)

View File

@@ -15,6 +15,7 @@ include $(top_builddir)/src/Makefile.global
override CPPFLAGS := -I$(srcdir) $(CPPFLAGS)
OBJS = \
applyparallelworker.o \
decode.o \
launcher.o \
logical.o \

File diff suppressed because it is too large Load Diff

View File

@@ -822,10 +822,11 @@ DecodeAbort(LogicalDecodingContext *ctx, XLogRecordBuffer *buf,
for (i = 0; i < parsed->nsubxacts; i++)
{
ReorderBufferAbort(ctx->reorder, parsed->subxacts[i],
buf->record->EndRecPtr);
buf->record->EndRecPtr, abort_time);
}
ReorderBufferAbort(ctx->reorder, xid, buf->record->EndRecPtr);
ReorderBufferAbort(ctx->reorder, xid, buf->record->EndRecPtr,
abort_time);
}
/* update the decoding stats */

View File

@@ -55,6 +55,7 @@
/* GUC variables */
int max_logical_replication_workers = 4;
int max_sync_workers_per_subscription = 2;
int max_parallel_apply_workers_per_subscription = 2;
LogicalRepWorker *MyLogicalRepWorker = NULL;
@@ -74,6 +75,7 @@ static void logicalrep_launcher_onexit(int code, Datum arg);
static void logicalrep_worker_onexit(int code, Datum arg);
static void logicalrep_worker_detach(void);
static void logicalrep_worker_cleanup(LogicalRepWorker *worker);
static int logicalrep_pa_worker_count(Oid subid);
static bool on_commit_launcher_wakeup = false;
@@ -152,8 +154,10 @@ get_subscription_list(void)
*
* This is only needed for cleaning up the shared memory in case the worker
* fails to attach.
*
* Returns whether the attach was successful.
*/
static void
static bool
WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
uint16 generation,
BackgroundWorkerHandle *handle)
@@ -169,11 +173,11 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
/* Worker either died or has started; no need to do anything. */
/* Worker either died or has started. Return false if died. */
if (!worker->in_use || worker->proc)
{
LWLockRelease(LogicalRepWorkerLock);
return;
return worker->in_use;
}
LWLockRelease(LogicalRepWorkerLock);
@@ -188,7 +192,7 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
if (generation == worker->generation)
logicalrep_worker_cleanup(worker);
LWLockRelease(LogicalRepWorkerLock);
return;
return false;
}
/*
@@ -210,6 +214,8 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
/*
* Walks the workers array and searches for one that matches given
* subscription id and relid.
*
* We are only interested in the leader apply worker or table sync worker.
*/
LogicalRepWorker *
logicalrep_worker_find(Oid subid, Oid relid, bool only_running)
@@ -224,6 +230,10 @@ logicalrep_worker_find(Oid subid, Oid relid, bool only_running)
{
LogicalRepWorker *w = &LogicalRepCtx->workers[i];
/* Skip parallel apply workers. */
if (isParallelApplyWorker(w))
continue;
if (w->in_use && w->subid == subid && w->relid == relid &&
(!only_running || w->proc))
{
@@ -260,11 +270,13 @@ logicalrep_workers_find(Oid subid, bool only_running)
}
/*
* Start new apply background worker, if possible.
* Start new logical replication background worker, if possible.
*
* Returns true on success, false on failure.
*/
void
bool
logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid,
Oid relid)
Oid relid, dsm_handle subworker_dsm)
{
BackgroundWorker bgw;
BackgroundWorkerHandle *bgw_handle;
@@ -273,7 +285,12 @@ logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid,
int slot = 0;
LogicalRepWorker *worker = NULL;
int nsyncworkers;
int nparallelapplyworkers;
TimestampTz now;
bool is_parallel_apply_worker = (subworker_dsm != DSM_HANDLE_INVALID);
/* Sanity check - tablesync worker cannot be a subworker */
Assert(!(is_parallel_apply_worker && OidIsValid(relid)));
ereport(DEBUG1,
(errmsg_internal("starting logical replication worker for subscription \"%s\"",
@@ -351,7 +368,20 @@ retry:
if (OidIsValid(relid) && nsyncworkers >= max_sync_workers_per_subscription)
{
LWLockRelease(LogicalRepWorkerLock);
return;
return false;
}
nparallelapplyworkers = logicalrep_pa_worker_count(subid);
/*
* Return false if the number of parallel apply workers reached the limit
* per subscription.
*/
if (is_parallel_apply_worker &&
nparallelapplyworkers >= max_parallel_apply_workers_per_subscription)
{
LWLockRelease(LogicalRepWorkerLock);
return false;
}
/*
@@ -365,7 +395,7 @@ retry:
(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
errmsg("out of logical replication worker slots"),
errhint("You might need to increase max_logical_replication_workers.")));
return;
return false;
}
/* Prepare the worker slot. */
@@ -380,6 +410,8 @@ retry:
worker->relstate = SUBREL_STATE_UNKNOWN;
worker->relstate_lsn = InvalidXLogRecPtr;
worker->stream_fileset = NULL;
worker->apply_leader_pid = is_parallel_apply_worker ? MyProcPid : InvalidPid;
worker->parallel_apply = is_parallel_apply_worker;
worker->last_lsn = InvalidXLogRecPtr;
TIMESTAMP_NOBEGIN(worker->last_send_time);
TIMESTAMP_NOBEGIN(worker->last_recv_time);
@@ -397,19 +429,34 @@ retry:
BGWORKER_BACKEND_DATABASE_CONNECTION;
bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
snprintf(bgw.bgw_library_name, BGW_MAXLEN, "postgres");
snprintf(bgw.bgw_function_name, BGW_MAXLEN, "ApplyWorkerMain");
if (is_parallel_apply_worker)
snprintf(bgw.bgw_function_name, BGW_MAXLEN, "ParallelApplyWorkerMain");
else
snprintf(bgw.bgw_function_name, BGW_MAXLEN, "ApplyWorkerMain");
if (OidIsValid(relid))
snprintf(bgw.bgw_name, BGW_MAXLEN,
"logical replication worker for subscription %u sync %u", subid, relid);
else if (is_parallel_apply_worker)
snprintf(bgw.bgw_name, BGW_MAXLEN,
"logical replication parallel apply worker for subscription %u", subid);
else
snprintf(bgw.bgw_name, BGW_MAXLEN,
"logical replication worker for subscription %u", subid);
snprintf(bgw.bgw_type, BGW_MAXLEN, "logical replication worker");
"logical replication apply worker for subscription %u", subid);
if (is_parallel_apply_worker)
snprintf(bgw.bgw_type, BGW_MAXLEN, "logical replication parallel worker");
else
snprintf(bgw.bgw_type, BGW_MAXLEN, "logical replication worker");
bgw.bgw_restart_time = BGW_NEVER_RESTART;
bgw.bgw_notify_pid = MyProcPid;
bgw.bgw_main_arg = Int32GetDatum(slot);
if (is_parallel_apply_worker)
memcpy(bgw.bgw_extra, &subworker_dsm, sizeof(dsm_handle));
if (!RegisterDynamicBackgroundWorker(&bgw, &bgw_handle))
{
/* Failed to start worker, so clean up the worker slot. */
@@ -422,33 +469,23 @@ retry:
(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
errmsg("out of background worker slots"),
errhint("You might need to increase max_worker_processes.")));
return;
return false;
}
/* Now wait until it attaches. */
WaitForReplicationWorkerAttach(worker, generation, bgw_handle);
return WaitForReplicationWorkerAttach(worker, generation, bgw_handle);
}
/*
* Stop the logical replication worker for subid/relid, if any, and wait until
* it detaches from the slot.
* Internal function to stop the worker and wait until it detaches from the
* slot.
*/
void
logicalrep_worker_stop(Oid subid, Oid relid)
static void
logicalrep_worker_stop_internal(LogicalRepWorker *worker, int signo)
{
LogicalRepWorker *worker;
uint16 generation;
LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
worker = logicalrep_worker_find(subid, relid, false);
/* No worker, nothing to do. */
if (!worker)
{
LWLockRelease(LogicalRepWorkerLock);
return;
}
Assert(LWLockHeldByMeInMode(LogicalRepWorkerLock, LW_SHARED));
/*
* Remember which generation was our worker so we can check if what we see
@@ -486,10 +523,7 @@ logicalrep_worker_stop(Oid subid, Oid relid)
* different, meaning that a different worker has taken the slot.
*/
if (!worker->in_use || worker->generation != generation)
{
LWLockRelease(LogicalRepWorkerLock);
return;
}
/* Worker has assigned proc, so it has started. */
if (worker->proc)
@@ -497,7 +531,7 @@ logicalrep_worker_stop(Oid subid, Oid relid)
}
/* Now terminate the worker ... */
kill(worker->proc->pid, SIGTERM);
kill(worker->proc->pid, signo);
/* ... and wait for it to die. */
for (;;)
@@ -523,6 +557,53 @@ logicalrep_worker_stop(Oid subid, Oid relid)
LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
}
}
/*
* Stop the logical replication worker for subid/relid, if any.
*/
void
logicalrep_worker_stop(Oid subid, Oid relid)
{
LogicalRepWorker *worker;
LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
worker = logicalrep_worker_find(subid, relid, false);
if (worker)
{
Assert(!isParallelApplyWorker(worker));
logicalrep_worker_stop_internal(worker, SIGTERM);
}
LWLockRelease(LogicalRepWorkerLock);
}
/*
* Stop the logical replication parallel apply worker corresponding to the
* input slot number.
*
* Node that the function sends SIGINT instead of SIGTERM to the parallel apply
* worker so that the worker exits cleanly.
*/
void
logicalrep_pa_worker_stop(int slot_no, uint16 generation)
{
LogicalRepWorker *worker;
Assert(slot_no >= 0 && slot_no < max_logical_replication_workers);
LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
worker = &LogicalRepCtx->workers[slot_no];
Assert(isParallelApplyWorker(worker));
/*
* Only stop the worker if the generation matches and the worker is alive.
*/
if (worker->generation == generation && worker->proc)
logicalrep_worker_stop_internal(worker, SIGINT);
LWLockRelease(LogicalRepWorkerLock);
}
@@ -595,11 +676,40 @@ logicalrep_worker_attach(int slot)
}
/*
* Detach the worker (cleans up the worker info).
* Stop the parallel apply workers if any, and detach the leader apply worker
* (cleans up the worker info).
*/
static void
logicalrep_worker_detach(void)
{
/* Stop the parallel apply workers. */
if (am_leader_apply_worker())
{
List *workers;
ListCell *lc;
/*
* Detach from the error_mq_handle for all parallel apply workers
* before terminating them. This prevents the leader apply worker from
* receiving the worker termination message and sending it to logs
* when the same is already done by the parallel worker.
*/
pa_detach_all_error_mq();
LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
workers = logicalrep_workers_find(MyLogicalRepWorker->subid, true);
foreach(lc, workers)
{
LogicalRepWorker *w = (LogicalRepWorker *) lfirst(lc);
if (isParallelApplyWorker(w))
logicalrep_worker_stop_internal(w, SIGTERM);
}
LWLockRelease(LogicalRepWorkerLock);
}
/* Block concurrent access. */
LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
@@ -622,6 +732,8 @@ logicalrep_worker_cleanup(LogicalRepWorker *worker)
worker->userid = InvalidOid;
worker->subid = InvalidOid;
worker->relid = InvalidOid;
worker->apply_leader_pid = InvalidPid;
worker->parallel_apply = false;
}
/*
@@ -653,6 +765,13 @@ logicalrep_worker_onexit(int code, Datum arg)
if (MyLogicalRepWorker->stream_fileset != NULL)
FileSetDeleteAll(MyLogicalRepWorker->stream_fileset);
/*
* Session level locks may be acquired outside of a transaction in
* parallel apply mode and will not be released when the worker
* terminates, so manually release all locks before the worker exits.
*/
LockReleaseAll(DEFAULT_LOCKMETHOD, true);
ApplyLauncherWakeup();
}
@@ -680,6 +799,33 @@ logicalrep_sync_worker_count(Oid subid)
return res;
}
/*
* Count the number of registered (but not necessarily running) parallel apply
* workers for a subscription.
*/
static int
logicalrep_pa_worker_count(Oid subid)
{
int i;
int res = 0;
Assert(LWLockHeldByMe(LogicalRepWorkerLock));
/*
* Scan all attached parallel apply workers, only counting those which
* have the given subscription id.
*/
for (i = 0; i < max_logical_replication_workers; i++)
{
LogicalRepWorker *w = &LogicalRepCtx->workers[i];
if (w->subid == subid && isParallelApplyWorker(w))
res++;
}
return res;
}
/*
* ApplyLauncherShmemSize
* Compute space needed for replication launcher shared memory
@@ -869,7 +1015,7 @@ ApplyLauncherMain(Datum main_arg)
wait_time = wal_retrieve_retry_interval;
logicalrep_worker_launch(sub->dbid, sub->oid, sub->name,
sub->owner, InvalidOid);
sub->owner, InvalidOid, DSM_HANDLE_INVALID);
}
}
@@ -952,6 +1098,10 @@ pg_stat_get_subscription(PG_FUNCTION_ARGS)
if (OidIsValid(subid) && worker.subid != subid)
continue;
/* Skip if this is a parallel apply worker */
if (isParallelApplyWorker(&worker))
continue;
worker_pid = worker.proc->pid;
values[0] = ObjectIdGetDatum(worker.subid);

View File

@@ -1,6 +1,7 @@
# Copyright (c) 2022-2023, PostgreSQL Global Development Group
backend_sources += files(
'applyparallelworker.c',
'decode.c',
'launcher.c',
'logical.c',

View File

@@ -1075,12 +1075,20 @@ ReplicationOriginExitCleanup(int code, Datum arg)
* array doesn't have to be searched when calling
* replorigin_session_advance().
*
* Obviously only one such cached origin can exist per process and the current
* cached value can only be set again after the previous value is torn down
* with replorigin_session_reset().
* Normally only one such cached origin can exist per process so the cached
* value can only be set again after the previous value is torn down with
* replorigin_session_reset(). For this normal case pass acquired_by = 0
* (meaning the slot is not allowed to be already acquired by another process).
*
* However, sometimes multiple processes can safely re-use the same origin slot
* (for example, multiple parallel apply processes can safely use the same
* origin, provided they maintain commit order by allowing only one process to
* commit at a time). For this case the first process must pass acquired_by =
* 0, and then the other processes sharing that same origin can pass
* acquired_by = PID of the first process.
*/
void
replorigin_session_setup(RepOriginId node)
replorigin_session_setup(RepOriginId node, int acquired_by)
{
static bool registered_cleanup;
int i;
@@ -1122,7 +1130,7 @@ replorigin_session_setup(RepOriginId node)
if (curstate->roident != node)
continue;
else if (curstate->acquired_by != 0)
else if (curstate->acquired_by != 0 && acquired_by == 0)
{
ereport(ERROR,
(errcode(ERRCODE_OBJECT_IN_USE),
@@ -1153,7 +1161,11 @@ replorigin_session_setup(RepOriginId node)
Assert(session_replication_state->roident != InvalidRepOriginId);
session_replication_state->acquired_by = MyProcPid;
if (acquired_by == 0)
session_replication_state->acquired_by = MyProcPid;
else if (session_replication_state->acquired_by != acquired_by)
elog(ERROR, "could not find replication state slot for replication origin with OID %u which was acquired by %d",
node, acquired_by);
LWLockRelease(ReplicationOriginLock);
@@ -1337,7 +1349,7 @@ pg_replication_origin_session_setup(PG_FUNCTION_ARGS)
name = text_to_cstring((text *) DatumGetPointer(PG_GETARG_DATUM(0)));
origin = replorigin_by_name(name, false);
replorigin_session_setup(origin);
replorigin_session_setup(origin, 0);
replorigin_session_origin = origin;

View File

@@ -1164,10 +1164,14 @@ logicalrep_read_stream_commit(StringInfo in, LogicalRepCommitData *commit_data)
/*
* Write STREAM ABORT to the output stream. Note that xid and subxid will be
* same for the top-level transaction abort.
*
* If write_abort_info is true, send the abort_lsn and abort_time fields,
* otherwise don't.
*/
void
logicalrep_write_stream_abort(StringInfo out, TransactionId xid,
TransactionId subxid)
TransactionId subxid, XLogRecPtr abort_lsn,
TimestampTz abort_time, bool write_abort_info)
{
pq_sendbyte(out, LOGICAL_REP_MSG_STREAM_ABORT);
@@ -1176,19 +1180,40 @@ logicalrep_write_stream_abort(StringInfo out, TransactionId xid,
/* transaction ID */
pq_sendint32(out, xid);
pq_sendint32(out, subxid);
if (write_abort_info)
{
pq_sendint64(out, abort_lsn);
pq_sendint64(out, abort_time);
}
}
/*
* Read STREAM ABORT from the output stream.
*
* If read_abort_info is true, read the abort_lsn and abort_time fields,
* otherwise don't.
*/
void
logicalrep_read_stream_abort(StringInfo in, TransactionId *xid,
TransactionId *subxid)
logicalrep_read_stream_abort(StringInfo in,
LogicalRepStreamAbortData *abort_data,
bool read_abort_info)
{
Assert(xid && subxid);
Assert(abort_data);
*xid = pq_getmsgint(in, 4);
*subxid = pq_getmsgint(in, 4);
abort_data->xid = pq_getmsgint(in, 4);
abort_data->subxid = pq_getmsgint(in, 4);
if (read_abort_info)
{
abort_data->abort_lsn = pq_getmsgint64(in);
abort_data->abort_time = pq_getmsgint64(in);
}
else
{
abort_data->abort_lsn = InvalidXLogRecPtr;
abort_data->abort_time = 0;
}
}
/*

View File

@@ -2873,7 +2873,8 @@ ReorderBufferFinishPrepared(ReorderBuffer *rb, TransactionId xid,
* disk.
*/
void
ReorderBufferAbort(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
ReorderBufferAbort(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn,
TimestampTz abort_time)
{
ReorderBufferTXN *txn;
@@ -2884,6 +2885,8 @@ ReorderBufferAbort(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
if (txn == NULL)
return;
txn->xact_time.abort_time = abort_time;
/* For streamed transactions notify the remote node about the abort. */
if (rbtxn_is_streamed(txn))
{

View File

@@ -14,7 +14,7 @@
* The initial data synchronization is done separately for each table,
* in a separate apply worker that only fetches the initial snapshot data
* from the publisher and then synchronizes the position in the stream with
* the main apply worker.
* the leader apply worker.
*
* There are several reasons for doing the synchronization this way:
* - It allows us to parallelize the initial data synchronization
@@ -153,7 +153,7 @@ finish_sync_worker(void)
get_rel_name(MyLogicalRepWorker->relid))));
CommitTransactionCommand();
/* Find the main apply worker and signal it. */
/* Find the leader apply worker and signal it. */
logicalrep_worker_wakeup(MyLogicalRepWorker->subid, InvalidOid);
/* Stop gracefully */
@@ -588,7 +588,8 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
MySubscription->oid,
MySubscription->name,
MyLogicalRepWorker->userid,
rstate->relid);
rstate->relid,
DSM_HANDLE_INVALID);
hentry->last_start_time = now;
}
}
@@ -636,6 +637,14 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
void
process_syncing_tables(XLogRecPtr current_lsn)
{
/*
* Skip for parallel apply workers because they only operate on tables
* that are in a READY state. See pa_can_start() and
* should_apply_changes_for_rel().
*/
if (am_parallel_apply_worker())
return;
if (am_tablesync_worker())
process_syncing_tables_for_sync(current_lsn);
else
@@ -1254,7 +1263,7 @@ LogicalRepSyncTableStart(XLogRecPtr *origin_startpos)
/*
* Here we use the slot name instead of the subscription name as the
* application_name, so that it is different from the main apply worker,
* application_name, so that it is different from the leader apply worker,
* so that synchronous replication can distinguish them.
*/
LogRepWorkerWalRcvConn =
@@ -1302,7 +1311,7 @@ LogicalRepSyncTableStart(XLogRecPtr *origin_startpos)
* time this tablesync was launched.
*/
originid = replorigin_by_name(originname, false);
replorigin_session_setup(originid);
replorigin_session_setup(originid, 0);
replorigin_session_origin = originid;
*origin_startpos = replorigin_session_get_progress(false);
@@ -1413,7 +1422,7 @@ LogicalRepSyncTableStart(XLogRecPtr *origin_startpos)
true /* go backward */ , true /* WAL log */ );
UnlockRelationOid(ReplicationOriginRelationId, RowExclusiveLock);
replorigin_session_setup(originid);
replorigin_session_setup(originid, 0);
replorigin_session_origin = originid;
}
else
@@ -1468,8 +1477,8 @@ copy_table_done:
SpinLockRelease(&MyLogicalRepWorker->relmutex);
/*
* Finally, wait until the main apply worker tells us to catch up and then
* return to let LogicalRepApplyLoop do it.
* Finally, wait until the leader apply worker tells us to catch up and
* then return to let LogicalRepApplyLoop do it.
*/
wait_for_worker_state_change(SUBREL_STATE_CATCHUP);
return slotname;

1366
src/backend/replication/logical/worker.c Normal file → Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -18,6 +18,7 @@
#include "catalog/pg_publication_rel.h"
#include "catalog/pg_subscription.h"
#include "commands/defrem.h"
#include "commands/subscriptioncmds.h"
#include "executor/executor.h"
#include "fmgr.h"
#include "nodes/makefuncs.h"
@@ -290,7 +291,7 @@ parse_output_parameters(List *options, PGOutputData *data)
bool origin_option_given = false;
data->binary = false;
data->streaming = false;
data->streaming = LOGICALREP_STREAM_OFF;
data->messages = false;
data->two_phase = false;
@@ -369,7 +370,7 @@ parse_output_parameters(List *options, PGOutputData *data)
errmsg("conflicting or redundant options")));
streaming_given = true;
data->streaming = defGetBoolean(defel);
data->streaming = defGetStreamingMode(defel);
}
else if (strcmp(defel->defname, "two_phase") == 0)
{
@@ -461,13 +462,20 @@ pgoutput_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt,
* we only allow it with sufficient version of the protocol, and when
* the output plugin supports it.
*/
if (!data->streaming)
if (data->streaming == LOGICALREP_STREAM_OFF)
ctx->streaming = false;
else if (data->protocol_version < LOGICALREP_PROTO_STREAM_VERSION_NUM)
else if (data->streaming == LOGICALREP_STREAM_ON &&
data->protocol_version < LOGICALREP_PROTO_STREAM_VERSION_NUM)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("requested proto_version=%d does not support streaming, need %d or higher",
data->protocol_version, LOGICALREP_PROTO_STREAM_VERSION_NUM)));
else if (data->streaming == LOGICALREP_STREAM_PARALLEL &&
data->protocol_version < LOGICALREP_PROTO_STREAM_PARALLEL_VERSION_NUM)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("requested proto_version=%d does not support parallel streaming, need %d or higher",
data->protocol_version, LOGICALREP_PROTO_STREAM_PARALLEL_VERSION_NUM)));
else if (!ctx->streaming)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -1841,6 +1849,8 @@ pgoutput_stream_abort(struct LogicalDecodingContext *ctx,
XLogRecPtr abort_lsn)
{
ReorderBufferTXN *toptxn;
PGOutputData *data = (PGOutputData *) ctx->output_plugin_private;
bool write_abort_info = (data->streaming == LOGICALREP_STREAM_PARALLEL);
/*
* The abort should happen outside streaming block, even for streamed
@@ -1854,7 +1864,9 @@ pgoutput_stream_abort(struct LogicalDecodingContext *ctx,
Assert(rbtxn_is_streamed(toptxn));
OutputPluginPrepareWrite(ctx, true);
logicalrep_write_stream_abort(ctx->out, toptxn->xid, txn->xid);
logicalrep_write_stream_abort(ctx->out, toptxn->xid, txn->xid, abort_lsn,
txn->xact_time.abort_time, write_abort_info);
OutputPluginWrite(ctx, true);
cleanup_rel_sync_cache(toptxn->xid, false);