Perform apply of large transactions by parallel workers.

Currently, for large transactions, the publisher sends the data in multiple streams (changes divided into chunks depending upon logical_decoding_work_mem), and then on the subscriber-side, the apply worker writes the changes into temporary files and once it receives the commit, it reads from those files and applies the entire transaction. To improve the performance of such transactions, we can instead allow them to be applied via parallel workers. In this approach, we assign a new parallel apply worker (if available) as soon as the xact's first stream is received and the leader apply worker will send changes to this new worker via shared memory. The parallel apply worker will directly apply the change instead of writing it to temporary files. However, if the leader apply worker times out while attempting to send a message to the parallel apply worker, it will switch to "partial serialize" mode - in this mode, the leader serializes all remaining changes to a file and notifies the parallel apply workers to read and apply them at the end of the transaction. We use a non-blocking way to send the messages from the leader apply worker to the parallel apply to avoid deadlocks. We keep this parallel apply assigned till the transaction commit is received and also wait for the worker to finish at commit. This preserves commit ordering and avoid writing to and reading from files in most cases. We still need to spill if there is no worker available. This patch also extends the SUBSCRIPTION 'streaming' parameter so that the user can control whether to apply the streaming transaction in a parallel apply worker or spill the change to disk. The user can set the streaming parameter to 'on/off', or 'parallel'. The parameter value 'parallel' means the streaming will be applied via a parallel apply worker, if available. The parameter value 'on' means the streaming transaction will be spilled to disk. The default value is 'off' (same as current behaviour). In addition, the patch extends the logical replication STREAM_ABORT message so that abort_lsn and abort_time can also be sent which can be used to update the replication origin in parallel apply worker when the streaming transaction is aborted. Because this message extension is needed to support parallel streaming, parallel streaming is not supported for publications on servers < PG16. Author: Hou Zhijie, Wang wei, Amit Kapila with design inputs from Sawada Masahiko Reviewed-by: Sawada Masahiko, Peter Smith, Dilip Kumar, Shi yu, Kuroda Hayato, Shveta Mallik Discussion: https://postgr.es/m/CAA4eK1+wyN6zpaHUkCLorEWNx75MG0xhMwcFhvjqm2KURZEAGw@mail.gmail.com
2025-11-09 06:21:09 +03:00 · 2023-01-09 07:00:39 +05:30
parent 5687e7810f
commit 216a784829
58 changed files with 4497 additions and 745 deletions
--- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
+++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
@@ -443,9 +443,9 @@ libpqrcv_startstreaming(WalReceiverConn *conn,
 		appendStringInfo(&cmd, "proto_version '%u'",
 						 options->proto.logical.proto_version);

-		if (options->proto.logical.streaming &&
-			PQserverVersion(conn->streamConn) >= 140000)
-			appendStringInfoString(&cmd, ", streaming 'on'");
+		if (options->proto.logical.streaming_str)
+			appendStringInfo(&cmd, ", streaming '%s'",
+							 options->proto.logical.streaming_str);

 		if (options->proto.logical.twophase &&
 			PQserverVersion(conn->streamConn) >= 150000)
--- a/src/backend/replication/logical/Makefile
+++ b/src/backend/replication/logical/Makefile
@@ -15,6 +15,7 @@ include $(top_builddir)/src/Makefile.global
 override CPPFLAGS := -I$(srcdir) $(CPPFLAGS)

 OBJS = \
+	applyparallelworker.o \
 	decode.o \
 	launcher.o \
 	logical.o \
--- a/src/backend/replication/logical/applyparallelworker.c
+++ b/src/backend/replication/logical/applyparallelworker.c
--- a/src/backend/replication/logical/decode.c
+++ b/src/backend/replication/logical/decode.c
@@ -822,10 +822,11 @@ DecodeAbort(LogicalDecodingContext *ctx, XLogRecordBuffer *buf,
 		for (i = 0; i < parsed->nsubxacts; i++)
 		{
 			ReorderBufferAbort(ctx->reorder, parsed->subxacts[i],
-							   buf->record->EndRecPtr);
+							   buf->record->EndRecPtr, abort_time);
 		}

-		ReorderBufferAbort(ctx->reorder, xid, buf->record->EndRecPtr);
+		ReorderBufferAbort(ctx->reorder, xid, buf->record->EndRecPtr,
+						   abort_time);
 	}

 	/* update the decoding stats */
--- a/src/backend/replication/logical/launcher.c
+++ b/src/backend/replication/logical/launcher.c
@@ -55,6 +55,7 @@
 /* GUC variables */
 int			max_logical_replication_workers = 4;
 int			max_sync_workers_per_subscription = 2;
+int			max_parallel_apply_workers_per_subscription = 2;

 LogicalRepWorker *MyLogicalRepWorker = NULL;

@@ -74,6 +75,7 @@ static void logicalrep_launcher_onexit(int code, Datum arg);
 static void logicalrep_worker_onexit(int code, Datum arg);
 static void logicalrep_worker_detach(void);
 static void logicalrep_worker_cleanup(LogicalRepWorker *worker);
+static int	logicalrep_pa_worker_count(Oid subid);

 static bool on_commit_launcher_wakeup = false;

@@ -152,8 +154,10 @@ get_subscription_list(void)
 *
 * This is only needed for cleaning up the shared memory in case the worker
 * fails to attach.
+ *
+ * Returns whether the attach was successful.
 */
-static void
+static bool
 WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
 							   uint16 generation,
 							   BackgroundWorkerHandle *handle)
@@ -169,11 +173,11 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker,

 		LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);

-		/* Worker either died or has started; no need to do anything. */
+		/* Worker either died or has started. Return false if died. */
 		if (!worker->in_use || worker->proc)
 		{
 			LWLockRelease(LogicalRepWorkerLock);
-			return;
+			return worker->in_use;
 		}

 		LWLockRelease(LogicalRepWorkerLock);
@@ -188,7 +192,7 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
 			if (generation == worker->generation)
 				logicalrep_worker_cleanup(worker);
 			LWLockRelease(LogicalRepWorkerLock);
-			return;
+			return false;
 		}

 		/*
@@ -210,6 +214,8 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
 /*
 * Walks the workers array and searches for one that matches given
 * subscription id and relid.
+ *
+ * We are only interested in the leader apply worker or table sync worker.
 */
 LogicalRepWorker *
 logicalrep_worker_find(Oid subid, Oid relid, bool only_running)
@@ -224,6 +230,10 @@ logicalrep_worker_find(Oid subid, Oid relid, bool only_running)
 	{
 		LogicalRepWorker *w = &LogicalRepCtx->workers[i];

+		/* Skip parallel apply workers. */
+		if (isParallelApplyWorker(w))
+			continue;
+
 		if (w->in_use && w->subid == subid && w->relid == relid &&
 			(!only_running || w->proc))
 		{
@@ -260,11 +270,13 @@ logicalrep_workers_find(Oid subid, bool only_running)
 }

 /*
- * Start new apply background worker, if possible.
+ * Start new logical replication background worker, if possible.
+ *
+ * Returns true on success, false on failure.
 */
-void
+bool
 logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid,
-						 Oid relid)
+						 Oid relid, dsm_handle subworker_dsm)
 {
 	BackgroundWorker bgw;
 	BackgroundWorkerHandle *bgw_handle;
@@ -273,7 +285,12 @@ logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid,
 	int			slot = 0;
 	LogicalRepWorker *worker = NULL;
 	int			nsyncworkers;
+	int			nparallelapplyworkers;
 	TimestampTz now;
+	bool		is_parallel_apply_worker = (subworker_dsm != DSM_HANDLE_INVALID);
+
+	/* Sanity check - tablesync worker cannot be a subworker */
+	Assert(!(is_parallel_apply_worker && OidIsValid(relid)));

 	ereport(DEBUG1,
 			(errmsg_internal("starting logical replication worker for subscription \"%s\"",
@@ -351,7 +368,20 @@ retry:
 	if (OidIsValid(relid) && nsyncworkers >= max_sync_workers_per_subscription)
 	{
 		LWLockRelease(LogicalRepWorkerLock);
-		return;
+		return false;
+	}
+
+	nparallelapplyworkers = logicalrep_pa_worker_count(subid);
+
+	/*
+	 * Return false if the number of parallel apply workers reached the limit
+	 * per subscription.
+	 */
+	if (is_parallel_apply_worker &&
+		nparallelapplyworkers >= max_parallel_apply_workers_per_subscription)
+	{
+		LWLockRelease(LogicalRepWorkerLock);
+		return false;
 	}

 	/*
@@ -365,7 +395,7 @@ retry:
 				(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
 				 errmsg("out of logical replication worker slots"),
 				 errhint("You might need to increase max_logical_replication_workers.")));
-		return;
+		return false;
 	}

 	/* Prepare the worker slot. */
@@ -380,6 +410,8 @@ retry:
 	worker->relstate = SUBREL_STATE_UNKNOWN;
 	worker->relstate_lsn = InvalidXLogRecPtr;
 	worker->stream_fileset = NULL;
+	worker->apply_leader_pid = is_parallel_apply_worker ? MyProcPid : InvalidPid;
+	worker->parallel_apply = is_parallel_apply_worker;
 	worker->last_lsn = InvalidXLogRecPtr;
 	TIMESTAMP_NOBEGIN(worker->last_send_time);
 	TIMESTAMP_NOBEGIN(worker->last_recv_time);
@@ -397,19 +429,34 @@ retry:
 		BGWORKER_BACKEND_DATABASE_CONNECTION;
 	bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
 	snprintf(bgw.bgw_library_name, BGW_MAXLEN, "postgres");
-	snprintf(bgw.bgw_function_name, BGW_MAXLEN, "ApplyWorkerMain");
+
+	if (is_parallel_apply_worker)
+		snprintf(bgw.bgw_function_name, BGW_MAXLEN, "ParallelApplyWorkerMain");
+	else
+		snprintf(bgw.bgw_function_name, BGW_MAXLEN, "ApplyWorkerMain");
+
 	if (OidIsValid(relid))
 		snprintf(bgw.bgw_name, BGW_MAXLEN,
 				 "logical replication worker for subscription %u sync %u", subid, relid);
+	else if (is_parallel_apply_worker)
+		snprintf(bgw.bgw_name, BGW_MAXLEN,
+				 "logical replication parallel apply worker for subscription %u", subid);
 	else
 		snprintf(bgw.bgw_name, BGW_MAXLEN,
-				 "logical replication worker for subscription %u", subid);
-	snprintf(bgw.bgw_type, BGW_MAXLEN, "logical replication worker");
+				 "logical replication apply worker for subscription %u", subid);
+
+	if (is_parallel_apply_worker)
+		snprintf(bgw.bgw_type, BGW_MAXLEN, "logical replication parallel worker");
+	else
+		snprintf(bgw.bgw_type, BGW_MAXLEN, "logical replication worker");

 	bgw.bgw_restart_time = BGW_NEVER_RESTART;
 	bgw.bgw_notify_pid = MyProcPid;
 	bgw.bgw_main_arg = Int32GetDatum(slot);

+	if (is_parallel_apply_worker)
+		memcpy(bgw.bgw_extra, &subworker_dsm, sizeof(dsm_handle));
+
 	if (!RegisterDynamicBackgroundWorker(&bgw, &bgw_handle))
 	{
 		/* Failed to start worker, so clean up the worker slot. */
@@ -422,33 +469,23 @@ retry:
 				(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
 				 errmsg("out of background worker slots"),
 				 errhint("You might need to increase max_worker_processes.")));
-		return;
+		return false;
 	}

 	/* Now wait until it attaches. */
-	WaitForReplicationWorkerAttach(worker, generation, bgw_handle);
+	return WaitForReplicationWorkerAttach(worker, generation, bgw_handle);
 }

 /*
- * Stop the logical replication worker for subid/relid, if any, and wait until
- * it detaches from the slot.
+ * Internal function to stop the worker and wait until it detaches from the
+ * slot.
 */
-void
-logicalrep_worker_stop(Oid subid, Oid relid)
+static void
+logicalrep_worker_stop_internal(LogicalRepWorker *worker, int signo)
 {
-	LogicalRepWorker *worker;
 	uint16		generation;

-	LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
-
-	worker = logicalrep_worker_find(subid, relid, false);
-
-	/* No worker, nothing to do. */
-	if (!worker)
-	{
-		LWLockRelease(LogicalRepWorkerLock);
-		return;
-	}
+	Assert(LWLockHeldByMeInMode(LogicalRepWorkerLock, LW_SHARED));

 	/*
 	 * Remember which generation was our worker so we can check if what we see
@@ -486,10 +523,7 @@ logicalrep_worker_stop(Oid subid, Oid relid)
 		 * different, meaning that a different worker has taken the slot.
 		 */
 		if (!worker->in_use || worker->generation != generation)
-		{
-			LWLockRelease(LogicalRepWorkerLock);
 			return;
-		}

 		/* Worker has assigned proc, so it has started. */
 		if (worker->proc)
@@ -497,7 +531,7 @@ logicalrep_worker_stop(Oid subid, Oid relid)
 	}

 	/* Now terminate the worker ... */
-	kill(worker->proc->pid, SIGTERM);
+	kill(worker->proc->pid, signo);

 	/* ... and wait for it to die. */
 	for (;;)
@@ -523,6 +557,53 @@ logicalrep_worker_stop(Oid subid, Oid relid)

 		LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
 	}
+}
+
+/*
+ * Stop the logical replication worker for subid/relid, if any.
+ */
+void
+logicalrep_worker_stop(Oid subid, Oid relid)
+{
+	LogicalRepWorker *worker;
+
+	LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+
+	worker = logicalrep_worker_find(subid, relid, false);
+
+	if (worker)
+	{
+		Assert(!isParallelApplyWorker(worker));
+		logicalrep_worker_stop_internal(worker, SIGTERM);
+	}
+
+	LWLockRelease(LogicalRepWorkerLock);
+}
+
+/*
+ * Stop the logical replication parallel apply worker corresponding to the
+ * input slot number.
+ *
+ * Node that the function sends SIGINT instead of SIGTERM to the parallel apply
+ * worker so that the worker exits cleanly.
+ */
+void
+logicalrep_pa_worker_stop(int slot_no, uint16 generation)
+{
+	LogicalRepWorker *worker;
+
+	Assert(slot_no >= 0 && slot_no < max_logical_replication_workers);
+
+	LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+
+	worker = &LogicalRepCtx->workers[slot_no];
+	Assert(isParallelApplyWorker(worker));
+
+	/*
+	 * Only stop the worker if the generation matches and the worker is alive.
+	 */
+	if (worker->generation == generation && worker->proc)
+		logicalrep_worker_stop_internal(worker, SIGINT);

 	LWLockRelease(LogicalRepWorkerLock);
 }
@@ -595,11 +676,40 @@ logicalrep_worker_attach(int slot)
 }

 /*
- * Detach the worker (cleans up the worker info).
+ * Stop the parallel apply workers if any, and detach the leader apply worker
+ * (cleans up the worker info).
 */
 static void
 logicalrep_worker_detach(void)
 {
+	/* Stop the parallel apply workers. */
+	if (am_leader_apply_worker())
+	{
+		List	   *workers;
+		ListCell   *lc;
+
+		/*
+		 * Detach from the error_mq_handle for all parallel apply workers
+		 * before terminating them. This prevents the leader apply worker from
+		 * receiving the worker termination message and sending it to logs
+		 * when the same is already done by the parallel worker.
+		 */
+		pa_detach_all_error_mq();
+
+		LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+
+		workers = logicalrep_workers_find(MyLogicalRepWorker->subid, true);
+		foreach(lc, workers)
+		{
+			LogicalRepWorker *w = (LogicalRepWorker *) lfirst(lc);
+
+			if (isParallelApplyWorker(w))
+				logicalrep_worker_stop_internal(w, SIGTERM);
+		}
+
+		LWLockRelease(LogicalRepWorkerLock);
+	}
+
 	/* Block concurrent access. */
 	LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);

@@ -622,6 +732,8 @@ logicalrep_worker_cleanup(LogicalRepWorker *worker)
 	worker->userid = InvalidOid;
 	worker->subid = InvalidOid;
 	worker->relid = InvalidOid;
+	worker->apply_leader_pid = InvalidPid;
+	worker->parallel_apply = false;
 }

 /*
@@ -653,6 +765,13 @@ logicalrep_worker_onexit(int code, Datum arg)
 	if (MyLogicalRepWorker->stream_fileset != NULL)
 		FileSetDeleteAll(MyLogicalRepWorker->stream_fileset);

+	/*
+	 * Session level locks may be acquired outside of a transaction in
+	 * parallel apply mode and will not be released when the worker
+	 * terminates, so manually release all locks before the worker exits.
+	 */
+	LockReleaseAll(DEFAULT_LOCKMETHOD, true);
+
 	ApplyLauncherWakeup();
 }

@@ -680,6 +799,33 @@ logicalrep_sync_worker_count(Oid subid)
 	return res;
 }

+/*
+ * Count the number of registered (but not necessarily running) parallel apply
+ * workers for a subscription.
+ */
+static int
+logicalrep_pa_worker_count(Oid subid)
+{
+	int			i;
+	int			res = 0;
+
+	Assert(LWLockHeldByMe(LogicalRepWorkerLock));
+
+	/*
+	 * Scan all attached parallel apply workers, only counting those which
+	 * have the given subscription id.
+	 */
+	for (i = 0; i < max_logical_replication_workers; i++)
+	{
+		LogicalRepWorker *w = &LogicalRepCtx->workers[i];
+
+		if (w->subid == subid && isParallelApplyWorker(w))
+			res++;
+	}
+
+	return res;
+}
+
 /*
 * ApplyLauncherShmemSize
 *		Compute space needed for replication launcher shared memory
@@ -869,7 +1015,7 @@ ApplyLauncherMain(Datum main_arg)
 					wait_time = wal_retrieve_retry_interval;

 					logicalrep_worker_launch(sub->dbid, sub->oid, sub->name,
-											 sub->owner, InvalidOid);
+											 sub->owner, InvalidOid, DSM_HANDLE_INVALID);
 				}
 			}

@@ -952,6 +1098,10 @@ pg_stat_get_subscription(PG_FUNCTION_ARGS)
 		if (OidIsValid(subid) && worker.subid != subid)
 			continue;

+		/* Skip if this is a parallel apply worker */
+		if (isParallelApplyWorker(&worker))
+			continue;
+
 		worker_pid = worker.proc->pid;

 		values[0] = ObjectIdGetDatum(worker.subid);
--- a/src/backend/replication/logical/meson.build
+++ b/src/backend/replication/logical/meson.build
@@ -1,6 +1,7 @@
 # Copyright (c) 2022-2023, PostgreSQL Global Development Group

 backend_sources += files(
+  'applyparallelworker.c',
  'decode.c',
  'launcher.c',
  'logical.c',
--- a/src/backend/replication/logical/origin.c
+++ b/src/backend/replication/logical/origin.c
@@ -1075,12 +1075,20 @@ ReplicationOriginExitCleanup(int code, Datum arg)
 * array doesn't have to be searched when calling
 * replorigin_session_advance().
 *
- * Obviously only one such cached origin can exist per process and the current
- * cached value can only be set again after the previous value is torn down
- * with replorigin_session_reset().
+ * Normally only one such cached origin can exist per process so the cached
+ * value can only be set again after the previous value is torn down with
+ * replorigin_session_reset(). For this normal case pass acquired_by = 0
+ * (meaning the slot is not allowed to be already acquired by another process).
+ *
+ * However, sometimes multiple processes can safely re-use the same origin slot
+ * (for example, multiple parallel apply processes can safely use the same
+ * origin, provided they maintain commit order by allowing only one process to
+ * commit at a time). For this case the first process must pass acquired_by =
+ * 0, and then the other processes sharing that same origin can pass
+ * acquired_by = PID of the first process.
 */
 void
-replorigin_session_setup(RepOriginId node)
+replorigin_session_setup(RepOriginId node, int acquired_by)
 {
 	static bool registered_cleanup;
 	int			i;
@@ -1122,7 +1130,7 @@ replorigin_session_setup(RepOriginId node)
 		if (curstate->roident != node)
 			continue;

-		else if (curstate->acquired_by != 0)
+		else if (curstate->acquired_by != 0 && acquired_by == 0)
 		{
 			ereport(ERROR,
 					(errcode(ERRCODE_OBJECT_IN_USE),
@@ -1153,7 +1161,11 @@ replorigin_session_setup(RepOriginId node)

 	Assert(session_replication_state->roident != InvalidRepOriginId);

-	session_replication_state->acquired_by = MyProcPid;
+	if (acquired_by == 0)
+		session_replication_state->acquired_by = MyProcPid;
+	else if (session_replication_state->acquired_by != acquired_by)
+		elog(ERROR, "could not find replication state slot for replication origin with OID %u which was acquired by %d",
+			 node, acquired_by);

 	LWLockRelease(ReplicationOriginLock);

@@ -1337,7 +1349,7 @@ pg_replication_origin_session_setup(PG_FUNCTION_ARGS)

 	name = text_to_cstring((text *) DatumGetPointer(PG_GETARG_DATUM(0)));
 	origin = replorigin_by_name(name, false);
-	replorigin_session_setup(origin);
+	replorigin_session_setup(origin, 0);

 	replorigin_session_origin = origin;

--- a/src/backend/replication/logical/proto.c
+++ b/src/backend/replication/logical/proto.c
@@ -1164,10 +1164,14 @@ logicalrep_read_stream_commit(StringInfo in, LogicalRepCommitData *commit_data)
 /*
 * Write STREAM ABORT to the output stream. Note that xid and subxid will be
 * same for the top-level transaction abort.
+ *
+ * If write_abort_info is true, send the abort_lsn and abort_time fields,
+ * otherwise don't.
 */
 void
 logicalrep_write_stream_abort(StringInfo out, TransactionId xid,
-							  TransactionId subxid)
+							  TransactionId subxid, XLogRecPtr abort_lsn,
+							  TimestampTz abort_time, bool write_abort_info)
 {
 	pq_sendbyte(out, LOGICAL_REP_MSG_STREAM_ABORT);

@@ -1176,19 +1180,40 @@ logicalrep_write_stream_abort(StringInfo out, TransactionId xid,
 	/* transaction ID */
 	pq_sendint32(out, xid);
 	pq_sendint32(out, subxid);
+
+	if (write_abort_info)
+	{
+		pq_sendint64(out, abort_lsn);
+		pq_sendint64(out, abort_time);
+	}
 }

 /*
 * Read STREAM ABORT from the output stream.
+ *
+ * If read_abort_info is true, read the abort_lsn and abort_time fields,
+ * otherwise don't.
 */
 void
-logicalrep_read_stream_abort(StringInfo in, TransactionId *xid,
-							 TransactionId *subxid)
+logicalrep_read_stream_abort(StringInfo in,
+							 LogicalRepStreamAbortData *abort_data,
+							 bool read_abort_info)
 {
-	Assert(xid && subxid);
+	Assert(abort_data);

-	*xid = pq_getmsgint(in, 4);
-	*subxid = pq_getmsgint(in, 4);
+	abort_data->xid = pq_getmsgint(in, 4);
+	abort_data->subxid = pq_getmsgint(in, 4);
+
+	if (read_abort_info)
+	{
+		abort_data->abort_lsn = pq_getmsgint64(in);
+		abort_data->abort_time = pq_getmsgint64(in);
+	}
+	else
+	{
+		abort_data->abort_lsn = InvalidXLogRecPtr;
+		abort_data->abort_time = 0;
+	}
 }

 /*
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -2873,7 +2873,8 @@ ReorderBufferFinishPrepared(ReorderBuffer *rb, TransactionId xid,
 * disk.
 */
 void
-ReorderBufferAbort(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
+ReorderBufferAbort(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn,
+				   TimestampTz abort_time)
 {
 	ReorderBufferTXN *txn;

@@ -2884,6 +2885,8 @@ ReorderBufferAbort(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
 	if (txn == NULL)
 		return;

+	txn->xact_time.abort_time = abort_time;
+
 	/* For streamed transactions notify the remote node about the abort. */
 	if (rbtxn_is_streamed(txn))
 	{
--- a/src/backend/replication/logical/tablesync.c
+++ b/src/backend/replication/logical/tablesync.c
@@ -14,7 +14,7 @@
 *	  The initial data synchronization is done separately for each table,
 *	  in a separate apply worker that only fetches the initial snapshot data
 *	  from the publisher and then synchronizes the position in the stream with
- *	  the main apply worker.
+ *	  the leader apply worker.
 *
 *	  There are several reasons for doing the synchronization this way:
 *	   - It allows us to parallelize the initial data synchronization
@@ -153,7 +153,7 @@ finish_sync_worker(void)
 					get_rel_name(MyLogicalRepWorker->relid))));
 	CommitTransactionCommand();

-	/* Find the main apply worker and signal it. */
+	/* Find the leader apply worker and signal it. */
 	logicalrep_worker_wakeup(MyLogicalRepWorker->subid, InvalidOid);

 	/* Stop gracefully */
@@ -588,7 +588,8 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
 												 MySubscription->oid,
 												 MySubscription->name,
 												 MyLogicalRepWorker->userid,
-												 rstate->relid);
+												 rstate->relid,
+												 DSM_HANDLE_INVALID);
 						hentry->last_start_time = now;
 					}
 				}
@@ -636,6 +637,14 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
 void
 process_syncing_tables(XLogRecPtr current_lsn)
 {
+	/*
+	 * Skip for parallel apply workers because they only operate on tables
+	 * that are in a READY state. See pa_can_start() and
+	 * should_apply_changes_for_rel().
+	 */
+	if (am_parallel_apply_worker())
+		return;
+
 	if (am_tablesync_worker())
 		process_syncing_tables_for_sync(current_lsn);
 	else
@@ -1254,7 +1263,7 @@ LogicalRepSyncTableStart(XLogRecPtr *origin_startpos)

 	/*
 	 * Here we use the slot name instead of the subscription name as the
-	 * application_name, so that it is different from the main apply worker,
+	 * application_name, so that it is different from the leader apply worker,
 	 * so that synchronous replication can distinguish them.
 	 */
 	LogRepWorkerWalRcvConn =
@@ -1302,7 +1311,7 @@ LogicalRepSyncTableStart(XLogRecPtr *origin_startpos)
 		 * time this tablesync was launched.
 		 */
 		originid = replorigin_by_name(originname, false);
-		replorigin_session_setup(originid);
+		replorigin_session_setup(originid, 0);
 		replorigin_session_origin = originid;
 		*origin_startpos = replorigin_session_get_progress(false);

@@ -1413,7 +1422,7 @@ LogicalRepSyncTableStart(XLogRecPtr *origin_startpos)
 						   true /* go backward */ , true /* WAL log */ );
 		UnlockRelationOid(ReplicationOriginRelationId, RowExclusiveLock);

-		replorigin_session_setup(originid);
+		replorigin_session_setup(originid, 0);
 		replorigin_session_origin = originid;
 	}
 	else
@@ -1468,8 +1477,8 @@ copy_table_done:
 	SpinLockRelease(&MyLogicalRepWorker->relmutex);

 	/*
-	 * Finally, wait until the main apply worker tells us to catch up and then
-	 * return to let LogicalRepApplyLoop do it.
+	 * Finally, wait until the leader apply worker tells us to catch up and
+	 * then return to let LogicalRepApplyLoop do it.
 	 */
 	wait_for_worker_state_change(SUBREL_STATE_CATCHUP);
 	return slotname;
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
--- a/src/backend/replication/pgoutput/pgoutput.c
+++ b/src/backend/replication/pgoutput/pgoutput.c
@@ -18,6 +18,7 @@
 #include "catalog/pg_publication_rel.h"
 #include "catalog/pg_subscription.h"
 #include "commands/defrem.h"
+#include "commands/subscriptioncmds.h"
 #include "executor/executor.h"
 #include "fmgr.h"
 #include "nodes/makefuncs.h"
@@ -290,7 +291,7 @@ parse_output_parameters(List *options, PGOutputData *data)
 	bool		origin_option_given = false;

 	data->binary = false;
-	data->streaming = false;
+	data->streaming = LOGICALREP_STREAM_OFF;
 	data->messages = false;
 	data->two_phase = false;

@@ -369,7 +370,7 @@ parse_output_parameters(List *options, PGOutputData *data)
 						 errmsg("conflicting or redundant options")));
 			streaming_given = true;

-			data->streaming = defGetBoolean(defel);
+			data->streaming = defGetStreamingMode(defel);
 		}
 		else if (strcmp(defel->defname, "two_phase") == 0)
 		{
@@ -461,13 +462,20 @@ pgoutput_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt,
 		 * we only allow it with sufficient version of the protocol, and when
 		 * the output plugin supports it.
 		 */
-		if (!data->streaming)
+		if (data->streaming == LOGICALREP_STREAM_OFF)
 			ctx->streaming = false;
-		else if (data->protocol_version < LOGICALREP_PROTO_STREAM_VERSION_NUM)
+		else if (data->streaming == LOGICALREP_STREAM_ON &&
+				 data->protocol_version < LOGICALREP_PROTO_STREAM_VERSION_NUM)
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 					 errmsg("requested proto_version=%d does not support streaming, need %d or higher",
 							data->protocol_version, LOGICALREP_PROTO_STREAM_VERSION_NUM)));
+		else if (data->streaming == LOGICALREP_STREAM_PARALLEL &&
+				 data->protocol_version < LOGICALREP_PROTO_STREAM_PARALLEL_VERSION_NUM)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("requested proto_version=%d does not support parallel streaming, need %d or higher",
+							data->protocol_version, LOGICALREP_PROTO_STREAM_PARALLEL_VERSION_NUM)));
 		else if (!ctx->streaming)
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -1841,6 +1849,8 @@ pgoutput_stream_abort(struct LogicalDecodingContext *ctx,
 					  XLogRecPtr abort_lsn)
 {
 	ReorderBufferTXN *toptxn;
+	PGOutputData *data = (PGOutputData *) ctx->output_plugin_private;
+	bool		write_abort_info = (data->streaming == LOGICALREP_STREAM_PARALLEL);

 	/*
 	 * The abort should happen outside streaming block, even for streamed
@@ -1854,7 +1864,9 @@ pgoutput_stream_abort(struct LogicalDecodingContext *ctx,
 	Assert(rbtxn_is_streamed(toptxn));

 	OutputPluginPrepareWrite(ctx, true);
-	logicalrep_write_stream_abort(ctx->out, toptxn->xid, txn->xid);
+	logicalrep_write_stream_abort(ctx->out, toptxn->xid, txn->xid, abort_lsn,
+								  txn->xact_time.abort_time, write_abort_info);
+
 	OutputPluginWrite(ctx, true);

 	cleanup_rel_sync_cache(toptxn->xid, false);