diff --git a/contrib/test_decoding/logical.conf b/contrib/test_decoding/logical.conf
index 367f7066514..07c4d3d7c8f 100644
--- a/contrib/test_decoding/logical.conf
+++ b/contrib/test_decoding/logical.conf
@@ -1,2 +1,3 @@
wal_level = logical
max_replication_slots = 4
+logical_decoding_work_mem = 64kB
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index f83770350ed..d4d1fe45cc1 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1732,6 +1732,27 @@ include_dir 'conf.d'
+     <varlistentry id="guc-logical-decoding-work-mem" xreflabel="logical_decoding_work_mem">
+      <term><varname>logical_decoding_work_mem</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>logical_decoding_work_mem</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies the maximum amount of memory to be used by logical decoding,
+        before some of the decoded changes are written to local disk. This
+        limits the amount of memory used by logical streaming replication
+        connections. It defaults to 64 megabytes (<literal>64MB</literal>).
+        Since each replication connection only uses a single buffer of this size,
+        and an installation normally doesn't have many such connections
+        concurrently (as limited by <varname>max_wal_senders</varname>), it's
+        safe to set this value significantly higher than <varname>work_mem</varname>,
+        reducing the amount of decoded changes written to disk.
+       </para>
+      </listitem>
+     </varlistentry>
+
max_stack_depth (integer)
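To make the sizing advice above concrete: the worst case is roughly one such buffer per walsender, so total usage is bounded by max_wal_senders times the limit. The numbers below describe a hypothetical installation and are purely illustrative:

    /* Hypothetical installation - the numbers are illustrative only. */
    #define DECODING_WORK_MEM_KB  (256L * 1024)   /* logical_decoding_work_mem = 256MB */
    #define N_WALSENDERS          10              /* bounded by max_wal_senders */
    /* worst case: N_WALSENDERS * DECODING_WORK_MEM_KB = 2621440 kB, i.e. ~2.5GB */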
diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index 62e54240ec1..d82a5f18b0a 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -49,6 +49,34 @@
* GenerationContext for the variable-length transaction data (allocated
* and freed in groups with similar lifespan).
*
+ * To limit the amount of memory used by decoded changes, we track memory
+ * used at the reorder buffer level (i.e. total amount of memory), and for
+ * each transaction. When the total amount of used memory exceeds the
+ * limit, the transaction consuming the most memory is then serialized to
+ * disk.
+ *
+ * Only decoded changes are evicted from memory (spilled to disk), not the
+ * transaction records. The number of toplevel transactions is limited,
+ * but a transaction with many subtransactions may still consume significant
+ * amounts of memory. The transaction records are fairly small, though, and
+ * are not included in the memory limit.
+ *
+ * The current eviction algorithm is very simple - the transaction is
+ * picked merely by size, although it might also be useful to consider,
+ * for example, the age (LSN) of the changes. With the new Generational
+ * memory allocator, evicting the oldest changes would make it more likely
+ * that the memory actually gets freed.
+ *
+ * We still rely on max_changes_in_memory when loading serialized changes
+ * back into memory. At that point we can't use the memory limit directly,
+ * as we load the subxacts independently. One option to deal with this
+ * would be to count the subxacts and allow each to allocate 1/N of the
+ * memory limit. That does not seem very appealing, however, because with
+ * many subtransactions it may easily cause thrashing (short cycles of
+ * deserializing and applying very few changes). We should probably give
+ * a bit more memory to the oldest subtransactions, because they are the
+ * likely source of the next sequence of changes.
+ *
* -------------------------------------------------------------------------
*/
#include "postgres.h"
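As a quick illustration of the accounting scheme described in the comment above, here is a self-contained toy model (not the actual reorderbuffer code; the struct, the limit and the change sizes are all made up): a per-transaction byte count plus a global one, and once the global count reaches the limit the single largest transaction is "spilled", which is guaranteed to bring the total back under the limit.

    /*
     * Toy model of the memory accounting / eviction policy described above.
     * Compile with: cc -std=c99 -o toy toy.c
     */
    #include <assert.h>
    #include <stddef.h>
    #include <stdio.h>

    #define N_TXNS      4
    #define MEM_LIMIT   (64 * 1024)     /* stands in for logical_decoding_work_mem */

    typedef struct ToyTxn
    {
        size_t      size;               /* bytes of changes currently in memory */
        int         spill_count;        /* how many times we "spilled" it */
    } ToyTxn;

    static ToyTxn txns[N_TXNS];
    static size_t total;                /* stands in for ReorderBuffer->size */

    /* queue one decoded change of sz bytes into transaction i */
    static void
    queue_change(int i, size_t sz)
    {
        ToyTxn     *largest;
        int         j;

        txns[i].size += sz;
        total += sz;

        if (total < MEM_LIMIT)
            return;

        /* over the limit: find the largest transaction and "spill" it */
        largest = &txns[0];
        for (j = 1; j < N_TXNS; j++)
            if (txns[j].size > largest->size)
                largest = &txns[j];

        total -= largest->size;
        largest->size = 0;
        largest->spill_count++;

        /* the largest txn is at least as large as the change just added */
        assert(total < MEM_LIMIT);
    }

    int
    main(void)
    {
        int         i;

        for (i = 0; i < 1000; i++)
            queue_change(i % N_TXNS, 1024 * (i % 7 + 1));

        printf("in-memory total: %zu bytes\n", total);
        for (i = 0; i < N_TXNS; i++)
            printf("txn %d: %zu bytes, spilled %d times\n",
                   i, txns[i].size, txns[i].spill_count);
        return 0;
    }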
@@ -154,7 +182,8 @@ typedef struct ReorderBufferDiskChange
* resource management here, but it's not entirely clear what that would look
* like.
*/
-static const Size max_changes_in_memory = 4096;
+int logical_decoding_work_mem;
+static const Size max_changes_in_memory = 4096; /* XXX for restore only */
/* ---------------------------------------
* primary reorderbuffer support routines
@@ -189,7 +218,7 @@ static void ReorderBufferExecuteInvalidations(ReorderBuffer *rb, ReorderBufferTX
* Disk serialization support functions
* ---------------------------------------
*/
-static void ReorderBufferCheckSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn);
+static void ReorderBufferCheckMemoryLimit(ReorderBuffer *rb);
static void ReorderBufferSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn);
static void ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
int fd, ReorderBufferChange *change);
@@ -217,6 +246,14 @@ static void ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn,
static void ReorderBufferToastAppendChunk(ReorderBuffer *rb, ReorderBufferTXN *txn,
Relation relation, ReorderBufferChange *change);
+/*
+ * ---------------------------------------
+ * memory accounting
+ * ---------------------------------------
+ */
+static Size ReorderBufferChangeSize(ReorderBufferChange *change);
+static void ReorderBufferChangeMemoryUpdate(ReorderBuffer *rb,
+ ReorderBufferChange *change, bool addition);
/*
* Allocate a new ReorderBuffer and clean out any old serialized state from
@@ -269,6 +306,7 @@ ReorderBufferAllocate(void)
buffer->outbuf = NULL;
buffer->outbufsize = 0;
+ buffer->size = 0;
buffer->current_restart_decoding_lsn = InvalidXLogRecPtr;
@@ -374,6 +412,9 @@ ReorderBufferGetChange(ReorderBuffer *rb)
void
ReorderBufferReturnChange(ReorderBuffer *rb, ReorderBufferChange *change)
{
+ /* update memory accounting info */
+ ReorderBufferChangeMemoryUpdate(rb, change, false);
+
/* free contained data */
switch (change->action)
{
@@ -585,12 +626,18 @@ ReorderBufferQueueChange(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn,
txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
change->lsn = lsn;
+ change->txn = txn;
+
Assert(InvalidXLogRecPtr != lsn);
dlist_push_tail(&txn->changes, &change->node);
txn->nentries++;
txn->nentries_mem++;
- ReorderBufferCheckSerializeTXN(rb, txn);
+ /* update memory accounting information */
+ ReorderBufferChangeMemoryUpdate(rb, change, true);
+
+ /* check the memory limits and evict something if needed */
+ ReorderBufferCheckMemoryLimit(rb);
}
/*
@@ -1217,6 +1264,9 @@ ReorderBufferCleanupTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
change = dlist_container(ReorderBufferChange, node, iter.cur);
+ /* Check we're not mixing changes from different transactions. */
+ Assert(change->txn == txn);
+
ReorderBufferReturnChange(rb, change);
}
@@ -1229,7 +1279,11 @@ ReorderBufferCleanupTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
ReorderBufferChange *change;
change = dlist_container(ReorderBufferChange, node, iter.cur);
+
+ /* Check we're not mixing changes from different transactions. */
+ Assert(change->txn == txn);
Assert(change->action == REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID);
+
ReorderBufferReturnChange(rb, change);
}
@@ -2082,9 +2136,48 @@ ReorderBufferAddNewCommandId(ReorderBuffer *rb, TransactionId xid,
ReorderBufferQueueChange(rb, xid, lsn, change);
}
+/*
+ * Update the memory accounting info. We track memory used by the whole
+ * reorder buffer and the transaction containing the change.
+ */
+static void
+ReorderBufferChangeMemoryUpdate(ReorderBuffer *rb,
+ ReorderBufferChange *change,
+ bool addition)
+{
+ Size sz;
+
+ Assert(change->txn);
+
+ /*
+ * Ignore tuple CID changes, because those are not evicted when reaching
+ * the memory limit. So we simply don't count them, as including them
+ * might easily trigger a pointless attempt to spill.
+ */
+ if (change->action == REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID)
+ return;
+
+ sz = ReorderBufferChangeSize(change);
+
+ if (addition)
+ {
+ change->txn->size += sz;
+ rb->size += sz;
+ }
+ else
+ {
+ Assert((rb->size >= sz) && (change->txn->size >= sz));
+ change->txn->size -= sz;
+ rb->size -= sz;
+ }
+}
/*
* Add new (relfilenode, tid) -> (cmin, cmax) mappings.
+ *
+ * We do not include this change type in memory accounting, because we
+ * keep CIDs in a separate list and do not evict them when reaching
+ * the memory limit.
*/
void
ReorderBufferAddNewTupleCids(ReorderBuffer *rb, TransactionId xid,
@@ -2103,6 +2196,7 @@ ReorderBufferAddNewTupleCids(ReorderBuffer *rb, TransactionId xid,
change->data.tuplecid.cmax = cmax;
change->data.tuplecid.combocid = combocid;
change->lsn = lsn;
+ change->txn = txn;
change->action = REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID;
dlist_push_tail(&txn->tuplecids, &change->node);
@@ -2230,20 +2324,84 @@ ReorderBufferSerializeReserve(ReorderBuffer *rb, Size sz)
}
/*
- * Check whether the transaction tx should spill its data to disk.
+ * Find the largest transaction (toplevel or subxact) to evict (spill to disk).
+ *
+ * XXX With many subtransactions this might be quite slow, because we'll
+ * have to walk through all of them. There are several ways we could
+ * improve that: (a) maintain some secondary structure with transactions
+ * sorted by amount of changes, (b) not look for the single largest
+ * transaction but e.g. for a transaction using at least some fraction of
+ * the memory limit, and (c) evict multiple transactions at once, e.g. to
+ * free a given portion of the memory limit (e.g. 50%).
+ */
+static ReorderBufferTXN *
+ReorderBufferLargestTXN(ReorderBuffer *rb)
+{
+ HASH_SEQ_STATUS hash_seq;
+ ReorderBufferTXNByIdEnt *ent;
+ ReorderBufferTXN *largest = NULL;
+
+ hash_seq_init(&hash_seq, rb->by_txn);
+ while ((ent = hash_seq_search(&hash_seq)) != NULL)
+ {
+ ReorderBufferTXN *txn = ent->txn;
+
+ /* if the current transaction is larger, remember it */
+ if ((!largest) || (txn->size > largest->size))
+ largest = txn;
+ }
+
+ Assert(largest);
+ Assert(largest->size > 0);
+ Assert(largest->size <= rb->size);
+
+ return largest;
+}
+
+/*
+ * Check whether the logical_decoding_work_mem limit was reached, and if yes
+ * pick the transaction to evict and spill the changes to disk.
+ *
+ * XXX At this point we select just a single (largest) transaction, but
+ * we might also adopt a more elaborate eviction strategy - for example
+ * evicting enough transactions to free a certain fraction (e.g. 50%) of
+ * the memory limit.
*/
static void
-ReorderBufferCheckSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
+ReorderBufferCheckMemoryLimit(ReorderBuffer *rb)
{
+ ReorderBufferTXN *txn;
+
+ /* bail out if we haven't exceeded the memory limit */
+ if (rb->size < logical_decoding_work_mem * 1024L)
+ return;
+
/*
- * TODO: improve accounting so we cheaply can take subtransactions into
- * account here.
+ * Pick the largest transaction (or subtransaction) and evict it from
+ * memory by serializing it to disk.
*/
- if (txn->nentries_mem >= max_changes_in_memory)
- {
- ReorderBufferSerializeTXN(rb, txn);
- Assert(txn->nentries_mem == 0);
- }
+ txn = ReorderBufferLargestTXN(rb);
+
+ ReorderBufferSerializeTXN(rb, txn);
+
+ /*
+ * After eviction, the transaction should have no entries in memory, and
+ * should use 0 bytes for changes.
+ */
+ Assert(txn->size == 0);
+ Assert(txn->nentries_mem == 0);
+
+ /*
+ * Furthermore, evicting the transaction must get us below the memory
+ * limit again - it is not possible that we're still exceeding the
+ * memory limit after evicting the transaction.
+ *
+ * This follows from the fact that we were below the limit before the
+ * most recent change was queued, and the selected (largest) transaction
+ * is at least as large as that change. So by evicting it we remove at
+ * least as much as we just added, putting us back below the limit.
+ */
+ Assert(rb->size < logical_decoding_work_mem * 1024L);
}
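To make the XXX note above concrete, here is a sketch of alternative (c): keep evicting transactions until memory use drops below some fraction (here one half) of the limit, instead of stopping after a single one. This is not what the patch implements - the function name is hypothetical, and it merely reuses ReorderBufferLargestTXN() and ReorderBufferSerializeTXN() from this file for illustration:

    /* Hypothetical variant of ReorderBufferCheckMemoryLimit - illustration only. */
    static void
    ReorderBufferCheckMemoryLimitAggressive(ReorderBuffer *rb)
    {
        Size        limit = logical_decoding_work_mem * 1024L;

        /* bail out if we haven't exceeded the memory limit */
        if (rb->size < limit)
            return;

        /*
         * Keep evicting the currently largest transaction until at least
         * half of the memory limit has been freed. Every eviction removes
         * a non-zero amount of memory, so the loop terminates.
         */
        while (rb->size >= limit / 2)
        {
            ReorderBufferTXN *txn = ReorderBufferLargestTXN(rb);

            ReorderBufferSerializeTXN(rb, txn);

            Assert(txn->size == 0);
            Assert(txn->nentries_mem == 0);
        }
    }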
/*
@@ -2512,6 +2670,84 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
Assert(ondisk->change.action == change->action);
}
+/*
+ * Size of a change in memory.
+ */
+static Size
+ReorderBufferChangeSize(ReorderBufferChange *change)
+{
+ Size sz = sizeof(ReorderBufferChange);
+
+ switch (change->action)
+ {
+ /* fall through these, they're all similar enough */
+ case REORDER_BUFFER_CHANGE_INSERT:
+ case REORDER_BUFFER_CHANGE_UPDATE:
+ case REORDER_BUFFER_CHANGE_DELETE:
+ case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT:
+ {
+ ReorderBufferTupleBuf *oldtup,
+ *newtup;
+ Size oldlen = 0;
+ Size newlen = 0;
+
+ oldtup = change->data.tp.oldtuple;
+ newtup = change->data.tp.newtuple;
+
+ if (oldtup)
+ {
+ sz += sizeof(HeapTupleData);
+ oldlen = oldtup->tuple.t_len;
+ sz += oldlen;
+ }
+
+ if (newtup)
+ {
+ sz += sizeof(HeapTupleData);
+ newlen = newtup->tuple.t_len;
+ sz += newlen;
+ }
+
+ break;
+ }
+ case REORDER_BUFFER_CHANGE_MESSAGE:
+ {
+ Size prefix_size = strlen(change->data.msg.prefix) + 1;
+
+ sz += prefix_size + change->data.msg.message_size +
+ sizeof(Size) + sizeof(Size);
+
+ break;
+ }
+ case REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT:
+ {
+ Snapshot snap;
+
+ snap = change->data.snapshot;
+
+ sz += sizeof(SnapshotData) +
+ sizeof(TransactionId) * snap->xcnt +
+ sizeof(TransactionId) * snap->subxcnt;
+
+ break;
+ }
+ case REORDER_BUFFER_CHANGE_TRUNCATE:
+ {
+ sz += sizeof(Oid) * change->data.truncate.nrelids;
+
+ break;
+ }
+ case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM:
+ case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID:
+ case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID:
+ /* ReorderBufferChange contains everything important */
+ break;
+ }
+
+ return sz;
+}
+
+
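For a sense of what ReorderBufferChangeSize() returns, here is a worked example with hypothetical tuple lengths (the 100- and 120-byte payloads are made up): the accounted size is the change struct plus a tuple header and payload for each tuple present.

    /* Hypothetical UPDATE with a 100-byte old tuple and a 120-byte new tuple. */
    Size        example_update_size =
        sizeof(ReorderBufferChange) +
        (sizeof(HeapTupleData) + 100) +     /* old tuple header + payload */
        (sizeof(HeapTupleData) + 120);      /* new tuple header + payload */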
/*
* Restore a number of changes spilled to disk back into memory.
*/
@@ -2784,6 +3020,16 @@ ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
dlist_push_tail(&txn->changes, &change->node);
txn->nentries_mem++;
+
+ /*
+ * Update the memory accounting for the restored change. We need to do
+ * this even though we don't check the memory limit when restoring changes
+ * in this branch (we only do that when initially queueing the changes
+ * after decoding), because the changes will be released later and that
+ * will subtract their size from the counters. Without accounting for them
+ * here, those counters would underflow.
+ */
+ ReorderBufferChangeMemoryUpdate(rb, change, true);
}
/*
@@ -3003,6 +3249,19 @@ ReorderBufferToastAppendChunk(ReorderBuffer *rb, ReorderBufferTXN *txn,
*
* We cannot replace unchanged toast tuples though, so those will still point
* to on-disk toast data.
+ *
+ * While updating the existing change with detoasted tuple data, we need to
+ * update the memory accounting info, because the change size will differ.
+ * Otherwise the accounting may get out of sync, triggering serialization
+ * at unexpected times.
+ *
+ * We simply subtract the size of the change before rejiggering the tuple,
+ * and then add the new size back. This makes it look like the change was
+ * removed and then re-added, except it only tweaks the accounting info.
+ *
+ * In particular, this cannot trigger serialization, which would be
+ * pointless anyway as toast replacement happens during commit processing,
+ * right before handing the change to the output plugin.
*/
static void
ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn,
@@ -3023,6 +3282,13 @@ ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn,
if (txn->toast_hash == NULL)
return;
+ /*
+ * We're going to modify the size of the change, so to make sure the
+ * accounting is correct we'll make it look like we're removing the change
+ * now (with the old size), and then re-add it at the end.
+ */
+ ReorderBufferChangeMemoryUpdate(rb, change, false);
+
oldcontext = MemoryContextSwitchTo(rb->context);
/* we should only have toast tuples in an INSERT or UPDATE */
@@ -3172,6 +3438,9 @@ ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn,
pfree(isnull);
MemoryContextSwitchTo(oldcontext);
+
+ /* now add the change back, with the correct size */
+ ReorderBufferChangeMemoryUpdate(rb, change, true);
}
/*
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 4b3769b8b05..ba4edde71a3 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -66,6 +66,7 @@
#include "postmaster/syslogger.h"
#include "postmaster/walwriter.h"
#include "replication/logicallauncher.h"
+#include "replication/reorderbuffer.h"
#include "replication/slot.h"
#include "replication/syncrep.h"
#include "replication/walreceiver.h"
@@ -2257,6 +2258,18 @@ static struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
+ {
+ {"logical_decoding_work_mem", PGC_USERSET, RESOURCES_MEM,
+ gettext_noop("Sets the maximum memory to be used for logical decoding."),
+ gettext_noop("This much memory can be used by each internal "
+ "reorder buffer before spilling to disk."),
+ GUC_UNIT_KB
+ },
+ &logical_decoding_work_mem,
+ 65536, 64, MAX_KILOBYTES,
+ NULL, NULL, NULL
+ },
+
/*
* We use the hopefully-safely-small value of 100kB as the compiled-in
* default for max_stack_depth. InitializeGUCOptions will increase it if
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index be02a76d9d8..46a06ffacd4 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -130,6 +130,7 @@
#work_mem = 4MB # min 64kB
#maintenance_work_mem = 64MB # min 1MB
#autovacuum_work_mem = -1 # min 1MB, or -1 to use maintenance_work_mem
+#logical_decoding_work_mem = 64MB # min 64kB
#max_stack_depth = 2MB # min 100kB
#shared_memory_type = mmap # the default is the first option
# supported by the operating system:
diff --git a/src/include/replication/reorderbuffer.h b/src/include/replication/reorderbuffer.h
index 4c06a78c11f..7c94d920fe9 100644
--- a/src/include/replication/reorderbuffer.h
+++ b/src/include/replication/reorderbuffer.h
@@ -17,6 +17,8 @@
#include "utils/snapshot.h"
#include "utils/timestamp.h"
+extern PGDLLIMPORT int logical_decoding_work_mem;
+
/* an individual tuple, stored in one chunk of memory */
typedef struct ReorderBufferTupleBuf
{
@@ -63,6 +65,9 @@ enum ReorderBufferChangeType
REORDER_BUFFER_CHANGE_TRUNCATE
};
+/* forward declaration */
+struct ReorderBufferTXN;
+
/*
* a single 'change', can be an insert (with one tuple), an update (old, new),
* or a delete (old).
@@ -77,6 +82,9 @@ typedef struct ReorderBufferChange
/* The type of change. */
enum ReorderBufferChangeType action;
+ /* Transaction this change belongs to. */
+ struct ReorderBufferTXN *txn;
+
RepOriginId origin_id;
/*
@@ -286,6 +294,11 @@ typedef struct ReorderBufferTXN
*/
dlist_node node;
+ /*
+ * Size of this transaction (changes currently in memory, in bytes).
+ */
+ Size size;
+
} ReorderBufferTXN;
/* so we can define the callbacks used inside struct ReorderBuffer itself */
@@ -386,6 +399,9 @@ struct ReorderBuffer
/* buffer for disk<->memory conversions */
char *outbuf;
Size outbufsize;
+
+ /* memory accounting */
+ Size size;
};
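A property the two new size fields are meant to maintain is that ReorderBuffer->size always equals the sum of the per-transaction sizes (subtransactions included, since they have their own entries in by_txn). A debug-only helper along the following lines could check that; it is not part of the patch, the name is made up, and it would have to live in reorderbuffer.c because ReorderBufferTXNByIdEnt is private to that file:

    #ifdef USE_ASSERT_CHECKING
    /* Hypothetical consistency check - not part of the patch. */
    static void
    AssertReorderBufferSizeConsistent(ReorderBuffer *rb)
    {
        HASH_SEQ_STATUS hash_seq;
        ReorderBufferTXNByIdEnt *ent;
        Size        total = 0;

        hash_seq_init(&hash_seq, rb->by_txn);
        while ((ent = hash_seq_search(&hash_seq)) != NULL)
            total += ent->txn->size;

        Assert(total == rb->size);
    }
    #endif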