1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-27 12:41:57 +03:00

Migrate logical slots to the new node during an upgrade.

While reading information from the old cluster, pg_upgrade fetches a list
of logical replication slots. Later in the upgrade, it revisits the list
and recreates each slot by executing pg_create_logical_replication_slot()
on the new cluster. Migration of logical replication slots is only
supported when the old cluster is version 17.0 or later.

If the old node has invalid slots or slots with unconsumed WAL records,
pg_upgrade fails. These checks are needed to prevent data loss.

The significant advantage of this commit is that it makes it easy to
continue logical replication even after upgrading the publisher node.
Previously, pg_upgrade allowed copying publications to a new node. With
this patch, adjusting the connection string to the new publisher will
cause the apply worker on the subscriber to connect to the new publisher
automatically. This enables seamless continuation of logical replication,
even after an upgrade.

Author: Hayato Kuroda, Hou Zhijie
Reviewed-by: Peter Smith, Bharath Rupireddy, Dilip Kumar, Vignesh C, Shlok Kyal
Discussion: http://postgr.es/m/TYAPR01MB58664C81887B3AF2EB6B16E3F5939@TYAPR01MB5866.jpnprd01.prod.outlook.com
Discussion: http://postgr.es/m/CAA4eK1+t7xYcfa0rEQw839=b2MzsfvYDPz3xbD+ZqOdP3zpKYg@mail.gmail.com
This commit is contained in:
Amit Kapila
2023-10-26 06:54:16 +05:30
parent bddc2f7480
commit 29d0a77fa6
18 changed files with 927 additions and 27 deletions

View File

@ -600,12 +600,8 @@ logicalmsg_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
ReorderBufferProcessXid(ctx->reorder, XLogRecGetXid(r), buf->origptr);
/*
* If we don't have snapshot or we are just fast-forwarding, there is no
* point in decoding messages.
*/
if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
ctx->fast_forward)
/* If we don't have snapshot, there is no point in decoding messages */
if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
return;
message = (xl_logical_message *) XLogRecGetData(r);
@ -622,6 +618,26 @@ logicalmsg_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
SnapBuildXactNeedsSkip(builder, buf->origptr)))
return;
/*
* We also skip decoding in fast_forward mode. This check must be last
* because we don't want to set the processing_required flag unless we
* have a decodable message.
*/
if (ctx->fast_forward)
{
/*
* We need to set processing_required flag to notify the message's
* existence to the caller. Usually, the flag is set when either the
* COMMIT or ABORT records are decoded, but this must be turned on
* here because the non-transactional logical message is decoded
* without waiting for these records.
*/
if (!message->transactional)
ctx->processing_required = true;
return;
}
/*
* If this is a non-transactional change, get the snapshot we're expected
* to use. We only get here when the snapshot is consistent, and the
@ -1286,7 +1302,21 @@ static bool
DecodeTXNNeedSkip(LogicalDecodingContext *ctx, XLogRecordBuffer *buf,
Oid txn_dbid, RepOriginId origin_id)
{
return (SnapBuildXactNeedsSkip(ctx->snapshot_builder, buf->origptr) ||
(txn_dbid != InvalidOid && txn_dbid != ctx->slot->data.database) ||
ctx->fast_forward || FilterByOrigin(ctx, origin_id));
if (SnapBuildXactNeedsSkip(ctx->snapshot_builder, buf->origptr) ||
(txn_dbid != InvalidOid && txn_dbid != ctx->slot->data.database) ||
FilterByOrigin(ctx, origin_id))
return true;
/*
* We also skip decoding in fast_forward mode. In passing set the
* processing_required flag to indicate that if it were not for
* fast_forward mode, processing would have been required.
*/
if (ctx->fast_forward)
{
ctx->processing_required = true;
return true;
}
return false;
}

View File

@ -29,6 +29,7 @@
#include "postgres.h"
#include "access/xact.h"
#include "access/xlogutils.h"
#include "access/xlog_internal.h"
#include "fmgr.h"
#include "miscadmin.h"
@ -41,6 +42,7 @@
#include "storage/proc.h"
#include "storage/procarray.h"
#include "utils/builtins.h"
#include "utils/inval.h"
#include "utils/memutils.h"
/* data for errcontext callback */
@ -1949,3 +1951,76 @@ UpdateDecodingStats(LogicalDecodingContext *ctx)
rb->totalTxns = 0;
rb->totalBytes = 0;
}
/*
 * LogicalReplicationSlotHasPendingWal
 *
 * Scan WAL from the restart_lsn of MyReplicationSlot up to end_of_wal with a
 * fast_forward decoding context.
 *
 * Returns true as soon as processing a record leaves the context's
 * processing_required flag set, i.e. a meaningful/decodable record was
 * encountered.  Returns false if end_of_wal is reached without that
 * happening.
 *
 * The caller must already have acquired a replication slot (asserted below).
 */
bool
LogicalReplicationSlotHasPendingWal(XLogRecPtr end_of_wal)
{
	bool		pending = false;

	Assert(MyReplicationSlot);

	PG_TRY();
	{
		LogicalDecodingContext *ctx;

		/*
		 * Build a fast_forward decoding context.  start_lsn is passed as
		 * InvalidXLogRecPtr so that processing starts from the slot's
		 * confirmed_flush.  No output callbacks are supplied.
		 */
		ctx = CreateDecodingContext(InvalidXLogRecPtr,
									NIL,
									true,	/* fast_forward */
									XL_ROUTINE(.page_read = read_local_xlog_page,
											   .segment_open = wal_segment_open,
											   .segment_close = wal_segment_close),
									NULL, NULL, NULL);

		/*
		 * Position the reader at the slot's restart_lsn, which we know
		 * points to a valid record.
		 */
		XLogBeginRead(ctx->reader, MyReplicationSlot->data.restart_lsn);

		/* Invalidate non-timetravel entries */
		InvalidateSystemCaches();

		/* Keep reading until a pending change is seen or WAL is exhausted */
		for (;;)
		{
			XLogRecord *rec;
			char	   *err = NULL;

			if (pending || ctx->reader->EndRecPtr >= end_of_wal)
				break;

			rec = XLogReadRecord(ctx->reader, &err);

			if (err != NULL)
				elog(ERROR, "could not find record for logical decoding: %s", err);

			if (rec != NULL)
				LogicalDecodingProcessRecord(ctx, ctx->reader);

			/* Set by the decoding routines while in fast_forward mode */
			pending = ctx->processing_required;

			CHECK_FOR_INTERRUPTS();
		}

		/* Clean up */
		FreeDecodingContext(ctx);
		InvalidateSystemCaches();
	}
	PG_CATCH();
	{
		/* clear all timetravel entries */
		InvalidateSystemCaches();

		PG_RE_THROW();
	}
	PG_END_TRY();

	return pending;
}

View File

@ -1423,6 +1423,20 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause,
SpinLockRelease(&s->mutex);
/*
* The logical replication slots shouldn't be invalidated as
* max_slot_wal_keep_size GUC is set to -1 during the upgrade.
*
* The following is just a sanity check.
*/
if (*invalidated && SlotIsLogical(s) && IsBinaryUpgrade)
{
ereport(ERROR,
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("replication slots must not be invalidated during the upgrade"),
errhint("\"max_slot_wal_keep_size\" must be set to -1 during the upgrade"));
}
if (active_pid != 0)
{
/*