1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-10 17:42:29 +03:00

Ensure that the sync slots reach a consistent state after promotion without losing data.

We were directly copying the LSN locations while syncing the slots on the
standby. Now, it is possible that at some particular restart_lsn there are
some running xacts, which means if we start reading the WAL from that
location after promotion, we won't reach a consistent snapshot state at
that point. However, on the primary, we would have already been in a
consistent snapshot state at that restart_lsn so we would have just
serialized the existing snapshot.

To avoid this problem we will use the advance_slot functionality unless
the snapshot already exists at the synced restart_lsn location. This will
help us to ensure that snapbuilder/slot statuses are updated properly
without generating any changes. Note that the synced slot will remain as
RS_TEMPORARY till the decoding from corresponding restart_lsn can reach a
consistent snapshot state after which they will be marked as
RS_PERSISTENT.

Per buildfarm

Author: Hou Zhijie
Reviewed-by: Bertrand Drouvot, Shveta Malik, Bharath Rupireddy, Amit Kapila
Discussion: https://postgr.es/m/OS0PR01MB5716B3942AE49F3F725ACA92943B2@OS0PR01MB5716.jpnprd01.prod.outlook.com
This commit is contained in:
Amit Kapila
2024-04-03 14:04:59 +05:30
parent e37662f221
commit 2ec005b4e2
7 changed files with 350 additions and 170 deletions

View File

@@ -492,125 +492,13 @@ pg_physical_replication_slot_advance(XLogRecPtr moveto)
}
/*
* Helper function for advancing our logical replication slot forward.
*
* The slot's restart_lsn is used as start point for reading records, while
* confirmed_flush is used as base point for the decoding context.
*
* We cannot just do LogicalConfirmReceivedLocation to update confirmed_flush,
* because we need to digest WAL to advance restart_lsn allowing to recycle
* WAL and removal of old catalog tuples. As decoding is done in fast_forward
* mode, no changes are generated anyway.
* Advance our logical replication slot forward. See
* LogicalSlotAdvanceAndCheckSnapState for details.
*/
static XLogRecPtr
pg_logical_replication_slot_advance(XLogRecPtr moveto)
{
LogicalDecodingContext *ctx;
ResourceOwner old_resowner = CurrentResourceOwner;
XLogRecPtr retlsn;
Assert(moveto != InvalidXLogRecPtr);
PG_TRY();
{
/*
* Create our decoding context in fast_forward mode, passing start_lsn
* as InvalidXLogRecPtr, so that we start processing from my slot's
* confirmed_flush.
*/
ctx = CreateDecodingContext(InvalidXLogRecPtr,
NIL,
true, /* fast_forward */
XL_ROUTINE(.page_read = read_local_xlog_page,
.segment_open = wal_segment_open,
.segment_close = wal_segment_close),
NULL, NULL, NULL);
/*
* Wait for specified streaming replication standby servers (if any)
* to confirm receipt of WAL up to moveto lsn.
*/
WaitForStandbyConfirmation(moveto);
/*
* Start reading at the slot's restart_lsn, which we know to point to
* a valid record.
*/
XLogBeginRead(ctx->reader, MyReplicationSlot->data.restart_lsn);
/* invalidate non-timetravel entries */
InvalidateSystemCaches();
/* Decode records until we reach the requested target */
while (ctx->reader->EndRecPtr < moveto)
{
char *errm = NULL;
XLogRecord *record;
/*
* Read records. No changes are generated in fast_forward mode,
* but snapbuilder/slot statuses are updated properly.
*/
record = XLogReadRecord(ctx->reader, &errm);
if (errm)
elog(ERROR, "could not find record while advancing replication slot: %s",
errm);
/*
* Process the record. Storage-level changes are ignored in
* fast_forward mode, but other modules (such as snapbuilder)
* might still have critical updates to do.
*/
if (record)
LogicalDecodingProcessRecord(ctx, ctx->reader);
CHECK_FOR_INTERRUPTS();
}
/*
* Logical decoding could have clobbered CurrentResourceOwner during
* transaction management, so restore the executor's value. (This is
* a kluge, but it's not worth cleaning up right now.)
*/
CurrentResourceOwner = old_resowner;
if (ctx->reader->EndRecPtr != InvalidXLogRecPtr)
{
LogicalConfirmReceivedLocation(moveto);
/*
* If only the confirmed_flush LSN has changed the slot won't get
* marked as dirty by the above. Callers on the walsender
* interface are expected to keep track of their own progress and
* don't need it written out. But SQL-interface users cannot
* specify their own start positions and it's harder for them to
* keep track of their progress, so we should make more of an
* effort to save it for them.
*
* Dirty the slot so it is written out at the next checkpoint. The
* LSN position advanced to may still be lost on a crash but this
* makes the data consistent after a clean shutdown.
*/
ReplicationSlotMarkDirty();
}
retlsn = MyReplicationSlot->data.confirmed_flush;
/* free context, call shutdown callback */
FreeDecodingContext(ctx);
InvalidateSystemCaches();
}
PG_CATCH();
{
/* clear all timetravel entries */
InvalidateSystemCaches();
PG_RE_THROW();
}
PG_END_TRY();
return retlsn;
return LogicalSlotAdvanceAndCheckSnapState(moveto, NULL);
}
/*