From e9c8747ee934db9b5bad0b0f5aa3709a41ad0a9c Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 27 Jun 2024 21:06:32 +0300 Subject: [PATCH] Fix MVCC bug with prepared xact with subxacts on standby We did not recover the subtransaction IDs of prepared transactions when starting a hot standby from a shutdown checkpoint. As a result, such subtransactions were considered as aborted, rather than in-progress. That would lead to hint bits being set incorrectly, and the subtransactions suddenly becoming visible to old snapshots when the prepared transaction was committed. To fix, update pg_subtrans with prepared transactions's subxids when starting hot standby from a shutdown checkpoint. The snapshots taken from that state need to be marked as "suboverflowed", so that we also check the pg_subtrans. Backport to all supported versions. Discussion: https://www.postgresql.org/message-id/6b852e98-2d49-4ca1-9e95-db419a2696e0@iki.fi --- src/backend/access/transam/twophase.c | 7 ++-- src/backend/access/transam/xlog.c | 14 ++++---- src/backend/storage/ipc/procarray.c | 18 ++++++++-- src/backend/storage/ipc/standby.c | 6 ++-- src/include/storage/standby.h | 10 +++++- src/test/recovery/t/009_twophase.pl | 47 ++++++++++++++++++++++++++- src/tools/pgindent/typedefs.list | 1 + 7 files changed, 85 insertions(+), 18 deletions(-) diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index f8272c2f0d0..0d57fb25c27 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -1997,9 +1997,8 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p) * This is never called at the end of recovery - we use * RecoverPreparedTransactions() at that point. * - * The lack of calls to SubTransSetParent() calls here is by design; - * those calls are made by RecoverPreparedTransactions() at the end of recovery - * for those xacts that need this. + * This updates pg_subtrans, so that any subtransactions will be correctly + * seen as in-progress in snapshots taken during recovery. */ void StandbyRecoverPreparedTransactions(void) @@ -2019,7 +2018,7 @@ StandbyRecoverPreparedTransactions(void) buf = ProcessTwoPhaseBuffer(xid, gxact->prepare_start_lsn, - gxact->ondisk, false, false); + gxact->ondisk, true, false); if (buf != NULL) pfree(buf); } diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index c15a720eb7a..746b0882bd5 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7134,6 +7134,9 @@ StartupXLOG(void) RunningTransactionsData running; TransactionId latestCompletedXid; + /* Update pg_subtrans entries for any prepared transactions */ + StandbyRecoverPreparedTransactions(); + /* * Construct a RunningTransactions snapshot representing a * shut down server, with only prepared transactions still @@ -7142,7 +7145,7 @@ StartupXLOG(void) */ running.xcnt = nxids; running.subxcnt = 0; - running.subxid_overflow = false; + running.subxid_status = SUBXIDS_IN_SUBTRANS; running.nextXid = XidFromFullTransactionId(checkPoint.nextFullXid); running.oldestRunningXid = oldestActiveXID; latestCompletedXid = XidFromFullTransactionId(checkPoint.nextFullXid); @@ -7152,8 +7155,6 @@ StartupXLOG(void) running.xids = xids; ProcArrayApplyRecoveryInfo(&running); - - StandbyRecoverPreparedTransactions(); } } @@ -10217,6 +10218,9 @@ xlog_redo(XLogReaderState *record) oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids); + /* Update pg_subtrans entries for any prepared transactions */ + StandbyRecoverPreparedTransactions(); + /* * Construct a RunningTransactions snapshot representing a shut * down server, with only prepared transactions still alive. We're @@ -10225,7 +10229,7 @@ xlog_redo(XLogReaderState *record) */ running.xcnt = nxids; running.subxcnt = 0; - running.subxid_overflow = false; + running.subxid_status = SUBXIDS_IN_SUBTRANS; running.nextXid = XidFromFullTransactionId(checkPoint.nextFullXid); running.oldestRunningXid = oldestActiveXID; latestCompletedXid = XidFromFullTransactionId(checkPoint.nextFullXid); @@ -10235,8 +10239,6 @@ xlog_redo(XLogReaderState *record) running.xids = xids; ProcArrayApplyRecoveryInfo(&running); - - StandbyRecoverPreparedTransactions(); } /* ControlFile->checkPointCopy always tracks the latest ckpt XID */ diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 7583e30e034..9dbf92360ec 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -734,7 +734,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running) * If the snapshot isn't overflowed or if its empty we can reset our * pending state and use this snapshot instead. */ - if (!running->subxid_overflow || running->xcnt == 0) + if (running->subxid_status != SUBXIDS_MISSING || running->xcnt == 0) { /* * If we have already collected known assigned xids, we need to @@ -886,7 +886,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running) * missing, so conservatively assume the last one is latestObservedXid. * ---------- */ - if (running->subxid_overflow) + if (running->subxid_status == SUBXIDS_MISSING) { standbyState = STANDBY_SNAPSHOT_PENDING; @@ -898,6 +898,18 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running) standbyState = STANDBY_SNAPSHOT_READY; standbySnapshotPendingXmin = InvalidTransactionId; + + /* + * If the 'xids' array didn't include all subtransactions, we have to + * mark any snapshots taken as overflowed. + */ + if (running->subxid_status == SUBXIDS_IN_SUBTRANS) + procArray->lastOverflowedXid = latestObservedXid; + else + { + Assert(running->subxid_status == SUBXIDS_IN_ARRAY); + procArray->lastOverflowedXid = InvalidTransactionId; + } } /* @@ -2129,7 +2141,7 @@ GetRunningTransactionData(void) CurrentRunningXacts->xcnt = count - subcount; CurrentRunningXacts->subxcnt = subcount; - CurrentRunningXacts->subxid_overflow = suboverflowed; + CurrentRunningXacts->subxid_status = suboverflowed ? SUBXIDS_IN_SUBTRANS : SUBXIDS_IN_ARRAY; CurrentRunningXacts->nextXid = XidFromFullTransactionId(ShmemVariableCache->nextFullXid); CurrentRunningXacts->oldestRunningXid = oldestRunningXid; CurrentRunningXacts->latestCompletedXid = latestCompletedXid; diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 146b80db110..7b9bba290ac 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -937,7 +937,7 @@ standby_redo(XLogReaderState *record) running.xcnt = xlrec->xcnt; running.subxcnt = xlrec->subxcnt; - running.subxid_overflow = xlrec->subxid_overflow; + running.subxid_status = xlrec->subxid_overflow ? SUBXIDS_MISSING : SUBXIDS_IN_ARRAY; running.nextXid = xlrec->nextXid; running.latestCompletedXid = xlrec->latestCompletedXid; running.oldestRunningXid = xlrec->oldestRunningXid; @@ -1093,7 +1093,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) xlrec.xcnt = CurrRunningXacts->xcnt; xlrec.subxcnt = CurrRunningXacts->subxcnt; - xlrec.subxid_overflow = CurrRunningXacts->subxid_overflow; + xlrec.subxid_overflow = (CurrRunningXacts->subxid_status != SUBXIDS_IN_ARRAY); xlrec.nextXid = CurrRunningXacts->nextXid; xlrec.oldestRunningXid = CurrRunningXacts->oldestRunningXid; xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid; @@ -1110,7 +1110,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS); - if (CurrRunningXacts->subxid_overflow) + if (xlrec.subxid_overflow) elog(trace_recovery(DEBUG2), "snapshot of %u running transactions overflowed (lsn %X/%X oldest xid %u latest complete %u next xid %u)", CurrRunningXacts->xcnt, diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h index cfbe426e5ae..317fd9f6b84 100644 --- a/src/include/storage/standby.h +++ b/src/include/storage/standby.h @@ -67,11 +67,19 @@ extern void StandbyReleaseOldLocks(TransactionId oldxid); * almost immediately see the data we need to begin executing queries. */ +typedef enum +{ + SUBXIDS_IN_ARRAY, /* xids array includes all running subxids */ + SUBXIDS_MISSING, /* snapshot overflowed, subxids are missing */ + SUBXIDS_IN_SUBTRANS, /* subxids are not included in 'xids', but + * pg_subtrans is fully up-to-date */ +} subxids_array_status; + typedef struct RunningTransactionsData { int xcnt; /* # of xact ids in xids[] */ int subxcnt; /* # of subxact ids in xids[] */ - bool subxid_overflow; /* snapshot overflowed, subxids missing */ + subxids_array_status subxid_status; TransactionId nextXid; /* xid from ShmemVariableCache->nextFullXid */ TransactionId oldestRunningXid; /* *not* oldestXmin */ TransactionId latestCompletedXid; /* so we can set xmax */ diff --git a/src/test/recovery/t/009_twophase.pl b/src/test/recovery/t/009_twophase.pl index 4a79c5ebfdb..b280add10ef 100644 --- a/src/test/recovery/t/009_twophase.pl +++ b/src/test/recovery/t/009_twophase.pl @@ -4,7 +4,7 @@ use warnings; use PostgresNode; use TestLib; -use Test::More tests => 24; +use Test::More tests => 27; my $psql_out = ''; my $psql_rc = ''; @@ -305,6 +305,51 @@ $cur_standby->start; $cur_master->psql('postgres', "COMMIT PREPARED 'xact_009_12'"); +############################################################################### +# Check visibility of prepared transactions in standby after a restart while +# primary is down. +############################################################################### + +$cur_master->psql( + 'postgres', " + CREATE TABLE t_009_tbl_standby_mvcc (id int, msg text); + BEGIN; + INSERT INTO t_009_tbl_standby_mvcc VALUES (1, 'issued to ${cur_master_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl_standby_mvcc VALUES (2, 'issued to ${cur_master_name}'); + PREPARE TRANSACTION 'xact_009_standby_mvcc'; + "); +$cur_master->stop; +$cur_standby->restart; + +# Acquire a snapshot in standby, before we commit the prepared transaction +my $standby_session = $cur_standby->background_psql('postgres', on_error_die => 1); +$standby_session->query_safe("BEGIN ISOLATION LEVEL REPEATABLE READ"); +$psql_out = $standby_session->query_safe( + "SELECT count(*) FROM t_009_tbl_standby_mvcc"); +is($psql_out, '0', + "Prepared transaction not visible in standby before commit"); + +# Commit the transaction in primary +$cur_master->start; +$cur_master->psql('postgres', " +SET synchronous_commit='remote_apply'; -- To ensure the standby is caught up +COMMIT PREPARED 'xact_009_standby_mvcc' +"); + +# Still not visible to the old snapshot +$psql_out = $standby_session->query_safe( + "SELECT count(*) FROM t_009_tbl_standby_mvcc"); +is($psql_out, '0', + "Committed prepared transaction not visible to old snapshot in standby"); + +# Is visible to a new snapshot +$standby_session->query_safe("COMMIT"); +$psql_out = $standby_session->query_safe( + "SELECT count(*) FROM t_009_tbl_standby_mvcc"); +is($psql_out, '2', + "Committed prepared transaction is visible to new snapshot in standby"); + ############################################################################### # Check for a lock conflict between prepared transaction with DDL inside and # replay of XLOG_STANDBY_LOCK wal record. diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 50d026a09ef..c5447351981 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3373,6 +3373,7 @@ string substitute_actual_parameters_context substitute_actual_srf_parameters_context substitute_phv_relids_context +subxids_array_status svtype symbol tablespaceinfo