1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-09 06:21:09 +03:00

Fix a deadlock during ALTER SUBSCRIPTION ... DROP PUBLICATION.

A deadlock can occur when the DDL command and the apply worker acquire
catalog locks in different orders while dropping replication origins.

The issue is rare in PG16 and higher branches because, in most cases, the
tablesync worker performs the origin drop in those branches, and its
locking sequence does not conflict with DDL operations.

This patch ensures consistent lock acquisition to prevent such deadlocks.

As per buildfarm.

Reported-by: Alexander Lakhin <exclusion@gmail.com>
Author: Ajin Cherian <itsajin@gmail.com>
Reviewed-by: Hayato Kuroda <kuroda.hayato@fujitsu.com>
Reviewed-by: vignesh C <vignesh21@gmail.com>
Reviewed-by: Amit Kapila <amit.kapila16@gmail.com>
Backpatch-through: 14, where it was introduced
Discussion: https://postgr.es/m/bab95e12-6cc5-4ebb-80a8-3e41956aa297@gmail.com
This commit is contained in:
Amit Kapila
2025-08-01 07:58:48 +00:00
parent ca09ef3a6a
commit 2ab2d6f970
3 changed files with 49 additions and 8 deletions

View File

@@ -316,7 +316,8 @@ process_syncing_tables_for_sync(XLogRecPtr current_lsn)
UpdateSubscriptionRelState(MyLogicalRepWorker->subid,
MyLogicalRepWorker->relid,
MyLogicalRepWorker->relstate,
MyLogicalRepWorker->relstate_lsn);
MyLogicalRepWorker->relstate_lsn,
false);
/*
* End streaming so that LogRepWorkerWalRcvConn can be used to drop
@@ -425,6 +426,7 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
ListCell *lc;
bool started_tx = false;
bool should_exit = false;
Relation rel = NULL;
Assert(!IsTransactionState());
@@ -492,7 +494,17 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
* worker to remove the origin tracking as if there is any
* error while dropping we won't restart it to drop the
* origin. So passing missing_ok = true.
*
* Lock the subscription and origin in the same order as we
* are doing during DDL commands to avoid deadlocks. See
* AlterSubscription_refresh.
*/
LockSharedObject(SubscriptionRelationId, MyLogicalRepWorker->subid,
0, AccessShareLock);
if (!rel)
rel = table_open(SubscriptionRelRelationId, RowExclusiveLock);
ReplicationOriginNameForLogicalRep(MyLogicalRepWorker->subid,
rstate->relid,
originname,
@@ -504,7 +516,7 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
*/
UpdateSubscriptionRelState(MyLogicalRepWorker->subid,
rstate->relid, rstate->state,
rstate->lsn);
rstate->lsn, true);
}
}
else
@@ -555,7 +567,14 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
* This is required to avoid any undetected deadlocks
* due to any existing lock as deadlock detector won't
* be able to detect the waits on the latch.
*
* Also close any tables prior to the commit.
*/
if (rel)
{
table_close(rel, NoLock);
rel = NULL;
}
CommitTransactionCommand();
pgstat_report_stat(false);
}
@@ -623,6 +642,11 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
}
}
/* Close table if opened */
if (rel)
table_close(rel, NoLock);
if (started_tx)
{
/*
@@ -1414,7 +1438,8 @@ LogicalRepSyncTableStart(XLogRecPtr *origin_startpos)
UpdateSubscriptionRelState(MyLogicalRepWorker->subid,
MyLogicalRepWorker->relid,
MyLogicalRepWorker->relstate,
MyLogicalRepWorker->relstate_lsn);
MyLogicalRepWorker->relstate_lsn,
false);
CommitTransactionCommand();
pgstat_report_stat(true);
@@ -1547,7 +1572,8 @@ LogicalRepSyncTableStart(XLogRecPtr *origin_startpos)
UpdateSubscriptionRelState(MyLogicalRepWorker->subid,
MyLogicalRepWorker->relid,
SUBREL_STATE_FINISHEDCOPY,
MyLogicalRepWorker->relstate_lsn);
MyLogicalRepWorker->relstate_lsn,
false);
CommitTransactionCommand();