From 6d0eb38557155855539cd007f04736dc3b2ba16f Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Fri, 21 May 2021 07:54:27 +0530 Subject: [PATCH] Fix deadlock for multiple replicating truncates of the same table. While applying the truncate change, the logical apply worker acquires RowExclusiveLock on the relation being truncated. This allowed truncate on the relation at a time by two apply workers which lead to a deadlock. The reason was that one of the workers after updating the pg_class tuple tries to acquire SHARE lock on the relation and started to wait for the second worker which has acquired RowExclusiveLock on the relation. And when the second worker tries to update the pg_class tuple, it starts to wait for the first worker which leads to a deadlock. Fix it by acquiring AccessExclusiveLock on the relation before applying the truncate change as we do for normal truncate operation. Author: Peter Smith, test case by Haiying Tang Reviewed-by: Dilip Kumar, Amit Kapila Backpatch-through: 11 Discussion: https://postgr.es/m/CAHut+PsNm43p0jM+idTvWwiGZPcP0hGrHMPK9TOAkc+a4UpUqw@mail.gmail.com --- src/backend/replication/logical/worker.c | 9 ++-- src/test/subscription/t/010_truncate.pl | 57 +++++++++++++++++++++++- 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 1432554d5a7..60bf7f7ee9c 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -1818,6 +1818,7 @@ apply_handle_truncate(StringInfo s) List *relids = NIL; List *relids_logged = NIL; ListCell *lc; + LOCKMODE lockmode = AccessExclusiveLock; if (handle_streamed_transaction(LOGICAL_REP_MSG_TRUNCATE, s)) return; @@ -1831,14 +1832,14 @@ apply_handle_truncate(StringInfo s) LogicalRepRelId relid = lfirst_oid(lc); LogicalRepRelMapEntry *rel; - rel = logicalrep_rel_open(relid, RowExclusiveLock); + rel = logicalrep_rel_open(relid, lockmode); if (!should_apply_changes_for_rel(rel)) { /* * The relation can't become interesting in the middle of the * transaction so it's safe to unlock it. */ - logicalrep_rel_close(rel, RowExclusiveLock); + logicalrep_rel_close(rel, lockmode); continue; } @@ -1856,7 +1857,7 @@ apply_handle_truncate(StringInfo s) { ListCell *child; List *children = find_all_inheritors(rel->localreloid, - RowExclusiveLock, + lockmode, NULL); foreach(child, children) @@ -1876,7 +1877,7 @@ apply_handle_truncate(StringInfo s) */ if (RELATION_IS_OTHER_TEMP(childrel)) { - table_close(childrel, RowExclusiveLock); + table_close(childrel, lockmode); continue; } diff --git a/src/test/subscription/t/010_truncate.pl b/src/test/subscription/t/010_truncate.pl index 2d49f2537b8..065f5b0a3c6 100644 --- a/src/test/subscription/t/010_truncate.pl +++ b/src/test/subscription/t/010_truncate.pl @@ -6,7 +6,7 @@ use strict; use warnings; use PostgresNode; use TestLib; -use Test::More tests => 11; +use Test::More tests => 14; # setup @@ -16,6 +16,8 @@ $node_publisher->start; my $node_subscriber = get_new_node('subscriber'); $node_subscriber->init(allows_streaming => 'logical'); +$node_subscriber->append_conf('postgresql.conf', + qq(max_logical_replication_workers = 6)); $node_subscriber->start; my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres'; @@ -187,3 +189,56 @@ $result = $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab1"); is($result, qq(0||), 'truncate replicated in synchronous logical replication'); + +$node_publisher->safe_psql('postgres', + "ALTER SYSTEM RESET synchronous_standby_names"); +$node_publisher->safe_psql('postgres', "SELECT pg_reload_conf()"); + +# test that truncate works for logical replication when there are multiple +# subscriptions for a single table + +$node_publisher->safe_psql('postgres', + "CREATE TABLE tab5 (a int)"); + +$node_subscriber->safe_psql('postgres', + "CREATE TABLE tab5 (a int)"); + +$node_publisher->safe_psql('postgres', + "CREATE PUBLICATION pub5 FOR TABLE tab5"); +$node_subscriber->safe_psql('postgres', + "CREATE SUBSCRIPTION sub5_1 CONNECTION '$publisher_connstr' PUBLICATION pub5" +); +$node_subscriber->safe_psql('postgres', + "CREATE SUBSCRIPTION sub5_2 CONNECTION '$publisher_connstr' PUBLICATION pub5" +); + +# wait for initial data sync +$node_subscriber->poll_query_until('postgres', $synced_query) + or die "Timed out while waiting for subscriber to synchronize data"; + +# insert data to truncate + +$node_publisher->safe_psql('postgres', + "INSERT INTO tab5 VALUES (1), (2), (3)"); + +$node_publisher->wait_for_catchup('sub5_1'); +$node_publisher->wait_for_catchup('sub5_2'); + +$result = $node_subscriber->safe_psql('postgres', + "SELECT count(*), min(a), max(a) FROM tab5"); +is($result, qq(6|1|3), 'insert replicated for multiple subscriptions'); + +$node_publisher->safe_psql('postgres', "TRUNCATE tab5"); + +$node_publisher->wait_for_catchup('sub5_1'); +$node_publisher->wait_for_catchup('sub5_2'); + +$result = $node_subscriber->safe_psql('postgres', + "SELECT count(*), min(a), max(a) FROM tab5"); +is($result, qq(0||), + 'truncate replicated for multiple subscriptions'); + +# check deadlocks +$result = $node_subscriber->safe_psql('postgres', + "SELECT deadlocks FROM pg_stat_database WHERE datname='postgres'"); +is($result, qq(0), 'no deadlocks detected');