diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index caa6b297560..ed4c8b3ad55 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -702,8 +702,13 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
 	slot->active_pid = 0;
 	slot->in_use = false;
 	LWLockRelease(ReplicationSlotControlLock);
+
+	elog(DEBUG3, "replication slot drop: %s: marked as not in use", NameStr(slot->data.name));
+
 	ConditionVariableBroadcast(&slot->active_cv);
 
+	elog(DEBUG3, "replication slot drop: %s: notified others", NameStr(slot->data.name));
+
 	/*
 	 * Slot is dead and doesn't prevent resource removal anymore, recompute
 	 * limits.
@@ -711,6 +716,8 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
 	ReplicationSlotsComputeRequiredXmin(false);
 	ReplicationSlotsComputeRequiredLSN();
 
+	elog(DEBUG3, "replication slot drop: %s: computed required", NameStr(slot->data.name));
+
 	/*
 	 * If removing the directory fails, the worst thing that will happen is
 	 * that the user won't be able to create a new slot with the same name
@@ -720,6 +727,8 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
 		ereport(WARNING,
 				(errmsg("could not remove directory \"%s\"", tmppath)));
 
+	elog(DEBUG3, "replication slot drop: %s: removed directory", NameStr(slot->data.name));
+
 	/*
 	 * Send a message to drop the replication slot to the stats collector.
 	 * Since there is no guarantee of the order of message transfer on a UDP
diff --git a/src/test/recovery/t/019_replslot_limit.pl b/src/test/recovery/t/019_replslot_limit.pl
index 77bb401bc5f..5654f3b5455 100644
--- a/src/test/recovery/t/019_replslot_limit.pl
+++ b/src/test/recovery/t/019_replslot_limit.pl
@@ -339,8 +339,8 @@ my $senderpid;
 # We've seen occasional cases where multiple walsender pids are active. It
 # could be that we're just observing process shutdown being slow. To collect
 # more information, retry a couple times, print a bit of debugging information
-# each iteration. For now report a test failure even if later iterations
-# succeed.
+# each iteration. Don't fail the test if retries find just one pid, the
+# buildfarm failures are too noisy.
 my $i = 0;
 while (1)
 {
@@ -349,7 +349,9 @@ while (1)
 	$senderpid = $node_primary3->safe_psql('postgres',
 		"SELECT pid FROM pg_stat_activity WHERE backend_type = 'walsender'");
 
-	last if like($senderpid, qr/^[0-9]+$/, "have walsender pid $senderpid");
+	last if $senderpid =~ qr/^[0-9]+$/;
+
+	diag "multiple walsenders active in iteration $i";
 
 	# show information about all active connections
 	$node_primary3->psql('postgres',
@@ -370,6 +372,8 @@ while (1)
 	usleep(100_000);
 }
 
+like($senderpid, qr/^[0-9]+$/, "have walsender pid $senderpid");
+
 my $receiverpid = $node_standby3->safe_psql('postgres',
 	"SELECT pid FROM pg_stat_activity WHERE backend_type = 'walreceiver'");
 like($receiverpid, qr/^[0-9]+$/, "have walreceiver pid $receiverpid");
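
The test hunk above boils down to a retry-then-assert pattern: poll until exactly one walsender pid is reported, emit diag() output on each failed iteration so the buildfarm logs stay informative, and run the single like() check only after the loop. Below is a minimal, self-contained sketch of that pattern, separate from the patch itself; probe_walsender_pids() is a hypothetical stand-in for the test's safe_psql() query against pg_stat_activity, and the retry limit and sleep are arbitrary illustrative values.

	use strict;
	use warnings;
	use Test::More tests => 1;
	use Time::HiRes qw(usleep);

	sub probe_walsender_pids
	{
		my ($iteration) = @_;
		# Hypothetical stand-in for the real pg_stat_activity query;
		# pretend a second walsender lingers for the first few polls.
		return $iteration < 3 ? "4711\n4712" : "4711";
	}

	my $senderpid;
	my $i = 0;
	while (1)
	{
		$senderpid = probe_walsender_pids($i);

		# Stop as soon as exactly one numeric pid is reported.
		last if $senderpid =~ qr/^[0-9]+$/;

		# Otherwise just record what was seen; the test is not failed here.
		diag "multiple walsenders active in iteration $i: $senderpid";

		last if $i++ >= 10;
		usleep(100_000);
	}

	# Assert once, after the retries, mirroring the like() moved below the loop.
	like($senderpid, qr/^[0-9]+$/, "have walsender pid $senderpid");

Keeping the like() outside the loop means a slow walsender shutdown only costs a few retries and some diagnostic noise, rather than a spurious buildfarm failure.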