mirror of
https://github.com/postgres/postgres.git
synced 2025-11-09 06:21:09 +03:00
Add further debug info to help debug 019_replslot_limit.pl failures.
See also afdeff1052. Failures after that commit provided a few more hints,
but not yet enough to understand what's going on.
In 019_replslot_limit.pl, shut down nodes in fast mode instead of immediate mode
if we observe the failure mode. That should tell us whether the failures we're
observing are just a timing issue under high load. PGCTLTIMEOUT should prevent
buildfarm animals from hanging endlessly.
Also add a bit more logging to replication slot drop and ShutdownPostgres().
Discussion: https://postgr.es/m/20220225192941.hqnvefgdzaro6gzg@alap3.anarazel.de
This commit is contained in:
@@ -569,6 +569,10 @@ restart:
|
||||
if (!s->in_use)
|
||||
continue;
|
||||
|
||||
/* unlocked read of active_pid is ok for debugging purposes */
|
||||
elog(DEBUG3, "temporary replication slot cleanup: %d in use, active_pid: %d",
|
||||
i, s->active_pid);
|
||||
|
||||
SpinLockAcquire(&s->mutex);
|
||||
if (s->active_pid == MyProcPid)
|
||||
{
|
||||
@@ -629,6 +633,9 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
|
||||
char path[MAXPGPATH];
|
||||
char tmppath[MAXPGPATH];
|
||||
|
||||
/* temp debugging aid to analyze 019_replslot_limit failures */
|
||||
elog(DEBUG3, "replication slot drop: %s: begin", NameStr(slot->data.name));
|
||||
|
||||
/*
|
||||
* If some other backend ran this code concurrently with us, we might try
|
||||
* to delete a slot with a certain name while someone else was trying to
|
||||
@@ -679,6 +686,9 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
|
||||
path, tmppath)));
|
||||
}
|
||||
|
||||
elog(DEBUG3, "replication slot drop: %s: removed on-disk",
|
||||
NameStr(slot->data.name));
|
||||
|
||||
/*
|
||||
* The slot is definitely gone. Lock out concurrent scans of the array
|
||||
* long enough to kill it. It's OK to clear the active PID here without
|
||||
@@ -734,6 +744,9 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
|
||||
* a slot while we're still cleaning up the detritus of the old one.
|
||||
*/
|
||||
LWLockRelease(ReplicationSlotAllocationLock);
|
||||
|
||||
elog(DEBUG3, "replication slot drop: %s: done",
|
||||
NameStr(slot->data.name));
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
Reference in New Issue
Block a user