1
0
mirror of https://github.com/postgres/postgres.git synced 2025-09-02 04:21:28 +03:00

Add GUC to log long wait times on recovery conflicts.

This commit adds GUC log_recovery_conflict_waits that controls whether
a log message is produced when the startup process is waiting longer than
deadlock_timeout for recovery conflicts. This is useful in determining
if recovery conflicts prevent the recovery from applying WAL.

Note that currently a log message is produced only when recovery conflict
has not been resolved yet even after deadlock_timeout passes, i.e.,
only when the startup process is still waiting for recovery conflict
even after deadlock_timeout.

Author: Bertrand Drouvot, Masahiko Sawada
Reviewed-by: Alvaro Herrera, Kyotaro Horiguchi, Fujii Masao
Discussion: https://postgr.es/m/9a60178c-a853-1440-2cdc-c3af916cff59@amazon.com
This commit is contained in:
Fujii Masao
2021-01-08 00:47:03 +09:00
parent f7a1a805cb
commit 0650ff2303
8 changed files with 292 additions and 30 deletions

View File

@@ -1064,8 +1064,10 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
LWLock *partitionLock = LockHashPartitionLock(hashcode);
PROC_QUEUE *waitQueue = &(lock->waitProcs);
LOCKMASK myHeldLocks = MyProc->heldLocks;
TimestampTz standbyWaitStart = 0;
bool early_deadlock = false;
bool allow_autovacuum_cancel = true;
bool logged_recovery_conflict = false;
ProcWaitStatus myWaitStatus;
PGPROC *proc;
PGPROC *leader = MyProc->lockGroupLeader;
@@ -1261,6 +1263,14 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
else
enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout);
}
else if (log_recovery_conflict_waits)
{
/*
* Set the wait start timestamp if logging is enabled and in hot
* standby.
*/
standbyWaitStart = GetCurrentTimestamp();
}
/*
* If somebody wakes us between LWLockRelease and WaitLatch, the latch
@@ -1280,8 +1290,42 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
{
if (InHotStandby)
{
/* Set a timer and wait for that or for the Lock to be granted */
ResolveRecoveryConflictWithLock(locallock->tag.lock);
bool maybe_log_conflict =
(standbyWaitStart != 0 && !logged_recovery_conflict);
/* Set a timer and wait for that or for the lock to be granted */
ResolveRecoveryConflictWithLock(locallock->tag.lock,
maybe_log_conflict);
/*
* Emit the log message if the startup process is waiting longer
* than deadlock_timeout for recovery conflict on lock.
*/
if (maybe_log_conflict)
{
TimestampTz now = GetCurrentTimestamp();
if (TimestampDifferenceExceeds(standbyWaitStart, now,
DeadlockTimeout))
{
VirtualTransactionId *vxids;
int cnt;
vxids = GetLockConflicts(&locallock->tag.lock,
AccessExclusiveLock, &cnt);
/*
* Log the recovery conflict and the list of PIDs of
* backends holding the conflicting lock. Note that we do
* logging even if there are no such backends right now
* because the startup process here has already waited
* longer than deadlock_timeout.
*/
LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK,
standbyWaitStart, now, cnt > 0 ? vxids : NULL);
logged_recovery_conflict = true;
}
}
}
else
{