mirror of
https://github.com/postgres/postgres.git
synced 2025-06-16 06:01:02 +03:00
Log long wait time on recovery conflict when it's resolved.
This is a follow-up of the work done in commit 0650ff2303
. This commit
extends log_recovery_conflict_waits so that a log message is produced
also when recovery conflict has already been resolved after deadlock_timeout
passes, i.e., when the startup process finishes waiting for recovery
conflict after deadlock_timeout. This is useful in investigating how long
recovery conflicts prevented the recovery from applying WAL.
Author: Fujii Masao
Reviewed-by: Kyotaro Horiguchi, Bertrand Drouvot
Discussion: https://postgr.es/m/9a60178c-a853-1440-2cdc-c3af916cff59@amazon.com
This commit is contained in:
@ -6961,7 +6961,7 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
|
|||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
Controls whether a log message is produced when the startup process
|
Controls whether a log message is produced when the startup process
|
||||||
is waiting longer than <varname>deadlock_timeout</varname>
|
waits longer than <varname>deadlock_timeout</varname>
|
||||||
for recovery conflicts. This is useful in determining if recovery
|
for recovery conflicts. This is useful in determining if recovery
|
||||||
conflicts prevent the recovery from applying WAL.
|
conflicts prevent the recovery from applying WAL.
|
||||||
</para>
|
</para>
|
||||||
|
@ -4034,6 +4034,16 @@ LockBufferForCleanup(Buffer buffer)
|
|||||||
/* Successfully acquired exclusive lock with pincount 1 */
|
/* Successfully acquired exclusive lock with pincount 1 */
|
||||||
UnlockBufHdr(bufHdr, buf_state);
|
UnlockBufHdr(bufHdr, buf_state);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Emit the log message if recovery conflict on buffer pin was
|
||||||
|
* resolved but the startup process waited longer than
|
||||||
|
* deadlock_timeout for it.
|
||||||
|
*/
|
||||||
|
if (logged_recovery_conflict)
|
||||||
|
LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
|
||||||
|
waitStart, GetCurrentTimestamp(),
|
||||||
|
NULL, false);
|
||||||
|
|
||||||
/* Report change to non-waiting status */
|
/* Report change to non-waiting status */
|
||||||
if (new_status)
|
if (new_status)
|
||||||
{
|
{
|
||||||
@ -4088,7 +4098,7 @@ LockBufferForCleanup(Buffer buffer)
|
|||||||
DeadlockTimeout))
|
DeadlockTimeout))
|
||||||
{
|
{
|
||||||
LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
|
LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
|
||||||
waitStart, now, NULL);
|
waitStart, now, NULL, true);
|
||||||
logged_recovery_conflict = true;
|
logged_recovery_conflict = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -226,11 +226,14 @@ WaitExceedsMaxStandbyDelay(uint32 wait_event_info)
|
|||||||
* wait_start is the timestamp when the caller started to wait.
|
* wait_start is the timestamp when the caller started to wait.
|
||||||
* now is the timestamp when this function has been called.
|
* now is the timestamp when this function has been called.
|
||||||
* wait_list is the list of virtual transaction ids assigned to
|
* wait_list is the list of virtual transaction ids assigned to
|
||||||
* conflicting processes.
|
* conflicting processes. still_waiting indicates whether
|
||||||
|
* the startup process is still waiting for the recovery conflict
|
||||||
|
* to be resolved or not.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
|
LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
|
||||||
TimestampTz now, VirtualTransactionId *wait_list)
|
TimestampTz now, VirtualTransactionId *wait_list,
|
||||||
|
bool still_waiting)
|
||||||
{
|
{
|
||||||
long secs;
|
long secs;
|
||||||
int usecs;
|
int usecs;
|
||||||
@ -238,6 +241,12 @@ LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
|
|||||||
StringInfoData buf;
|
StringInfoData buf;
|
||||||
int nprocs = 0;
|
int nprocs = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* There must be no conflicting processes when the recovery conflict has
|
||||||
|
* already been resolved.
|
||||||
|
*/
|
||||||
|
Assert(still_waiting || wait_list == NULL);
|
||||||
|
|
||||||
TimestampDifference(wait_start, now, &secs, &usecs);
|
TimestampDifference(wait_start, now, &secs, &usecs);
|
||||||
msecs = secs * 1000 + usecs / 1000;
|
msecs = secs * 1000 + usecs / 1000;
|
||||||
usecs = usecs % 1000;
|
usecs = usecs % 1000;
|
||||||
@ -275,12 +284,21 @@ LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
|
|||||||
* conflicting backends in a detail message. Note that if all the backends
|
* conflicting backends in a detail message. Note that if all the backends
|
||||||
* in the list are not active, no detail message is logged.
|
* in the list are not active, no detail message is logged.
|
||||||
*/
|
*/
|
||||||
|
if (still_waiting)
|
||||||
|
{
|
||||||
ereport(LOG,
|
ereport(LOG,
|
||||||
errmsg("recovery still waiting after %ld.%03d ms: %s",
|
errmsg("recovery still waiting after %ld.%03d ms: %s",
|
||||||
msecs, usecs, _(get_recovery_conflict_desc(reason))),
|
msecs, usecs, _(get_recovery_conflict_desc(reason))),
|
||||||
nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.",
|
nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.",
|
||||||
"Conflicting processes: %s.",
|
"Conflicting processes: %s.",
|
||||||
nprocs, buf.data) : 0);
|
nprocs, buf.data) : 0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ereport(LOG,
|
||||||
|
errmsg("recovery finished waiting after %ld.%03d ms: %s",
|
||||||
|
msecs, usecs, _(get_recovery_conflict_desc(reason))));
|
||||||
|
}
|
||||||
|
|
||||||
if (nprocs > 0)
|
if (nprocs > 0)
|
||||||
pfree(buf.data);
|
pfree(buf.data);
|
||||||
@ -375,13 +393,12 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Emit the log message if the startup process is waiting
|
* Emit the log message if the startup process is waiting
|
||||||
* longer than deadlock_timeout for recovery conflict on
|
* longer than deadlock_timeout for recovery conflict.
|
||||||
* buffer pin.
|
|
||||||
*/
|
*/
|
||||||
if (maybe_log_conflict &&
|
if (maybe_log_conflict &&
|
||||||
TimestampDifferenceExceeds(waitStart, now, DeadlockTimeout))
|
TimestampDifferenceExceeds(waitStart, now, DeadlockTimeout))
|
||||||
{
|
{
|
||||||
LogRecoveryConflict(reason, waitStart, now, waitlist);
|
LogRecoveryConflict(reason, waitStart, now, waitlist, true);
|
||||||
logged_recovery_conflict = true;
|
logged_recovery_conflict = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -391,6 +408,14 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
|
|||||||
waitlist++;
|
waitlist++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Emit the log message if recovery conflict was resolved but the startup
|
||||||
|
* process waited longer than deadlock_timeout for it.
|
||||||
|
*/
|
||||||
|
if (logged_recovery_conflict)
|
||||||
|
LogRecoveryConflict(reason, waitStart, GetCurrentTimestamp(),
|
||||||
|
NULL, false);
|
||||||
|
|
||||||
/* Reset ps display if we changed it */
|
/* Reset ps display if we changed it */
|
||||||
if (new_status)
|
if (new_status)
|
||||||
{
|
{
|
||||||
|
@ -1322,7 +1322,8 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
|
|||||||
* longer than deadlock_timeout.
|
* longer than deadlock_timeout.
|
||||||
*/
|
*/
|
||||||
LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK,
|
LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK,
|
||||||
standbyWaitStart, now, cnt > 0 ? vxids : NULL);
|
standbyWaitStart, now,
|
||||||
|
cnt > 0 ? vxids : NULL, true);
|
||||||
logged_recovery_conflict = true;
|
logged_recovery_conflict = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1607,6 +1608,15 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
|
|||||||
disable_timeout(DEADLOCK_TIMEOUT, false);
|
disable_timeout(DEADLOCK_TIMEOUT, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Emit the log message if recovery conflict on lock was resolved but the
|
||||||
|
* startup process waited longer than deadlock_timeout for it.
|
||||||
|
*/
|
||||||
|
if (InHotStandby && logged_recovery_conflict)
|
||||||
|
LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK,
|
||||||
|
standbyWaitStart, GetCurrentTimestamp(),
|
||||||
|
NULL, false);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Re-acquire the lock table's partition lock. We have to do this to hold
|
* Re-acquire the lock table's partition lock. We have to do this to hold
|
||||||
* off cancel/die interrupts before we can mess with lockAwaited (else we
|
* off cancel/die interrupts before we can mess with lockAwaited (else we
|
||||||
|
@ -40,7 +40,8 @@ extern void StandbyDeadLockHandler(void);
|
|||||||
extern void StandbyTimeoutHandler(void);
|
extern void StandbyTimeoutHandler(void);
|
||||||
extern void StandbyLockTimeoutHandler(void);
|
extern void StandbyLockTimeoutHandler(void);
|
||||||
extern void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
|
extern void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
|
||||||
TimestampTz cur_ts, VirtualTransactionId *wait_list);
|
TimestampTz cur_ts, VirtualTransactionId *wait_list,
|
||||||
|
bool still_waiting);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Standby Rmgr (RM_STANDBY_ID)
|
* Standby Rmgr (RM_STANDBY_ID)
|
||||||
|
Reference in New Issue
Block a user