1
0
mirror of https://github.com/postgres/postgres.git synced 2025-09-03 15:22:11 +03:00

Log long wait time on recovery conflict when it's resolved.

This is a follow-up to the work done in commit 0650ff2303. This commit
extends log_recovery_conflict_waits so that a log message is also produced
when a recovery conflict has already been resolved after deadlock_timeout
has passed, i.e., when the startup process finishes waiting for a recovery
conflict after deadlock_timeout. This is useful for investigating how long
recovery conflicts prevented the recovery from applying WAL.

Author: Fujii Masao
Reviewed-by: Kyotaro Horiguchi, Bertrand Drouvot
Discussion: https://postgr.es/m/9a60178c-a853-1440-2cdc-c3af916cff59@amazon.com
This commit is contained in:
Fujii Masao
2021-01-13 22:59:17 +09:00
parent 6ecaaf810b
commit 39b03690b5
5 changed files with 61 additions and 15 deletions

View File

@@ -226,11 +226,14 @@ WaitExceedsMaxStandbyDelay(uint32 wait_event_info)
* wait_start is the timestamp when the caller started to wait.
* now is the timestamp when this function has been called.
* wait_list is the list of virtual transaction ids assigned to
* conflicting processes.
* conflicting processes. still_waiting indicates whether
* the startup process is still waiting for the recovery conflict
* to be resolved or not.
*/
void
LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
TimestampTz now, VirtualTransactionId *wait_list)
TimestampTz now, VirtualTransactionId *wait_list,
bool still_waiting)
{
long secs;
int usecs;
@@ -238,6 +241,12 @@ LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
StringInfoData buf;
int nprocs = 0;
/*
* There must be no conflicting processes when the recovery conflict has
* already been resolved.
*/
Assert(still_waiting || wait_list == NULL);
TimestampDifference(wait_start, now, &secs, &usecs);
msecs = secs * 1000 + usecs / 1000;
usecs = usecs % 1000;
@@ -275,12 +284,21 @@ LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
* conflicting backends in a detail message. Note that if all the backends
* in the list are not active, no detail message is logged.
*/
ereport(LOG,
errmsg("recovery still waiting after %ld.%03d ms: %s",
msecs, usecs, _(get_recovery_conflict_desc(reason))),
nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.",
"Conflicting processes: %s.",
nprocs, buf.data) : 0);
if (still_waiting)
{
ereport(LOG,
errmsg("recovery still waiting after %ld.%03d ms: %s",
msecs, usecs, _(get_recovery_conflict_desc(reason))),
nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.",
"Conflicting processes: %s.",
nprocs, buf.data) : 0);
}
else
{
ereport(LOG,
errmsg("recovery finished waiting after %ld.%03d ms: %s",
msecs, usecs, _(get_recovery_conflict_desc(reason))));
}
if (nprocs > 0)
pfree(buf.data);
@@ -375,13 +393,12 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
/*
* Emit the log message if the startup process is waiting
* longer than deadlock_timeout for recovery conflict on
* buffer pin.
* longer than deadlock_timeout for recovery conflict.
*/
if (maybe_log_conflict &&
TimestampDifferenceExceeds(waitStart, now, DeadlockTimeout))
{
LogRecoveryConflict(reason, waitStart, now, waitlist);
LogRecoveryConflict(reason, waitStart, now, waitlist, true);
logged_recovery_conflict = true;
}
}
@@ -391,6 +408,14 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
waitlist++;
}
/*
* Emit the log message if recovery conflict was resolved but the startup
* process waited longer than deadlock_timeout for it.
*/
if (logged_recovery_conflict)
LogRecoveryConflict(reason, waitStart, GetCurrentTimestamp(),
NULL, false);
/* Reset ps display if we changed it */
if (new_status)
{