1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-15 19:21:59 +03:00

Code review for log_lock_waits patch. Don't try to issue log messages from
within a signal handler (this might be safe given the relatively narrow code
range in which the interrupt is enabled, but it seems awfully risky); do issue
more informative log messages that tell what is being waited for and the exact
length of the wait; minor other code cleanup.  Greg Stark and Tom Lane
This commit is contained in:
Tom Lane
2007-06-19 20:13:22 +00:00
parent 4c310eca2e
commit 6e07228728
7 changed files with 229 additions and 155 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.188 2007/04/16 18:29:53 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.189 2007/06/19 20:13:21 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -40,6 +40,7 @@
#include "miscadmin.h"
#include "postmaster/autovacuum.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/spin.h"
@ -72,8 +73,12 @@ static LOCALLOCK *lockAwaited = NULL;
/* Mark these volatile because they can be changed by signal handler */
static volatile bool statement_timeout_active = false;
static volatile bool deadlock_timeout_active = false;
static volatile DeadLockState deadlock_state = DS_NOT_YET_CHECKED;
volatile bool cancel_from_timeout = false;
/* timeout_start_time is set when log_lock_waits is true */
static TimestampTz timeout_start_time;
/* statement_fin_time is valid only if statement_timeout_active is true */
static TimestampTz statement_fin_time;
@ -837,6 +842,9 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
*/
LWLockRelease(partitionLock);
/* Reset deadlock_state before enabling the signal handler */
deadlock_state = DS_NOT_YET_CHECKED;
/*
* Set timer so we can wake up after awhile and check for a deadlock. If a
* deadlock is detected, the handler releases the process's semaphore and
@ -869,6 +877,60 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
do
{
PGSemaphoreLock(&MyProc->sem, true);
/*
* If awoken after the deadlock check interrupt has run, and
* log_lock_waits is on, then report about the wait.
*/
if (log_lock_waits)
{
switch (deadlock_state)
{
case DS_NOT_YET_CHECKED:
/* Spurious wakeup as described above */
break;
case DS_NO_DEADLOCK:
case DS_SOFT_DEADLOCK:
{
StringInfoData buf;
const char *modename;
long secs;
int usecs;
long msecs;
initStringInfo(&buf);
DescribeLockTag(&buf, &locallock->tag.lock);
modename = GetLockmodeName(locallock->tag.lock.locktag_lockmethodid,
lockmode);
TimestampDifference(timeout_start_time,
GetCurrentTimestamp(),
&secs, &usecs);
msecs = secs * 1000 + usecs / 1000;
usecs = usecs % 1000;
if (deadlock_state == DS_SOFT_DEADLOCK)
ereport(LOG,
(errmsg("deadlock for %s on %s avoided by rearranging queue order after %ld.%03d ms",
modename, buf.data,
msecs, usecs)));
else if (MyProc->waitStatus == STATUS_WAITING)
ereport(LOG,
(errmsg("process %d still waiting for %s on %s after %ld.%03d ms",
MyProcPid, modename, buf.data,
msecs, usecs)));
else
ereport(LOG,
(errmsg("process %d acquired %s on %s after %ld.%03d ms",
MyProcPid, modename, buf.data,
msecs, usecs)));
pfree(buf.data);
break;
}
case DS_HARD_DEADLOCK:
/* ERROR will be reported below, so no message here */
break;
}
}
} while (MyProc->waitStatus == STATUS_WAITING);
/*
@ -1011,14 +1073,17 @@ ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock)
* We only get to this routine if we got SIGALRM after DeadlockTimeout
* while waiting for a lock to be released by some other process. Look
* to see if there's a deadlock; if not, just return and continue waiting.
* (But signal ProcSleep to log a message, if log_lock_waits is true.)
* If we have a real deadlock, remove ourselves from the lock's wait queue
* and signal an error to ProcSleep.
*
* NB: this is run inside a signal handler, so be very wary about what is done
* here or in called routines.
*/
static void
CheckDeadLock(void)
{
int i;
DeadlockState deadlock_state = DS_DEADLOCK_NOT_FOUND;
/*
* Acquire exclusive lock on the entire shared lock data structures. Must
@ -1044,22 +1109,31 @@ CheckDeadLock(void)
* This is quicker than checking our semaphore's state, since no kernel
* call is needed, and it is safe because we hold the lock partition lock.
*/
if (MyProc->links.prev != INVALID_OFFSET &&
MyProc->links.next != INVALID_OFFSET)
deadlock_state = DeadLockCheck(MyProc);
if (MyProc->links.prev == INVALID_OFFSET ||
MyProc->links.next == INVALID_OFFSET)
goto check_done;
#ifdef LOCK_DEBUG
if (Debug_deadlocks)
DumpAllLocks();
#endif
/* Run the deadlock check, and set deadlock_state for use by ProcSleep */
deadlock_state = DeadLockCheck(MyProc);
if (deadlock_state == DS_HARD_DEADLOCK)
{
/*
* Oops. We have a deadlock.
*
* Get this process out of wait state. (Note: we could do this more
* efficiently by relying on lockAwaited, but use this coding to preserve
* the flexibility to kill some other transaction than the one detecting
* the deadlock.)
* Get this process out of wait state. (Note: we could do this more
* efficiently by relying on lockAwaited, but use this coding to
* preserve the flexibility to kill some other transaction than the
* one detecting the deadlock.)
*
* RemoveFromWaitQueue sets MyProc->waitStatus to STATUS_ERROR, so
* ProcSleep will report an error after we return from the signal handler.
* ProcSleep will report an error after we return from the signal
* handler.
*/
Assert(MyProc->waitLock != NULL);
RemoveFromWaitQueue(MyProc, LockTagHashCode(&(MyProc->waitLock->tag)));
@ -1071,50 +1145,35 @@ CheckDeadLock(void)
PGSemaphoreUnlock(&MyProc->sem);
/*
* We're done here. Transaction abort caused by the error that ProcSleep
* will raise will cause any other locks we hold to be released, thus
* allowing other processes to wake up; we don't need to do that here.
* NOTE: an exception is that releasing locks we hold doesn't consider the
* possibility of waiters that were blocked behind us on the lock we just
* failed to get, and might now be wakable because we're not in front of
* them anymore. However, RemoveFromWaitQueue took care of waking up any
* such processes.
* We're done here. Transaction abort caused by the error that
* ProcSleep will raise will cause any other locks we hold to be
* released, thus allowing other processes to wake up; we don't need
* to do that here. NOTE: an exception is that releasing locks we
* hold doesn't consider the possibility of waiters that were blocked
* behind us on the lock we just failed to get, and might now be
* wakable because we're not in front of them anymore. However,
* RemoveFromWaitQueue took care of waking up any such processes.
*/
}
else if (log_lock_waits)
{
/*
* Unlock my semaphore so that the interrupted ProcSleep() call can
* print the log message (we daren't do it here because we are inside
* a signal handler). It will then sleep again until someone
* releases the lock.
*/
PGSemaphoreUnlock(&MyProc->sem);
}
/*
* Release locks acquired at head of routine. Order is not critical, so
* do it back-to-front to avoid waking another CheckDeadLock instance
* before it can get all the locks.
*/
check_done:
for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
LWLockRelease(FirstLockMgrLock + i);
/*
* Issue any log messages requested.
*
* Deadlock ERROR messages are issued as part of transaction abort, so
* these messages should not raise error states intentionally.
*/
if (log_lock_waits)
{
switch (deadlock_state)
{
case DS_SOFT_DEADLOCK:
ereport(LOG,
(errmsg("deadlock avoided by rearranging lock order")));
break;
case DS_DEADLOCK_NOT_FOUND:
ereport(LOG,
(errmsg("statement waiting for lock for at least %d ms",
DeadlockTimeout)));
break;
case DS_HARD_DEADLOCK:
break; /* ERROR message handled during abort */
default:
break;
}
}
}
@ -1211,8 +1270,8 @@ enable_sig_alarm(int delayms, bool is_statement_timeout)
* to the state variables. The deadlock checker may get run earlier
* than normal, but that does no harm.
*/
fin_time = GetCurrentTimestamp();
fin_time = TimestampTzPlusMilliseconds(fin_time, delayms);
timeout_start_time = GetCurrentTimestamp();
fin_time = TimestampTzPlusMilliseconds(timeout_start_time, delayms);
deadlock_timeout_active = true;
if (fin_time >= statement_fin_time)
return true;
@ -1221,6 +1280,9 @@ enable_sig_alarm(int delayms, bool is_statement_timeout)
{
/* Begin deadlock timeout with no statement-level timeout */
deadlock_timeout_active = true;
/* GetCurrentTimestamp can be expensive, so only do it if we must */
if (log_lock_waits)
timeout_start_time = GetCurrentTimestamp();
}
/* If we reach here, okay to set the timer interrupt */