mirror of
https://github.com/postgres/postgres.git
synced 2025-11-07 19:06:32 +03:00
Introduce timeout handling framework
Management of timeouts was getting a little cumbersome; what we
originally had was more than enough back when we were only concerned
about deadlocks and query cancel; however, when we added timeouts for
standby processes, the code got considerably messier. Since there are
plans to add more complex timeouts, this seems a good time to introduce
a central timeout handling module.
External modules register their timeout handlers during process
initialization, and later enable and disable them as they see fit using
a simple API; timeout.c is in charge of keeping track of which timeouts
are in effect at any time, installing a common SIGALRM signal handler,
and calling setitimer() as appropriate to ensure timely firing of
external handlers.
timeout.c additionally supports pluggable modules to add their own
timeouts, though this capability isn't exercised anywhere yet.
Additionally, as of this commit, walsender processes are aware of
timeouts; we had a preexisting bug there that made those ignore SIGALRM,
thus being subject to unhandled deadlocks, particularly during the
authentication phase. This has already been fixed in back branches in
commit 0bf8eb2a, which see for more details.
Main author: Zoltán Böszörményi
Some review and cleanup by Álvaro Herrera
Extensive reworking by Tom Lane
This commit is contained in:
@@ -48,6 +48,7 @@
|
||||
#include "storage/procarray.h"
|
||||
#include "storage/procsignal.h"
|
||||
#include "storage/spin.h"
|
||||
#include "utils/timeout.h"
|
||||
#include "utils/timestamp.h"
|
||||
|
||||
|
||||
@@ -77,26 +78,13 @@ PGPROC *PreparedXactProcs = NULL;
|
||||
/* If we are waiting for a lock, this points to the associated LOCALLOCK */
|
||||
static LOCALLOCK *lockAwaited = NULL;
|
||||
|
||||
/* Mark these volatile because they can be changed by signal handler */
|
||||
static volatile bool standby_timeout_active = false;
|
||||
static volatile bool statement_timeout_active = false;
|
||||
static volatile bool deadlock_timeout_active = false;
|
||||
/* Mark this volatile because it can be changed by signal handler */
|
||||
static volatile DeadLockState deadlock_state = DS_NOT_YET_CHECKED;
|
||||
volatile bool cancel_from_timeout = false;
|
||||
|
||||
/* timeout_start_time is set when log_lock_waits is true */
|
||||
static TimestampTz timeout_start_time;
|
||||
|
||||
/* statement_fin_time is valid only if statement_timeout_active is true */
|
||||
static TimestampTz statement_fin_time;
|
||||
static TimestampTz statement_fin_time2; /* valid only in recovery */
|
||||
|
||||
|
||||
static void RemoveProcFromArray(int code, Datum arg);
|
||||
static void ProcKill(int code, Datum arg);
|
||||
static void AuxiliaryProcKill(int code, Datum arg);
|
||||
static bool CheckStatementTimeout(void);
|
||||
static bool CheckStandbyTimeout(void);
|
||||
|
||||
|
||||
/*
|
||||
@@ -653,7 +641,7 @@ LockErrorCleanup(void)
|
||||
return;
|
||||
|
||||
/* Turn off the deadlock timer, if it's still running (see ProcSleep) */
|
||||
disable_sig_alarm(false);
|
||||
disable_timeout(DEADLOCK_TIMEOUT, false);
|
||||
|
||||
/* Unlink myself from the wait queue, if on it (might not be anymore!) */
|
||||
partitionLock = LockHashPartitionLock(lockAwaited->hashcode);
|
||||
@@ -1036,7 +1024,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
|
||||
if (RecoveryInProgress() && !InRecovery)
|
||||
CheckRecoveryConflictDeadlock();
|
||||
|
||||
/* Reset deadlock_state before enabling the signal handler */
|
||||
/* Reset deadlock_state before enabling the timeout handler */
|
||||
deadlock_state = DS_NOT_YET_CHECKED;
|
||||
|
||||
/*
|
||||
@@ -1048,8 +1036,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
|
||||
* By delaying the check until we've waited for a bit, we can avoid
|
||||
* running the rather expensive deadlock-check code in most cases.
|
||||
*/
|
||||
if (!enable_sig_alarm(DeadlockTimeout, false))
|
||||
elog(FATAL, "could not set timer for process wakeup");
|
||||
enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout);
|
||||
|
||||
/*
|
||||
* If someone wakes us between LWLockRelease and PGSemaphoreLock,
|
||||
@@ -1065,8 +1052,8 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
|
||||
* that we don't mind losing control to a cancel/die interrupt here. We
|
||||
* don't, because we have no shared-state-change work to do after being
|
||||
* granted the lock (the grantor did it all). We do have to worry about
|
||||
* updating the locallock table, but if we lose control to an error,
|
||||
* LockErrorCleanup will fix that up.
|
||||
* canceling the deadlock timeout and updating the locallock table, but if
|
||||
* we lose control to an error, LockErrorCleanup will fix that up.
|
||||
*/
|
||||
do
|
||||
{
|
||||
@@ -1138,7 +1125,8 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
|
||||
DescribeLockTag(&buf, &locallock->tag.lock);
|
||||
modename = GetLockmodeName(locallock->tag.lock.locktag_lockmethodid,
|
||||
lockmode);
|
||||
TimestampDifference(timeout_start_time, GetCurrentTimestamp(),
|
||||
TimestampDifference(get_timeout_start_time(DEADLOCK_TIMEOUT),
|
||||
GetCurrentTimestamp(),
|
||||
&secs, &usecs);
|
||||
msecs = secs * 1000 + usecs / 1000;
|
||||
usecs = usecs % 1000;
|
||||
@@ -1200,8 +1188,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
|
||||
/*
|
||||
* Disable the timer, if it's still running
|
||||
*/
|
||||
if (!disable_sig_alarm(false))
|
||||
elog(FATAL, "could not disable timer for process wakeup");
|
||||
disable_timeout(DEADLOCK_TIMEOUT, false);
|
||||
|
||||
/*
|
||||
* Re-acquire the lock table's partition lock. We have to do this to hold
|
||||
@@ -1334,7 +1321,7 @@ ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock)
|
||||
/*
|
||||
* CheckDeadLock
|
||||
*
|
||||
* We only get to this routine if we got SIGALRM after DeadlockTimeout
|
||||
* We only get to this routine if the DEADLOCK_TIMEOUT fired
|
||||
* while waiting for a lock to be released by some other process. Look
|
||||
* to see if there's a deadlock; if not, just return and continue waiting.
|
||||
* (But signal ProcSleep to log a message, if log_lock_waits is true.)
|
||||
@@ -1344,7 +1331,7 @@ ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock)
|
||||
* NB: this is run inside a signal handler, so be very wary about what is done
|
||||
* here or in called routines.
|
||||
*/
|
||||
static void
|
||||
void
|
||||
CheckDeadLock(void)
|
||||
{
|
||||
int i;
|
||||
@@ -1498,401 +1485,3 @@ ProcSendSignal(int pid)
|
||||
if (proc != NULL)
|
||||
PGSemaphoreUnlock(&proc->sem);
|
||||
}
|
||||
|
||||
|
||||
/*****************************************************************************
|
||||
* SIGALRM interrupt support
|
||||
*
|
||||
* Maybe these should be in pqsignal.c?
|
||||
*****************************************************************************/
|
||||
|
||||
/*
|
||||
* Enable the SIGALRM interrupt to fire after the specified delay
|
||||
*
|
||||
* Delay is given in milliseconds. Caller should be sure a SIGALRM
|
||||
* signal handler is installed before this is called.
|
||||
*
|
||||
* This code properly handles nesting of deadlock timeout alarms within
|
||||
* statement timeout alarms.
|
||||
*
|
||||
* Returns TRUE if okay, FALSE on failure.
|
||||
*/
|
||||
bool
|
||||
enable_sig_alarm(int delayms, bool is_statement_timeout)
|
||||
{
|
||||
TimestampTz fin_time;
|
||||
struct itimerval timeval;
|
||||
|
||||
if (is_statement_timeout)
|
||||
{
|
||||
/*
|
||||
* Begin statement-level timeout
|
||||
*
|
||||
* Note that we compute statement_fin_time with reference to the
|
||||
* statement_timestamp, but apply the specified delay without any
|
||||
* correction; that is, we ignore whatever time has elapsed since
|
||||
* statement_timestamp was set. In the normal case only a small
|
||||
* interval will have elapsed and so this doesn't matter, but there
|
||||
* are corner cases (involving multi-statement query strings with
|
||||
* embedded COMMIT or ROLLBACK) where we might re-initialize the
|
||||
* statement timeout long after initial receipt of the message. In
|
||||
* such cases the enforcement of the statement timeout will be a bit
|
||||
* inconsistent. This annoyance is judged not worth the cost of
|
||||
* performing an additional gettimeofday() here.
|
||||
*/
|
||||
Assert(!deadlock_timeout_active);
|
||||
fin_time = GetCurrentStatementStartTimestamp();
|
||||
fin_time = TimestampTzPlusMilliseconds(fin_time, delayms);
|
||||
statement_fin_time = fin_time;
|
||||
cancel_from_timeout = false;
|
||||
statement_timeout_active = true;
|
||||
}
|
||||
else if (statement_timeout_active)
|
||||
{
|
||||
/*
|
||||
* Begin deadlock timeout with statement-level timeout active
|
||||
*
|
||||
* Here, we want to interrupt at the closer of the two timeout times.
|
||||
* If fin_time >= statement_fin_time then we need not touch the
|
||||
* existing timer setting; else set up to interrupt at the deadlock
|
||||
* timeout time.
|
||||
*
|
||||
* NOTE: in this case it is possible that this routine will be
|
||||
* interrupted by the previously-set timer alarm. This is okay
|
||||
* because the signal handler will do only what it should do according
|
||||
* to the state variables. The deadlock checker may get run earlier
|
||||
* than normal, but that does no harm.
|
||||
*/
|
||||
timeout_start_time = GetCurrentTimestamp();
|
||||
fin_time = TimestampTzPlusMilliseconds(timeout_start_time, delayms);
|
||||
deadlock_timeout_active = true;
|
||||
if (fin_time >= statement_fin_time)
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Begin deadlock timeout with no statement-level timeout */
|
||||
deadlock_timeout_active = true;
|
||||
/* GetCurrentTimestamp can be expensive, so only do it if we must */
|
||||
if (log_lock_waits)
|
||||
timeout_start_time = GetCurrentTimestamp();
|
||||
}
|
||||
|
||||
/* If we reach here, okay to set the timer interrupt */
|
||||
MemSet(&timeval, 0, sizeof(struct itimerval));
|
||||
timeval.it_value.tv_sec = delayms / 1000;
|
||||
timeval.it_value.tv_usec = (delayms % 1000) * 1000;
|
||||
if (setitimer(ITIMER_REAL, &timeval, NULL))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Cancel the SIGALRM timer, either for a deadlock timeout or a statement
|
||||
* timeout. If a deadlock timeout is canceled, any active statement timeout
|
||||
* remains in force.
|
||||
*
|
||||
* Returns TRUE if okay, FALSE on failure.
|
||||
*/
|
||||
bool
|
||||
disable_sig_alarm(bool is_statement_timeout)
|
||||
{
|
||||
/*
|
||||
* Always disable the interrupt if it is active; this avoids being
|
||||
* interrupted by the signal handler and thereby possibly getting
|
||||
* confused.
|
||||
*
|
||||
* We will re-enable the interrupt if necessary in CheckStatementTimeout.
|
||||
*/
|
||||
if (statement_timeout_active || deadlock_timeout_active)
|
||||
{
|
||||
struct itimerval timeval;
|
||||
|
||||
MemSet(&timeval, 0, sizeof(struct itimerval));
|
||||
if (setitimer(ITIMER_REAL, &timeval, NULL))
|
||||
{
|
||||
statement_timeout_active = false;
|
||||
cancel_from_timeout = false;
|
||||
deadlock_timeout_active = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Always cancel deadlock timeout, in case this is error cleanup */
|
||||
deadlock_timeout_active = false;
|
||||
|
||||
/* Cancel or reschedule statement timeout */
|
||||
if (is_statement_timeout)
|
||||
{
|
||||
statement_timeout_active = false;
|
||||
cancel_from_timeout = false;
|
||||
}
|
||||
else if (statement_timeout_active)
|
||||
{
|
||||
if (!CheckStatementTimeout())
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Check for statement timeout. If the timeout time has come,
|
||||
* trigger a query-cancel interrupt; if not, reschedule the SIGALRM
|
||||
* interrupt to occur at the right time.
|
||||
*
|
||||
* Returns true if okay, false if failed to set the interrupt.
|
||||
*/
|
||||
static bool
|
||||
CheckStatementTimeout(void)
|
||||
{
|
||||
TimestampTz now;
|
||||
|
||||
if (!statement_timeout_active)
|
||||
return true; /* do nothing if not active */
|
||||
|
||||
now = GetCurrentTimestamp();
|
||||
|
||||
if (now >= statement_fin_time)
|
||||
{
|
||||
/* Time to die */
|
||||
statement_timeout_active = false;
|
||||
cancel_from_timeout = true;
|
||||
#ifdef HAVE_SETSID
|
||||
/* try to signal whole process group */
|
||||
kill(-MyProcPid, SIGINT);
|
||||
#endif
|
||||
kill(MyProcPid, SIGINT);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Not time yet, so (re)schedule the interrupt */
|
||||
long secs;
|
||||
int usecs;
|
||||
struct itimerval timeval;
|
||||
|
||||
TimestampDifference(now, statement_fin_time,
|
||||
&secs, &usecs);
|
||||
|
||||
/*
|
||||
* It's possible that the difference is less than a microsecond;
|
||||
* ensure we don't cancel, rather than set, the interrupt.
|
||||
*/
|
||||
if (secs == 0 && usecs == 0)
|
||||
usecs = 1;
|
||||
MemSet(&timeval, 0, sizeof(struct itimerval));
|
||||
timeval.it_value.tv_sec = secs;
|
||||
timeval.it_value.tv_usec = usecs;
|
||||
if (setitimer(ITIMER_REAL, &timeval, NULL))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Signal handler for SIGALRM for normal user backends
|
||||
*
|
||||
* Process deadlock check and/or statement timeout check, as needed.
|
||||
* To avoid various edge cases, we must be careful to do nothing
|
||||
* when there is nothing to be done. We also need to be able to
|
||||
* reschedule the timer interrupt if called before end of statement.
|
||||
*/
|
||||
void
|
||||
handle_sig_alarm(SIGNAL_ARGS)
|
||||
{
|
||||
int save_errno = errno;
|
||||
|
||||
/* SIGALRM is cause for waking anything waiting on the process latch */
|
||||
if (MyProc)
|
||||
SetLatch(&MyProc->procLatch);
|
||||
|
||||
if (deadlock_timeout_active)
|
||||
{
|
||||
deadlock_timeout_active = false;
|
||||
CheckDeadLock();
|
||||
}
|
||||
|
||||
if (statement_timeout_active)
|
||||
(void) CheckStatementTimeout();
|
||||
|
||||
errno = save_errno;
|
||||
}
|
||||
|
||||
/*
|
||||
* Signal handler for SIGALRM in Startup process
|
||||
*
|
||||
* To avoid various edge cases, we must be careful to do nothing
|
||||
* when there is nothing to be done. We also need to be able to
|
||||
* reschedule the timer interrupt if called before end of statement.
|
||||
*
|
||||
* We set either deadlock_timeout_active or statement_timeout_active
|
||||
* or both. Interrupts are enabled if standby_timeout_active.
|
||||
*/
|
||||
bool
|
||||
enable_standby_sig_alarm(TimestampTz now, TimestampTz fin_time, bool deadlock_only)
|
||||
{
|
||||
TimestampTz deadlock_time = TimestampTzPlusMilliseconds(now,
|
||||
DeadlockTimeout);
|
||||
|
||||
if (deadlock_only)
|
||||
{
|
||||
/*
|
||||
* Wake up at deadlock_time only, then wait forever
|
||||
*/
|
||||
statement_fin_time = deadlock_time;
|
||||
deadlock_timeout_active = true;
|
||||
statement_timeout_active = false;
|
||||
}
|
||||
else if (fin_time > deadlock_time)
|
||||
{
|
||||
/*
|
||||
* Wake up at deadlock_time, then again at fin_time
|
||||
*/
|
||||
statement_fin_time = deadlock_time;
|
||||
statement_fin_time2 = fin_time;
|
||||
deadlock_timeout_active = true;
|
||||
statement_timeout_active = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Wake only at fin_time because its fairly soon
|
||||
*/
|
||||
statement_fin_time = fin_time;
|
||||
deadlock_timeout_active = false;
|
||||
statement_timeout_active = true;
|
||||
}
|
||||
|
||||
if (deadlock_timeout_active || statement_timeout_active)
|
||||
{
|
||||
long secs;
|
||||
int usecs;
|
||||
struct itimerval timeval;
|
||||
|
||||
TimestampDifference(now, statement_fin_time,
|
||||
&secs, &usecs);
|
||||
if (secs == 0 && usecs == 0)
|
||||
usecs = 1;
|
||||
MemSet(&timeval, 0, sizeof(struct itimerval));
|
||||
timeval.it_value.tv_sec = secs;
|
||||
timeval.it_value.tv_usec = usecs;
|
||||
if (setitimer(ITIMER_REAL, &timeval, NULL))
|
||||
return false;
|
||||
standby_timeout_active = true;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
disable_standby_sig_alarm(void)
|
||||
{
|
||||
/*
|
||||
* Always disable the interrupt if it is active; this avoids being
|
||||
* interrupted by the signal handler and thereby possibly getting
|
||||
* confused.
|
||||
*
|
||||
* We will re-enable the interrupt if necessary in CheckStandbyTimeout.
|
||||
*/
|
||||
if (standby_timeout_active)
|
||||
{
|
||||
struct itimerval timeval;
|
||||
|
||||
MemSet(&timeval, 0, sizeof(struct itimerval));
|
||||
if (setitimer(ITIMER_REAL, &timeval, NULL))
|
||||
{
|
||||
standby_timeout_active = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
standby_timeout_active = false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* CheckStandbyTimeout() runs unconditionally in the Startup process
|
||||
* SIGALRM handler. Timers will only be set when InHotStandby.
|
||||
* We simply ignore any signals unless the timer has been set.
|
||||
*/
|
||||
static bool
|
||||
CheckStandbyTimeout(void)
|
||||
{
|
||||
TimestampTz now;
|
||||
bool reschedule = false;
|
||||
|
||||
standby_timeout_active = false;
|
||||
|
||||
now = GetCurrentTimestamp();
|
||||
|
||||
/*
|
||||
* Reschedule the timer if its not time to wake yet, or if we have both
|
||||
* timers set and the first one has just been reached.
|
||||
*/
|
||||
if (now >= statement_fin_time)
|
||||
{
|
||||
if (deadlock_timeout_active)
|
||||
{
|
||||
/*
|
||||
* We're still waiting when we reach deadlock timeout, so send out
|
||||
* a request to have other backends check themselves for deadlock.
|
||||
* Then continue waiting until statement_fin_time, if that's set.
|
||||
*/
|
||||
SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
|
||||
deadlock_timeout_active = false;
|
||||
|
||||
/*
|
||||
* Begin second waiting period if required.
|
||||
*/
|
||||
if (statement_timeout_active)
|
||||
{
|
||||
reschedule = true;
|
||||
statement_fin_time = statement_fin_time2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* We've now reached statement_fin_time, so ask all conflicts to
|
||||
* leave, so we can press ahead with applying changes in recovery.
|
||||
*/
|
||||
SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
|
||||
}
|
||||
}
|
||||
else
|
||||
reschedule = true;
|
||||
|
||||
if (reschedule)
|
||||
{
|
||||
long secs;
|
||||
int usecs;
|
||||
struct itimerval timeval;
|
||||
|
||||
TimestampDifference(now, statement_fin_time,
|
||||
&secs, &usecs);
|
||||
if (secs == 0 && usecs == 0)
|
||||
usecs = 1;
|
||||
MemSet(&timeval, 0, sizeof(struct itimerval));
|
||||
timeval.it_value.tv_sec = secs;
|
||||
timeval.it_value.tv_usec = usecs;
|
||||
if (setitimer(ITIMER_REAL, &timeval, NULL))
|
||||
return false;
|
||||
standby_timeout_active = true;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
handle_standby_sig_alarm(SIGNAL_ARGS)
|
||||
{
|
||||
int save_errno = errno;
|
||||
|
||||
if (standby_timeout_active)
|
||||
(void) CheckStandbyTimeout();
|
||||
|
||||
errno = save_errno;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user