mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	It's not OK to do that without calling CHECK_FOR_INTERRUPTS().
Let the next wait loop deal with it, following the usual pattern.
One consequence of this bug was that a SIGTERM delivered in a very
narrow timing window could leave a parallel worker process waiting
forever for a condition variable that will never be signaled, after
an error was raised in another process.
The code is a bit different in the stable branches due to commit
1321509f, making problems less likely there.  No back-patch for now,
but we may finish up deciding to make a similar change after more
discussion.
Author: Thomas Munro
Reviewed-by: Shawn Debnath
Reported-by: Tomas Vondra
Discussion: https://postgr.es/m/CA%2BhUKGJOm8zZHjVA8svoNT3tHY0XdqmaC_kHitmgXDQM49m1dA%40mail.gmail.com
		
	
		
			
				
	
	
		
			376 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			376 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*-------------------------------------------------------------------------
 | 
						|
 *
 | 
						|
 * condition_variable.c
 | 
						|
 *	  Implementation of condition variables.  Condition variables provide
 | 
						|
 *	  a way for one process to wait until a specific condition occurs,
 | 
						|
 *	  without needing to know the specific identity of the process for
 | 
						|
 *	  which they are waiting.  Waits for condition variables can be
 | 
						|
 *	  interrupted, unlike LWLock waits.  Condition variables are safe
 | 
						|
 *	  to use within dynamic shared memory segments.
 | 
						|
 *
 | 
						|
 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
 | 
						|
 * Portions Copyright (c) 1994, Regents of the University of California
 | 
						|
 *
 | 
						|
 * src/backend/storage/lmgr/condition_variable.c
 | 
						|
 *
 | 
						|
 *-------------------------------------------------------------------------
 | 
						|
 */
 | 
						|
 | 
						|
#include "postgres.h"
 | 
						|
 | 
						|
#include "miscadmin.h"
 | 
						|
#include "portability/instr_time.h"
 | 
						|
#include "storage/condition_variable.h"
 | 
						|
#include "storage/ipc.h"
 | 
						|
#include "storage/proc.h"
 | 
						|
#include "storage/proclist.h"
 | 
						|
#include "storage/spin.h"
 | 
						|
#include "utils/memutils.h"
 | 
						|
 | 
						|
/* Initially, we are not prepared to sleep on any condition variable. */
 | 
						|
static ConditionVariable *cv_sleep_target = NULL;
 | 
						|
 | 
						|
/* Reusable WaitEventSet. */
 | 
						|
static WaitEventSet *cv_wait_event_set = NULL;
 | 
						|
 | 
						|
/*
 | 
						|
 * Initialize a condition variable.
 | 
						|
 */
 | 
						|
void
 | 
						|
ConditionVariableInit(ConditionVariable *cv)
 | 
						|
{
 | 
						|
	SpinLockInit(&cv->mutex);
 | 
						|
	proclist_init(&cv->wakeup);
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Prepare to wait on a given condition variable.
 | 
						|
 *
 | 
						|
 * This can optionally be called before entering a test/sleep loop.
 | 
						|
 * Doing so is more efficient if we'll need to sleep at least once.
 | 
						|
 * However, if the first test of the exit condition is likely to succeed,
 | 
						|
 * it's more efficient to omit the ConditionVariablePrepareToSleep call.
 | 
						|
 * See comments in ConditionVariableSleep for more detail.
 | 
						|
 *
 | 
						|
 * Caution: "before entering the loop" means you *must* test the exit
 | 
						|
 * condition between calling ConditionVariablePrepareToSleep and calling
 | 
						|
 * ConditionVariableSleep.  If that is inconvenient, omit calling
 | 
						|
 * ConditionVariablePrepareToSleep.
 | 
						|
 */
 | 
						|
void
 | 
						|
ConditionVariablePrepareToSleep(ConditionVariable *cv)
 | 
						|
{
 | 
						|
	int			pgprocno = MyProc->pgprocno;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * If first time through in this process, create a WaitEventSet, which
 | 
						|
	 * we'll reuse for all condition variable sleeps.
 | 
						|
	 */
 | 
						|
	if (cv_wait_event_set == NULL)
 | 
						|
	{
 | 
						|
		WaitEventSet *new_event_set;
 | 
						|
 | 
						|
		new_event_set = CreateWaitEventSet(TopMemoryContext, 2);
 | 
						|
		AddWaitEventToSet(new_event_set, WL_LATCH_SET, PGINVALID_SOCKET,
 | 
						|
						  MyLatch, NULL);
 | 
						|
		AddWaitEventToSet(new_event_set, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET,
 | 
						|
						  NULL, NULL);
 | 
						|
		/* Don't set cv_wait_event_set until we have a correct WES. */
 | 
						|
		cv_wait_event_set = new_event_set;
 | 
						|
	}
 | 
						|
 | 
						|
	/*
 | 
						|
	 * If some other sleep is already prepared, cancel it; this is necessary
 | 
						|
	 * because we have just one static variable tracking the prepared sleep,
 | 
						|
	 * and also only one cvWaitLink in our PGPROC.  It's okay to do this
 | 
						|
	 * because whenever control does return to the other test-and-sleep loop,
 | 
						|
	 * its ConditionVariableSleep call will just re-establish that sleep as
 | 
						|
	 * the prepared one.
 | 
						|
	 */
 | 
						|
	if (cv_sleep_target != NULL)
 | 
						|
		ConditionVariableCancelSleep();
 | 
						|
 | 
						|
	/* Record the condition variable on which we will sleep. */
 | 
						|
	cv_sleep_target = cv;
 | 
						|
 | 
						|
	/* Add myself to the wait queue. */
 | 
						|
	SpinLockAcquire(&cv->mutex);
 | 
						|
	proclist_push_tail(&cv->wakeup, pgprocno, cvWaitLink);
 | 
						|
	SpinLockRelease(&cv->mutex);
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Wait for the given condition variable to be signaled.
 | 
						|
 *
 | 
						|
 * This should be called in a predicate loop that tests for a specific exit
 | 
						|
 * condition and otherwise sleeps, like so:
 | 
						|
 *
 | 
						|
 *	 ConditionVariablePrepareToSleep(cv);  // optional
 | 
						|
 *	 while (condition for which we are waiting is not true)
 | 
						|
 *		 ConditionVariableSleep(cv, wait_event_info);
 | 
						|
 *	 ConditionVariableCancelSleep();
 | 
						|
 *
 | 
						|
 * wait_event_info should be a value from one of the WaitEventXXX enums
 | 
						|
 * defined in pgstat.h.  This controls the contents of pg_stat_activity's
 | 
						|
 * wait_event_type and wait_event columns while waiting.
 | 
						|
 */
 | 
						|
void
 | 
						|
ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
 | 
						|
{
 | 
						|
	(void) ConditionVariableTimedSleep(cv, -1 /* no timeout */ ,
 | 
						|
									   wait_event_info);
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Wait for a condition variable to be signaled or a timeout to be reached.
 | 
						|
 *
 | 
						|
 * Returns true when timeout expires, otherwise returns false.
 | 
						|
 *
 | 
						|
 * See ConditionVariableSleep() for general usage.
 | 
						|
 */
 | 
						|
bool
 | 
						|
ConditionVariableTimedSleep(ConditionVariable *cv, long timeout,
 | 
						|
							uint32 wait_event_info)
 | 
						|
{
 | 
						|
	long		cur_timeout = -1;
 | 
						|
	instr_time	start_time;
 | 
						|
	instr_time	cur_time;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * If the caller didn't prepare to sleep explicitly, then do so now and
 | 
						|
	 * return immediately.  The caller's predicate loop should immediately
 | 
						|
	 * call again if its exit condition is not yet met.  This will result in
 | 
						|
	 * the exit condition being tested twice before we first sleep.  The extra
 | 
						|
	 * test can be prevented by calling ConditionVariablePrepareToSleep(cv)
 | 
						|
	 * first.  Whether it's worth doing that depends on whether you expect the
 | 
						|
	 * exit condition to be met initially, in which case skipping the prepare
 | 
						|
	 * is recommended because it avoids manipulations of the wait list, or not
 | 
						|
	 * met initially, in which case preparing first is better because it
 | 
						|
	 * avoids one extra test of the exit condition.
 | 
						|
	 *
 | 
						|
	 * If we are currently prepared to sleep on some other CV, we just cancel
 | 
						|
	 * that and prepare this one; see ConditionVariablePrepareToSleep.
 | 
						|
	 */
 | 
						|
	if (cv_sleep_target != cv)
 | 
						|
	{
 | 
						|
		ConditionVariablePrepareToSleep(cv);
 | 
						|
		return false;
 | 
						|
	}
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Record the current time so that we can calculate the remaining timeout
 | 
						|
	 * if we are woken up spuriously.
 | 
						|
	 */
 | 
						|
	if (timeout >= 0)
 | 
						|
	{
 | 
						|
		INSTR_TIME_SET_CURRENT(start_time);
 | 
						|
		Assert(timeout >= 0 && timeout <= INT_MAX);
 | 
						|
		cur_timeout = timeout;
 | 
						|
	}
 | 
						|
 | 
						|
	while (true)
 | 
						|
	{
 | 
						|
		WaitEvent	event;
 | 
						|
		bool		done = false;
 | 
						|
 | 
						|
		/*
 | 
						|
		 * Wait for latch to be set.  (If we're awakened for some other
 | 
						|
		 * reason, the code below will cope anyway.)
 | 
						|
		 */
 | 
						|
		(void) WaitEventSetWait(cv_wait_event_set, cur_timeout, &event, 1,
 | 
						|
								wait_event_info);
 | 
						|
 | 
						|
		/* Reset latch before examining the state of the wait list. */
 | 
						|
		ResetLatch(MyLatch);
 | 
						|
 | 
						|
		CHECK_FOR_INTERRUPTS();
 | 
						|
 | 
						|
		/*
 | 
						|
		 * If this process has been taken out of the wait list, then we know
 | 
						|
		 * that it has been signaled by ConditionVariableSignal (or
 | 
						|
		 * ConditionVariableBroadcast), so we should return to the caller. But
 | 
						|
		 * that doesn't guarantee that the exit condition is met, only that we
 | 
						|
		 * ought to check it.  So we must put the process back into the wait
 | 
						|
		 * list, to ensure we don't miss any additional wakeup occurring while
 | 
						|
		 * the caller checks its exit condition.  We can take ourselves out of
 | 
						|
		 * the wait list only when the caller calls
 | 
						|
		 * ConditionVariableCancelSleep.
 | 
						|
		 *
 | 
						|
		 * If we're still in the wait list, then the latch must have been set
 | 
						|
		 * by something other than ConditionVariableSignal; though we don't
 | 
						|
		 * guarantee not to return spuriously, we'll avoid this obvious case.
 | 
						|
		 */
 | 
						|
		SpinLockAcquire(&cv->mutex);
 | 
						|
		if (!proclist_contains(&cv->wakeup, MyProc->pgprocno, cvWaitLink))
 | 
						|
		{
 | 
						|
			done = true;
 | 
						|
			proclist_push_tail(&cv->wakeup, MyProc->pgprocno, cvWaitLink);
 | 
						|
		}
 | 
						|
		SpinLockRelease(&cv->mutex);
 | 
						|
 | 
						|
		/* We were signaled, so return */
 | 
						|
		if (done)
 | 
						|
			return false;
 | 
						|
 | 
						|
		/* If we're not done, update cur_timeout for next iteration */
 | 
						|
		if (timeout >= 0)
 | 
						|
		{
 | 
						|
			INSTR_TIME_SET_CURRENT(cur_time);
 | 
						|
			INSTR_TIME_SUBTRACT(cur_time, start_time);
 | 
						|
			cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time);
 | 
						|
 | 
						|
			/* Have we crossed the timeout threshold? */
 | 
						|
			if (cur_timeout <= 0)
 | 
						|
				return true;
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Cancel any pending sleep operation.
 | 
						|
 *
 | 
						|
 * We just need to remove ourselves from the wait queue of any condition
 | 
						|
 * variable for which we have previously prepared a sleep.
 | 
						|
 *
 | 
						|
 * Do nothing if nothing is pending; this allows this function to be called
 | 
						|
 * during transaction abort to clean up any unfinished CV sleep.
 | 
						|
 */
 | 
						|
void
 | 
						|
ConditionVariableCancelSleep(void)
 | 
						|
{
 | 
						|
	ConditionVariable *cv = cv_sleep_target;
 | 
						|
	bool		signaled = false;
 | 
						|
 | 
						|
	if (cv == NULL)
 | 
						|
		return;
 | 
						|
 | 
						|
	SpinLockAcquire(&cv->mutex);
 | 
						|
	if (proclist_contains(&cv->wakeup, MyProc->pgprocno, cvWaitLink))
 | 
						|
		proclist_delete(&cv->wakeup, MyProc->pgprocno, cvWaitLink);
 | 
						|
	else
 | 
						|
		signaled = true;
 | 
						|
	SpinLockRelease(&cv->mutex);
 | 
						|
 | 
						|
	/*
 | 
						|
	 * If we've received a signal, pass it on to another waiting process, if
 | 
						|
	 * there is one.  Otherwise a call to ConditionVariableSignal() might get
 | 
						|
	 * lost, despite there being another process ready to handle it.
 | 
						|
	 */
 | 
						|
	if (signaled)
 | 
						|
		ConditionVariableSignal(cv);
 | 
						|
 | 
						|
	cv_sleep_target = NULL;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Wake up the oldest process sleeping on the CV, if there is any.
 | 
						|
 *
 | 
						|
 * Note: it's difficult to tell whether this has any real effect: we know
 | 
						|
 * whether we took an entry off the list, but the entry might only be a
 | 
						|
 * sentinel.  Hence, think twice before proposing that this should return
 | 
						|
 * a flag telling whether it woke somebody.
 | 
						|
 */
 | 
						|
void
 | 
						|
ConditionVariableSignal(ConditionVariable *cv)
 | 
						|
{
 | 
						|
	PGPROC	   *proc = NULL;
 | 
						|
 | 
						|
	/* Remove the first process from the wakeup queue (if any). */
 | 
						|
	SpinLockAcquire(&cv->mutex);
 | 
						|
	if (!proclist_is_empty(&cv->wakeup))
 | 
						|
		proc = proclist_pop_head_node(&cv->wakeup, cvWaitLink);
 | 
						|
	SpinLockRelease(&cv->mutex);
 | 
						|
 | 
						|
	/* If we found someone sleeping, set their latch to wake them up. */
 | 
						|
	if (proc != NULL)
 | 
						|
		SetLatch(&proc->procLatch);
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Wake up all processes sleeping on the given CV.
 | 
						|
 *
 | 
						|
 * This guarantees to wake all processes that were sleeping on the CV
 | 
						|
 * at time of call, but processes that add themselves to the list mid-call
 | 
						|
 * will typically not get awakened.
 | 
						|
 */
 | 
						|
void
 | 
						|
ConditionVariableBroadcast(ConditionVariable *cv)
 | 
						|
{
 | 
						|
	int			pgprocno = MyProc->pgprocno;
 | 
						|
	PGPROC	   *proc = NULL;
 | 
						|
	bool		have_sentinel = false;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * In some use-cases, it is common for awakened processes to immediately
 | 
						|
	 * re-queue themselves.  If we just naively try to reduce the wakeup list
 | 
						|
	 * to empty, we'll get into a potentially-indefinite loop against such a
 | 
						|
	 * process.  The semantics we really want are just to be sure that we have
 | 
						|
	 * wakened all processes that were in the list at entry.  We can use our
 | 
						|
	 * own cvWaitLink as a sentinel to detect when we've finished.
 | 
						|
	 *
 | 
						|
	 * A seeming flaw in this approach is that someone else might signal the
 | 
						|
	 * CV and in doing so remove our sentinel entry.  But that's fine: since
 | 
						|
	 * CV waiters are always added and removed in order, that must mean that
 | 
						|
	 * every previous waiter has been wakened, so we're done.  We'll get an
 | 
						|
	 * extra "set" on our latch from the someone else's signal, which is
 | 
						|
	 * slightly inefficient but harmless.
 | 
						|
	 *
 | 
						|
	 * We can't insert our cvWaitLink as a sentinel if it's already in use in
 | 
						|
	 * some other proclist.  While that's not expected to be true for typical
 | 
						|
	 * uses of this function, we can deal with it by simply canceling any
 | 
						|
	 * prepared CV sleep.  The next call to ConditionVariableSleep will take
 | 
						|
	 * care of re-establishing the lost state.
 | 
						|
	 */
 | 
						|
	if (cv_sleep_target != NULL)
 | 
						|
		ConditionVariableCancelSleep();
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Inspect the state of the queue.  If it's empty, we have nothing to do.
 | 
						|
	 * If there's exactly one entry, we need only remove and signal that
 | 
						|
	 * entry.  Otherwise, remove the first entry and insert our sentinel.
 | 
						|
	 */
 | 
						|
	SpinLockAcquire(&cv->mutex);
 | 
						|
	/* While we're here, let's assert we're not in the list. */
 | 
						|
	Assert(!proclist_contains(&cv->wakeup, pgprocno, cvWaitLink));
 | 
						|
 | 
						|
	if (!proclist_is_empty(&cv->wakeup))
 | 
						|
	{
 | 
						|
		proc = proclist_pop_head_node(&cv->wakeup, cvWaitLink);
 | 
						|
		if (!proclist_is_empty(&cv->wakeup))
 | 
						|
		{
 | 
						|
			proclist_push_tail(&cv->wakeup, pgprocno, cvWaitLink);
 | 
						|
			have_sentinel = true;
 | 
						|
		}
 | 
						|
	}
 | 
						|
	SpinLockRelease(&cv->mutex);
 | 
						|
 | 
						|
	/* Awaken first waiter, if there was one. */
 | 
						|
	if (proc != NULL)
 | 
						|
		SetLatch(&proc->procLatch);
 | 
						|
 | 
						|
	while (have_sentinel)
 | 
						|
	{
 | 
						|
		/*
 | 
						|
		 * Each time through the loop, remove the first wakeup list entry, and
 | 
						|
		 * signal it unless it's our sentinel.  Repeat as long as the sentinel
 | 
						|
		 * remains in the list.
 | 
						|
		 *
 | 
						|
		 * Notice that if someone else removes our sentinel, we will waken one
 | 
						|
		 * additional process before exiting.  That's intentional, because if
 | 
						|
		 * someone else signals the CV, they may be intending to waken some
 | 
						|
		 * third process that added itself to the list after we added the
 | 
						|
		 * sentinel.  Better to give a spurious wakeup (which should be
 | 
						|
		 * harmless beyond wasting some cycles) than to lose a wakeup.
 | 
						|
		 */
 | 
						|
		proc = NULL;
 | 
						|
		SpinLockAcquire(&cv->mutex);
 | 
						|
		if (!proclist_is_empty(&cv->wakeup))
 | 
						|
			proc = proclist_pop_head_node(&cv->wakeup, cvWaitLink);
 | 
						|
		have_sentinel = proclist_contains(&cv->wakeup, pgprocno, cvWaitLink);
 | 
						|
		SpinLockRelease(&cv->mutex);
 | 
						|
 | 
						|
		if (proc != NULL && proc != MyProc)
 | 
						|
			SetLatch(&proc->procLatch);
 | 
						|
	}
 | 
						|
}
 |