From d56a5f994c216847df3338e148b2ae72f20b2e0f Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 9 Jan 2018 12:34:46 -0500 Subject: [PATCH] While waiting for a condition variable, detect postmaster death. The general assumption for postmaster child processes is that they should just exit(1), reasonably promptly, if the postmaster disappears. condition_variable.c neglected this consideration and could be left waiting forever, if the counterpart process it is waiting for has done the right thing and exited. We had some discussion of adjusting the WaitEventSet API to make it harder to make this type of mistake in future; but for the moment, and for v10, let's make this narrow fix. Discussion: https://postgr.es/m/20412.1515456143@sss.pgh.pa.us --- src/backend/storage/lmgr/condition_variable.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/backend/storage/lmgr/condition_variable.c b/src/backend/storage/lmgr/condition_variable.c index 730adf1c57f..da22ee819ba 100644 --- a/src/backend/storage/lmgr/condition_variable.c +++ b/src/backend/storage/lmgr/condition_variable.c @@ -62,9 +62,11 @@ ConditionVariablePrepareToSleep(ConditionVariable *cv) { WaitEventSet *new_event_set; - new_event_set = CreateWaitEventSet(TopMemoryContext, 1); + new_event_set = CreateWaitEventSet(TopMemoryContext, 2); AddWaitEventToSet(new_event_set, WL_LATCH_SET, PGINVALID_SOCKET, MyLatch, NULL); + AddWaitEventToSet(new_event_set, WL_POSTMASTER_DEATH, PGINVALID_SOCKET, + NULL, NULL); /* Don't set cv_wait_event_set until we have a correct WES. */ cv_wait_event_set = new_event_set; } @@ -141,11 +143,20 @@ ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info) CHECK_FOR_INTERRUPTS(); /* - * Wait for latch to be set. We don't care about the result because - * our contract permits spurious returns. + * Wait for latch to be set. (If we're awakened for some other + * reason, the code below will cope anyway.) */ WaitEventSetWait(cv_wait_event_set, -1, &event, 1, wait_event_info); + if (event.events & WL_POSTMASTER_DEATH) + { + /* + * Emergency bailout if postmaster has died. This is to avoid the + * necessity for manual cleanup of all postmaster children. + */ + exit(1); + } + /* Reset latch before testing whether we can return. */ ResetLatch(MyLatch);