mirror of
https://github.com/postgres/postgres.git
synced 2025-07-28 23:42:10 +03:00
Add new function WaitForParallelWorkersToAttach.
Once this function has been called, we know that all workers have started and attached to their error queues -- so if any of them subsequently exit uncleanly, we'll be sure to throw an ERROR promptly. Otherwise, users of the ParallelContext machinery must be careful not to wait forever for a worker that has failed to start. Parallel query manages to work without needing this for reasons explained in new comments added by this patch, but it's a useful primitive for other parallel operations, such as the pending patch to make creating a btree index run in parallel. Amit Kapila, revised by me. Additional review by Peter Geoghegan. Discussion: http://postgr.es/m/CAA4eK1+e2MzyouF5bg=OtyhDSX+=Ao=3htN=T-r_6s3gCtKFiw@mail.gmail.com
This commit is contained in:
@ -437,10 +437,11 @@ ReinitializeParallelDSM(ParallelContext *pcxt)
|
|||||||
WaitForParallelWorkersToFinish(pcxt);
|
WaitForParallelWorkersToFinish(pcxt);
|
||||||
WaitForParallelWorkersToExit(pcxt);
|
WaitForParallelWorkersToExit(pcxt);
|
||||||
pcxt->nworkers_launched = 0;
|
pcxt->nworkers_launched = 0;
|
||||||
if (pcxt->any_message_received)
|
if (pcxt->known_attached_workers)
|
||||||
{
|
{
|
||||||
pfree(pcxt->any_message_received);
|
pfree(pcxt->known_attached_workers);
|
||||||
pcxt->any_message_received = NULL;
|
pcxt->known_attached_workers = NULL;
|
||||||
|
pcxt->nknown_attached_workers = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -542,16 +543,147 @@ LaunchParallelWorkers(ParallelContext *pcxt)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Now that nworkers_launched has taken its final value, we can initialize
|
* Now that nworkers_launched has taken its final value, we can initialize
|
||||||
* any_message_received.
|
* known_attached_workers.
|
||||||
*/
|
*/
|
||||||
if (pcxt->nworkers_launched > 0)
|
if (pcxt->nworkers_launched > 0)
|
||||||
pcxt->any_message_received =
|
{
|
||||||
|
pcxt->known_attached_workers =
|
||||||
palloc0(sizeof(bool) * pcxt->nworkers_launched);
|
palloc0(sizeof(bool) * pcxt->nworkers_launched);
|
||||||
|
pcxt->nknown_attached_workers = 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* Restore previous memory context. */
|
/* Restore previous memory context. */
|
||||||
MemoryContextSwitchTo(oldcontext);
|
MemoryContextSwitchTo(oldcontext);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Wait for all workers to attach to their error queues, and throw an error if
|
||||||
|
* any worker fails to do this.
|
||||||
|
*
|
||||||
|
* Callers can assume that if this function returns successfully, then the
|
||||||
|
* number of workers given by pcxt->nworkers_launched have initialized and
|
||||||
|
* attached to their error queues. Whether or not these workers are guaranteed
|
||||||
|
* to still be running depends on what code the caller asked them to run;
|
||||||
|
* this function does not guarantee that they have not exited. However, it
|
||||||
|
* does guarantee that any workers which exited must have done so cleanly and
|
||||||
|
* after successfully performing the work with which they were tasked.
|
||||||
|
*
|
||||||
|
* If this function is not called, then some of the workers that were launched
|
||||||
|
* may not have been started due to a fork() failure, or may have exited during
|
||||||
|
* early startup prior to attaching to the error queue, so nworkers_launched
|
||||||
|
* cannot be viewed as completely reliable. It will never be less than the
|
||||||
|
* number of workers which actually started, but it might be more. Any workers
|
||||||
|
* that failed to start will still be discovered by
|
||||||
|
* WaitForParallelWorkersToFinish and an error will be thrown at that time,
|
||||||
|
* provided that function is eventually reached.
|
||||||
|
*
|
||||||
|
* In general, the leader process should do as much work as possible before
|
||||||
|
* calling this function. fork() failures and other early-startup failures
|
||||||
|
* are very uncommon, and having the leader sit idle when it could be doing
|
||||||
|
* useful work is undesirable. However, if the leader needs to wait for
|
||||||
|
* all of its workers or for a specific worker, it may want to call this
|
||||||
|
* function before doing so. If not, it must make some other provision for
|
||||||
|
* the failure-to-start case, lest it wait forever. On the other hand, a
|
||||||
|
* leader which never waits for a worker that might not be started yet, or
|
||||||
|
* at least never does so prior to WaitForParallelWorkersToFinish(), need not
|
||||||
|
* call this function at all.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
WaitForParallelWorkersToAttach(ParallelContext *pcxt)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
/* Skip this if we have no launched workers. */
|
||||||
|
if (pcxt->nworkers_launched == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* This will process any parallel messages that are pending and it may
|
||||||
|
* also throw an error propagated from a worker.
|
||||||
|
*/
|
||||||
|
CHECK_FOR_INTERRUPTS();
|
||||||
|
|
||||||
|
for (i = 0; i < pcxt->nworkers_launched; ++i)
|
||||||
|
{
|
||||||
|
BgwHandleStatus status;
|
||||||
|
shm_mq *mq;
|
||||||
|
int rc;
|
||||||
|
pid_t pid;
|
||||||
|
|
||||||
|
if (pcxt->known_attached_workers[i])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If error_mqh is NULL, then the worker has already exited
|
||||||
|
* cleanly.
|
||||||
|
*/
|
||||||
|
if (pcxt->worker[i].error_mqh == NULL)
|
||||||
|
{
|
||||||
|
pcxt->known_attached_workers[i] = true;
|
||||||
|
++pcxt->nknown_attached_workers;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
status = GetBackgroundWorkerPid(pcxt->worker[i].bgwhandle, &pid);
|
||||||
|
if (status == BGWH_STARTED)
|
||||||
|
{
|
||||||
|
/* Has the worker attached to the error queue? */
|
||||||
|
mq = shm_mq_get_queue(pcxt->worker[i].error_mqh);
|
||||||
|
if (shm_mq_get_sender(mq) != NULL)
|
||||||
|
{
|
||||||
|
/* Yes, so it is known to be attached. */
|
||||||
|
pcxt->known_attached_workers[i] = true;
|
||||||
|
++pcxt->nknown_attached_workers;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (status == BGWH_STOPPED)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* If the worker stopped without attaching to the error queue,
|
||||||
|
* throw an error.
|
||||||
|
*/
|
||||||
|
mq = shm_mq_get_queue(pcxt->worker[i].error_mqh);
|
||||||
|
if (shm_mq_get_sender(mq) == NULL)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||||
|
errmsg("parallel worker failed to initialize"),
|
||||||
|
errhint("More details may be available in the server log.")));
|
||||||
|
|
||||||
|
pcxt->known_attached_workers[i] = true;
|
||||||
|
++pcxt->nknown_attached_workers;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Worker not yet started, so we must wait. The postmaster
|
||||||
|
* will notify us if the worker's state changes. Our latch
|
||||||
|
* might also get set for some other reason, but if so we'll
|
||||||
|
* just end up waiting for the same worker again.
|
||||||
|
*/
|
||||||
|
rc = WaitLatch(MyLatch,
|
||||||
|
WL_LATCH_SET | WL_POSTMASTER_DEATH,
|
||||||
|
-1, WAIT_EVENT_BGWORKER_STARTUP);
|
||||||
|
|
||||||
|
/* emergency bailout if postmaster has died */
|
||||||
|
if (rc & WL_POSTMASTER_DEATH)
|
||||||
|
proc_exit(1);
|
||||||
|
|
||||||
|
if (rc & WL_LATCH_SET)
|
||||||
|
ResetLatch(MyLatch);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If all workers are known to have started, we're done. */
|
||||||
|
if (pcxt->nknown_attached_workers >= pcxt->nworkers_launched)
|
||||||
|
{
|
||||||
|
Assert(pcxt->nknown_attached_workers == pcxt->nworkers_launched);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Wait for all workers to finish computing.
|
* Wait for all workers to finish computing.
|
||||||
*
|
*
|
||||||
@ -589,7 +721,7 @@ WaitForParallelWorkersToFinish(ParallelContext *pcxt)
|
|||||||
*/
|
*/
|
||||||
if (pcxt->worker[i].error_mqh == NULL)
|
if (pcxt->worker[i].error_mqh == NULL)
|
||||||
++nfinished;
|
++nfinished;
|
||||||
else if (pcxt->any_message_received[i])
|
else if (pcxt->known_attached_workers[i])
|
||||||
{
|
{
|
||||||
anyone_alive = true;
|
anyone_alive = true;
|
||||||
break;
|
break;
|
||||||
@ -909,8 +1041,12 @@ HandleParallelMessage(ParallelContext *pcxt, int i, StringInfo msg)
|
|||||||
{
|
{
|
||||||
char msgtype;
|
char msgtype;
|
||||||
|
|
||||||
if (pcxt->any_message_received != NULL)
|
if (pcxt->known_attached_workers != NULL &&
|
||||||
pcxt->any_message_received[i] = true;
|
!pcxt->known_attached_workers[i])
|
||||||
|
{
|
||||||
|
pcxt->known_attached_workers[i] = true;
|
||||||
|
pcxt->nknown_attached_workers++;
|
||||||
|
}
|
||||||
|
|
||||||
msgtype = pq_getmsgbyte(msg);
|
msgtype = pq_getmsgbyte(msg);
|
||||||
|
|
||||||
|
@ -312,7 +312,14 @@ gather_readnext(GatherState *gatherstate)
|
|||||||
/* Check for async events, particularly messages from workers. */
|
/* Check for async events, particularly messages from workers. */
|
||||||
CHECK_FOR_INTERRUPTS();
|
CHECK_FOR_INTERRUPTS();
|
||||||
|
|
||||||
/* Attempt to read a tuple, but don't block if none is available. */
|
/*
|
||||||
|
* Attempt to read a tuple, but don't block if none is available.
|
||||||
|
*
|
||||||
|
* Note that TupleQueueReaderNext will just return NULL for a worker
|
||||||
|
* which fails to initialize. We'll treat that worker as having
|
||||||
|
* produced no tuples; WaitForParallelWorkersToFinish will error out
|
||||||
|
* when we get there.
|
||||||
|
*/
|
||||||
Assert(gatherstate->nextreader < gatherstate->nreaders);
|
Assert(gatherstate->nextreader < gatherstate->nreaders);
|
||||||
reader = gatherstate->reader[gatherstate->nextreader];
|
reader = gatherstate->reader[gatherstate->nextreader];
|
||||||
tup = TupleQueueReaderNext(reader, true, &readerdone);
|
tup = TupleQueueReaderNext(reader, true, &readerdone);
|
||||||
|
@ -710,7 +710,14 @@ gm_readnext_tuple(GatherMergeState *gm_state, int nreader, bool nowait,
|
|||||||
/* Check for async events, particularly messages from workers. */
|
/* Check for async events, particularly messages from workers. */
|
||||||
CHECK_FOR_INTERRUPTS();
|
CHECK_FOR_INTERRUPTS();
|
||||||
|
|
||||||
/* Attempt to read a tuple. */
|
/*
|
||||||
|
* Attempt to read a tuple.
|
||||||
|
*
|
||||||
|
* Note that TupleQueueReaderNext will just return NULL for a worker which
|
||||||
|
* fails to initialize. We'll treat that worker as having produced no
|
||||||
|
* tuples; WaitForParallelWorkersToFinish will error out when we get
|
||||||
|
* there.
|
||||||
|
*/
|
||||||
reader = gm_state->reader[nreader - 1];
|
reader = gm_state->reader[nreader - 1];
|
||||||
tup = TupleQueueReaderNext(reader, nowait, done);
|
tup = TupleQueueReaderNext(reader, nowait, done);
|
||||||
|
|
||||||
|
@ -43,7 +43,8 @@ typedef struct ParallelContext
|
|||||||
void *private_memory;
|
void *private_memory;
|
||||||
shm_toc *toc;
|
shm_toc *toc;
|
||||||
ParallelWorkerInfo *worker;
|
ParallelWorkerInfo *worker;
|
||||||
bool *any_message_received;
|
int nknown_attached_workers;
|
||||||
|
bool *known_attached_workers;
|
||||||
} ParallelContext;
|
} ParallelContext;
|
||||||
|
|
||||||
typedef struct ParallelWorkerContext
|
typedef struct ParallelWorkerContext
|
||||||
@ -62,6 +63,7 @@ extern ParallelContext *CreateParallelContext(const char *library_name, const ch
|
|||||||
extern void InitializeParallelDSM(ParallelContext *pcxt);
|
extern void InitializeParallelDSM(ParallelContext *pcxt);
|
||||||
extern void ReinitializeParallelDSM(ParallelContext *pcxt);
|
extern void ReinitializeParallelDSM(ParallelContext *pcxt);
|
||||||
extern void LaunchParallelWorkers(ParallelContext *pcxt);
|
extern void LaunchParallelWorkers(ParallelContext *pcxt);
|
||||||
|
extern void WaitForParallelWorkersToAttach(ParallelContext *pcxt);
|
||||||
extern void WaitForParallelWorkersToFinish(ParallelContext *pcxt);
|
extern void WaitForParallelWorkersToFinish(ParallelContext *pcxt);
|
||||||
extern void DestroyParallelContext(ParallelContext *pcxt);
|
extern void DestroyParallelContext(ParallelContext *pcxt);
|
||||||
extern bool ParallelContextActive(void);
|
extern bool ParallelContextActive(void);
|
||||||
|
Reference in New Issue
Block a user