mirror of
https://github.com/postgres/postgres.git
synced 2025-05-11 05:41:32 +03:00
Remove select(2) backed latch implementation.
poll(2) is required by Single Unix Spec v2, the usual baseline for postgres (leaving windows aside). There's not been any buildfarm animals without poll(2) for a long while, leaving the select(2) implementation to be largely untested. On windows, including mingw, poll() is not available, but we have a special case implementation for windows anyway. Author: Andres Freund Discussion: https://postgr.es/m/20170420003611.7r2sdvehesdyiz2i@alap3.anarazel.de
This commit is contained in:
parent
546c13e11b
commit
61c21ddad0
@ -3,27 +3,24 @@
|
||||
* latch.c
|
||||
* Routines for inter-process latches
|
||||
*
|
||||
* The Unix implementation uses the so-called self-pipe trick to overcome
|
||||
* the race condition involved with select() and setting a global flag
|
||||
* in the signal handler. When a latch is set and the current process
|
||||
* is waiting for it, the signal handler wakes up the select() in
|
||||
* WaitLatch by writing a byte to a pipe. A signal by itself doesn't
|
||||
* interrupt select() on all platforms, and even on platforms where it
|
||||
* does, a signal that arrives just before the select() call does not
|
||||
* prevent the select() from entering sleep. An incoming byte on a pipe
|
||||
* however reliably interrupts the sleep, and causes select() to return
|
||||
* immediately even if the signal arrives before select() begins.
|
||||
*
|
||||
* (Actually, we prefer epoll_wait() over poll() over select() where
|
||||
* available, but the same comments apply.)
|
||||
* The Unix implementation uses the so-called self-pipe trick to overcome the
|
||||
* race condition involved with poll() (or epoll_wait() on linux) and setting
|
||||
* a global flag in the signal handler. When a latch is set and the current
|
||||
* process is waiting for it, the signal handler wakes up the poll() in
|
||||
* WaitLatch by writing a byte to a pipe. A signal by itself doesn't interrupt
|
||||
* poll() on all platforms, and even on platforms where it does, a signal that
|
||||
* arrives just before the poll() call does not prevent poll() from entering
|
||||
* sleep. An incoming byte on a pipe however reliably interrupts the sleep,
|
||||
* and causes poll() to return immediately even if the signal arrives before
|
||||
* poll() begins.
|
||||
*
|
||||
* When SetLatch is called from the same process that owns the latch,
|
||||
* SetLatch writes the byte directly to the pipe. If it's owned by another
|
||||
* process, SIGUSR1 is sent and the signal handler in the waiting process
|
||||
* writes the byte to the pipe on behalf of the signaling process.
|
||||
*
|
||||
* The Windows implementation uses Windows events that are inherited by
|
||||
* all postmaster child processes.
|
||||
* The Windows implementation uses Windows events that are inherited by all
|
||||
* postmaster child processes. There's no need for the self-pipe trick there.
|
||||
*
|
||||
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
@ -39,7 +36,6 @@
|
||||
#include <limits.h>
|
||||
#include <signal.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/time.h>
|
||||
#ifdef HAVE_SYS_EPOLL_H
|
||||
#include <sys/epoll.h>
|
||||
#endif
|
||||
@ -49,9 +45,6 @@
|
||||
#ifdef HAVE_SYS_POLL_H
|
||||
#include <sys/poll.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_SELECT_H
|
||||
#include <sys/select.h>
|
||||
#endif
|
||||
|
||||
#include "miscadmin.h"
|
||||
#include "pgstat.h"
|
||||
@ -69,14 +62,12 @@
|
||||
* define somewhere before this block.
|
||||
*/
|
||||
#if defined(WAIT_USE_EPOLL) || defined(WAIT_USE_POLL) || \
|
||||
defined(WAIT_USE_SELECT) || defined(WAIT_USE_WIN32)
|
||||
defined(WAIT_USE_WIN32)
|
||||
/* don't overwrite manual choice */
|
||||
#elif defined(HAVE_SYS_EPOLL_H)
|
||||
#define WAIT_USE_EPOLL
|
||||
#elif defined(HAVE_POLL)
|
||||
#define WAIT_USE_POLL
|
||||
#elif HAVE_SYS_SELECT_H
|
||||
#define WAIT_USE_SELECT
|
||||
#elif WIN32
|
||||
#define WAIT_USE_WIN32
|
||||
#else
|
||||
@ -162,8 +153,8 @@ InitializeLatchSupport(void)
|
||||
|
||||
/*
|
||||
* Set up the self-pipe that allows a signal handler to wake up the
|
||||
* select() in WaitLatch. Make the write-end non-blocking, so that
|
||||
* SetLatch won't block if the event has already been set many times
|
||||
* poll()/epoll_wait() in WaitLatch. Make the write-end non-blocking, so
|
||||
* that SetLatch won't block if the event has already been set many times
|
||||
* filling the kernel buffer. Make the read-end non-blocking too, so that
|
||||
* we can easily clear the pipe by reading until EAGAIN or EWOULDBLOCK.
|
||||
*/
|
||||
@ -401,8 +392,9 @@ SetLatch(volatile Latch *latch)
|
||||
|
||||
/*
|
||||
* See if anyone's waiting for the latch. It can be the current process if
|
||||
* we're in a signal handler. We use the self-pipe to wake up the select()
|
||||
* in that case. If it's another process, send a signal.
|
||||
* we're in a signal handler. We use the self-pipe to wake up the
|
||||
* poll()/epoll_wait() in that case. If it's another process, send a
|
||||
* signal.
|
||||
*
|
||||
* Fetch owner_pid only once, in case the latch is concurrently getting
|
||||
* owned or disowned. XXX: This assumes that pid_t is atomic, which isn't
|
||||
@ -666,8 +658,6 @@ AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch,
|
||||
WaitEventAdjustEpoll(set, event, EPOLL_CTL_ADD);
|
||||
#elif defined(WAIT_USE_POLL)
|
||||
WaitEventAdjustPoll(set, event);
|
||||
#elif defined(WAIT_USE_SELECT)
|
||||
/* nothing to do */
|
||||
#elif defined(WAIT_USE_WIN32)
|
||||
WaitEventAdjustWin32(set, event);
|
||||
#endif
|
||||
@ -724,8 +714,6 @@ ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
|
||||
WaitEventAdjustEpoll(set, event, EPOLL_CTL_MOD);
|
||||
#elif defined(WAIT_USE_POLL)
|
||||
WaitEventAdjustPoll(set, event);
|
||||
#elif defined(WAIT_USE_SELECT)
|
||||
/* nothing to do */
|
||||
#elif defined(WAIT_USE_WIN32)
|
||||
WaitEventAdjustWin32(set, event);
|
||||
#endif
|
||||
@ -1055,9 +1043,11 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
|
||||
* because we don't expect the pipe to become readable or to have
|
||||
* any errors either, treat those cases as postmaster death, too.
|
||||
*
|
||||
* As explained in the WAIT_USE_SELECT implementation, select(2)
|
||||
* may spuriously return. Be paranoid about that here too, a
|
||||
* spurious WL_POSTMASTER_DEATH would be painful.
|
||||
* Be paranoid about a spurious event signalling the postmaster as
|
||||
* being dead. There have been reports about that happening with
|
||||
* older primitives (select(2) to be specific), and a spurious
|
||||
* WL_POSTMASTER_DEATH event would be painful. Re-checking doesn't
|
||||
* cost much.
|
||||
*/
|
||||
if (!PostmasterIsAlive())
|
||||
{
|
||||
@ -1171,9 +1161,11 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
|
||||
* we don't expect the pipe to become readable or to have any
|
||||
* errors either, treat those cases as postmaster death, too.
|
||||
*
|
||||
* As explained in the WAIT_USE_SELECT implementation, select(2)
|
||||
* may spuriously return. Be paranoid about that here too, a
|
||||
* spurious WL_POSTMASTER_DEATH would be painful.
|
||||
* Be paranoid about a spurious event signalling the postmaster as
|
||||
* being dead. There have been reports about that happening with
|
||||
* older primitives (select(2) to be specific), and a spurious
|
||||
* WL_POSTMASTER_DEATH event would be painful. Re-checking doesn't
|
||||
* cost much.
|
||||
*/
|
||||
if (!PostmasterIsAlive())
|
||||
{
|
||||
@ -1214,163 +1206,6 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
|
||||
return returned_events;
|
||||
}
|
||||
|
||||
#elif defined(WAIT_USE_SELECT)
|
||||
|
||||
/*
|
||||
* Wait using select(2).
|
||||
*
|
||||
* XXX: On at least older linux kernels select(), in violation of POSIX,
|
||||
* doesn't reliably return a socket as writable if closed - but we rely on
|
||||
* that. So far all the known cases of this problem are on platforms that also
|
||||
* provide a poll() implementation without that bug. If we find one where
|
||||
* that's not the case, we'll need to add a workaround.
|
||||
*/
|
||||
static inline int
|
||||
WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
|
||||
WaitEvent *occurred_events, int nevents)
|
||||
{
|
||||
int returned_events = 0;
|
||||
int rc;
|
||||
WaitEvent *cur_event;
|
||||
fd_set input_mask;
|
||||
fd_set output_mask;
|
||||
int hifd;
|
||||
struct timeval tv;
|
||||
struct timeval *tvp = NULL;
|
||||
|
||||
FD_ZERO(&input_mask);
|
||||
FD_ZERO(&output_mask);
|
||||
|
||||
/*
|
||||
* Prepare input/output masks. We do so every loop iteration as there's no
|
||||
* entirely portable way to copy fd_sets.
|
||||
*/
|
||||
for (cur_event = set->events;
|
||||
cur_event < (set->events + set->nevents);
|
||||
cur_event++)
|
||||
{
|
||||
if (cur_event->events == WL_LATCH_SET)
|
||||
FD_SET(cur_event->fd, &input_mask);
|
||||
else if (cur_event->events == WL_POSTMASTER_DEATH)
|
||||
FD_SET(cur_event->fd, &input_mask);
|
||||
else
|
||||
{
|
||||
Assert(cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE));
|
||||
if (cur_event->events == WL_SOCKET_READABLE)
|
||||
FD_SET(cur_event->fd, &input_mask);
|
||||
else if (cur_event->events == WL_SOCKET_WRITEABLE)
|
||||
FD_SET(cur_event->fd, &output_mask);
|
||||
}
|
||||
|
||||
if (cur_event->fd > hifd)
|
||||
hifd = cur_event->fd;
|
||||
}
|
||||
|
||||
/* Sleep */
|
||||
if (cur_timeout >= 0)
|
||||
{
|
||||
tv.tv_sec = cur_timeout / 1000L;
|
||||
tv.tv_usec = (cur_timeout % 1000L) * 1000L;
|
||||
tvp = &tv;
|
||||
}
|
||||
rc = select(hifd + 1, &input_mask, &output_mask, NULL, tvp);
|
||||
|
||||
/* Check return code */
|
||||
if (rc < 0)
|
||||
{
|
||||
/* EINTR is okay, otherwise complain */
|
||||
if (errno != EINTR)
|
||||
{
|
||||
waiting = false;
|
||||
ereport(ERROR,
|
||||
(errcode_for_socket_access(),
|
||||
errmsg("select() failed: %m")));
|
||||
}
|
||||
return 0; /* retry */
|
||||
}
|
||||
else if (rc == 0)
|
||||
{
|
||||
/* timeout exceeded */
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* To associate events with select's masks, we have to check the status of
|
||||
* the file descriptors associated with an event; by looping through all
|
||||
* events.
|
||||
*/
|
||||
for (cur_event = set->events;
|
||||
cur_event < (set->events + set->nevents)
|
||||
&& returned_events < nevents;
|
||||
cur_event++)
|
||||
{
|
||||
occurred_events->pos = cur_event->pos;
|
||||
occurred_events->user_data = cur_event->user_data;
|
||||
occurred_events->events = 0;
|
||||
|
||||
if (cur_event->events == WL_LATCH_SET &&
|
||||
FD_ISSET(cur_event->fd, &input_mask))
|
||||
{
|
||||
/* There's data in the self-pipe, clear it. */
|
||||
drainSelfPipe();
|
||||
|
||||
if (set->latch->is_set)
|
||||
{
|
||||
occurred_events->fd = PGINVALID_SOCKET;
|
||||
occurred_events->events = WL_LATCH_SET;
|
||||
occurred_events++;
|
||||
returned_events++;
|
||||
}
|
||||
}
|
||||
else if (cur_event->events == WL_POSTMASTER_DEATH &&
|
||||
FD_ISSET(cur_event->fd, &input_mask))
|
||||
{
|
||||
/*
|
||||
* According to the select(2) man page on Linux, select(2) may
|
||||
* spuriously return and report a file descriptor as readable,
|
||||
* when it's not; and presumably so can poll(2). It's not clear
|
||||
* that the relevant cases would ever apply to the postmaster
|
||||
* pipe, but since the consequences of falsely returning
|
||||
* WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
|
||||
* trouble to positively verify EOF with PostmasterIsAlive().
|
||||
*/
|
||||
if (!PostmasterIsAlive())
|
||||
{
|
||||
occurred_events->fd = PGINVALID_SOCKET;
|
||||
occurred_events->events = WL_POSTMASTER_DEATH;
|
||||
occurred_events++;
|
||||
returned_events++;
|
||||
}
|
||||
}
|
||||
else if (cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
|
||||
{
|
||||
Assert(cur_event->fd != PGINVALID_SOCKET);
|
||||
|
||||
if ((cur_event->events & WL_SOCKET_READABLE) &&
|
||||
FD_ISSET(cur_event->fd, &input_mask))
|
||||
{
|
||||
/* data available in socket, or EOF */
|
||||
occurred_events->events |= WL_SOCKET_READABLE;
|
||||
}
|
||||
|
||||
if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
|
||||
FD_ISSET(cur_event->fd, &output_mask))
|
||||
{
|
||||
/* socket is writeable, or EOF */
|
||||
occurred_events->events |= WL_SOCKET_WRITEABLE;
|
||||
}
|
||||
|
||||
if (occurred_events->events != 0)
|
||||
{
|
||||
occurred_events->fd = cur_event->fd;
|
||||
occurred_events++;
|
||||
returned_events++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return returned_events;
|
||||
}
|
||||
|
||||
#elif defined(WAIT_USE_WIN32)
|
||||
|
||||
/*
|
||||
|
Loading…
x
Reference in New Issue
Block a user