mirror of
https://github.com/postgres/postgres.git
synced 2025-04-25 21:42:33 +03:00
Detect POLLHUP/POLLRDHUP while running queries.
Provide a new GUC client_connection_check_interval that can be used to check whether the client connection has gone away, while running very long queries. It is disabled by default. For now this uses a non-standard Linux extension (also adopted by at least one other OS). POLLRDHUP is not defined by POSIX, and other OSes don't have a reliable way to know if a connection was closed without actually trying to read or write. In future we might consider trying to send a no-op/heartbeat message instead, but that could require protocol changes. Author: Sergey Cherkashin <s.cherkashin@postgrespro.ru> Author: Thomas Munro <thomas.munro@gmail.com> Reviewed-by: Thomas Munro <thomas.munro@gmail.com> Reviewed-by: Tatsuo Ishii <ishii@sraoss.co.jp> Reviewed-by: Konstantin Knizhnik <k.knizhnik@postgrespro.ru> Reviewed-by: Zhihong Yu <zyu@yugabyte.com> Reviewed-by: Andres Freund <andres@anarazel.de> Reviewed-by: Maksim Milyutin <milyutinma@gmail.com> Reviewed-by: Tsunakawa, Takayuki/綱川 貴之 <tsunakawa.takay@fujitsu.com> Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> (much earlier version) Discussion: https://postgr.es/m/77def86b27e41f0efcba411460e929ae%40postgrespro.ru
This commit is contained in:
parent
174edbe9f9
commit
c30f54ad73
@ -998,6 +998,43 @@ include_dir 'conf.d'
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry id="guc-client-connection-check-interval" xreflabel="client_connection_check_interval">
|
||||
<term><varname>client_connection_check_interval</varname> (<type>integer</type>)
|
||||
<indexterm>
|
||||
<primary><varname>client_connection_check_interval</varname> configuration parameter</primary>
|
||||
</indexterm>
|
||||
</term>
|
||||
<listitem>
|
||||
<para>
|
||||
Sets the time interval between optional checks that the client is still
|
||||
connected, while running queries. The check is performed by polling
|
||||
the socket, and allows long-running queries to be aborted sooner if
|
||||
the kernel reports that the connection is closed.
|
||||
</para>
|
||||
<para>
|
||||
This option is currently available only on systems that support the
|
||||
non-standard <symbol>POLLRDHUP</symbol> extension to the
|
||||
<symbol>poll</symbol> system call, including Linux.
|
||||
</para>
|
||||
<para>
|
||||
If the value is specified without units, it is taken as milliseconds.
|
||||
The default value is <literal>0</literal>, which disables connection
|
||||
checks. Without connection checks, the server will detect the loss of
|
||||
the connection only at the next interaction with the socket, when it
|
||||
waits for, receives or sends data.
|
||||
</para>
|
||||
<para>
|
||||
For the kernel itself to detect lost TCP connections reliably and within
|
||||
a known timeframe in all scenarios including network failure, it may
|
||||
also be necessary to adjust the TCP keepalive settings of the operating
|
||||
system, or the <xref linkend="guc-tcp-keepalives-idle"/>,
|
||||
<xref linkend="guc-tcp-keepalives-interval"/> and
|
||||
<xref linkend="guc-tcp-keepalives-count"/> settings of
|
||||
<productname>PostgreSQL</productname>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</sect2>
|
||||
|
||||
|
@ -54,6 +54,9 @@
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#ifdef HAVE_POLL_H
|
||||
#include <poll.h>
|
||||
#endif
|
||||
#include <signal.h>
|
||||
#include <fcntl.h>
|
||||
#include <grp.h>
|
||||
@ -1921,3 +1924,40 @@ pq_settcpusertimeout(int timeout, Port *port)
|
||||
|
||||
return STATUS_OK;
|
||||
}
|
||||
|
||||
/*
 * Check if the client is still connected.
 *
 * Polls MyProcPort->sock without blocking.  Returns false if poll() fails
 * (the failure is also reported at COMMERROR level) or if the kernel reports
 * POLLHUP or POLLRDHUP for the socket; returns true otherwise.
 *
 * On builds where POLLRDHUP is not defined no check is performed and the
 * result is always true.
 *
 * The caller must ensure MyProcPort is set; it is dereferenced unchecked.
 */
bool
pq_check_connection(void)
{
#if defined(POLLRDHUP)
	/*
	 * POLLRDHUP is a Linux extension to poll(2) to detect sockets closed by
	 * the other end.  We don't have a portable way to do that without
	 * actually trying to read or write data on other systems.  We don't want
	 * to read because that would be confused by pipelined queries and COPY
	 * data.  Perhaps in future we'll try to write a heartbeat message instead.
	 */
	struct pollfd pollfd;
	int			rc;

	pollfd.fd = MyProcPort->sock;
	pollfd.events = POLLOUT | POLLIN | POLLRDHUP;
	pollfd.revents = 0;

	/*
	 * A zero timeout only samples the current socket state.  The call can
	 * still fail with EINTR if a signal arrives first; that says nothing
	 * about the connection, so retry rather than reporting it as lost.
	 */
	do
	{
		rc = poll(&pollfd, 1, 0);
	} while (rc < 0 && errno == EINTR);

	if (rc < 0)
	{
		ereport(COMMERROR,
				(errcode_for_socket_access(),
				 errmsg("could not poll socket: %m")));
		return false;
	}

	/* Peer hung up, or shut down its writing half of the connection. */
	if (rc == 1 && (pollfd.revents & (POLLHUP | POLLRDHUP)))
		return false;
#endif

	return true;
}
|
||||
|
@ -102,6 +102,9 @@ int max_stack_depth = 100;
|
||||
/* wait N seconds to allow attach from a debugger */
|
||||
int PostAuthDelay = 0;
|
||||
|
||||
/* Time between checks that the client is still connected. */
|
||||
int client_connection_check_interval = 0;
|
||||
|
||||
/* ----------------
|
||||
* private typedefs etc
|
||||
* ----------------
|
||||
@ -2671,6 +2674,14 @@ start_xact_command(void)
|
||||
* not desired, the timeout has to be disabled explicitly.
|
||||
*/
|
||||
enable_statement_timeout();
|
||||
|
||||
/* Start timeout for checking if the client has gone away if necessary. */
|
||||
if (client_connection_check_interval > 0 &&
|
||||
IsUnderPostmaster &&
|
||||
MyProcPort &&
|
||||
!get_timeout_active(CLIENT_CONNECTION_CHECK_TIMEOUT))
|
||||
enable_timeout_after(CLIENT_CONNECTION_CHECK_TIMEOUT,
|
||||
client_connection_check_interval);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -3149,6 +3160,27 @@ ProcessInterrupts(void)
|
||||
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
||||
errmsg("terminating connection due to administrator command")));
|
||||
}
|
||||
|
||||
if (CheckClientConnectionPending)
|
||||
{
|
||||
CheckClientConnectionPending = false;
|
||||
|
||||
/*
|
||||
* Check for lost connection and re-arm, if still configured, but not
|
||||
* if we've arrived back at DoingCommandRead state. We don't want to
|
||||
* wake up idle sessions, and they already know how to detect lost
|
||||
* connections.
|
||||
*/
|
||||
if (!DoingCommandRead && client_connection_check_interval > 0)
|
||||
{
|
||||
if (!pq_check_connection())
|
||||
ClientConnectionLost = true;
|
||||
else
|
||||
enable_timeout_after(CLIENT_CONNECTION_CHECK_TIMEOUT,
|
||||
client_connection_check_interval);
|
||||
}
|
||||
}
|
||||
|
||||
if (ClientConnectionLost)
|
||||
{
|
||||
QueryCancelPending = false; /* lost connection trumps QueryCancel */
|
||||
|
@ -30,6 +30,7 @@ ProtocolVersion FrontendProtocol;
|
||||
volatile sig_atomic_t InterruptPending = false;
|
||||
volatile sig_atomic_t QueryCancelPending = false;
|
||||
volatile sig_atomic_t ProcDiePending = false;
|
||||
volatile sig_atomic_t CheckClientConnectionPending = false;
|
||||
volatile sig_atomic_t ClientConnectionLost = false;
|
||||
volatile sig_atomic_t IdleInTransactionSessionTimeoutPending = false;
|
||||
volatile sig_atomic_t IdleSessionTimeoutPending = false;
|
||||
|
@ -73,6 +73,7 @@ static void StatementTimeoutHandler(void);
|
||||
static void LockTimeoutHandler(void);
|
||||
static void IdleInTransactionSessionTimeoutHandler(void);
|
||||
static void IdleSessionTimeoutHandler(void);
|
||||
static void ClientCheckTimeoutHandler(void);
|
||||
static bool ThereIsAtLeastOneRole(void);
|
||||
static void process_startup_options(Port *port, bool am_superuser);
|
||||
static void process_settings(Oid databaseid, Oid roleid);
|
||||
@ -620,6 +621,7 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
|
||||
RegisterTimeout(IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
|
||||
IdleInTransactionSessionTimeoutHandler);
|
||||
RegisterTimeout(IDLE_SESSION_TIMEOUT, IdleSessionTimeoutHandler);
|
||||
RegisterTimeout(CLIENT_CONNECTION_CHECK_TIMEOUT, ClientCheckTimeoutHandler);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1242,6 +1244,14 @@ IdleSessionTimeoutHandler(void)
|
||||
SetLatch(MyLatch);
|
||||
}
|
||||
|
||||
static void
|
||||
ClientCheckTimeoutHandler(void)
|
||||
{
|
||||
CheckClientConnectionPending = true;
|
||||
InterruptPending = true;
|
||||
SetLatch(MyLatch);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if at least one role is defined in this database cluster.
|
||||
*/
|
||||
|
@ -20,6 +20,9 @@
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
#include <limits.h>
|
||||
#ifdef HAVE_POLL_H
|
||||
#include <poll.h>
|
||||
#endif
|
||||
#ifndef WIN32
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
@ -204,6 +207,7 @@ static bool check_autovacuum_work_mem(int *newval, void **extra, GucSource sourc
|
||||
static bool check_effective_io_concurrency(int *newval, void **extra, GucSource source);
|
||||
static bool check_maintenance_io_concurrency(int *newval, void **extra, GucSource source);
|
||||
static bool check_huge_page_size(int *newval, void **extra, GucSource source);
|
||||
static bool check_client_connection_check_interval(int *newval, void **extra, GucSource source);
|
||||
static void assign_pgstat_temp_directory(const char *newval, void *extra);
|
||||
static bool check_application_name(char **newval, void **extra, GucSource source);
|
||||
static void assign_application_name(const char *newval, void *extra);
|
||||
@ -3501,6 +3505,17 @@ static struct config_int ConfigureNamesInt[] =
|
||||
NULL, NULL, NULL
|
||||
},
|
||||
|
||||
{
|
||||
{"client_connection_check_interval", PGC_USERSET, CLIENT_CONN_OTHER,
|
||||
gettext_noop("Sets the time interval between checks for disconnection while running queries."),
|
||||
NULL,
|
||||
GUC_UNIT_MS
|
||||
},
|
||||
&client_connection_check_interval,
|
||||
0, 0, INT_MAX,
|
||||
check_client_connection_check_interval, NULL, NULL
|
||||
},
|
||||
|
||||
/* End-of-list marker */
|
||||
{
|
||||
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
|
||||
@ -11980,6 +11995,20 @@ check_huge_page_size(int *newval, void **extra, GucSource source)
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
check_client_connection_check_interval(int *newval, void **extra, GucSource source)
|
||||
{
|
||||
#ifndef POLLRDHUP
|
||||
/* Linux only, for now. See pq_check_connection(). */
|
||||
if (*newval != 0)
|
||||
{
|
||||
GUC_check_errdetail("client_connection_check_interval must be set to 0 on platforms that lack POLLRDHUP.");
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
assign_pgstat_temp_directory(const char *newval, void *extra)
|
||||
{
|
||||
|
@ -719,6 +719,9 @@
|
||||
|
||||
#dynamic_library_path = '$libdir'
|
||||
|
||||
#client_connection_check_interval = 0 # time between checks for client
|
||||
# disconnection while running queries;
|
||||
# 0 for never
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# LOCK MANAGEMENT
|
||||
|
@ -71,6 +71,7 @@ extern int pq_getbyte(void);
|
||||
extern int pq_peekbyte(void);
|
||||
extern int pq_getbyte_if_available(unsigned char *c);
|
||||
extern int pq_putmessage_v2(char msgtype, const char *s, size_t len);
|
||||
extern bool pq_check_connection(void);
|
||||
|
||||
/*
|
||||
* prototypes for functions in be-secure.c
|
||||
|
@ -85,6 +85,7 @@ extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending;
|
||||
extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending;
|
||||
extern PGDLLIMPORT volatile sig_atomic_t ProcSignalBarrierPending;
|
||||
|
||||
extern PGDLLIMPORT volatile sig_atomic_t CheckClientConnectionPending;
|
||||
extern PGDLLIMPORT volatile sig_atomic_t ClientConnectionLost;
|
||||
|
||||
/* these are marked volatile because they are examined by signal handlers: */
|
||||
|
@ -29,6 +29,7 @@ extern CommandDest whereToSendOutput;
|
||||
extern PGDLLIMPORT const char *debug_query_string;
|
||||
extern int max_stack_depth;
|
||||
extern int PostAuthDelay;
|
||||
extern int client_connection_check_interval;
|
||||
|
||||
/* GUC-configurable parameters */
|
||||
|
||||
|
@ -32,6 +32,7 @@ typedef enum TimeoutId
|
||||
STANDBY_LOCK_TIMEOUT,
|
||||
IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
|
||||
IDLE_SESSION_TIMEOUT,
|
||||
CLIENT_CONNECTION_CHECK_TIMEOUT,
|
||||
/* First user-definable timeout reason */
|
||||
USER_TIMEOUT,
|
||||
/* Maximum number of timeout reasons */
|
||||
|
Loading…
x
Reference in New Issue
Block a user