mirror of
https://github.com/postgres/postgres.git
synced 2025-04-25 21:42:33 +03:00
Detect POLLHUP/POLLRDHUP while running queries.
Provide a new GUC check_client_connection_interval that can be used to check whether the client connection has gone away, while running very long queries. It is disabled by default. For now this uses a non-standard Linux extension (also adopted by at least one other OS). POLLRDHUP is not defined by POSIX, and other OSes don't have a reliable way to know if a connection was closed without actually trying to read or write. In future we might consider trying to send a no-op/heartbeat message instead, but that could require protocol changes. Author: Sergey Cherkashin <s.cherkashin@postgrespro.ru> Author: Thomas Munro <thomas.munro@gmail.com> Reviewed-by: Thomas Munro <thomas.munro@gmail.com> Reviewed-by: Tatsuo Ishii <ishii@sraoss.co.jp> Reviewed-by: Konstantin Knizhnik <k.knizhnik@postgrespro.ru> Reviewed-by: Zhihong Yu <zyu@yugabyte.com> Reviewed-by: Andres Freund <andres@anarazel.de> Reviewed-by: Maksim Milyutin <milyutinma@gmail.com> Reviewed-by: Tsunakawa, Takayuki/綱川 貴之 <tsunakawa.takay@fujitsu.com> Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> (much earlier version) Discussion: https://postgr.es/m/77def86b27e41f0efcba411460e929ae%40postgrespro.ru
This commit is contained in:
parent
174edbe9f9
commit
c30f54ad73
@ -998,6 +998,43 @@ include_dir 'conf.d'
|
|||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry id="guc-client-connection-check-interval" xreflabel="client_connection_check_interval">
|
||||||
|
<term><varname>client_connection_check_interval</varname> (<type>integer</type>)
|
||||||
|
<indexterm>
|
||||||
|
<primary><varname>client_connection_check_interval</varname> configuration parameter</primary>
|
||||||
|
</indexterm>
|
||||||
|
</term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Sets the time interval between optional checks that the client is still
|
||||||
|
connected, while running queries. The check is performed by polling
|
||||||
|
the socket, and allows long running queries to be aborted sooner if
|
||||||
|
the kernel reports that the connection is closed.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This option is currently available only on systems that support the
|
||||||
|
non-standard <symbol>POLLRDHUP</symbol> extension to the
|
||||||
|
<symbol>poll</symbol> system call, including Linux.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
If the value is specified without units, it is taken as milliseconds.
|
||||||
|
The default value is <literal>0</literal>, which disables connection
|
||||||
|
checks. Without connection checks, the server will detect the loss of
|
||||||
|
the connection only at the next interaction with the socket, when it
|
||||||
|
waits for, receives or sends data.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
For the kernel itself to detect lost TCP connections reliably and within
|
||||||
|
a known timeframe in all scenarios including network failure, it may
|
||||||
|
also be necessary to adjust the TCP keepalive settings of the operating
|
||||||
|
system, or the <xref linkend="guc-tcp-keepalives-idle"/>,
|
||||||
|
<xref linkend="guc-tcp-keepalives-interval"/> and
|
||||||
|
<xref linkend="guc-tcp-keepalives-count"/> settings of
|
||||||
|
<productname>PostgreSQL</productname>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
</variablelist>
|
</variablelist>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
|
@ -54,6 +54,9 @@
|
|||||||
*/
|
*/
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#ifdef HAVE_POLL_H
|
||||||
|
#include <poll.h>
|
||||||
|
#endif
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <grp.h>
|
#include <grp.h>
|
||||||
@ -1921,3 +1924,40 @@ pq_settcpusertimeout(int timeout, Port *port)
|
|||||||
|
|
||||||
return STATUS_OK;
|
return STATUS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check if the client is still connected.
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
pq_check_connection(void)
|
||||||
|
{
|
||||||
|
#if defined(POLLRDHUP)
|
||||||
|
/*
|
||||||
|
* POLLRDHUP is a Linux extension to poll(2) to detect sockets closed by
|
||||||
|
* the other end. We don't have a portable way to do that without
|
||||||
|
* actually trying to read or write data on other systems. We don't want
|
||||||
|
* to read because that would be confused by pipelined queries and COPY
|
||||||
|
* data. Perhaps in future we'll try to write a heartbeat message instead.
|
||||||
|
*/
|
||||||
|
struct pollfd pollfd;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
pollfd.fd = MyProcPort->sock;
|
||||||
|
pollfd.events = POLLOUT | POLLIN | POLLRDHUP;
|
||||||
|
pollfd.revents = 0;
|
||||||
|
|
||||||
|
rc = poll(&pollfd, 1, 0);
|
||||||
|
|
||||||
|
if (rc < 0)
|
||||||
|
{
|
||||||
|
ereport(COMMERROR,
|
||||||
|
(errcode_for_socket_access(),
|
||||||
|
errmsg("could not poll socket: %m")));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
else if (rc == 1 && (pollfd.revents & (POLLHUP | POLLRDHUP)))
|
||||||
|
return false;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
@ -102,6 +102,9 @@ int max_stack_depth = 100;
|
|||||||
/* wait N seconds to allow attach from a debugger */
|
/* wait N seconds to allow attach from a debugger */
|
||||||
int PostAuthDelay = 0;
|
int PostAuthDelay = 0;
|
||||||
|
|
||||||
|
/* Time between checks that the client is still connected. */
|
||||||
|
int client_connection_check_interval = 0;
|
||||||
|
|
||||||
/* ----------------
|
/* ----------------
|
||||||
* private typedefs etc
|
* private typedefs etc
|
||||||
* ----------------
|
* ----------------
|
||||||
@ -2671,6 +2674,14 @@ start_xact_command(void)
|
|||||||
* not desired, the timeout has to be disabled explicitly.
|
* not desired, the timeout has to be disabled explicitly.
|
||||||
*/
|
*/
|
||||||
enable_statement_timeout();
|
enable_statement_timeout();
|
||||||
|
|
||||||
|
/* Start timeout for checking if the client has gone away if necessary. */
|
||||||
|
if (client_connection_check_interval > 0 &&
|
||||||
|
IsUnderPostmaster &&
|
||||||
|
MyProcPort &&
|
||||||
|
!get_timeout_active(CLIENT_CONNECTION_CHECK_TIMEOUT))
|
||||||
|
enable_timeout_after(CLIENT_CONNECTION_CHECK_TIMEOUT,
|
||||||
|
client_connection_check_interval);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -3149,6 +3160,27 @@ ProcessInterrupts(void)
|
|||||||
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
||||||
errmsg("terminating connection due to administrator command")));
|
errmsg("terminating connection due to administrator command")));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (CheckClientConnectionPending)
|
||||||
|
{
|
||||||
|
CheckClientConnectionPending = false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check for lost connection and re-arm, if still configured, but not
|
||||||
|
* if we've arrived back at DoingCommandRead state. We don't want to
|
||||||
|
* wake up idle sessions, and they already know how to detect lost
|
||||||
|
* connections.
|
||||||
|
*/
|
||||||
|
if (!DoingCommandRead && client_connection_check_interval > 0)
|
||||||
|
{
|
||||||
|
if (!pq_check_connection())
|
||||||
|
ClientConnectionLost = true;
|
||||||
|
else
|
||||||
|
enable_timeout_after(CLIENT_CONNECTION_CHECK_TIMEOUT,
|
||||||
|
client_connection_check_interval);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (ClientConnectionLost)
|
if (ClientConnectionLost)
|
||||||
{
|
{
|
||||||
QueryCancelPending = false; /* lost connection trumps QueryCancel */
|
QueryCancelPending = false; /* lost connection trumps QueryCancel */
|
||||||
|
@ -30,6 +30,7 @@ ProtocolVersion FrontendProtocol;
|
|||||||
volatile sig_atomic_t InterruptPending = false;
|
volatile sig_atomic_t InterruptPending = false;
|
||||||
volatile sig_atomic_t QueryCancelPending = false;
|
volatile sig_atomic_t QueryCancelPending = false;
|
||||||
volatile sig_atomic_t ProcDiePending = false;
|
volatile sig_atomic_t ProcDiePending = false;
|
||||||
|
volatile sig_atomic_t CheckClientConnectionPending = false;
|
||||||
volatile sig_atomic_t ClientConnectionLost = false;
|
volatile sig_atomic_t ClientConnectionLost = false;
|
||||||
volatile sig_atomic_t IdleInTransactionSessionTimeoutPending = false;
|
volatile sig_atomic_t IdleInTransactionSessionTimeoutPending = false;
|
||||||
volatile sig_atomic_t IdleSessionTimeoutPending = false;
|
volatile sig_atomic_t IdleSessionTimeoutPending = false;
|
||||||
|
@ -73,6 +73,7 @@ static void StatementTimeoutHandler(void);
|
|||||||
static void LockTimeoutHandler(void);
|
static void LockTimeoutHandler(void);
|
||||||
static void IdleInTransactionSessionTimeoutHandler(void);
|
static void IdleInTransactionSessionTimeoutHandler(void);
|
||||||
static void IdleSessionTimeoutHandler(void);
|
static void IdleSessionTimeoutHandler(void);
|
||||||
|
static void ClientCheckTimeoutHandler(void);
|
||||||
static bool ThereIsAtLeastOneRole(void);
|
static bool ThereIsAtLeastOneRole(void);
|
||||||
static void process_startup_options(Port *port, bool am_superuser);
|
static void process_startup_options(Port *port, bool am_superuser);
|
||||||
static void process_settings(Oid databaseid, Oid roleid);
|
static void process_settings(Oid databaseid, Oid roleid);
|
||||||
@ -620,6 +621,7 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
|
|||||||
RegisterTimeout(IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
|
RegisterTimeout(IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
|
||||||
IdleInTransactionSessionTimeoutHandler);
|
IdleInTransactionSessionTimeoutHandler);
|
||||||
RegisterTimeout(IDLE_SESSION_TIMEOUT, IdleSessionTimeoutHandler);
|
RegisterTimeout(IDLE_SESSION_TIMEOUT, IdleSessionTimeoutHandler);
|
||||||
|
RegisterTimeout(CLIENT_CONNECTION_CHECK_TIMEOUT, ClientCheckTimeoutHandler);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1242,6 +1244,14 @@ IdleSessionTimeoutHandler(void)
|
|||||||
SetLatch(MyLatch);
|
SetLatch(MyLatch);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
ClientCheckTimeoutHandler(void)
|
||||||
|
{
|
||||||
|
CheckClientConnectionPending = true;
|
||||||
|
InterruptPending = true;
|
||||||
|
SetLatch(MyLatch);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns true if at least one role is defined in this database cluster.
|
* Returns true if at least one role is defined in this database cluster.
|
||||||
*/
|
*/
|
||||||
|
@ -20,6 +20,9 @@
|
|||||||
#include <float.h>
|
#include <float.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
|
#ifdef HAVE_POLL_H
|
||||||
|
#include <poll.h>
|
||||||
|
#endif
|
||||||
#ifndef WIN32
|
#ifndef WIN32
|
||||||
#include <sys/mman.h>
|
#include <sys/mman.h>
|
||||||
#endif
|
#endif
|
||||||
@ -204,6 +207,7 @@ static bool check_autovacuum_work_mem(int *newval, void **extra, GucSource sourc
|
|||||||
static bool check_effective_io_concurrency(int *newval, void **extra, GucSource source);
|
static bool check_effective_io_concurrency(int *newval, void **extra, GucSource source);
|
||||||
static bool check_maintenance_io_concurrency(int *newval, void **extra, GucSource source);
|
static bool check_maintenance_io_concurrency(int *newval, void **extra, GucSource source);
|
||||||
static bool check_huge_page_size(int *newval, void **extra, GucSource source);
|
static bool check_huge_page_size(int *newval, void **extra, GucSource source);
|
||||||
|
static bool check_client_connection_check_interval(int *newval, void **extra, GucSource source);
|
||||||
static void assign_pgstat_temp_directory(const char *newval, void *extra);
|
static void assign_pgstat_temp_directory(const char *newval, void *extra);
|
||||||
static bool check_application_name(char **newval, void **extra, GucSource source);
|
static bool check_application_name(char **newval, void **extra, GucSource source);
|
||||||
static void assign_application_name(const char *newval, void *extra);
|
static void assign_application_name(const char *newval, void *extra);
|
||||||
@ -3501,6 +3505,17 @@ static struct config_int ConfigureNamesInt[] =
|
|||||||
NULL, NULL, NULL
|
NULL, NULL, NULL
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
{"client_connection_check_interval", PGC_USERSET, CLIENT_CONN_OTHER,
|
||||||
|
gettext_noop("Sets the time interval between checks for disconnection while running queries."),
|
||||||
|
NULL,
|
||||||
|
GUC_UNIT_MS
|
||||||
|
},
|
||||||
|
&client_connection_check_interval,
|
||||||
|
0, 0, INT_MAX,
|
||||||
|
check_client_connection_check_interval, NULL, NULL
|
||||||
|
},
|
||||||
|
|
||||||
/* End-of-list marker */
|
/* End-of-list marker */
|
||||||
{
|
{
|
||||||
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
|
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
|
||||||
@ -11980,6 +11995,20 @@ check_huge_page_size(int *newval, void **extra, GucSource source)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
check_client_connection_check_interval(int *newval, void **extra, GucSource source)
|
||||||
|
{
|
||||||
|
#ifndef POLLRDHUP
|
||||||
|
/* Linux only, for now. See pq_check_connection(). */
|
||||||
|
if (*newval != 0)
|
||||||
|
{
|
||||||
|
GUC_check_errdetail("client_connection_check_interval must be set to 0 on platforms that lack POLLRDHUP.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
assign_pgstat_temp_directory(const char *newval, void *extra)
|
assign_pgstat_temp_directory(const char *newval, void *extra)
|
||||||
{
|
{
|
||||||
|
@ -719,6 +719,9 @@
|
|||||||
|
|
||||||
#dynamic_library_path = '$libdir'
|
#dynamic_library_path = '$libdir'
|
||||||
|
|
||||||
|
#client_connection_check_interval = 0 # time between checks for client
|
||||||
|
# disconnection while running queries;
|
||||||
|
# 0 for never
|
||||||
|
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
# LOCK MANAGEMENT
|
# LOCK MANAGEMENT
|
||||||
|
@ -71,6 +71,7 @@ extern int pq_getbyte(void);
|
|||||||
extern int pq_peekbyte(void);
|
extern int pq_peekbyte(void);
|
||||||
extern int pq_getbyte_if_available(unsigned char *c);
|
extern int pq_getbyte_if_available(unsigned char *c);
|
||||||
extern int pq_putmessage_v2(char msgtype, const char *s, size_t len);
|
extern int pq_putmessage_v2(char msgtype, const char *s, size_t len);
|
||||||
|
extern bool pq_check_connection(void);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* prototypes for functions in be-secure.c
|
* prototypes for functions in be-secure.c
|
||||||
|
@ -85,6 +85,7 @@ extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending;
|
|||||||
extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending;
|
extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending;
|
||||||
extern PGDLLIMPORT volatile sig_atomic_t ProcSignalBarrierPending;
|
extern PGDLLIMPORT volatile sig_atomic_t ProcSignalBarrierPending;
|
||||||
|
|
||||||
|
extern PGDLLIMPORT volatile sig_atomic_t CheckClientConnectionPending;
|
||||||
extern PGDLLIMPORT volatile sig_atomic_t ClientConnectionLost;
|
extern PGDLLIMPORT volatile sig_atomic_t ClientConnectionLost;
|
||||||
|
|
||||||
/* these are marked volatile because they are examined by signal handlers: */
|
/* these are marked volatile because they are examined by signal handlers: */
|
||||||
|
@ -29,6 +29,7 @@ extern CommandDest whereToSendOutput;
|
|||||||
extern PGDLLIMPORT const char *debug_query_string;
|
extern PGDLLIMPORT const char *debug_query_string;
|
||||||
extern int max_stack_depth;
|
extern int max_stack_depth;
|
||||||
extern int PostAuthDelay;
|
extern int PostAuthDelay;
|
||||||
|
extern int client_connection_check_interval;
|
||||||
|
|
||||||
/* GUC-configurable parameters */
|
/* GUC-configurable parameters */
|
||||||
|
|
||||||
|
@ -32,6 +32,7 @@ typedef enum TimeoutId
|
|||||||
STANDBY_LOCK_TIMEOUT,
|
STANDBY_LOCK_TIMEOUT,
|
||||||
IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
|
IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
|
||||||
IDLE_SESSION_TIMEOUT,
|
IDLE_SESSION_TIMEOUT,
|
||||||
|
CLIENT_CONNECTION_CHECK_TIMEOUT,
|
||||||
/* First user-definable timeout reason */
|
/* First user-definable timeout reason */
|
||||||
USER_TIMEOUT,
|
USER_TIMEOUT,
|
||||||
/* Maximum number of timeout reasons */
|
/* Maximum number of timeout reasons */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user