From c30f54ad732ca5c8762bb68bbe0f51de9137dd72 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Sat, 3 Apr 2021 08:52:30 +1300 Subject: [PATCH] Detect POLLHUP/POLLRDHUP while running queries. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide a new GUC check_client_connection_interval that can be used to check whether the client connection has gone away, while running very long queries. It is disabled by default. For now this uses a non-standard Linux extension (also adopted by at least one other OS). POLLRDHUP is not defined by POSIX, and other OSes don't have a reliable way to know if a connection was closed without actually trying to read or write. In future we might consider trying to send a no-op/heartbeat message instead, but that could require protocol changes. Author: Sergey Cherkashin Author: Thomas Munro Reviewed-by: Thomas Munro Reviewed-by: Tatsuo Ishii Reviewed-by: Konstantin Knizhnik Reviewed-by: Zhihong Yu Reviewed-by: Andres Freund Reviewed-by: Maksim Milyutin Reviewed-by: Tsunakawa, Takayuki/綱川 貴之 Reviewed-by: Tom Lane (much earlier version) Discussion: https://postgr.es/m/77def86b27e41f0efcba411460e929ae%40postgrespro.ru --- doc/src/sgml/config.sgml | 37 +++++++++++++++++ src/backend/libpq/pqcomm.c | 40 +++++++++++++++++++ src/backend/tcop/postgres.c | 32 +++++++++++++++ src/backend/utils/init/globals.c | 1 + src/backend/utils/init/postinit.c | 10 +++++ src/backend/utils/misc/guc.c | 29 ++++++++++++++ src/backend/utils/misc/postgresql.conf.sample | 3 ++ src/include/libpq/libpq.h | 1 + src/include/miscadmin.h | 1 + src/include/tcop/tcopprot.h | 1 + src/include/utils/timeout.h | 1 + 11 files changed, 156 insertions(+) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 9d87b5097af..0c9128a55d0 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -998,6 +998,43 @@ include_dir 'conf.d' + + client_connection_check_interval (integer) + + client_connection_check_interval configuration parameter + + + + + Sets the time interval between optional checks that the client is still + connected, while running queries. The check is performed by polling + the socket, and allows long running queries to be aborted sooner if + the kernel reports that the connection is closed. + + + This option is currently available only on systems that support the + non-standard POLLRDHUP extension to the + poll system call, including Linux. + + + If the value is specified without units, it is taken as milliseconds. + The default value is 0, which disables connection + checks. Without connection checks, the server will detect the loss of + the connection only at the next interaction with the socket, when it + waits for, receives or sends data. + + + For the kernel itself to detect lost TCP connections reliably and within + a known timeframe in all scenarios including network failure, it may + also be necessary to adjust the TCP keepalive settings of the operating + system, or the , + and + settings of + PostgreSQL. + + + + diff --git a/src/backend/libpq/pqcomm.c b/src/backend/libpq/pqcomm.c index 4c7b1e7bfdf..4cd6d6dfbb9 100644 --- a/src/backend/libpq/pqcomm.c +++ b/src/backend/libpq/pqcomm.c @@ -54,6 +54,9 @@ */ #include "postgres.h" +#ifdef HAVE_POLL_H +#include +#endif #include #include #include @@ -1921,3 +1924,40 @@ pq_settcpusertimeout(int timeout, Port *port) return STATUS_OK; } + +/* + * Check if the client is still connected. + */ +bool +pq_check_connection(void) +{ +#if defined(POLLRDHUP) + /* + * POLLRDHUP is a Linux extension to poll(2) to detect sockets closed by + * the other end. We don't have a portable way to do that without + * actually trying to read or write data on other systems. We don't want + * to read because that would be confused by pipelined queries and COPY + * data. Perhaps in future we'll try to write a heartbeat message instead. + */ + struct pollfd pollfd; + int rc; + + pollfd.fd = MyProcPort->sock; + pollfd.events = POLLOUT | POLLIN | POLLRDHUP; + pollfd.revents = 0; + + rc = poll(&pollfd, 1, 0); + + if (rc < 0) + { + ereport(COMMERROR, + (errcode_for_socket_access(), + errmsg("could not poll socket: %m"))); + return false; + } + else if (rc == 1 && (pollfd.revents & (POLLHUP | POLLRDHUP))) + return false; +#endif + + return true; +} diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 2b1b68109fd..ad351e2fd1e 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -102,6 +102,9 @@ int max_stack_depth = 100; /* wait N seconds to allow attach from a debugger */ int PostAuthDelay = 0; +/* Time between checks that the client is still connected. */ +int client_connection_check_interval = 0; + /* ---------------- * private typedefs etc * ---------------- @@ -2671,6 +2674,14 @@ start_xact_command(void) * not desired, the timeout has to be disabled explicitly. */ enable_statement_timeout(); + + /* Start timeout for checking if the client has gone away if necessary. */ + if (client_connection_check_interval > 0 && + IsUnderPostmaster && + MyProcPort && + !get_timeout_active(CLIENT_CONNECTION_CHECK_TIMEOUT)) + enable_timeout_after(CLIENT_CONNECTION_CHECK_TIMEOUT, + client_connection_check_interval); } static void @@ -3149,6 +3160,27 @@ ProcessInterrupts(void) (errcode(ERRCODE_ADMIN_SHUTDOWN), errmsg("terminating connection due to administrator command"))); } + + if (CheckClientConnectionPending) + { + CheckClientConnectionPending = false; + + /* + * Check for lost connection and re-arm, if still configured, but not + * if we've arrived back at DoingCommandRead state. We don't want to + * wake up idle sessions, and they already know how to detect lost + * connections. + */ + if (!DoingCommandRead && client_connection_check_interval > 0) + { + if (!pq_check_connection()) + ClientConnectionLost = true; + else + enable_timeout_after(CLIENT_CONNECTION_CHECK_TIMEOUT, + client_connection_check_interval); + } + } + if (ClientConnectionLost) { QueryCancelPending = false; /* lost connection trumps QueryCancel */ diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index 73e0a672ae3..a9f0fc3017c 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -30,6 +30,7 @@ ProtocolVersion FrontendProtocol; volatile sig_atomic_t InterruptPending = false; volatile sig_atomic_t QueryCancelPending = false; volatile sig_atomic_t ProcDiePending = false; +volatile sig_atomic_t CheckClientConnectionPending = false; volatile sig_atomic_t ClientConnectionLost = false; volatile sig_atomic_t IdleInTransactionSessionTimeoutPending = false; volatile sig_atomic_t IdleSessionTimeoutPending = false; diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 7abeccb5362..a3ec358538a 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -73,6 +73,7 @@ static void StatementTimeoutHandler(void); static void LockTimeoutHandler(void); static void IdleInTransactionSessionTimeoutHandler(void); static void IdleSessionTimeoutHandler(void); +static void ClientCheckTimeoutHandler(void); static bool ThereIsAtLeastOneRole(void); static void process_startup_options(Port *port, bool am_superuser); static void process_settings(Oid databaseid, Oid roleid); @@ -620,6 +621,7 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, RegisterTimeout(IDLE_IN_TRANSACTION_SESSION_TIMEOUT, IdleInTransactionSessionTimeoutHandler); RegisterTimeout(IDLE_SESSION_TIMEOUT, IdleSessionTimeoutHandler); + RegisterTimeout(CLIENT_CONNECTION_CHECK_TIMEOUT, ClientCheckTimeoutHandler); } /* @@ -1242,6 +1244,14 @@ IdleSessionTimeoutHandler(void) SetLatch(MyLatch); } +static void +ClientCheckTimeoutHandler(void) +{ + CheckClientConnectionPending = true; + InterruptPending = true; + SetLatch(MyLatch); +} + /* * Returns true if at least one role is defined in this database cluster. */ diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 584daffc8a9..60a9c7a2a0b 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -20,6 +20,9 @@ #include #include #include +#ifdef HAVE_POLL_H +#include +#endif #ifndef WIN32 #include #endif @@ -204,6 +207,7 @@ static bool check_autovacuum_work_mem(int *newval, void **extra, GucSource sourc static bool check_effective_io_concurrency(int *newval, void **extra, GucSource source); static bool check_maintenance_io_concurrency(int *newval, void **extra, GucSource source); static bool check_huge_page_size(int *newval, void **extra, GucSource source); +static bool check_client_connection_check_interval(int *newval, void **extra, GucSource source); static void assign_pgstat_temp_directory(const char *newval, void *extra); static bool check_application_name(char **newval, void **extra, GucSource source); static void assign_application_name(const char *newval, void *extra); @@ -3501,6 +3505,17 @@ static struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + {"client_connection_check_interval", PGC_USERSET, CLIENT_CONN_OTHER, + gettext_noop("Sets the time interval between checks for disconnection while running queries."), + NULL, + GUC_UNIT_MS + }, + &client_connection_check_interval, + 0, 0, INT_MAX, + check_client_connection_check_interval, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL @@ -11980,6 +11995,20 @@ check_huge_page_size(int *newval, void **extra, GucSource source) return true; } +static bool +check_client_connection_check_interval(int *newval, void **extra, GucSource source) +{ +#ifndef POLLRDHUP + /* Linux only, for now. See pq_check_connection(). */ + if (*newval != 0) + { + GUC_check_errdetail("client_connection_check_interval must be set to 0 on platforms that lack POLLRDHUP."); + return false; + } +#endif + return true; +} + static void assign_pgstat_temp_directory(const char *newval, void *extra) { diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 30cfddac1f7..39da7cc9427 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -719,6 +719,9 @@ #dynamic_library_path = '$libdir' +#client_connection_check_interval = 0 # time between checks for client + # disconnection while running queries; + # 0 for never #------------------------------------------------------------------------------ # LOCK MANAGEMENT diff --git a/src/include/libpq/libpq.h b/src/include/libpq/libpq.h index b20deeb5550..3ebbc8d6656 100644 --- a/src/include/libpq/libpq.h +++ b/src/include/libpq/libpq.h @@ -71,6 +71,7 @@ extern int pq_getbyte(void); extern int pq_peekbyte(void); extern int pq_getbyte_if_available(unsigned char *c); extern int pq_putmessage_v2(char msgtype, const char *s, size_t len); +extern bool pq_check_connection(void); /* * prototypes for functions in be-secure.c diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 013850ac288..6f8251e0b07 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -85,6 +85,7 @@ extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending; extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending; extern PGDLLIMPORT volatile sig_atomic_t ProcSignalBarrierPending; +extern PGDLLIMPORT volatile sig_atomic_t CheckClientConnectionPending; extern PGDLLIMPORT volatile sig_atomic_t ClientConnectionLost; /* these are marked volatile because they are examined by signal handlers: */ diff --git a/src/include/tcop/tcopprot.h b/src/include/tcop/tcopprot.h index e5472100a43..241e7c99614 100644 --- a/src/include/tcop/tcopprot.h +++ b/src/include/tcop/tcopprot.h @@ -29,6 +29,7 @@ extern CommandDest whereToSendOutput; extern PGDLLIMPORT const char *debug_query_string; extern int max_stack_depth; extern int PostAuthDelay; +extern int client_connection_check_interval; /* GUC-configurable parameters */ diff --git a/src/include/utils/timeout.h b/src/include/utils/timeout.h index ecb2a366a5f..93e6a691b3f 100644 --- a/src/include/utils/timeout.h +++ b/src/include/utils/timeout.h @@ -32,6 +32,7 @@ typedef enum TimeoutId STANDBY_LOCK_TIMEOUT, IDLE_IN_TRANSACTION_SESSION_TIMEOUT, IDLE_SESSION_TIMEOUT, + CLIENT_CONNECTION_CHECK_TIMEOUT, /* First user-definable timeout reason */ USER_TIMEOUT, /* Maximum number of timeout reasons */