mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	Introduce WaitEventSet API.
Commit ac1d794 ("Make idle backends exit if the postmaster dies.")
introduced a regression on, at least, large Linux systems. Constantly
adding the same postmaster_alive_fds to the OS's internal data structures
for implementing poll/select can cause significant contention, leading
to a performance regression of nearly 3x in one example.
This can be avoided by using e.g. Linux's epoll, which avoids having to
add/remove file descriptors to the wait data structures at a high rate.
Unfortunately the current latch interface makes it hard to allocate any
persistent per-backend resources.
Replace, with a backward compatibility layer, WaitLatchOrSocket with a
new WaitEventSet API. Users can allocate such a Set across multiple
calls, and add more than one file descriptor to wait on. The latter has
been added because there are upcoming PostgreSQL features where that will
be helpful.
In addition to the previously existing poll(2), select(2), and
WaitForMultipleObjects() implementations, also provide an epoll_wait(2)
based implementation to address the aforementioned performance
problem. Epoll is only available on Linux, but that is the most likely
OS for machines large enough (four sockets) to reproduce the problem.
To actually address the aforementioned regression, create and use a
long-lived WaitEventSet for FE/BE communication.  There are additional
places that would benefit from a long-lived set, but that's a task for
another day.
Thanks to Amit Kapila, who helped make the Windows code I blindly wrote
actually work.
Reported-By: Dmitry Vasilyev
Discussion: CAB-SwXZh44_2ybvS5Z67p_CDz=XFn4hNAD=CnMEF+QqkXwFrGg@mail.gmail.com
Discussion: 20160114143931.GG10941@awork2.anarazel.de
			
			
This commit is contained in:
		
							
								
								
									
										2
									
								
								configure
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								configure
									
									
									
									
										vendored
									
									
								
							| @@ -10193,7 +10193,7 @@ fi | ||||
| ## Header files | ||||
| ## | ||||
|  | ||||
| for ac_header in atomic.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h | ||||
| for ac_header in atomic.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/epoll.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h | ||||
| do : | ||||
|   as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` | ||||
| ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" | ||||
|   | ||||
| @@ -1183,7 +1183,7 @@ AC_SUBST(UUID_LIBS) | ||||
| ## | ||||
|  | ||||
| dnl sys/socket.h is required by AC_FUNC_ACCEPT_ARGTYPES | ||||
| AC_CHECK_HEADERS([atomic.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h]) | ||||
| AC_CHECK_HEADERS([atomic.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/epoll.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h]) | ||||
|  | ||||
| # On BSD, test for net/if.h will fail unless sys/socket.h | ||||
| # is included first. | ||||
|   | ||||
| @@ -140,13 +140,13 @@ retry: | ||||
| 	/* In blocking mode, wait until the socket is ready */ | ||||
| 	if (n < 0 && !port->noblock && (errno == EWOULDBLOCK || errno == EAGAIN)) | ||||
| 	{ | ||||
| 		int			w; | ||||
| 		WaitEvent   event; | ||||
|  | ||||
| 		Assert(waitfor); | ||||
|  | ||||
| 		w = WaitLatchOrSocket(MyLatch, | ||||
| 							  WL_LATCH_SET | WL_POSTMASTER_DEATH | waitfor, | ||||
| 							  port->sock, 0); | ||||
| 		ModifyWaitEvent(FeBeWaitSet, 0, waitfor, NULL); | ||||
|  | ||||
| 		WaitEventSetWait(FeBeWaitSet, -1 /* no timeout */, &event, 1); | ||||
|  | ||||
| 		/* | ||||
| 		 * If the postmaster has died, it's not safe to continue running, | ||||
| @@ -165,13 +165,13 @@ retry: | ||||
| 		 * cycles checking for this very rare condition, and this should cause | ||||
| 		 * us to exit quickly in most cases.) | ||||
| 		 */ | ||||
| 		if (w & WL_POSTMASTER_DEATH) | ||||
| 		if (event.events & WL_POSTMASTER_DEATH) | ||||
| 			ereport(FATAL, | ||||
| 					(errcode(ERRCODE_ADMIN_SHUTDOWN), | ||||
| 					errmsg("terminating connection due to unexpected postmaster exit"))); | ||||
|  | ||||
| 		/* Handle interrupt. */ | ||||
| 		if (w & WL_LATCH_SET) | ||||
| 		if (event.events & WL_LATCH_SET) | ||||
| 		{ | ||||
| 			ResetLatch(MyLatch); | ||||
| 			ProcessClientReadInterrupt(true); | ||||
| @@ -241,22 +241,22 @@ retry: | ||||
|  | ||||
| 	if (n < 0 && !port->noblock && (errno == EWOULDBLOCK || errno == EAGAIN)) | ||||
| 	{ | ||||
| 		int			w; | ||||
| 		WaitEvent   event; | ||||
|  | ||||
| 		Assert(waitfor); | ||||
|  | ||||
| 		w = WaitLatchOrSocket(MyLatch, | ||||
| 							  WL_LATCH_SET | WL_POSTMASTER_DEATH | waitfor, | ||||
| 							  port->sock, 0); | ||||
| 		ModifyWaitEvent(FeBeWaitSet, 0, waitfor, NULL); | ||||
|  | ||||
| 		WaitEventSetWait(FeBeWaitSet, -1 /* no timeout */, &event, 1); | ||||
|  | ||||
| 		/* See comments in secure_read. */ | ||||
| 		if (w & WL_POSTMASTER_DEATH) | ||||
| 		if (event.events & WL_POSTMASTER_DEATH) | ||||
| 			ereport(FATAL, | ||||
| 					(errcode(ERRCODE_ADMIN_SHUTDOWN), | ||||
| 					errmsg("terminating connection due to unexpected postmaster exit"))); | ||||
|  | ||||
| 		/* Handle interrupt. */ | ||||
| 		if (w & WL_LATCH_SET) | ||||
| 		if (event.events & WL_LATCH_SET) | ||||
| 		{ | ||||
| 			ResetLatch(MyLatch); | ||||
| 			ProcessClientWriteInterrupt(true); | ||||
|   | ||||
| @@ -201,6 +201,11 @@ pq_init(void) | ||||
| 				(errmsg("could not set socket to nonblocking mode: %m"))); | ||||
| #endif | ||||
|  | ||||
| 	FeBeWaitSet = CreateWaitEventSet(TopMemoryContext, 3); | ||||
| 	AddWaitEventToSet(FeBeWaitSet, WL_SOCKET_WRITEABLE, MyProcPort->sock, | ||||
| 					  NULL, NULL); | ||||
| 	AddWaitEventToSet(FeBeWaitSet, WL_LATCH_SET, -1, MyLatch, NULL); | ||||
| 	AddWaitEventToSet(FeBeWaitSet, WL_POSTMASTER_DEATH, -1, NULL, NULL); | ||||
| } | ||||
|  | ||||
| /* -------------------------------- | ||||
|   | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -33,6 +33,7 @@ | ||||
|  | ||||
| #include "access/htup_details.h" | ||||
| #include "catalog/pg_authid.h" | ||||
| #include "libpq/libpq.h" | ||||
| #include "mb/pg_wchar.h" | ||||
| #include "miscadmin.h" | ||||
| #include "postmaster/autovacuum.h" | ||||
| @@ -247,6 +248,9 @@ SwitchToSharedLatch(void) | ||||
|  | ||||
| 	MyLatch = &MyProc->procLatch; | ||||
|  | ||||
| 	if (FeBeWaitSet) | ||||
| 		ModifyWaitEvent(FeBeWaitSet, 1, WL_LATCH_SET, MyLatch); | ||||
|  | ||||
| 	/* | ||||
| 	 * Set the shared latch as the local one might have been set. This | ||||
| 	 * shouldn't normally be necessary as code is supposed to check the | ||||
| @@ -262,6 +266,10 @@ SwitchBackToLocalLatch(void) | ||||
| 	Assert(MyProc != NULL && MyLatch == &MyProc->procLatch); | ||||
|  | ||||
| 	MyLatch = &LocalLatchData; | ||||
|  | ||||
| 	if (FeBeWaitSet) | ||||
| 		ModifyWaitEvent(FeBeWaitSet, 1, WL_LATCH_SET, MyLatch); | ||||
|  | ||||
| 	SetLatch(MyLatch); | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -19,6 +19,7 @@ | ||||
|  | ||||
| #include "lib/stringinfo.h" | ||||
| #include "libpq/libpq-be.h" | ||||
| #include "storage/latch.h" | ||||
|  | ||||
|  | ||||
| typedef struct | ||||
| @@ -95,6 +96,8 @@ extern ssize_t secure_raw_write(Port *port, const void *ptr, size_t len); | ||||
|  | ||||
| extern bool ssl_loaded_verify_locations; | ||||
|  | ||||
| WaitEventSet *FeBeWaitSet; | ||||
|  | ||||
| /* GUCs */ | ||||
| extern char *SSLCipherSuites; | ||||
| extern char *SSLECDHCurve; | ||||
|   | ||||
| @@ -530,6 +530,9 @@ | ||||
| /* Define to 1 if you have the syslog interface. */ | ||||
| #undef HAVE_SYSLOG | ||||
|  | ||||
| /* Define to 1 if you have the <sys/epoll.h> header file. */ | ||||
| #undef HAVE_SYS_EPOLL_H | ||||
|  | ||||
| /* Define to 1 if you have the <sys/ioctl.h> header file. */ | ||||
| #undef HAVE_SYS_IOCTL_H | ||||
|  | ||||
|   | ||||
| @@ -68,6 +68,12 @@ | ||||
|  * use of any generic handler. | ||||
|  * | ||||
|  * | ||||
|  * WaitEventSets allow to wait for latches being set and additional events - | ||||
|  * postmaster dying and socket readiness of several sockets currently - at the | ||||
|  * same time.  On many platforms using a long lived event set is more | ||||
|  * efficient than using WaitLatch or WaitLatchOrSocket. | ||||
|  * | ||||
|  * | ||||
|  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group | ||||
|  * Portions Copyright (c) 1994, Regents of the University of California | ||||
|  * | ||||
| @@ -95,13 +101,27 @@ typedef struct Latch | ||||
| #endif | ||||
| } Latch; | ||||
|  | ||||
| /* Bitmasks for events that may wake-up WaitLatch() clients */ | ||||
| /* | ||||
|  * Bitmasks for events that may wake-up WaitLatch(), WaitLatchOrSocket(), or | ||||
|  * WaitEventSetWait(). | ||||
|  */ | ||||
| #define WL_LATCH_SET		 (1 << 0) | ||||
| #define WL_SOCKET_READABLE	 (1 << 1) | ||||
| #define WL_SOCKET_WRITEABLE  (1 << 2) | ||||
| #define WL_TIMEOUT			 (1 << 3) | ||||
| #define WL_TIMEOUT			 (1 << 3)	/* not for WaitEventSetWait() */ | ||||
| #define WL_POSTMASTER_DEATH  (1 << 4) | ||||
|  | ||||
| typedef struct WaitEvent | ||||
| { | ||||
| 	int			pos;			/* position in the event data structure */ | ||||
| 	uint32		events;			/* triggered events */ | ||||
| 	pgsocket	fd;				/* socket fd associated with event */ | ||||
| 	void	   *user_data;		/* pointer provided in AddWaitEventToSet */ | ||||
| } WaitEvent; | ||||
|  | ||||
| /* forward declaration to avoid exposing latch.c implementation details */ | ||||
| typedef struct WaitEventSet WaitEventSet; | ||||
|  | ||||
| /* | ||||
|  * prototypes for functions in latch.c | ||||
|  */ | ||||
| @@ -110,12 +130,19 @@ extern void InitLatch(volatile Latch *latch); | ||||
| extern void InitSharedLatch(volatile Latch *latch); | ||||
| extern void OwnLatch(volatile Latch *latch); | ||||
| extern void DisownLatch(volatile Latch *latch); | ||||
| extern int	WaitLatch(volatile Latch *latch, int wakeEvents, long timeout); | ||||
| extern int WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, | ||||
| 				  pgsocket sock, long timeout); | ||||
| extern void SetLatch(volatile Latch *latch); | ||||
| extern void ResetLatch(volatile Latch *latch); | ||||
|  | ||||
| extern WaitEventSet *CreateWaitEventSet(MemoryContext context, int nevents); | ||||
| extern void FreeWaitEventSet(WaitEventSet *set); | ||||
| extern int AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, | ||||
| 				  Latch *latch, void *user_data); | ||||
| extern void ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch); | ||||
|  | ||||
| extern int	WaitEventSetWait(WaitEventSet *set, long timeout, WaitEvent *occurred_events, int nevents); | ||||
| extern int	WaitLatch(volatile Latch *latch, int wakeEvents, long timeout); | ||||
| extern int WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, | ||||
| 				  pgsocket sock, long timeout); | ||||
|  | ||||
| /* | ||||
|  * Unix implementation uses SIGUSR1 for inter-process signaling. | ||||
|   | ||||
| @@ -2113,6 +2113,8 @@ WalSnd | ||||
| WalSndCtlData | ||||
| WalSndSendDataCallback | ||||
| WalSndState | ||||
| WaitEvent | ||||
| WaitEventSet | ||||
| WholeRowVarExprState | ||||
| WindowAgg | ||||
| WindowAggState | ||||
|   | ||||
		Reference in New Issue
	
	Block a user