From 1da569ca1f1fd08ae728ccde0952b688feff7d9c Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Fri, 3 Mar 2023 10:28:47 +1300 Subject: [PATCH] Don't leak descriptors into subprograms. Open long-lived data and WAL file descriptors with O_CLOEXEC. This flag was introduced by SUSv4 (POSIX.1-2008), and by now all of our target Unix systems have it. Our open() implementation for Windows already had that behavior, so provide a dummy O_CLOEXEC flag on that platform. For now, callers of open() and the "thin" wrappers in fd.c that deal in raw descriptors need to pass in O_CLOEXEC explicitly if desired. This commit does that for WAL files, and automatically for everything accessed via VFDs including SMgrRelation and BufFile. (With more discussion we might decide to turn it on automatically for the thin open()-wrappers too to avoid risk of missing places that need it, but these are typically used for short-lived descriptors where we don't expect to fork/exec, and it's remotely possible that extensions could be using these APIs and passing descriptors to subprograms deliberately, so that hasn't been done here.) Do the same for sockets and the postmaster pipe with FD_CLOEXEC. (Later commits might use modern interfaces to remove these extra fcntl() calls and more where possible, but we'll need them as a fallback for a couple of systems, so do it that way in this initial commit.) With this change, subprograms executed for archiving, copying etc will no longer have access to the server's descriptors, other than the ones that we decide to pass down. Reviewed-by: Andres Freund (earlier version) Discussion: https://postgr.es/m/CA%2BhUKGKb6FsAdQWcRL35KJsftv%2B9zXqQbzwkfRf1i0J2e57%2BhQ%40mail.gmail.com --- src/backend/access/transam/xlog.c | 9 ++++++--- src/backend/libpq/pqcomm.c | 7 +++++++ src/backend/storage/file/fd.c | 8 ++++++++ src/backend/utils/init/miscinit.c | 8 ++++++++ src/include/port/win32_port.h | 7 +++++++ 5 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index f9f0f6db8d1..87af608d155 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -2936,7 +2936,8 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, * Try to use existent file (checkpoint maker may have created it already) */ *added = false; - fd = BasicOpenFile(path, O_RDWR | PG_BINARY | get_sync_bit(sync_method)); + fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC | + get_sync_bit(sync_method)); if (fd < 0) { if (errno != ENOENT) @@ -3097,7 +3098,8 @@ XLogFileInit(XLogSegNo logsegno, TimeLineID logtli) return fd; /* Now open original target segment (might not be file I just made) */ - fd = BasicOpenFile(path, O_RDWR | PG_BINARY | get_sync_bit(sync_method)); + fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC | + get_sync_bit(sync_method)); if (fd < 0) ereport(ERROR, (errcode_for_file_access(), @@ -3328,7 +3330,8 @@ XLogFileOpen(XLogSegNo segno, TimeLineID tli) XLogFilePath(path, tli, segno, wal_segment_size); - fd = BasicOpenFile(path, O_RDWR | PG_BINARY | get_sync_bit(sync_method)); + fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC | + get_sync_bit(sync_method)); if (fd < 0) ereport(PANIC, (errcode_for_file_access(), diff --git a/src/backend/libpq/pqcomm.c b/src/backend/libpq/pqcomm.c index 864c9debe82..da5bb5fc5d3 100644 --- a/src/backend/libpq/pqcomm.c +++ b/src/backend/libpq/pqcomm.c @@ -200,6 +200,13 @@ pq_init(void) (errmsg("could not set socket to nonblocking mode: %m"))); #endif +#ifndef WIN32 + + /* Don't give the socket to any subprograms we execute. */ + if (fcntl(MyProcPort->sock, F_SETFD, FD_CLOEXEC) < 0) + elog(FATAL, "fcntl(F_SETFD) failed on socket: %m"); +#endif + FeBeWaitSet = CreateWaitEventSet(TopMemoryContext, FeBeWaitSetNEvents); socket_pos = AddWaitEventToSet(FeBeWaitSet, WL_SOCKET_WRITEABLE, MyProcPort->sock, NULL, NULL); diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index ea690f05c69..9fd8444ed4d 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -1515,6 +1515,14 @@ PathNameOpenFilePerm(const char *fileName, int fileFlags, mode_t fileMode) /* Close excess kernel FDs. */ ReleaseLruFiles(); + /* + * Descriptors managed by VFDs are implicitly marked O_CLOEXEC. The + * client shouldn't be expected to know which kernel descriptors are + * currently open, so it wouldn't make sense for them to be inherited by + * executed subprograms. + */ + fileFlags |= O_CLOEXEC; + vfdP->fd = BasicOpenFilePerm(fileName, fileFlags, fileMode); if (vfdP->fd < 0) diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c index 59532bbd80d..7eb7fe87f68 100644 --- a/src/backend/utils/init/miscinit.c +++ b/src/backend/utils/init/miscinit.c @@ -163,6 +163,14 @@ InitPostmasterChild(void) /* Request a signal if the postmaster dies, if possible. */ PostmasterDeathSignalInit(); + + /* Don't give the pipe to subprograms that we execute. */ +#ifndef WIN32 + if (fcntl(postmaster_alive_fds[POSTMASTER_FD_WATCH], F_SETFD, FD_CLOEXEC) < 0) + ereport(FATAL, + (errcode_for_socket_access(), + errmsg_internal("could not set postmaster death monitoring pipe to FD_CLOEXEC mode: %m"))); +#endif } /* diff --git a/src/include/port/win32_port.h b/src/include/port/win32_port.h index 94881957995..5116c2fc06c 100644 --- a/src/include/port/win32_port.h +++ b/src/include/port/win32_port.h @@ -353,6 +353,13 @@ extern int _pglstat64(const char *name, struct stat *buf); */ #define O_DSYNC 0x0080 +/* + * Our open() replacement does not create inheritable handles, so it is safe to + * ignore O_CLOEXEC. (If we were using Windows' own open(), it might be + * necessary to convert this to _O_NOINHERIT.) + */ +#define O_CLOEXEC 0 + /* * Supplement to . *