From 56b39cce778f93cd95a01df0da083e937424662d Mon Sep 17 00:00:00 2001
From: Thomas Munro <tmunro@postgresql.org>
Date: Fri, 10 Dec 2021 16:13:14 +1300
Subject: [PATCH] Check for STATUS_DELETE_PENDING on Windows.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1.  Update our open() wrapper to check for NT's STATUS_DELETE_PENDING
and translate it to Unix-like errors.  This is done with
RtlGetLastNtStatus(), which is dynamically loaded from ntdll.  A new
file win32ntdll.c centralizes lookup of NT functions, in case we decide
to add more in the future.

2.  Remove non-working code that was trying to do something similar for
stat(), and just reuse the open() wrapper code.  As a side effect,
stat() also gains resilience against "sharing violation" errors.

3.  Since stat() is used very early in process startup, remove the
requirement that the Win32 signal event has been created before
pgwin32_open_handle() is reached.  Instead, teach pg_usleep() to fall
back to a non-interruptible sleep if reached before the signal event is
available.

This could be back-patched, but for now it's in master only.  The
problem has apparently been with us for a long time and generated only a
few complaints.  Proposed patches trigger it more often, which led to
this investigation and fix.

Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Alexander Lakhin <exclusion@gmail.com>
Reviewed-by: Juan José Santamaría Flecha <juanjo.santamaria@gmail.com>
Discussion: https://postgr.es/m/CA%2BhUKGJz_pZTF9mckn6XgSv69%2BjGwdgLkxZ6b3NWGLBCVjqUZA%40mail.gmail.com
(cherry picked from commit e2f0f8ed251d02c1eda79e1ca3cb3db2681e7a86)

Author: Thomas Munro <tmunro@postgresql.org>
Author: Alexandra Wang <alexandra.wang.oss@gmail.com>
---
 configure                       |   6 ++
 configure.in                    |   1 +
 src/backend/port/win32/signal.c |  12 +++-
 src/include/port.h              |   1 +
 src/include/port/win32ntdll.h   |  27 +++++++++
 src/port/open.c                 | 104 +++++++++++++++++---------------
 src/port/win32ntdll.c           |  69 +++++++++++++++++++++
 src/port/win32stat.c            |  78 ++----------------------
 src/tools/msvc/Mkvcbuild.pm     |   3 +-
 9 files changed, 179 insertions(+), 122 deletions(-)
 create mode 100644 src/include/port/win32ntdll.h
 create mode 100644 src/port/win32ntdll.c

diff --git a/configure b/configure
index 2f10787c7dd..41b1db2f880 100755
--- a/configure
+++ b/configure
@@ -16638,6 +16638,12 @@ esac
  ;;
 esac
 
+  case " $LIBOBJS " in
+  *" win32ntdll.$ac_objext "* ) ;;
+  *) LIBOBJS="$LIBOBJS win32ntdll.$ac_objext"
+ ;;
+esac
+
   case " $LIBOBJS " in
   *" win32security.$ac_objext "* ) ;;
   *) LIBOBJS="$LIBOBJS win32security.$ac_objext"
diff --git a/configure.in b/configure.in
index ce6a1f3e41a..a44ac9b0852 100644
--- a/configure.in
+++ b/configure.in
@@ -1879,6 +1879,7 @@ if test "$PORTNAME" = "win32"; then
   AC_LIBOBJ(system)
   AC_LIBOBJ(win32env)
   AC_LIBOBJ(win32error)
+  AC_LIBOBJ(win32ntdll)
   AC_LIBOBJ(win32security)
   AC_LIBOBJ(win32setlocale)
   AC_LIBOBJ(win32stat)
diff --git a/src/backend/port/win32/signal.c b/src/backend/port/win32/signal.c
index 3218b38240c..d99d8a36d19 100644
--- a/src/backend/port/win32/signal.c
+++ b/src/backend/port/win32/signal.c
@@ -52,7 +52,17 @@ static BOOL WINAPI pg_console_handler(DWORD dwCtrlType);
 void
 pg_usleep(long microsec)
 {
-	Assert(pgwin32_signal_event != NULL);
+	if (unlikely(pgwin32_signal_event == NULL))
+	{
+		/*
+		 * If we're reached by pgwin32_open_handle() early in startup before
+		 * the signal event is set up, just fall back to a regular
+		 * non-interruptible sleep.
+		 */
+		SleepEx((microsec < 500 ? 1 : (microsec + 500) / 1000), FALSE);
+		return;
+	}
+
 	if (WaitForSingleObject(pgwin32_signal_event,
 							(microsec < 500 ? 1 : (microsec + 500) / 1000))
 		== WAIT_OBJECT_0)
diff --git a/src/include/port.h b/src/include/port.h
index 70b193657e4..8a5a395fcb2 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -280,6 +280,7 @@ extern bool rmtree(const char *path, bool rmtopdir);
  * passing of other special options.
  */
 #define		O_DIRECT	0x80000000
+extern HANDLE pgwin32_open_handle(const char *, int, bool);
 extern int	pgwin32_open(const char *, int,...);
 extern FILE *pgwin32_fopen(const char *, const char *);
 #define		open(a,b,c) pgwin32_open(a,b,c)
diff --git a/src/include/port/win32ntdll.h b/src/include/port/win32ntdll.h
new file mode 100644
index 00000000000..4d8808b3aaf
--- /dev/null
+++ b/src/include/port/win32ntdll.h
@@ -0,0 +1,27 @@
+/*-------------------------------------------------------------------------
+ *
+ * win32ntdll.h
+ *	  Dynamically loaded Windows NT functions.
+ *
+ * Portions Copyright (c) 2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/port/win32ntdll.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * Because this includes NT headers that normally conflict with Win32 headers,
+ * any translation unit that includes it should #define UMDF_USING_NTSTATUS
+ * before including <windows.h>.
+ */
+
+#include <ntstatus.h>
+#include <winternl.h>
+
+typedef NTSTATUS (__stdcall *RtlGetLastNtStatus_t) (void);
+
+extern RtlGetLastNtStatus_t pg_RtlGetLastNtStatus;
+
+extern int	initialize_ntdll(void);
diff --git a/src/port/open.c b/src/port/open.c
index c8521314f1c..3d74b49a324 100644
--- a/src/port/open.c
+++ b/src/port/open.c
@@ -13,17 +13,20 @@
 
 #ifdef WIN32
 
+#define UMDF_USING_NTSTATUS
+
 #ifndef FRONTEND
 #include "postgres.h"
 #else
 #include "postgres_fe.h"
 #endif
 
+#include "port/win32ntdll.h"
+
 #include <fcntl.h>
 #include <assert.h>
 #include <sys/stat.h>
 
-
 static int
 openFlagsToCreateFileFlags(int openFlags)
 {
@@ -56,38 +59,25 @@ openFlagsToCreateFileFlags(int openFlags)
 }
 
 /*
- *	 - file attribute setting, based on fileMode?
+ * Internal function used by pgwin32_open() and _pgstat64().  When
+ * backup_semantics is true, directories may be opened (for limited uses).  On
+ * failure, INVALID_HANDLE_VALUE is returned and errno is set.
  */
-int
-pgwin32_open(const char *fileName, int fileFlags,...)
+HANDLE
+pgwin32_open_handle(const char *fileName, int fileFlags, bool backup_semantics)
 {
-	int			fd;
-	HANDLE		h = INVALID_HANDLE_VALUE;
+	HANDLE		h;
 	SECURITY_ATTRIBUTES sa;
 	int			loops = 0;
 
+	if (initialize_ntdll() < 0)
+		return INVALID_HANDLE_VALUE;
+
 	/* Check that we can handle the request */
 	assert((fileFlags & ((O_RDONLY | O_WRONLY | O_RDWR) | O_APPEND |
 						 (O_RANDOM | O_SEQUENTIAL | O_TEMPORARY) |
 						 _O_SHORT_LIVED | O_DSYNC | O_DIRECT |
 						 (O_CREAT | O_TRUNC | O_EXCL) | (O_TEXT | O_BINARY))) == fileFlags);
-#ifndef FRONTEND
-	Assert(pgwin32_signal_event != NULL);	/* small chance of pg_usleep() */
-#endif
-
-#ifdef FRONTEND
-
-	/*
-	 * Since PostgreSQL 12, those concurrent-safe versions of open() and
-	 * fopen() can be used by frontends, having as side-effect to switch the
-	 * file-translation mode from O_TEXT to O_BINARY if none is specified.
-	 * Caller may want to enforce the binary or text mode, but if nothing is
-	 * defined make sure that the default mode maps with what versions older
-	 * than 12 have been doing.
-	 */
-	if ((fileFlags & O_BINARY) == 0)
-		fileFlags |= O_TEXT;
-#endif
 
 	sa.nLength = sizeof(sa);
 	sa.bInheritHandle = TRUE;
@@ -102,6 +92,7 @@ pgwin32_open(const char *fileName, int fileFlags,...)
 						   &sa,
 						   openFlagsToCreateFileFlags(fileFlags),
 						   FILE_ATTRIBUTE_NORMAL |
+						   (backup_semantics ? FILE_FLAG_BACKUP_SEMANTICS : 0) |
 						   ((fileFlags & O_RANDOM) ? FILE_FLAG_RANDOM_ACCESS : 0) |
 						   ((fileFlags & O_SEQUENTIAL) ? FILE_FLAG_SEQUENTIAL_SCAN : 0) |
 						   ((fileFlags & _O_SHORT_LIVED) ? FILE_ATTRIBUTE_TEMPORARY : 0) |
@@ -140,38 +131,55 @@ pgwin32_open(const char *fileName, int fileFlags,...)
 		/*
 		 * ERROR_ACCESS_DENIED is returned if the file is deleted but not yet
 		 * gone (Windows NT status code is STATUS_DELETE_PENDING).  In that
-		 * case we want to wait a bit and try again, giving up after 1 second
-		 * (since this condition should never persist very long).  However,
-		 * there are other commonly-hit cases that return ERROR_ACCESS_DENIED,
-		 * so care is needed.  In particular that happens if we try to open a
-		 * directory, or of course if there's an actual file-permissions
-		 * problem.  To distinguish these cases, try a stat().  In the
-		 * delete-pending case, it will either also get STATUS_DELETE_PENDING,
-		 * or it will see the file as gone and fail with ENOENT.  In other
-		 * cases it will usually succeed.  The only somewhat-likely case where
-		 * this coding will uselessly wait is if there's a permissions problem
-		 * with a containing directory, which we hope will never happen in any
-		 * performance-critical code paths.
+		 * case, we'd better ask for the NT status too so we can translate it
+		 * to a more Unix-like error.  We hope that nothing clobbers the NT
+		 * status in between the internal NtCreateFile() call and CreateFile()
+		 * returning.
+		 *
+		 * If there's no O_CREAT flag, then we'll pretend the file is
+		 * invisible.  With O_CREAT, we have no choice but to report that
+		 * there's a file in the way (which wouldn't happen on Unix).
 		 */
-		if (err == ERROR_ACCESS_DENIED)
+		if (err == ERROR_ACCESS_DENIED &&
+			pg_RtlGetLastNtStatus() == STATUS_DELETE_PENDING)
 		{
-			if (loops < 10)
-			{
-				struct microsoft_native_stat st;
-
-				if (microsoft_native_stat(fileName, &st) != 0)
-				{
-					pg_usleep(100000);
-					loops++;
-					continue;
-				}
-			}
+			if (fileFlags & O_CREAT)
+				err = ERROR_FILE_EXISTS;
+			else
+				err = ERROR_FILE_NOT_FOUND;
 		}
 
 		_dosmaperr(err);
-		return -1;
+		return INVALID_HANDLE_VALUE;
 	}
 
+	return h;
+}
+
+int
+pgwin32_open(const char *fileName, int fileFlags,...)
+{
+	HANDLE		h;
+	int			fd;
+
+	h = pgwin32_open_handle(fileName, fileFlags, false);
+	if (h == INVALID_HANDLE_VALUE)
+		return -1;
+
+#ifdef FRONTEND
+
+	/*
+	 * Since PostgreSQL 12, those concurrent-safe versions of open() and
+	 * fopen() can be used by frontends, having as side-effect to switch the
+	 * file-translation mode from O_TEXT to O_BINARY if none is specified.
+	 * Caller may want to enforce the binary or text mode, but if nothing is
+	 * defined make sure that the default mode maps with what versions older
+	 * than 12 have been doing.
+	 */
+	if ((fileFlags & O_BINARY) == 0)
+		fileFlags |= O_TEXT;
+#endif
+
 	/* _open_osfhandle will, on error, set errno accordingly */
 	if ((fd = _open_osfhandle((intptr_t) h, fileFlags & O_APPEND)) < 0)
 		CloseHandle(h);			/* will not affect errno */
diff --git a/src/port/win32ntdll.c b/src/port/win32ntdll.c
new file mode 100644
index 00000000000..aa3d37c50e6
--- /dev/null
+++ b/src/port/win32ntdll.c
@@ -0,0 +1,69 @@
+/*-------------------------------------------------------------------------
+ *
+ * win32ntdll.c
+ *	  Dynamically loaded Windows NT functions.
+ *
+ * Portions Copyright (c) 2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/port/win32ntdll.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#define UMDF_USING_NTSTATUS
+
+#include "c.h"
+
+#include "port/win32ntdll.h"
+
+RtlGetLastNtStatus_t pg_RtlGetLastNtStatus;
+
+typedef struct NtDllRoutine
+{
+	const char *name;
+	pg_funcptr_t *address;
+} NtDllRoutine;
+
+static const NtDllRoutine routines[] = {
+	{"RtlGetLastNtStatus", (pg_funcptr_t *) &pg_RtlGetLastNtStatus}
+};
+
+static bool initialized;
+
+int
+initialize_ntdll(void)
+{
+	HMODULE		module;
+
+	if (initialized)
+		return 0;
+
+	if (!(module = LoadLibraryEx("ntdll.dll", NULL, 0)))
+	{
+		_dosmaperr(GetLastError());
+		return -1;
+	}
+
+	for (int i = 0; i < lengthof(routines); ++i)
+	{
+		pg_funcptr_t address;
+
+		address = (pg_funcptr_t) GetProcAddress(module, routines[i].name);
+		if (!address)
+		{
+			_dosmaperr(GetLastError());
+			FreeLibrary(module);
+
+			return -1;
+		}
+
+		*(pg_funcptr_t *) routines[i].address = address;
+	}
+
+	initialized = true;
+
+	return 0;
+}
diff --git a/src/port/win32stat.c b/src/port/win32stat.c
index e09589c165b..6f7cd09087d 100644
--- a/src/port/win32stat.c
+++ b/src/port/win32stat.c
@@ -115,84 +115,18 @@ int
 _pgstat64(const char *name, struct stat *buf)
 {
 	/*
-	 * We must use a handle so lstat() returns the information of the target
-	 * file.  To have a reliable test for ERROR_DELETE_PENDING, this uses a
-	 * method similar to open() with a loop using stat() and some waits when
-	 * facing ERROR_ACCESS_DENIED.
+	 * Our open wrapper will report STATUS_DELETE_PENDING as ENOENT.  We
+	 * request FILE_FLAG_BACKUP_SEMANTICS so that we can open directories too,
+	 * for limited purposes.  We use the private handle-based version, so we
+	 * don't risk running out of fds.
 	 */
-	SECURITY_ATTRIBUTES sa;
 	HANDLE		hFile;
 	int			ret;
-	int			loops = 0;
 
-	if (name == NULL || buf == NULL)
-	{
-		errno = EINVAL;
+	hFile = pgwin32_open_handle(name, O_RDONLY, true);
+	if (hFile == INVALID_HANDLE_VALUE)
 		return -1;
-	}
-	/* fast not-exists check */
-	if (GetFileAttributes(name) == INVALID_FILE_ATTRIBUTES)
-	{
-		DWORD		err = GetLastError();
 
-		if (err != ERROR_ACCESS_DENIED)
-		{
-			_dosmaperr(err);
-			return -1;
-		}
-	}
-
-	/* get a file handle as lightweight as we can */
-	sa.nLength = sizeof(SECURITY_ATTRIBUTES);
-	sa.bInheritHandle = TRUE;
-	sa.lpSecurityDescriptor = NULL;
-	while ((hFile = CreateFile(name,
-							   GENERIC_READ,
-							   (FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE),
-							   &sa,
-							   OPEN_EXISTING,
-							   (FILE_FLAG_NO_BUFFERING | FILE_FLAG_BACKUP_SEMANTICS |
-								FILE_FLAG_OVERLAPPED),
-							   NULL)) == INVALID_HANDLE_VALUE)
-	{
-		DWORD		err = GetLastError();
-
-		/*
-		 * ERROR_ACCESS_DENIED is returned if the file is deleted but not yet
-		 * gone (Windows NT status code is STATUS_DELETE_PENDING).  In that
-		 * case we want to wait a bit and try again, giving up after 1 second
-		 * (since this condition should never persist very long).  However,
-		 * there are other commonly-hit cases that return ERROR_ACCESS_DENIED,
-		 * so care is needed.  In particular that happens if we try to open a
-		 * directory, or of course if there's an actual file-permissions
-		 * problem.  To distinguish these cases, try a stat().  In the
-		 * delete-pending case, it will either also get STATUS_DELETE_PENDING,
-		 * or it will see the file as gone and fail with ENOENT.  In other
-		 * cases it will usually succeed.  The only somewhat-likely case where
-		 * this coding will uselessly wait is if there's a permissions problem
-		 * with a containing directory, which we hope will never happen in any
-		 * performance-critical code paths.
-		 */
-		if (err == ERROR_ACCESS_DENIED)
-		{
-			if (loops < 10)
-			{
-				struct microsoft_native_stat st;
-
-				if (microsoft_native_stat(name, &st) != 0)
-				{
-					pg_usleep(100000);
-					loops++;
-					continue;
-				}
-			}
-		}
-
-		_dosmaperr(err);
-		return -1;
-	}
-
-	/* At last we can invoke fileinfo_to_stat */
 	ret = fileinfo_to_stat(hFile, buf);
 
 	CloseHandle(hFile);
diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm
index 45bec3ba000..e89fcffd546 100644
--- a/src/tools/msvc/Mkvcbuild.pm
+++ b/src/tools/msvc/Mkvcbuild.pm
@@ -103,7 +103,8 @@ sub mkvcbuild
 	  pg_strong_random.c pgcheckdir.c pgmkdirp.c pgsleep.c pgstrcasecmp.c
 	  pqsignal.c mkdtemp.c qsort.c qsort_arg.c quotes.c setenv.c system.c
 	  sprompt.c strerror.c tar.c thread.c
-	  win32env.c win32error.c win32security.c win32setlocale.c win32stat.c);
+	  win32env.c win32error.c win32ntdll.c
+	  win32security.c win32setlocale.c win32stat.c);
 
 	push(@pgportfiles, 'strtof.c') if ($vsVersion < '14.00');