diff --git a/configure b/configure
index 8176e99756f..6d34243dca2 100755
--- a/configure
+++ b/configure
@@ -15409,7 +15409,7 @@ fi
LIBS_including_readline="$LIBS"
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
-for ac_func in backtrace_symbols clock_gettime copyfile fdatasync getifaddrs getpeerucred getrlimit kqueue mbstowcs_l memset_s poll posix_fallocate ppoll pstat pthread_is_threaded_np readlink readv setproctitle setproctitle_fast setsid shm_open strchrnul strsignal symlink sync_file_range uselocale wcstombs_l writev
+for ac_func in backtrace_symbols clock_gettime copyfile fdatasync getifaddrs getpeerucred getrlimit kqueue mbstowcs_l memset_s poll posix_fallocate ppoll pstat pthread_is_threaded_np readlink readv setproctitle setproctitle_fast setsid shm_open strchrnul strsignal symlink syncfs sync_file_range uselocale wcstombs_l writev
do :
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
diff --git a/configure.ac b/configure.ac
index 54efbb22a31..e54e2fb632c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1701,6 +1701,7 @@ AC_CHECK_FUNCS(m4_normalize([
strchrnul
strsignal
symlink
+ syncfs
sync_file_range
uselocale
wcstombs_l
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 863ac31c6b4..ee4925d6d92 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -9721,6 +9721,41 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir'
+
+ recovery_init_sync_method (enum)
+
+ recovery_init_sync_method configuration parameter
+
+
+
+
+ When set to fsync, which is the default,
+ PostgreSQL will recursively open and
+ synchronize all files in the data directory before crash recovery
+ begins. The search for files will follow symbolic links for the WAL
+ directory and each configured tablespace (but not any other symbolic
+ links). This is intended to make sure that all WAL and data files are
+ durably stored on disk before replaying changes. This applies whenever
+ starting a database cluster that did not shut down cleanly, including
+ copies created with pg_basebackup.
+
+
+ On Linux, syncfs may be used instead, to ask the
+ operating system to synchronize the whole file systems that contain the
+ data directory, the WAL files and each tablespace (but not any other
+ file systems that may be reachable through symbolic links). This may
+ be a lot faster than the fsync setting, because it
+ doesn't need to open each file one by one. On the other hand, it may
+ be slower if a file system is shared by other applications that
+ modify a lot of files, since those files will also be written to disk.
+ Furthermore, on versions of Linux before 5.8, I/O errors encountered
+ while writing data to disk may not be reported to
+ PostgreSQL, and relevant error messages may
+ appear only in kernel logs.
+
+
+
+
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 110ba31517a..28933f8bbe1 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -72,9 +72,11 @@
#include "postgres.h"
+#include
#include
#include
#include
+#include
#ifndef WIN32
#include
#endif
@@ -158,6 +160,9 @@ int max_safe_fds = FD_MINFREE; /* default if not changed */
/* Whether it is safe to continue running after fsync() fails. */
bool data_sync_retry = false;
+/* How SyncDataDirectory() should do its job. */
+int recovery_init_sync_method = RECOVERY_INIT_SYNC_METHOD_FSYNC;
+
/* Debugging.... */
#ifdef FDDEBUG
@@ -3265,9 +3270,31 @@ looks_like_temp_rel_name(const char *name)
return true;
}
+#ifdef HAVE_SYNCFS
+/*
+ * do_syncfs -- ask the kernel to flush the file system containing "path".
+ *
+ * "path" is opened read-only purely to obtain a descriptor to pass to
+ * syncfs().  A failure to open the path or to sync is reported with
+ * ereport(LOG, ...); nothing is returned to the caller either way.
+ */
+static void
+do_syncfs(const char *path)
+{
+	int			fd;
+
+	fd = OpenTransientFile(path, O_RDONLY);
+	if (fd < 0)
+	{
+		/* Quote the path, matching the syncfs failure message below. */
+		ereport(LOG,
+				(errcode_for_file_access(),
+				 errmsg("could not open file \"%s\": %m", path)));
+		return;
+	}
+	if (syncfs(fd) < 0)
+		ereport(LOG,
+				(errcode_for_file_access(),
+				 errmsg("could not sync filesystem for \"%s\": %m", path)));
+	CloseTransientFile(fd);
+}
+#endif
/*
- * Issue fsync recursively on PGDATA and all its contents.
+ * Issue fsync recursively on PGDATA and all its contents, or issue syncfs for
+ * all potential filesystems, depending on recovery_init_sync_method.
*
* We fsync regular files and directories wherever they are, but we
* follow symlinks only for pg_wal and immediately under pg_tblspc.
@@ -3319,6 +3346,42 @@ SyncDataDirectory(void)
xlog_is_symlink = true;
#endif
+#ifdef HAVE_SYNCFS
+ if (recovery_init_sync_method == RECOVERY_INIT_SYNC_METHOD_SYNCFS)
+ {
+ DIR *dir;
+ struct dirent *de;
+
+ /*
+ * On Linux, we don't have to open every single file one by one. We
+ * can use syncfs() to sync whole filesystems. We only expect
+ * filesystem boundaries to exist where we tolerate symlinks, namely
+ * pg_wal and the tablespaces, so we call syncfs() for each of those
+ * directories.
+ */
+
+ /* Sync the top level pgdata directory. */
+ do_syncfs(".");
+ /* If any tablespaces are configured, sync each of those. */
+ dir = AllocateDir("pg_tblspc");
+ while ((de = ReadDirExtended(dir, "pg_tblspc", LOG)))
+ {
+ char path[MAXPGPATH];
+
+ if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
+ continue;
+
+ snprintf(path, MAXPGPATH, "pg_tblspc/%s", de->d_name);
+ do_syncfs(path);
+ }
+ FreeDir(dir);
+ /* If pg_wal is a symlink, process that too. */
+ if (xlog_is_symlink)
+ do_syncfs("pg_wal");
+ return;
+ }
+#endif /* HAVE_SYNCFS */
+
/*
* If possible, hint to the kernel that we're soon going to fsync the data
* directory and its contents. Errors in this step are even less
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index f720b093fe3..2964efda967 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -488,6 +488,14 @@ const struct config_enum_entry ssl_protocol_versions_info[] = {
StaticAssertDecl(lengthof(ssl_protocol_versions_info) == (PG_TLS1_3_VERSION + 2),
"array length mismatch");
+/*
+ * Accepted values for recovery_init_sync_method.  "syncfs" is offered only
+ * in builds where HAVE_SYNCFS is defined.  The table is read-only, so it is
+ * declared const.
+ */
+static const struct config_enum_entry recovery_init_sync_method_options[] = {
+	{"fsync", RECOVERY_INIT_SYNC_METHOD_FSYNC, false},
+#ifdef HAVE_SYNCFS
+	{"syncfs", RECOVERY_INIT_SYNC_METHOD_SYNCFS, false},
+#endif
+	{NULL, 0, false}
+};
+
static struct config_enum_entry shared_memory_options[] = {
#ifndef WIN32
{"sysv", SHMEM_TYPE_SYSV, false},
@@ -4871,6 +4879,15 @@ static struct config_enum ConfigureNamesEnum[] =
NULL, NULL, NULL
},
+ {
+ {"recovery_init_sync_method", PGC_POSTMASTER, ERROR_HANDLING_OPTIONS,
+ gettext_noop("Sets the method for synchronizing the data directory before crash recovery."),
+ },
+ &recovery_init_sync_method,
+ RECOVERY_INIT_SYNC_METHOD_FSYNC, recovery_init_sync_method_options,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, NULL, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index b0b49b38233..86425965d0d 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -761,6 +761,7 @@
#restart_after_crash = on # reinitialize after backend crash?
#remove_temp_files_after_crash = on # remove temporary files after
# backend crash?
+#recovery_init_sync_method = fsync # fsync, syncfs (Linux 5.8+)
#data_sync_retry = off # retry or panic on failure to fsync
# data?
# (change requires restart)
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 0a6422da4f3..d873c177cb7 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -590,6 +590,9 @@
/* Define to 1 if you have the `symlink' function. */
#undef HAVE_SYMLINK
+/* Define to 1 if you have the `syncfs' function. */
+#undef HAVE_SYNCFS
+
/* Define to 1 if you have the `sync_file_range' function. */
#undef HAVE_SYNC_FILE_RANGE
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 30bf7d2193f..328473bdc94 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -45,6 +45,11 @@
#include
+/*
+ * Possible values of the recovery_init_sync_method setting, which selects
+ * how SyncDataDirectory() does its job.
+ */
+typedef enum RecoveryInitSyncMethod {
+ RECOVERY_INIT_SYNC_METHOD_FSYNC, /* fsync PGDATA contents recursively (default) */
+ RECOVERY_INIT_SYNC_METHOD_SYNCFS /* call syncfs() to sync whole filesystems */
+} RecoveryInitSyncMethod;
+
struct iovec; /* avoid including port/pg_iovec.h here */
typedef int File;
@@ -53,6 +58,7 @@ typedef int File;
/* GUC parameter */
extern PGDLLIMPORT int max_files_per_process;
extern PGDLLIMPORT bool data_sync_retry;
+extern PGDLLIMPORT int recovery_init_sync_method;
/*
* This is private to fd.c, but exported for save/restore_backend_variables()
diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm
index 14605371bb1..ea8ed4be309 100644
--- a/src/tools/msvc/Solution.pm
+++ b/src/tools/msvc/Solution.pm
@@ -388,6 +388,7 @@ sub GenerateFiles
HAVE_STRUCT_TM_TM_ZONE => undef,
HAVE_SYNC_FILE_RANGE => undef,
HAVE_SYMLINK => 1,
+ HAVE_SYNCFS => undef,
HAVE_SYSLOG => undef,
HAVE_SYS_EPOLL_H => undef,
HAVE_SYS_EVENT_H => undef,