1
0
mirror of https://github.com/postgres/postgres.git synced 2025-12-21 05:21:08 +03:00

initdb: Add --no-sync-data-files.

This new option instructs initdb to skip synchronizing any files
in database directories, the database directories themselves, and
the tablespace directories, i.e., everything in the base/
subdirectory and any other tablespace directories.  Other files,
such as those in pg_wal/ and pg_xact/, will still be synchronized
unless --no-sync is also specified.  --no-sync-data-files is
primarily intended for internal use by tools that separately ensure
the skipped files are synchronized to disk.  A follow-up commit
will use this to help optimize pg_upgrade's file transfer step.

The --sync-method=fsync implementation of this option makes use of
a new exclude_dir parameter for walkdir().  When not NULL,
exclude_dir specifies a directory to skip processing.  The
--sync-method=syncfs implementation of this option just skips
synchronizing the non-default tablespace directories.  This means
that initdb will still synchronize some or all of the database
files, but there's not much we can do about that.

Discussion: https://postgr.es/m/Zyvop-LxLXBLrZil%40nathan
This commit is contained in:
Nathan Bossart
2025-03-25 16:02:35 -05:00
parent 650ab8aaf1
commit cf131fa942
9 changed files with 99 additions and 40 deletions

View File

@@ -50,7 +50,8 @@ static int pre_sync_fname(const char *fname, bool isdir);
#endif
static void walkdir(const char *path,
int (*action) (const char *fname, bool isdir),
bool process_symlinks);
bool process_symlinks,
const char *exclude_dir);
#ifdef HAVE_SYNCFS
@@ -93,11 +94,15 @@ do_syncfs(const char *path)
* syncing, and might not have privileges to write at all.
*
* serverVersion indicates the version of the server to be sync'd.
*
* If sync_data_files is false, this function skips syncing "base/" and any
* other tablespace directories.
*/
void
sync_pgdata(const char *pg_data,
int serverVersion,
DataDirSyncMethod sync_method)
DataDirSyncMethod sync_method,
bool sync_data_files)
{
bool xlog_is_symlink;
char pg_wal[MAXPGPATH];
@@ -147,30 +152,33 @@ sync_pgdata(const char *pg_data,
do_syncfs(pg_data);
/* If any tablespaces are configured, sync each of those. */
dir = opendir(pg_tblspc);
if (dir == NULL)
pg_log_error("could not open directory \"%s\": %m",
pg_tblspc);
else
if (sync_data_files)
{
while (errno = 0, (de = readdir(dir)) != NULL)
{
char subpath[MAXPGPATH * 2];
if (strcmp(de->d_name, ".") == 0 ||
strcmp(de->d_name, "..") == 0)
continue;
snprintf(subpath, sizeof(subpath), "%s/%s",
pg_tblspc, de->d_name);
do_syncfs(subpath);
}
if (errno)
pg_log_error("could not read directory \"%s\": %m",
dir = opendir(pg_tblspc);
if (dir == NULL)
pg_log_error("could not open directory \"%s\": %m",
pg_tblspc);
else
{
while (errno = 0, (de = readdir(dir)) != NULL)
{
char subpath[MAXPGPATH * 2];
(void) closedir(dir);
if (strcmp(de->d_name, ".") == 0 ||
strcmp(de->d_name, "..") == 0)
continue;
snprintf(subpath, sizeof(subpath), "%s/%s",
pg_tblspc, de->d_name);
do_syncfs(subpath);
}
if (errno)
pg_log_error("could not read directory \"%s\": %m",
pg_tblspc);
(void) closedir(dir);
}
}
/* If pg_wal is a symlink, process that too. */
@@ -182,15 +190,21 @@ sync_pgdata(const char *pg_data,
case DATA_DIR_SYNC_METHOD_FSYNC:
{
char *exclude_dir = NULL;
if (!sync_data_files)
exclude_dir = psprintf("%s/base", pg_data);
/*
* If possible, hint to the kernel that we're soon going to
* fsync the data directory and its contents.
*/
#ifdef PG_FLUSH_DATA_WORKS
walkdir(pg_data, pre_sync_fname, false);
walkdir(pg_data, pre_sync_fname, false, exclude_dir);
if (xlog_is_symlink)
walkdir(pg_wal, pre_sync_fname, false);
walkdir(pg_tblspc, pre_sync_fname, true);
walkdir(pg_wal, pre_sync_fname, false, NULL);
if (sync_data_files)
walkdir(pg_tblspc, pre_sync_fname, true, NULL);
#endif
/*
@@ -203,10 +217,14 @@ sync_pgdata(const char *pg_data,
* get fsync'd twice. That's not an expected case so we don't
* worry about optimizing it.
*/
walkdir(pg_data, fsync_fname, false);
walkdir(pg_data, fsync_fname, false, exclude_dir);
if (xlog_is_symlink)
walkdir(pg_wal, fsync_fname, false);
walkdir(pg_tblspc, fsync_fname, true);
walkdir(pg_wal, fsync_fname, false, NULL);
if (sync_data_files)
walkdir(pg_tblspc, fsync_fname, true, NULL);
if (exclude_dir)
pfree(exclude_dir);
}
break;
}
@@ -245,10 +263,10 @@ sync_dir_recurse(const char *dir, DataDirSyncMethod sync_method)
* fsync the data directory and its contents.
*/
#ifdef PG_FLUSH_DATA_WORKS
walkdir(dir, pre_sync_fname, false);
walkdir(dir, pre_sync_fname, false, NULL);
#endif
walkdir(dir, fsync_fname, false);
walkdir(dir, fsync_fname, false, NULL);
}
break;
}
@@ -264,6 +282,9 @@ sync_dir_recurse(const char *dir, DataDirSyncMethod sync_method)
* ignored in subdirectories, ie we intentionally don't pass down the
* process_symlinks flag to recursive calls.
*
* If exclude_dir is not NULL, it specifies a directory path to skip
* processing.
*
* Errors are reported but not considered fatal.
*
* See also walkdir in fd.c, which is a backend version of this logic.
@@ -271,11 +292,15 @@ sync_dir_recurse(const char *dir, DataDirSyncMethod sync_method)
static void
walkdir(const char *path,
int (*action) (const char *fname, bool isdir),
bool process_symlinks)
bool process_symlinks,
const char *exclude_dir)
{
DIR *dir;
struct dirent *de;
if (exclude_dir && strcmp(exclude_dir, path) == 0)
return;
dir = opendir(path);
if (dir == NULL)
{
@@ -299,7 +324,7 @@ walkdir(const char *path,
(*action) (subpath, false);
break;
case PGFILETYPE_DIR:
walkdir(subpath, action, false);
walkdir(subpath, action, false, exclude_dir);
break;
default: