diff --git a/doc/src/sgml/ref/pg_combinebackup.sgml b/doc/src/sgml/ref/pg_combinebackup.sgml index 6f90dba281f..658e9a759c8 100644 --- a/doc/src/sgml/ref/pg_combinebackup.sgml +++ b/doc/src/sgml/ref/pg_combinebackup.sgml @@ -185,6 +185,51 @@ PostgreSQL documentation + + + + + Use efficient file cloning (also known as reflinks on + some systems) instead of copying files to the new data directory, + which can result in near-instantaneous copying of the data files. + + + + If a backup manifest is not available or does not contain a checksum of + the right type, file cloning will be used to copy the file, but the + file will also be read block-by-block for the checksum calculation. + + + + File cloning is only supported on some operating systems and file + systems. If it is selected but not supported, the + pg_combinebackup run will fail with an error. At present, + it is supported on Linux (kernel 4.5 or later) with Btrfs and XFS (on + file systems created with reflink support), and on macOS with APFS. + + + + + + + + + Use the copy_file_range system call for efficient + copying. On some file systems this gives results similar to + --clone, sharing physical disk blocks, while on others + it may still copy blocks, but do so via an optimized path. At present, + it is supported on Linux and FreeBSD. + + + + If a backup manifest is not available or does not contain a checksum of + the right type, copy_file_range will be used to + copy the file, but the file will also be read block-by-block for the + checksum calculation. 
+ + + +
diff --git a/src/bin/pg_combinebackup/copy_file.c b/src/bin/pg_combinebackup/copy_file.c
index e6d2423278a..844896d66eb 100644
--- a/src/bin/pg_combinebackup/copy_file.c
+++ b/src/bin/pg_combinebackup/copy_file.c
@@ -14,6 +14,11 @@
 #include <copyfile.h>
 #endif
+#ifdef __linux__
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#endif
 #include <fcntl.h>
+#include <limits.h>
 #include <sys/stat.h>
 #include <unistd.h>
 
@@ -24,8 +29,15 @@
 static void copy_file_blocks(const char *src, const char *dst,
 							 pg_checksum_context *checksum_ctx);
 
+static void copy_file_clone(const char *src, const char *dst,
+							pg_checksum_context *checksum_ctx);
+
+static void copy_file_by_range(const char *src, const char *dst,
+							   pg_checksum_context *checksum_ctx);
+
 #ifdef WIN32
-static void copy_file_copyfile(const char *src, const char *dst);
+static void copy_file_copyfile(const char *src, const char *dst,
+							   pg_checksum_context *checksum_ctx);
 #endif
 
 /*
@@ -35,8 +47,13 @@ static void copy_file_copyfile(const char *src, const char *dst);
  */
 void
 copy_file(const char *src, const char *dst,
-		  pg_checksum_context *checksum_ctx, bool dry_run)
+		  pg_checksum_context *checksum_ctx,
+		  CopyMethod copy_method, bool dry_run)
 {
+	char	   *strategy_name = NULL;
+	void		(*strategy_implementation) (const char *, const char *,
+											pg_checksum_context *checksum_ctx) = NULL;
+
 	/*
 	 * In dry-run mode, we don't actually copy anything, nor do we read any
 	 * data from the source file, but we do verify that we can open it.
@@ -51,61 +68,96 @@ copy_file(const char *src, const char *dst,
 			pg_fatal("could not close \"%s\": %m", src);
 	}
 
-	/*
-	 * If we don't need to compute a checksum, then we can use any special
-	 * operating system primitives that we know about to copy the file; this
-	 * may be quicker than a naive block copy.
-	 */
-	if (checksum_ctx->type == CHECKSUM_TYPE_NONE)
-	{
-		char	   *strategy_name = NULL;
-		void		(*strategy_implementation) (const char *, const char *) = NULL;
-
 #ifdef WIN32
-		strategy_name = "CopyFile";
-		strategy_implementation = copy_file_copyfile;
+	/* Default to CopyFile() on Windows, but honor an explicitly chosen method. */
+	if (copy_method == COPY_METHOD_COPY)
+		copy_method = COPY_METHOD_COPYFILE;
 #endif
 
-		if (strategy_name != NULL)
-		{
-			if (dry_run)
-				pg_log_debug("would copy \"%s\" to \"%s\" using strategy %s",
-							 src, dst, strategy_name);
-			else
-			{
-				pg_log_debug("copying \"%s\" to \"%s\" using strategy %s",
-							 src, dst, strategy_name);
-				(*strategy_implementation) (src, dst);
-			}
-			return;
-		}
+	/* Pick the copy implementation, and its name for use in log messages. */
+	switch (copy_method)
+	{
+		case COPY_METHOD_CLONE:
+			strategy_name = "clone";
+			strategy_implementation = copy_file_clone;
+			break;
+		case COPY_METHOD_COPY:
+			/* leave NULL for simple block-by-block copy */
+			strategy_implementation = copy_file_blocks;
+			break;
+		case COPY_METHOD_COPY_FILE_RANGE:
+			strategy_name = "copy_file_range";
+			strategy_implementation = copy_file_by_range;
+			break;
+#ifdef WIN32
+		case COPY_METHOD_COPYFILE:
+			strategy_name = "CopyFile";
+			strategy_implementation = copy_file_copyfile;
+			break;
+#endif
 	}
 
-	/*
-	 * Fall back to the simple approach of reading and writing all the blocks,
-	 * feeding them into the checksum context as we go.
-	 */
 	if (dry_run)
 	{
-		if (checksum_ctx->type == CHECKSUM_TYPE_NONE)
+		if (strategy_name)
+			pg_log_debug("would copy \"%s\" to \"%s\" using strategy %s",
+						 src, dst, strategy_name);
+		else
 			pg_log_debug("would copy \"%s\" to \"%s\"",
 						 src, dst);
-		else
-			pg_log_debug("would copy \"%s\" to \"%s\" and checksum with %s",
-						 src, dst, pg_checksum_type_name(checksum_ctx->type));
 	}
 	else
 	{
-		if (checksum_ctx->type == CHECKSUM_TYPE_NONE)
+		if (strategy_name)
+			pg_log_debug("copying \"%s\" to \"%s\" using strategy %s",
+						 src, dst, strategy_name);
+		else if (checksum_ctx->type == CHECKSUM_TYPE_NONE)
 			pg_log_debug("copying \"%s\" to \"%s\"",
 						 src, dst);
 		else
 			pg_log_debug("copying \"%s\" to \"%s\" and checksumming with %s",
 						 src, dst, pg_checksum_type_name(checksum_ctx->type));
-		copy_file_blocks(src, dst, checksum_ctx);
+
+		strategy_implementation(src, dst, checksum_ctx);
 	}
 }
 
+/*
+ * Calculate checksum for the src file.
+ *
+ * Does nothing when the checksum type is CHECKSUM_TYPE_NONE; otherwise reads
+ * the whole file and feeds it into checksum_ctx.  Any failure is fatal.
+ */
+static void
+checksum_file(const char *src, pg_checksum_context *checksum_ctx)
+{
+	int			src_fd;
+	uint8	   *buffer;
+	const int	buffer_size = 50 * BLCKSZ;
+	ssize_t		rb;
+
+	/* bail out if no checksum needed */
+	if (checksum_ctx->type == CHECKSUM_TYPE_NONE)
+		return;
+
+	if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
+		pg_fatal("could not open file \"%s\": %m", src);
+
+	buffer = pg_malloc(buffer_size);
+
+	while ((rb = read(src_fd, buffer, buffer_size)) > 0)
+	{
+		if (pg_checksum_update(checksum_ctx, buffer, rb) < 0)
+			pg_fatal("could not update checksum of file \"%s\"", src);
+	}
+
+	if (rb < 0)
+		pg_fatal("could not read file \"%s\": %m", src);
+
+	pg_free(buffer);
+	close(src_fd);
+}
+
 /*
  * Copy a file block by block, and optionally compute a checksum as we go.
  */
@@ -156,14 +208,106 @@ copy_file_blocks(const char *src, const char *dst,
 	close(dest_fd);
 }
 
+/*
+ * copy_file_clone
+ *		Clones/reflinks a file from src to dest.
+ *
+ * If needed, also reads the file and calculates the checksum.
+ */
+static void
+copy_file_clone(const char *src, const char *dest,
+				pg_checksum_context *checksum_ctx)
+{
+#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
+	if (copyfile(src, dest, NULL, COPYFILE_CLONE_FORCE) < 0)
+		pg_fatal("error while cloning file \"%s\" to \"%s\": %m", src, dest);
+#elif defined(__linux__) && defined(FICLONE)
+	{
+		int			src_fd;
+		int			dest_fd;
+
+		if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
+			pg_fatal("could not open file \"%s\": %m", src);
+
+		if ((dest_fd = open(dest, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
+							pg_file_create_mode)) < 0)
+			pg_fatal("could not create file \"%s\": %m", dest);
+
+		if (ioctl(dest_fd, FICLONE, src_fd) < 0)
+		{
+			/* unlink() may clobber errno, so save it for the message */
+			int			save_errno = errno;
+
+			unlink(dest);
+
+			pg_fatal("error while cloning file \"%s\" to \"%s\": %s",
+					 src, dest, strerror(save_errno));
+		}
+
+		close(src_fd);
+		close(dest_fd);
+	}
+#else
+	pg_fatal("file cloning not supported on this platform");
+#endif
+
+	/* if needed, calculate checksum of the file */
+	checksum_file(src, checksum_ctx);
+}
+
+/*
+ * copy_file_by_range
+ *		Copies a file from src to dest using copy_file_range system call.
+ *
+ * If needed, also reads the file and calculates the checksum.
+ */
+static void
+copy_file_by_range(const char *src, const char *dest,
+				   pg_checksum_context *checksum_ctx)
+{
+#if defined(HAVE_COPY_FILE_RANGE)
+	int			src_fd;
+	int			dest_fd;
+	ssize_t		nbytes;
+
+	if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
+		pg_fatal("could not open file \"%s\": %m", src);
+
+	if ((dest_fd = open(dest, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
+						pg_file_create_mode)) < 0)
+		pg_fatal("could not create file \"%s\": %m", dest);
+
+	/* repeat until a call reports that no more bytes were copied */
+	do
+	{
+		nbytes = copy_file_range(src_fd, NULL, dest_fd, NULL, SSIZE_MAX, 0);
+		if (nbytes < 0)
+			pg_fatal("error while copying file range from \"%s\" to \"%s\": %m",
+					 src, dest);
+	} while (nbytes > 0);
+
+	close(src_fd);
+	close(dest_fd);
+#else
+	pg_fatal("copy_file_range not supported on this platform");
+#endif
+
+	/* if needed, calculate checksum of the file */
+	checksum_file(src, checksum_ctx);
+}
+
 #ifdef WIN32
 static void
-copy_file_copyfile(const char *src, const char *dst)
+copy_file_copyfile(const char *src, const char *dst,
+				   pg_checksum_context *checksum_ctx)
 {
 	if (CopyFile(src, dst, true) == 0)
 	{
 		_dosmaperr(GetLastError());
 		pg_fatal("could not copy \"%s\" to \"%s\": %m", src, dst);
 	}
+
+	/* if needed, calculate checksum of the file */
+	checksum_file(src, checksum_ctx);
 }
 #endif /* WIN32 */
diff --git a/src/bin/pg_combinebackup/copy_file.h b/src/bin/pg_combinebackup/copy_file.h
index 0f6bc09403f..cedb7607385 100644
--- a/src/bin/pg_combinebackup/copy_file.h
+++ b/src/bin/pg_combinebackup/copy_file.h
@@ -11,9 +11,25 @@
 #ifndef COPY_FILE_H
 #define COPY_FILE_H
 
+#include "c.h"
 #include "common/checksum_helper.h"
+#include "common/file_utils.h"
+
+/*
+ * Enumeration to denote copy modes.
+ */ +typedef enum CopyMethod +{ + COPY_METHOD_CLONE, /* --clone: clone (reflink) files */ + COPY_METHOD_COPY, /* default: ordinary block-by-block copy */ + COPY_METHOD_COPY_FILE_RANGE, /* --copy-file-range: use copy_file_range() */ +#ifdef WIN32 + COPY_METHOD_COPYFILE, /* Windows CopyFile() */ +#endif +} CopyMethod; extern void copy_file(const char *src, const char *dst, - pg_checksum_context *checksum_ctx, bool dry_run); + pg_checksum_context *checksum_ctx, + CopyMethod copy_method, bool dry_run); #endif /* COPY_FILE_H */ diff --git a/src/bin/pg_combinebackup/pg_combinebackup.c b/src/bin/pg_combinebackup/pg_combinebackup.c index 74f8be9eeac..1b07ca3fb64 100644 --- a/src/bin/pg_combinebackup/pg_combinebackup.c +++ b/src/bin/pg_combinebackup/pg_combinebackup.c @@ -69,6 +69,7 @@ typedef struct cb_options pg_checksum_type manifest_checksums; bool no_manifest; DataDirSyncMethod sync_method; + CopyMethod copy_method; /* how to copy files; main() defaults it to COPY_METHOD_COPY */ } cb_options; /* @@ -129,6 +130,8 @@ main(int argc, char *argv[]) {"manifest-checksums", required_argument, NULL, 1}, {"no-manifest", no_argument, NULL, 2}, {"sync-method", required_argument, NULL, 3}, + {"clone", no_argument, NULL, 4}, + {"copy-file-range", no_argument, NULL, 5}, {NULL, 0, NULL, 0} }; @@ -156,6 +159,7 @@ main(int argc, char *argv[]) memset(&opt, 0, sizeof(opt)); opt.manifest_checksums = CHECKSUM_TYPE_CRC32C; opt.sync_method = DATA_DIR_SYNC_METHOD_FSYNC; + opt.copy_method = COPY_METHOD_COPY; /* process command-line options */ while ((c = getopt_long(argc, argv, "dnNPo:T:", @@ -192,6 +196,12 @@ main(int argc, char *argv[]) if (!parse_sync_method(optarg, &opt.sync_method)) exit(1); break; + case 4: /* --clone */ + opt.copy_method = COPY_METHOD_CLONE; + break; + case 5: /* --copy-file-range */ + opt.copy_method = COPY_METHOD_COPY_FILE_RANGE; + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -213,6 +223,35 @@ main(int argc, char *argv[]) if (opt.no_manifest) opt.manifest_checksums = CHECKSUM_TYPE_NONE; + /* Check that the platform supports the requested copy method. 
*/ + if (opt.copy_method == COPY_METHOD_CLONE) + { +#if (defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)) || \ + (defined(__linux__) && defined(FICLONE)) + + if (opt.dry_run) + pg_log_debug("would use cloning to copy files"); + else + pg_log_debug("will use cloning to copy files"); + +#else /* no cloning primitive detected at compile time */ + pg_fatal("file cloning not supported on this platform"); +#endif + } + else if (opt.copy_method == COPY_METHOD_COPY_FILE_RANGE) + { +#if defined(HAVE_COPY_FILE_RANGE) + + if (opt.dry_run) + pg_log_debug("would use copy_file_range to copy blocks"); + else + pg_log_debug("will use copy_file_range to copy blocks"); + +#else /* copy_file_range() not detected at compile time */ + pg_fatal("copy_file_range not supported on this platform"); +#endif + } + /* Read the server version from the final backup. */ version = read_pg_version_file(argv[argc - 1]); @@ -696,6 +735,8 @@ help(const char *progname) " use algorithm for manifest checksums\n")); printf(_(" --no-manifest suppress generation of backup manifest\n")); printf(_(" --sync-method=METHOD set method for syncing files to disk\n")); + printf(_(" --clone clone (reflink) instead of copying files\n")); + printf(_(" --copy-file-range copy using copy_file_range() syscall\n")); printf(_(" -?, --help show this help, then exit\n")); printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); @@ -936,6 +977,7 @@ process_directory_recursively(Oid tsoid, checksum_type, &checksum_length, &checksum_payload, + opt->copy_method, opt->debug, opt->dry_run); } @@ -993,7 +1035,8 @@ process_directory_recursively(Oid tsoid, /* Actually copy the file. 
*/ snprintf(ofullpath, MAXPGPATH, "%s/%s", ofulldir, de->d_name); - copy_file(ifullpath, ofullpath, &checksum_ctx, opt->dry_run); + copy_file(ifullpath, ofullpath, &checksum_ctx, + opt->copy_method, opt->dry_run); /* * If copy_file() performed a checksum calculation for us, then diff --git a/src/bin/pg_combinebackup/reconstruct.c b/src/bin/pg_combinebackup/reconstruct.c index 33c6da02a8c..b083c5ce15b 100644 --- a/src/bin/pg_combinebackup/reconstruct.c +++ b/src/bin/pg_combinebackup/reconstruct.c @@ -89,6 +89,7 @@ reconstruct_from_incremental_file(char *input_filename, pg_checksum_type checksum_type, int *checksum_length, uint8 **checksum_payload, + CopyMethod copy_method, bool debug, bool dry_run) { @@ -319,7 +320,7 @@ reconstruct_from_incremental_file(char *input_filename, */ if (copy_source != NULL) copy_file(copy_source->filename, output_filename, - &checksum_ctx, dry_run); + &checksum_ctx, copy_method, dry_run); else { write_reconstructed_file(input_filename, output_filename, diff --git a/src/bin/pg_combinebackup/reconstruct.h b/src/bin/pg_combinebackup/reconstruct.h index 8e33a8a95a0..902a8e9abb5 100644 --- a/src/bin/pg_combinebackup/reconstruct.h +++ b/src/bin/pg_combinebackup/reconstruct.h @@ -27,6 +27,7 @@ extern void reconstruct_from_incremental_file(char *input_filename, pg_checksum_type checksum_type, int *checksum_length, uint8 **checksum_payload, + CopyMethod copy_method, bool debug, bool dry_run); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index f3b8641d767..01845ee71d5 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -480,6 +480,7 @@ CopyFromState CopyFromStateData CopyHeaderChoice CopyInsertMethod CopyLogVerbosityChoice +CopyMethod CopyMultiInsertBuffer CopyMultiInsertInfo