From 4ab8c81bd90ae442dbd092df04a12dbb7e68f562 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 27 Oct 2022 14:39:42 +0900 Subject: [PATCH] Move pg_pwritev_with_retry() to src/common/file_utils.c This commit moves pg_pwritev_with_retry(), a convenience wrapper of pg_pwritev() able to handle partial writes, to common/file_utils.c so that the frontend code is able to use it. A first use-case targeted for this routine is pg_basebackup and pg_receivewal, for the zero-padding of a newly-initialized WAL segment. This is used currently in the backend when the GUC wal_init_zero is enabled (default). Author: Bharath Rupireddy Reviewed-by: Nathan Bossart, Thomas Munro Discussion: https://postgr.es/m/CALj2ACUq7nAb7=bJNbK3yYmp-SZhJcXFR_pLk8un6XgDzDF3OA@mail.gmail.com --- src/backend/storage/file/fd.c | 65 -------------------------------- src/common/file_utils.c | 67 +++++++++++++++++++++++++++++++++ src/include/common/file_utils.h | 7 ++++ src/include/storage/fd.h | 6 --- 4 files changed, 74 insertions(+), 71 deletions(-) diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index e4d954578c8..4151cafec54 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -93,7 +93,6 @@ #include "common/pg_prng.h" #include "miscadmin.h" #include "pgstat.h" -#include "port/pg_iovec.h" #include "portability/mem.h" #include "postmaster/startup.h" #include "storage/fd.h" @@ -3738,67 +3737,3 @@ data_sync_elevel(int elevel) { return data_sync_retry ? elevel : PANIC; } - -/* - * A convenience wrapper for pg_pwritev() that retries on partial write. If an - * error is returned, it is unspecified how much has been written. - */ -ssize_t -pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset) -{ - struct iovec iov_copy[PG_IOV_MAX]; - ssize_t sum = 0; - ssize_t part; - - /* We'd better have space to make a copy, in case we need to retry. 
*/ - if (iovcnt > PG_IOV_MAX) - { - errno = EINVAL; - return -1; - } - - for (;;) - { - /* Write as much as we can. */ - part = pg_pwritev(fd, iov, iovcnt, offset); - if (part < 0) - return -1; - -#ifdef SIMULATE_SHORT_WRITE - part = Min(part, 4096); -#endif - - /* Count our progress. */ - sum += part; - offset += part; - - /* Step over iovecs that are done. */ - while (iovcnt > 0 && iov->iov_len <= part) - { - part -= iov->iov_len; - ++iov; - --iovcnt; - } - - /* Are they all done? */ - if (iovcnt == 0) - { - /* We don't expect the kernel to write more than requested. */ - Assert(part == 0); - break; - } - - /* - * Move whatever's left to the front of our mutable copy and adjust - * the leading iovec. - */ - Assert(iovcnt > 0); - memmove(iov_copy, iov, sizeof(*iov) * iovcnt); - Assert(iov->iov_len > part); - iov_copy[0].iov_base = (char *) iov_copy[0].iov_base + part; - iov_copy[0].iov_len -= part; - iov = iov_copy; - } - - return sum; -} diff --git a/src/common/file_utils.c b/src/common/file_utils.c index df4d6d240c0..eac05a13ed5 100644 --- a/src/common/file_utils.c +++ b/src/common/file_utils.c @@ -28,6 +28,7 @@ #ifdef FRONTEND #include "common/logging.h" #endif +#include "port/pg_iovec.h" #ifdef FRONTEND @@ -460,3 +461,69 @@ get_dirent_type(const char *path, return result; } + +/* + * pg_pwritev_with_retry + * + * Convenience wrapper for pg_pwritev() that retries on partial write. If an + * error is returned, it is unspecified how much has been written. + */ +ssize_t +pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset) +{ + struct iovec iov_copy[PG_IOV_MAX]; + ssize_t sum = 0; + ssize_t part; + + /* We'd better have space to make a copy, in case we need to retry. */ + if (iovcnt > PG_IOV_MAX) + { + errno = EINVAL; + return -1; + } + + for (;;) + { + /* Write as much as we can. 
*/ + part = pg_pwritev(fd, iov, iovcnt, offset); + if (part < 0) + return -1; + +#ifdef SIMULATE_SHORT_WRITE + part = Min(part, 4096); +#endif + + /* Count our progress. */ + sum += part; + offset += part; + + /* Step over iovecs that are done. */ + while (iovcnt > 0 && iov->iov_len <= part) + { + part -= iov->iov_len; + ++iov; + --iovcnt; + } + + /* Are they all done? */ + if (iovcnt == 0) + { + /* We don't expect the kernel to write more than requested. */ + Assert(part == 0); + break; + } + + /* + * Move whatever's left to the front of our mutable copy and adjust + * the leading iovec. + */ + Assert(iovcnt > 0); + memmove(iov_copy, iov, sizeof(*iov) * iovcnt); + Assert(iov->iov_len > part); + iov_copy[0].iov_base = (char *) iov_copy[0].iov_base + part; + iov_copy[0].iov_len -= part; + iov = iov_copy; + } + + return sum; +} diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h index 2811744c12f..2c5dbcb0b1e 100644 --- a/src/include/common/file_utils.h +++ b/src/include/common/file_utils.h @@ -24,6 +24,8 @@ typedef enum PGFileType PGFILETYPE_LNK } PGFileType; +struct iovec; /* avoid including port/pg_iovec.h here */ + #ifdef FRONTEND extern int fsync_fname(const char *fname, bool isdir); extern void fsync_pgdata(const char *pg_data, int serverVersion); @@ -37,4 +39,9 @@ extern PGFileType get_dirent_type(const char *path, bool look_through_symlinks, int elevel); +extern ssize_t pg_pwritev_with_retry(int fd, + const struct iovec *iov, + int iovcnt, + off_t offset); + #endif /* FILE_UTILS_H */ diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h index 5a48fccd9c2..c0a212487d9 100644 --- a/src/include/storage/fd.h +++ b/src/include/storage/fd.h @@ -51,8 +51,6 @@ typedef enum RecoveryInitSyncMethod RECOVERY_INIT_SYNC_METHOD_SYNCFS } RecoveryInitSyncMethod; -struct iovec; /* avoid including port/pg_iovec.h here */ - typedef int File; @@ -178,10 +176,6 @@ extern int pg_fsync_no_writethrough(int fd); extern int 
pg_fsync_writethrough(int fd); extern int pg_fdatasync(int fd); extern void pg_flush_data(int fd, off_t offset, off_t nbytes); -extern ssize_t pg_pwritev_with_retry(int fd, - const struct iovec *iov, - int iovcnt, - off_t offset); extern int pg_truncate(const char *path, off_t length); extern void fsync_fname(const char *fname, bool isdir); extern int fsync_fname_ext(const char *fname, bool isdir, bool ignore_perm, int elevel);