diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 89662cc0a36..dfa9d0d6410 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -1193,6 +1193,10 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
+
+ BaseBackupRead
+ Waiting for base backup to read from a file.
+
BufFileRead
Waiting for a read from a buffered file.
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index e96134dac8a..c022597bc09 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3931,6 +3931,9 @@ pgstat_get_wait_io(WaitEventIO w)
switch (w)
{
+ case WAIT_EVENT_BASEBACKUP_READ:
+ event_name = "BaseBackupRead";
+ break;
case WAIT_EVENT_BUFFILE_READ:
event_name = "BufFileRead";
break;
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index efcf1e6eb56..096b0fcef0d 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -81,6 +81,8 @@ static int compareWalFileNames(const ListCell *a, const ListCell *b);
static void throttle(size_t increment);
static void update_basebackup_progress(int64 delta);
static bool is_checksummed_file(const char *fullpath, const char *filename);
+static int basebackup_read_file(int fd, char *buf, size_t nbytes, off_t offset,
+ const char *filename, bool partial_read_ok);
/* Was the backup currently in-progress initiated in recovery mode? */
static bool backup_started_in_recovery = false;
@@ -98,18 +100,6 @@ static char *statrelpath = NULL;
*/
#define THROTTLING_FREQUENCY 8
-/*
- * Checks whether we encountered any error in fread(). fread() doesn't give
- * any clue what has happened, so we check with ferror(). Also, neither
- * fread() nor ferror() set errno, so we just throw a generic error.
- */
-#define CHECK_FREAD_ERROR(fp, filename) \
-do { \
- if (ferror(fp)) \
- ereport(ERROR, \
- (errmsg("could not read from file \"%s\"", filename))); \
-} while (0)
-
/* The actual number of bytes, transfer of which may cause sleep. */
static uint64 throttling_sample;
@@ -600,7 +590,7 @@ perform_base_backup(basebackup_options *opt)
foreach(lc, walFileList)
{
char *walFileName = (char *) lfirst(lc);
- FILE *fp;
+ int fd;
char buf[TAR_SEND_SIZE];
size_t cnt;
pgoff_t len = 0;
@@ -608,8 +598,8 @@ perform_base_backup(basebackup_options *opt)
snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", walFileName);
XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
- fp = AllocateFile(pathbuf, "rb");
- if (fp == NULL)
+ fd = OpenTransientFile(pathbuf, O_RDONLY | PG_BINARY);
+ if (fd < 0)
{
int save_errno = errno;
@@ -626,7 +616,7 @@ perform_base_backup(basebackup_options *opt)
errmsg("could not open file \"%s\": %m", pathbuf)));
}
- if (fstat(fileno(fp), &statbuf) != 0)
+ if (fstat(fd, &statbuf) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m",
@@ -642,9 +632,10 @@ perform_base_backup(basebackup_options *opt)
/* send the WAL file itself */
_tarWriteHeader(pathbuf, NULL, &statbuf, false);
- while ((cnt = fread(buf, 1,
- Min(sizeof(buf), wal_segment_size - len),
- fp)) > 0)
+ while ((cnt = basebackup_read_file(fd, buf,
+ Min(sizeof(buf),
+ wal_segment_size - len),
+ len, pathbuf, true)) > 0)
{
CheckXLogRemoved(segno, tli);
/* Send the chunk as a CopyData message */
@@ -660,8 +651,6 @@ perform_base_backup(basebackup_options *opt)
break;
}
- CHECK_FREAD_ERROR(fp, pathbuf);
-
if (len != wal_segment_size)
{
CheckXLogRemoved(segno, tli);
@@ -676,7 +665,7 @@ perform_base_backup(basebackup_options *opt)
*/
Assert(wal_segment_size % TAR_BLOCK_SIZE == 0);
- FreeFile(fp);
+ CloseTransientFile(fd);
/*
* Mark file as archived, otherwise files can get archived again
@@ -1575,7 +1564,7 @@ sendFile(const char *readfilename, const char *tarfilename,
struct stat *statbuf, bool missing_ok, Oid dboid,
backup_manifest_info *manifest, const char *spcoid)
{
- FILE *fp;
+ int fd;
BlockNumber blkno = 0;
bool block_retry = false;
char buf[TAR_SEND_SIZE];
@@ -1594,8 +1583,8 @@ sendFile(const char *readfilename, const char *tarfilename,
pg_checksum_init(&checksum_ctx, manifest->checksum_type);
- fp = AllocateFile(readfilename, "rb");
- if (fp == NULL)
+ fd = OpenTransientFile(readfilename, O_RDONLY | PG_BINARY);
+ if (fd < 0)
{
if (errno == ENOENT && missing_ok)
return false;
@@ -1637,8 +1626,27 @@ sendFile(const char *readfilename, const char *tarfilename,
}
}
- while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)
+ /*
+ * Loop until we read the amount of data the caller told us to expect. The
+ * file could be longer, if it was extended while we were sending it, but
+ * for a base backup we can ignore such extended data. It will be restored
+ * from WAL.
+ */
+ while (len < statbuf->st_size)
{
+ /* Try to read some more data. */
+ cnt = basebackup_read_file(fd, buf,
+ Min(sizeof(buf), statbuf->st_size - len),
+ len, readfilename, true);
+
+ /*
+ * If we hit end-of-file, a concurrent truncation must have occurred.
+ * That's not an error condition, because WAL replay will fix things
+ * up.
+ */
+ if (cnt == 0)
+ break;
+
/*
* The checksums are verified at block level, so we iterate over the
* buffer in chunks of BLCKSZ, after making sure that
@@ -1689,16 +1697,15 @@ sendFile(const char *readfilename, const char *tarfilename,
*/
if (block_retry == false)
{
- /* Reread the failed block */
- if (fseek(fp, -(cnt - BLCKSZ * i), SEEK_CUR) == -1)
- {
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not fseek in file \"%s\": %m",
- readfilename)));
- }
+ int reread_cnt;
- if (fread(buf + BLCKSZ * i, 1, BLCKSZ, fp) != BLCKSZ)
+ /* Reread the failed block */
+ reread_cnt =
+ basebackup_read_file(fd, buf + BLCKSZ * i,
+ BLCKSZ, len + BLCKSZ * i,
+ readfilename,
+ false);
+ if (reread_cnt == 0)
{
/*
* If we hit end-of-file, a concurrent
@@ -1708,24 +1715,8 @@ sendFile(const char *readfilename, const char *tarfilename,
* code that handles that case. (We must fix
* up cnt first, though.)
*/
- if (feof(fp))
- {
- cnt = BLCKSZ * i;
- break;
- }
-
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not reread block %d of file \"%s\": %m",
- blkno, readfilename)));
- }
-
- if (fseek(fp, cnt - BLCKSZ * i - BLCKSZ, SEEK_CUR) == -1)
- {
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not fseek in file \"%s\": %m",
- readfilename)));
+ cnt = BLCKSZ * i;
+ break;
}
/* Set flag so we know a retry was attempted */
@@ -1768,20 +1759,8 @@ sendFile(const char *readfilename, const char *tarfilename,
len += cnt;
throttle(cnt);
-
- if (feof(fp) || len >= statbuf->st_size)
- {
- /*
- * Reached end of file. The file could be longer, if it was
- * extended while we were sending it, but for a base backup we can
- * ignore such extended data. It will be restored from WAL.
- */
- break;
- }
}
- CHECK_FREAD_ERROR(fp, readfilename);
-
/* If the file was truncated while we were sending it, pad it with zeros */
if (len < statbuf->st_size)
{
@@ -1810,7 +1789,7 @@ sendFile(const char *readfilename, const char *tarfilename,
update_basebackup_progress(pad);
}
- FreeFile(fp);
+ CloseTransientFile(fd);
if (checksum_failures > 1)
{
@@ -1996,3 +1975,35 @@ update_basebackup_progress(int64 delta)
pgstat_progress_update_multi_param(nparam, index, val);
}
+
+/*
+ * Read some data from a file, setting a wait event and reporting any error
+ * encountered.
+ *
+ * If partial_read_ok is false, also report an error if the number of bytes
+ * read is not equal to the number of bytes requested.
+ *
+ * Returns the number of bytes read.
+ */
+static int
+basebackup_read_file(int fd, char *buf, size_t nbytes, off_t offset,
+ const char *filename, bool partial_read_ok)
+{
+ int rc;
+
+ pgstat_report_wait_start(WAIT_EVENT_BASEBACKUP_READ);
+ rc = pg_pread(fd, buf, nbytes, offset);
+ pgstat_report_wait_end();
+
+ if (rc < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m", filename)));
+ if (!partial_read_ok && rc > 0 && rc != nbytes)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": read %d of %zu",
+ filename, rc, nbytes)));
+
+ return rc;
+}
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index c55dc1481ca..13872013823 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -913,7 +913,8 @@ typedef enum
*/
typedef enum
{
- WAIT_EVENT_BUFFILE_READ = PG_WAIT_IO,
+ WAIT_EVENT_BASEBACKUP_READ = PG_WAIT_IO,
+ WAIT_EVENT_BUFFILE_READ,
WAIT_EVENT_BUFFILE_WRITE,
WAIT_EVENT_CONTROL_FILE_READ,
WAIT_EVENT_CONTROL_FILE_SYNC,