1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-05 07:41:25 +03:00

Provide vectored variants of FileRead() and FileWrite().

FileReadV() and FileWriteV() adapt pg_preadv() and pg_pwritev() for
fd.c's virtual file descriptors.  The simple FileRead() and FileWrite()
functions are now implemented in terms of the vectored functions, to
avoid code duplication, and they are converted back to the corresponding
simple system calls further down (commit 15c9ac36).  Later work will
make more interesting multi-iovec calls.

The traditional behavior of reporting a "fake" ENOSPC error is
simplified.  errno is now always set to ENOSPC after non-failing writes,
for the benefit of callers that expect to log a meaningful "%m" if they
determine that the write was short.  (Perhaps we should consider getting
rid of that expectation one day.)

Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi>
Discussion: https://postgr.es/m/CA+hUKGJkOiOCa+mag4BF+zHo7qo=o9CFheB8=g6uT5TUm2gkvA@mail.gmail.com
This commit is contained in:
Thomas Munro
2023-12-12 11:56:11 +13:00
parent 0c6be59f5e
commit 871fe4917e
2 changed files with 54 additions and 21 deletions

View File

@@ -2110,7 +2110,7 @@ FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
} }
int int
FileRead(File file, void *buffer, size_t amount, off_t offset, FileReadV(File file, const struct iovec *iov, int iovcnt, off_t offset,
uint32 wait_event_info) uint32 wait_event_info)
{ {
int returnCode; int returnCode;
@@ -2118,10 +2118,10 @@ FileRead(File file, void *buffer, size_t amount, off_t offset,
Assert(FileIsValid(file)); Assert(FileIsValid(file));
DO_DB(elog(LOG, "FileRead: %d (%s) " INT64_FORMAT " %zu %p", DO_DB(elog(LOG, "FileReadV: %d (%s) " INT64_FORMAT " %d",
file, VfdCache[file].fileName, file, VfdCache[file].fileName,
(int64) offset, (int64) offset,
amount, buffer)); iovcnt));
returnCode = FileAccess(file); returnCode = FileAccess(file);
if (returnCode < 0) if (returnCode < 0)
@@ -2131,7 +2131,7 @@ FileRead(File file, void *buffer, size_t amount, off_t offset,
retry: retry:
pgstat_report_wait_start(wait_event_info); pgstat_report_wait_start(wait_event_info);
returnCode = pg_pread(vfdP->fd, buffer, amount, offset); returnCode = pg_preadv(vfdP->fd, iov, iovcnt, offset);
pgstat_report_wait_end(); pgstat_report_wait_end();
if (returnCode < 0) if (returnCode < 0)
@@ -2166,7 +2166,7 @@ retry:
} }
int int
FileWrite(File file, const void *buffer, size_t amount, off_t offset, FileWriteV(File file, const struct iovec *iov, int iovcnt, off_t offset,
uint32 wait_event_info) uint32 wait_event_info)
{ {
int returnCode; int returnCode;
@@ -2174,10 +2174,10 @@ FileWrite(File file, const void *buffer, size_t amount, off_t offset,
Assert(FileIsValid(file)); Assert(FileIsValid(file));
DO_DB(elog(LOG, "FileWrite: %d (%s) " INT64_FORMAT " %zu %p", DO_DB(elog(LOG, "FileWriteV: %d (%s) " INT64_FORMAT " %d",
file, VfdCache[file].fileName, file, VfdCache[file].fileName,
(int64) offset, (int64) offset,
amount, buffer)); iovcnt));
returnCode = FileAccess(file); returnCode = FileAccess(file);
if (returnCode < 0) if (returnCode < 0)
@@ -2195,7 +2195,10 @@ FileWrite(File file, const void *buffer, size_t amount, off_t offset,
*/ */
if (temp_file_limit >= 0 && (vfdP->fdstate & FD_TEMP_FILE_LIMIT)) if (temp_file_limit >= 0 && (vfdP->fdstate & FD_TEMP_FILE_LIMIT))
{ {
off_t past_write = offset + amount; off_t past_write = offset;
for (int i = 0; i < iovcnt; ++i)
past_write += iov[i].iov_len;
if (past_write > vfdP->fileSize) if (past_write > vfdP->fileSize)
{ {
@@ -2211,23 +2214,27 @@ FileWrite(File file, const void *buffer, size_t amount, off_t offset,
} }
retry: retry:
errno = 0;
pgstat_report_wait_start(wait_event_info); pgstat_report_wait_start(wait_event_info);
returnCode = pg_pwrite(VfdCache[file].fd, buffer, amount, offset); returnCode = pg_pwritev(vfdP->fd, iov, iovcnt, offset);
pgstat_report_wait_end(); pgstat_report_wait_end();
/* if write didn't set errno, assume problem is no disk space */
if (returnCode != amount && errno == 0)
errno = ENOSPC;
if (returnCode >= 0) if (returnCode >= 0)
{ {
/*
* Some callers expect short writes to set errno, and traditionally we
* have assumed that they imply disk space shortage. We don't want to
* waste CPU cycles adding up the total size here, so we'll just set
* it for all successful writes in case such a caller determines that
* the write was short and ereports "%m".
*/
errno = ENOSPC;
/* /*
* Maintain fileSize and temporary_files_size if it's a temp file. * Maintain fileSize and temporary_files_size if it's a temp file.
*/ */
if (vfdP->fdstate & FD_TEMP_FILE_LIMIT) if (vfdP->fdstate & FD_TEMP_FILE_LIMIT)
{ {
off_t past_write = offset + amount; off_t past_write = offset + returnCode;
if (past_write > vfdP->fileSize) if (past_write > vfdP->fileSize)
{ {
@@ -2239,7 +2246,7 @@ retry:
else else
{ {
/* /*
* See comments in FileRead() * See comments in FileReadV()
*/ */
#ifdef WIN32 #ifdef WIN32
DWORD error = GetLastError(); DWORD error = GetLastError();

View File

@@ -15,7 +15,7 @@
/* /*
* calls: * calls:
* *
* File {Close, Read, Write, Size, Sync} * File {Close, Read, ReadV, Write, WriteV, Size, Sync}
* {Path Name Open, Allocate, Free} File * {Path Name Open, Allocate, Free} File
* *
* These are NOT JUST RENAMINGS OF THE UNIX ROUTINES. * These are NOT JUST RENAMINGS OF THE UNIX ROUTINES.
@@ -43,6 +43,8 @@
#ifndef FD_H #ifndef FD_H
#define FD_H #define FD_H
#include "port/pg_iovec.h"
#include <dirent.h> #include <dirent.h>
#include <fcntl.h> #include <fcntl.h>
@@ -105,8 +107,8 @@ extern File PathNameOpenFilePerm(const char *fileName, int fileFlags, mode_t fil
extern File OpenTemporaryFile(bool interXact); extern File OpenTemporaryFile(bool interXact);
extern void FileClose(File file); extern void FileClose(File file);
extern int FilePrefetch(File file, off_t offset, off_t amount, uint32 wait_event_info); extern int FilePrefetch(File file, off_t offset, off_t amount, uint32 wait_event_info);
extern int FileRead(File file, void *buffer, size_t amount, off_t offset, uint32 wait_event_info); extern int FileReadV(File file, const struct iovec *ioc, int iovcnt, off_t offset, uint32 wait_event_info);
extern int FileWrite(File file, const void *buffer, size_t amount, off_t offset, uint32 wait_event_info); extern int FileWriteV(File file, const struct iovec *ioc, int iovcnt, off_t offset, uint32 wait_event_info);
extern int FileSync(File file, uint32 wait_event_info); extern int FileSync(File file, uint32 wait_event_info);
extern int FileZero(File file, off_t offset, off_t amount, uint32 wait_event_info); extern int FileZero(File file, off_t offset, off_t amount, uint32 wait_event_info);
extern int FileFallocate(File file, off_t offset, off_t amount, uint32 wait_event_info); extern int FileFallocate(File file, off_t offset, off_t amount, uint32 wait_event_info);
@@ -189,4 +191,28 @@ extern int durable_unlink(const char *fname, int elevel);
extern void SyncDataDirectory(void); extern void SyncDataDirectory(void);
extern int data_sync_elevel(int elevel); extern int data_sync_elevel(int elevel);
/*
 * FileRead
 *
 * Single-buffer convenience form of FileReadV(): describes the caller's
 * buffer as a one-element iovec array and forwards the request.
 *
 * file            - virtual file descriptor to read from
 * buffer, amount  - destination memory and its length in bytes
 * offset          - file position handed through to FileReadV()
 * wait_event_info - wait event handed through to FileReadV()
 *
 * The result is whatever FileReadV() returns.
 */
static inline int
FileRead(File file, void *buffer, size_t amount, off_t offset,
		 uint32 wait_event_info)
{
	struct iovec single = {0};

	single.iov_base = buffer;
	single.iov_len = amount;

	return FileReadV(file, &single, 1, offset, wait_event_info);
}
/*
 * FileWrite
 *
 * Single-buffer convenience form of FileWriteV(): describes the caller's
 * buffer as a one-element iovec array and forwards the request.
 *
 * file            - virtual file descriptor to write to
 * buffer, amount  - source memory and its length in bytes
 * offset          - file position handed through to FileWriteV()
 * wait_event_info - wait event handed through to FileWriteV()
 *
 * The result is whatever FileWriteV() returns.
 */
static inline int
FileWrite(File file, const void *buffer, size_t amount, off_t offset,
		  uint32 wait_event_info)
{
	struct iovec single = {0};

	/*
	 * iov_base is a non-const pointer, so the const qualifier has to be
	 * stripped to store the caller's buffer in the iovec.
	 */
	single.iov_base = unconstify(void *, buffer);
	single.iov_len = amount;

	return FileWriteV(file, &single, 1, offset, wait_event_info);
}
#endif /* FD_H */ #endif /* FD_H */