mirror of
https://github.com/postgres/postgres.git
synced 2025-08-27 07:42:10 +03:00
Provide pg_preadv() and pg_pwritev().
Provide synchronous vectored file I/O routines. These map to preadv() and pwritev(), with fallback implementations for systems that don't have them. Also provide a wrapper pg_pwritev_with_retry() that automatically retries on short writes. Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Reviewed-by: Andres Freund <andres@anarazel.de> Discussion: https://postgr.es/m/CA%2BhUKGJA%2Bu-220VONeoREBXJ9P3S94Y7J%2BkqCnTYmahvZJwM%3Dg%40mail.gmail.com
This commit is contained in:
@@ -53,6 +53,8 @@ OBJS = \
|
||||
pgstrcasecmp.o \
|
||||
pgstrsignal.o \
|
||||
pqsignal.o \
|
||||
pread.o \
|
||||
pwrite.o \
|
||||
qsort.o \
|
||||
qsort_arg.o \
|
||||
quotes.o \
|
||||
|
@@ -1,7 +1,7 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* pread.c
|
||||
* Implementation of pread(2) for platforms that lack one.
|
||||
* Implementation of pread[v](2) for platforms that lack one.
|
||||
*
|
||||
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
|
||||
*
|
||||
@@ -9,7 +9,8 @@
|
||||
* src/port/pread.c
|
||||
*
|
||||
* Note that this implementation changes the current file position, unlike
|
||||
* the POSIX function, so we use the name pg_pread().
|
||||
* the POSIX function, so we use the name pg_pread(). Likewise for the
|
||||
* iovec version.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -23,6 +24,9 @@
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "port/pg_iovec.h"
|
||||
|
||||
#ifndef HAVE_PREAD
|
||||
ssize_t
|
||||
pg_pread(int fd, void *buf, size_t size, off_t offset)
|
||||
{
|
||||
@@ -56,3 +60,38 @@ pg_pread(int fd, void *buf, size_t size, off_t offset)
|
||||
return read(fd, buf, size);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_PREADV
|
||||
/*
 * pg_preadv
 *
 * Read from 'fd' starting at 'offset' into the 'iovcnt' buffers described by
 * 'iov'.  Returns the number of bytes read, or -1 on failure.
 *
 * With readv() available, the descriptor is positioned with lseek() and the
 * whole vector is handed to readv() (a single buffer goes straight to
 * pg_pread()).  Without it, each buffer is filled in turn with pg_pread();
 * a failure after the first buffer reports the bytes gathered so far rather
 * than -1, and a short read ends the transfer early.
 */
ssize_t
pg_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
#ifdef HAVE_READV
	/* A lone buffer needs no vectored call. */
	if (iovcnt == 1)
		return pg_pread(fd, iov[0].iov_base, iov[0].iov_len, offset);

	/* Seek explicitly, then let readv() scatter the data. */
	if (lseek(fd, offset, SEEK_SET) < 0)
		return -1;
	return readv(fd, iov, iovcnt);
#else
	ssize_t		total = 0;
	int			idx;

	for (idx = 0; idx < iovcnt; idx++)
	{
		ssize_t		nread;

		nread = pg_pread(fd, iov[idx].iov_base, iov[idx].iov_len, offset);

		/* Only a failure on the very first buffer is reported as an error. */
		if (nread < 0)
			return (idx == 0) ? -1 : total;

		total += nread;
		offset += nread;

		/* Short read: stop here and report what we have. */
		if (nread < iov[idx].iov_len)
			break;
	}

	return total;
#endif
}
|
||||
#endif
|
||||
|
@@ -1,7 +1,7 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* pwrite.c
|
||||
* Implementation of pwrite(2) for platforms that lack one.
|
||||
* Implementation of pwrite[v](2) for platforms that lack one.
|
||||
*
|
||||
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
|
||||
*
|
||||
@@ -9,7 +9,8 @@
|
||||
* src/port/pwrite.c
|
||||
*
|
||||
* Note that this implementation changes the current file position, unlike
|
||||
* the POSIX function, so we use the name pg_pwrite().
|
||||
* the POSIX function, so we use the name pg_pwrite(). Likewise for the
|
||||
* iovec version.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -23,6 +24,9 @@
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "port/pg_iovec.h"
|
||||
|
||||
#ifndef HAVE_PWRITE
|
||||
ssize_t
|
||||
pg_pwrite(int fd, const void *buf, size_t size, off_t offset)
|
||||
{
|
||||
@@ -53,3 +57,102 @@ pg_pwrite(int fd, const void *buf, size_t size, off_t offset)
|
||||
return write(fd, buf, size);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_PWRITEV
|
||||
/*
 * pg_pwritev
 *
 * Write the 'iovcnt' buffers described by 'iov' to 'fd' starting at 'offset'.
 * Returns the number of bytes written, or -1 on failure.
 *
 * With writev() available, the descriptor is positioned with lseek() and the
 * whole vector is handed to writev() (a single buffer goes straight to
 * pg_pwrite()).  Without it, each buffer is written in turn with pg_pwrite();
 * a failure after the first buffer reports the bytes written so far rather
 * than -1, and a short write ends the transfer early.
 */
ssize_t
pg_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
#ifdef HAVE_WRITEV
	/* A lone buffer needs no vectored call. */
	if (iovcnt == 1)
		return pg_pwrite(fd, iov[0].iov_base, iov[0].iov_len, offset);

	/* Seek explicitly, then let writev() gather the data. */
	if (lseek(fd, offset, SEEK_SET) < 0)
		return -1;
	return writev(fd, iov, iovcnt);
#else
	ssize_t		total = 0;
	int			idx;

	for (idx = 0; idx < iovcnt; idx++)
	{
		ssize_t		nwritten;

		nwritten = pg_pwrite(fd, iov[idx].iov_base, iov[idx].iov_len, offset);

		/* Only a failure on the very first buffer is reported as an error. */
		if (nwritten < 0)
			return (idx == 0) ? -1 : total;

		total += nwritten;
		offset += nwritten;

		/* Short write: stop here and report what we managed. */
		if (nwritten < iov[idx].iov_len)
			break;
	}

	return total;
#endif
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* A convenience wrapper for pg_pwritev() that retries on partial write. If an
|
||||
* error is returned, it is unspecified how much has been written.
|
||||
*/
|
||||
ssize_t
|
||||
pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset)
|
||||
{
|
||||
struct iovec iov_copy[PG_IOV_MAX];
|
||||
ssize_t sum = 0;
|
||||
ssize_t part;
|
||||
|
||||
/* We'd better have space to make a copy, in case we need to retry. */
|
||||
if (iovcnt > PG_IOV_MAX)
|
||||
{
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (;;)
|
||||
{
|
||||
/* Write as much as we can. */
|
||||
part = pg_pwritev(fd, iov, iovcnt, offset);
|
||||
if (part < 0)
|
||||
return -1;
|
||||
|
||||
#ifdef SIMULATE_SHORT_WRITE
|
||||
part = Min(part, 4096);
|
||||
#endif
|
||||
|
||||
/* Count our progress. */
|
||||
sum += part;
|
||||
offset += part;
|
||||
|
||||
/* Step over iovecs that are done. */
|
||||
while (iovcnt > 0 && iov->iov_len <= part)
|
||||
{
|
||||
part -= iov->iov_len;
|
||||
++iov;
|
||||
--iovcnt;
|
||||
}
|
||||
|
||||
/* Are they all done? */
|
||||
if (iovcnt == 0)
|
||||
{
|
||||
if (part > 0)
|
||||
elog(ERROR, "unexpectedly wrote more than requested");
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Move whatever's left to the front of our mutable copy and adjust the
|
||||
* leading iovec.
|
||||
*/
|
||||
Assert(iovcnt > 0);
|
||||
memmove(iov_copy, iov, sizeof(*iov) * iovcnt);
|
||||
Assert(iov->iov_len > part);
|
||||
iov_copy[0].iov_base = (char *) iov_copy[0].iov_base + part;
|
||||
iov_copy[0].iov_len -= part;
|
||||
iov = iov_copy;
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
Reference in New Issue
Block a user