1
0
mirror of https://github.com/postgres/postgres.git synced 2025-04-21 12:05:57 +03:00

Address set of issues with errno handling

System calls mixed up in error code paths are causing two issues which
several code paths have not correctly handled:
1) For write() calls, sometimes the system may write fewer bytes than
what was requested without errno being set.  Some paths were careful
enough to consider that case, and assumed that errno should be set to
ENOSPC, other calls missed that.
2) errno generated by a system call is overwritten by other system calls
which may succeed once an error code path is taken, causing what is
reported to the user to be incorrect.

This patch uses the brute-force approach of correcting all those code
paths.  Some refactoring could happen in the future, but this is left as
future work, which is not targeted for back-branches anyway.

Author: Michael Paquier
Reviewed-by: Ashutosh Sharma
Discussion: https://postgr.es/m/20180622061535.GD5215@paquier.xyz
This commit is contained in:
Michael Paquier 2018-06-25 11:21:49 +09:00
parent dbce4cb124
commit 910e2aca12
9 changed files with 94 additions and 3 deletions

View File

@ -1165,9 +1165,14 @@ heap_xlog_logical_rewrite(XLogReaderState *r)
/* write out tail end of mapping file (again) */ /* write out tail end of mapping file (again) */
if (write(fd, data, len) != len) if (write(fd, data, len) != len)
{
/* if write didn't set errno, assume problem is no disk space */
if (errno == 0)
errno = ENOSPC;
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", path))); errmsg("could not write to file \"%s\": %m", path)));
}
/* /*
* Now fsync all previously written data. We could improve things and only * Now fsync all previously written data. We could improve things and only

View File

@ -1249,12 +1249,17 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
*/ */
if (fstat(fd, &stat)) if (fstat(fd, &stat))
{ {
int save_errno = errno;
CloseTransientFile(fd); CloseTransientFile(fd);
if (give_warnings) if (give_warnings)
{
errno = save_errno;
ereport(WARNING, ereport(WARNING,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not stat two-phase state file \"%s\": %m", errmsg("could not stat two-phase state file \"%s\": %m",
path))); path)));
}
return NULL; return NULL;
} }
@ -1281,12 +1286,17 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
if (read(fd, buf, stat.st_size) != stat.st_size) if (read(fd, buf, stat.st_size) != stat.st_size)
{ {
int save_errno = errno;
CloseTransientFile(fd); CloseTransientFile(fd);
if (give_warnings) if (give_warnings)
{
errno = save_errno;
ereport(WARNING, ereport(WARNING,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not read two-phase state file \"%s\": %m", errmsg("could not read two-phase state file \"%s\": %m",
path))); path)));
}
pfree(buf); pfree(buf);
return NULL; return NULL;
} }
@ -1574,14 +1584,24 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
/* Write content and CRC */ /* Write content and CRC */
if (write(fd, content, len) != len) if (write(fd, content, len) != len)
{ {
int save_errno = errno;
CloseTransientFile(fd); CloseTransientFile(fd);
/* if write didn't set errno, assume problem is no disk space */
errno = save_errno ? save_errno : ENOSPC;
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not write two-phase state file: %m"))); errmsg("could not write two-phase state file: %m")));
} }
if (write(fd, &statefile_crc, sizeof(pg_crc32c)) != sizeof(pg_crc32c)) if (write(fd, &statefile_crc, sizeof(pg_crc32c)) != sizeof(pg_crc32c))
{ {
int save_errno = errno;
CloseTransientFile(fd); CloseTransientFile(fd);
/* if write didn't set errno, assume problem is no disk space */
errno = save_errno ? save_errno : ENOSPC;
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not write two-phase state file: %m"))); errmsg("could not write two-phase state file: %m")));
@ -1593,7 +1613,10 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
*/ */
if (pg_fsync(fd) != 0) if (pg_fsync(fd) != 0)
{ {
int save_errno = errno;
CloseTransientFile(fd); CloseTransientFile(fd);
errno = save_errno;
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not fsync two-phase state file: %m"))); errmsg("could not fsync two-phase state file: %m")));

View File

@ -3034,7 +3034,10 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
if (pg_fsync(fd) != 0) if (pg_fsync(fd) != 0)
{ {
int save_errno = errno;
close(fd); close(fd);
errno = save_errno;
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", tmppath))); errmsg("could not fsync file \"%s\": %m", tmppath)));
@ -11159,8 +11162,10 @@ retry:
if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0) if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
{ {
char fname[MAXFNAMELEN]; char fname[MAXFNAMELEN];
int save_errno = errno;
XLogFileName(fname, curFileTLI, readSegNo); XLogFileName(fname, curFileTLI, readSegNo);
errno = save_errno;
ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen), ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not seek in log segment %s to offset %u: %m", errmsg("could not seek in log segment %s to offset %u: %m",
@ -11171,8 +11176,10 @@ retry:
if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ) if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
{ {
char fname[MAXFNAMELEN]; char fname[MAXFNAMELEN];
int save_errno = errno;
XLogFileName(fname, curFileTLI, readSegNo); XLogFileName(fname, curFileTLI, readSegNo);
errno = save_errno;
ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen), ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not read from log segment %s, offset %u: %m", errmsg("could not read from log segment %s, offset %u: %m",

View File

@ -388,6 +388,8 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
fp = AllocateFile(pathbuf, "rb"); fp = AllocateFile(pathbuf, "rb");
if (fp == NULL) if (fp == NULL)
{ {
int save_errno = errno;
/* /*
* Most likely reason for this is that the file was already * Most likely reason for this is that the file was already
* removed by a checkpoint, so check for that to get a better * removed by a checkpoint, so check for that to get a better
@ -395,6 +397,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
*/ */
CheckXLogRemoved(segno, tli); CheckXLogRemoved(segno, tli);
errno = save_errno;
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not open file \"%s\": %m", pathbuf))); errmsg("could not open file \"%s\": %m", pathbuf)));

View File

@ -549,7 +549,12 @@ CheckPointReplicationOrigin(void)
/* write magic */ /* write magic */
if ((write(tmpfd, &magic, sizeof(magic))) != sizeof(magic)) if ((write(tmpfd, &magic, sizeof(magic))) != sizeof(magic))
{ {
int save_errno = errno;
CloseTransientFile(tmpfd); CloseTransientFile(tmpfd);
/* if write didn't set errno, assume problem is no disk space */
errno = save_errno ? save_errno : ENOSPC;
ereport(PANIC, ereport(PANIC,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", errmsg("could not write to file \"%s\": %m",
@ -588,7 +593,12 @@ CheckPointReplicationOrigin(void)
if ((write(tmpfd, &disk_state, sizeof(disk_state))) != if ((write(tmpfd, &disk_state, sizeof(disk_state))) !=
sizeof(disk_state)) sizeof(disk_state))
{ {
int save_errno = errno;
CloseTransientFile(tmpfd); CloseTransientFile(tmpfd);
/* if write didn't set errno, assume problem is no disk space */
errno = save_errno ? save_errno : ENOSPC;
ereport(PANIC, ereport(PANIC,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", errmsg("could not write to file \"%s\": %m",
@ -604,7 +614,12 @@ CheckPointReplicationOrigin(void)
FIN_CRC32C(crc); FIN_CRC32C(crc);
if ((write(tmpfd, &crc, sizeof(crc))) != sizeof(crc)) if ((write(tmpfd, &crc, sizeof(crc))) != sizeof(crc))
{ {
int save_errno = errno;
CloseTransientFile(tmpfd); CloseTransientFile(tmpfd);
/* if write didn't set errno, assume problem is no disk space */
errno = save_errno ? save_errno : ENOSPC;
ereport(PANIC, ereport(PANIC,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", errmsg("could not write to file \"%s\": %m",

View File

@ -2241,7 +2241,9 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
int save_errno = errno; int save_errno = errno;
CloseTransientFile(fd); CloseTransientFile(fd);
errno = save_errno;
/* if write didn't set errno, assume problem is no disk space */
errno = save_errno ? save_errno : ENOSPC;
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not write to data file for XID %u: %m", errmsg("could not write to data file for XID %u: %m",

View File

@ -1566,7 +1566,12 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
if ((write(fd, ondisk, needed_length)) != needed_length) if ((write(fd, ondisk, needed_length)) != needed_length)
{ {
int save_errno = errno;
CloseTransientFile(fd); CloseTransientFile(fd);
/* if write didn't set errno, assume problem is no disk space */
errno = save_errno ? save_errno : ENOSPC;
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", tmppath))); errmsg("could not write to file \"%s\": %m", tmppath)));
@ -1582,7 +1587,10 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
*/ */
if (pg_fsync(fd) != 0) if (pg_fsync(fd) != 0)
{ {
int save_errno = errno;
CloseTransientFile(fd); CloseTransientFile(fd);
errno = save_errno;
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", tmppath))); errmsg("could not fsync file \"%s\": %m", tmppath)));
@ -1664,7 +1672,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
readBytes = read(fd, &ondisk, SnapBuildOnDiskConstantSize); readBytes = read(fd, &ondisk, SnapBuildOnDiskConstantSize);
if (readBytes != SnapBuildOnDiskConstantSize) if (readBytes != SnapBuildOnDiskConstantSize)
{ {
int save_errno = errno;
CloseTransientFile(fd); CloseTransientFile(fd);
errno = save_errno;
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not read file \"%s\", read %d of %d: %m", errmsg("could not read file \"%s\", read %d of %d: %m",
@ -1690,7 +1701,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
readBytes = read(fd, &ondisk.builder, sizeof(SnapBuild)); readBytes = read(fd, &ondisk.builder, sizeof(SnapBuild));
if (readBytes != sizeof(SnapBuild)) if (readBytes != sizeof(SnapBuild))
{ {
int save_errno = errno;
CloseTransientFile(fd); CloseTransientFile(fd);
errno = save_errno;
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not read file \"%s\", read %d of %d: %m", errmsg("could not read file \"%s\", read %d of %d: %m",
@ -1705,7 +1719,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
readBytes = read(fd, ondisk.builder.was_running.was_xip, sz); readBytes = read(fd, ondisk.builder.was_running.was_xip, sz);
if (readBytes != sz) if (readBytes != sz)
{ {
int save_errno = errno;
CloseTransientFile(fd); CloseTransientFile(fd);
errno = save_errno;
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not read file \"%s\", read %d of %d: %m", errmsg("could not read file \"%s\", read %d of %d: %m",
@ -1719,7 +1736,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
readBytes = read(fd, ondisk.builder.committed.xip, sz); readBytes = read(fd, ondisk.builder.committed.xip, sz);
if (readBytes != sz) if (readBytes != sz)
{ {
int save_errno = errno;
CloseTransientFile(fd); CloseTransientFile(fd);
errno = save_errno;
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not read file \"%s\", read %d of %d: %m", errmsg("could not read file \"%s\", read %d of %d: %m",

View File

@ -1033,7 +1033,9 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
int save_errno = errno; int save_errno = errno;
CloseTransientFile(fd); CloseTransientFile(fd);
errno = save_errno;
/* if write didn't set errno, assume problem is no disk space */
errno = save_errno ? save_errno : ENOSPC;
ereport(elevel, ereport(elevel,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", errmsg("could not write to file \"%s\": %m",
@ -1136,7 +1138,10 @@ RestoreSlotFromDisk(const char *name)
*/ */
if (pg_fsync(fd) != 0) if (pg_fsync(fd) != 0)
{ {
int save_errno = errno;
CloseTransientFile(fd); CloseTransientFile(fd);
errno = save_errno;
ereport(PANIC, ereport(PANIC,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", errmsg("could not fsync file \"%s\": %m",

View File

@ -159,6 +159,9 @@ open_walfile(XLogRecPtr startpoint, uint32 timeline, char *basedir,
{ {
if (write(f, zerobuf, XLOG_BLCKSZ) != XLOG_BLCKSZ) if (write(f, zerobuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
{ {
/* if write didn't set errno, assume problem is no disk space */
if (errno == 0)
errno = ENOSPC;
fprintf(stderr, fprintf(stderr,
_("%s: could not pad transaction log file \"%s\": %s\n"), _("%s: could not pad transaction log file \"%s\": %s\n"),
progname, fn, strerror(errno)); progname, fn, strerror(errno));
@ -345,7 +348,9 @@ writeTimeLineHistoryFile(char *basedir, TimeLineID tli, char *filename,
*/ */
close(fd); close(fd);
unlink(tmppath); unlink(tmppath);
errno = save_errno;
/* if write didn't set errno, assume problem is no disk space */
errno = save_errno ? save_errno : ENOSPC;
fprintf(stderr, _("%s: could not write timeline history file \"%s\": %s\n"), fprintf(stderr, _("%s: could not write timeline history file \"%s\": %s\n"),
progname, tmppath, strerror(errno)); progname, tmppath, strerror(errno));
@ -354,7 +359,10 @@ writeTimeLineHistoryFile(char *basedir, TimeLineID tli, char *filename,
if (fsync(fd) != 0) if (fsync(fd) != 0)
{ {
int save_errno = errno;
close(fd); close(fd);
errno = save_errno;
fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"), fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
progname, tmppath, strerror(errno)); progname, tmppath, strerror(errno));
return false; return false;
@ -1213,6 +1221,9 @@ ProcessXLogDataMsg(PGconn *conn, char *copybuf, int len,
copybuf + hdr_len + bytes_written, copybuf + hdr_len + bytes_written,
bytes_to_write) != bytes_to_write) bytes_to_write) != bytes_to_write)
{ {
/* if write didn't set errno, assume problem is no disk space */
if (errno == 0)
errno = ENOSPC;
fprintf(stderr, fprintf(stderr,
_("%s: could not write %u bytes to WAL file \"%s\": %s\n"), _("%s: could not write %u bytes to WAL file \"%s\": %s\n"),
progname, bytes_to_write, current_walfile_name, progname, bytes_to_write, current_walfile_name,