1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Per previous discussions, get rid of use of sync(2) in favor of
explicitly fsync'ing every (non-temp) file we have written since the
last checkpoint.  In the vast majority of cases, the burden of the
fsyncs should fall on the bgwriter process not on backends.  (To this
end, we assume that an fsync issued by the bgwriter will force out
blocks written to the same file by other processes using other file
descriptors.  Anyone have a problem with that?)  This makes the world
safe for WIN32, which ain't even got sync(2), and really makes the world
safe for Unixen as well, because sync(2) never had the semantics we need:
it offers no way to wait for the requested I/O to finish.

Along the way, fix a bug I recently introduced in xlog recovery:
file truncation replay failed to clear bufmgr buffers for the dropped
blocks, which could result in 'PANIC:  heap_delete_redo: no block'
later on in xlog replay.
This commit is contained in:
Tom Lane
2004-05-31 03:48:10 +00:00
parent f024086db3
commit 9b178555fc
13 changed files with 779 additions and 250 deletions

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.108 2004/02/23 23:03:10 tgl Exp $
* $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.109 2004/05/31 03:48:04 tgl Exp $
*
* NOTES:
*
@ -484,6 +484,7 @@ Insert(File file)
DO_DB(_dump_lru());
}
/* returns 0 on success, -1 on re-open failure (with errno set) */
static int
LruInsert(File file)
{
@ -685,6 +686,7 @@ filepath(const char *filename)
return buf;
}
/* returns 0 on success, -1 on re-open failure (with errno set) */
static int
FileAccess(File file)
{
@ -954,7 +956,10 @@ FileRead(File file, char *buffer, int amount)
file, VfdCache[file].fileName,
VfdCache[file].seekPos, amount, buffer));
FileAccess(file);
returnCode = FileAccess(file);
if (returnCode < 0)
return returnCode;
returnCode = read(VfdCache[file].fd, buffer, amount);
if (returnCode > 0)
VfdCache[file].seekPos += returnCode;
@ -975,7 +980,9 @@ FileWrite(File file, char *buffer, int amount)
file, VfdCache[file].fileName,
VfdCache[file].seekPos, amount, buffer));
FileAccess(file);
returnCode = FileAccess(file);
if (returnCode < 0)
return returnCode;
errno = 0;
returnCode = write(VfdCache[file].fd, buffer, amount);
@ -992,9 +999,28 @@ FileWrite(File file, char *buffer, int amount)
return returnCode;
}
/*
 * FileSync
 *
 * Run pg_fsync() on the kernel descriptor behind a virtual file.
 *
 * The virtual file is first passed through FileAccess(); if that reports
 * a negative result, the same value is handed back to the caller without
 * calling pg_fsync().  Otherwise the return value is whatever pg_fsync()
 * yields for VfdCache[file].fd.
 */
int
FileSync(File file)
{
    int accessResult;

    Assert(FileIsValid(file));

    DO_DB(elog(LOG, "FileSync: %d (%s)",
               file, VfdCache[file].fileName));

    /* FileAccess() must succeed before VfdCache[file].fd is used. */
    accessResult = FileAccess(file);
    if (accessResult >= 0)
        return pg_fsync(VfdCache[file].fd);

    /* Propagate the FileAccess() failure code as-is. */
    return accessResult;
}
long
FileSeek(File file, long offset, int whence)
{
int returnCode;
Assert(FileIsValid(file));
DO_DB(elog(LOG, "FileSeek: %d (%s) %ld %ld %d",
@ -1014,8 +1040,11 @@ FileSeek(File file, long offset, int whence)
VfdCache[file].seekPos += offset;
break;
case SEEK_END:
FileAccess(file);
VfdCache[file].seekPos = lseek(VfdCache[file].fd, offset, whence);
returnCode = FileAccess(file);
if (returnCode < 0)
return returnCode;
VfdCache[file].seekPos = lseek(VfdCache[file].fd,
offset, whence);
break;
default:
elog(ERROR, "invalid whence: %d", whence);
@ -1030,14 +1059,17 @@ FileSeek(File file, long offset, int whence)
if (offset < 0)
elog(ERROR, "invalid seek offset: %ld", offset);
if (VfdCache[file].seekPos != offset)
VfdCache[file].seekPos = lseek(VfdCache[file].fd, offset, whence);
VfdCache[file].seekPos = lseek(VfdCache[file].fd,
offset, whence);
break;
case SEEK_CUR:
if (offset != 0 || VfdCache[file].seekPos == FileUnknownPos)
VfdCache[file].seekPos = lseek(VfdCache[file].fd, offset, whence);
VfdCache[file].seekPos = lseek(VfdCache[file].fd,
offset, whence);
break;
case SEEK_END:
VfdCache[file].seekPos = lseek(VfdCache[file].fd, offset, whence);
VfdCache[file].seekPos = lseek(VfdCache[file].fd,
offset, whence);
break;
default:
elog(ERROR, "invalid whence: %d", whence);
@ -1071,7 +1103,10 @@ FileTruncate(File file, long offset)
DO_DB(elog(LOG, "FileTruncate %d (%s)",
file, VfdCache[file].fileName));
FileAccess(file);
returnCode = FileAccess(file);
if (returnCode < 0)
return returnCode;
returnCode = ftruncate(VfdCache[file].fd, (size_t) offset);
return returnCode;
}