mirror of
https://github.com/postgres/postgres.git
synced 2025-07-28 23:42:10 +03:00
Add smgrzeroextend(), FileZero(), FileFallocate()
smgrzeroextend() uses FileFallocate() to efficiently extend files by multiple blocks. When extending by a small number of blocks, use FileZero() instead, as using posix_fallocate() for small numbers of blocks is inefficient for some file systems / operating systems. FileZero() is also used as the fallback for FileFallocate() on platforms / filesystems that don't support fallocate. A big advantage of using posix_fallocate() is that it typically won't cause dirty buffers in the kernel pagecache. So far the most common pattern in our code is that we smgrextend() a page full of zeroes and put the corresponding page into shared buffers, from where we later write out the actual contents of the page. If the kernel, e.g. due to memory pressure or elapsed time, already wrote back the all-zeroes page, this can lead to doubling the amount of writes reaching storage. There are no users of smgrzeroextend() as of this commit. That will follow in future commits. Reviewed-by: Melanie Plageman <melanieplageman@gmail.com> Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi> Reviewed-by: Kyotaro Horiguchi <horikyota.ntt@gmail.com> Reviewed-by: David Rowley <dgrowleyml@gmail.com> Reviewed-by: John Naylor <john.naylor@enterprisedb.com> Discussion: https://postgr.es/m/20221029025420.eplyow6k7tgu6he3@awork3.anarazel.de
This commit is contained in:
@ -2206,6 +2206,94 @@ FileSync(File file, uint32 wait_event_info)
|
||||
return returnCode;
|
||||
}
|
||||
|
||||
/*
|
||||
* Zero a region of the file.
|
||||
*
|
||||
* Returns 0 on success, -1 otherwise. In the latter case errno is set to the
|
||||
* appropriate error.
|
||||
*/
|
||||
int
|
||||
FileZero(File file, off_t offset, off_t amount, uint32 wait_event_info)
|
||||
{
|
||||
int returnCode;
|
||||
ssize_t written;
|
||||
|
||||
Assert(FileIsValid(file));
|
||||
|
||||
DO_DB(elog(LOG, "FileZero: %d (%s) " INT64_FORMAT " " INT64_FORMAT,
|
||||
file, VfdCache[file].fileName,
|
||||
(int64) offset, (int64) amount));
|
||||
|
||||
returnCode = FileAccess(file);
|
||||
if (returnCode < 0)
|
||||
return returnCode;
|
||||
|
||||
pgstat_report_wait_start(wait_event_info);
|
||||
written = pg_pwrite_zeros(VfdCache[file].fd, amount, offset);
|
||||
pgstat_report_wait_end();
|
||||
|
||||
if (written < 0)
|
||||
return -1;
|
||||
else if (written != amount)
|
||||
{
|
||||
/* if errno is unset, assume problem is no disk space */
|
||||
if (errno == 0)
|
||||
errno = ENOSPC;
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to reserve file space with posix_fallocate(). If posix_fallocate() is
|
||||
* not implemented on the operating system or fails with EINVAL / EOPNOTSUPP,
|
||||
* use FileZero() instead.
|
||||
*
|
||||
* Note that at least glibc() implements posix_fallocate() in userspace if not
|
||||
* implemented by the filesystem. That's not the case for all environments
|
||||
* though.
|
||||
*
|
||||
* Returns 0 on success, -1 otherwise. In the latter case errno is set to the
|
||||
* appropriate error.
|
||||
*/
|
||||
int
|
||||
FileFallocate(File file, off_t offset, off_t amount, uint32 wait_event_info)
|
||||
{
|
||||
#ifdef HAVE_POSIX_FALLOCATE
|
||||
int returnCode;
|
||||
|
||||
Assert(FileIsValid(file));
|
||||
|
||||
DO_DB(elog(LOG, "FileFallocate: %d (%s) " INT64_FORMAT " " INT64_FORMAT,
|
||||
file, VfdCache[file].fileName,
|
||||
(int64) offset, (int64) amount));
|
||||
|
||||
returnCode = FileAccess(file);
|
||||
if (returnCode < 0)
|
||||
return -1;
|
||||
|
||||
pgstat_report_wait_start(wait_event_info);
|
||||
returnCode = posix_fallocate(VfdCache[file].fd, offset, amount);
|
||||
pgstat_report_wait_end();
|
||||
|
||||
if (returnCode == 0)
|
||||
return 0;
|
||||
|
||||
/* for compatibility with %m printing etc */
|
||||
errno = returnCode;
|
||||
|
||||
/*
|
||||
* Return in cases of a "real" failure, if fallocate is not supported,
|
||||
* fall through to the FileZero() backed implementation.
|
||||
*/
|
||||
if (returnCode != EINVAL && returnCode != EOPNOTSUPP)
|
||||
return -1;
|
||||
#endif
|
||||
|
||||
return FileZero(file, offset, amount, wait_event_info);
|
||||
}
|
||||
|
||||
off_t
|
||||
FileSize(File file)
|
||||
{
|
||||
|
Reference in New Issue
Block a user