mirror of
https://github.com/postgres/postgres.git
synced 2025-04-20 00:42:27 +03:00
The POSIX standard does not say that the success return value for fcntl(F_SETFD) and fcntl(F_SETFL) is zero; it says only that it's not -1. We had several calls that were making the stronger assumption. Adjust them to test specifically for -1 for strict spec compliance. The standard further leaves open the possibility that the O_NONBLOCK flag bit is not the only active one in F_SETFL's argument. Formally, therefore, one ought to get the current flags with F_GETFL and store them back with only the O_NONBLOCK bit changed when trying to change the nonblock state. In port/noblock.c, we were doing the full pushup in pg_set_block but not in pg_set_noblock, which is just weird. Make both of them do it properly, since they have little business making any assumptions about the socket they're handed. The other places where we're issuing F_SETFL are working with FDs we just got from pipe(2), so it's reasonable to assume the FDs' properties are all default, so I didn't bother adding F_GETFL steps there. Also, while pg_set_block deserves some points for trying to do things right, somebody had decided that it'd be even better to cast fcntl's third argument to "long". Which is completely loony, because POSIX clearly says the third argument for an F_SETFL call is "int". Given the lack of field complaints, these missteps apparently are not of significance on any common platforms. But they're still wrong, so back-patch to all supported branches. Discussion: https://postgr.es/m/30882.1492800880@sss.pgh.pa.us
597 lines
13 KiB
C
597 lines
13 KiB
C
/*
|
|
* pg_test_fsync.c
|
|
* tests all supported fsync() methods
|
|
*/
|
|
|
|
#include "postgres_fe.h"
|
|
|
|
#include <sys/stat.h>
|
|
#include <sys/time.h>
|
|
#include <fcntl.h>
|
|
#include <time.h>
|
|
#include <unistd.h>
|
|
#include <signal.h>
|
|
|
|
#include "getopt_long.h"
|
|
#include "access/xlogdefs.h"
|
|
|
|
|
|
/*
|
|
* put the temp files in the local directory
|
|
* unless the user specifies otherwise
|
|
*/
|
|
#define FSYNC_FILENAME "./pg_test_fsync.out"
|
|
|
|
#define XLOG_BLCKSZ_K (XLOG_BLCKSZ / 1024)
|
|
|
|
#define LABEL_FORMAT " %-32s"
|
|
#define NA_FORMAT "%18s"
|
|
#define OPS_FORMAT "%9.3f ops/sec"
|
|
|
|
/* These are macros to avoid timing the function call overhead. */
|
|
#ifndef WIN32
|
|
#define START_TIMER \
|
|
do { \
|
|
alarm_triggered = false; \
|
|
alarm(secs_per_test); \
|
|
gettimeofday(&start_t, NULL); \
|
|
} while (0)
|
|
#else
|
|
/* WIN32 doesn't support alarm, so we create a thread and sleep there */
|
|
#define START_TIMER \
|
|
do { \
|
|
alarm_triggered = false; \
|
|
if (CreateThread(NULL, 0, process_alarm, NULL, 0, NULL) == \
|
|
INVALID_HANDLE_VALUE) \
|
|
{ \
|
|
fprintf(stderr, "Cannot create thread for alarm\n"); \
|
|
exit(1); \
|
|
} \
|
|
gettimeofday(&start_t, NULL); \
|
|
} while (0)
|
|
#endif
|
|
|
|
#define STOP_TIMER \
|
|
do { \
|
|
gettimeofday(&stop_t, NULL); \
|
|
print_elapse(start_t, stop_t, ops); \
|
|
} while (0)
|
|
|
|
|
|
static const char *progname;
|
|
|
|
static int secs_per_test = 2;
|
|
static int needs_unlink = 0;
|
|
static char full_buf[XLOG_SEG_SIZE],
|
|
*buf,
|
|
*filename = FSYNC_FILENAME;
|
|
static struct timeval start_t,
|
|
stop_t;
|
|
static bool alarm_triggered = false;
|
|
|
|
|
|
static void handle_args(int argc, char *argv[]);
|
|
static void prepare_buf(void);
|
|
static void test_open(void);
|
|
static void test_non_sync(void);
|
|
static void test_sync(int writes_per_op);
|
|
static void test_open_syncs(void);
|
|
static void test_open_sync(const char *msg, int writes_size);
|
|
static void test_file_descriptor_sync(void);
|
|
|
|
#ifndef WIN32
|
|
static void process_alarm(int sig);
|
|
#else
|
|
static DWORD WINAPI process_alarm(LPVOID param);
|
|
#endif
|
|
static void signal_cleanup(int sig);
|
|
|
|
#ifdef HAVE_FSYNC_WRITETHROUGH
|
|
static int pg_fsync_writethrough(int fd);
|
|
#endif
|
|
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);
|
|
static void die(const char *str);
|
|
|
|
|
|
int
|
|
main(int argc, char *argv[])
|
|
{
|
|
progname = get_progname(argv[0]);
|
|
|
|
handle_args(argc, argv);
|
|
|
|
/* Prevent leaving behind the test file */
|
|
signal(SIGINT, signal_cleanup);
|
|
signal(SIGTERM, signal_cleanup);
|
|
#ifndef WIN32
|
|
signal(SIGALRM, process_alarm);
|
|
#endif
|
|
#ifdef SIGHUP
|
|
/* Not defined on win32 */
|
|
signal(SIGHUP, signal_cleanup);
|
|
#endif
|
|
|
|
prepare_buf();
|
|
|
|
test_open();
|
|
|
|
/* Test using 1 XLOG_BLCKSZ write */
|
|
test_sync(1);
|
|
|
|
/* Test using 2 XLOG_BLCKSZ writes */
|
|
test_sync(2);
|
|
|
|
test_open_syncs();
|
|
|
|
test_file_descriptor_sync();
|
|
|
|
test_non_sync();
|
|
|
|
unlink(filename);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
handle_args(int argc, char *argv[])
|
|
{
|
|
static struct option long_options[] = {
|
|
{"filename", required_argument, NULL, 'f'},
|
|
{"secs-per-test", required_argument, NULL, 's'},
|
|
{NULL, 0, NULL, 0}
|
|
};
|
|
int option; /* Command line option */
|
|
int optindex = 0; /* used by getopt_long */
|
|
|
|
if (argc > 1)
|
|
{
|
|
if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0 ||
|
|
strcmp(argv[1], "-?") == 0)
|
|
{
|
|
printf("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n", progname);
|
|
exit(0);
|
|
}
|
|
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
|
|
{
|
|
puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
|
|
exit(0);
|
|
}
|
|
}
|
|
|
|
while ((option = getopt_long(argc, argv, "f:s:",
|
|
long_options, &optindex)) != -1)
|
|
{
|
|
switch (option)
|
|
{
|
|
case 'f':
|
|
filename = strdup(optarg);
|
|
break;
|
|
|
|
case 's':
|
|
secs_per_test = atoi(optarg);
|
|
break;
|
|
|
|
default:
|
|
fprintf(stderr, "Try \"%s --help\" for more information.\n",
|
|
progname);
|
|
exit(1);
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (argc > optind)
|
|
{
|
|
fprintf(stderr,
|
|
"%s: too many command-line arguments (first is \"%s\")\n",
|
|
progname, argv[optind]);
|
|
fprintf(stderr, "Try \"%s --help\" for more information.\n",
|
|
progname);
|
|
exit(1);
|
|
}
|
|
|
|
printf("%d seconds per test\n", secs_per_test);
|
|
#if PG_O_DIRECT != 0
|
|
printf("O_DIRECT supported on this platform for open_datasync and open_sync.\n");
|
|
#else
|
|
printf("Direct I/O is not supported on this platform.\n");
|
|
#endif
|
|
}
|
|
|
|
static void
|
|
prepare_buf(void)
|
|
{
|
|
int ops;
|
|
|
|
/* write random data into buffer */
|
|
for (ops = 0; ops < XLOG_SEG_SIZE; ops++)
|
|
full_buf[ops] = random();
|
|
|
|
buf = (char *) TYPEALIGN(ALIGNOF_XLOG_BUFFER, full_buf);
|
|
}
|
|
|
|
static void
|
|
test_open(void)
|
|
{
|
|
int tmpfile;
|
|
|
|
/*
|
|
* test if we can open the target file
|
|
*/
|
|
if ((tmpfile = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR)) == -1)
|
|
die("could not open output file");
|
|
needs_unlink = 1;
|
|
if (write(tmpfile, full_buf, XLOG_SEG_SIZE) != XLOG_SEG_SIZE)
|
|
die("write failed");
|
|
|
|
/* fsync now so that dirty buffers don't skew later tests */
|
|
if (fsync(tmpfile) != 0)
|
|
die("fsync failed");
|
|
|
|
close(tmpfile);
|
|
}
|
|
|
|
static void
|
|
test_sync(int writes_per_op)
|
|
{
|
|
int tmpfile,
|
|
ops,
|
|
writes;
|
|
bool fs_warning = false;
|
|
|
|
if (writes_per_op == 1)
|
|
printf("\nCompare file sync methods using one %dkB write:\n", XLOG_BLCKSZ_K);
|
|
else
|
|
printf("\nCompare file sync methods using two %dkB writes:\n", XLOG_BLCKSZ_K);
|
|
printf("(in wal_sync_method preference order, except fdatasync\n");
|
|
printf("is Linux's default)\n");
|
|
|
|
/*
|
|
* Test open_datasync if available
|
|
*/
|
|
printf(LABEL_FORMAT, "open_datasync");
|
|
fflush(stdout);
|
|
|
|
#ifdef OPEN_DATASYNC_FLAG
|
|
if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT, 0)) == -1)
|
|
{
|
|
printf(NA_FORMAT, "n/a*\n");
|
|
fs_warning = true;
|
|
}
|
|
else
|
|
{
|
|
START_TIMER;
|
|
for (ops = 0; alarm_triggered == false; ops++)
|
|
{
|
|
for (writes = 0; writes < writes_per_op; writes++)
|
|
if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
|
die("write failed");
|
|
if (lseek(tmpfile, 0, SEEK_SET) == -1)
|
|
die("seek failed");
|
|
}
|
|
STOP_TIMER;
|
|
close(tmpfile);
|
|
}
|
|
#else
|
|
printf(NA_FORMAT, "n/a\n");
|
|
#endif
|
|
|
|
/*
|
|
* Test fdatasync if available
|
|
*/
|
|
printf(LABEL_FORMAT, "fdatasync");
|
|
fflush(stdout);
|
|
|
|
#ifdef HAVE_FDATASYNC
|
|
if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
|
|
die("could not open output file");
|
|
START_TIMER;
|
|
for (ops = 0; alarm_triggered == false; ops++)
|
|
{
|
|
for (writes = 0; writes < writes_per_op; writes++)
|
|
if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
|
die("write failed");
|
|
fdatasync(tmpfile);
|
|
if (lseek(tmpfile, 0, SEEK_SET) == -1)
|
|
die("seek failed");
|
|
}
|
|
STOP_TIMER;
|
|
close(tmpfile);
|
|
#else
|
|
printf(NA_FORMAT, "n/a\n");
|
|
#endif
|
|
|
|
/*
|
|
* Test fsync
|
|
*/
|
|
printf(LABEL_FORMAT, "fsync");
|
|
fflush(stdout);
|
|
|
|
if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
|
|
die("could not open output file");
|
|
START_TIMER;
|
|
for (ops = 0; alarm_triggered == false; ops++)
|
|
{
|
|
for (writes = 0; writes < writes_per_op; writes++)
|
|
if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
|
die("write failed");
|
|
if (fsync(tmpfile) != 0)
|
|
die("fsync failed");
|
|
if (lseek(tmpfile, 0, SEEK_SET) == -1)
|
|
die("seek failed");
|
|
}
|
|
STOP_TIMER;
|
|
close(tmpfile);
|
|
|
|
/*
|
|
* If fsync_writethrough is available, test as well
|
|
*/
|
|
printf(LABEL_FORMAT, "fsync_writethrough");
|
|
fflush(stdout);
|
|
|
|
#ifdef HAVE_FSYNC_WRITETHROUGH
|
|
if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
|
|
die("could not open output file");
|
|
START_TIMER;
|
|
for (ops = 0; alarm_triggered == false; ops++)
|
|
{
|
|
for (writes = 0; writes < writes_per_op; writes++)
|
|
if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
|
die("write failed");
|
|
if (pg_fsync_writethrough(tmpfile) != 0)
|
|
die("fsync failed");
|
|
if (lseek(tmpfile, 0, SEEK_SET) == -1)
|
|
die("seek failed");
|
|
}
|
|
STOP_TIMER;
|
|
close(tmpfile);
|
|
#else
|
|
printf(NA_FORMAT, "n/a\n");
|
|
#endif
|
|
|
|
/*
|
|
* Test open_sync if available
|
|
*/
|
|
printf(LABEL_FORMAT, "open_sync");
|
|
fflush(stdout);
|
|
|
|
#ifdef OPEN_SYNC_FLAG
|
|
if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1)
|
|
{
|
|
printf(NA_FORMAT, "n/a*\n");
|
|
fs_warning = true;
|
|
}
|
|
else
|
|
{
|
|
START_TIMER;
|
|
for (ops = 0; alarm_triggered == false; ops++)
|
|
{
|
|
for (writes = 0; writes < writes_per_op; writes++)
|
|
if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
|
die("write failed");
|
|
if (lseek(tmpfile, 0, SEEK_SET) == -1)
|
|
die("seek failed");
|
|
}
|
|
STOP_TIMER;
|
|
close(tmpfile);
|
|
}
|
|
#else
|
|
printf(NA_FORMAT, "n/a\n");
|
|
#endif
|
|
|
|
if (fs_warning)
|
|
{
|
|
printf("* This file system and its mount options do not support direct\n");
|
|
printf("I/O, e.g. ext4 in journaled mode.\n");
|
|
}
|
|
}
|
|
|
|
static void
|
|
test_open_syncs(void)
|
|
{
|
|
printf("\nCompare open_sync with different write sizes:\n");
|
|
printf("(This is designed to compare the cost of writing 16kB\n");
|
|
printf("in different write open_sync sizes.)\n");
|
|
|
|
test_open_sync(" 1 * 16kB open_sync write", 16);
|
|
test_open_sync(" 2 * 8kB open_sync writes", 8);
|
|
test_open_sync(" 4 * 4kB open_sync writes", 4);
|
|
test_open_sync(" 8 * 2kB open_sync writes", 2);
|
|
test_open_sync("16 * 1kB open_sync writes", 1);
|
|
}
|
|
|
|
/*
|
|
* Test open_sync with different size files
|
|
*/
|
|
static void
|
|
test_open_sync(const char *msg, int writes_size)
|
|
{
|
|
#ifdef OPEN_SYNC_FLAG
|
|
int tmpfile,
|
|
ops,
|
|
writes;
|
|
#endif
|
|
|
|
printf(LABEL_FORMAT, msg);
|
|
fflush(stdout);
|
|
|
|
#ifdef OPEN_SYNC_FLAG
|
|
if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1)
|
|
printf(NA_FORMAT, "n/a*\n");
|
|
else
|
|
{
|
|
START_TIMER;
|
|
for (ops = 0; alarm_triggered == false; ops++)
|
|
{
|
|
for (writes = 0; writes < 16 / writes_size; writes++)
|
|
if (write(tmpfile, buf, writes_size * 1024) !=
|
|
writes_size * 1024)
|
|
die("write failed");
|
|
if (lseek(tmpfile, 0, SEEK_SET) == -1)
|
|
die("seek failed");
|
|
}
|
|
STOP_TIMER;
|
|
close(tmpfile);
|
|
}
|
|
#else
|
|
printf(NA_FORMAT, "n/a\n");
|
|
#endif
|
|
}
|
|
|
|
static void
|
|
test_file_descriptor_sync(void)
|
|
{
|
|
int tmpfile,
|
|
ops;
|
|
|
|
/*
|
|
* Test whether fsync can sync data written on a different descriptor for
|
|
* the same file. This checks the efficiency of multi-process fsyncs
|
|
* against the same file. Possibly this should be done with writethrough
|
|
* on platforms which support it.
|
|
*/
|
|
printf("\nTest if fsync on non-write file descriptor is honored:\n");
|
|
printf("(If the times are similar, fsync() can sync data written\n");
|
|
printf("on a different descriptor.)\n");
|
|
|
|
/*
|
|
* first write, fsync and close, which is the normal behavior without
|
|
* multiple descriptors
|
|
*/
|
|
printf(LABEL_FORMAT, "write, fsync, close");
|
|
fflush(stdout);
|
|
|
|
START_TIMER;
|
|
for (ops = 0; alarm_triggered == false; ops++)
|
|
{
|
|
if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
|
|
die("could not open output file");
|
|
if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
|
die("write failed");
|
|
if (fsync(tmpfile) != 0)
|
|
die("fsync failed");
|
|
close(tmpfile);
|
|
|
|
/*
|
|
* open and close the file again to be consistent with the following
|
|
* test
|
|
*/
|
|
if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
|
|
die("could not open output file");
|
|
close(tmpfile);
|
|
}
|
|
STOP_TIMER;
|
|
|
|
/*
|
|
* Now open, write, close, open again and fsync This simulates processes
|
|
* fsyncing each other's writes.
|
|
*/
|
|
printf(LABEL_FORMAT, "write, close, fsync");
|
|
fflush(stdout);
|
|
|
|
START_TIMER;
|
|
for (ops = 0; alarm_triggered == false; ops++)
|
|
{
|
|
if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
|
|
die("could not open output file");
|
|
if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
|
die("write failed");
|
|
close(tmpfile);
|
|
/* reopen file */
|
|
if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
|
|
die("could not open output file");
|
|
if (fsync(tmpfile) != 0)
|
|
die("fsync failed");
|
|
close(tmpfile);
|
|
}
|
|
STOP_TIMER;
|
|
}
|
|
|
|
static void
|
|
test_non_sync(void)
|
|
{
|
|
int tmpfile,
|
|
ops;
|
|
|
|
/*
|
|
* Test a simple write without fsync
|
|
*/
|
|
printf("\nNon-Sync'ed %dkB writes:\n", XLOG_BLCKSZ_K);
|
|
printf(LABEL_FORMAT, "write");
|
|
fflush(stdout);
|
|
|
|
START_TIMER;
|
|
for (ops = 0; alarm_triggered == false; ops++)
|
|
{
|
|
if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
|
|
die("could not open output file");
|
|
if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
|
die("write failed");
|
|
close(tmpfile);
|
|
}
|
|
STOP_TIMER;
|
|
}
|
|
|
|
static void
|
|
signal_cleanup(int signum)
|
|
{
|
|
/* Delete the file if it exists. Ignore errors */
|
|
if (needs_unlink)
|
|
unlink(filename);
|
|
/* Finish incomplete line on stdout */
|
|
puts("");
|
|
exit(signum);
|
|
}
|
|
|
|
#ifdef HAVE_FSYNC_WRITETHROUGH
|
|
|
|
static int
|
|
pg_fsync_writethrough(int fd)
|
|
{
|
|
#ifdef WIN32
|
|
return _commit(fd);
|
|
#elif defined(F_FULLFSYNC)
|
|
return (fcntl(fd, F_FULLFSYNC, 0) == -1) ? -1 : 0;
|
|
#else
|
|
errno = ENOSYS;
|
|
return -1;
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* print out the writes per second for tests
|
|
*/
|
|
static void
|
|
print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
|
|
{
|
|
double total_time = (stop_t.tv_sec - start_t.tv_sec) +
|
|
(stop_t.tv_usec - start_t.tv_usec) * 0.000001;
|
|
double per_second = ops / total_time;
|
|
|
|
printf(OPS_FORMAT "\n", per_second);
|
|
}
|
|
|
|
#ifndef WIN32
|
|
static void
|
|
process_alarm(int sig)
|
|
{
|
|
alarm_triggered = true;
|
|
}
|
|
#else
|
|
static DWORD WINAPI
|
|
process_alarm(LPVOID param)
|
|
{
|
|
/* WIN32 doesn't support alarm, so we create a thread and sleep here */
|
|
Sleep(secs_per_test * 1000);
|
|
alarm_triggered = true;
|
|
ExitThread(0);
|
|
}
|
|
#endif
|
|
|
|
static void
|
|
die(const char *str)
|
|
{
|
|
fprintf(stderr, "%s: %s\n", str, strerror(errno));
|
|
exit(1);
|
|
}
|