1
0
mirror of https://github.com/postgres/postgres.git synced 2025-04-20 00:42:27 +03:00
postgres/contrib/pg_test_fsync/pg_test_fsync.c
Tom Lane ad8fb69ccc Avoid depending on non-POSIX behavior of fcntl(2).
The POSIX standard does not say that the success return value for
fcntl(F_SETFD) and fcntl(F_SETFL) is zero; it says only that it's not -1.
We had several calls that were making the stronger assumption.  Adjust
them to test specifically for -1 for strict spec compliance.

The standard further leaves open the possibility that the O_NONBLOCK
flag bit is not the only active one in F_SETFL's argument.  Formally,
therefore, one ought to get the current flags with F_GETFL and store
them back with only the O_NONBLOCK bit changed when trying to change
the nonblock state.  In port/noblock.c, we were doing the full pushup
in pg_set_block but not in pg_set_noblock, which is just weird.  Make
both of them do it properly, since they have little business making
any assumptions about the socket they're handed.  The other places
where we're issuing F_SETFL are working with FDs we just got from
pipe(2), so it's reasonable to assume the FDs' properties are all
default, so I didn't bother adding F_GETFL steps there.

Also, while pg_set_block deserves some points for trying to do things
right, somebody had decided that it'd be even better to cast fcntl's
third argument to "long".  Which is completely loony, because POSIX
clearly says the third argument for an F_SETFL call is "int".

Given the lack of field complaints, these missteps apparently are not
of significance on any common platforms.  But they're still wrong,
so back-patch to all supported branches.

Discussion: https://postgr.es/m/30882.1492800880@sss.pgh.pa.us
2017-04-21 15:55:56 -04:00

597 lines
13 KiB
C

/*
* pg_test_fsync.c
* tests all supported fsync() methods
*/
#include "postgres_fe.h"
#include <sys/stat.h>
#include <sys/time.h>
#include <fcntl.h>
#include <time.h>
#include <unistd.h>
#include <signal.h>
#include "getopt_long.h"
#include "access/xlogdefs.h"
/*
* put the temp files in the local directory
* unless the user specifies otherwise
*/
#define FSYNC_FILENAME "./pg_test_fsync.out"
#define XLOG_BLCKSZ_K (XLOG_BLCKSZ / 1024)
#define LABEL_FORMAT " %-32s"
#define NA_FORMAT "%18s"
#define OPS_FORMAT "%9.3f ops/sec"
/*
 * Timing macros.  These are macros to avoid timing the function call
 * overhead.
 *
 * START_TIMER clears alarm_triggered, arranges for it to be set again after
 * secs_per_test seconds, and records the start time in start_t.
 *
 * STOP_TIMER records the stop time in stop_t and prints the rate.  It
 * expects an integer variable named "ops" to be in scope at the point of use.
 */
#ifndef WIN32
#define START_TIMER \
do { \
    alarm_triggered = false; \
    alarm(secs_per_test); \
    gettimeofday(&start_t, NULL); \
} while (0)
#else
/*
 * WIN32 doesn't support alarm, so we create a thread and sleep there.
 *
 * CreateThread() reports failure by returning NULL, not
 * INVALID_HANDLE_VALUE, so that is what must be tested.  Nobody waits on the
 * thread, so its handle is closed immediately; closing the handle does not
 * terminate the thread, it only avoids leaking one handle per timed test.
 */
#define START_TIMER \
do { \
    HANDLE      timer_thread_; \
    alarm_triggered = false; \
    timer_thread_ = CreateThread(NULL, 0, process_alarm, NULL, 0, NULL); \
    if (timer_thread_ == NULL) \
    { \
        fprintf(stderr, "Cannot create thread for alarm\n"); \
        exit(1); \
    } \
    CloseHandle(timer_thread_); \
    gettimeofday(&start_t, NULL); \
} while (0)
#endif
#define STOP_TIMER \
do { \
    gettimeofday(&stop_t, NULL); \
    print_elapse(start_t, stop_t, ops); \
} while (0)
/* Program name derived from argv[0]; used as a prefix in messages */
static const char *progname;

/* Duration of each timed test in seconds; changed with -s */
static int  secs_per_test = 2;

/*
 * Nonzero once the test file has been created.  It is read from the
 * signal_cleanup() handler, hence volatile sig_atomic_t.
 */
static volatile sig_atomic_t needs_unlink = 0;

/*
 * full_buf holds the data written by the tests; buf points at an aligned
 * position inside it (see prepare_buf).  filename is the test file path,
 * replaced by the -f argument when given.
 */
static char full_buf[XLOG_SEG_SIZE],
           *buf,
           *filename = FSYNC_FILENAME;

/* Start/stop timestamps maintained by START_TIMER / STOP_TIMER */
static struct timeval start_t,
            stop_t;

/*
 * Set asynchronously when the current timed test should stop: by the SIGALRM
 * handler on non-Windows builds, by the sleeper thread on WIN32.  The test
 * loops poll it, so it must be volatile to guarantee that every iteration
 * re-reads it, and sig_atomic_t so that the handler's store is well defined.
 */
static volatile sig_atomic_t alarm_triggered = false;
/* Command-line handling and buffer setup */
static void handle_args(int argc, char *argv[]);
static void prepare_buf(void);

/* The individual tests, listed in the order main() runs them */
static void test_open(void);
static void test_sync(int writes_per_op);
static void test_open_syncs(void);
static void test_open_sync(const char *msg, int writes_size);
static void test_file_descriptor_sync(void);
static void test_non_sync(void);

/* Timer expiry (signal handler, or thread routine on WIN32) and cleanup */
#ifndef WIN32
static void process_alarm(int sig);
#else
static DWORD WINAPI process_alarm(LPVOID param);
#endif
static void signal_cleanup(int sig);

/* Low-level helpers */
#ifdef HAVE_FSYNC_WRITETHROUGH
static int  pg_fsync_writethrough(int fd);
#endif
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);
static void die(const char *str);
/*
 * Program entry point: parse the command line, install the signal handlers,
 * fill the write buffer, create the test file, run every test in turn and
 * finally remove the test file.
 */
int
main(int argc, char *argv[])
{
    /* Signals on which the test file must not be left behind */
    static const int cleanup_signals[] = {
        SIGINT,
        SIGTERM,
#ifdef SIGHUP
        /* Not defined on win32 */
        SIGHUP,
#endif
    };
    size_t      sig_idx;
    int         writes_per_op;

    progname = get_progname(argv[0]);

    handle_args(argc, argv);

    /* Prevent leaving behind the test file */
    for (sig_idx = 0;
         sig_idx < sizeof(cleanup_signals) / sizeof(cleanup_signals[0]);
         sig_idx++)
        signal(cleanup_signals[sig_idx], signal_cleanup);

#ifndef WIN32
    signal(SIGALRM, process_alarm);
#endif

    prepare_buf();

    test_open();

    /* Test using 1 XLOG_BLCKSZ write, then using 2 XLOG_BLCKSZ writes */
    for (writes_per_op = 1; writes_per_op <= 2; writes_per_op++)
        test_sync(writes_per_op);

    test_open_syncs();

    test_file_descriptor_sync();

    test_non_sync();

    unlink(filename);

    return 0;
}
/*
 * Parse the command line.
 *
 * Recognizes --help/-h/-? and --version/-V as the first argument (each
 * prints its message and exits 0), then -f/--filename FILENAME and
 * -s/--secs-per-test SECS.  On success it stores the results in the
 * file-level variables filename and secs_per_test and prints the chosen test
 * duration plus a note on whether direct I/O is available.
 *
 * Any invalid option, invalid option value, or stray non-option argument is
 * reported on stderr and terminates the program with exit status 1.
 */
static void
handle_args(int argc, char *argv[])
{
    static struct option long_options[] = {
        {"filename", required_argument, NULL, 'f'},
        {"secs-per-test", required_argument, NULL, 's'},
        {NULL, 0, NULL, 0}
    };

    int         option;         /* Command line option */
    int         optindex = 0;   /* used by getopt_long */

    if (argc > 1)
    {
        if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0 ||
            strcmp(argv[1], "-?") == 0)
        {
            printf("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n", progname);
            exit(0);
        }
        if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
        {
            puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
            exit(0);
        }
    }

    while ((option = getopt_long(argc, argv, "f:s:",
                                 long_options, &optindex)) != -1)
    {
        switch (option)
        {
            case 'f':
                filename = strdup(optarg);
                if (filename == NULL)
                {
                    fprintf(stderr, "%s: out of memory\n", progname);
                    exit(1);
                }
                break;

            case 's':
                {
                    char       *endptr;
                    long        val;

                    /*
                     * Insist on a complete, positive decimal number that
                     * fits in an int.  A value of zero would make alarm()
                     * cancel the timer instead of arming it, so the timed
                     * loops would never terminate; atoi() also silently
                     * accepted garbage and negative values.
                     */
                    errno = 0;
                    val = strtol(optarg, &endptr, 10);
                    if (endptr == optarg || *endptr != '\0' ||
                        errno != 0 || val < 1 ||
                        val != (long) (int) val)
                    {
                        fprintf(stderr,
                                "%s: invalid argument for option %s: \"%s\"\n",
                                progname, "--secs-per-test", optarg);
                        fprintf(stderr,
                                "Try \"%s --help\" for more information.\n",
                                progname);
                        exit(1);
                    }
                    secs_per_test = (int) val;
                }
                break;

            default:
                fprintf(stderr, "Try \"%s --help\" for more information.\n",
                        progname);
                exit(1);
                break;
        }
    }

    if (argc > optind)
    {
        fprintf(stderr,
                "%s: too many command-line arguments (first is \"%s\")\n",
                progname, argv[optind]);
        fprintf(stderr, "Try \"%s --help\" for more information.\n",
                progname);
        exit(1);
    }

    printf("%d seconds per test\n", secs_per_test);
#if PG_O_DIRECT != 0
    printf("O_DIRECT supported on this platform for open_datasync and open_sync.\n");
#else
    printf("Direct I/O is not supported on this platform.\n");
#endif
}
/*
 * Fill full_buf with random bytes and point buf at the first position inside
 * it that satisfies ALIGNOF_XLOG_BUFFER alignment.
 */
static void
prepare_buf(void)
{
    char       *cursor;
    char       *const limit = full_buf + XLOG_SEG_SIZE;

    /* write random data into buffer, one byte per random() call */
    for (cursor = full_buf; cursor < limit; cursor++)
        *cursor = random();

    buf = (char *) TYPEALIGN(ALIGNOF_XLOG_BUFFER, full_buf);
}
/*
 * Create (or open) the test file, fill it with XLOG_SEG_SIZE bytes from
 * full_buf and flush it to disk.  Exits through die() if any step fails.
 */
static void
test_open(void)
{
    int         fd;

    /* test if we can open the target file */
    fd = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
    if (fd == -1)
        die("could not open output file");

    /* from here on the file exists and should be removed on interrupt */
    needs_unlink = 1;

    if (write(fd, full_buf, XLOG_SEG_SIZE) != XLOG_SEG_SIZE)
        die("write failed");

    /* fsync now so that dirty buffers don't skew later tests */
    if (fsync(fd) != 0)
        die("fsync failed");

    close(fd);
}
/*
 * Time each file sync method in turn and print one result line per method.
 *
 * writes_per_op is the number of XLOG_BLCKSZ-sized writes issued before each
 * sync; the heading says "one" when it is 1 and "two" otherwise.
 *
 * Methods whose compile-time symbol is missing print "n/a".  The two
 * open-flag methods print "n/a*" when the file cannot be opened with the
 * required flags, and an explanatory footnote is then added at the end.
 * Any other failure, including a failing sync call, aborts through die().
 */
static void
test_sync(int writes_per_op)
{
    int         tmpfile,
                ops,
                writes;
    bool        fs_warning = false;

    if (writes_per_op == 1)
        printf("\nCompare file sync methods using one %dkB write:\n", XLOG_BLCKSZ_K);
    else
        printf("\nCompare file sync methods using two %dkB writes:\n", XLOG_BLCKSZ_K);
    printf("(in wal_sync_method preference order, except fdatasync\n");
    printf("is Linux's default)\n");

    /*
     * Test open_datasync if available
     */
    printf(LABEL_FORMAT, "open_datasync");
    fflush(stdout);

#ifdef OPEN_DATASYNC_FLAG
    if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT, 0)) == -1)
    {
        printf(NA_FORMAT, "n/a*\n");
        fs_warning = true;
    }
    else
    {
        START_TIMER;
        for (ops = 0; alarm_triggered == false; ops++)
        {
            for (writes = 0; writes < writes_per_op; writes++)
                if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
                    die("write failed");
            if (lseek(tmpfile, 0, SEEK_SET) == -1)
                die("seek failed");
        }
        STOP_TIMER;
        close(tmpfile);
    }
#else
    printf(NA_FORMAT, "n/a\n");
#endif

    /*
     * Test fdatasync if available
     */
    printf(LABEL_FORMAT, "fdatasync");
    fflush(stdout);

#ifdef HAVE_FDATASYNC
    if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
        die("could not open output file");
    START_TIMER;
    for (ops = 0; alarm_triggered == false; ops++)
    {
        for (writes = 0; writes < writes_per_op; writes++)
            if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
                die("write failed");
        /* a failing sync would make the reported rate meaningless */
        if (fdatasync(tmpfile) != 0)
            die("fdatasync failed");
        if (lseek(tmpfile, 0, SEEK_SET) == -1)
            die("seek failed");
    }
    STOP_TIMER;
    close(tmpfile);
#else
    printf(NA_FORMAT, "n/a\n");
#endif

    /*
     * Test fsync
     */
    printf(LABEL_FORMAT, "fsync");
    fflush(stdout);

    if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
        die("could not open output file");
    START_TIMER;
    for (ops = 0; alarm_triggered == false; ops++)
    {
        for (writes = 0; writes < writes_per_op; writes++)
            if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
                die("write failed");
        if (fsync(tmpfile) != 0)
            die("fsync failed");
        if (lseek(tmpfile, 0, SEEK_SET) == -1)
            die("seek failed");
    }
    STOP_TIMER;
    close(tmpfile);

    /*
     * If fsync_writethrough is available, test as well
     */
    printf(LABEL_FORMAT, "fsync_writethrough");
    fflush(stdout);

#ifdef HAVE_FSYNC_WRITETHROUGH
    if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
        die("could not open output file");
    START_TIMER;
    for (ops = 0; alarm_triggered == false; ops++)
    {
        for (writes = 0; writes < writes_per_op; writes++)
            if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
                die("write failed");
        if (pg_fsync_writethrough(tmpfile) != 0)
            die("fsync failed");
        if (lseek(tmpfile, 0, SEEK_SET) == -1)
            die("seek failed");
    }
    STOP_TIMER;
    close(tmpfile);
#else
    printf(NA_FORMAT, "n/a\n");
#endif

    /*
     * Test open_sync if available
     */
    printf(LABEL_FORMAT, "open_sync");
    fflush(stdout);

#ifdef OPEN_SYNC_FLAG
    if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1)
    {
        printf(NA_FORMAT, "n/a*\n");
        fs_warning = true;
    }
    else
    {
        START_TIMER;
        for (ops = 0; alarm_triggered == false; ops++)
        {
            for (writes = 0; writes < writes_per_op; writes++)
                if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
                    die("write failed");
            if (lseek(tmpfile, 0, SEEK_SET) == -1)
                die("seek failed");
        }
        STOP_TIMER;
        close(tmpfile);
    }
#else
    printf(NA_FORMAT, "n/a\n");
#endif

    if (fs_warning)
    {
        printf("* This file system and its mount options do not support direct\n");
        printf("I/O, e.g. ext4 in journaled mode.\n");
    }
}
/*
 * Run test_open_sync() once per write size, so that the cost of writing
 * 16kB in differently sized pieces can be compared.
 */
static void
test_open_syncs(void)
{
    /* result-line label and the size of each write in kB */
    static const struct
    {
        const char *label;
        int         kb_per_write;
    }           cases[] = {
        {" 1 * 16kB open_sync write", 16},
        {" 2 * 8kB open_sync writes", 8},
        {" 4 * 4kB open_sync writes", 4},
        {" 8 * 2kB open_sync writes", 2},
        {"16 * 1kB open_sync writes", 1}
    };
    size_t      i;

    printf("\nCompare open_sync with different write sizes:\n");
    printf("(This is designed to compare the cost of writing 16kB\n");
    printf("in different write open_sync sizes.)\n");

    for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
        test_open_sync(cases[i].label, cases[i].kb_per_write);
}
/*
 * Time open_sync writes of a given size.
 *
 * msg is the label printed at the start of the result line.  writes_size is
 * the size of each write in kB; every timed operation issues
 * 16 / writes_size such writes and then seeks back to the start of the file.
 *
 * Prints "n/a" when OPEN_SYNC_FLAG is not defined and "n/a*" when the file
 * cannot be opened with the required flags.
 */
static void
test_open_sync(const char *msg, int writes_size)
{
#ifdef OPEN_SYNC_FLAG
    int         fd,
                ops,
                writes;
    int         chunk_bytes;
#endif

    printf(LABEL_FORMAT, msg);
    fflush(stdout);

#ifdef OPEN_SYNC_FLAG
    fd = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0);
    if (fd == -1)
    {
        printf(NA_FORMAT, "n/a*\n");
        return;
    }

    chunk_bytes = writes_size * 1024;

    START_TIMER;
    for (ops = 0; alarm_triggered == false; ops++)
    {
        for (writes = 0; writes < 16 / writes_size; writes++)
        {
            if (write(fd, buf, chunk_bytes) != chunk_bytes)
                die("write failed");
        }
        if (lseek(fd, 0, SEEK_SET) == -1)
            die("seek failed");
    }
    STOP_TIMER;
    close(fd);
#else
    printf(NA_FORMAT, "n/a\n");
#endif
}
/*
 * Compare fsync on the descriptor that did the write against fsync on a
 * freshly opened descriptor for the same file, printing one result line for
 * each variant.
 */
static void
test_file_descriptor_sync(void)
{
    int         fd;
    int         ops;

    /*
     * Test whether fsync can sync data written on a different descriptor for
     * the same file.  This checks the efficiency of multi-process fsyncs
     * against the same file.  Possibly this should be done with writethrough
     * on platforms which support it.
     */
    printf("\nTest if fsync on non-write file descriptor is honored:\n");
    printf("(If the times are similar, fsync() can sync data written\n");
    printf("on a different descriptor.)\n");

    /*
     * Baseline: write, fsync and close on a single descriptor, which is the
     * normal behavior without multiple descriptors.
     */
    printf(LABEL_FORMAT, "write, fsync, close");
    fflush(stdout);

    START_TIMER;
    ops = 0;
    while (!alarm_triggered)
    {
        fd = open(filename, O_RDWR, 0);
        if (fd == -1)
            die("could not open output file");
        if (write(fd, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
            die("write failed");
        if (fsync(fd) != 0)
            die("fsync failed");
        close(fd);

        /*
         * Open and close the file once more so that this variant performs
         * as many opens as the one below.
         */
        fd = open(filename, O_RDWR, 0);
        if (fd == -1)
            die("could not open output file");
        close(fd);

        ops++;
    }
    STOP_TIMER;

    /*
     * Now open, write, close, open again and fsync.  This simulates
     * processes fsyncing each other's writes.
     */
    printf(LABEL_FORMAT, "write, close, fsync");
    fflush(stdout);

    START_TIMER;
    ops = 0;
    while (!alarm_triggered)
    {
        fd = open(filename, O_RDWR, 0);
        if (fd == -1)
            die("could not open output file");
        if (write(fd, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
            die("write failed");
        close(fd);

        /* reopen file and sync through the new descriptor */
        fd = open(filename, O_RDWR, 0);
        if (fd == -1)
            die("could not open output file");
        if (fsync(fd) != 0)
            die("fsync failed");
        close(fd);

        ops++;
    }
    STOP_TIMER;
}
/*
 * Time plain writes with no sync at all: each operation opens the file,
 * writes one XLOG_BLCKSZ block and closes it again.
 */
static void
test_non_sync(void)
{
    int         fd;
    int         ops;

    /* Test a simple write without fsync */
    printf("\nNon-Sync'ed %dkB writes:\n", XLOG_BLCKSZ_K);
    printf(LABEL_FORMAT, "write");
    fflush(stdout);

    START_TIMER;
    ops = 0;
    while (!alarm_triggered)
    {
        fd = open(filename, O_RDWR, 0);
        if (fd == -1)
            die("could not open output file");
        if (write(fd, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
            die("write failed");
        close(fd);

        ops++;
    }
    STOP_TIMER;
}
/* Fallback for platforms whose headers do not provide the constant */
#ifndef STDOUT_FILENO
#define STDOUT_FILENO 1
#endif

/*
 * Signal handler for the termination signals installed in main(): remove
 * the test file if it has been created, terminate the current output line
 * and exit with the signal number as the exit status.
 *
 * Only async-signal-safe calls are used here (unlink, write, _exit).  The
 * stdio routines and exit() must not be called from a handler, because the
 * signal may arrive while the main program is itself inside stdio.  As a
 * consequence, output still sitting in the stdout buffer is not flushed.
 */
static void
signal_cleanup(int signum)
{
    int         rc;

    /* Delete the file if it exists. Ignore errors */
    if (needs_unlink)
        unlink(filename);

    /* Finish incomplete line on stdout */
    rc = write(STDOUT_FILENO, "\n", 1);
    (void) rc;                  /* nothing useful can be done on failure */

    _exit(signum);
}
#ifdef HAVE_FSYNC_WRITETHROUGH
/*
 * Flush fd using the platform's write-through primitive: _commit() on WIN32,
 * fcntl(F_FULLFSYNC) where that command exists.
 *
 * Returns 0 on success and -1 on failure (on WIN32, whatever _commit()
 * returns).  Where neither primitive is available it fails with errno set to
 * ENOSYS.
 */
static int
pg_fsync_writethrough(int fd)
{
#ifdef WIN32
    return _commit(fd);
#elif defined(F_FULLFSYNC)
    /* fcntl's success value is only guaranteed to differ from -1 */
    if (fcntl(fd, F_FULLFSYNC, 0) == -1)
        return -1;
    return 0;
#else
    errno = ENOSYS;
    return -1;
#endif
}
#endif
/*
 * Print the number of operations per second for one test.
 *
 * start_t and stop_t delimit the measured interval and ops is the number of
 * operations completed within it.  The result is printed using OPS_FORMAT
 * followed by a newline.
 */
static void
print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
{
    double      total_time;
    double      per_second;

    /* whole seconds plus the microsecond remainder scaled to seconds */
    total_time = (stop_t.tv_sec - start_t.tv_sec) +
        (stop_t.tv_usec - start_t.tv_usec) * 0.000001;
    per_second = ops / total_time;

    printf(OPS_FORMAT "\n", per_second);
}
#ifndef WIN32
/*
 * SIGALRM handler (installed in main): sets alarm_triggered, which the timed
 * test loops poll to know when to stop.  The signal number is not used.
 */
static void
process_alarm(int sig)
{
alarm_triggered = true;
}
#else
/*
 * WIN32 replacement for the alarm signal: the thread routine started by
 * START_TIMER.  It sleeps for secs_per_test seconds, sets alarm_triggered
 * and then ends the thread.  The thread parameter is not used.
 */
static DWORD WINAPI
process_alarm(LPVOID param)
{
/* WIN32 doesn't support alarm, so we create a thread and sleep here */
Sleep(secs_per_test * 1000);
alarm_triggered = true;
ExitThread(0);
}
#endif
/*
 * Report a fatal error and terminate.
 *
 * Prints str followed by the system's description of the current errno value
 * to stderr, then exits with status 1.  Callers invoke it right after a
 * failed system call so that errno still describes that failure.
 */
static void
die(const char *str)
{
    const char *reason = strerror(errno);

    fprintf(stderr, "%s: %s\n", str, reason);
    exit(1);
}