mirror of
https://github.com/postgres/postgres.git
synced 2025-04-18 13:44:19 +03:00
As noted by the comment at the top of port/pqsignal.c, Windows frontend programs can only use pqsignal() with the 6 signals required by C. Most places avoid using invalid signals via #ifndef WIN32, but initdb and pg_test_fsync check whether the signal itself is defined, which doesn't work because win32_port.h defines many extra signals for the signal emulation code. pg_regress seems to have missed the memo completely. These issues aren't causing any real problems today because nobody checks the return value of pqsignal(), but a follow-up commit will add some error checking. To fix, surround all frontend calls to pqsignal() that use signals that are invalid on Windows with #ifndef WIN32. We cannot simply skip defining the extra signals in win32_port.h for frontends because they are needed in places such as pgkill(). Reviewed-by: Thomas Munro Discussion: https://postgr.es/m/Z4chOKfnthRH71mw%40nathan
654 lines
15 KiB
C
654 lines
15 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* pg_test_fsync --- tests all supported fsync() methods
|
|
*
|
|
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
|
|
*
|
|
* src/bin/pg_test_fsync/pg_test_fsync.c
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
|
|
#include "postgres_fe.h"
|
|
|
|
#include <limits.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/time.h>
|
|
#include <fcntl.h>
|
|
#include <time.h>
|
|
#include <unistd.h>
|
|
#include <signal.h>
|
|
|
|
#include "common/logging.h"
|
|
#include "common/pg_prng.h"
|
|
#include "getopt_long.h"
|
|
|
|
/*
 * Default path of the scratch file: the current directory.  The -f option
 * replaces it.
 */
#define FSYNC_FILENAME	"./pg_test_fsync.out"

/* XLOG_BLCKSZ expressed in kilobytes, for display purposes */
#define XLOG_BLCKSZ_K	(XLOG_BLCKSZ / 1024)

/* printf formats for the label and result columns of the report */
#define LABEL_FORMAT	" %-30s"
#define NA_FORMAT		"%21s\n"
/* translator: maintain alignment with NA_FORMAT */
#define OPS_FORMAT		gettext_noop("%13.3f ops/sec %6.0f usecs/op\n")

/* number of microseconds in one second */
#define USECS_SEC		1000000
|
|
|
|
/*
 * START_TIMER clears alarm_triggered, arranges for it to be set again after
 * secs_per_test seconds, and stores the current time in start_t.
 *
 * These are macros to avoid timing the function call overhead.
 */
#ifndef WIN32
#define START_TIMER \
do { \
	alarm_triggered = false; \
	alarm(secs_per_test); \
	gettimeofday(&start_t, NULL); \
} while (0)
#else
/*
 * WIN32 doesn't support alarm, so we create a thread and sleep there.
 *
 * CreateThread() reports failure by returning NULL, not
 * INVALID_HANDLE_VALUE, so that is what must be tested.  The handle is not
 * used afterwards; closing it immediately avoids leaking one handle per
 * test and does not affect the running thread.
 */
#define START_TIMER \
do { \
	HANDLE		timer_thread; \
	alarm_triggered = false; \
	timer_thread = CreateThread(NULL, 0, process_alarm, NULL, 0, NULL); \
	if (timer_thread == NULL) \
		pg_fatal("could not create thread for alarm"); \
	CloseHandle(timer_thread); \
	gettimeofday(&start_t, NULL); \
} while (0)
#endif
|
|
|
|
/*
 * STOP_TIMER stores the current time in stop_t and reports the result for
 * the caller's local "ops" counter via print_elapse().
 */
#define STOP_TIMER \
do { \
	gettimeofday(&stop_t, NULL); \
	print_elapse(start_t, stop_t, ops); \
} while (0)
|
|
|
|
|
|
static const char *progname;
|
|
|
|
static unsigned int secs_per_test = 5;
|
|
static int needs_unlink = 0;
|
|
static char full_buf[DEFAULT_XLOG_SEG_SIZE],
|
|
*buf,
|
|
*filename = FSYNC_FILENAME;
|
|
static struct timeval start_t,
|
|
stop_t;
|
|
static sig_atomic_t alarm_triggered = false;
|
|
|
|
|
|
/* option parsing and buffer set-up */
static void handle_args(int argc, char *argv[]);
static void prepare_buf(void);

/* the individual tests */
static void test_open(void);
static void test_sync(int writes_per_op);
static void test_open_syncs(void);
static void test_open_sync(const char *msg, int writes_size);
static void test_file_descriptor_sync(void);
static void test_non_sync(void);

/* timer expiry and interrupt handling */
#ifdef WIN32
static DWORD WINAPI process_alarm(LPVOID param);
#else
static void process_alarm(SIGNAL_ARGS);
#endif
static void signal_cleanup(SIGNAL_ARGS);

#ifdef HAVE_FSYNC_WRITETHROUGH
static int	pg_fsync_writethrough(int fd);
#endif

/* result reporting */
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);
|
|
|
|
/*
 * Hand the translated message, followed by the %m expansion, to pg_fatal().
 */
#define die(msg) pg_fatal("%s: %m", _(msg))
|
|
|
|
|
|
int
|
|
main(int argc, char *argv[])
|
|
{
|
|
pg_logging_init(argv[0]);
|
|
set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_fsync"));
|
|
progname = get_progname(argv[0]);
|
|
|
|
handle_args(argc, argv);
|
|
|
|
/* Prevent leaving behind the test file */
|
|
pqsignal(SIGINT, signal_cleanup);
|
|
pqsignal(SIGTERM, signal_cleanup);
|
|
|
|
/* the following are not valid on Windows */
|
|
#ifndef WIN32
|
|
pqsignal(SIGALRM, process_alarm);
|
|
pqsignal(SIGHUP, signal_cleanup);
|
|
#endif
|
|
|
|
pg_prng_seed(&pg_global_prng_state, (uint64) time(NULL));
|
|
|
|
prepare_buf();
|
|
|
|
test_open();
|
|
|
|
/* Test using 1 XLOG_BLCKSZ write */
|
|
test_sync(1);
|
|
|
|
/* Test using 2 XLOG_BLCKSZ writes */
|
|
test_sync(2);
|
|
|
|
test_open_syncs();
|
|
|
|
test_file_descriptor_sync();
|
|
|
|
test_non_sync();
|
|
|
|
unlink(filename);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
handle_args(int argc, char *argv[])
|
|
{
|
|
static struct option long_options[] = {
|
|
{"filename", required_argument, NULL, 'f'},
|
|
{"secs-per-test", required_argument, NULL, 's'},
|
|
{NULL, 0, NULL, 0}
|
|
};
|
|
|
|
int option; /* Command line option */
|
|
int optindex = 0; /* used by getopt_long */
|
|
unsigned long optval; /* used for option parsing */
|
|
char *endptr;
|
|
|
|
if (argc > 1)
|
|
{
|
|
if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
|
|
{
|
|
printf(_("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n"), progname);
|
|
exit(0);
|
|
}
|
|
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
|
|
{
|
|
puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
|
|
exit(0);
|
|
}
|
|
}
|
|
|
|
while ((option = getopt_long(argc, argv, "f:s:",
|
|
long_options, &optindex)) != -1)
|
|
{
|
|
switch (option)
|
|
{
|
|
case 'f':
|
|
filename = pg_strdup(optarg);
|
|
break;
|
|
|
|
case 's':
|
|
errno = 0;
|
|
optval = strtoul(optarg, &endptr, 10);
|
|
|
|
if (endptr == optarg || *endptr != '\0' ||
|
|
errno != 0 || optval != (unsigned int) optval)
|
|
{
|
|
pg_log_error("invalid argument for option %s", "--secs-per-test");
|
|
pg_log_error_hint("Try \"%s --help\" for more information.", progname);
|
|
exit(1);
|
|
}
|
|
|
|
secs_per_test = (unsigned int) optval;
|
|
if (secs_per_test == 0)
|
|
pg_fatal("%s must be in range %u..%u",
|
|
"--secs-per-test", 1, UINT_MAX);
|
|
break;
|
|
|
|
default:
|
|
/* getopt_long already emitted a complaint */
|
|
pg_log_error_hint("Try \"%s --help\" for more information.", progname);
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
if (argc > optind)
|
|
{
|
|
pg_log_error("too many command-line arguments (first is \"%s\")",
|
|
argv[optind]);
|
|
pg_log_error_hint("Try \"%s --help\" for more information.", progname);
|
|
exit(1);
|
|
}
|
|
|
|
printf(ngettext("%u second per test\n",
|
|
"%u seconds per test\n",
|
|
secs_per_test),
|
|
secs_per_test);
|
|
#if defined(O_DIRECT)
|
|
printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
|
|
#elif defined(F_NOCACHE)
|
|
printf(_("F_NOCACHE supported on this platform for open_datasync and open_sync.\n"));
|
|
#else
|
|
printf(_("Direct I/O is not supported on this platform.\n"));
|
|
#endif
|
|
}
|
|
|
|
static void
|
|
prepare_buf(void)
|
|
{
|
|
int ops;
|
|
|
|
/* write random data into buffer */
|
|
for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++)
|
|
full_buf[ops] = (char) pg_prng_int32(&pg_global_prng_state);
|
|
|
|
buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf);
|
|
}
|
|
|
|
static void
|
|
test_open(void)
|
|
{
|
|
int tmpfile;
|
|
|
|
/*
|
|
* test if we can open the target file
|
|
*/
|
|
if ((tmpfile = open(filename, O_RDWR | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR)) == -1)
|
|
die("could not open output file");
|
|
needs_unlink = 1;
|
|
if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) !=
|
|
DEFAULT_XLOG_SEG_SIZE)
|
|
die("write failed");
|
|
|
|
/* fsync now so that dirty buffers don't skew later tests */
|
|
if (fsync(tmpfile) != 0)
|
|
die("fsync failed");
|
|
|
|
close(tmpfile);
|
|
}
|
|
|
|
/*
 * Open a file with direct I/O where the platform offers it.
 *
 * If O_DIRECT is defined it is added to the open flags.  Otherwise, if
 * F_NOCACHE is defined, it is applied to the descriptor after opening; a
 * failure there closes the descriptor and is reported as an open failure
 * with errno preserved.  Returns the descriptor, or -1 on error.
 */
static int
open_direct(const char *path, int flags, mode_t mode)
{
#ifdef O_DIRECT
	int			fd = open(path, flags | O_DIRECT, mode);
#else
	int			fd = open(path, flags, mode);

#ifdef F_NOCACHE
	if (fd >= 0 && fcntl(fd, F_NOCACHE, 1) < 0)
	{
		/* close() may clobber errno, so save and restore it */
		int			save_errno = errno;

		close(fd);
		errno = save_errno;
		fd = -1;
	}
#endif
#endif

	return fd;
}
|
|
|
|
static void
|
|
test_sync(int writes_per_op)
|
|
{
|
|
int tmpfile,
|
|
ops,
|
|
writes;
|
|
bool fs_warning = false;
|
|
|
|
if (writes_per_op == 1)
|
|
printf(_("\nCompare file sync methods using one %dkB write:\n"), XLOG_BLCKSZ_K);
|
|
else
|
|
printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K);
|
|
printf(_("(in \"wal_sync_method\" preference order, except fdatasync is Linux's default)\n"));
|
|
|
|
/*
|
|
* Test open_datasync if available
|
|
*/
|
|
printf(LABEL_FORMAT, "open_datasync");
|
|
fflush(stdout);
|
|
|
|
#ifdef O_DSYNC
|
|
if ((tmpfile = open_direct(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1)
|
|
{
|
|
printf(NA_FORMAT, _("n/a*"));
|
|
fs_warning = true;
|
|
}
|
|
else
|
|
{
|
|
START_TIMER;
|
|
for (ops = 0; alarm_triggered == false; ops++)
|
|
{
|
|
for (writes = 0; writes < writes_per_op; writes++)
|
|
if (pg_pwrite(tmpfile,
|
|
buf,
|
|
XLOG_BLCKSZ,
|
|
writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
|
die("write failed");
|
|
}
|
|
STOP_TIMER;
|
|
close(tmpfile);
|
|
}
|
|
#else
|
|
printf(NA_FORMAT, _("n/a"));
|
|
#endif
|
|
|
|
/*
|
|
* Test fdatasync if available
|
|
*/
|
|
printf(LABEL_FORMAT, "fdatasync");
|
|
fflush(stdout);
|
|
|
|
if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
|
|
die("could not open output file");
|
|
START_TIMER;
|
|
for (ops = 0; alarm_triggered == false; ops++)
|
|
{
|
|
for (writes = 0; writes < writes_per_op; writes++)
|
|
if (pg_pwrite(tmpfile,
|
|
buf,
|
|
XLOG_BLCKSZ,
|
|
writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
|
die("write failed");
|
|
fdatasync(tmpfile);
|
|
}
|
|
STOP_TIMER;
|
|
close(tmpfile);
|
|
|
|
/*
|
|
* Test fsync
|
|
*/
|
|
printf(LABEL_FORMAT, "fsync");
|
|
fflush(stdout);
|
|
|
|
if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
|
|
die("could not open output file");
|
|
START_TIMER;
|
|
for (ops = 0; alarm_triggered == false; ops++)
|
|
{
|
|
for (writes = 0; writes < writes_per_op; writes++)
|
|
if (pg_pwrite(tmpfile,
|
|
buf,
|
|
XLOG_BLCKSZ,
|
|
writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
|
die("write failed");
|
|
if (fsync(tmpfile) != 0)
|
|
die("fsync failed");
|
|
}
|
|
STOP_TIMER;
|
|
close(tmpfile);
|
|
|
|
/*
|
|
* If fsync_writethrough is available, test as well
|
|
*/
|
|
printf(LABEL_FORMAT, "fsync_writethrough");
|
|
fflush(stdout);
|
|
|
|
#ifdef HAVE_FSYNC_WRITETHROUGH
|
|
if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
|
|
die("could not open output file");
|
|
START_TIMER;
|
|
for (ops = 0; alarm_triggered == false; ops++)
|
|
{
|
|
for (writes = 0; writes < writes_per_op; writes++)
|
|
if (pg_pwrite(tmpfile,
|
|
buf,
|
|
XLOG_BLCKSZ,
|
|
writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
|
die("write failed");
|
|
if (pg_fsync_writethrough(tmpfile) != 0)
|
|
die("fsync failed");
|
|
}
|
|
STOP_TIMER;
|
|
close(tmpfile);
|
|
#else
|
|
printf(NA_FORMAT, _("n/a"));
|
|
#endif
|
|
|
|
/*
|
|
* Test open_sync if available
|
|
*/
|
|
printf(LABEL_FORMAT, "open_sync");
|
|
fflush(stdout);
|
|
|
|
#ifdef O_SYNC
|
|
if ((tmpfile = open_direct(filename, O_RDWR | O_SYNC | PG_BINARY, 0)) == -1)
|
|
{
|
|
printf(NA_FORMAT, _("n/a*"));
|
|
fs_warning = true;
|
|
}
|
|
else
|
|
{
|
|
START_TIMER;
|
|
for (ops = 0; alarm_triggered == false; ops++)
|
|
{
|
|
for (writes = 0; writes < writes_per_op; writes++)
|
|
if (pg_pwrite(tmpfile,
|
|
buf,
|
|
XLOG_BLCKSZ,
|
|
writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
|
|
|
/*
|
|
* This can generate write failures if the filesystem has
|
|
* a large block size, e.g. 4k, and there is no support
|
|
* for O_DIRECT writes smaller than the file system block
|
|
* size, e.g. XFS.
|
|
*/
|
|
die("write failed");
|
|
}
|
|
STOP_TIMER;
|
|
close(tmpfile);
|
|
}
|
|
#else
|
|
printf(NA_FORMAT, _("n/a"));
|
|
#endif
|
|
|
|
if (fs_warning)
|
|
{
|
|
printf(_("* This file system and its mount options do not support direct\n"
|
|
" I/O, e.g. ext4 in journaled mode.\n"));
|
|
}
|
|
}
|
|
|
|
static void
|
|
test_open_syncs(void)
|
|
{
|
|
printf(_("\nCompare open_sync with different write sizes:\n"));
|
|
printf(_("(This is designed to compare the cost of writing 16kB in different write\n"
|
|
"open_sync sizes.)\n"));
|
|
|
|
test_open_sync(_(" 1 * 16kB open_sync write"), 16);
|
|
test_open_sync(_(" 2 * 8kB open_sync writes"), 8);
|
|
test_open_sync(_(" 4 * 4kB open_sync writes"), 4);
|
|
test_open_sync(_(" 8 * 2kB open_sync writes"), 2);
|
|
test_open_sync(_("16 * 1kB open_sync writes"), 1);
|
|
}
|
|
|
|
/*
|
|
* Test open_sync with different size files
|
|
*/
|
|
static void
|
|
test_open_sync(const char *msg, int writes_size)
|
|
{
|
|
#ifdef O_SYNC
|
|
int tmpfile,
|
|
ops,
|
|
writes;
|
|
#endif
|
|
|
|
printf(LABEL_FORMAT, msg);
|
|
fflush(stdout);
|
|
|
|
#ifdef O_SYNC
|
|
if ((tmpfile = open_direct(filename, O_RDWR | O_SYNC | PG_BINARY, 0)) == -1)
|
|
printf(NA_FORMAT, _("n/a*"));
|
|
else
|
|
{
|
|
START_TIMER;
|
|
for (ops = 0; alarm_triggered == false; ops++)
|
|
{
|
|
for (writes = 0; writes < 16 / writes_size; writes++)
|
|
if (pg_pwrite(tmpfile,
|
|
buf,
|
|
writes_size * 1024,
|
|
writes * writes_size * 1024) !=
|
|
writes_size * 1024)
|
|
die("write failed");
|
|
}
|
|
STOP_TIMER;
|
|
close(tmpfile);
|
|
}
|
|
#else
|
|
printf(NA_FORMAT, _("n/a"));
|
|
#endif
|
|
}
|
|
|
|
static void
|
|
test_file_descriptor_sync(void)
|
|
{
|
|
int tmpfile,
|
|
ops;
|
|
|
|
/*
|
|
* Test whether fsync can sync data written on a different descriptor for
|
|
* the same file. This checks the efficiency of multi-process fsyncs
|
|
* against the same file. Possibly this should be done with writethrough
|
|
* on platforms which support it.
|
|
*/
|
|
printf(_("\nTest if fsync on non-write file descriptor is honored:\n"));
|
|
printf(_("(If the times are similar, fsync() can sync data written on a different\n"
|
|
"descriptor.)\n"));
|
|
|
|
/*
|
|
* first write, fsync and close, which is the normal behavior without
|
|
* multiple descriptors
|
|
*/
|
|
printf(LABEL_FORMAT, "write, fsync, close");
|
|
fflush(stdout);
|
|
|
|
START_TIMER;
|
|
for (ops = 0; alarm_triggered == false; ops++)
|
|
{
|
|
if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
|
|
die("could not open output file");
|
|
if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
|
die("write failed");
|
|
if (fsync(tmpfile) != 0)
|
|
die("fsync failed");
|
|
close(tmpfile);
|
|
|
|
/*
|
|
* open and close the file again to be consistent with the following
|
|
* test
|
|
*/
|
|
if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
|
|
die("could not open output file");
|
|
close(tmpfile);
|
|
}
|
|
STOP_TIMER;
|
|
|
|
/*
|
|
* Now open, write, close, open again and fsync This simulates processes
|
|
* fsyncing each other's writes.
|
|
*/
|
|
printf(LABEL_FORMAT, "write, close, fsync");
|
|
fflush(stdout);
|
|
|
|
START_TIMER;
|
|
for (ops = 0; alarm_triggered == false; ops++)
|
|
{
|
|
if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
|
|
die("could not open output file");
|
|
if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
|
die("write failed");
|
|
close(tmpfile);
|
|
/* reopen file */
|
|
if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
|
|
die("could not open output file");
|
|
if (fsync(tmpfile) != 0)
|
|
die("fsync failed");
|
|
close(tmpfile);
|
|
}
|
|
STOP_TIMER;
|
|
}
|
|
|
|
static void
|
|
test_non_sync(void)
|
|
{
|
|
int tmpfile,
|
|
ops;
|
|
|
|
/*
|
|
* Test a simple write without fsync
|
|
*/
|
|
printf(_("\nNon-sync'ed %dkB writes:\n"), XLOG_BLCKSZ_K);
|
|
printf(LABEL_FORMAT, "write");
|
|
fflush(stdout);
|
|
|
|
if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
|
|
die("could not open output file");
|
|
START_TIMER;
|
|
for (ops = 0; alarm_triggered == false; ops++)
|
|
{
|
|
if (pg_pwrite(tmpfile, buf, XLOG_BLCKSZ, 0) != XLOG_BLCKSZ)
|
|
die("write failed");
|
|
}
|
|
STOP_TIMER;
|
|
close(tmpfile);
|
|
}
|
|
|
|
static void
|
|
signal_cleanup(SIGNAL_ARGS)
|
|
{
|
|
int rc;
|
|
|
|
/* Delete the file if it exists. Ignore errors */
|
|
if (needs_unlink)
|
|
unlink(filename);
|
|
/* Finish incomplete line on stdout */
|
|
rc = write(STDOUT_FILENO, "\n", 1);
|
|
(void) rc; /* silence compiler warnings */
|
|
_exit(1);
|
|
}
|
|
|
|
#ifdef HAVE_FSYNC_WRITETHROUGH

/*
 * Flush fd using fcntl(F_FULLFSYNC) where that is defined.
 *
 * Returns 0 on success and -1 on failure.  Where F_FULLFSYNC is not
 * defined, always fails with errno set to ENOSYS.
 */
static int
pg_fsync_writethrough(int fd)
{
#ifndef F_FULLFSYNC
	errno = ENOSYS;
	return -1;
#else
	if (fcntl(fd, F_FULLFSYNC, 0) == -1)
		return -1;
	return 0;
#endif
}
#endif
|
|
|
|
/*
|
|
* print out the writes per second for tests
|
|
*/
|
|
static void
|
|
print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
|
|
{
|
|
double total_time = (stop_t.tv_sec - start_t.tv_sec) +
|
|
(stop_t.tv_usec - start_t.tv_usec) * 0.000001;
|
|
double per_second = ops / total_time;
|
|
double avg_op_time_us = (total_time / ops) * USECS_SEC;
|
|
|
|
printf(_(OPS_FORMAT), per_second, avg_op_time_us);
|
|
}
|
|
|
|
#ifndef WIN32
|
|
static void
|
|
process_alarm(SIGNAL_ARGS)
|
|
{
|
|
alarm_triggered = true;
|
|
}
|
|
#else
|
|
static DWORD WINAPI
|
|
process_alarm(LPVOID param)
|
|
{
|
|
/* WIN32 doesn't support alarm, so we create a thread and sleep here */
|
|
Sleep(secs_per_test * 1000);
|
|
alarm_triggered = true;
|
|
ExitThread(0);
|
|
}
|
|
#endif
|