1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-12 05:01:15 +03:00

Create and use wait events for read, write, and fsync operations.

Previous commits, notably 53be0b1add and
6f3bd98ebf, made it possible to see from
pg_stat_activity when a backend was stuck waiting for another backend,
but it's also fairly common for a backend to be stuck waiting for an
I/O.  Add wait events for those operations, too.

Rushabh Lathia, with further hacking by me.  Reviewed and tested by
Michael Paquier, Amit Kapila, Rajkumar Raghuwanshi, and Rahila Syed.

Discussion: http://postgr.es/m/CAGPqQf0LsYHXREPAZqYGVkDqHSyjf=KsD=k0GTVPAuzyThh-VQ@mail.gmail.com
This commit is contained in:
Robert Haas
2017-03-18 07:43:01 -04:00
parent 928250aef5
commit 249cf070e3
21 changed files with 782 additions and 29 deletions

View File

@@ -119,6 +119,8 @@
#include "lib/ilist.h"
#include "pgstat.h"
#include "replication/logical.h"
#include "replication/slot.h"
@@ -916,7 +918,8 @@ logical_heap_rewrite_flush_mappings(RewriteState state)
* Note that we deviate from the usual WAL coding practices here,
* check the above "Logical rewrite support" comment for reasoning.
*/
written = FileWrite(src->vfd, waldata_start, len);
written = FileWrite(src->vfd, waldata_start, len,
WAIT_EVENT_LOGICAL_REWRITE_WRITE);
if (written != len)
ereport(ERROR,
(errcode_for_file_access(),
@@ -957,7 +960,7 @@ logical_end_heap_rewrite(RewriteState state)
hash_seq_init(&seq_status, state->rs_logical_mappings);
while ((src = (RewriteMappingFile *) hash_seq_search(&seq_status)) != NULL)
{
if (FileSync(src->vfd) != 0)
if (FileSync(src->vfd, WAIT_EVENT_LOGICAL_REWRITE_SYNC) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", src->path)));
@@ -1141,11 +1144,13 @@ heap_xlog_logical_rewrite(XLogReaderState *r)
* Truncate all data that's not guaranteed to have been safely fsynced (by
* previous record or by the last checkpoint).
*/
pgstat_report_wait_start(WAIT_EVENT_LOGICAL_REWRITE_TRUNCATE);
if (ftruncate(fd, xlrec->offset) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not truncate file \"%s\" to %u: %m",
path, (uint32) xlrec->offset)));
pgstat_report_wait_end();
/* now seek to the position we want to write our data to */
if (lseek(fd, xlrec->offset, SEEK_SET) != xlrec->offset)
@@ -1159,20 +1164,24 @@ heap_xlog_logical_rewrite(XLogReaderState *r)
len = xlrec->num_mappings * sizeof(LogicalRewriteMappingData);
/* write out tail end of mapping file (again) */
pgstat_report_wait_start(WAIT_EVENT_LOGICAL_REWRITE_MAPPING_WRITE);
if (write(fd, data, len) != len)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", path)));
pgstat_report_wait_end();
/*
* Now fsync all previously written data. We could improve things and only
* do this for the last write to a file, but the required bookkeeping
* doesn't seem worth the trouble.
*/
pgstat_report_wait_start(WAIT_EVENT_LOGICAL_REWRITE_MAPPING_SYNC);
if (pg_fsync(fd) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", path)));
pgstat_report_wait_end();
CloseTransientFile(fd);
}
@@ -1266,10 +1275,12 @@ CheckPointLogicalRewriteHeap(void)
* changed or have only been created since the checkpoint's start,
* but it's currently not deemed worth the effort.
*/
else if (pg_fsync(fd) != 0)
pgstat_report_wait_start(WAIT_EVENT_LOGICAL_REWRITE_CHECKPOINT_SYNC);
if (pg_fsync(fd) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", path)));
pgstat_report_wait_end();
CloseTransientFile(fd);
}
}

View File

@@ -54,6 +54,7 @@
#include "access/slru.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "pgstat.h"
#include "storage/fd.h"
#include "storage/shmem.h"
#include "miscadmin.h"
@@ -675,13 +676,16 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
}
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_SLRU_READ);
if (read(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
{
pgstat_report_wait_end();
slru_errcause = SLRU_READ_FAILED;
slru_errno = errno;
CloseTransientFile(fd);
return false;
}
pgstat_report_wait_end();
if (CloseTransientFile(fd))
{
@@ -834,8 +838,10 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
}
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
if (write(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
{
pgstat_report_wait_end();
/* if write didn't set errno, assume problem is no disk space */
if (errno == 0)
errno = ENOSPC;
@@ -845,6 +851,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
CloseTransientFile(fd);
return false;
}
pgstat_report_wait_end();
/*
* If not part of Flush, need to fsync now. We assume this happens
@@ -852,13 +859,16 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
*/
if (!fdata)
{
pgstat_report_wait_start(WAIT_EVENT_SLRU_SYNC);
if (ctl->do_fsync && pg_fsync(fd))
{
pgstat_report_wait_end();
slru_errcause = SLRU_FSYNC_FAILED;
slru_errno = errno;
CloseTransientFile(fd);
return false;
}
pgstat_report_wait_end();
if (CloseTransientFile(fd))
{
@@ -1126,6 +1136,7 @@ SimpleLruFlush(SlruCtl ctl, bool allow_redirtied)
ok = true;
for (i = 0; i < fdata.num_files; i++)
{
pgstat_report_wait_start(WAIT_EVENT_SLRU_FLUSH_SYNC);
if (ctl->do_fsync && pg_fsync(fdata.fd[i]))
{
slru_errcause = SLRU_FSYNC_FAILED;
@@ -1133,6 +1144,7 @@ SimpleLruFlush(SlruCtl ctl, bool allow_redirtied)
pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
ok = false;
}
pgstat_report_wait_end();
if (CloseTransientFile(fdata.fd[i]))
{

View File

@@ -38,6 +38,7 @@
#include "access/xlog.h"
#include "access/xlog_internal.h"
#include "access/xlogdefs.h"
#include "pgstat.h"
#include "storage/fd.h"
/*
@@ -338,7 +339,9 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
for (;;)
{
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
nbytes = (int) read(srcfd, buffer, sizeof(buffer));
pgstat_report_wait_end();
if (nbytes < 0 || errno != 0)
ereport(ERROR,
(errcode_for_file_access(),
@@ -346,6 +349,7 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
if (nbytes == 0)
break;
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
if ((int) write(fd, buffer, nbytes) != nbytes)
{
int save_errno = errno;
@@ -365,6 +369,7 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", tmppath)));
}
pgstat_report_wait_end();
}
CloseTransientFile(srcfd);
}
@@ -400,10 +405,12 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
errmsg("could not write to file \"%s\": %m", tmppath)));
}
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_SYNC);
if (pg_fsync(fd) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", tmppath)));
pgstat_report_wait_end();
if (CloseTransientFile(fd))
ereport(ERROR,
@@ -460,6 +467,7 @@ writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
errmsg("could not create file \"%s\": %m", tmppath)));
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_WRITE);
if ((int) write(fd, content, size) != size)
{
int save_errno = errno;
@@ -475,11 +483,14 @@ writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", tmppath)));
}
pgstat_report_wait_end();
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC);
if (pg_fsync(fd) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", tmppath)));
pgstat_report_wait_end();
if (CloseTransientFile(fd))
ereport(ERROR,

View File

@@ -1200,8 +1200,10 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
*/
buf = (char *) palloc(stat.st_size);
pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_READ);
if (read(fd, buf, stat.st_size) != stat.st_size)
{
pgstat_report_wait_end();
CloseTransientFile(fd);
if (give_warnings)
ereport(WARNING,
@@ -1212,6 +1214,7 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
return NULL;
}
pgstat_report_wait_end();
CloseTransientFile(fd);
hdr = (TwoPhaseFileHeader *) buf;
@@ -1542,8 +1545,10 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
path)));
/* Write content and CRC */
pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_WRITE);
if (write(fd, content, len) != len)
{
pgstat_report_wait_end();
CloseTransientFile(fd);
ereport(ERROR,
(errcode_for_file_access(),
@@ -1551,16 +1556,19 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
}
if (write(fd, &statefile_crc, sizeof(pg_crc32c)) != sizeof(pg_crc32c))
{
pgstat_report_wait_end();
CloseTransientFile(fd);
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write two-phase state file: %m")));
}
pgstat_report_wait_end();
/*
* We must fsync the file because the end-of-replay checkpoint will not do
* so, there being no GXACT in shared memory yet to tell it to.
*/
pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_SYNC);
if (pg_fsync(fd) != 0)
{
CloseTransientFile(fd);
@@ -1568,6 +1576,7 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
(errcode_for_file_access(),
errmsg("could not fsync two-phase state file: %m")));
}
pgstat_report_wait_end();
if (CloseTransientFile(fd) != 0)
ereport(ERROR,

View File

@@ -2456,7 +2456,9 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
do
{
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
written = write(openLogFile, from, nleft);
pgstat_report_wait_end();
if (written <= 0)
{
if (errno == EINTR)
@@ -3207,6 +3209,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
for (nbytes = 0; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
{
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
if ((int) write(fd, zbuffer, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
{
int save_errno = errno;
@@ -3225,8 +3228,10 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", tmppath)));
}
pgstat_report_wait_end();
}
pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC);
if (pg_fsync(fd) != 0)
{
close(fd);
@@ -3234,6 +3239,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", tmppath)));
}
pgstat_report_wait_end();
if (close(fd))
ereport(ERROR,
@@ -3360,6 +3366,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
if (nread > sizeof(buffer))
nread = sizeof(buffer);
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_READ);
if (read(srcfd, buffer, nread) != nread)
{
if (errno != 0)
@@ -3372,8 +3379,10 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
(errmsg("not enough data in file \"%s\"",
path)));
}
pgstat_report_wait_end();
}
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_WRITE);
if ((int) write(fd, buffer, sizeof(buffer)) != (int) sizeof(buffer))
{
int save_errno = errno;
@@ -3389,12 +3398,15 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", tmppath)));
}
pgstat_report_wait_end();
}
pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_SYNC);
if (pg_fsync(fd) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", tmppath)));
pgstat_report_wait_end();
if (CloseTransientFile(fd))
ereport(ERROR,
@@ -4414,6 +4426,7 @@ WriteControlFile(void)
XLOG_CONTROL_FILE)));
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_WRITE);
if (write(fd, buffer, PG_CONTROL_SIZE) != PG_CONTROL_SIZE)
{
/* if write didn't set errno, assume problem is no disk space */
@@ -4423,11 +4436,14 @@ WriteControlFile(void)
(errcode_for_file_access(),
errmsg("could not write to control file: %m")));
}
pgstat_report_wait_end();
pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_SYNC);
if (pg_fsync(fd) != 0)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not fsync control file: %m")));
pgstat_report_wait_end();
if (close(fd))
ereport(PANIC,
@@ -4453,10 +4469,12 @@ ReadControlFile(void)
errmsg("could not open control file \"%s\": %m",
XLOG_CONTROL_FILE)));
pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_READ);
if (read(fd, ControlFile, sizeof(ControlFileData)) != sizeof(ControlFileData))
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not read from control file: %m")));
pgstat_report_wait_end();
close(fd);
@@ -4634,6 +4652,7 @@ UpdateControlFile(void)
XLOG_CONTROL_FILE)));
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_WRITE_UPDATE);
if (write(fd, ControlFile, sizeof(ControlFileData)) != sizeof(ControlFileData))
{
/* if write didn't set errno, assume problem is no disk space */
@@ -4643,11 +4662,14 @@ UpdateControlFile(void)
(errcode_for_file_access(),
errmsg("could not write to control file: %m")));
}
pgstat_report_wait_end();
pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_SYNC_UPDATE);
if (pg_fsync(fd) != 0)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not fsync control file: %m")));
pgstat_report_wait_end();
if (close(fd))
ereport(PANIC,
@@ -5036,6 +5058,7 @@ BootStrapXLOG(void)
/* Write the first page with the initial record */
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_WRITE);
if (write(openLogFile, page, XLOG_BLCKSZ) != XLOG_BLCKSZ)
{
/* if write didn't set errno, assume problem is no disk space */
@@ -5045,11 +5068,14 @@ BootStrapXLOG(void)
(errcode_for_file_access(),
errmsg("could not write bootstrap transaction log file: %m")));
}
pgstat_report_wait_end();
pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_SYNC);
if (pg_fsync(openLogFile) != 0)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not fsync bootstrap transaction log file: %m")));
pgstat_report_wait_end();
if (close(openLogFile))
ereport(PANIC,
@@ -9999,11 +10025,13 @@ assign_xlog_sync_method(int new_sync_method, void *extra)
*/
if (openLogFile >= 0)
{
pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC_METHOD_ASSIGN);
if (pg_fsync(openLogFile) != 0)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not fsync log segment %s: %m",
XLogFileNameP(ThisTimeLineID, openLogSegNo))));
pgstat_report_wait_end();
if (get_sync_bit(sync_method) != get_sync_bit(new_sync_method))
XLogFileClose();
}
@@ -11456,10 +11484,12 @@ retry:
goto next_record_is_invalid;
}
pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
{
char fname[MAXFNAMELEN];
pgstat_report_wait_end();
XLogFileName(fname, curFileTLI, readSegNo);
ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
(errcode_for_file_access(),
@@ -11467,6 +11497,7 @@ retry:
fname, readOff)));
goto next_record_is_invalid;
}
pgstat_report_wait_end();
Assert(targetSegNo == readSegNo);
Assert(targetPageOff == readOff);

View File

@@ -24,6 +24,7 @@
#include "access/xlogutils.h"
#include "catalog/catalog.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/smgr.h"
#include "utils/guc.h"
#include "utils/hsearch.h"
@@ -728,7 +729,9 @@ XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count)
else
segbytes = nbytes;
pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
readbytes = read(sendFile, p, segbytes);
pgstat_report_wait_end();
if (readbytes <= 0)
{
char path[MAXPGPATH];