1
0
mirror of https://github.com/postgres/postgres.git synced 2025-09-02 04:21:28 +03:00

Track IO times in pg_stat_io

a9c70b46db and 8aaa04b32S added counting of IO operations to a new view,
pg_stat_io. Now, add IO timing for reads, writes, extends, and fsyncs to
pg_stat_io as well.

This combines the tracking for pgBufferUsage with the tracking for pg_stat_io
into a new function pgstat_count_io_op_time(). This should make it a bit
easier to avoid the somewhat costly instr_time conversion done for
pgBufferUsage.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_ay5iKmnbXZ3DsauViF3eMxu4m1oNnJXqV_HyqYeg55Ww%40mail.gmail.com
This commit is contained in:
Andres Freund
2023-04-07 16:05:26 -07:00
parent 1c453cfd89
commit ac8d53dae5
11 changed files with 276 additions and 108 deletions

View File

@@ -1112,23 +1112,12 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
MemSet((char *) bufBlock, 0, BLCKSZ);
else
{
instr_time io_start,
io_time;
if (track_io_timing)
INSTR_TIME_SET_CURRENT(io_start);
instr_time io_start = pgstat_prepare_io_time();
smgrread(smgr, forkNum, blockNum, bufBlock);
if (track_io_timing)
{
INSTR_TIME_SET_CURRENT(io_time);
INSTR_TIME_SUBTRACT(io_time, io_start);
pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time);
}
pgstat_count_io_op(io_object, io_context, IOOP_READ);
pgstat_count_io_op_time(io_object, io_context,
IOOP_READ, io_start, 1);
/* check for garbage data */
if (!PageIsVerifiedExtended((Page) bufBlock, blockNum,
@@ -1837,6 +1826,7 @@ ExtendBufferedRelShared(ExtendBufferedWhat eb,
{
BlockNumber first_block;
IOContext io_context = IOContextForStrategy(strategy);
instr_time io_start;
LimitAdditionalPins(&extend_by);
@@ -2044,6 +2034,8 @@ ExtendBufferedRelShared(ExtendBufferedWhat eb,
}
}
io_start = pgstat_prepare_io_time();
/*
* Note: if smgzerorextend fails, we will end up with buffers that are
* allocated but not marked BM_VALID. The next relation extension will
@@ -2066,6 +2058,9 @@ ExtendBufferedRelShared(ExtendBufferedWhat eb,
if (!(flags & EB_SKIP_EXTENSION_LOCK))
UnlockRelationForExtension(eb.rel, ExclusiveLock);
pgstat_count_io_op_time(IOOBJECT_RELATION, io_context, IOOP_EXTEND,
io_start, extend_by);
/* Set BM_VALID, terminate IO, and wake up any waiters */
for (int i = 0; i < extend_by; i++)
{
@@ -2089,8 +2084,6 @@ ExtendBufferedRelShared(ExtendBufferedWhat eb,
}
pgBufferUsage.shared_blks_written += extend_by;
pgstat_count_io_op_n(IOOBJECT_RELATION, io_context, IOOP_EXTEND,
extend_by);
*extended_by = extend_by;
@@ -3344,8 +3337,7 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
{
XLogRecPtr recptr;
ErrorContextCallback errcallback;
instr_time io_start,
io_time;
instr_time io_start;
Block bufBlock;
char *bufToWrite;
uint32 buf_state;
@@ -3420,10 +3412,7 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
*/
bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
if (track_io_timing)
INSTR_TIME_SET_CURRENT(io_start);
else
INSTR_TIME_SET_ZERO(io_start);
io_start = pgstat_prepare_io_time();
/*
* bufToWrite is either the shared buffer or a copy, as appropriate.
@@ -3452,15 +3441,8 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
* When a strategy is not in use, the write can only be a "regular" write
* of a dirty shared buffer (IOCONTEXT_NORMAL IOOP_WRITE).
*/
pgstat_count_io_op(IOOBJECT_RELATION, io_context, IOOP_WRITE);
if (track_io_timing)
{
INSTR_TIME_SET_CURRENT(io_time);
INSTR_TIME_SUBTRACT(io_time, io_start);
pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
}
pgstat_count_io_op_time(IOOBJECT_RELATION, io_context,
IOOP_WRITE, io_start, 1);
pgBufferUsage.shared_blks_written++;
@@ -4062,14 +4044,13 @@ FlushRelationBuffers(Relation rel)
{
int i;
BufferDesc *bufHdr;
instr_time io_start,
io_time;
if (RelationUsesLocalBuffers(rel))
{
for (i = 0; i < NLocBuffer; i++)
{
uint32 buf_state;
instr_time io_start;
bufHdr = GetLocalBufferDescriptor(i);
if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
@@ -4089,10 +4070,7 @@ FlushRelationBuffers(Relation rel)
PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
if (track_io_timing)
INSTR_TIME_SET_CURRENT(io_start);
else
INSTR_TIME_SET_ZERO(io_start);
io_start = pgstat_prepare_io_time();
smgrwrite(RelationGetSmgr(rel),
BufTagGetForkNum(&bufHdr->tag),
@@ -4100,19 +4078,13 @@ FlushRelationBuffers(Relation rel)
localpage,
false);
pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION,
IOCONTEXT_NORMAL, IOOP_WRITE,
io_start, 1);
buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE);
if (track_io_timing)
{
INSTR_TIME_SET_CURRENT(io_time);
INSTR_TIME_SUBTRACT(io_time, io_start);
pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
}
pgBufferUsage.local_blks_written++;
/* Pop the error context stack */

View File

@@ -176,8 +176,6 @@ GetLocalVictimBuffer(void)
int trycounter;
uint32 buf_state;
BufferDesc *bufHdr;
instr_time io_start,
io_time;
ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
@@ -233,6 +231,7 @@ GetLocalVictimBuffer(void)
*/
if (buf_state & BM_DIRTY)
{
instr_time io_start;
SMgrRelation oreln;
Page localpage = (char *) LocalBufHdrGetBlock(bufHdr);
@@ -241,10 +240,7 @@ GetLocalVictimBuffer(void)
PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
if (track_io_timing)
INSTR_TIME_SET_CURRENT(io_start);
else
INSTR_TIME_SET_ZERO(io_start);
io_start = pgstat_prepare_io_time();
/* And write... */
smgrwrite(oreln,
@@ -253,21 +249,14 @@ GetLocalVictimBuffer(void)
localpage,
false);
/* Temporary table I/O does not use Buffer Access Strategies */
pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL,
IOOP_WRITE, io_start, 1);
/* Mark not-dirty now in case we error out below */
buf_state &= ~BM_DIRTY;
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
/* Temporary table I/O does not use Buffer Access Strategies */
pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE);
if (track_io_timing)
{
INSTR_TIME_SET_CURRENT(io_time);
INSTR_TIME_SUBTRACT(io_time, io_start);
pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
}
pgBufferUsage.local_blks_written++;
}
@@ -325,6 +314,7 @@ ExtendBufferedRelLocal(ExtendBufferedWhat eb,
uint32 *extended_by)
{
BlockNumber first_block;
instr_time io_start;
/* Initialize local buffers if first request in this session */
if (LocalBufHash == NULL)
@@ -415,9 +405,14 @@ ExtendBufferedRelLocal(ExtendBufferedWhat eb,
}
}
io_start = pgstat_prepare_io_time();
/* actually extend relation */
smgrzeroextend(eb.smgr, fork, first_block, extend_by, false);
pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_EXTEND,
io_start, extend_by);
for (int i = 0; i < extend_by; i++)
{
Buffer buf = buffers[i];
@@ -434,8 +429,6 @@ ExtendBufferedRelLocal(ExtendBufferedWhat eb,
*extended_by = extend_by;
pgBufferUsage.temp_blks_written += extend_by;
pgstat_count_io_op_n(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_EXTEND,
extend_by);
return first_block;
}

View File

@@ -1138,6 +1138,19 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false /* retryOnError */ ))
{
instr_time io_start;
ereport(DEBUG1,
(errmsg_internal("could not forward fsync request because request queue is full")));
io_start = pgstat_prepare_io_time();
if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0)
ereport(data_sync_elevel(ERROR),
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m",
FilePathName(seg->mdfd_vfd))));
/*
* We have no way of knowing if the current IOContext is
* IOCONTEXT_NORMAL or IOCONTEXT_[BULKREAD, BULKWRITE, VACUUM] at this
@@ -1149,16 +1162,8 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
* IOCONTEXT_NORMAL is likely clearer when investigating the number of
* backend fsyncs.
*/
pgstat_count_io_op(IOOBJECT_RELATION, IOCONTEXT_NORMAL, IOOP_FSYNC);
ereport(DEBUG1,
(errmsg_internal("could not forward fsync request because request queue is full")));
if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0)
ereport(data_sync_elevel(ERROR),
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m",
FilePathName(seg->mdfd_vfd))));
pgstat_count_io_op_time(IOOBJECT_RELATION, IOCONTEXT_NORMAL,
IOOP_FSYNC, io_start, 1);
}
}
@@ -1508,6 +1513,7 @@ mdsyncfiletag(const FileTag *ftag, char *path)
{
SMgrRelation reln = smgropen(ftag->rlocator, InvalidBackendId);
File file;
instr_time io_start;
bool need_to_close;
int result,
save_errno;
@@ -1533,6 +1539,8 @@ mdsyncfiletag(const FileTag *ftag, char *path)
need_to_close = true;
}
io_start = pgstat_prepare_io_time();
/* Sync the file. */
result = FileSync(file, WAIT_EVENT_DATA_FILE_SYNC);
save_errno = errno;
@@ -1540,7 +1548,8 @@ mdsyncfiletag(const FileTag *ftag, char *path)
if (need_to_close)
FileClose(file);
pgstat_count_io_op(IOOBJECT_RELATION, IOCONTEXT_NORMAL, IOOP_FSYNC);
pgstat_count_io_op_time(IOOBJECT_RELATION, IOCONTEXT_NORMAL,
IOOP_FSYNC, io_start, 1);
errno = save_errno;
return result;