mirror of
https://github.com/postgres/postgres.git
synced 2025-04-25 21:42:33 +03:00
Track IO times in pg_stat_io
a9c70b46dbe and 8aaa04b32S added counting of IO operations to a new view, pg_stat_io. Now, add IO timing for reads, writes, extends, and fsyncs to pg_stat_io as well. This combines the tracking for pgBufferUsage with the tracking for pg_stat_io into a new function pgstat_count_io_op_time(). This should make it a bit easier to avoid the somewhat costly instr_time conversion done for pgBufferUsage. Author: Melanie Plageman <melanieplageman@gmail.com> Reviewed-by: Andres Freund <andres@anarazel.de> Reviewed-by: Bertrand Drouvot <bertranddrouvot.pg@gmail.com> Discussion: https://postgr.es/m/flat/CAAKRu_ay5iKmnbXZ3DsauViF3eMxu4m1oNnJXqV_HyqYeg55Ww%40mail.gmail.com
This commit is contained in:
parent
1c453cfd89
commit
ac8d53dae5
@ -3814,6 +3814,18 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
|
|||||||
</entry>
|
</entry>
|
||||||
</row>
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry role="catalog_table_entry">
|
||||||
|
<para role="column_definition">
|
||||||
|
<structfield>read_time</structfield> <type>double precision</type>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Time spent in read operations in milliseconds (if
|
||||||
|
<xref linkend="guc-track-io-timing"/> is enabled, otherwise zero)
|
||||||
|
</para>
|
||||||
|
</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
<row>
|
<row>
|
||||||
<entry role="catalog_table_entry">
|
<entry role="catalog_table_entry">
|
||||||
<para role="column_definition">
|
<para role="column_definition">
|
||||||
@ -3826,6 +3838,18 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
|
|||||||
</entry>
|
</entry>
|
||||||
</row>
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry role="catalog_table_entry">
|
||||||
|
<para role="column_definition">
|
||||||
|
<structfield>write_time</structfield> <type>double precision</type>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Time spent in write operations in milliseconds (if
|
||||||
|
<xref linkend="guc-track-io-timing"/> is enabled, otherwise zero)
|
||||||
|
</para>
|
||||||
|
</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
<row>
|
<row>
|
||||||
<entry role="catalog_table_entry">
|
<entry role="catalog_table_entry">
|
||||||
<para role="column_definition">
|
<para role="column_definition">
|
||||||
@ -3838,6 +3862,18 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
|
|||||||
</entry>
|
</entry>
|
||||||
</row>
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry role="catalog_table_entry">
|
||||||
|
<para role="column_definition">
|
||||||
|
<structfield>extend_time</structfield> <type>double precision</type>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Time spent in extend operations in milliseconds (if
|
||||||
|
<xref linkend="guc-track-io-timing"/> is enabled, otherwise zero)
|
||||||
|
</para>
|
||||||
|
</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
<row>
|
<row>
|
||||||
<entry role="catalog_table_entry">
|
<entry role="catalog_table_entry">
|
||||||
<para role="column_definition">
|
<para role="column_definition">
|
||||||
@ -3913,6 +3949,18 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
|
|||||||
</entry>
|
</entry>
|
||||||
</row>
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry role="catalog_table_entry">
|
||||||
|
<para role="column_definition">
|
||||||
|
<structfield>fsync_time</structfield> <type>double precision</type>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Time spent in fsync operations in milliseconds (if
|
||||||
|
<xref linkend="guc-track-io-timing"/> is enabled, otherwise zero)
|
||||||
|
</para>
|
||||||
|
</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
<row>
|
<row>
|
||||||
<entry role="catalog_table_entry">
|
<entry role="catalog_table_entry">
|
||||||
<para role="column_definition">
|
<para role="column_definition">
|
||||||
@ -3978,6 +4026,17 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
|
|||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
Columns tracking I/O time will only be non-zero when
|
||||||
|
<xref linkend="guc-track-io-timing"/> is enabled. The user should be
|
||||||
|
careful when referencing these columns in combination with their
|
||||||
|
corresponding IO operations in case <varname>track_io_timing</varname>
|
||||||
|
was not enabled for the entire time since the last stats reset.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
|
@ -1125,13 +1125,17 @@ SELECT
|
|||||||
b.io_object,
|
b.io_object,
|
||||||
b.io_context,
|
b.io_context,
|
||||||
b.reads,
|
b.reads,
|
||||||
|
b.read_time,
|
||||||
b.writes,
|
b.writes,
|
||||||
|
b.write_time,
|
||||||
b.extends,
|
b.extends,
|
||||||
|
b.extend_time,
|
||||||
b.op_bytes,
|
b.op_bytes,
|
||||||
b.hits,
|
b.hits,
|
||||||
b.evictions,
|
b.evictions,
|
||||||
b.reuses,
|
b.reuses,
|
||||||
b.fsyncs,
|
b.fsyncs,
|
||||||
|
b.fsync_time,
|
||||||
b.stats_reset
|
b.stats_reset
|
||||||
FROM pg_stat_get_io() b;
|
FROM pg_stat_get_io() b;
|
||||||
|
|
||||||
|
@ -1112,23 +1112,12 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
|
|||||||
MemSet((char *) bufBlock, 0, BLCKSZ);
|
MemSet((char *) bufBlock, 0, BLCKSZ);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
instr_time io_start,
|
instr_time io_start = pgstat_prepare_io_time();
|
||||||
io_time;
|
|
||||||
|
|
||||||
if (track_io_timing)
|
|
||||||
INSTR_TIME_SET_CURRENT(io_start);
|
|
||||||
|
|
||||||
smgrread(smgr, forkNum, blockNum, bufBlock);
|
smgrread(smgr, forkNum, blockNum, bufBlock);
|
||||||
|
|
||||||
if (track_io_timing)
|
pgstat_count_io_op_time(io_object, io_context,
|
||||||
{
|
IOOP_READ, io_start, 1);
|
||||||
INSTR_TIME_SET_CURRENT(io_time);
|
|
||||||
INSTR_TIME_SUBTRACT(io_time, io_start);
|
|
||||||
pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
|
|
||||||
INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time);
|
|
||||||
}
|
|
||||||
|
|
||||||
pgstat_count_io_op(io_object, io_context, IOOP_READ);
|
|
||||||
|
|
||||||
/* check for garbage data */
|
/* check for garbage data */
|
||||||
if (!PageIsVerifiedExtended((Page) bufBlock, blockNum,
|
if (!PageIsVerifiedExtended((Page) bufBlock, blockNum,
|
||||||
@ -1837,6 +1826,7 @@ ExtendBufferedRelShared(ExtendBufferedWhat eb,
|
|||||||
{
|
{
|
||||||
BlockNumber first_block;
|
BlockNumber first_block;
|
||||||
IOContext io_context = IOContextForStrategy(strategy);
|
IOContext io_context = IOContextForStrategy(strategy);
|
||||||
|
instr_time io_start;
|
||||||
|
|
||||||
LimitAdditionalPins(&extend_by);
|
LimitAdditionalPins(&extend_by);
|
||||||
|
|
||||||
@ -2044,6 +2034,8 @@ ExtendBufferedRelShared(ExtendBufferedWhat eb,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
io_start = pgstat_prepare_io_time();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Note: if smgrzeroextend fails, we will end up with buffers that are
|
* Note: if smgrzeroextend fails, we will end up with buffers that are
|
||||||
* allocated but not marked BM_VALID. The next relation extension will
|
* allocated but not marked BM_VALID. The next relation extension will
|
||||||
@ -2066,6 +2058,9 @@ ExtendBufferedRelShared(ExtendBufferedWhat eb,
|
|||||||
if (!(flags & EB_SKIP_EXTENSION_LOCK))
|
if (!(flags & EB_SKIP_EXTENSION_LOCK))
|
||||||
UnlockRelationForExtension(eb.rel, ExclusiveLock);
|
UnlockRelationForExtension(eb.rel, ExclusiveLock);
|
||||||
|
|
||||||
|
pgstat_count_io_op_time(IOOBJECT_RELATION, io_context, IOOP_EXTEND,
|
||||||
|
io_start, extend_by);
|
||||||
|
|
||||||
/* Set BM_VALID, terminate IO, and wake up any waiters */
|
/* Set BM_VALID, terminate IO, and wake up any waiters */
|
||||||
for (int i = 0; i < extend_by; i++)
|
for (int i = 0; i < extend_by; i++)
|
||||||
{
|
{
|
||||||
@ -2089,8 +2084,6 @@ ExtendBufferedRelShared(ExtendBufferedWhat eb,
|
|||||||
}
|
}
|
||||||
|
|
||||||
pgBufferUsage.shared_blks_written += extend_by;
|
pgBufferUsage.shared_blks_written += extend_by;
|
||||||
pgstat_count_io_op_n(IOOBJECT_RELATION, io_context, IOOP_EXTEND,
|
|
||||||
extend_by);
|
|
||||||
|
|
||||||
*extended_by = extend_by;
|
*extended_by = extend_by;
|
||||||
|
|
||||||
@ -3344,8 +3337,7 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
|
|||||||
{
|
{
|
||||||
XLogRecPtr recptr;
|
XLogRecPtr recptr;
|
||||||
ErrorContextCallback errcallback;
|
ErrorContextCallback errcallback;
|
||||||
instr_time io_start,
|
instr_time io_start;
|
||||||
io_time;
|
|
||||||
Block bufBlock;
|
Block bufBlock;
|
||||||
char *bufToWrite;
|
char *bufToWrite;
|
||||||
uint32 buf_state;
|
uint32 buf_state;
|
||||||
@ -3420,10 +3412,7 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
|
|||||||
*/
|
*/
|
||||||
bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
|
bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
|
||||||
|
|
||||||
if (track_io_timing)
|
io_start = pgstat_prepare_io_time();
|
||||||
INSTR_TIME_SET_CURRENT(io_start);
|
|
||||||
else
|
|
||||||
INSTR_TIME_SET_ZERO(io_start);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* bufToWrite is either the shared buffer or a copy, as appropriate.
|
* bufToWrite is either the shared buffer or a copy, as appropriate.
|
||||||
@ -3452,15 +3441,8 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
|
|||||||
* When a strategy is not in use, the write can only be a "regular" write
|
* When a strategy is not in use, the write can only be a "regular" write
|
||||||
* of a dirty shared buffer (IOCONTEXT_NORMAL IOOP_WRITE).
|
* of a dirty shared buffer (IOCONTEXT_NORMAL IOOP_WRITE).
|
||||||
*/
|
*/
|
||||||
pgstat_count_io_op(IOOBJECT_RELATION, io_context, IOOP_WRITE);
|
pgstat_count_io_op_time(IOOBJECT_RELATION, io_context,
|
||||||
|
IOOP_WRITE, io_start, 1);
|
||||||
if (track_io_timing)
|
|
||||||
{
|
|
||||||
INSTR_TIME_SET_CURRENT(io_time);
|
|
||||||
INSTR_TIME_SUBTRACT(io_time, io_start);
|
|
||||||
pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
|
|
||||||
INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
|
|
||||||
}
|
|
||||||
|
|
||||||
pgBufferUsage.shared_blks_written++;
|
pgBufferUsage.shared_blks_written++;
|
||||||
|
|
||||||
@ -4062,14 +4044,13 @@ FlushRelationBuffers(Relation rel)
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
BufferDesc *bufHdr;
|
BufferDesc *bufHdr;
|
||||||
instr_time io_start,
|
|
||||||
io_time;
|
|
||||||
|
|
||||||
if (RelationUsesLocalBuffers(rel))
|
if (RelationUsesLocalBuffers(rel))
|
||||||
{
|
{
|
||||||
for (i = 0; i < NLocBuffer; i++)
|
for (i = 0; i < NLocBuffer; i++)
|
||||||
{
|
{
|
||||||
uint32 buf_state;
|
uint32 buf_state;
|
||||||
|
instr_time io_start;
|
||||||
|
|
||||||
bufHdr = GetLocalBufferDescriptor(i);
|
bufHdr = GetLocalBufferDescriptor(i);
|
||||||
if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
|
if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
|
||||||
@ -4089,10 +4070,7 @@ FlushRelationBuffers(Relation rel)
|
|||||||
|
|
||||||
PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
|
PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
|
||||||
|
|
||||||
if (track_io_timing)
|
io_start = pgstat_prepare_io_time();
|
||||||
INSTR_TIME_SET_CURRENT(io_start);
|
|
||||||
else
|
|
||||||
INSTR_TIME_SET_ZERO(io_start);
|
|
||||||
|
|
||||||
smgrwrite(RelationGetSmgr(rel),
|
smgrwrite(RelationGetSmgr(rel),
|
||||||
BufTagGetForkNum(&bufHdr->tag),
|
BufTagGetForkNum(&bufHdr->tag),
|
||||||
@ -4100,19 +4078,13 @@ FlushRelationBuffers(Relation rel)
|
|||||||
localpage,
|
localpage,
|
||||||
false);
|
false);
|
||||||
|
|
||||||
|
pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION,
|
||||||
|
IOCONTEXT_NORMAL, IOOP_WRITE,
|
||||||
|
io_start, 1);
|
||||||
|
|
||||||
buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
|
buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
|
||||||
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
|
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
|
||||||
|
|
||||||
pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE);
|
|
||||||
|
|
||||||
if (track_io_timing)
|
|
||||||
{
|
|
||||||
INSTR_TIME_SET_CURRENT(io_time);
|
|
||||||
INSTR_TIME_SUBTRACT(io_time, io_start);
|
|
||||||
pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
|
|
||||||
INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
|
|
||||||
}
|
|
||||||
|
|
||||||
pgBufferUsage.local_blks_written++;
|
pgBufferUsage.local_blks_written++;
|
||||||
|
|
||||||
/* Pop the error context stack */
|
/* Pop the error context stack */
|
||||||
|
@ -176,8 +176,6 @@ GetLocalVictimBuffer(void)
|
|||||||
int trycounter;
|
int trycounter;
|
||||||
uint32 buf_state;
|
uint32 buf_state;
|
||||||
BufferDesc *bufHdr;
|
BufferDesc *bufHdr;
|
||||||
instr_time io_start,
|
|
||||||
io_time;
|
|
||||||
|
|
||||||
ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
|
ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
|
||||||
|
|
||||||
@ -233,6 +231,7 @@ GetLocalVictimBuffer(void)
|
|||||||
*/
|
*/
|
||||||
if (buf_state & BM_DIRTY)
|
if (buf_state & BM_DIRTY)
|
||||||
{
|
{
|
||||||
|
instr_time io_start;
|
||||||
SMgrRelation oreln;
|
SMgrRelation oreln;
|
||||||
Page localpage = (char *) LocalBufHdrGetBlock(bufHdr);
|
Page localpage = (char *) LocalBufHdrGetBlock(bufHdr);
|
||||||
|
|
||||||
@ -241,10 +240,7 @@ GetLocalVictimBuffer(void)
|
|||||||
|
|
||||||
PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
|
PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
|
||||||
|
|
||||||
if (track_io_timing)
|
io_start = pgstat_prepare_io_time();
|
||||||
INSTR_TIME_SET_CURRENT(io_start);
|
|
||||||
else
|
|
||||||
INSTR_TIME_SET_ZERO(io_start);
|
|
||||||
|
|
||||||
/* And write... */
|
/* And write... */
|
||||||
smgrwrite(oreln,
|
smgrwrite(oreln,
|
||||||
@ -253,21 +249,14 @@ GetLocalVictimBuffer(void)
|
|||||||
localpage,
|
localpage,
|
||||||
false);
|
false);
|
||||||
|
|
||||||
|
/* Temporary table I/O does not use Buffer Access Strategies */
|
||||||
|
pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL,
|
||||||
|
IOOP_WRITE, io_start, 1);
|
||||||
|
|
||||||
/* Mark not-dirty now in case we error out below */
|
/* Mark not-dirty now in case we error out below */
|
||||||
buf_state &= ~BM_DIRTY;
|
buf_state &= ~BM_DIRTY;
|
||||||
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
|
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
|
||||||
|
|
||||||
/* Temporary table I/O does not use Buffer Access Strategies */
|
|
||||||
pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE);
|
|
||||||
|
|
||||||
if (track_io_timing)
|
|
||||||
{
|
|
||||||
INSTR_TIME_SET_CURRENT(io_time);
|
|
||||||
INSTR_TIME_SUBTRACT(io_time, io_start);
|
|
||||||
pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
|
|
||||||
INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
|
|
||||||
}
|
|
||||||
|
|
||||||
pgBufferUsage.local_blks_written++;
|
pgBufferUsage.local_blks_written++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -325,6 +314,7 @@ ExtendBufferedRelLocal(ExtendBufferedWhat eb,
|
|||||||
uint32 *extended_by)
|
uint32 *extended_by)
|
||||||
{
|
{
|
||||||
BlockNumber first_block;
|
BlockNumber first_block;
|
||||||
|
instr_time io_start;
|
||||||
|
|
||||||
/* Initialize local buffers if first request in this session */
|
/* Initialize local buffers if first request in this session */
|
||||||
if (LocalBufHash == NULL)
|
if (LocalBufHash == NULL)
|
||||||
@ -415,9 +405,14 @@ ExtendBufferedRelLocal(ExtendBufferedWhat eb,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
io_start = pgstat_prepare_io_time();
|
||||||
|
|
||||||
/* actually extend relation */
|
/* actually extend relation */
|
||||||
smgrzeroextend(eb.smgr, fork, first_block, extend_by, false);
|
smgrzeroextend(eb.smgr, fork, first_block, extend_by, false);
|
||||||
|
|
||||||
|
pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_EXTEND,
|
||||||
|
io_start, extend_by);
|
||||||
|
|
||||||
for (int i = 0; i < extend_by; i++)
|
for (int i = 0; i < extend_by; i++)
|
||||||
{
|
{
|
||||||
Buffer buf = buffers[i];
|
Buffer buf = buffers[i];
|
||||||
@ -434,8 +429,6 @@ ExtendBufferedRelLocal(ExtendBufferedWhat eb,
|
|||||||
*extended_by = extend_by;
|
*extended_by = extend_by;
|
||||||
|
|
||||||
pgBufferUsage.temp_blks_written += extend_by;
|
pgBufferUsage.temp_blks_written += extend_by;
|
||||||
pgstat_count_io_op_n(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_EXTEND,
|
|
||||||
extend_by);
|
|
||||||
|
|
||||||
return first_block;
|
return first_block;
|
||||||
}
|
}
|
||||||
|
@ -1138,6 +1138,19 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
|
|||||||
|
|
||||||
if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false /* retryOnError */ ))
|
if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false /* retryOnError */ ))
|
||||||
{
|
{
|
||||||
|
instr_time io_start;
|
||||||
|
|
||||||
|
ereport(DEBUG1,
|
||||||
|
(errmsg_internal("could not forward fsync request because request queue is full")));
|
||||||
|
|
||||||
|
io_start = pgstat_prepare_io_time();
|
||||||
|
|
||||||
|
if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0)
|
||||||
|
ereport(data_sync_elevel(ERROR),
|
||||||
|
(errcode_for_file_access(),
|
||||||
|
errmsg("could not fsync file \"%s\": %m",
|
||||||
|
FilePathName(seg->mdfd_vfd))));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We have no way of knowing if the current IOContext is
|
* We have no way of knowing if the current IOContext is
|
||||||
* IOCONTEXT_NORMAL or IOCONTEXT_[BULKREAD, BULKWRITE, VACUUM] at this
|
* IOCONTEXT_NORMAL or IOCONTEXT_[BULKREAD, BULKWRITE, VACUUM] at this
|
||||||
@ -1149,16 +1162,8 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
|
|||||||
* IOCONTEXT_NORMAL is likely clearer when investigating the number of
|
* IOCONTEXT_NORMAL is likely clearer when investigating the number of
|
||||||
* backend fsyncs.
|
* backend fsyncs.
|
||||||
*/
|
*/
|
||||||
pgstat_count_io_op(IOOBJECT_RELATION, IOCONTEXT_NORMAL, IOOP_FSYNC);
|
pgstat_count_io_op_time(IOOBJECT_RELATION, IOCONTEXT_NORMAL,
|
||||||
|
IOOP_FSYNC, io_start, 1);
|
||||||
ereport(DEBUG1,
|
|
||||||
(errmsg_internal("could not forward fsync request because request queue is full")));
|
|
||||||
|
|
||||||
if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0)
|
|
||||||
ereport(data_sync_elevel(ERROR),
|
|
||||||
(errcode_for_file_access(),
|
|
||||||
errmsg("could not fsync file \"%s\": %m",
|
|
||||||
FilePathName(seg->mdfd_vfd))));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1508,6 +1513,7 @@ mdsyncfiletag(const FileTag *ftag, char *path)
|
|||||||
{
|
{
|
||||||
SMgrRelation reln = smgropen(ftag->rlocator, InvalidBackendId);
|
SMgrRelation reln = smgropen(ftag->rlocator, InvalidBackendId);
|
||||||
File file;
|
File file;
|
||||||
|
instr_time io_start;
|
||||||
bool need_to_close;
|
bool need_to_close;
|
||||||
int result,
|
int result,
|
||||||
save_errno;
|
save_errno;
|
||||||
@ -1533,6 +1539,8 @@ mdsyncfiletag(const FileTag *ftag, char *path)
|
|||||||
need_to_close = true;
|
need_to_close = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
io_start = pgstat_prepare_io_time();
|
||||||
|
|
||||||
/* Sync the file. */
|
/* Sync the file. */
|
||||||
result = FileSync(file, WAIT_EVENT_DATA_FILE_SYNC);
|
result = FileSync(file, WAIT_EVENT_DATA_FILE_SYNC);
|
||||||
save_errno = errno;
|
save_errno = errno;
|
||||||
@ -1540,7 +1548,8 @@ mdsyncfiletag(const FileTag *ftag, char *path)
|
|||||||
if (need_to_close)
|
if (need_to_close)
|
||||||
FileClose(file);
|
FileClose(file);
|
||||||
|
|
||||||
pgstat_count_io_op(IOOBJECT_RELATION, IOCONTEXT_NORMAL, IOOP_FSYNC);
|
pgstat_count_io_op_time(IOOBJECT_RELATION, IOCONTEXT_NORMAL,
|
||||||
|
IOOP_FSYNC, io_start, 1);
|
||||||
|
|
||||||
errno = save_errno;
|
errno = save_errno;
|
||||||
return result;
|
return result;
|
||||||
|
@ -16,44 +16,55 @@
|
|||||||
|
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#include "executor/instrument.h"
|
||||||
|
#include "storage/bufmgr.h"
|
||||||
#include "utils/pgstat_internal.h"
|
#include "utils/pgstat_internal.h"
|
||||||
|
|
||||||
|
|
||||||
static PgStat_BktypeIO PendingIOStats;
|
/*
 * IO statistics accumulated locally and not yet flushed: operation counts
 * plus elapsed time, both indexed by [IOObject][IOContext][IOOp].
 */
typedef struct PgStat_PendingIO
|
||||||
|
{
|
||||||
|
PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
|
||||||
|
/* kept as instr_time; pgstat_flush_io() converts to microseconds when flushing */
instr_time pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
|
||||||
|
} PgStat_PendingIO;
|
||||||
|
|
||||||
|
|
||||||
|
static PgStat_PendingIO PendingIOStats;
|
||||||
bool have_iostats = false;
|
bool have_iostats = false;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check that stats have not been counted for any combination of IOObject,
|
* Check that stats have not been counted for any combination of IOObject,
|
||||||
* IOContext, and IOOp which are not tracked for the passed-in BackendType. The
|
* IOContext, and IOOp which are not tracked for the passed-in BackendType. If
|
||||||
* passed-in PgStat_BktypeIO must contain stats from the BackendType specified
|
* stats are tracked for this combination and IO times are non-zero, counts
|
||||||
* by the second parameter. Caller is responsible for locking the passed-in
|
* should be non-zero.
|
||||||
* PgStat_BktypeIO, if needed.
|
*
|
||||||
|
* The passed-in PgStat_BktypeIO must contain stats from the BackendType
|
||||||
|
* specified by the second parameter. Caller is responsible for locking the
|
||||||
|
* passed-in PgStat_BktypeIO, if needed.
|
||||||
*/
|
*/
|
||||||
bool
|
bool
|
||||||
pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io,
|
pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io,
|
||||||
BackendType bktype)
|
BackendType bktype)
|
||||||
{
|
{
|
||||||
bool bktype_tracked = pgstat_tracks_io_bktype(bktype);
|
|
||||||
|
|
||||||
for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
|
for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
|
||||||
{
|
{
|
||||||
for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
|
for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
|
||||||
{
|
{
|
||||||
/*
|
|
||||||
* Don't bother trying to skip to the next loop iteration if
|
|
||||||
* pgstat_tracks_io_object() would return false here. We still
|
|
||||||
* need to validate that each counter is zero anyway.
|
|
||||||
*/
|
|
||||||
for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
|
for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
|
||||||
{
|
{
|
||||||
/* No stats, so nothing to validate */
|
/* we do track it */
|
||||||
if (backend_io->data[io_object][io_context][io_op] == 0)
|
if (pgstat_tracks_io_op(bktype, io_object, io_context, io_op))
|
||||||
continue;
|
{
|
||||||
|
/* ensure that if IO times are non-zero, counts are > 0 */
|
||||||
|
if (backend_io->times[io_object][io_context][io_op] != 0 &&
|
||||||
|
backend_io->counts[io_object][io_context][io_op] <= 0)
|
||||||
|
return false;
|
||||||
|
|
||||||
/* There are stats and there shouldn't be */
|
continue;
|
||||||
if (!bktype_tracked ||
|
}
|
||||||
!pgstat_tracks_io_op(bktype, io_object, io_context, io_op))
|
|
||||||
|
/* we don't track it, and it is not 0 */
|
||||||
|
if (backend_io->counts[io_object][io_context][io_op] != 0)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -76,11 +87,58 @@ pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint3
|
|||||||
Assert((unsigned int) io_op < IOOP_NUM_TYPES);
|
Assert((unsigned int) io_op < IOOP_NUM_TYPES);
|
||||||
Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op));
|
Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op));
|
||||||
|
|
||||||
PendingIOStats.data[io_object][io_context][io_op] += cnt;
|
PendingIOStats.counts[io_object][io_context][io_op] += cnt;
|
||||||
|
|
||||||
have_iostats = true;
|
have_iostats = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Return the start time to hand to pgstat_count_io_op_time() once the IO
 * operation has completed.
 *
 * When track_io_timing is enabled this is the current time; otherwise it is
 * a zeroed instr_time, so the clock is not read when timing is off.
 */
instr_time
|
||||||
|
pgstat_prepare_io_time(void)
|
||||||
|
{
|
||||||
|
instr_time io_start;
|
||||||
|
|
||||||
|
if (track_io_timing)
|
||||||
|
INSTR_TIME_SET_CURRENT(io_start);
|
||||||
|
else
|
||||||
|
INSTR_TIME_SET_ZERO(io_start);
|
||||||
|
|
||||||
|
return io_start;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Like pgstat_count_io_op_n() except it also accumulates time.
*
* start_time is the value obtained from pgstat_prepare_io_time() before the
* IO was issued. cnt is the number of operations the elapsed time covers;
* the elapsed time is added once for the whole batch, not per operation.
*
* If track_io_timing is disabled no time is accumulated and only the
* operation count is updated.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
pgstat_count_io_op_time(IOObject io_obj, IOContext io_context, IOOp io_op,
|
||||||
|
instr_time start_time, uint32 cnt)
|
||||||
|
{
|
||||||
|
if (track_io_timing)
|
||||||
|
{
|
||||||
|
instr_time io_time;
|
||||||
|
|
||||||
|
/* elapsed time = now - start_time */
INSTR_TIME_SET_CURRENT(io_time);
|
||||||
|
INSTR_TIME_SUBTRACT(io_time, start_time);
|
||||||
|
|
||||||
|
/* reads and writes also feed the buffer read/write time counters */
if (io_op == IOOP_WRITE)
|
||||||
|
{
|
||||||
|
pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
|
||||||
|
/* pgBufferUsage only accumulates time for IOOBJECT_RELATION */
if (io_obj == IOOBJECT_RELATION)
|
||||||
|
INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
|
||||||
|
}
|
||||||
|
else if (io_op == IOOP_READ)
|
||||||
|
{
|
||||||
|
pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
|
||||||
|
/* pgBufferUsage only accumulates time for IOOBJECT_RELATION */
if (io_obj == IOOBJECT_RELATION)
|
||||||
|
INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* accumulate as instr_time; no unit conversion is done here */
INSTR_TIME_ADD(PendingIOStats.pending_times[io_obj][io_context][io_op],
|
||||||
|
io_time);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* the operation count is recorded whether or not timing is enabled */
pgstat_count_io_op_n(io_obj, io_context, io_op, cnt);
|
||||||
|
}
|
||||||
|
|
||||||
PgStat_IO *
|
PgStat_IO *
|
||||||
pgstat_fetch_stat_io(void)
|
pgstat_fetch_stat_io(void)
|
||||||
{
|
{
|
||||||
@ -120,8 +178,17 @@ pgstat_flush_io(bool nowait)
|
|||||||
for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
|
for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
|
||||||
{
|
{
|
||||||
for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
|
for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
|
||||||
bktype_shstats->data[io_object][io_context][io_op] +=
|
{
|
||||||
PendingIOStats.data[io_object][io_context][io_op];
|
instr_time time;
|
||||||
|
|
||||||
|
bktype_shstats->counts[io_object][io_context][io_op] +=
|
||||||
|
PendingIOStats.counts[io_object][io_context][io_op];
|
||||||
|
|
||||||
|
time = PendingIOStats.pending_times[io_object][io_context][io_op];
|
||||||
|
|
||||||
|
bktype_shstats->times[io_object][io_context][io_op] +=
|
||||||
|
INSTR_TIME_GET_MICROSEC(time);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1252,17 +1252,22 @@ pg_stat_get_buf_alloc(PG_FUNCTION_ARGS)
|
|||||||
*/
|
*/
|
||||||
typedef enum io_stat_col
|
typedef enum io_stat_col
|
||||||
{
|
{
|
||||||
|
IO_COL_INVALID = -1,
|
||||||
IO_COL_BACKEND_TYPE,
|
IO_COL_BACKEND_TYPE,
|
||||||
IO_COL_IO_OBJECT,
|
IO_COL_IO_OBJECT,
|
||||||
IO_COL_IO_CONTEXT,
|
IO_COL_IO_CONTEXT,
|
||||||
IO_COL_READS,
|
IO_COL_READS,
|
||||||
|
IO_COL_READ_TIME,
|
||||||
IO_COL_WRITES,
|
IO_COL_WRITES,
|
||||||
|
IO_COL_WRITE_TIME,
|
||||||
IO_COL_EXTENDS,
|
IO_COL_EXTENDS,
|
||||||
|
IO_COL_EXTEND_TIME,
|
||||||
IO_COL_CONVERSION,
|
IO_COL_CONVERSION,
|
||||||
IO_COL_HITS,
|
IO_COL_HITS,
|
||||||
IO_COL_EVICTIONS,
|
IO_COL_EVICTIONS,
|
||||||
IO_COL_REUSES,
|
IO_COL_REUSES,
|
||||||
IO_COL_FSYNCS,
|
IO_COL_FSYNCS,
|
||||||
|
IO_COL_FSYNC_TIME,
|
||||||
IO_COL_RESET_TIME,
|
IO_COL_RESET_TIME,
|
||||||
IO_NUM_COLUMNS,
|
IO_NUM_COLUMNS,
|
||||||
} io_stat_col;
|
} io_stat_col;
|
||||||
@ -1296,6 +1301,38 @@ pgstat_get_io_op_index(IOOp io_op)
|
|||||||
pg_unreachable();
|
pg_unreachable();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the number of the column containing IO times for the specified IOOp.
|
||||||
|
* This function encodes our assumption that IO time for an IOOp is displayed
|
||||||
|
* in the view in the column directly after the IOOp counts. If an op has no
|
||||||
|
* associated time, IO_COL_INVALID is returned.
*
* Only read, write, extend and fsync operations have a time column; any
* IOOp value not handled by the switch raises an ERROR.
|
||||||
|
*/
|
||||||
|
static io_stat_col
|
||||||
|
pgstat_get_io_time_index(IOOp io_op)
|
||||||
|
{
|
||||||
|
switch (io_op)
|
||||||
|
{
|
||||||
|
case IOOP_READ:
|
||||||
|
case IOOP_WRITE:
|
||||||
|
case IOOP_EXTEND:
|
||||||
|
case IOOP_FSYNC:
|
||||||
|
/*
 * "+ 1" relies on io_stat_col listing each IO_COL_*_TIME member
 * immediately after its count member (presumably the column that
 * pgstat_get_io_op_index() returns -- confirm if the enum changes).
 */
return pgstat_get_io_op_index(io_op) + 1;
|
||||||
|
case IOOP_EVICT:
|
||||||
|
case IOOP_HIT:
|
||||||
|
case IOOP_REUSE:
|
||||||
|
return IO_COL_INVALID;
|
||||||
|
}
|
||||||
|
|
||||||
|
elog(ERROR, "unrecognized IOOp value: %d", io_op);
|
||||||
|
pg_unreachable();
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Convert a time counter from microseconds to milliseconds.
 *
 * NOTE: despite its name, val_ms holds microseconds on input; the return
 * value is the same duration in milliseconds as a double.
 */
static inline double
|
||||||
|
pg_stat_us_to_ms(PgStat_Counter val_ms)
|
||||||
|
{
|
||||||
|
return val_ms * (double) 0.001;
|
||||||
|
}
|
||||||
|
|
||||||
Datum
|
Datum
|
||||||
pg_stat_get_io(PG_FUNCTION_ARGS)
|
pg_stat_get_io(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
@ -1363,20 +1400,37 @@ pg_stat_get_io(PG_FUNCTION_ARGS)
|
|||||||
|
|
||||||
for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
|
for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
|
||||||
{
|
{
|
||||||
int col_idx = pgstat_get_io_op_index(io_op);
|
int op_idx = pgstat_get_io_op_index(io_op);
|
||||||
|
int time_idx = pgstat_get_io_time_index(io_op);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Some combinations of BackendType and IOOp, of IOContext
|
* Some combinations of BackendType and IOOp, of IOContext
|
||||||
* and IOOp, and of IOObject and IOOp are not tracked. Set
|
* and IOOp, and of IOObject and IOOp are not tracked. Set
|
||||||
* these cells in the view NULL.
|
* these cells in the view NULL.
|
||||||
*/
|
*/
|
||||||
nulls[col_idx] = !pgstat_tracks_io_op(bktype, io_obj, io_context, io_op);
|
if (pgstat_tracks_io_op(bktype, io_obj, io_context, io_op))
|
||||||
|
{
|
||||||
|
PgStat_Counter count =
|
||||||
|
bktype_stats->counts[io_obj][io_context][io_op];
|
||||||
|
|
||||||
if (nulls[col_idx])
|
values[op_idx] = Int64GetDatum(count);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
nulls[op_idx] = true;
|
||||||
|
|
||||||
|
/* not every operation is timed */
|
||||||
|
if (time_idx == IO_COL_INVALID)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
values[col_idx] =
|
if (!nulls[op_idx])
|
||||||
Int64GetDatum(bktype_stats->data[io_obj][io_context][io_op]);
|
{
|
||||||
|
PgStat_Counter time =
|
||||||
|
bktype_stats->times[io_obj][io_context][io_op];
|
||||||
|
|
||||||
|
values[time_idx] = Float8GetDatum(pg_stat_us_to_ms(time));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
nulls[time_idx] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
|
tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
|
||||||
|
@ -5755,9 +5755,9 @@
|
|||||||
proname => 'pg_stat_get_io', provolatile => 'v',
|
proname => 'pg_stat_get_io', provolatile => 'v',
|
||||||
prorows => '30', proretset => 't',
|
prorows => '30', proretset => 't',
|
||||||
proparallel => 'r', prorettype => 'record', proargtypes => '',
|
proparallel => 'r', prorettype => 'record', proargtypes => '',
|
||||||
proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,int8,timestamptz}',
|
proallargtypes => '{text,text,text,int8,float8,int8,float8,int8,float8,int8,int8,int8,int8,int8,float8,timestamptz}',
|
||||||
proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o}',
|
proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
|
||||||
proargnames => '{backend_type,io_object,io_context,reads,writes,extends,op_bytes,hits,evictions,reuses,fsyncs,stats_reset}',
|
proargnames => '{backend_type,io_object,io_context,reads,read_time,writes,write_time,extends,extend_time,op_bytes,hits,evictions,reuses,fsyncs,fsync_time,stats_reset}',
|
||||||
prosrc => 'pg_stat_get_io' },
|
prosrc => 'pg_stat_get_io' },
|
||||||
|
|
||||||
{ oid => '1136', descr => 'statistics: information about WAL activity',
|
{ oid => '1136', descr => 'statistics: information about WAL activity',
|
||||||
|
@ -306,7 +306,8 @@ typedef enum IOOp
|
|||||||
|
|
||||||
typedef struct PgStat_BktypeIO
|
typedef struct PgStat_BktypeIO
|
||||||
{
|
{
|
||||||
PgStat_Counter data[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
|
PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
|
||||||
|
PgStat_Counter times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
|
||||||
} PgStat_BktypeIO;
|
} PgStat_BktypeIO;
|
||||||
|
|
||||||
typedef struct PgStat_IO
|
typedef struct PgStat_IO
|
||||||
@ -517,6 +518,10 @@ extern bool pgstat_bktype_io_stats_valid(PgStat_BktypeIO *context_ops,
|
|||||||
BackendType bktype);
|
BackendType bktype);
|
||||||
extern void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op);
|
extern void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op);
|
||||||
extern void pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt);
|
extern void pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt);
|
||||||
|
extern instr_time pgstat_prepare_io_time(void);
|
||||||
|
extern void pgstat_count_io_op_time(IOObject io_object, IOContext io_context,
|
||||||
|
IOOp io_op, instr_time time, uint32 cnt);
|
||||||
|
|
||||||
extern PgStat_IO *pgstat_fetch_stat_io(void);
|
extern PgStat_IO *pgstat_fetch_stat_io(void);
|
||||||
extern const char *pgstat_get_io_context_name(IOContext io_context);
|
extern const char *pgstat_get_io_context_name(IOContext io_context);
|
||||||
extern const char *pgstat_get_io_object_name(IOObject io_object);
|
extern const char *pgstat_get_io_object_name(IOObject io_object);
|
||||||
|
@ -1881,15 +1881,19 @@ pg_stat_io| SELECT backend_type,
|
|||||||
io_object,
|
io_object,
|
||||||
io_context,
|
io_context,
|
||||||
reads,
|
reads,
|
||||||
|
read_time,
|
||||||
writes,
|
writes,
|
||||||
|
write_time,
|
||||||
extends,
|
extends,
|
||||||
|
extend_time,
|
||||||
op_bytes,
|
op_bytes,
|
||||||
hits,
|
hits,
|
||||||
evictions,
|
evictions,
|
||||||
reuses,
|
reuses,
|
||||||
fsyncs,
|
fsyncs,
|
||||||
|
fsync_time,
|
||||||
stats_reset
|
stats_reset
|
||||||
FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, writes, extends, op_bytes, hits, evictions, reuses, fsyncs, stats_reset);
|
FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, read_time, writes, write_time, extends, extend_time, op_bytes, hits, evictions, reuses, fsyncs, fsync_time, stats_reset);
|
||||||
pg_stat_progress_analyze| SELECT s.pid,
|
pg_stat_progress_analyze| SELECT s.pid,
|
||||||
s.datid,
|
s.datid,
|
||||||
d.datname,
|
d.datname,
|
||||||
|
@ -2053,6 +2053,7 @@ PgStat_Kind
|
|||||||
PgStat_KindInfo
|
PgStat_KindInfo
|
||||||
PgStat_LocalState
|
PgStat_LocalState
|
||||||
PgStat_PendingDroppedStatsItem
|
PgStat_PendingDroppedStatsItem
|
||||||
|
PgStat_PendingIO
|
||||||
PgStat_PendingWalStats
|
PgStat_PendingWalStats
|
||||||
PgStat_SLRUStats
|
PgStat_SLRUStats
|
||||||
PgStat_ShmemControl
|
PgStat_ShmemControl
|
||||||
|
Loading…
x
Reference in New Issue
Block a user