diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index c809ff1ba4a..d5a45f996d0 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -3855,6 +3855,17 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i + + + + hits bigint + + + The number of times a desired block was found in a shared buffer. + + + + diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 9508d8ba552..574cbc2e448 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1128,6 +1128,7 @@ SELECT b.writes, b.extends, b.op_bytes, + b.hits, b.evictions, b.reuses, b.fsyncs, diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index fe029d2ea60..b3adbbe7d23 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -472,7 +472,7 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, - bool *foundPtr, IOContext *io_context); + bool *foundPtr, IOContext io_context); static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context); static void FindAndDropRelationBuffers(RelFileLocator rlocator, @@ -850,13 +850,14 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, if (isLocalBuf) { /* - * LocalBufferAlloc() will set the io_context to IOCONTEXT_NORMAL. We - * do not use a BufferAccessStrategy for I/O of temporary tables. + * We do not use a BufferAccessStrategy for I/O of temporary tables. * However, in some cases, the "strategy" may not be NULL, so we can't * rely on IOContextForStrategy() to set the right IOContext for us. * This may happen in cases like CREATE TEMPORARY TABLE AS... */ - bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found, &io_context); + io_context = IOCONTEXT_NORMAL; + io_object = IOOBJECT_TEMP_RELATION; + bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found); if (found) pgBufferUsage.local_blks_hit++; else if (isExtend) @@ -871,8 +872,10 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, * lookup the buffer. IO_IN_PROGRESS is set if the requested block is * not currently in memory. */ + io_context = IOContextForStrategy(strategy); + io_object = IOOBJECT_RELATION; bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum, - strategy, &found, &io_context); + strategy, &found, io_context); if (found) pgBufferUsage.shared_blks_hit++; else if (isExtend) @@ -892,6 +895,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, /* Just need to update stats before we exit */ *hit = true; VacuumPageHit++; + pgstat_count_io_op(io_object, io_context, IOOP_HIT); if (VacuumCostActive) VacuumCostBalance += VacuumCostPageHit; @@ -987,16 +991,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, */ Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID)); /* spinlock not needed */ - if (isLocalBuf) - { - bufBlock = LocalBufHdrGetBlock(bufHdr); - io_object = IOOBJECT_TEMP_RELATION; - } - else - { - bufBlock = BufHdrGetBlock(bufHdr); - io_object = IOOBJECT_RELATION; - } + bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr); if (isExtend) { @@ -1139,7 +1134,7 @@ static BufferDesc * BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, - bool *foundPtr, IOContext *io_context) + bool *foundPtr, IOContext io_context) { bool from_ring; BufferTag newTag; /* identity of requested block */ @@ -1193,11 +1188,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, { /* * If we get here, previous attempts to read the buffer must - * have failed ... but we shall bravely try again. Set - * io_context since we will in fact need to count an IO - * Operation. + * have failed ... but we shall bravely try again. */ - *io_context = IOContextForStrategy(strategy); *foundPtr = false; } } @@ -1211,8 +1203,6 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, */ LWLockRelease(newPartitionLock); - *io_context = IOContextForStrategy(strategy); - /* Loop here in case we have to try another victim buffer */ for (;;) { @@ -1295,7 +1285,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, smgr->smgr_rlocator.locator.dbOid, smgr->smgr_rlocator.locator.relNumber); - FlushBuffer(buf, NULL, IOOBJECT_RELATION, *io_context); + FlushBuffer(buf, NULL, IOOBJECT_RELATION, io_context); LWLockRelease(BufferDescriptorGetContentLock(buf)); ScheduleBufferTagForWriteback(&BackendWritebackContext, @@ -1494,7 +1484,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, * we may have been forced to release the buffer due to concurrent * pinners or erroring out. */ - pgstat_count_io_op(IOOBJECT_RELATION, *io_context, + pgstat_count_io_op(IOOBJECT_RELATION, io_context, from_ring ? IOOP_REUSE : IOOP_EVICT); } diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 68b4817c67b..6f9e7eda57c 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -108,7 +108,7 @@ PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum, */ BufferDesc * LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, - bool *foundPtr, IOContext *io_context) + bool *foundPtr) { BufferTag newTag; /* identity of requested block */ LocalBufferLookupEnt *hresult; @@ -128,14 +128,6 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, hresult = (LocalBufferLookupEnt *) hash_search(LocalBufHash, &newTag, HASH_FIND, NULL); - /* - * IO Operations on local buffers are only done in IOCONTEXT_NORMAL. Set - * io_context here (instead of after a buffer hit would have returned) for - * convenience since we don't have to worry about the overhead of calling - * IOContextForStrategy(). - */ - *io_context = IOCONTEXT_NORMAL; - if (hresult) { b = hresult->id; @@ -239,6 +231,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, buf_state &= ~BM_DIRTY; pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state); + /* Temporary table I/O does not use Buffer Access Strategies */ pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE); pgBufferUsage.local_blks_written++; } diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c index af5d5546101..ae8bb34f78b 100644 --- a/src/backend/utils/activity/pgstat_io.c +++ b/src/backend/utils/activity/pgstat_io.c @@ -344,7 +344,7 @@ pgstat_tracks_io_op(BackendType bktype, IOObject io_object, * Some BackendTypes will not do certain IOOps. */ if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) && - (io_op == IOOP_READ || io_op == IOOP_EVICT)) + (io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT)) return false; if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER || diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 0ab31ec6e3b..eec9f3cf9b1 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1259,6 +1259,7 @@ typedef enum io_stat_col IO_COL_WRITES, IO_COL_EXTENDS, IO_COL_CONVERSION, + IO_COL_HITS, IO_COL_EVICTIONS, IO_COL_REUSES, IO_COL_FSYNCS, @@ -1277,16 +1278,18 @@ pgstat_get_io_op_index(IOOp io_op) { case IOOP_EVICT: return IO_COL_EVICTIONS; + case IOOP_EXTEND: + return IO_COL_EXTENDS; + case IOOP_FSYNC: + return IO_COL_FSYNCS; + case IOOP_HIT: + return IO_COL_HITS; case IOOP_READ: return IO_COL_READS; case IOOP_REUSE: return IO_COL_REUSES; case IOOP_WRITE: return IO_COL_WRITES; - case IOOP_EXTEND: - return IO_COL_EXTENDS; - case IOOP_FSYNC: - return IO_COL_FSYNCS; } elog(ERROR, "unrecognized IOOp value: %d", io_op); diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 5736c1082cf..f9f26422015 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5749,9 +5749,9 @@ proname => 'pg_stat_get_io', provolatile => 'v', prorows => '30', proretset => 't', proparallel => 'r', prorettype => 'record', proargtypes => '', - proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,timestamptz}', - proargmodes => '{o,o,o,o,o,o,o,o,o,o,o}', - proargnames => '{backend_type,io_object,io_context,reads,writes,extends,op_bytes,evictions,reuses,fsyncs,stats_reset}', + proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,int8,timestamptz}', + proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o}', + proargnames => '{backend_type,io_object,io_context,reads,writes,extends,op_bytes,hits,evictions,reuses,fsyncs,stats_reset}', prosrc => 'pg_stat_get_io' }, { oid => '1136', descr => 'statistics: information about WAL activity', diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 17ee94d8b66..a56182af9fa 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -296,6 +296,7 @@ typedef enum IOOp IOOP_EVICT, IOOP_EXTEND, IOOP_FSYNC, + IOOP_HIT, IOOP_READ, IOOP_REUSE, IOOP_WRITE, diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 0b448147407..2afb9bb3099 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -419,7 +419,7 @@ extern PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum); extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, - BlockNumber blockNum, bool *foundPtr, IOContext *io_context); + BlockNumber blockNum, bool *foundPtr); extern void MarkLocalBufferDirty(Buffer buffer); extern void DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum, diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index c71758db46e..ab1aebfde42 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1884,11 +1884,12 @@ pg_stat_io| SELECT backend_type, writes, extends, op_bytes, + hits, evictions, reuses, fsyncs, stats_reset - FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, writes, extends, op_bytes, evictions, reuses, fsyncs, stats_reset); + FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, writes, extends, op_bytes, hits, evictions, reuses, fsyncs, stats_reset); pg_stat_progress_analyze| SELECT s.pid, s.datid, d.datname, diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out index 55b4c6df014..5f1821938d5 100644 --- a/src/test/regress/expected/stats.out +++ b/src/test/regress/expected/stats.out @@ -1131,6 +1131,7 @@ SELECT pg_stat_get_subscription_stats(NULL); -- - writes of shared buffers to permanent storage -- - extends of relations using shared buffers -- - fsyncs done to ensure the durability of data dirtying shared buffers +-- - shared buffer hits -- There is no test for blocks evicted from shared buffers, because we cannot -- be sure of the state of shared buffers at the point the test is run. -- Create a regular table and insert some data to generate IOCONTEXT_NORMAL @@ -1208,6 +1209,47 @@ SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads; t (1 row) +SELECT sum(hits) AS io_sum_shared_before_hits + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +-- Select from the table again to count hits. +-- Ensure we generate hits by forcing a nested loop self-join with no +-- materialize node. The outer side's buffer will stay pinned, preventing its +-- eviction, while we loop through the inner side and generate hits. +BEGIN; +SET LOCAL enable_nestloop TO on; SET LOCAL enable_mergejoin TO off; +SET LOCAL enable_hashjoin TO off; SET LOCAL enable_material TO off; +-- ensure plan stays as we expect it to +EXPLAIN (COSTS OFF) SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a); + QUERY PLAN +------------------------------------------- + Aggregate + -> Nested Loop + Join Filter: (t1.a = t2.a) + -> Seq Scan on test_io_shared t1 + -> Seq Scan on test_io_shared t2 +(5 rows) + +SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a); + count +------- + 100 +(1 row) + +COMMIT; +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT sum(hits) AS io_sum_shared_after_hits + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +SELECT :io_sum_shared_after_hits > :io_sum_shared_before_hits; + ?column? +---------- + t +(1 row) + DROP TABLE test_io_shared; -- Test that the follow IOCONTEXT_LOCAL IOOps are tracked in pg_stat_io: -- - eviction of local buffers in order to reuse them @@ -1342,7 +1384,7 @@ SELECT pg_stat_have_stats('io', 0, 0); t (1 row) -SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_pre_reset +SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset FROM pg_stat_io \gset SELECT pg_stat_reset_shared('io'); pg_stat_reset_shared @@ -1350,7 +1392,7 @@ SELECT pg_stat_reset_shared('io'); (1 row) -SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_post_reset +SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset FROM pg_stat_io \gset SELECT :io_stats_post_reset < :io_stats_pre_reset; ?column? diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql index d958e70a866..58db803ed69 100644 --- a/src/test/regress/sql/stats.sql +++ b/src/test/regress/sql/stats.sql @@ -541,6 +541,7 @@ SELECT pg_stat_get_subscription_stats(NULL); -- - writes of shared buffers to permanent storage -- - extends of relations using shared buffers -- - fsyncs done to ensure the durability of data dirtying shared buffers +-- - shared buffer hits -- There is no test for blocks evicted from shared buffers, because we cannot -- be sure of the state of shared buffers at the point the test is run. @@ -587,6 +588,25 @@ SELECT pg_stat_force_next_flush(); SELECT sum(reads) AS io_sum_shared_after_reads FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads; + +SELECT sum(hits) AS io_sum_shared_before_hits + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +-- Select from the table again to count hits. +-- Ensure we generate hits by forcing a nested loop self-join with no +-- materialize node. The outer side's buffer will stay pinned, preventing its +-- eviction, while we loop through the inner side and generate hits. +BEGIN; +SET LOCAL enable_nestloop TO on; SET LOCAL enable_mergejoin TO off; +SET LOCAL enable_hashjoin TO off; SET LOCAL enable_material TO off; +-- ensure plan stays as we expect it to +EXPLAIN (COSTS OFF) SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a); +SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a); +COMMIT; +SELECT pg_stat_force_next_flush(); +SELECT sum(hits) AS io_sum_shared_after_hits + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +SELECT :io_sum_shared_after_hits > :io_sum_shared_before_hits; + DROP TABLE test_io_shared; -- Test that the follow IOCONTEXT_LOCAL IOOps are tracked in pg_stat_io: @@ -674,10 +694,10 @@ SELECT :io_sum_bulkwrite_strategy_extends_after > :io_sum_bulkwrite_strategy_ext -- Test IO stats reset SELECT pg_stat_have_stats('io', 0, 0); -SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_pre_reset +SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset FROM pg_stat_io \gset SELECT pg_stat_reset_shared('io'); -SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_post_reset +SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset FROM pg_stat_io \gset SELECT :io_stats_post_reset < :io_stats_pre_reset;