From 8aaa04b32d790da595684de58ae4fc2db96becff Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Thu, 30 Mar 2023 19:22:40 -0700 Subject: [PATCH] Track shared buffer hits in pg_stat_io Among other things, this should make it easier to calculate a useful cache hit ratio by excluding buffer reads via buffer access strategies. As buffer access strategies reuse buffers (and thus evict the prior buffer contents), it is normal to see reads on repeated scans of the same data. Author: Melanie Plageman Reviewed-by: Bertrand Drouvot Reviewed-by: Andres Freund Discussion: https://postgr.es/m/CAAKRu_beMa9Hzih40%3DXPYqhDVz6tsgUGTrhZXRo%3Dunp%2Bszb%3DUA%40mail.gmail.com --- doc/src/sgml/monitoring.sgml | 11 ++++++ src/backend/catalog/system_views.sql | 1 + src/backend/storage/buffer/bufmgr.c | 38 ++++++++------------- src/backend/storage/buffer/localbuf.c | 11 ++---- src/backend/utils/activity/pgstat_io.c | 2 +- src/backend/utils/adt/pgstatfuncs.c | 11 +++--- src/include/catalog/pg_proc.dat | 6 ++-- src/include/pgstat.h | 1 + src/include/storage/buf_internals.h | 2 +- src/test/regress/expected/rules.out | 3 +- src/test/regress/expected/stats.out | 46 ++++++++++++++++++++++++-- src/test/regress/sql/stats.sql | 24 ++++++++++++-- 12 files changed, 109 insertions(+), 47 deletions(-) diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index c809ff1ba4a..d5a45f996d0 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -3855,6 +3855,17 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i + + + + hits bigint + + + The number of times a desired block was found in a shared buffer. + + + + diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 9508d8ba552..574cbc2e448 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1128,6 +1128,7 @@ SELECT b.writes, b.extends, b.op_bytes, + b.hits, b.evictions, b.reuses, b.fsyncs, diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index fe029d2ea60..b3adbbe7d23 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -472,7 +472,7 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, - bool *foundPtr, IOContext *io_context); + bool *foundPtr, IOContext io_context); static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context); static void FindAndDropRelationBuffers(RelFileLocator rlocator, @@ -850,13 +850,14 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, if (isLocalBuf) { /* - * LocalBufferAlloc() will set the io_context to IOCONTEXT_NORMAL. We - * do not use a BufferAccessStrategy for I/O of temporary tables. + * We do not use a BufferAccessStrategy for I/O of temporary tables. * However, in some cases, the "strategy" may not be NULL, so we can't * rely on IOContextForStrategy() to set the right IOContext for us. * This may happen in cases like CREATE TEMPORARY TABLE AS... */ - bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found, &io_context); + io_context = IOCONTEXT_NORMAL; + io_object = IOOBJECT_TEMP_RELATION; + bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found); if (found) pgBufferUsage.local_blks_hit++; else if (isExtend) @@ -871,8 +872,10 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, * lookup the buffer. IO_IN_PROGRESS is set if the requested block is * not currently in memory. */ + io_context = IOContextForStrategy(strategy); + io_object = IOOBJECT_RELATION; bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum, - strategy, &found, &io_context); + strategy, &found, io_context); if (found) pgBufferUsage.shared_blks_hit++; else if (isExtend) @@ -892,6 +895,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, /* Just need to update stats before we exit */ *hit = true; VacuumPageHit++; + pgstat_count_io_op(io_object, io_context, IOOP_HIT); if (VacuumCostActive) VacuumCostBalance += VacuumCostPageHit; @@ -987,16 +991,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, */ Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID)); /* spinlock not needed */ - if (isLocalBuf) - { - bufBlock = LocalBufHdrGetBlock(bufHdr); - io_object = IOOBJECT_TEMP_RELATION; - } - else - { - bufBlock = BufHdrGetBlock(bufHdr); - io_object = IOOBJECT_RELATION; - } + bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr); if (isExtend) { @@ -1139,7 +1134,7 @@ static BufferDesc * BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, - bool *foundPtr, IOContext *io_context) + bool *foundPtr, IOContext io_context) { bool from_ring; BufferTag newTag; /* identity of requested block */ @@ -1193,11 +1188,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, { /* * If we get here, previous attempts to read the buffer must - * have failed ... but we shall bravely try again. Set - * io_context since we will in fact need to count an IO - * Operation. + * have failed ... but we shall bravely try again. */ - *io_context = IOContextForStrategy(strategy); *foundPtr = false; } } @@ -1211,8 +1203,6 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, */ LWLockRelease(newPartitionLock); - *io_context = IOContextForStrategy(strategy); - /* Loop here in case we have to try another victim buffer */ for (;;) { @@ -1295,7 +1285,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, smgr->smgr_rlocator.locator.dbOid, smgr->smgr_rlocator.locator.relNumber); - FlushBuffer(buf, NULL, IOOBJECT_RELATION, *io_context); + FlushBuffer(buf, NULL, IOOBJECT_RELATION, io_context); LWLockRelease(BufferDescriptorGetContentLock(buf)); ScheduleBufferTagForWriteback(&BackendWritebackContext, @@ -1494,7 +1484,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, * we may have been forced to release the buffer due to concurrent * pinners or erroring out. */ - pgstat_count_io_op(IOOBJECT_RELATION, *io_context, + pgstat_count_io_op(IOOBJECT_RELATION, io_context, from_ring ? IOOP_REUSE : IOOP_EVICT); } diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 68b4817c67b..6f9e7eda57c 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -108,7 +108,7 @@ PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum, */ BufferDesc * LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, - bool *foundPtr, IOContext *io_context) + bool *foundPtr) { BufferTag newTag; /* identity of requested block */ LocalBufferLookupEnt *hresult; @@ -128,14 +128,6 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, hresult = (LocalBufferLookupEnt *) hash_search(LocalBufHash, &newTag, HASH_FIND, NULL); - /* - * IO Operations on local buffers are only done in IOCONTEXT_NORMAL. Set - * io_context here (instead of after a buffer hit would have returned) for - * convenience since we don't have to worry about the overhead of calling - * IOContextForStrategy(). - */ - *io_context = IOCONTEXT_NORMAL; - if (hresult) { b = hresult->id; @@ -239,6 +231,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, buf_state &= ~BM_DIRTY; pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state); + /* Temporary table I/O does not use Buffer Access Strategies */ pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE); pgBufferUsage.local_blks_written++; } diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c index af5d5546101..ae8bb34f78b 100644 --- a/src/backend/utils/activity/pgstat_io.c +++ b/src/backend/utils/activity/pgstat_io.c @@ -344,7 +344,7 @@ pgstat_tracks_io_op(BackendType bktype, IOObject io_object, * Some BackendTypes will not do certain IOOps. */ if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) && - (io_op == IOOP_READ || io_op == IOOP_EVICT)) + (io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT)) return false; if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER || diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 0ab31ec6e3b..eec9f3cf9b1 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1259,6 +1259,7 @@ typedef enum io_stat_col IO_COL_WRITES, IO_COL_EXTENDS, IO_COL_CONVERSION, + IO_COL_HITS, IO_COL_EVICTIONS, IO_COL_REUSES, IO_COL_FSYNCS, @@ -1277,16 +1278,18 @@ pgstat_get_io_op_index(IOOp io_op) { case IOOP_EVICT: return IO_COL_EVICTIONS; + case IOOP_EXTEND: + return IO_COL_EXTENDS; + case IOOP_FSYNC: + return IO_COL_FSYNCS; + case IOOP_HIT: + return IO_COL_HITS; case IOOP_READ: return IO_COL_READS; case IOOP_REUSE: return IO_COL_REUSES; case IOOP_WRITE: return IO_COL_WRITES; - case IOOP_EXTEND: - return IO_COL_EXTENDS; - case IOOP_FSYNC: - return IO_COL_FSYNCS; } elog(ERROR, "unrecognized IOOp value: %d", io_op); diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 5736c1082cf..f9f26422015 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5749,9 +5749,9 @@ proname => 'pg_stat_get_io', provolatile => 'v', prorows => '30', proretset => 't', proparallel => 'r', prorettype => 'record', proargtypes => '', - proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,timestamptz}', - proargmodes => '{o,o,o,o,o,o,o,o,o,o,o}', - proargnames => '{backend_type,io_object,io_context,reads,writes,extends,op_bytes,evictions,reuses,fsyncs,stats_reset}', + proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,int8,timestamptz}', + proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o}', + proargnames => '{backend_type,io_object,io_context,reads,writes,extends,op_bytes,hits,evictions,reuses,fsyncs,stats_reset}', prosrc => 'pg_stat_get_io' }, { oid => '1136', descr => 'statistics: information about WAL activity', diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 17ee94d8b66..a56182af9fa 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -296,6 +296,7 @@ typedef enum IOOp IOOP_EVICT, IOOP_EXTEND, IOOP_FSYNC, + IOOP_HIT, IOOP_READ, IOOP_REUSE, IOOP_WRITE, diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 0b448147407..2afb9bb3099 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -419,7 +419,7 @@ extern PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum); extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, - BlockNumber blockNum, bool *foundPtr, IOContext *io_context); + BlockNumber blockNum, bool *foundPtr); extern void MarkLocalBufferDirty(Buffer buffer); extern void DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum, diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index c71758db46e..ab1aebfde42 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1884,11 +1884,12 @@ pg_stat_io| SELECT backend_type, writes, extends, op_bytes, + hits, evictions, reuses, fsyncs, stats_reset - FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, writes, extends, op_bytes, evictions, reuses, fsyncs, stats_reset); + FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, writes, extends, op_bytes, hits, evictions, reuses, fsyncs, stats_reset); pg_stat_progress_analyze| SELECT s.pid, s.datid, d.datname, diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out index 55b4c6df014..5f1821938d5 100644 --- a/src/test/regress/expected/stats.out +++ b/src/test/regress/expected/stats.out @@ -1131,6 +1131,7 @@ SELECT pg_stat_get_subscription_stats(NULL); -- - writes of shared buffers to permanent storage -- - extends of relations using shared buffers -- - fsyncs done to ensure the durability of data dirtying shared buffers +-- - shared buffer hits -- There is no test for blocks evicted from shared buffers, because we cannot -- be sure of the state of shared buffers at the point the test is run. -- Create a regular table and insert some data to generate IOCONTEXT_NORMAL @@ -1208,6 +1209,47 @@ SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads; t (1 row) +SELECT sum(hits) AS io_sum_shared_before_hits + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +-- Select from the table again to count hits. +-- Ensure we generate hits by forcing a nested loop self-join with no +-- materialize node. The outer side's buffer will stay pinned, preventing its +-- eviction, while we loop through the inner side and generate hits. +BEGIN; +SET LOCAL enable_nestloop TO on; SET LOCAL enable_mergejoin TO off; +SET LOCAL enable_hashjoin TO off; SET LOCAL enable_material TO off; +-- ensure plan stays as we expect it to +EXPLAIN (COSTS OFF) SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a); + QUERY PLAN +------------------------------------------- + Aggregate + -> Nested Loop + Join Filter: (t1.a = t2.a) + -> Seq Scan on test_io_shared t1 + -> Seq Scan on test_io_shared t2 +(5 rows) + +SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a); + count +------- + 100 +(1 row) + +COMMIT; +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT sum(hits) AS io_sum_shared_after_hits + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +SELECT :io_sum_shared_after_hits > :io_sum_shared_before_hits; + ?column? +---------- + t +(1 row) + DROP TABLE test_io_shared; -- Test that the follow IOCONTEXT_LOCAL IOOps are tracked in pg_stat_io: -- - eviction of local buffers in order to reuse them @@ -1342,7 +1384,7 @@ SELECT pg_stat_have_stats('io', 0, 0); t (1 row) -SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_pre_reset +SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset FROM pg_stat_io \gset SELECT pg_stat_reset_shared('io'); pg_stat_reset_shared @@ -1350,7 +1392,7 @@ SELECT pg_stat_reset_shared('io'); (1 row) -SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_post_reset +SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset FROM pg_stat_io \gset SELECT :io_stats_post_reset < :io_stats_pre_reset; ?column? diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql index d958e70a866..58db803ed69 100644 --- a/src/test/regress/sql/stats.sql +++ b/src/test/regress/sql/stats.sql @@ -541,6 +541,7 @@ SELECT pg_stat_get_subscription_stats(NULL); -- - writes of shared buffers to permanent storage -- - extends of relations using shared buffers -- - fsyncs done to ensure the durability of data dirtying shared buffers +-- - shared buffer hits -- There is no test for blocks evicted from shared buffers, because we cannot -- be sure of the state of shared buffers at the point the test is run. @@ -587,6 +588,25 @@ SELECT pg_stat_force_next_flush(); SELECT sum(reads) AS io_sum_shared_after_reads FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads; + +SELECT sum(hits) AS io_sum_shared_before_hits + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +-- Select from the table again to count hits. +-- Ensure we generate hits by forcing a nested loop self-join with no +-- materialize node. The outer side's buffer will stay pinned, preventing its +-- eviction, while we loop through the inner side and generate hits. +BEGIN; +SET LOCAL enable_nestloop TO on; SET LOCAL enable_mergejoin TO off; +SET LOCAL enable_hashjoin TO off; SET LOCAL enable_material TO off; +-- ensure plan stays as we expect it to +EXPLAIN (COSTS OFF) SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a); +SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a); +COMMIT; +SELECT pg_stat_force_next_flush(); +SELECT sum(hits) AS io_sum_shared_after_hits + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +SELECT :io_sum_shared_after_hits > :io_sum_shared_before_hits; + DROP TABLE test_io_shared; -- Test that the follow IOCONTEXT_LOCAL IOOps are tracked in pg_stat_io: @@ -674,10 +694,10 @@ SELECT :io_sum_bulkwrite_strategy_extends_after > :io_sum_bulkwrite_strategy_ext -- Test IO stats reset SELECT pg_stat_have_stats('io', 0, 0); -SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_pre_reset +SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset FROM pg_stat_io \gset SELECT pg_stat_reset_shared('io'); -SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_post_reset +SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset FROM pg_stat_io \gset SELECT :io_stats_post_reset < :io_stats_pre_reset;