diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index a7a39dd2a1e..f9980cf80ce 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -480,18 +480,99 @@ static int ckpt_buforder_comparator(const void *pa, const void *pb); static int ts_ckpt_progress_comparator(Datum a, Datum b, void *arg); +/* + * Implementation of PrefetchBuffer() for shared buffers. + */ +PrefetchBufferResult +PrefetchSharedBuffer(SMgrRelation smgr_reln, + ForkNumber forkNum, + BlockNumber blockNum) +{ + PrefetchBufferResult result = {InvalidBuffer, false}; + BufferTag newTag; /* identity of requested block */ + uint32 newHash; /* hash value for newTag */ + LWLock *newPartitionLock; /* buffer partition lock for it */ + int buf_id; + + Assert(BlockNumberIsValid(blockNum)); + + /* create a tag so we can lookup the buffer */ + INIT_BUFFERTAG(newTag, smgr_reln->smgr_rnode.node, + forkNum, blockNum); + + /* determine its hash code and partition lock ID */ + newHash = BufTableHashCode(&newTag); + newPartitionLock = BufMappingPartitionLock(newHash); + + /* see if the block is in the buffer pool already */ + LWLockAcquire(newPartitionLock, LW_SHARED); + buf_id = BufTableLookup(&newTag, newHash); + LWLockRelease(newPartitionLock); + + /* If not in buffers, initiate prefetch */ + if (buf_id < 0) + { +#ifdef USE_PREFETCH + /* + * Try to initiate an asynchronous read. This returns false in + * recovery if the relation file doesn't exist. + */ + if (smgrprefetch(smgr_reln, forkNum, blockNum)) + result.initiated_io = true; +#endif /* USE_PREFETCH */ + } + else + { + /* + * Report the buffer it was in at that time. The caller may be able + * to avoid a buffer table lookup, but it's not pinned and it must be + * rechecked! + */ + result.recent_buffer = buf_id + 1; + } + + /* + * If the block *is* in buffers, we do nothing. This is not really ideal: + * the block might be just about to be evicted, which would be stupid + * since we know we are going to need it soon. But the only easy answer + * is to bump the usage_count, which does not seem like a great solution: + * when the caller does ultimately touch the block, usage_count would get + * bumped again, resulting in too much favoritism for blocks that are + * involved in a prefetch sequence. A real fix would involve some + * additional per-buffer state, and it's not clear that there's enough of + * a problem to justify that. + */ + + return result; +} + /* * PrefetchBuffer -- initiate asynchronous read of a block of a relation * * This is named by analogy to ReadBuffer but doesn't actually allocate a * buffer. Instead it tries to ensure that a future ReadBuffer for the given * block will not be delayed by the I/O. Prefetching is optional. - * No-op if prefetching isn't compiled in. + * + * There are three possible outcomes: + * + * 1. If the block is already cached, the result includes a valid buffer that + * could be used by the caller to avoid the need for a later buffer lookup, but + * it's not pinned, so the caller must recheck it. + * + * 2. If the kernel has been asked to initiate I/O, the initated_io member is + * true. Currently there is no way to know if the data was already cached by + * the kernel and therefore didn't really initiate I/O, and no way to know when + * the I/O completes other than using synchronous ReadBuffer(). + * + * 3. Otherwise, the buffer wasn't already cached by PostgreSQL, and either + * USE_PREFETCH is not defined (this build doesn't support prefetching due to + * lack of a kernel facility), or the underlying relation file wasn't found and + * we are in recovery. (If the relation file wasn't found and we are not in + * recovery, an error is raised). */ -void +PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum) { -#ifdef USE_PREFETCH Assert(RelationIsValid(reln)); Assert(BlockNumberIsValid(blockNum)); @@ -507,45 +588,13 @@ PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum) errmsg("cannot access temporary tables of other sessions"))); /* pass it off to localbuf.c */ - LocalPrefetchBuffer(reln->rd_smgr, forkNum, blockNum); + return PrefetchLocalBuffer(reln->rd_smgr, forkNum, blockNum); } else { - BufferTag newTag; /* identity of requested block */ - uint32 newHash; /* hash value for newTag */ - LWLock *newPartitionLock; /* buffer partition lock for it */ - int buf_id; - - /* create a tag so we can lookup the buffer */ - INIT_BUFFERTAG(newTag, reln->rd_smgr->smgr_rnode.node, - forkNum, blockNum); - - /* determine its hash code and partition lock ID */ - newHash = BufTableHashCode(&newTag); - newPartitionLock = BufMappingPartitionLock(newHash); - - /* see if the block is in the buffer pool already */ - LWLockAcquire(newPartitionLock, LW_SHARED); - buf_id = BufTableLookup(&newTag, newHash); - LWLockRelease(newPartitionLock); - - /* If not in buffers, initiate prefetch */ - if (buf_id < 0) - smgrprefetch(reln->rd_smgr, forkNum, blockNum); - - /* - * If the block *is* in buffers, we do nothing. This is not really - * ideal: the block might be just about to be evicted, which would be - * stupid since we know we are going to need it soon. But the only - * easy answer is to bump the usage_count, which does not seem like a - * great solution: when the caller does ultimately touch the block, - * usage_count would get bumped again, resulting in too much - * favoritism for blocks that are involved in a prefetch sequence. A - * real fix would involve some additional per-buffer state, and it's - * not clear that there's enough of a problem to justify that. - */ + /* pass it to the shared buffer version */ + return PrefetchSharedBuffer(reln->rd_smgr, forkNum, blockNum); } -#endif /* USE_PREFETCH */ } diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index cac08e1b1ac..6ffd7b33062 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -54,17 +54,17 @@ static Block GetLocalBufferStorage(void); /* - * LocalPrefetchBuffer - + * PrefetchLocalBuffer - * initiate asynchronous read of a block of a relation * * Do PrefetchBuffer's work for temporary relations. * No-op if prefetching isn't compiled in. */ -void -LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum, +PrefetchBufferResult +PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum) { -#ifdef USE_PREFETCH + PrefetchBufferResult result = {InvalidBuffer, false}; BufferTag newTag; /* identity of requested block */ LocalBufferLookupEnt *hresult; @@ -81,12 +81,18 @@ LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum, if (hresult) { /* Yes, so nothing to do */ - return; + result.recent_buffer = -hresult->id - 1; + } + else + { +#ifdef USE_PREFETCH + /* Not in buffers, so initiate prefetch */ + smgrprefetch(smgr, forkNum, blockNum); + result.initiated_io = true; +#endif /* USE_PREFETCH */ } - /* Not in buffers, so initiate prefetch */ - smgrprefetch(smgr, forkNum, blockNum); -#endif /* USE_PREFETCH */ + return result; } diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index ee9822c6e10..e0b020da111 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -524,14 +524,17 @@ mdclose(SMgrRelation reln, ForkNumber forknum) /* * mdprefetch() -- Initiate asynchronous read of the specified block of a relation */ -void +bool mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) { #ifdef USE_PREFETCH off_t seekpos; MdfdVec *v; - v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL); + v = _mdfd_getseg(reln, forknum, blocknum, false, + InRecovery ? EXTENSION_RETURN_NULL : EXTENSION_FAIL); + if (v == NULL) + return false; seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); @@ -539,6 +542,8 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) (void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ, WAIT_EVENT_DATA_FILE_PREFETCH); #endif /* USE_PREFETCH */ + + return true; } /* diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 72c9696ad19..b053a4dc761 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -49,7 +49,7 @@ typedef struct f_smgr bool isRedo); void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync); - void (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum, + bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum); void (*smgr_read) (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer); @@ -524,11 +524,15 @@ smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, /* * smgrprefetch() -- Initiate asynchronous read of the specified block of a relation. + * + * In recovery only, this can return false to indicate that a file + * doesn't exist (presumably it has been dropped by a later WAL + * record). */ -void +bool smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) { - smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum); + return smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum); } /* diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index bf3b8ad340e..e57f84ee9c8 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -327,8 +327,9 @@ extern int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id); extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode); /* localbuf.c */ -extern void LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum, - BlockNumber blockNum); +extern PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr, + ForkNumber forkNum, + BlockNumber blockNum); extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, bool *foundPtr); extern void MarkLocalBufferDirty(Buffer buffer); diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index bf3b12a2de3..ee91b8fa26c 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -46,6 +46,15 @@ typedef enum * replay; otherwise same as RBM_NORMAL */ } ReadBufferMode; +/* + * Type returned by PrefetchBuffer(). + */ +typedef struct PrefetchBufferResult +{ + Buffer recent_buffer; /* If valid, a hit (recheck needed!) */ + bool initiated_io; /* If true, a miss resulting in async I/O */ +} PrefetchBufferResult; + /* forward declared, to avoid having to expose buf_internals.h here */ struct WritebackContext; @@ -162,8 +171,11 @@ extern PGDLLIMPORT int32 *LocalRefCount; /* * prototypes for functions in bufmgr.c */ -extern void PrefetchBuffer(Relation reln, ForkNumber forkNum, - BlockNumber blockNum); +extern PrefetchBufferResult PrefetchSharedBuffer(struct SMgrRelationData *smgr_reln, + ForkNumber forkNum, + BlockNumber blockNum); +extern PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, + BlockNumber blockNum); extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum); extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, diff --git a/src/include/storage/md.h b/src/include/storage/md.h index ec7630ce3b7..07fd1bb7d06 100644 --- a/src/include/storage/md.h +++ b/src/include/storage/md.h @@ -28,7 +28,7 @@ extern bool mdexists(SMgrRelation reln, ForkNumber forknum); extern void mdunlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo); extern void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync); -extern void mdprefetch(SMgrRelation reln, ForkNumber forknum, +extern bool mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum); extern void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer); diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 79dfe0e373a..bb8428f27f6 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -93,7 +93,7 @@ extern void smgrdosyncall(SMgrRelation *rels, int nrels); extern void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo); extern void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync); -extern void smgrprefetch(SMgrRelation reln, ForkNumber forknum, +extern bool smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum); extern void smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer);