mirror of
https://github.com/postgres/postgres.git
synced 2025-07-02 09:02:37 +03:00
Introduce the concept of relation forks. An smgr relation can now consist of multiple forks, and each fork can be created and grown separately.

The bulk of this patch is about changing the smgr API to include an extra ForkNumber argument in every smgr function. Also, smgrscheduleunlink and smgrdounlink no longer implicitly call smgrclose, because other forks might still exist after unlinking one. The callers of those functions have been modified to call smgrclose instead.

This patch in itself doesn't have any user-visible effect, but provides the infrastructure needed for upcoming patches. The additional forks envisioned are a rewritten FSM implementation that doesn't rely on a fixed-size shared memory block, and a visibility map to allow skipping portions of a table in VACUUM that have no dead tuples.
This commit is contained in:
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.236 2008/08/05 15:09:04 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.237 2008/08/11 11:05:11 heikki Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -78,9 +78,10 @@ static bool IsForInput;
|
||||
static volatile BufferDesc *PinCountWaitBuf = NULL;
|
||||
|
||||
|
||||
static Buffer ReadBuffer_relcache(Relation reln, BlockNumber blockNum,
|
||||
bool zeroPage, BufferAccessStrategy strategy);
|
||||
static Buffer ReadBuffer_common(SMgrRelation reln, bool isLocalBuf, BlockNumber blockNum,
|
||||
static Buffer ReadBuffer_relcache(Relation reln, ForkNumber forkNum,
|
||||
BlockNumber blockNum, bool zeroPage, BufferAccessStrategy strategy);
|
||||
static Buffer ReadBuffer_common(SMgrRelation reln, bool isLocalBuf,
|
||||
ForkNumber forkNum, BlockNumber blockNum,
|
||||
bool zeroPage, BufferAccessStrategy strategy, bool *hit);
|
||||
static bool PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy);
|
||||
static void PinBuffer_Locked(volatile BufferDesc *buf);
|
||||
@ -92,7 +93,8 @@ static bool StartBufferIO(volatile BufferDesc *buf, bool forInput);
|
||||
static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
|
||||
int set_flag_bits);
|
||||
static void buffer_write_error_callback(void *arg);
|
||||
static volatile BufferDesc *BufferAlloc(SMgrRelation smgr, BlockNumber blockNum,
|
||||
static volatile BufferDesc *BufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
|
||||
BlockNumber blockNum,
|
||||
BufferAccessStrategy strategy,
|
||||
bool *foundPtr);
|
||||
static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln);
|
||||
@ -117,7 +119,17 @@ static void AtProcExit_Buffers(int code, Datum arg);
|
||||
Buffer
|
||||
ReadBuffer(Relation reln, BlockNumber blockNum)
|
||||
{
|
||||
return ReadBuffer_relcache(reln, blockNum, false, NULL);
|
||||
return ReadBuffer_relcache(reln, MAIN_FORKNUM, blockNum, false, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* ReadBufferWithFork -- same as ReadBuffer, but for accessing relation
|
||||
* forks other than MAIN_FORKNUM.
|
||||
*/
|
||||
Buffer
|
||||
ReadBufferWithFork(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
|
||||
{
|
||||
return ReadBuffer_relcache(reln, forkNum, blockNum, false, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -128,7 +140,7 @@ Buffer
|
||||
ReadBufferWithStrategy(Relation reln, BlockNumber blockNum,
|
||||
BufferAccessStrategy strategy)
|
||||
{
|
||||
return ReadBuffer_relcache(reln, blockNum, false, strategy);
|
||||
return ReadBuffer_relcache(reln, MAIN_FORKNUM, blockNum, false, strategy);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -143,32 +155,32 @@ ReadBufferWithStrategy(Relation reln, BlockNumber blockNum,
|
||||
* the page is modified and written out. P_NEW is OK, though.
|
||||
*/
|
||||
Buffer
|
||||
ReadOrZeroBuffer(Relation reln, BlockNumber blockNum)
|
||||
ReadOrZeroBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
|
||||
{
|
||||
return ReadBuffer_relcache(reln, blockNum, true, NULL);
|
||||
return ReadBuffer_relcache(reln, forkNum, blockNum, true, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* ReadBufferWithoutRelcache -- like ReadBuffer, but doesn't require a
|
||||
* ReadBufferWithoutRelcache -- like ReadBuffer, but doesn't require a
|
||||
* relcache entry for the relation. If zeroPage is true, this behaves
|
||||
* like ReadOrZeroBuffer rather than ReadBuffer.
|
||||
*/
|
||||
Buffer
|
||||
ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp,
|
||||
BlockNumber blockNum, bool zeroPage)
|
||||
ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp,
|
||||
ForkNumber forkNum, BlockNumber blockNum, bool zeroPage)
|
||||
{
|
||||
bool hit;
|
||||
|
||||
SMgrRelation smgr = smgropen(rnode);
|
||||
return ReadBuffer_common(smgr, isTemp, blockNum, zeroPage, NULL, &hit);
|
||||
return ReadBuffer_common(smgr, isTemp, forkNum, blockNum, zeroPage, NULL, &hit);
|
||||
}
|
||||
|
||||
/*
|
||||
* ReadBuffer_relcache -- common logic for ReadBuffer-variants that
|
||||
* ReadBuffer_relcache -- common logic for ReadBuffer-variants that
|
||||
* operate on a Relation.
|
||||
*/
|
||||
static Buffer
|
||||
ReadBuffer_relcache(Relation reln, BlockNumber blockNum,
|
||||
ReadBuffer_relcache(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
|
||||
bool zeroPage, BufferAccessStrategy strategy)
|
||||
{
|
||||
bool hit;
|
||||
@ -182,7 +194,7 @@ ReadBuffer_relcache(Relation reln, BlockNumber blockNum,
|
||||
* hit or miss.
|
||||
*/
|
||||
pgstat_count_buffer_read(reln);
|
||||
buf = ReadBuffer_common(reln->rd_smgr, reln->rd_istemp, blockNum,
|
||||
buf = ReadBuffer_common(reln->rd_smgr, reln->rd_istemp, forkNum, blockNum,
|
||||
zeroPage, strategy, &hit);
|
||||
if (hit)
|
||||
pgstat_count_buffer_hit(reln);
|
||||
@ -195,8 +207,9 @@ ReadBuffer_relcache(Relation reln, BlockNumber blockNum,
|
||||
* *hit is set to true if the request was satisfied from shared buffer cache.
|
||||
*/
|
||||
static Buffer
|
||||
ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
|
||||
bool zeroPage, BufferAccessStrategy strategy, bool *hit)
|
||||
ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, ForkNumber forkNum,
|
||||
BlockNumber blockNum, bool zeroPage,
|
||||
BufferAccessStrategy strategy, bool *hit)
|
||||
{
|
||||
volatile BufferDesc *bufHdr;
|
||||
Block bufBlock;
|
||||
@ -212,7 +225,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
|
||||
|
||||
/* Substitute proper block number if caller asked for P_NEW */
|
||||
if (isExtend)
|
||||
blockNum = smgrnblocks(smgr);
|
||||
blockNum = smgrnblocks(smgr, forkNum);
|
||||
|
||||
TRACE_POSTGRESQL_BUFFER_READ_START(blockNum, smgr->smgr_rnode.spcNode,
|
||||
smgr->smgr_rnode.dbNode, smgr->smgr_rnode.relNode, isLocalBuf);
|
||||
@ -220,7 +233,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
|
||||
if (isLocalBuf)
|
||||
{
|
||||
ReadLocalBufferCount++;
|
||||
bufHdr = LocalBufferAlloc(smgr, blockNum, &found);
|
||||
bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
|
||||
if (found)
|
||||
{
|
||||
LocalBufferHitCount++;
|
||||
@ -239,7 +252,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
|
||||
* lookup the buffer. IO_IN_PROGRESS is set if the requested block is
|
||||
* not currently in memory.
|
||||
*/
|
||||
bufHdr = BufferAlloc(smgr, blockNum, strategy, &found);
|
||||
bufHdr = BufferAlloc(smgr, forkNum, blockNum, strategy, &found);
|
||||
if (found)
|
||||
{
|
||||
BufferHitCount++;
|
||||
@ -341,7 +354,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
|
||||
{
|
||||
/* new buffers are zero-filled */
|
||||
MemSet((char *) bufBlock, 0, BLCKSZ);
|
||||
smgrextend(smgr, blockNum, (char *) bufBlock, isLocalBuf);
|
||||
smgrextend(smgr, forkNum, blockNum, (char *) bufBlock, isLocalBuf);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -353,7 +366,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
|
||||
MemSet((char *) bufBlock, 0, BLCKSZ);
|
||||
else
|
||||
{
|
||||
smgrread(smgr, blockNum, (char *) bufBlock);
|
||||
smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
|
||||
|
||||
/* check for garbage data */
|
||||
if (!PageHeaderIsValid((PageHeader) bufBlock))
|
||||
@ -363,7 +376,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
|
||||
ereport(WARNING,
|
||||
(errcode(ERRCODE_DATA_CORRUPTED),
|
||||
errmsg("invalid page header in block %u of relation %u/%u/%u; zeroing out page",
|
||||
blockNum,
|
||||
blockNum,
|
||||
smgr->smgr_rnode.spcNode,
|
||||
smgr->smgr_rnode.dbNode,
|
||||
smgr->smgr_rnode.relNode)));
|
||||
@ -421,7 +434,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
|
||||
* No locks are held either at entry or exit.
|
||||
*/
|
||||
static volatile BufferDesc *
|
||||
BufferAlloc(SMgrRelation smgr,
|
||||
BufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
|
||||
BlockNumber blockNum,
|
||||
BufferAccessStrategy strategy,
|
||||
bool *foundPtr)
|
||||
@ -438,7 +451,7 @@ BufferAlloc(SMgrRelation smgr,
|
||||
bool valid;
|
||||
|
||||
/* create a tag so we can lookup the buffer */
|
||||
INIT_BUFFERTAG(newTag, smgr->smgr_rnode, blockNum);
|
||||
INIT_BUFFERTAG(newTag, smgr->smgr_rnode, forkNum, blockNum);
|
||||
|
||||
/* determine its hash code and partition lock ID */
|
||||
newHash = BufTableHashCode(&newTag);
|
||||
@ -903,6 +916,7 @@ ReleaseAndReadBuffer(Buffer buffer,
|
||||
Relation relation,
|
||||
BlockNumber blockNum)
|
||||
{
|
||||
ForkNumber forkNum = MAIN_FORKNUM;
|
||||
volatile BufferDesc *bufHdr;
|
||||
|
||||
if (BufferIsValid(buffer))
|
||||
@ -912,7 +926,8 @@ ReleaseAndReadBuffer(Buffer buffer,
|
||||
Assert(LocalRefCount[-buffer - 1] > 0);
|
||||
bufHdr = &LocalBufferDescriptors[-buffer - 1];
|
||||
if (bufHdr->tag.blockNum == blockNum &&
|
||||
RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node))
|
||||
RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) &&
|
||||
bufHdr->tag.forkNum == forkNum)
|
||||
return buffer;
|
||||
ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
|
||||
LocalRefCount[-buffer - 1]--;
|
||||
@ -923,7 +938,8 @@ ReleaseAndReadBuffer(Buffer buffer,
|
||||
bufHdr = &BufferDescriptors[buffer - 1];
|
||||
/* we have pin, so it's ok to examine tag without spinlock */
|
||||
if (bufHdr->tag.blockNum == blockNum &&
|
||||
RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node))
|
||||
RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) &&
|
||||
bufHdr->tag.forkNum == forkNum)
|
||||
return buffer;
|
||||
UnpinBuffer(bufHdr, true);
|
||||
}
|
||||
@ -1734,23 +1750,28 @@ BufferGetBlockNumber(Buffer buffer)
|
||||
}
|
||||
|
||||
/*
|
||||
* BufferGetFileNode
|
||||
* Returns the relation ID (RelFileNode) associated with a buffer.
|
||||
*
|
||||
* This should make the same checks as BufferGetBlockNumber, but since the
|
||||
* two are generally called together, we don't bother.
|
||||
* BufferGetTag
|
||||
* Returns the relfilenode, fork number and block number associated with
|
||||
* a buffer.
|
||||
*/
|
||||
RelFileNode
|
||||
BufferGetFileNode(Buffer buffer)
|
||||
void
|
||||
BufferGetTag(Buffer buffer, RelFileNode *rnode, ForkNumber *forknum,
|
||||
BlockNumber *blknum)
|
||||
{
|
||||
volatile BufferDesc *bufHdr;
|
||||
|
||||
/* Do the same checks as BufferGetBlockNumber. */
|
||||
Assert(BufferIsPinned(buffer));
|
||||
|
||||
if (BufferIsLocal(buffer))
|
||||
bufHdr = &(LocalBufferDescriptors[-buffer - 1]);
|
||||
else
|
||||
bufHdr = &BufferDescriptors[buffer - 1];
|
||||
|
||||
return bufHdr->tag.rnode;
|
||||
/* pinned, so OK to read tag without spinlock */
|
||||
*rnode = bufHdr->tag.rnode;
|
||||
*forknum = bufHdr->tag.forkNum;
|
||||
*blknum = bufHdr->tag.blockNum;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1820,6 +1841,7 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
|
||||
UnlockBufHdr(buf);
|
||||
|
||||
smgrwrite(reln,
|
||||
buf->tag.forkNum,
|
||||
buf->tag.blockNum,
|
||||
(char *) BufHdrGetBlock(buf),
|
||||
false);
|
||||
@ -1849,7 +1871,7 @@ RelationGetNumberOfBlocks(Relation relation)
|
||||
/* Open it at the smgr level if not already done */
|
||||
RelationOpenSmgr(relation);
|
||||
|
||||
return smgrnblocks(relation->rd_smgr);
|
||||
return smgrnblocks(relation->rd_smgr, MAIN_FORKNUM);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1869,7 +1891,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
|
||||
rel->rd_targblock = InvalidBlockNumber;
|
||||
|
||||
/* Do the real work */
|
||||
smgrtruncate(rel->rd_smgr, nblocks, rel->rd_istemp);
|
||||
smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks, rel->rd_istemp);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------
|
||||
@ -1899,14 +1921,14 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
|
||||
* --------------------------------------------------------------------
|
||||
*/
|
||||
void
|
||||
DropRelFileNodeBuffers(RelFileNode rnode, bool istemp,
|
||||
DropRelFileNodeBuffers(RelFileNode rnode, ForkNumber forkNum, bool istemp,
|
||||
BlockNumber firstDelBlock)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (istemp)
|
||||
{
|
||||
DropRelFileNodeLocalBuffers(rnode, firstDelBlock);
|
||||
DropRelFileNodeLocalBuffers(rnode, forkNum, firstDelBlock);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1916,6 +1938,7 @@ DropRelFileNodeBuffers(RelFileNode rnode, bool istemp,
|
||||
|
||||
LockBufHdr(bufHdr);
|
||||
if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
|
||||
bufHdr->tag.forkNum == forkNum &&
|
||||
bufHdr->tag.blockNum >= firstDelBlock)
|
||||
InvalidateBuffer(bufHdr); /* releases spinlock */
|
||||
else
|
||||
@ -2055,6 +2078,7 @@ FlushRelationBuffers(Relation rel)
|
||||
error_context_stack = &errcontext;
|
||||
|
||||
smgrwrite(rel->rd_smgr,
|
||||
bufHdr->tag.forkNum,
|
||||
bufHdr->tag.blockNum,
|
||||
(char *) LocalBufHdrGetBlock(bufHdr),
|
||||
true);
|
||||
|
Reference in New Issue
Block a user