mirror of
https://github.com/postgres/postgres.git
synced 2025-11-16 15:02:33 +03:00
Introduce the concept of relation forks. An smgr relation can now consist
of multiple forks, and each fork can be created and grown separately. The bulk of this patch is about changing the smgr API to include an extra ForkNumber argument in every smgr function. Also, smgrscheduleunlink and smgrdounlink no longer implicitly call smgrclose, because other forks might still exist after unlinking one. The callers of those functions have been modified to call smgrclose instead. This patch in itself doesn't have any user-visible effect, but provides the infrastructure needed for upcoming patches. The additional forks envisioned are a rewritten FSM implementation that doesn't rely on a fixed-size shared memory block, and a visibility map to allow skipping portions of a table in VACUUM that have no dead tuples.
This commit is contained in:
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.236 2008/08/05 15:09:04 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.237 2008/08/11 11:05:11 heikki Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -78,9 +78,10 @@ static bool IsForInput;
|
||||
static volatile BufferDesc *PinCountWaitBuf = NULL;
|
||||
|
||||
|
||||
static Buffer ReadBuffer_relcache(Relation reln, BlockNumber blockNum,
|
||||
bool zeroPage, BufferAccessStrategy strategy);
|
||||
static Buffer ReadBuffer_common(SMgrRelation reln, bool isLocalBuf, BlockNumber blockNum,
|
||||
static Buffer ReadBuffer_relcache(Relation reln, ForkNumber forkNum,
|
||||
BlockNumber blockNum, bool zeroPage, BufferAccessStrategy strategy);
|
||||
static Buffer ReadBuffer_common(SMgrRelation reln, bool isLocalBuf,
|
||||
ForkNumber forkNum, BlockNumber blockNum,
|
||||
bool zeroPage, BufferAccessStrategy strategy, bool *hit);
|
||||
static bool PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy);
|
||||
static void PinBuffer_Locked(volatile BufferDesc *buf);
|
||||
@@ -92,7 +93,8 @@ static bool StartBufferIO(volatile BufferDesc *buf, bool forInput);
|
||||
static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
|
||||
int set_flag_bits);
|
||||
static void buffer_write_error_callback(void *arg);
|
||||
static volatile BufferDesc *BufferAlloc(SMgrRelation smgr, BlockNumber blockNum,
|
||||
static volatile BufferDesc *BufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
|
||||
BlockNumber blockNum,
|
||||
BufferAccessStrategy strategy,
|
||||
bool *foundPtr);
|
||||
static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln);
|
||||
@@ -117,7 +119,17 @@ static void AtProcExit_Buffers(int code, Datum arg);
|
||||
Buffer
|
||||
ReadBuffer(Relation reln, BlockNumber blockNum)
|
||||
{
|
||||
return ReadBuffer_relcache(reln, blockNum, false, NULL);
|
||||
return ReadBuffer_relcache(reln, MAIN_FORKNUM, blockNum, false, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* ReadBufferWithFork -- same as ReadBuffer, but for accessing relation
|
||||
* forks other than MAIN_FORKNUM.
|
||||
*/
|
||||
Buffer
|
||||
ReadBufferWithFork(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
|
||||
{
|
||||
return ReadBuffer_relcache(reln, forkNum, blockNum, false, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -128,7 +140,7 @@ Buffer
|
||||
ReadBufferWithStrategy(Relation reln, BlockNumber blockNum,
|
||||
BufferAccessStrategy strategy)
|
||||
{
|
||||
return ReadBuffer_relcache(reln, blockNum, false, strategy);
|
||||
return ReadBuffer_relcache(reln, MAIN_FORKNUM, blockNum, false, strategy);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -143,32 +155,32 @@ ReadBufferWithStrategy(Relation reln, BlockNumber blockNum,
|
||||
* the page is modified and written out. P_NEW is OK, though.
|
||||
*/
|
||||
Buffer
|
||||
ReadOrZeroBuffer(Relation reln, BlockNumber blockNum)
|
||||
ReadOrZeroBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
|
||||
{
|
||||
return ReadBuffer_relcache(reln, blockNum, true, NULL);
|
||||
return ReadBuffer_relcache(reln, forkNum, blockNum, true, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* ReadBufferWithoutRelcache -- like ReadBuffer, but doesn't require a
|
||||
* ReadBufferWithoutRelcache -- like ReadBuffer, but doesn't require a
|
||||
* relcache entry for the relation. If zeroPage is true, this behaves
|
||||
* like ReadOrZeroBuffer rather than ReadBuffer.
|
||||
*/
|
||||
Buffer
|
||||
ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp,
|
||||
BlockNumber blockNum, bool zeroPage)
|
||||
ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp,
|
||||
ForkNumber forkNum, BlockNumber blockNum, bool zeroPage)
|
||||
{
|
||||
bool hit;
|
||||
|
||||
SMgrRelation smgr = smgropen(rnode);
|
||||
return ReadBuffer_common(smgr, isTemp, blockNum, zeroPage, NULL, &hit);
|
||||
return ReadBuffer_common(smgr, isTemp, forkNum, blockNum, zeroPage, NULL, &hit);
|
||||
}
|
||||
|
||||
/*
|
||||
* ReadBuffer_relcache -- common logic for ReadBuffer-variants that
|
||||
* ReadBuffer_relcache -- common logic for ReadBuffer-variants that
|
||||
* operate on a Relation.
|
||||
*/
|
||||
static Buffer
|
||||
ReadBuffer_relcache(Relation reln, BlockNumber blockNum,
|
||||
ReadBuffer_relcache(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
|
||||
bool zeroPage, BufferAccessStrategy strategy)
|
||||
{
|
||||
bool hit;
|
||||
@@ -182,7 +194,7 @@ ReadBuffer_relcache(Relation reln, BlockNumber blockNum,
|
||||
* hit or miss.
|
||||
*/
|
||||
pgstat_count_buffer_read(reln);
|
||||
buf = ReadBuffer_common(reln->rd_smgr, reln->rd_istemp, blockNum,
|
||||
buf = ReadBuffer_common(reln->rd_smgr, reln->rd_istemp, forkNum, blockNum,
|
||||
zeroPage, strategy, &hit);
|
||||
if (hit)
|
||||
pgstat_count_buffer_hit(reln);
|
||||
@@ -195,8 +207,9 @@ ReadBuffer_relcache(Relation reln, BlockNumber blockNum,
|
||||
* *hit is set to true if the request was satisfied from shared buffer cache.
|
||||
*/
|
||||
static Buffer
|
||||
ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
|
||||
bool zeroPage, BufferAccessStrategy strategy, bool *hit)
|
||||
ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, ForkNumber forkNum,
|
||||
BlockNumber blockNum, bool zeroPage,
|
||||
BufferAccessStrategy strategy, bool *hit)
|
||||
{
|
||||
volatile BufferDesc *bufHdr;
|
||||
Block bufBlock;
|
||||
@@ -212,7 +225,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
|
||||
|
||||
/* Substitute proper block number if caller asked for P_NEW */
|
||||
if (isExtend)
|
||||
blockNum = smgrnblocks(smgr);
|
||||
blockNum = smgrnblocks(smgr, forkNum);
|
||||
|
||||
TRACE_POSTGRESQL_BUFFER_READ_START(blockNum, smgr->smgr_rnode.spcNode,
|
||||
smgr->smgr_rnode.dbNode, smgr->smgr_rnode.relNode, isLocalBuf);
|
||||
@@ -220,7 +233,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
|
||||
if (isLocalBuf)
|
||||
{
|
||||
ReadLocalBufferCount++;
|
||||
bufHdr = LocalBufferAlloc(smgr, blockNum, &found);
|
||||
bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
|
||||
if (found)
|
||||
{
|
||||
LocalBufferHitCount++;
|
||||
@@ -239,7 +252,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
|
||||
* lookup the buffer. IO_IN_PROGRESS is set if the requested block is
|
||||
* not currently in memory.
|
||||
*/
|
||||
bufHdr = BufferAlloc(smgr, blockNum, strategy, &found);
|
||||
bufHdr = BufferAlloc(smgr, forkNum, blockNum, strategy, &found);
|
||||
if (found)
|
||||
{
|
||||
BufferHitCount++;
|
||||
@@ -341,7 +354,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
|
||||
{
|
||||
/* new buffers are zero-filled */
|
||||
MemSet((char *) bufBlock, 0, BLCKSZ);
|
||||
smgrextend(smgr, blockNum, (char *) bufBlock, isLocalBuf);
|
||||
smgrextend(smgr, forkNum, blockNum, (char *) bufBlock, isLocalBuf);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -353,7 +366,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
|
||||
MemSet((char *) bufBlock, 0, BLCKSZ);
|
||||
else
|
||||
{
|
||||
smgrread(smgr, blockNum, (char *) bufBlock);
|
||||
smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
|
||||
|
||||
/* check for garbage data */
|
||||
if (!PageHeaderIsValid((PageHeader) bufBlock))
|
||||
@@ -363,7 +376,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
|
||||
ereport(WARNING,
|
||||
(errcode(ERRCODE_DATA_CORRUPTED),
|
||||
errmsg("invalid page header in block %u of relation %u/%u/%u; zeroing out page",
|
||||
blockNum,
|
||||
blockNum,
|
||||
smgr->smgr_rnode.spcNode,
|
||||
smgr->smgr_rnode.dbNode,
|
||||
smgr->smgr_rnode.relNode)));
|
||||
@@ -421,7 +434,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
|
||||
* No locks are held either at entry or exit.
|
||||
*/
|
||||
static volatile BufferDesc *
|
||||
BufferAlloc(SMgrRelation smgr,
|
||||
BufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
|
||||
BlockNumber blockNum,
|
||||
BufferAccessStrategy strategy,
|
||||
bool *foundPtr)
|
||||
@@ -438,7 +451,7 @@ BufferAlloc(SMgrRelation smgr,
|
||||
bool valid;
|
||||
|
||||
/* create a tag so we can lookup the buffer */
|
||||
INIT_BUFFERTAG(newTag, smgr->smgr_rnode, blockNum);
|
||||
INIT_BUFFERTAG(newTag, smgr->smgr_rnode, forkNum, blockNum);
|
||||
|
||||
/* determine its hash code and partition lock ID */
|
||||
newHash = BufTableHashCode(&newTag);
|
||||
@@ -903,6 +916,7 @@ ReleaseAndReadBuffer(Buffer buffer,
|
||||
Relation relation,
|
||||
BlockNumber blockNum)
|
||||
{
|
||||
ForkNumber forkNum = MAIN_FORKNUM;
|
||||
volatile BufferDesc *bufHdr;
|
||||
|
||||
if (BufferIsValid(buffer))
|
||||
@@ -912,7 +926,8 @@ ReleaseAndReadBuffer(Buffer buffer,
|
||||
Assert(LocalRefCount[-buffer - 1] > 0);
|
||||
bufHdr = &LocalBufferDescriptors[-buffer - 1];
|
||||
if (bufHdr->tag.blockNum == blockNum &&
|
||||
RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node))
|
||||
RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) &&
|
||||
bufHdr->tag.forkNum == forkNum)
|
||||
return buffer;
|
||||
ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
|
||||
LocalRefCount[-buffer - 1]--;
|
||||
@@ -923,7 +938,8 @@ ReleaseAndReadBuffer(Buffer buffer,
|
||||
bufHdr = &BufferDescriptors[buffer - 1];
|
||||
/* we have pin, so it's ok to examine tag without spinlock */
|
||||
if (bufHdr->tag.blockNum == blockNum &&
|
||||
RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node))
|
||||
RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) &&
|
||||
bufHdr->tag.forkNum == forkNum)
|
||||
return buffer;
|
||||
UnpinBuffer(bufHdr, true);
|
||||
}
|
||||
@@ -1734,23 +1750,28 @@ BufferGetBlockNumber(Buffer buffer)
|
||||
}
|
||||
|
||||
/*
|
||||
* BufferGetFileNode
|
||||
* Returns the relation ID (RelFileNode) associated with a buffer.
|
||||
*
|
||||
* This should make the same checks as BufferGetBlockNumber, but since the
|
||||
* two are generally called together, we don't bother.
|
||||
* BufferGetTag
|
||||
* Returns the relfilenode, fork number and block number associated with
|
||||
* a buffer.
|
||||
*/
|
||||
RelFileNode
|
||||
BufferGetFileNode(Buffer buffer)
|
||||
void
|
||||
BufferGetTag(Buffer buffer, RelFileNode *rnode, ForkNumber *forknum,
|
||||
BlockNumber *blknum)
|
||||
{
|
||||
volatile BufferDesc *bufHdr;
|
||||
|
||||
/* Do the same checks as BufferGetBlockNumber. */
|
||||
Assert(BufferIsPinned(buffer));
|
||||
|
||||
if (BufferIsLocal(buffer))
|
||||
bufHdr = &(LocalBufferDescriptors[-buffer - 1]);
|
||||
else
|
||||
bufHdr = &BufferDescriptors[buffer - 1];
|
||||
|
||||
return bufHdr->tag.rnode;
|
||||
/* pinned, so OK to read tag without spinlock */
|
||||
*rnode = bufHdr->tag.rnode;
|
||||
*forknum = bufHdr->tag.forkNum;
|
||||
*blknum = bufHdr->tag.blockNum;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1820,6 +1841,7 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
|
||||
UnlockBufHdr(buf);
|
||||
|
||||
smgrwrite(reln,
|
||||
buf->tag.forkNum,
|
||||
buf->tag.blockNum,
|
||||
(char *) BufHdrGetBlock(buf),
|
||||
false);
|
||||
@@ -1849,7 +1871,7 @@ RelationGetNumberOfBlocks(Relation relation)
|
||||
/* Open it at the smgr level if not already done */
|
||||
RelationOpenSmgr(relation);
|
||||
|
||||
return smgrnblocks(relation->rd_smgr);
|
||||
return smgrnblocks(relation->rd_smgr, MAIN_FORKNUM);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1869,7 +1891,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
|
||||
rel->rd_targblock = InvalidBlockNumber;
|
||||
|
||||
/* Do the real work */
|
||||
smgrtruncate(rel->rd_smgr, nblocks, rel->rd_istemp);
|
||||
smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks, rel->rd_istemp);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------
|
||||
@@ -1899,14 +1921,14 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
|
||||
* --------------------------------------------------------------------
|
||||
*/
|
||||
void
|
||||
DropRelFileNodeBuffers(RelFileNode rnode, bool istemp,
|
||||
DropRelFileNodeBuffers(RelFileNode rnode, ForkNumber forkNum, bool istemp,
|
||||
BlockNumber firstDelBlock)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (istemp)
|
||||
{
|
||||
DropRelFileNodeLocalBuffers(rnode, firstDelBlock);
|
||||
DropRelFileNodeLocalBuffers(rnode, forkNum, firstDelBlock);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1916,6 +1938,7 @@ DropRelFileNodeBuffers(RelFileNode rnode, bool istemp,
|
||||
|
||||
LockBufHdr(bufHdr);
|
||||
if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
|
||||
bufHdr->tag.forkNum == forkNum &&
|
||||
bufHdr->tag.blockNum >= firstDelBlock)
|
||||
InvalidateBuffer(bufHdr); /* releases spinlock */
|
||||
else
|
||||
@@ -2055,6 +2078,7 @@ FlushRelationBuffers(Relation rel)
|
||||
error_context_stack = &errcontext;
|
||||
|
||||
smgrwrite(rel->rd_smgr,
|
||||
bufHdr->tag.forkNum,
|
||||
bufHdr->tag.blockNum,
|
||||
(char *) LocalBufHdrGetBlock(bufHdr),
|
||||
true);
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.80 2008/06/12 09:12:31 heikki Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.81 2008/08/11 11:05:11 heikki Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -61,7 +61,8 @@ static Block GetLocalBufferStorage(void);
|
||||
* (hence, usage_count is always advanced).
|
||||
*/
|
||||
BufferDesc *
|
||||
LocalBufferAlloc(SMgrRelation smgr, BlockNumber blockNum, bool *foundPtr)
|
||||
LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
|
||||
bool *foundPtr)
|
||||
{
|
||||
BufferTag newTag; /* identity of requested block */
|
||||
LocalBufferLookupEnt *hresult;
|
||||
@@ -70,7 +71,7 @@ LocalBufferAlloc(SMgrRelation smgr, BlockNumber blockNum, bool *foundPtr)
|
||||
int trycounter;
|
||||
bool found;
|
||||
|
||||
INIT_BUFFERTAG(newTag, smgr->smgr_rnode, blockNum);
|
||||
INIT_BUFFERTAG(newTag, smgr->smgr_rnode, forkNum, blockNum);
|
||||
|
||||
/* Initialize local buffers if first request in this session */
|
||||
if (LocalBufHash == NULL)
|
||||
@@ -162,6 +163,7 @@ LocalBufferAlloc(SMgrRelation smgr, BlockNumber blockNum, bool *foundPtr)
|
||||
|
||||
/* And write... */
|
||||
smgrwrite(oreln,
|
||||
bufHdr->tag.forkNum,
|
||||
bufHdr->tag.blockNum,
|
||||
(char *) LocalBufHdrGetBlock(bufHdr),
|
||||
true);
|
||||
@@ -250,7 +252,8 @@ MarkLocalBufferDirty(Buffer buffer)
|
||||
* See DropRelFileNodeBuffers in bufmgr.c for more notes.
|
||||
*/
|
||||
void
|
||||
DropRelFileNodeLocalBuffers(RelFileNode rnode, BlockNumber firstDelBlock)
|
||||
DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
|
||||
BlockNumber firstDelBlock)
|
||||
{
|
||||
int i;
|
||||
|
||||
@@ -261,6 +264,7 @@ DropRelFileNodeLocalBuffers(RelFileNode rnode, BlockNumber firstDelBlock)
|
||||
|
||||
if ((bufHdr->flags & BM_TAG_VALID) &&
|
||||
RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
|
||||
bufHdr->tag.forkNum == forkNum &&
|
||||
bufHdr->tag.blockNum >= firstDelBlock)
|
||||
{
|
||||
if (LocalRefCount[i] != 0)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
$PostgreSQL: pgsql/src/backend/storage/smgr/README,v 1.5 2008/03/21 13:23:28 momjian Exp $
|
||||
$PostgreSQL: pgsql/src/backend/storage/smgr/README,v 1.6 2008/08/11 11:05:11 heikki Exp $
|
||||
|
||||
Storage Manager
|
||||
===============
|
||||
@@ -32,3 +32,20 @@ The files in this directory, and their contents, are
|
||||
md.c The magnetic disk storage manager.
|
||||
|
||||
Note that md.c in turn relies on src/backend/storage/file/fd.c.
|
||||
|
||||
Relation Forks
|
||||
==============
|
||||
|
||||
Since 8.4, a single smgr relation can consist of multiple physical
|
||||
files, called relation forks. This allows storing additional metadata like
|
||||
Free Space information in additional forks, which can be grown and truncated
|
||||
independently of the main data file, while still treating it all as a single
|
||||
physical relation in system catalogs.
|
||||
|
||||
It is assumed that the main fork, fork number 0 or MAIN_FORKNUM, always
|
||||
exists. Fork numbers are assigned in src/include/storage/relfilenode.h.
|
||||
Functions in smgr.c and md.c take an extra fork number argument, in addition
|
||||
to relfilenode and block number, to identify which relation fork you want to
|
||||
access. Since most code wants to access the main fork, a shortcut version of
|
||||
ReadBuffer that accesses MAIN_FORKNUM is provided in the buffer manager for
|
||||
convenience.
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.138 2008/05/02 01:08:27 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.139 2008/08/11 11:05:11 heikki Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -23,6 +23,7 @@
|
||||
#include "postmaster/bgwriter.h"
|
||||
#include "storage/fd.h"
|
||||
#include "storage/bufmgr.h"
|
||||
#include "storage/relfilenode.h"
|
||||
#include "storage/smgr.h"
|
||||
#include "utils/hsearch.h"
|
||||
#include "utils/memutils.h"
|
||||
@@ -118,6 +119,7 @@ static MemoryContext MdCxt; /* context for all md.c allocations */
|
||||
typedef struct
|
||||
{
|
||||
RelFileNode rnode; /* the targeted relation */
|
||||
ForkNumber forknum;
|
||||
BlockNumber segno; /* which segment */
|
||||
} PendingOperationTag;
|
||||
|
||||
@@ -151,15 +153,18 @@ typedef enum /* behavior for mdopen & _mdfd_getseg */
|
||||
} ExtensionBehavior;
|
||||
|
||||
/* local routines */
|
||||
static MdfdVec *mdopen(SMgrRelation reln, ExtensionBehavior behavior);
|
||||
static void register_dirty_segment(SMgrRelation reln, MdfdVec *seg);
|
||||
static MdfdVec *mdopen(SMgrRelation reln, ForkNumber forknum,
|
||||
ExtensionBehavior behavior);
|
||||
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum,
|
||||
MdfdVec *seg);
|
||||
static void register_unlink(RelFileNode rnode);
|
||||
static MdfdVec *_fdvec_alloc(void);
|
||||
static MdfdVec *_mdfd_openseg(SMgrRelation reln, BlockNumber segno,
|
||||
int oflags);
|
||||
static MdfdVec *_mdfd_getseg(SMgrRelation reln, BlockNumber blkno,
|
||||
bool isTemp, ExtensionBehavior behavior);
|
||||
static BlockNumber _mdnblocks(SMgrRelation reln, MdfdVec *seg);
|
||||
static MdfdVec *_mdfd_openseg(SMgrRelation reln, ForkNumber forkno,
|
||||
BlockNumber segno, int oflags);
|
||||
static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forkno,
|
||||
BlockNumber blkno, bool isTemp, ExtensionBehavior behavior);
|
||||
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum,
|
||||
MdfdVec *seg);
|
||||
|
||||
|
||||
/*
|
||||
@@ -197,23 +202,40 @@ mdinit(void)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* mdexists() -- Does the physical file exist?
|
||||
*
|
||||
* Note: this will return true for lingering files that have pending deletions
|
||||
*/
|
||||
bool
|
||||
mdexists(SMgrRelation reln, ForkNumber forkNum)
|
||||
{
|
||||
/*
|
||||
* Close it first, to ensure that we notice if the fork has been
|
||||
* unlinked since we opened it.
|
||||
*/
|
||||
mdclose(reln, forkNum);
|
||||
|
||||
return (mdopen(reln, forkNum, EXTENSION_RETURN_NULL) != NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* mdcreate() -- Create a new relation on magnetic disk.
|
||||
*
|
||||
* If isRedo is true, it's okay for the relation to exist already.
|
||||
*/
|
||||
void
|
||||
mdcreate(SMgrRelation reln, bool isRedo)
|
||||
mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
|
||||
{
|
||||
char *path;
|
||||
File fd;
|
||||
|
||||
if (isRedo && reln->md_fd != NULL)
|
||||
if (isRedo && reln->md_fd[forkNum] != NULL)
|
||||
return; /* created and opened already... */
|
||||
|
||||
Assert(reln->md_fd == NULL);
|
||||
Assert(reln->md_fd[forkNum] == NULL);
|
||||
|
||||
path = relpath(reln->smgr_rnode);
|
||||
path = relpath(reln->smgr_rnode, forkNum);
|
||||
|
||||
fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);
|
||||
|
||||
@@ -236,20 +258,21 @@ mdcreate(SMgrRelation reln, bool isRedo)
|
||||
errno = save_errno;
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not create relation %u/%u/%u: %m",
|
||||
errmsg("could not create relation %u/%u/%u/%u: %m",
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode)));
|
||||
reln->smgr_rnode.relNode,
|
||||
forkNum)));
|
||||
}
|
||||
}
|
||||
|
||||
pfree(path);
|
||||
|
||||
reln->md_fd = _fdvec_alloc();
|
||||
reln->md_fd[forkNum] = _fdvec_alloc();
|
||||
|
||||
reln->md_fd->mdfd_vfd = fd;
|
||||
reln->md_fd->mdfd_segno = 0;
|
||||
reln->md_fd->mdfd_chain = NULL;
|
||||
reln->md_fd[forkNum]->mdfd_vfd = fd;
|
||||
reln->md_fd[forkNum]->mdfd_segno = 0;
|
||||
reln->md_fd[forkNum]->mdfd_chain = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -285,7 +308,7 @@ mdcreate(SMgrRelation reln, bool isRedo)
|
||||
* we are usually not in a transaction anymore when this is called.
|
||||
*/
|
||||
void
|
||||
mdunlink(RelFileNode rnode, bool isRedo)
|
||||
mdunlink(RelFileNode rnode, ForkNumber forkNum, bool isRedo)
|
||||
{
|
||||
char *path;
|
||||
int ret;
|
||||
@@ -294,14 +317,14 @@ mdunlink(RelFileNode rnode, bool isRedo)
|
||||
* We have to clean out any pending fsync requests for the doomed
|
||||
* relation, else the next mdsync() will fail.
|
||||
*/
|
||||
ForgetRelationFsyncRequests(rnode);
|
||||
ForgetRelationFsyncRequests(rnode, forkNum);
|
||||
|
||||
path = relpath(rnode);
|
||||
path = relpath(rnode, forkNum);
|
||||
|
||||
/*
|
||||
* Delete or truncate the first segment.
|
||||
*/
|
||||
if (isRedo)
|
||||
if (isRedo || forkNum != MAIN_FORKNUM)
|
||||
ret = unlink(path);
|
||||
else
|
||||
{
|
||||
@@ -326,10 +349,11 @@ mdunlink(RelFileNode rnode, bool isRedo)
|
||||
if (!isRedo || errno != ENOENT)
|
||||
ereport(WARNING,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not remove relation %u/%u/%u: %m",
|
||||
errmsg("could not remove relation %u/%u/%u/%u: %m",
|
||||
rnode.spcNode,
|
||||
rnode.dbNode,
|
||||
rnode.relNode)));
|
||||
rnode.relNode,
|
||||
forkNum)));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -353,11 +377,12 @@ mdunlink(RelFileNode rnode, bool isRedo)
|
||||
if (errno != ENOENT)
|
||||
ereport(WARNING,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not remove segment %u of relation %u/%u/%u: %m",
|
||||
errmsg("could not remove segment %u of relation %u/%u/%u/%u: %m",
|
||||
segno,
|
||||
rnode.spcNode,
|
||||
rnode.dbNode,
|
||||
rnode.relNode)));
|
||||
rnode.relNode,
|
||||
forkNum)));
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -367,7 +392,7 @@ mdunlink(RelFileNode rnode, bool isRedo)
|
||||
pfree(path);
|
||||
|
||||
/* Register request to unlink first segment later */
|
||||
if (!isRedo)
|
||||
if (!isRedo && forkNum == MAIN_FORKNUM)
|
||||
register_unlink(rnode);
|
||||
}
|
||||
|
||||
@@ -381,7 +406,8 @@ mdunlink(RelFileNode rnode, bool isRedo)
|
||||
* causes intervening file space to become filled with zeroes.
|
||||
*/
|
||||
void
|
||||
mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
|
||||
mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
char *buffer, bool isTemp)
|
||||
{
|
||||
off_t seekpos;
|
||||
int nbytes;
|
||||
@@ -389,7 +415,7 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
|
||||
|
||||
/* This assert is too expensive to have on normally ... */
|
||||
#ifdef CHECK_WRITE_VS_EXTEND
|
||||
Assert(blocknum >= mdnblocks(reln));
|
||||
Assert(blocknum >= mdnblocks(reln, forknum));
|
||||
#endif
|
||||
|
||||
/*
|
||||
@@ -400,13 +426,14 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
|
||||
if (blocknum == InvalidBlockNumber)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
||||
errmsg("cannot extend relation %u/%u/%u beyond %u blocks",
|
||||
errmsg("cannot extend relation %u/%u/%u/%u beyond %u blocks",
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode,
|
||||
forknum,
|
||||
InvalidBlockNumber)));
|
||||
|
||||
v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_CREATE);
|
||||
v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_CREATE);
|
||||
|
||||
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
|
||||
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
|
||||
@@ -423,37 +450,40 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
|
||||
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not seek to block %u of relation %u/%u/%u: %m",
|
||||
errmsg("could not seek to block %u of relation %u/%u/%u/%u: %m",
|
||||
blocknum,
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode)));
|
||||
reln->smgr_rnode.relNode,
|
||||
forknum)));
|
||||
|
||||
if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
|
||||
{
|
||||
if (nbytes < 0)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not extend relation %u/%u/%u: %m",
|
||||
errmsg("could not extend relation %u/%u/%u/%u: %m",
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode),
|
||||
reln->smgr_rnode.relNode,
|
||||
forknum),
|
||||
errhint("Check free disk space.")));
|
||||
/* short write: complain appropriately */
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DISK_FULL),
|
||||
errmsg("could not extend relation %u/%u/%u: wrote only %d of %d bytes at block %u",
|
||||
errmsg("could not extend relation %u/%u/%u/%u: wrote only %d of %d bytes at block %u",
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode,
|
||||
forknum,
|
||||
nbytes, BLCKSZ, blocknum),
|
||||
errhint("Check free disk space.")));
|
||||
}
|
||||
|
||||
if (!isTemp)
|
||||
register_dirty_segment(reln, v);
|
||||
register_dirty_segment(reln, forknum, v);
|
||||
|
||||
Assert(_mdnblocks(reln, v) <= ((BlockNumber) RELSEG_SIZE));
|
||||
Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -467,17 +497,17 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
|
||||
* invent one out of whole cloth.
|
||||
*/
|
||||
static MdfdVec *
|
||||
mdopen(SMgrRelation reln, ExtensionBehavior behavior)
|
||||
mdopen(SMgrRelation reln, ForkNumber forknum, ExtensionBehavior behavior)
|
||||
{
|
||||
MdfdVec *mdfd;
|
||||
char *path;
|
||||
File fd;
|
||||
|
||||
/* No work if already open */
|
||||
if (reln->md_fd)
|
||||
return reln->md_fd;
|
||||
if (reln->md_fd[forknum])
|
||||
return reln->md_fd[forknum];
|
||||
|
||||
path = relpath(reln->smgr_rnode);
|
||||
path = relpath(reln->smgr_rnode, forknum);
|
||||
|
||||
fd = PathNameOpenFile(path, O_RDWR | PG_BINARY, 0600);
|
||||
|
||||
@@ -499,21 +529,22 @@ mdopen(SMgrRelation reln, ExtensionBehavior behavior)
|
||||
return NULL;
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not open relation %u/%u/%u: %m",
|
||||
errmsg("could not open relation %u/%u/%u/%u: %m",
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode)));
|
||||
reln->smgr_rnode.relNode,
|
||||
forknum)));
|
||||
}
|
||||
}
|
||||
|
||||
pfree(path);
|
||||
|
||||
reln->md_fd = mdfd = _fdvec_alloc();
|
||||
reln->md_fd[forknum] = mdfd = _fdvec_alloc();
|
||||
|
||||
mdfd->mdfd_vfd = fd;
|
||||
mdfd->mdfd_segno = 0;
|
||||
mdfd->mdfd_chain = NULL;
|
||||
Assert(_mdnblocks(reln, mdfd) <= ((BlockNumber) RELSEG_SIZE));
|
||||
Assert(_mdnblocks(reln, forknum, mdfd) <= ((BlockNumber) RELSEG_SIZE));
|
||||
|
||||
return mdfd;
|
||||
}
|
||||
@@ -522,15 +553,15 @@ mdopen(SMgrRelation reln, ExtensionBehavior behavior)
|
||||
* mdclose() -- Close the specified relation, if it isn't closed already.
|
||||
*/
|
||||
void
|
||||
mdclose(SMgrRelation reln)
|
||||
mdclose(SMgrRelation reln, ForkNumber forknum)
|
||||
{
|
||||
MdfdVec *v = reln->md_fd;
|
||||
MdfdVec *v = reln->md_fd[forknum];
|
||||
|
||||
/* No work if already closed */
|
||||
if (v == NULL)
|
||||
return;
|
||||
|
||||
reln->md_fd = NULL; /* prevent dangling pointer after error */
|
||||
reln->md_fd[forknum] = NULL; /* prevent dangling pointer after error */
|
||||
|
||||
while (v != NULL)
|
||||
{
|
||||
@@ -549,13 +580,14 @@ mdclose(SMgrRelation reln)
|
||||
* mdread() -- Read the specified block from a relation.
|
||||
*/
|
||||
void
|
||||
mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
|
||||
mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
char *buffer)
|
||||
{
|
||||
off_t seekpos;
|
||||
int nbytes;
|
||||
MdfdVec *v;
|
||||
|
||||
v = _mdfd_getseg(reln, blocknum, false, EXTENSION_FAIL);
|
||||
v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL);
|
||||
|
||||
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
|
||||
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
|
||||
@@ -563,22 +595,24 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
|
||||
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not seek to block %u of relation %u/%u/%u: %m",
|
||||
errmsg("could not seek to block %u of relation %u/%u/%u/%u: %m",
|
||||
blocknum,
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode)));
|
||||
reln->smgr_rnode.relNode,
|
||||
forknum)));
|
||||
|
||||
if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
|
||||
{
|
||||
if (nbytes < 0)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not read block %u of relation %u/%u/%u: %m",
|
||||
errmsg("could not read block %u of relation %u/%u/%u/%u: %m",
|
||||
blocknum,
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode)));
|
||||
reln->smgr_rnode.relNode,
|
||||
forknum)));
|
||||
|
||||
/*
|
||||
* Short read: we are at or past EOF, or we read a partial block at
|
||||
@@ -593,11 +627,12 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_CORRUPTED),
|
||||
errmsg("could not read block %u of relation %u/%u/%u: read only %d of %d bytes",
|
||||
errmsg("could not read block %u of relation %u/%u/%u/%u: read only %d of %d bytes",
|
||||
blocknum,
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode,
|
||||
forknum,
|
||||
nbytes, BLCKSZ)));
|
||||
}
|
||||
}
|
||||
@@ -610,7 +645,8 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
|
||||
* use mdextend().
|
||||
*/
|
||||
void
|
||||
mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
|
||||
mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
char *buffer, bool isTemp)
|
||||
{
|
||||
off_t seekpos;
|
||||
int nbytes;
|
||||
@@ -618,10 +654,10 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
|
||||
|
||||
/* This assert is too expensive to have on normally ... */
|
||||
#ifdef CHECK_WRITE_VS_EXTEND
|
||||
Assert(blocknum < mdnblocks(reln));
|
||||
Assert(blocknum < mdnblocks(reln, forknum));
|
||||
#endif
|
||||
|
||||
v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_FAIL);
|
||||
v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_FAIL);
|
||||
|
||||
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
|
||||
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
|
||||
@@ -629,36 +665,39 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
|
||||
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not seek to block %u of relation %u/%u/%u: %m",
|
||||
errmsg("could not seek to block %u of relation %u/%u/%u/%u: %m",
|
||||
blocknum,
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode)));
|
||||
reln->smgr_rnode.relNode,
|
||||
forknum)));
|
||||
|
||||
if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
|
||||
{
|
||||
if (nbytes < 0)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not write block %u of relation %u/%u/%u: %m",
|
||||
errmsg("could not write block %u of relation %u/%u/%u/%u: %m",
|
||||
blocknum,
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode)));
|
||||
reln->smgr_rnode.relNode,
|
||||
forknum)));
|
||||
/* short write: complain appropriately */
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DISK_FULL),
|
||||
errmsg("could not write block %u of relation %u/%u/%u: wrote only %d of %d bytes",
|
||||
errmsg("could not write block %u of relation %u/%u/%u/%u: wrote only %d of %d bytes",
|
||||
blocknum,
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode,
|
||||
forknum,
|
||||
nbytes, BLCKSZ),
|
||||
errhint("Check free disk space.")));
|
||||
}
|
||||
|
||||
if (!isTemp)
|
||||
register_dirty_segment(reln, v);
|
||||
register_dirty_segment(reln, forknum, v);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -670,9 +709,9 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
|
||||
* are present in the chain.
|
||||
*/
|
||||
BlockNumber
|
||||
mdnblocks(SMgrRelation reln)
|
||||
mdnblocks(SMgrRelation reln, ForkNumber forknum)
|
||||
{
|
||||
MdfdVec *v = mdopen(reln, EXTENSION_FAIL);
|
||||
MdfdVec *v = mdopen(reln, forknum, EXTENSION_FAIL);
|
||||
BlockNumber nblocks;
|
||||
BlockNumber segno = 0;
|
||||
|
||||
@@ -696,7 +735,7 @@ mdnblocks(SMgrRelation reln)
|
||||
|
||||
for (;;)
|
||||
{
|
||||
nblocks = _mdnblocks(reln, v);
|
||||
nblocks = _mdnblocks(reln, forknum, v);
|
||||
if (nblocks > ((BlockNumber) RELSEG_SIZE))
|
||||
elog(FATAL, "segment too big");
|
||||
if (nblocks < ((BlockNumber) RELSEG_SIZE))
|
||||
@@ -715,15 +754,16 @@ mdnblocks(SMgrRelation reln)
|
||||
* RELSEG_SIZE. While perhaps not strictly necessary, this keeps
|
||||
* the logic simple.
|
||||
*/
|
||||
v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT);
|
||||
v->mdfd_chain = _mdfd_openseg(reln, forknum, segno, O_CREAT);
|
||||
if (v->mdfd_chain == NULL)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not open segment %u of relation %u/%u/%u: %m",
|
||||
errmsg("could not open segment %u of relation %u/%u/%u/%u: %m",
|
||||
segno,
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode)));
|
||||
reln->smgr_rnode.relNode,
|
||||
forknum)));
|
||||
}
|
||||
|
||||
v = v->mdfd_chain;
|
||||
@@ -734,7 +774,8 @@ mdnblocks(SMgrRelation reln)
|
||||
* mdtruncate() -- Truncate relation to specified number of blocks.
|
||||
*/
|
||||
void
|
||||
mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
|
||||
mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks,
|
||||
bool isTemp)
|
||||
{
|
||||
MdfdVec *v;
|
||||
BlockNumber curnblk;
|
||||
@@ -744,23 +785,24 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
|
||||
* NOTE: mdnblocks makes sure we have opened all active segments, so that
|
||||
* truncation loop will get them all!
|
||||
*/
|
||||
curnblk = mdnblocks(reln);
|
||||
curnblk = mdnblocks(reln, forknum);
|
||||
if (nblocks > curnblk)
|
||||
{
|
||||
/* Bogus request ... but no complaint if InRecovery */
|
||||
if (InRecovery)
|
||||
return;
|
||||
ereport(ERROR,
|
||||
(errmsg("could not truncate relation %u/%u/%u to %u blocks: it's only %u blocks now",
|
||||
(errmsg("could not truncate relation %u/%u/%u/%u to %u blocks: it's only %u blocks now",
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode,
|
||||
forknum,
|
||||
nblocks, curnblk)));
|
||||
}
|
||||
if (nblocks == curnblk)
|
||||
return; /* no work */
|
||||
|
||||
v = mdopen(reln, EXTENSION_FAIL);
|
||||
v = mdopen(reln, forknum, EXTENSION_FAIL);
|
||||
|
||||
priorblocks = 0;
|
||||
while (v != NULL)
|
||||
@@ -777,15 +819,16 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
|
||||
if (FileTruncate(v->mdfd_vfd, 0) < 0)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
|
||||
errmsg("could not truncate relation %u/%u/%u/%u to %u blocks: %m",
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode,
|
||||
forknum,
|
||||
nblocks)));
|
||||
if (!isTemp)
|
||||
register_dirty_segment(reln, v);
|
||||
register_dirty_segment(reln, forknum, v);
|
||||
v = v->mdfd_chain;
|
||||
Assert(ov != reln->md_fd); /* we never drop the 1st segment */
|
||||
Assert(ov != reln->md_fd[forknum]); /* we never drop the 1st segment */
|
||||
pfree(ov);
|
||||
}
|
||||
else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
|
||||
@@ -803,13 +846,14 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
|
||||
if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ) < 0)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
|
||||
errmsg("could not truncate relation %u/%u/%u/%u to %u blocks: %m",
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode,
|
||||
forknum,
|
||||
nblocks)));
|
||||
if (!isTemp)
|
||||
register_dirty_segment(reln, v);
|
||||
register_dirty_segment(reln, forknum, v);
|
||||
v = v->mdfd_chain;
|
||||
ov->mdfd_chain = NULL;
|
||||
}
|
||||
@@ -832,7 +876,7 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
|
||||
* nothing of dirty buffers that may exist inside the buffer manager.
|
||||
*/
|
||||
void
|
||||
mdimmedsync(SMgrRelation reln)
|
||||
mdimmedsync(SMgrRelation reln, ForkNumber forknum)
|
||||
{
|
||||
MdfdVec *v;
|
||||
BlockNumber curnblk;
|
||||
@@ -841,20 +885,21 @@ mdimmedsync(SMgrRelation reln)
|
||||
* NOTE: mdnblocks makes sure we have opened all active segments, so that
|
||||
* fsync loop will get them all!
|
||||
*/
|
||||
curnblk = mdnblocks(reln);
|
||||
curnblk = mdnblocks(reln, forknum);
|
||||
|
||||
v = mdopen(reln, EXTENSION_FAIL);
|
||||
v = mdopen(reln, forknum, EXTENSION_FAIL);
|
||||
|
||||
while (v != NULL)
|
||||
{
|
||||
if (FileSync(v->mdfd_vfd) < 0)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
|
||||
errmsg("could not fsync segment %u of relation %u/%u/%u/%u: %m",
|
||||
v->mdfd_segno,
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode)));
|
||||
reln->smgr_rnode.relNode,
|
||||
forknum)));
|
||||
v = v->mdfd_chain;
|
||||
}
|
||||
}
|
||||
@@ -1008,7 +1053,7 @@ mdsync(void)
|
||||
* FileSync, since fd.c might have closed the file behind our
|
||||
* back.
|
||||
*/
|
||||
seg = _mdfd_getseg(reln,
|
||||
seg = _mdfd_getseg(reln, entry->tag.forknum,
|
||||
entry->tag.segno * ((BlockNumber) RELSEG_SIZE),
|
||||
false, EXTENSION_RETURN_NULL);
|
||||
if (seg != NULL &&
|
||||
@@ -1024,19 +1069,21 @@ mdsync(void)
|
||||
failures > 0)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
|
||||
errmsg("could not fsync segment %u of relation %u/%u/%u/%u: %m",
|
||||
entry->tag.segno,
|
||||
entry->tag.rnode.spcNode,
|
||||
entry->tag.rnode.dbNode,
|
||||
entry->tag.rnode.relNode)));
|
||||
entry->tag.rnode.relNode,
|
||||
entry->tag.forknum)));
|
||||
else
|
||||
ereport(DEBUG1,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not fsync segment %u of relation %u/%u/%u, but retrying: %m",
|
||||
errmsg("could not fsync segment %u of relation %u/%u/%u/%u but retrying: %m",
|
||||
entry->tag.segno,
|
||||
entry->tag.rnode.spcNode,
|
||||
entry->tag.rnode.dbNode,
|
||||
entry->tag.rnode.relNode)));
|
||||
entry->tag.rnode.relNode,
|
||||
entry->tag.forknum)));
|
||||
|
||||
/*
|
||||
* Absorb incoming requests and check to see if canceled.
|
||||
@@ -1126,7 +1173,7 @@ mdpostckpt(void)
|
||||
Assert((CycleCtr) (entry->cycle_ctr + 1) == mdckpt_cycle_ctr);
|
||||
|
||||
/* Unlink the file */
|
||||
path = relpath(entry->rnode);
|
||||
path = relpath(entry->rnode, MAIN_FORKNUM);
|
||||
if (unlink(path) < 0)
|
||||
{
|
||||
/*
|
||||
@@ -1139,10 +1186,11 @@ mdpostckpt(void)
|
||||
if (errno != ENOENT)
|
||||
ereport(WARNING,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not remove relation %u/%u/%u: %m",
|
||||
errmsg("could not remove relation %u/%u/%u/%u: %m",
|
||||
entry->rnode.spcNode,
|
||||
entry->rnode.dbNode,
|
||||
entry->rnode.relNode)));
|
||||
entry->rnode.relNode,
|
||||
MAIN_FORKNUM)));
|
||||
}
|
||||
pfree(path);
|
||||
|
||||
@@ -1161,26 +1209,27 @@ mdpostckpt(void)
|
||||
* to be a performance problem).
|
||||
*/
|
||||
static void
|
||||
register_dirty_segment(SMgrRelation reln, MdfdVec *seg)
|
||||
register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
|
||||
{
|
||||
if (pendingOpsTable)
|
||||
{
|
||||
/* push it into local pending-ops table */
|
||||
RememberFsyncRequest(reln->smgr_rnode, seg->mdfd_segno);
|
||||
RememberFsyncRequest(reln->smgr_rnode, forknum, seg->mdfd_segno);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ForwardFsyncRequest(reln->smgr_rnode, seg->mdfd_segno))
|
||||
if (ForwardFsyncRequest(reln->smgr_rnode, forknum, seg->mdfd_segno))
|
||||
return; /* passed it off successfully */
|
||||
|
||||
if (FileSync(seg->mdfd_vfd) < 0)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
|
||||
errmsg("could not fsync segment %u of relation %u/%u/%u/%u: %m",
|
||||
seg->mdfd_segno,
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode)));
|
||||
reln->smgr_rnode.relNode,
|
||||
forknum)));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1196,7 +1245,7 @@ register_unlink(RelFileNode rnode)
|
||||
if (pendingOpsTable)
|
||||
{
|
||||
/* push it into local pending-ops table */
|
||||
RememberFsyncRequest(rnode, UNLINK_RELATION_REQUEST);
|
||||
RememberFsyncRequest(rnode, MAIN_FORKNUM, UNLINK_RELATION_REQUEST);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1208,7 +1257,8 @@ register_unlink(RelFileNode rnode)
|
||||
* XXX should we just leave the file orphaned instead?
|
||||
*/
|
||||
Assert(IsUnderPostmaster);
|
||||
while (!ForwardFsyncRequest(rnode, UNLINK_RELATION_REQUEST))
|
||||
while (!ForwardFsyncRequest(rnode, MAIN_FORKNUM,
|
||||
UNLINK_RELATION_REQUEST))
|
||||
pg_usleep(10000L); /* 10 msec seems a good number */
|
||||
}
|
||||
}
|
||||
@@ -1233,7 +1283,7 @@ register_unlink(RelFileNode rnode)
|
||||
* structure for them.)
|
||||
*/
|
||||
void
|
||||
RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
|
||||
RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
|
||||
{
|
||||
Assert(pendingOpsTable);
|
||||
|
||||
@@ -1246,7 +1296,8 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
|
||||
hash_seq_init(&hstat, pendingOpsTable);
|
||||
while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
|
||||
{
|
||||
if (RelFileNodeEquals(entry->tag.rnode, rnode))
|
||||
if (RelFileNodeEquals(entry->tag.rnode, rnode) &&
|
||||
entry->tag.forknum == forknum)
|
||||
{
|
||||
/* Okay, cancel this entry */
|
||||
entry->canceled = true;
|
||||
@@ -1313,6 +1364,7 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
|
||||
/* ensure any pad bytes in the hash key are zeroed */
|
||||
MemSet(&key, 0, sizeof(key));
|
||||
key.rnode = rnode;
|
||||
key.forknum = forknum;
|
||||
key.segno = segno;
|
||||
|
||||
entry = (PendingOperationEntry *) hash_search(pendingOpsTable,
|
||||
@@ -1346,12 +1398,12 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
|
||||
* ForgetRelationFsyncRequests -- forget any fsyncs for a rel
|
||||
*/
|
||||
void
|
||||
ForgetRelationFsyncRequests(RelFileNode rnode)
|
||||
ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum)
|
||||
{
|
||||
if (pendingOpsTable)
|
||||
{
|
||||
/* standalone backend or startup process: fsync state is local */
|
||||
RememberFsyncRequest(rnode, FORGET_RELATION_FSYNC);
|
||||
RememberFsyncRequest(rnode, forknum, FORGET_RELATION_FSYNC);
|
||||
}
|
||||
else if (IsUnderPostmaster)
|
||||
{
|
||||
@@ -1365,7 +1417,7 @@ ForgetRelationFsyncRequests(RelFileNode rnode)
|
||||
* which would be bad, so I'm inclined to assume that the bgwriter
|
||||
* will always empty the queue soon.
|
||||
*/
|
||||
while (!ForwardFsyncRequest(rnode, FORGET_RELATION_FSYNC))
|
||||
while (!ForwardFsyncRequest(rnode, forknum, FORGET_RELATION_FSYNC))
|
||||
pg_usleep(10000L); /* 10 msec seems a good number */
|
||||
|
||||
/*
|
||||
@@ -1390,12 +1442,13 @@ ForgetDatabaseFsyncRequests(Oid dbid)
|
||||
if (pendingOpsTable)
|
||||
{
|
||||
/* standalone backend or startup process: fsync state is local */
|
||||
RememberFsyncRequest(rnode, FORGET_DATABASE_FSYNC);
|
||||
RememberFsyncRequest(rnode, InvalidForkNumber, FORGET_DATABASE_FSYNC);
|
||||
}
|
||||
else if (IsUnderPostmaster)
|
||||
{
|
||||
/* see notes in ForgetRelationFsyncRequests */
|
||||
while (!ForwardFsyncRequest(rnode, FORGET_DATABASE_FSYNC))
|
||||
while (!ForwardFsyncRequest(rnode, InvalidForkNumber,
|
||||
FORGET_DATABASE_FSYNC))
|
||||
pg_usleep(10000L); /* 10 msec seems a good number */
|
||||
}
|
||||
}
|
||||
@@ -1415,14 +1468,15 @@ _fdvec_alloc(void)
|
||||
* and make a MdfdVec object for it. Returns NULL on failure.
|
||||
*/
|
||||
static MdfdVec *
|
||||
_mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags)
|
||||
_mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno,
|
||||
int oflags)
|
||||
{
|
||||
MdfdVec *v;
|
||||
int fd;
|
||||
char *path,
|
||||
*fullpath;
|
||||
|
||||
path = relpath(reln->smgr_rnode);
|
||||
path = relpath(reln->smgr_rnode, forknum);
|
||||
|
||||
if (segno > 0)
|
||||
{
|
||||
@@ -1449,7 +1503,7 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags)
|
||||
v->mdfd_vfd = fd;
|
||||
v->mdfd_segno = segno;
|
||||
v->mdfd_chain = NULL;
|
||||
Assert(_mdnblocks(reln, v) <= ((BlockNumber) RELSEG_SIZE));
|
||||
Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
|
||||
|
||||
/* all done */
|
||||
return v;
|
||||
@@ -1464,10 +1518,10 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags)
|
||||
* in the EXTENSION_CREATE case.
|
||||
*/
|
||||
static MdfdVec *
|
||||
_mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp,
|
||||
ExtensionBehavior behavior)
|
||||
_mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
|
||||
bool isTemp, ExtensionBehavior behavior)
|
||||
{
|
||||
MdfdVec *v = mdopen(reln, behavior);
|
||||
MdfdVec *v = mdopen(reln, forknum, behavior);
|
||||
BlockNumber targetseg;
|
||||
BlockNumber nextsegno;
|
||||
|
||||
@@ -1497,20 +1551,21 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp,
|
||||
*/
|
||||
if (behavior == EXTENSION_CREATE || InRecovery)
|
||||
{
|
||||
if (_mdnblocks(reln, v) < RELSEG_SIZE)
|
||||
if (_mdnblocks(reln, forknum, v) < RELSEG_SIZE)
|
||||
{
|
||||
char *zerobuf = palloc0(BLCKSZ);
|
||||
|
||||
mdextend(reln, nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
|
||||
mdextend(reln, forknum,
|
||||
nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
|
||||
zerobuf, isTemp);
|
||||
pfree(zerobuf);
|
||||
}
|
||||
v->mdfd_chain = _mdfd_openseg(reln, nextsegno, O_CREAT);
|
||||
v->mdfd_chain = _mdfd_openseg(reln, forknum, +nextsegno, O_CREAT);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* We won't create segment if not existent */
|
||||
v->mdfd_chain = _mdfd_openseg(reln, nextsegno, 0);
|
||||
v->mdfd_chain = _mdfd_openseg(reln, forknum, nextsegno, 0);
|
||||
}
|
||||
if (v->mdfd_chain == NULL)
|
||||
{
|
||||
@@ -1519,11 +1574,12 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp,
|
||||
return NULL;
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not open segment %u of relation %u/%u/%u (target block %u): %m",
|
||||
errmsg("could not open segment %u of relation %u/%u/%u/%u (target block %u): %m",
|
||||
nextsegno,
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode,
|
||||
forknum,
|
||||
blkno)));
|
||||
}
|
||||
}
|
||||
@@ -1536,7 +1592,7 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp,
|
||||
* Get number of blocks present in a single disk file
|
||||
*/
|
||||
static BlockNumber
|
||||
_mdnblocks(SMgrRelation reln, MdfdVec *seg)
|
||||
_mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
|
||||
{
|
||||
off_t len;
|
||||
|
||||
@@ -1544,11 +1600,12 @@ _mdnblocks(SMgrRelation reln, MdfdVec *seg)
|
||||
if (len < 0)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not seek to end of segment %u of relation %u/%u/%u: %m",
|
||||
errmsg("could not seek to end of segment %u of relation %u/%u/%u/%u: %m",
|
||||
seg->mdfd_segno,
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode)));
|
||||
reln->smgr_rnode.relNode,
|
||||
forknum)));
|
||||
/* note that this calculation will ignore any partial block at EOF */
|
||||
return (BlockNumber) (len / BLCKSZ);
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.110 2008/06/12 09:12:31 heikki Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.111 2008/08/11 11:05:11 heikki Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -42,19 +42,22 @@ typedef struct f_smgr
|
||||
{
|
||||
void (*smgr_init) (void); /* may be NULL */
|
||||
void (*smgr_shutdown) (void); /* may be NULL */
|
||||
void (*smgr_close) (SMgrRelation reln);
|
||||
void (*smgr_create) (SMgrRelation reln, bool isRedo);
|
||||
void (*smgr_unlink) (RelFileNode rnode, bool isRedo);
|
||||
void (*smgr_extend) (SMgrRelation reln, BlockNumber blocknum,
|
||||
char *buffer, bool isTemp);
|
||||
void (*smgr_read) (SMgrRelation reln, BlockNumber blocknum,
|
||||
char *buffer);
|
||||
void (*smgr_write) (SMgrRelation reln, BlockNumber blocknum,
|
||||
char *buffer, bool isTemp);
|
||||
BlockNumber (*smgr_nblocks) (SMgrRelation reln);
|
||||
void (*smgr_truncate) (SMgrRelation reln, BlockNumber nblocks,
|
||||
bool isTemp);
|
||||
void (*smgr_immedsync) (SMgrRelation reln);
|
||||
void (*smgr_close) (SMgrRelation reln, ForkNumber forknum);
|
||||
void (*smgr_create) (SMgrRelation reln, ForkNumber forknum,
|
||||
bool isRedo);
|
||||
bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum);
|
||||
void (*smgr_unlink) (RelFileNode rnode, ForkNumber forknum,
|
||||
bool isRedo);
|
||||
void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, char *buffer, bool isTemp);
|
||||
void (*smgr_read) (SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, char *buffer);
|
||||
void (*smgr_write) (SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, char *buffer, bool isTemp);
|
||||
BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
|
||||
void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber nblocks, bool isTemp);
|
||||
void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
|
||||
void (*smgr_commit) (void); /* may be NULL */
|
||||
void (*smgr_abort) (void); /* may be NULL */
|
||||
void (*smgr_pre_ckpt) (void); /* may be NULL */
|
||||
@@ -65,7 +68,7 @@ typedef struct f_smgr
|
||||
|
||||
static const f_smgr smgrsw[] = {
|
||||
/* magnetic disk */
|
||||
{mdinit, NULL, mdclose, mdcreate, mdunlink, mdextend,
|
||||
{mdinit, NULL, mdclose, mdcreate, mdexists, mdunlink, mdextend,
|
||||
mdread, mdwrite, mdnblocks, mdtruncate, mdimmedsync,
|
||||
NULL, NULL, mdpreckpt, mdsync, mdpostckpt
|
||||
}
|
||||
@@ -102,6 +105,7 @@ static HTAB *SMgrRelationHash = NULL;
|
||||
typedef struct PendingRelDelete
|
||||
{
|
||||
RelFileNode relnode; /* relation that may need to be deleted */
|
||||
ForkNumber forknum; /* fork number that may need to be deleted */
|
||||
int which; /* which storage manager? */
|
||||
bool isTemp; /* is it a temporary relation? */
|
||||
bool atCommit; /* T=delete at commit; F=delete at abort */
|
||||
@@ -126,19 +130,21 @@ static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
|
||||
/*
 * xl_smgr_create -- names one fork of one relation.
 *
 * NOTE(review): smgrcreate() fills a local "xlrec" with exactly these two
 * fields (rnode from reln->smgr_rnode, forknum from its argument) and hands
 * it on as record data, so this is presumably the payload logged when a fork
 * is created -- confirm against the xlrec declaration, which is not visible
 * in this hunk.
 */
typedef struct xl_smgr_create
|
||||
{
|
||||
RelFileNode rnode; /* relation the record refers to */
|
||||
ForkNumber forknum; /* which fork of that relation */
|
||||
} xl_smgr_create;
|
||||
|
||||
/*
 * xl_smgr_truncate -- names one fork of one relation plus a block number.
 *
 * NOTE(review): smgrtruncate() fills a local "xlrec" with these three fields
 * for non-temp relations, so this is presumably the payload logged for a
 * truncation -- confirm against the xlrec declaration, which is not visible
 * in this hunk.
 */
typedef struct xl_smgr_truncate
|
||||
{
|
||||
BlockNumber blkno; /* presumably the new length in blocks: smgrtruncate stores its nblocks argument here -- confirm */
|
||||
RelFileNode rnode; /* relation the record refers to */
|
||||
ForkNumber forknum; /* which fork of that relation */
|
||||
} xl_smgr_truncate;
|
||||
|
||||
|
||||
/* local function prototypes */
|
||||
static void smgrshutdown(int code, Datum arg);
|
||||
static void smgr_internal_unlink(RelFileNode rnode, int which,
|
||||
bool isTemp, bool isRedo);
|
||||
static void smgr_internal_unlink(RelFileNode rnode, ForkNumber forknum,
|
||||
int which, bool isTemp, bool isRedo);
|
||||
|
||||
|
||||
/*
|
||||
@@ -211,10 +217,15 @@ smgropen(RelFileNode rnode)
|
||||
/* Initialize it if not present before */
|
||||
if (!found)
|
||||
{
|
||||
int forknum;
|
||||
|
||||
/* hash_search already filled in the lookup key */
|
||||
reln->smgr_owner = NULL;
|
||||
reln->smgr_which = 0; /* we only have md.c at present */
|
||||
reln->md_fd = NULL; /* mark it not open */
|
||||
|
||||
/* mark it not open */
|
||||
for(forknum = 0; forknum <= MAX_FORKNUM; forknum++)
|
||||
reln->md_fd[forknum] = NULL;
|
||||
}
|
||||
|
||||
return reln;
|
||||
@@ -243,6 +254,15 @@ smgrsetowner(SMgrRelation *owner, SMgrRelation reln)
|
||||
*owner = reln;
|
||||
}
|
||||
|
||||
/*
|
||||
* smgrexists() -- Does the underlying file for a fork exist?
*
* reln is the relation to check and forknum selects the fork.
*
* This is a pure dispatch: the call is forwarded to the smgr_exists
* entry of the storage manager recorded in reln->smgr_which (mdexists
* for the only manager listed in smgrsw), and that routine's result is
* returned unchanged.
|
||||
*/
|
||||
bool
|
||||
smgrexists(SMgrRelation reln, ForkNumber forknum)
|
||||
{
|
||||
return (*(smgrsw[reln->smgr_which].smgr_exists)) (reln, forknum);
|
||||
}
|
||||
|
||||
/*
|
||||
* smgrclose() -- Close and delete an SMgrRelation object.
|
||||
*/
|
||||
@@ -250,8 +270,10 @@ void
|
||||
smgrclose(SMgrRelation reln)
|
||||
{
|
||||
SMgrRelation *owner;
|
||||
ForkNumber forknum;
|
||||
|
||||
(*(smgrsw[reln->smgr_which].smgr_close)) (reln);
|
||||
for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
|
||||
(*(smgrsw[reln->smgr_which].smgr_close)) (reln, forknum);
|
||||
|
||||
owner = reln->smgr_owner;
|
||||
|
||||
@@ -315,7 +337,8 @@ smgrclosenode(RelFileNode rnode)
|
||||
* smgrcreate() -- Create a new relation.
|
||||
*
|
||||
* Given an already-created (but presumably unused) SMgrRelation,
|
||||
* cause the underlying disk file or other storage to be created.
|
||||
* cause the underlying disk file or other storage for the fork
|
||||
* to be created.
|
||||
*
|
||||
* If isRedo is true, it is okay for the underlying file to exist
|
||||
* already because we are in a WAL replay sequence. In this case
|
||||
@@ -323,7 +346,7 @@ smgrclosenode(RelFileNode rnode)
|
||||
* tell whether to drop the file.
|
||||
*/
|
||||
void
|
||||
smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo)
|
||||
smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isTemp, bool isRedo)
|
||||
{
|
||||
XLogRecPtr lsn;
|
||||
XLogRecData rdata;
|
||||
@@ -334,7 +357,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo)
|
||||
* Exit quickly in WAL replay mode if we've already opened the file.
|
||||
* If it's open, it surely must exist.
|
||||
*/
|
||||
if (isRedo && reln->md_fd != NULL)
|
||||
if (isRedo && reln->md_fd[forknum] != NULL)
|
||||
return;
|
||||
|
||||
/*
|
||||
@@ -350,7 +373,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo)
|
||||
reln->smgr_rnode.dbNode,
|
||||
isRedo);
|
||||
|
||||
(*(smgrsw[reln->smgr_which].smgr_create)) (reln, isRedo);
|
||||
(*(smgrsw[reln->smgr_which].smgr_create)) (reln, forknum, isRedo);
|
||||
|
||||
if (isRedo)
|
||||
return;
|
||||
@@ -360,6 +383,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo)
|
||||
* will be dropped at abort time.
|
||||
*/
|
||||
xlrec.rnode = reln->smgr_rnode;
|
||||
xlrec.forknum = forknum;
|
||||
|
||||
rdata.data = (char *) &xlrec;
|
||||
rdata.len = sizeof(xlrec);
|
||||
@@ -372,6 +396,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo)
|
||||
pending = (PendingRelDelete *)
|
||||
MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
|
||||
pending->relnode = reln->smgr_rnode;
|
||||
pending->forknum = forknum;
|
||||
pending->which = reln->smgr_which;
|
||||
pending->isTemp = isTemp;
|
||||
pending->atCommit = false; /* delete if abort */
|
||||
@@ -383,13 +408,11 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo)
|
||||
/*
|
||||
* smgrscheduleunlink() -- Schedule unlinking a relation at xact commit.
|
||||
*
|
||||
* The relation is marked to be removed from the store if we
|
||||
* successfully commit the current transaction.
|
||||
*
|
||||
* This also implies smgrclose() on the SMgrRelation object.
|
||||
* The fork is marked to be removed from the store if we successfully
|
||||
* commit the current transaction.
|
||||
*/
|
||||
void
|
||||
smgrscheduleunlink(SMgrRelation reln, bool isTemp)
|
||||
smgrscheduleunlink(SMgrRelation reln, ForkNumber forknum, bool isTemp)
|
||||
{
|
||||
PendingRelDelete *pending;
|
||||
|
||||
@@ -397,6 +420,7 @@ smgrscheduleunlink(SMgrRelation reln, bool isTemp)
|
||||
pending = (PendingRelDelete *)
|
||||
MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
|
||||
pending->relnode = reln->smgr_rnode;
|
||||
pending->forknum = forknum;
|
||||
pending->which = reln->smgr_which;
|
||||
pending->isTemp = isTemp;
|
||||
pending->atCommit = true; /* delete if commit */
|
||||
@@ -413,51 +437,49 @@ smgrscheduleunlink(SMgrRelation reln, bool isTemp)
|
||||
* the existing list entry and delete the physical file immediately, but
|
||||
* for now I'll keep the logic simple.
|
||||
*/
|
||||
|
||||
/* Now close the file and throw away the hashtable entry */
|
||||
smgrclose(reln);
|
||||
}
|
||||
|
||||
/*
|
||||
* smgrdounlink() -- Immediately unlink a relation.
|
||||
*
|
||||
* The relation is removed from the store. This should not be used
|
||||
* during transactional operations, since it can't be undone.
|
||||
* The specified fork of the relation is removed from the store. This
|
||||
* should not be used during transactional operations, since it can't be
|
||||
* undone.
|
||||
*
|
||||
* If isRedo is true, it is okay for the underlying file to be gone
|
||||
* already.
|
||||
*
|
||||
* This also implies smgrclose() on the SMgrRelation object.
|
||||
*/
|
||||
void
|
||||
smgrdounlink(SMgrRelation reln, bool isTemp, bool isRedo)
|
||||
smgrdounlink(SMgrRelation reln, ForkNumber forknum, bool isTemp, bool isRedo)
|
||||
{
|
||||
RelFileNode rnode = reln->smgr_rnode;
|
||||
int which = reln->smgr_which;
|
||||
|
||||
/* Close the file and throw away the hashtable entry */
|
||||
smgrclose(reln);
|
||||
/* Close the fork */
|
||||
(*(smgrsw[which].smgr_close)) (reln, forknum);
|
||||
|
||||
smgr_internal_unlink(rnode, which, isTemp, isRedo);
|
||||
smgr_internal_unlink(rnode, forknum, which, isTemp, isRedo);
|
||||
}
|
||||
|
||||
/*
|
||||
* Shared subroutine that actually does the unlink ...
|
||||
*/
|
||||
static void
|
||||
smgr_internal_unlink(RelFileNode rnode, int which, bool isTemp, bool isRedo)
|
||||
smgr_internal_unlink(RelFileNode rnode, ForkNumber forknum,
|
||||
int which, bool isTemp, bool isRedo)
|
||||
{
|
||||
/*
|
||||
* Get rid of any remaining buffers for the relation. bufmgr will just
|
||||
* drop them without bothering to write the contents.
|
||||
*/
|
||||
DropRelFileNodeBuffers(rnode, isTemp, 0);
|
||||
DropRelFileNodeBuffers(rnode, forknum, isTemp, 0);
|
||||
|
||||
/*
|
||||
* Tell the free space map to forget this relation. It won't be accessed
|
||||
* any more anyway, but we may as well recycle the map space quickly.
|
||||
*/
|
||||
FreeSpaceMapForgetRel(&rnode);
|
||||
if (forknum == MAIN_FORKNUM)
|
||||
FreeSpaceMapForgetRel(&rnode);
|
||||
|
||||
/*
|
||||
* It'd be nice to tell the stats collector to forget it immediately, too.
|
||||
@@ -473,7 +495,7 @@ smgr_internal_unlink(RelFileNode rnode, int which, bool isTemp, bool isRedo)
|
||||
* ERROR, because we've already decided to commit or abort the current
|
||||
* xact.
|
||||
*/
|
||||
(*(smgrsw[which].smgr_unlink)) (rnode, isRedo);
|
||||
(*(smgrsw[which].smgr_unlink)) (rnode, forknum, isRedo);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -486,9 +508,11 @@ smgr_internal_unlink(RelFileNode rnode, int which, bool isTemp, bool isRedo)
|
||||
* causes intervening file space to become filled with zeroes.
|
||||
*/
|
||||
void
|
||||
smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
|
||||
smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
char *buffer, bool isTemp)
|
||||
{
|
||||
(*(smgrsw[reln->smgr_which].smgr_extend)) (reln, blocknum, buffer, isTemp);
|
||||
(*(smgrsw[reln->smgr_which].smgr_extend)) (reln, forknum, blocknum,
|
||||
buffer, isTemp);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -500,9 +524,10 @@ smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
|
||||
* return pages in the format that POSTGRES expects.
|
||||
*/
|
||||
void
|
||||
smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
|
||||
smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
char *buffer)
|
||||
{
|
||||
(*(smgrsw[reln->smgr_which].smgr_read)) (reln, blocknum, buffer);
|
||||
(*(smgrsw[reln->smgr_which].smgr_read)) (reln, forknum, blocknum, buffer);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -521,9 +546,11 @@ smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
|
||||
* made to fsync the write before checkpointing.
|
||||
*/
|
||||
void
|
||||
smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
|
||||
smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
char *buffer, bool isTemp)
|
||||
{
|
||||
(*(smgrsw[reln->smgr_which].smgr_write)) (reln, blocknum, buffer, isTemp);
|
||||
(*(smgrsw[reln->smgr_which].smgr_write)) (reln, forknum, blocknum,
|
||||
buffer, isTemp);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -531,9 +558,9 @@ smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
|
||||
* supplied relation.
|
||||
*/
|
||||
BlockNumber
|
||||
smgrnblocks(SMgrRelation reln)
|
||||
smgrnblocks(SMgrRelation reln, ForkNumber forknum)
|
||||
{
|
||||
return (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln);
|
||||
return (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln, forknum);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -541,13 +568,14 @@ smgrnblocks(SMgrRelation reln)
|
||||
* of blocks
|
||||
*/
|
||||
void
|
||||
smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
|
||||
smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks,
|
||||
bool isTemp)
|
||||
{
|
||||
/*
|
||||
* Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
|
||||
* just drop them without bothering to write the contents.
|
||||
*/
|
||||
DropRelFileNodeBuffers(reln->smgr_rnode, isTemp, nblocks);
|
||||
DropRelFileNodeBuffers(reln->smgr_rnode, forknum, isTemp, nblocks);
|
||||
|
||||
/*
|
||||
* Tell the free space map to forget anything it may have stored for the
|
||||
@@ -557,7 +585,8 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
|
||||
FreeSpaceMapTruncateRel(&reln->smgr_rnode, nblocks);
|
||||
|
||||
/* Do the truncation */
|
||||
(*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, nblocks, isTemp);
|
||||
(*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, forknum, nblocks,
|
||||
isTemp);
|
||||
|
||||
if (!isTemp)
|
||||
{
|
||||
@@ -570,6 +599,7 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
|
||||
|
||||
xlrec.blkno = nblocks;
|
||||
xlrec.rnode = reln->smgr_rnode;
|
||||
xlrec.forknum = forknum;
|
||||
|
||||
rdata.data = (char *) &xlrec;
|
||||
rdata.len = sizeof(xlrec);
|
||||
@@ -604,9 +634,9 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
|
||||
* otherwise the sync is not very meaningful.
|
||||
*/
|
||||
void
|
||||
smgrimmedsync(SMgrRelation reln)
|
||||
smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
|
||||
{
|
||||
(*(smgrsw[reln->smgr_which].smgr_immedsync)) (reln);
|
||||
(*(smgrsw[reln->smgr_which].smgr_immedsync)) (reln, forknum);
|
||||
}
|
||||
|
||||
|
||||
@@ -666,6 +696,7 @@ smgrDoPendingDeletes(bool isCommit)
|
||||
/* do deletion if called for */
|
||||
if (pending->atCommit == isCommit)
|
||||
smgr_internal_unlink(pending->relnode,
|
||||
pending->forknum,
|
||||
pending->which,
|
||||
pending->isTemp,
|
||||
false);
|
||||
@@ -680,7 +711,7 @@ smgrDoPendingDeletes(bool isCommit)
|
||||
* smgrGetPendingDeletes() -- Get a list of relations to be deleted.
|
||||
*
|
||||
* The return value is the number of relations scheduled for termination.
|
||||
* *ptr is set to point to a freshly-palloc'd array of RelFileNodes.
|
||||
* *ptr is set to point to a freshly-palloc'd array of RelFileForks.
|
||||
* If there are no relations to be deleted, *ptr is set to NULL.
|
||||
*
|
||||
* If haveNonTemp isn't NULL, the bool it points to gets set to true if
|
||||
@@ -690,11 +721,11 @@ smgrDoPendingDeletes(bool isCommit)
|
||||
* by upper-level transactions.
|
||||
*/
|
||||
int
|
||||
smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr, bool *haveNonTemp)
|
||||
smgrGetPendingDeletes(bool forCommit, RelFileFork **ptr, bool *haveNonTemp)
|
||||
{
|
||||
int nestLevel = GetCurrentTransactionNestLevel();
|
||||
int nrels;
|
||||
RelFileNode *rptr;
|
||||
RelFileFork *rptr;
|
||||
PendingRelDelete *pending;
|
||||
|
||||
nrels = 0;
|
||||
@@ -710,12 +741,16 @@ smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr, bool *haveNonTemp)
|
||||
*ptr = NULL;
|
||||
return 0;
|
||||
}
|
||||
rptr = (RelFileNode *) palloc(nrels * sizeof(RelFileNode));
|
||||
rptr = (RelFileFork *) palloc(nrels * sizeof(RelFileFork));
|
||||
*ptr = rptr;
|
||||
for (pending = pendingDeletes; pending != NULL; pending = pending->next)
|
||||
{
|
||||
if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit)
|
||||
*rptr++ = pending->relnode;
|
||||
{
|
||||
rptr->rnode = pending->relnode;
|
||||
rptr->forknum = pending->forknum;
|
||||
rptr++;
|
||||
}
|
||||
if (haveNonTemp && !pending->isTemp)
|
||||
*haveNonTemp = true;
|
||||
}
|
||||
@@ -843,7 +878,7 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
SMgrRelation reln;
|
||||
|
||||
reln = smgropen(xlrec->rnode);
|
||||
smgrcreate(reln, false, true);
|
||||
smgrcreate(reln, xlrec->forknum, false, true);
|
||||
}
|
||||
else if (info == XLOG_SMGR_TRUNCATE)
|
||||
{
|
||||
@@ -858,7 +893,7 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
* XLogOpenRelation, we prefer to recreate the rel and replay the log
|
||||
* as best we can until the drop is seen.
|
||||
*/
|
||||
smgrcreate(reln, false, true);
|
||||
smgrcreate(reln, xlrec->forknum, false, true);
|
||||
|
||||
/* Can't use smgrtruncate because it would try to xlog */
|
||||
|
||||
@@ -867,7 +902,8 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
* truncated blocks. We must do this, else subsequent XLogReadBuffer
|
||||
* operations will not re-extend the file properly.
|
||||
*/
|
||||
DropRelFileNodeBuffers(xlrec->rnode, false, xlrec->blkno);
|
||||
DropRelFileNodeBuffers(xlrec->rnode, xlrec->forknum, false,
|
||||
xlrec->blkno);
|
||||
|
||||
/*
|
||||
* Tell the free space map to forget anything it may have stored for
|
||||
@@ -878,11 +914,12 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
|
||||
/* Do the truncation */
|
||||
(*(smgrsw[reln->smgr_which].smgr_truncate)) (reln,
|
||||
xlrec->forknum,
|
||||
xlrec->blkno,
|
||||
false);
|
||||
|
||||
/* Also tell xlogutils.c about it */
|
||||
XLogTruncateRelation(xlrec->rnode, xlrec->blkno);
|
||||
XLogTruncateRelation(xlrec->rnode, xlrec->forknum, xlrec->blkno);
|
||||
}
|
||||
else
|
||||
elog(PANIC, "smgr_redo: unknown op code %u", info);
|
||||
@@ -897,17 +934,18 @@ smgr_desc(StringInfo buf, uint8 xl_info, char *rec)
|
||||
{
|
||||
xl_smgr_create *xlrec = (xl_smgr_create *) rec;
|
||||
|
||||
appendStringInfo(buf, "file create: %u/%u/%u",
|
||||
appendStringInfo(buf, "file create: %u/%u/%u/%u",
|
||||
xlrec->rnode.spcNode, xlrec->rnode.dbNode,
|
||||
xlrec->rnode.relNode);
|
||||
xlrec->rnode.relNode, xlrec->forknum);
|
||||
}
|
||||
else if (info == XLOG_SMGR_TRUNCATE)
|
||||
{
|
||||
xl_smgr_truncate *xlrec = (xl_smgr_truncate *) rec;
|
||||
|
||||
appendStringInfo(buf, "file truncate: %u/%u/%u to %u blocks",
|
||||
appendStringInfo(buf, "file truncate: %u/%u/%u/%u to %u blocks",
|
||||
xlrec->rnode.spcNode, xlrec->rnode.dbNode,
|
||||
xlrec->rnode.relNode, xlrec->blkno);
|
||||
xlrec->rnode.relNode, xlrec->forknum,
|
||||
xlrec->blkno);
|
||||
}
|
||||
else
|
||||
appendStringInfo(buf, "UNKNOWN");
|
||||
|
||||
Reference in New Issue
Block a user