mirror of
https://github.com/postgres/postgres.git
synced 2025-10-27 00:12:01 +03:00
Include the backend ID in the relpath of temporary relations.
This allows us to reliably remove all leftover temporary relation files on cluster startup without reference to system catalogs or WAL; therefore, we no longer include temporary relations in XLOG_XACT_COMMIT and XLOG_XACT_ABORT WAL records.

Since these changes require including a backend ID in each SharedInvalSmgrMsg, the size of the SharedInvalidationMessage.id field has been reduced from two bytes to one, and the maximum number of connections has been reduced from INT_MAX / 4 to 2^23-1. It would be possible to remove these restrictions by increasing the size of SharedInvalidationMessage by 4 bytes, but right now that doesn't seem like a good trade-off.

Review by Jaime Casanova and Tom Lane.
This commit is contained in:
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.151 2010/02/26 02:01:01 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.152 2010/08/13 20:10:52 rhaas Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -119,7 +119,7 @@ static MemoryContext MdCxt; /* context for all md.c allocations */
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
RelFileNode rnode; /* the targeted relation */
|
||||
RelFileNodeBackend rnode; /* the targeted relation */
|
||||
ForkNumber forknum;
|
||||
BlockNumber segno; /* which segment */
|
||||
} PendingOperationTag;
|
||||
@@ -135,7 +135,7 @@ typedef struct
|
||||
|
||||
typedef struct
|
||||
{
|
||||
RelFileNode rnode; /* the dead relation to delete */
|
||||
RelFileNodeBackend rnode; /* the dead relation to delete */
|
||||
CycleCtr cycle_ctr; /* mdckpt_cycle_ctr when request was made */
|
||||
} PendingUnlinkEntry;
|
||||
|
||||
@@ -158,14 +158,14 @@ static MdfdVec *mdopen(SMgrRelation reln, ForkNumber forknum,
|
||||
ExtensionBehavior behavior);
|
||||
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum,
|
||||
MdfdVec *seg);
|
||||
static void register_unlink(RelFileNode rnode);
|
||||
static void register_unlink(RelFileNodeBackend rnode);
|
||||
static MdfdVec *_fdvec_alloc(void);
|
||||
static char *_mdfd_segpath(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber segno);
|
||||
static MdfdVec *_mdfd_openseg(SMgrRelation reln, ForkNumber forkno,
|
||||
BlockNumber segno, int oflags);
|
||||
static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forkno,
|
||||
BlockNumber blkno, bool isTemp, ExtensionBehavior behavior);
|
||||
BlockNumber blkno, bool skipFsync, ExtensionBehavior behavior);
|
||||
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum,
|
||||
MdfdVec *seg);
|
||||
|
||||
@@ -321,7 +321,7 @@ mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
|
||||
* we are usually not in a transaction anymore when this is called.
|
||||
*/
|
||||
void
|
||||
mdunlink(RelFileNode rnode, ForkNumber forkNum, bool isRedo)
|
||||
mdunlink(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
|
||||
{
|
||||
char *path;
|
||||
int ret;
|
||||
@@ -417,7 +417,7 @@ mdunlink(RelFileNode rnode, ForkNumber forkNum, bool isRedo)
|
||||
*/
|
||||
void
|
||||
mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
char *buffer, bool isTemp)
|
||||
char *buffer, bool skipFsync)
|
||||
{
|
||||
off_t seekpos;
|
||||
int nbytes;
|
||||
@@ -440,7 +440,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
relpath(reln->smgr_rnode, forknum),
|
||||
InvalidBlockNumber)));
|
||||
|
||||
v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_CREATE);
|
||||
v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
|
||||
|
||||
seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE));
|
||||
|
||||
@@ -478,7 +478,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
errhint("Check free disk space.")));
|
||||
}
|
||||
|
||||
if (!isTemp)
|
||||
if (!skipFsync && !SmgrIsTemp(reln))
|
||||
register_dirty_segment(reln, forknum, v);
|
||||
|
||||
Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
|
||||
@@ -605,9 +605,10 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
MdfdVec *v;
|
||||
|
||||
TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode);
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode,
|
||||
reln->smgr_rnode.backend);
|
||||
|
||||
v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL);
|
||||
|
||||
@@ -624,9 +625,10 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ);
|
||||
|
||||
TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode,
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode,
|
||||
reln->smgr_rnode.backend,
|
||||
nbytes,
|
||||
BLCKSZ);
|
||||
|
||||
@@ -666,7 +668,7 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
*/
|
||||
void
|
||||
mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
char *buffer, bool isTemp)
|
||||
char *buffer, bool skipFsync)
|
||||
{
|
||||
off_t seekpos;
|
||||
int nbytes;
|
||||
@@ -678,11 +680,12 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
#endif
|
||||
|
||||
TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum,
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode);
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode,
|
||||
reln->smgr_rnode.backend);
|
||||
|
||||
v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_FAIL);
|
||||
v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_FAIL);
|
||||
|
||||
seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE));
|
||||
|
||||
@@ -697,9 +700,10 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ);
|
||||
|
||||
TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
|
||||
reln->smgr_rnode.spcNode,
|
||||
reln->smgr_rnode.dbNode,
|
||||
reln->smgr_rnode.relNode,
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode,
|
||||
reln->smgr_rnode.backend,
|
||||
nbytes,
|
||||
BLCKSZ);
|
||||
|
||||
@@ -720,7 +724,7 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
errhint("Check free disk space.")));
|
||||
}
|
||||
|
||||
if (!isTemp)
|
||||
if (!skipFsync && !SmgrIsTemp(reln))
|
||||
register_dirty_segment(reln, forknum, v);
|
||||
}
|
||||
|
||||
@@ -794,8 +798,7 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum)
|
||||
* mdtruncate() -- Truncate relation to specified number of blocks.
|
||||
*/
|
||||
void
|
||||
mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks,
|
||||
bool isTemp)
|
||||
mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
|
||||
{
|
||||
MdfdVec *v;
|
||||
BlockNumber curnblk;
|
||||
@@ -839,7 +842,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks,
|
||||
errmsg("could not truncate file \"%s\": %m",
|
||||
FilePathName(v->mdfd_vfd))));
|
||||
|
||||
if (!isTemp)
|
||||
if (!SmgrIsTemp(reln))
|
||||
register_dirty_segment(reln, forknum, v);
|
||||
v = v->mdfd_chain;
|
||||
Assert(ov != reln->md_fd[forknum]); /* we never drop the 1st
|
||||
@@ -864,7 +867,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks,
|
||||
errmsg("could not truncate file \"%s\" to %u blocks: %m",
|
||||
FilePathName(v->mdfd_vfd),
|
||||
nblocks)));
|
||||
if (!isTemp)
|
||||
if (!SmgrIsTemp(reln))
|
||||
register_dirty_segment(reln, forknum, v);
|
||||
v = v->mdfd_chain;
|
||||
ov->mdfd_chain = NULL;
|
||||
@@ -1052,7 +1055,8 @@ mdsync(void)
|
||||
* the relation will have been dirtied through this same smgr
|
||||
* relation, and so we can save a file open/close cycle.
|
||||
*/
|
||||
reln = smgropen(entry->tag.rnode);
|
||||
reln = smgropen(entry->tag.rnode.node,
|
||||
entry->tag.rnode.backend);
|
||||
|
||||
/*
|
||||
* It is possible that the relation has been dropped or
|
||||
@@ -1235,7 +1239,7 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
|
||||
* a remote pending-ops table.
|
||||
*/
|
||||
static void
|
||||
register_unlink(RelFileNode rnode)
|
||||
register_unlink(RelFileNodeBackend rnode)
|
||||
{
|
||||
if (pendingOpsTable)
|
||||
{
|
||||
@@ -1278,7 +1282,8 @@ register_unlink(RelFileNode rnode)
|
||||
* structure for them.)
|
||||
*/
|
||||
void
|
||||
RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
|
||||
RememberFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum,
|
||||
BlockNumber segno)
|
||||
{
|
||||
Assert(pendingOpsTable);
|
||||
|
||||
@@ -1291,7 +1296,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
|
||||
hash_seq_init(&hstat, pendingOpsTable);
|
||||
while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
|
||||
{
|
||||
if (RelFileNodeEquals(entry->tag.rnode, rnode) &&
|
||||
if (RelFileNodeBackendEquals(entry->tag.rnode, rnode) &&
|
||||
entry->tag.forknum == forknum)
|
||||
{
|
||||
/* Okay, cancel this entry */
|
||||
@@ -1312,7 +1317,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
|
||||
hash_seq_init(&hstat, pendingOpsTable);
|
||||
while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
|
||||
{
|
||||
if (entry->tag.rnode.dbNode == rnode.dbNode)
|
||||
if (entry->tag.rnode.node.dbNode == rnode.node.dbNode)
|
||||
{
|
||||
/* Okay, cancel this entry */
|
||||
entry->canceled = true;
|
||||
@@ -1326,7 +1331,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
|
||||
PendingUnlinkEntry *entry = (PendingUnlinkEntry *) lfirst(cell);
|
||||
|
||||
next = lnext(cell);
|
||||
if (entry->rnode.dbNode == rnode.dbNode)
|
||||
if (entry->rnode.node.dbNode == rnode.node.dbNode)
|
||||
{
|
||||
pendingUnlinks = list_delete_cell(pendingUnlinks, cell, prev);
|
||||
pfree(entry);
|
||||
@@ -1393,7 +1398,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
|
||||
* ForgetRelationFsyncRequests -- forget any fsyncs for a rel
|
||||
*/
|
||||
void
|
||||
ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum)
|
||||
ForgetRelationFsyncRequests(RelFileNodeBackend rnode, ForkNumber forknum)
|
||||
{
|
||||
if (pendingOpsTable)
|
||||
{
|
||||
@@ -1428,11 +1433,12 @@ ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum)
|
||||
void
|
||||
ForgetDatabaseFsyncRequests(Oid dbid)
|
||||
{
|
||||
RelFileNode rnode;
|
||||
RelFileNodeBackend rnode;
|
||||
|
||||
rnode.dbNode = dbid;
|
||||
rnode.spcNode = 0;
|
||||
rnode.relNode = 0;
|
||||
rnode.node.dbNode = dbid;
|
||||
rnode.node.spcNode = 0;
|
||||
rnode.node.relNode = 0;
|
||||
rnode.backend = InvalidBackendId;
|
||||
|
||||
if (pendingOpsTable)
|
||||
{
|
||||
@@ -1523,12 +1529,12 @@ _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno,
|
||||
* specified block.
|
||||
*
|
||||
* If the segment doesn't exist, we ereport, return NULL, or create the
|
||||
* segment, according to "behavior". Note: isTemp need only be correct
|
||||
* in the EXTENSION_CREATE case.
|
||||
* segment, according to "behavior". Note: skipFsync is only used in the
|
||||
* EXTENSION_CREATE case.
|
||||
*/
|
||||
static MdfdVec *
|
||||
_mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
|
||||
bool isTemp, ExtensionBehavior behavior)
|
||||
bool skipFsync, ExtensionBehavior behavior)
|
||||
{
|
||||
MdfdVec *v = mdopen(reln, forknum, behavior);
|
||||
BlockNumber targetseg;
|
||||
@@ -1566,7 +1572,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
|
||||
|
||||
mdextend(reln, forknum,
|
||||
nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
|
||||
zerobuf, isTemp);
|
||||
zerobuf, skipFsync);
|
||||
pfree(zerobuf);
|
||||
}
|
||||
v->mdfd_chain = _mdfd_openseg(reln, forknum, +nextsegno, O_CREAT);
|
||||
|
||||
Reference in New Issue
Block a user