1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-27 00:12:01 +03:00

Include the backend ID in the relpath of temporary relations.

This allows us to reliably remove all leftover temporary relation
files on cluster startup without reference to system catalogs or WAL;
therefore, we no longer include temporary relations in XLOG_XACT_COMMIT
and XLOG_XACT_ABORT WAL records.

Since these changes require including a backend ID in each
SharedInvalSmgrMsg, the size of the SharedInvalidationMessage.id
field has been reduced from two bytes to one, and the maximum number
of connections has been reduced from INT_MAX / 4 to 2^23-1.  It would
be possible to remove these restrictions by increasing the size of
SharedInvalidationMessage by 4 bytes, but right now that doesn't seem
like a good trade-off.

Review by Jaime Casanova and Tom Lane.
This commit is contained in:
Robert Haas
2010-08-13 20:10:54 +00:00
parent 3f9479ef3f
commit debcec7dc3
37 changed files with 669 additions and 343 deletions

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.151 2010/02/26 02:01:01 momjian Exp $
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.152 2010/08/13 20:10:52 rhaas Exp $
*
*-------------------------------------------------------------------------
*/
@@ -119,7 +119,7 @@ static MemoryContext MdCxt; /* context for all md.c allocations */
*/
typedef struct
{
RelFileNode rnode; /* the targeted relation */
RelFileNodeBackend rnode; /* the targeted relation */
ForkNumber forknum;
BlockNumber segno; /* which segment */
} PendingOperationTag;
@@ -135,7 +135,7 @@ typedef struct
typedef struct
{
RelFileNode rnode; /* the dead relation to delete */
RelFileNodeBackend rnode; /* the dead relation to delete */
CycleCtr cycle_ctr; /* mdckpt_cycle_ctr when request was made */
} PendingUnlinkEntry;
@@ -158,14 +158,14 @@ static MdfdVec *mdopen(SMgrRelation reln, ForkNumber forknum,
ExtensionBehavior behavior);
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum,
MdfdVec *seg);
static void register_unlink(RelFileNode rnode);
static void register_unlink(RelFileNodeBackend rnode);
static MdfdVec *_fdvec_alloc(void);
static char *_mdfd_segpath(SMgrRelation reln, ForkNumber forknum,
BlockNumber segno);
static MdfdVec *_mdfd_openseg(SMgrRelation reln, ForkNumber forkno,
BlockNumber segno, int oflags);
static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forkno,
BlockNumber blkno, bool isTemp, ExtensionBehavior behavior);
BlockNumber blkno, bool skipFsync, ExtensionBehavior behavior);
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum,
MdfdVec *seg);
@@ -321,7 +321,7 @@ mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
* we are usually not in a transaction anymore when this is called.
*/
void
mdunlink(RelFileNode rnode, ForkNumber forkNum, bool isRedo)
mdunlink(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
{
char *path;
int ret;
@@ -417,7 +417,7 @@ mdunlink(RelFileNode rnode, ForkNumber forkNum, bool isRedo)
*/
void
mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
char *buffer, bool isTemp)
char *buffer, bool skipFsync)
{
off_t seekpos;
int nbytes;
@@ -440,7 +440,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
relpath(reln->smgr_rnode, forknum),
InvalidBlockNumber)));
v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_CREATE);
v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE));
@@ -478,7 +478,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
errhint("Check free disk space.")));
}
if (!isTemp)
if (!skipFsync && !SmgrIsTemp(reln))
register_dirty_segment(reln, forknum, v);
Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
@@ -605,9 +605,10 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
MdfdVec *v;
TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode);
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
reln->smgr_rnode.backend);
v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL);
@@ -624,9 +625,10 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ);
TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode,
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
reln->smgr_rnode.backend,
nbytes,
BLCKSZ);
@@ -666,7 +668,7 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
*/
void
mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
char *buffer, bool isTemp)
char *buffer, bool skipFsync)
{
off_t seekpos;
int nbytes;
@@ -678,11 +680,12 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
#endif
TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode);
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
reln->smgr_rnode.backend);
v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_FAIL);
v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_FAIL);
seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE));
@@ -697,9 +700,10 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ);
TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode,
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
reln->smgr_rnode.backend,
nbytes,
BLCKSZ);
@@ -720,7 +724,7 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
errhint("Check free disk space.")));
}
if (!isTemp)
if (!skipFsync && !SmgrIsTemp(reln))
register_dirty_segment(reln, forknum, v);
}
@@ -794,8 +798,7 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum)
* mdtruncate() -- Truncate relation to specified number of blocks.
*/
void
mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks,
bool isTemp)
mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
{
MdfdVec *v;
BlockNumber curnblk;
@@ -839,7 +842,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks,
errmsg("could not truncate file \"%s\": %m",
FilePathName(v->mdfd_vfd))));
if (!isTemp)
if (!SmgrIsTemp(reln))
register_dirty_segment(reln, forknum, v);
v = v->mdfd_chain;
Assert(ov != reln->md_fd[forknum]); /* we never drop the 1st
@@ -864,7 +867,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks,
errmsg("could not truncate file \"%s\" to %u blocks: %m",
FilePathName(v->mdfd_vfd),
nblocks)));
if (!isTemp)
if (!SmgrIsTemp(reln))
register_dirty_segment(reln, forknum, v);
v = v->mdfd_chain;
ov->mdfd_chain = NULL;
@@ -1052,7 +1055,8 @@ mdsync(void)
* the relation will have been dirtied through this same smgr
* relation, and so we can save a file open/close cycle.
*/
reln = smgropen(entry->tag.rnode);
reln = smgropen(entry->tag.rnode.node,
entry->tag.rnode.backend);
/*
* It is possible that the relation has been dropped or
@@ -1235,7 +1239,7 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
* a remote pending-ops table.
*/
static void
register_unlink(RelFileNode rnode)
register_unlink(RelFileNodeBackend rnode)
{
if (pendingOpsTable)
{
@@ -1278,7 +1282,8 @@ register_unlink(RelFileNode rnode)
* structure for them.)
*/
void
RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
RememberFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum,
BlockNumber segno)
{
Assert(pendingOpsTable);
@@ -1291,7 +1296,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
hash_seq_init(&hstat, pendingOpsTable);
while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
{
if (RelFileNodeEquals(entry->tag.rnode, rnode) &&
if (RelFileNodeBackendEquals(entry->tag.rnode, rnode) &&
entry->tag.forknum == forknum)
{
/* Okay, cancel this entry */
@@ -1312,7 +1317,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
hash_seq_init(&hstat, pendingOpsTable);
while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
{
if (entry->tag.rnode.dbNode == rnode.dbNode)
if (entry->tag.rnode.node.dbNode == rnode.node.dbNode)
{
/* Okay, cancel this entry */
entry->canceled = true;
@@ -1326,7 +1331,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
PendingUnlinkEntry *entry = (PendingUnlinkEntry *) lfirst(cell);
next = lnext(cell);
if (entry->rnode.dbNode == rnode.dbNode)
if (entry->rnode.node.dbNode == rnode.node.dbNode)
{
pendingUnlinks = list_delete_cell(pendingUnlinks, cell, prev);
pfree(entry);
@@ -1393,7 +1398,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
* ForgetRelationFsyncRequests -- forget any fsyncs for a rel
*/
void
ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum)
ForgetRelationFsyncRequests(RelFileNodeBackend rnode, ForkNumber forknum)
{
if (pendingOpsTable)
{
@@ -1428,11 +1433,12 @@ ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum)
void
ForgetDatabaseFsyncRequests(Oid dbid)
{
RelFileNode rnode;
RelFileNodeBackend rnode;
rnode.dbNode = dbid;
rnode.spcNode = 0;
rnode.relNode = 0;
rnode.node.dbNode = dbid;
rnode.node.spcNode = 0;
rnode.node.relNode = 0;
rnode.backend = InvalidBackendId;
if (pendingOpsTable)
{
@@ -1523,12 +1529,12 @@ _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno,
* specified block.
*
* If the segment doesn't exist, we ereport, return NULL, or create the
* segment, according to "behavior". Note: isTemp need only be correct
* in the EXTENSION_CREATE case.
* segment, according to "behavior". Note: skipFsync is only used in the
* EXTENSION_CREATE case.
*/
static MdfdVec *
_mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
bool isTemp, ExtensionBehavior behavior)
bool skipFsync, ExtensionBehavior behavior)
{
MdfdVec *v = mdopen(reln, forknum, behavior);
BlockNumber targetseg;
@@ -1566,7 +1572,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
mdextend(reln, forknum,
nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
zerobuf, isTemp);
zerobuf, skipFsync);
pfree(zerobuf);
}
v->mdfd_chain = _mdfd_openseg(reln, forknum, +nextsegno, O_CREAT);