1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Introduce the concept of relation forks. An smgr relation can now consist
of multiple forks, and each fork can be created and grown separately.

The bulk of this patch is about changing the smgr API to include an extra
ForkNumber argument in every smgr function. Also, smgrscheduleunlink and
smgrdounlink no longer implicitly call smgrclose, because other forks might
still exist after unlinking one. The callers of those functions have been
modified to call smgrclose explicitly.

This patch in itself doesn't have any user-visible effect, but provides the
infrastructure needed for upcoming patches. The additional forks envisioned
are a rewritten free space map (FSM) implementation that doesn't rely on a
fixed-size shared memory block, and a visibility map that lets VACUUM skip
portions of a table that have no dead tuples.
This commit is contained in:
Heikki Linnakangas
2008-08-11 11:05:11 +00:00
parent eca1388629
commit 3f0e808c4a
31 changed files with 733 additions and 446 deletions

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.44 2008/08/01 13:16:08 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.45 2008/08/11 11:05:10 heikki Exp $
*
* NOTES
* Each global transaction is associated with a global transaction
@ -141,12 +141,12 @@ static void RecordTransactionCommitPrepared(TransactionId xid,
int nchildren,
TransactionId *children,
int nrels,
RelFileNode *rels);
RelFileFork *rels);
static void RecordTransactionAbortPrepared(TransactionId xid,
int nchildren,
TransactionId *children,
int nrels,
RelFileNode *rels);
RelFileFork *rels);
static void ProcessRecords(char *bufptr, TransactionId xid,
const TwoPhaseCallback callbacks[]);
@ -694,8 +694,8 @@ TwoPhaseGetDummyProc(TransactionId xid)
*
* 1. TwoPhaseFileHeader
* 2. TransactionId[] (subtransactions)
* 3. RelFileNode[] (files to be deleted at commit)
* 4. RelFileNode[] (files to be deleted at abort)
* 3. RelFileFork[] (files to be deleted at commit)
* 4. RelFileFork[] (files to be deleted at abort)
* 5. TwoPhaseRecordOnDisk
* 6. ...
* 7. TwoPhaseRecordOnDisk (end sentinel, rmid == TWOPHASE_RM_END_ID)
@ -793,8 +793,8 @@ StartPrepare(GlobalTransaction gxact)
TransactionId xid = gxact->proc.xid;
TwoPhaseFileHeader hdr;
TransactionId *children;
RelFileNode *commitrels;
RelFileNode *abortrels;
RelFileFork *commitrels;
RelFileFork *abortrels;
/* Initialize linked list */
records.head = palloc0(sizeof(XLogRecData));
@ -832,12 +832,12 @@ StartPrepare(GlobalTransaction gxact)
}
if (hdr.ncommitrels > 0)
{
save_state_data(commitrels, hdr.ncommitrels * sizeof(RelFileNode));
save_state_data(commitrels, hdr.ncommitrels * sizeof(RelFileFork));
pfree(commitrels);
}
if (hdr.nabortrels > 0)
{
save_state_data(abortrels, hdr.nabortrels * sizeof(RelFileNode));
save_state_data(abortrels, hdr.nabortrels * sizeof(RelFileFork));
pfree(abortrels);
}
}
@ -1140,8 +1140,8 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
TwoPhaseFileHeader *hdr;
TransactionId latestXid;
TransactionId *children;
RelFileNode *commitrels;
RelFileNode *abortrels;
RelFileFork *commitrels;
RelFileFork *abortrels;
int i;
/*
@ -1169,10 +1169,10 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
children = (TransactionId *) bufptr;
bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId));
commitrels = (RelFileNode *) bufptr;
bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileNode));
abortrels = (RelFileNode *) bufptr;
bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode));
commitrels = (RelFileFork *) bufptr;
bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileFork));
abortrels = (RelFileFork *) bufptr;
bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileFork));
/* compute latestXid among all children */
latestXid = TransactionIdLatest(xid, hdr->nsubxacts, children);
@ -1215,12 +1215,20 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
if (isCommit)
{
for (i = 0; i < hdr->ncommitrels; i++)
smgrdounlink(smgropen(commitrels[i]), false, false);
{
SMgrRelation srel = smgropen(commitrels[i].rnode);
smgrdounlink(srel, commitrels[i].forknum, false, false);
smgrclose(srel);
}
}
else
{
for (i = 0; i < hdr->nabortrels; i++)
smgrdounlink(smgropen(abortrels[i]), false, false);
{
SMgrRelation srel = smgropen(abortrels[i].rnode);
smgrdounlink(srel, abortrels[i].forknum, false, false);
smgrclose(srel);
}
}
/* And now do the callbacks */
@ -1631,8 +1639,8 @@ RecoverPreparedTransactions(void)
bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
subxids = (TransactionId *) bufptr;
bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId));
bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileNode));
bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode));
bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileFork));
bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileFork));
/*
* Reconstruct subtrans state for the transaction --- needed
@ -1685,7 +1693,7 @@ RecordTransactionCommitPrepared(TransactionId xid,
int nchildren,
TransactionId *children,
int nrels,
RelFileNode *rels)
RelFileFork *rels)
{
XLogRecData rdata[3];
int lastrdata = 0;
@ -1710,7 +1718,7 @@ RecordTransactionCommitPrepared(TransactionId xid,
{
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode);
rdata[1].len = nrels * sizeof(RelFileFork);
rdata[1].buffer = InvalidBuffer;
lastrdata = 1;
}
@ -1760,7 +1768,7 @@ RecordTransactionAbortPrepared(TransactionId xid,
int nchildren,
TransactionId *children,
int nrels,
RelFileNode *rels)
RelFileFork *rels)
{
XLogRecData rdata[3];
int lastrdata = 0;
@ -1790,7 +1798,7 @@ RecordTransactionAbortPrepared(TransactionId xid,
{
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode);
rdata[1].len = nrels * sizeof(RelFileFork);
rdata[1].buffer = InvalidBuffer;
lastrdata = 1;
}

View File

@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.264 2008/05/12 20:01:58 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.265 2008/08/11 11:05:10 heikki Exp $
*
*-------------------------------------------------------------------------
*/
@ -819,7 +819,7 @@ RecordTransactionCommit(void)
bool markXidCommitted = TransactionIdIsValid(xid);
TransactionId latestXid = InvalidTransactionId;
int nrels;
RelFileNode *rels;
RelFileFork *rels;
bool haveNonTemp;
int nchildren;
TransactionId *children;
@ -900,7 +900,7 @@ RecordTransactionCommit(void)
{
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode);
rdata[1].len = nrels * sizeof(RelFileFork);
rdata[1].buffer = InvalidBuffer;
lastrdata = 1;
}
@ -1203,7 +1203,7 @@ RecordTransactionAbort(bool isSubXact)
TransactionId xid = GetCurrentTransactionIdIfAny();
TransactionId latestXid;
int nrels;
RelFileNode *rels;
RelFileFork *rels;
int nchildren;
TransactionId *children;
XLogRecData rdata[3];
@ -1264,7 +1264,7 @@ RecordTransactionAbort(bool isSubXact)
{
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode);
rdata[1].len = nrels * sizeof(RelFileFork);
rdata[1].buffer = InvalidBuffer;
lastrdata = 1;
}
@ -4282,8 +4282,13 @@ xact_redo_commit(xl_xact_commit *xlrec, TransactionId xid)
/* Make sure files supposed to be dropped are dropped */
for (i = 0; i < xlrec->nrels; i++)
{
XLogDropRelation(xlrec->xnodes[i]);
smgrdounlink(smgropen(xlrec->xnodes[i]), false, true);
SMgrRelation srel;
XLogDropRelation(xlrec->xnodes[i].rnode, xlrec->xnodes[i].forknum);
srel = smgropen(xlrec->xnodes[i].rnode);
smgrdounlink(srel, xlrec->xnodes[i].forknum, false, true);
smgrclose(srel);
}
}
@ -4317,8 +4322,13 @@ xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid)
/* Make sure files supposed to be dropped are dropped */
for (i = 0; i < xlrec->nrels; i++)
{
XLogDropRelation(xlrec->xnodes[i]);
smgrdounlink(smgropen(xlrec->xnodes[i]), false, true);
SMgrRelation srel;
XLogDropRelation(xlrec->xnodes[i].rnode, xlrec->xnodes[i].forknum);
srel = smgropen(xlrec->xnodes[i].rnode);
smgrdounlink(srel, xlrec->xnodes[i].forknum, false, true);
smgrclose(srel);
}
}
@ -4374,10 +4384,12 @@ xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec)
appendStringInfo(buf, "; rels:");
for (i = 0; i < xlrec->nrels; i++)
{
RelFileNode rnode = xlrec->xnodes[i];
RelFileNode rnode = xlrec->xnodes[i].rnode;
ForkNumber forknum = xlrec->xnodes[i].forknum;
appendStringInfo(buf, " %u/%u/%u",
rnode.spcNode, rnode.dbNode, rnode.relNode);
appendStringInfo(buf, " %u/%u/%u/%u",
rnode.spcNode, rnode.dbNode, rnode.relNode,
forknum);
}
}
if (xlrec->nsubxacts > 0)
@ -4402,10 +4414,12 @@ xact_desc_abort(StringInfo buf, xl_xact_abort *xlrec)
appendStringInfo(buf, "; rels:");
for (i = 0; i < xlrec->nrels; i++)
{
RelFileNode rnode = xlrec->xnodes[i];
RelFileNode rnode = xlrec->xnodes[i].rnode;
ForkNumber forknum = xlrec->xnodes[i].forknum;
appendStringInfo(buf, " %u/%u/%u",
rnode.spcNode, rnode.dbNode, rnode.relNode);
appendStringInfo(buf, " %u/%u/%u/%u",
rnode.spcNode, rnode.dbNode, rnode.relNode,
forknum);
}
}
if (xlrec->nsubxacts > 0)

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.316 2008/07/13 20:45:47 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.317 2008/08/11 11:05:10 heikki Exp $
*
*-------------------------------------------------------------------------
*/
@ -1034,8 +1034,7 @@ XLogCheckBuffer(XLogRecData *rdata, bool doPageWrites,
/*
* The page needs to be backed up, so set up *bkpb
*/
bkpb->node = BufferGetFileNode(rdata->buffer);
bkpb->block = BufferGetBlockNumber(rdata->buffer);
BufferGetTag(rdata->buffer, &bkpb->node, &bkpb->fork, &bkpb->block);
if (rdata->buffer_std)
{
@ -2855,7 +2854,8 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn)
memcpy(&bkpb, blk, sizeof(BkpBlock));
blk += sizeof(BkpBlock);
buffer = XLogReadBuffer(bkpb.node, bkpb.block, true);
buffer = XLogReadBufferWithFork(bkpb.node, bkpb.fork, bkpb.block,
true);
Assert(BufferIsValid(buffer));
page = (Page) BufferGetPage(buffer);

View File

@ -11,7 +11,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.57 2008/07/13 20:45:47 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.58 2008/08/11 11:05:10 heikki Exp $
*
*-------------------------------------------------------------------------
*/
@ -37,6 +37,7 @@
/*
 * Hash key for invalid_page_tab: identifies one page by relation, fork, and
 * block number.  The whole struct is handed to hash_search() as the key, and
 * the code that fills it in assumes the struct contains no padding.
 */
typedef struct xl_invalid_page_key
{
RelFileNode node; /* the relation */
ForkNumber forkno; /* the fork number */
BlockNumber blkno; /* the page */
} xl_invalid_page_key;
@ -51,7 +52,8 @@ static HTAB *invalid_page_tab = NULL;
/* Log a reference to an invalid page */
static void
log_invalid_page(RelFileNode node, BlockNumber blkno, bool present)
log_invalid_page(RelFileNode node, ForkNumber forkno, BlockNumber blkno,
bool present)
{
xl_invalid_page_key key;
xl_invalid_page *hentry;
@ -63,11 +65,11 @@ log_invalid_page(RelFileNode node, BlockNumber blkno, bool present)
* something about the XLOG record that generated the reference).
*/
if (present)
elog(DEBUG1, "page %u of relation %u/%u/%u is uninitialized",
blkno, node.spcNode, node.dbNode, node.relNode);
elog(DEBUG1, "page %u of relation %u/%u/%u/%u is uninitialized",
blkno, node.spcNode, node.dbNode, node.relNode, forkno);
else
elog(DEBUG1, "page %u of relation %u/%u/%u does not exist",
blkno, node.spcNode, node.dbNode, node.relNode);
elog(DEBUG1, "page %u of relation %u/%u/%u/%u does not exist",
blkno, node.spcNode, node.dbNode, node.relNode, forkno);
if (invalid_page_tab == NULL)
{
@ -87,6 +89,7 @@ log_invalid_page(RelFileNode node, BlockNumber blkno, bool present)
/* we currently assume xl_invalid_page_key contains no padding */
key.node = node;
key.forkno = forkno;
key.blkno = blkno;
hentry = (xl_invalid_page *)
hash_search(invalid_page_tab, (void *) &key, HASH_ENTER, &found);
@ -104,7 +107,7 @@ log_invalid_page(RelFileNode node, BlockNumber blkno, bool present)
/* Forget any invalid pages >= minblkno, because they've been dropped */
static void
forget_invalid_pages(RelFileNode node, BlockNumber minblkno)
forget_invalid_pages(RelFileNode node, ForkNumber forkno, BlockNumber minblkno)
{
HASH_SEQ_STATUS status;
xl_invalid_page *hentry;
@ -117,11 +120,12 @@ forget_invalid_pages(RelFileNode node, BlockNumber minblkno)
while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
{
if (RelFileNodeEquals(hentry->key.node, node) &&
hentry->key.forkno == forkno &&
hentry->key.blkno >= minblkno)
{
elog(DEBUG2, "page %u of relation %u/%u/%u has been dropped",
elog(DEBUG2, "page %u of relation %u/%u/%u/%u has been dropped",
hentry->key.blkno, hentry->key.node.spcNode,
hentry->key.node.dbNode, hentry->key.node.relNode);
hentry->key.node.dbNode, hentry->key.node.relNode, forkno);
if (hash_search(invalid_page_tab,
(void *) &hentry->key,
@ -223,6 +227,18 @@ XLogCheckInvalidPages(void)
*/
Buffer
XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
{
/* Thin wrapper: same arguments, with the fork fixed to MAIN_FORKNUM. */
return XLogReadBufferWithFork(rnode, MAIN_FORKNUM, blkno, init);
}
/*
* XLogReadBufferWithFork
* Like XLogReadBuffer, but for reading relation forks other than
* the main one.
*/
Buffer
XLogReadBufferWithFork(RelFileNode rnode, ForkNumber forknum,
BlockNumber blkno, bool init)
{
BlockNumber lastblock;
Buffer buffer;
@ -241,21 +257,21 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
* filesystem loses an inode during a crash. Better to write the data
* until we are actually told to delete the file.)
*/
smgrcreate(smgr, false, true);
smgrcreate(smgr, forknum, false, true);
lastblock = smgrnblocks(smgr);
lastblock = smgrnblocks(smgr, forknum);
if (blkno < lastblock)
{
/* page exists in file */
buffer = ReadBufferWithoutRelcache(rnode, false, blkno, init);
buffer = ReadBufferWithoutRelcache(rnode, false, forknum, blkno, init);
}
else
{
/* hm, page doesn't exist in file */
if (!init)
{
log_invalid_page(rnode, blkno, false);
log_invalid_page(rnode, forknum, blkno, false);
return InvalidBuffer;
}
/* OK to extend the file */
@ -266,7 +282,8 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
{
if (buffer != InvalidBuffer)
ReleaseBuffer(buffer);
buffer = ReadBufferWithoutRelcache(rnode, false, P_NEW, false);
buffer = ReadBufferWithoutRelcache(rnode, false, forknum,
P_NEW, false);
lastblock++;
}
Assert(BufferGetBlockNumber(buffer) == blkno);
@ -282,7 +299,7 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
if (PageIsNew(page))
{
UnlockReleaseBuffer(buffer);
log_invalid_page(rnode, blkno, true);
log_invalid_page(rnode, forknum, blkno, true);
return InvalidBuffer;
}
}
@ -363,12 +380,9 @@ FreeFakeRelcacheEntry(Relation fakerel)
* any open "invalid-page" records for the relation.
*/
void
XLogDropRelation(RelFileNode rnode)
XLogDropRelation(RelFileNode rnode, ForkNumber forknum)
{
/* Tell smgr to forget about this relation as well */
smgrclosenode(rnode);
forget_invalid_pages(rnode, 0);
forget_invalid_pages(rnode, forknum, 0);
}
/*
@ -397,7 +411,8 @@ XLogDropDatabase(Oid dbid)
* We need to clean up any open "invalid-page" records for the dropped pages.
*/
void
XLogTruncateRelation(RelFileNode rnode, BlockNumber nblocks)
XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum,
BlockNumber nblocks)
{
forget_invalid_pages(rnode, nblocks);
forget_invalid_pages(rnode, forkNum, nblocks);
}