1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-10 17:42:29 +03:00

Clean up WAL/buffer interactions as per my recent proposal. Get rid of the
misleadingly-named WriteBuffer routine, and instead require routines that
change buffer pages to call MarkBufferDirty (which does exactly what it says).
We also require that they do so before calling XLogInsert; this takes care of
the synchronization requirement documented in SyncOneBuffer.  Note that
because bufmgr takes the buffer content lock (in shared mode) while writing
out any buffer, it doesn't matter whether MarkBufferDirty is executed before
the buffer content change is complete, so long as the content change is
completed before releasing exclusive lock on the buffer.  So it's OK to set
the dirtybit before we fill in the LSN.
This eliminates the former kluge of needing to set the dirtybit in LockBuffer.
Aside from making the code more transparent, we can also add some new
debugging assertions, in particular that the caller of MarkBufferDirty must
hold the buffer content lock, not merely a pin.
This commit is contained in:
Tom Lane
2006-03-31 23:32:07 +00:00
parent 89395bfa6f
commit a8b8f4db23
24 changed files with 434 additions and 537 deletions

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.133 2006/03/05 15:58:21 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.134 2006/03/31 23:32:05 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -544,10 +544,13 @@ _bt_insertonpg(Relation rel,
_bt_pgaddtup(rel, page, itemsz, itup, newitemoff, "page");
MarkBufferDirty(buf);
if (BufferIsValid(metabuf))
{
metad->btm_fastroot = itup_blkno;
metad->btm_fastlevel = lpageop->btpo.level;
MarkBufferDirty(metabuf);
}
/* XLOG stuff */
@@ -619,11 +622,11 @@ _bt_insertonpg(Relation rel,
END_CRIT_SECTION();
/* Write out the updated page and release pin/lock */
/* release pin/lock */
if (BufferIsValid(metabuf))
_bt_wrtbuf(rel, metabuf);
_bt_relbuf(rel, metabuf);
_bt_wrtbuf(rel, buf);
_bt_relbuf(rel, buf);
}
}
@@ -819,12 +822,21 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
* Right sibling is locked, new siblings are prepared, but original page
* is not updated yet. Log changes before continuing.
*
* NO EREPORT(ERROR) till right sibling is updated.
* NO EREPORT(ERROR) till right sibling is updated. We can get away with
* not starting the critical section till here because we haven't been
* scribbling on the original page yet, and we don't care about the
* new sibling until it's linked into the btree.
*/
START_CRIT_SECTION();
MarkBufferDirty(buf);
MarkBufferDirty(rbuf);
if (!P_RIGHTMOST(ropaque))
{
sopaque->btpo_prev = BufferGetBlockNumber(rbuf);
MarkBufferDirty(sbuf);
}
/* XLOG stuff */
if (!rel->rd_istemp)
@@ -904,16 +916,22 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
* original. Note that this is not a waste of time, since we also require
* (in the page management code) that the center of a page always be
* clean, and the most efficient way to guarantee this is just to compact
* the data by reinserting it into a new left page.
* the data by reinserting it into a new left page. (XXX the latter
* comment is probably obsolete.)
*
* It's a bit weird that we don't fill in the left page till after writing
* the XLOG entry, but not really worth changing. Note that we use the
* origpage data (specifically its BTP_ROOT bit) while preparing the XLOG
* entry, so simply reshuffling the code won't do.
*/
PageRestoreTempPage(leftpage, origpage);
END_CRIT_SECTION();
/* write and release the old right sibling */
/* release the old right sibling */
if (!P_RIGHTMOST(ropaque))
_bt_wrtbuf(rel, sbuf);
_bt_relbuf(rel, sbuf);
/* split's done */
return rbuf;
@@ -1169,9 +1187,9 @@ _bt_insert_parent(Relation rel,
/* create a new root node and update the metapage */
rootbuf = _bt_newroot(rel, buf, rbuf);
/* release the split buffers */
_bt_wrtbuf(rel, rootbuf);
_bt_wrtbuf(rel, rbuf);
_bt_wrtbuf(rel, buf);
_bt_relbuf(rel, rootbuf);
_bt_relbuf(rel, rbuf);
_bt_relbuf(rel, buf);
}
else
{
@@ -1220,9 +1238,9 @@ _bt_insert_parent(Relation rel,
pbuf = _bt_getstackbuf(rel, stack, BT_WRITE);
/* Now we can write and unlock the children */
_bt_wrtbuf(rel, rbuf);
_bt_wrtbuf(rel, buf);
/* Now we can unlock the children */
_bt_relbuf(rel, rbuf);
_bt_relbuf(rel, buf);
/* Check for error only after releasing the children */
if (pbuf == InvalidBuffer)
@@ -1370,7 +1388,6 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
{
Buffer rootbuf;
Page lpage,
rpage,
rootpage;
BlockNumber lbkno,
rbkno;
@@ -1387,7 +1404,6 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
lbkno = BufferGetBlockNumber(lbuf);
rbkno = BufferGetBlockNumber(rbuf);
lpage = BufferGetPage(lbuf);
rpage = BufferGetPage(rbuf);
/* get a new root page */
rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
@@ -1451,6 +1467,9 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
elog(PANIC, "failed to add rightkey to new root page");
pfree(new_item);
MarkBufferDirty(rootbuf);
MarkBufferDirty(metabuf);
/* XLOG stuff */
if (!rel->rd_istemp)
{
@@ -1483,16 +1502,12 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
PageSetTLI(rootpage, ThisTimeLineID);
PageSetLSN(metapg, recptr);
PageSetTLI(metapg, ThisTimeLineID);
PageSetLSN(lpage, recptr);
PageSetTLI(lpage, ThisTimeLineID);
PageSetLSN(rpage, recptr);
PageSetTLI(rpage, ThisTimeLineID);
}
END_CRIT_SECTION();
/* write and let go of metapage buffer */
_bt_wrtbuf(rel, metabuf);
/* done with metapage */
_bt_relbuf(rel, metabuf);
return rootbuf;
}

View File

@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.93 2006/03/05 15:58:21 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.94 2006/03/31 23:32:05 tgl Exp $
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
@@ -53,13 +53,16 @@ _bt_metapinit(Relation rel)
buf = ReadBuffer(rel, P_NEW);
Assert(BufferGetBlockNumber(buf) == BTREE_METAPAGE);
LockBuffer(buf, BT_WRITE);
pg = BufferGetPage(buf);
/* NO ELOG(ERROR) from here till newmeta op is logged */
START_CRIT_SECTION();
_bt_initmetapage(pg, P_NONE, 0);
metad = BTPageGetMeta(pg);
/* NO ELOG(ERROR) from here till newmeta op is logged */
START_CRIT_SECTION();
MarkBufferDirty(buf);
/* XLOG stuff */
if (!rel->rd_istemp)
@@ -89,7 +92,7 @@ _bt_metapinit(Relation rel)
END_CRIT_SECTION();
WriteBuffer(buf);
UnlockReleaseBuffer(buf);
}
/*
@@ -235,6 +238,9 @@ _bt_getroot(Relation rel, int access)
metad->btm_fastroot = rootblkno;
metad->btm_fastlevel = 0;
MarkBufferDirty(rootbuf);
MarkBufferDirty(metabuf);
/* XLOG stuff */
if (!rel->rd_istemp)
{
@@ -261,8 +267,6 @@ _bt_getroot(Relation rel, int access)
END_CRIT_SECTION();
_bt_wrtnorelbuf(rel, rootbuf);
/*
* swap root write lock for read lock. There is no danger of anyone
* else accessing the new root page while it's unlocked, since no one
@@ -271,8 +275,8 @@ _bt_getroot(Relation rel, int access)
LockBuffer(rootbuf, BUFFER_LOCK_UNLOCK);
LockBuffer(rootbuf, BT_READ);
/* okay, metadata is correct, write and release it */
_bt_wrtbuf(rel, metabuf);
/* okay, metadata is correct, release lock on it */
_bt_relbuf(rel, metabuf);
}
else
{
@@ -581,49 +585,12 @@ _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
/*
* _bt_relbuf() -- release a locked buffer.
*
* Lock and pin (refcount) are both dropped. Note that either read or
* write lock can be dropped this way, but if we modified the buffer,
* this is NOT the right way to release a write lock.
* Lock and pin (refcount) are both dropped.
*/
void
_bt_relbuf(Relation rel, Buffer buf)
{
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buf);
}
/*
* _bt_wrtbuf() -- write a btree page to disk.
*
* This routine releases the lock held on the buffer and our refcount
* for it. It is an error to call _bt_wrtbuf() without a write lock
* and a pin on the buffer.
*
* NOTE: actually, the buffer manager just marks the shared buffer page
* dirty here; the real I/O happens later. This is okay since we are not
* relying on write ordering anyway. The WAL mechanism is responsible for
* guaranteeing correctness after a crash.
*/
void
_bt_wrtbuf(Relation rel, Buffer buf)
{
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
WriteBuffer(buf);
}
/*
* _bt_wrtnorelbuf() -- write a btree page to disk, but do not release
* our reference or lock.
*
* It is an error to call _bt_wrtnorelbuf() without a write lock
* and a pin on the buffer.
*
* See above NOTE.
*/
void
_bt_wrtnorelbuf(Relation rel, Buffer buf)
{
WriteNoReleaseBuffer(buf);
UnlockReleaseBuffer(buf);
}
/*
@@ -676,9 +643,8 @@ _bt_page_recyclable(Page page)
* non-leaf page has to be done as part of an atomic action that includes
* deleting the page it points to.
*
* This routine assumes that the caller has pinned and locked the buffer,
* and will write the buffer afterwards. Also, the given itemnos *must*
* appear in increasing order in the array.
* This routine assumes that the caller has pinned and locked the buffer.
* Also, the given itemnos *must* appear in increasing order in the array.
*/
void
_bt_delitems(Relation rel, Buffer buf,
@@ -692,6 +658,8 @@ _bt_delitems(Relation rel, Buffer buf,
/* Fix the page */
PageIndexMultiDelete(page, itemnos, nitems);
MarkBufferDirty(buf);
/* XLOG stuff */
if (!rel->rd_istemp)
{
@@ -1053,8 +1021,16 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
{
metad->btm_fastroot = rightsib;
metad->btm_fastlevel = targetlevel;
MarkBufferDirty(metabuf);
}
/* Must mark buffers dirty before XLogInsert */
MarkBufferDirty(pbuf);
MarkBufferDirty(rbuf);
MarkBufferDirty(buf);
if (BufferIsValid(lbuf))
MarkBufferDirty(lbuf);
/* XLOG stuff */
if (!rel->rd_istemp)
{
@@ -1143,14 +1119,14 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
END_CRIT_SECTION();
/* Write and release buffers */
/* release buffers */
if (BufferIsValid(metabuf))
_bt_wrtbuf(rel, metabuf);
_bt_wrtbuf(rel, pbuf);
_bt_wrtbuf(rel, rbuf);
_bt_wrtbuf(rel, buf);
_bt_relbuf(rel, metabuf);
_bt_relbuf(rel, pbuf);
_bt_relbuf(rel, rbuf);
_bt_relbuf(rel, buf);
if (BufferIsValid(lbuf))
_bt_wrtbuf(rel, lbuf);
_bt_relbuf(rel, lbuf);
/*
* If parent became half dead, recurse to try to delete it. Otherwise, if

View File

@@ -12,7 +12,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.142 2006/03/05 15:58:21 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.143 2006/03/31 23:32:05 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -624,18 +624,13 @@ btbulkdelete(PG_FUNCTION_ARGS)
}
}
/*
* If we need to delete anything, do it and write the buffer; else
* just release the buffer.
*/
nextpage = opaque->btpo_next;
/* Apply any needed deletes */
if (ndeletable > 0)
{
_bt_delitems(rel, buf, deletable, ndeletable);
_bt_wrtbuf(rel, buf);
}
else
_bt_relbuf(rel, buf);
/* Fetch nextpage link before releasing the buffer */
nextpage = opaque->btpo_next;
_bt_relbuf(rel, buf);
/* call vacuum_delay_point while not holding any buffer lock */
vacuum_delay_point();

View File

@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.29 2006/03/29 21:17:37 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.30 2006/03/31 23:32:05 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -69,8 +69,7 @@ forget_matching_split(Relation reln, RelFileNode node,
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
rightblk = ItemPointerGetBlockNumber(&(itup->t_tid));
Assert(ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
foreach(l, incomplete_splits)
{
@@ -80,7 +79,8 @@ forget_matching_split(Relation reln, RelFileNode node,
rightblk == split->rightblk)
{
if (is_root != split->is_root)
elog(LOG, "forget_matching_split: fishy is_root data");
elog(LOG, "forget_matching_split: fishy is_root data (expected %d, got %d)",
split->is_root, is_root);
incomplete_splits = list_delete_ptr(incomplete_splits, split);
break; /* need not look further */
}
@@ -143,8 +143,8 @@ _bt_restore_meta(Relation reln, XLogRecPtr lsn,
PageSetLSN(metapg, lsn);
PageSetTLI(metapg, ThisTimeLineID);
LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
WriteBuffer(metabuf);
MarkBufferDirty(metabuf);
UnlockReleaseBuffer(metabuf);
}
static void
@@ -185,8 +185,7 @@ btree_xlog_insert(bool isleaf, bool ismeta,
if (XLByteLE(lsn, PageGetLSN(page)))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
}
else
{
@@ -197,8 +196,8 @@ btree_xlog_insert(bool isleaf, bool ismeta,
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
}
}
}
@@ -255,8 +254,8 @@ btree_xlog_split(bool onleft, bool isroot,
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
/* Right (new) sibling */
buffer = XLogReadBuffer(reln, rightsib, true);
@@ -277,8 +276,8 @@ btree_xlog_split(bool onleft, bool isroot,
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
/* Fix left-link of right (next) page */
if (!(record->xl_info & XLR_BKP_BLOCK_1))
@@ -292,8 +291,7 @@ btree_xlog_split(bool onleft, bool isroot,
if (XLByteLE(lsn, PageGetLSN(page)))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
}
else
{
@@ -302,8 +300,8 @@ btree_xlog_split(bool onleft, bool isroot,
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
}
}
}
@@ -343,8 +341,7 @@ btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
if (XLByteLE(lsn, PageGetLSN(page)))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
return;
}
@@ -361,8 +358,8 @@ btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
}
static void
@@ -395,8 +392,7 @@ btree_xlog_delete_page(bool ismeta,
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
if (XLByteLE(lsn, PageGetLSN(page)))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
}
else
{
@@ -424,8 +420,8 @@ btree_xlog_delete_page(bool ismeta,
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
}
}
}
@@ -439,8 +435,7 @@ btree_xlog_delete_page(bool ismeta,
page = (Page) BufferGetPage(buffer);
if (XLByteLE(lsn, PageGetLSN(page)))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
}
else
{
@@ -449,8 +444,8 @@ btree_xlog_delete_page(bool ismeta,
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
}
}
}
@@ -466,8 +461,7 @@ btree_xlog_delete_page(bool ismeta,
page = (Page) BufferGetPage(buffer);
if (XLByteLE(lsn, PageGetLSN(page)))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
}
else
{
@@ -476,8 +470,8 @@ btree_xlog_delete_page(bool ismeta,
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
}
}
}
@@ -498,8 +492,8 @@ btree_xlog_delete_page(bool ismeta,
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
/* Update metapage if needed */
if (ismeta)
@@ -544,8 +538,8 @@ btree_xlog_newroot(XLogRecPtr lsn, XLogRecord *record)
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
_bt_restore_meta(reln, lsn,
xlrec->rootblk, xlrec->level,