1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-18 02:02:55 +03:00

Clean up WAL/buffer interactions as per my recent proposal. Get rid of the

misleadingly-named WriteBuffer routine, and instead require routines that
change buffer pages to call MarkBufferDirty (which does exactly what it says).
We also require that they do so before calling XLogInsert; this takes care of
the synchronization requirement documented in SyncOneBuffer.  Note that
because bufmgr takes the buffer content lock (in shared mode) while writing
out any buffer, it doesn't matter whether MarkBufferDirty is executed before
the buffer content change is complete, so long as the content change is
completed before releasing exclusive lock on the buffer.  So it's OK to set
the dirtybit before we fill in the LSN.
This eliminates the former kluge of needing to set the dirtybit in LockBuffer.
Aside from making the code more transparent, we can also add some new
debugging assertions, in particular that the caller of MarkBufferDirty must
hold the buffer content lock, not merely a pin.
This commit is contained in:
Tom Lane
2006-03-31 23:32:07 +00:00
parent 89395bfa6f
commit a8b8f4db23
24 changed files with 434 additions and 537 deletions

View File

@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.93 2006/03/05 15:58:21 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.94 2006/03/31 23:32:05 tgl Exp $
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
@@ -53,13 +53,16 @@ _bt_metapinit(Relation rel)
buf = ReadBuffer(rel, P_NEW);
Assert(BufferGetBlockNumber(buf) == BTREE_METAPAGE);
LockBuffer(buf, BT_WRITE);
pg = BufferGetPage(buf);
/* NO ELOG(ERROR) from here till newmeta op is logged */
START_CRIT_SECTION();
_bt_initmetapage(pg, P_NONE, 0);
metad = BTPageGetMeta(pg);
/* NO ELOG(ERROR) from here till newmeta op is logged */
START_CRIT_SECTION();
MarkBufferDirty(buf);
/* XLOG stuff */
if (!rel->rd_istemp)
@@ -89,7 +92,7 @@ _bt_metapinit(Relation rel)
END_CRIT_SECTION();
WriteBuffer(buf);
UnlockReleaseBuffer(buf);
}
/*
@@ -235,6 +238,9 @@ _bt_getroot(Relation rel, int access)
metad->btm_fastroot = rootblkno;
metad->btm_fastlevel = 0;
MarkBufferDirty(rootbuf);
MarkBufferDirty(metabuf);
/* XLOG stuff */
if (!rel->rd_istemp)
{
@@ -261,8 +267,6 @@ _bt_getroot(Relation rel, int access)
END_CRIT_SECTION();
_bt_wrtnorelbuf(rel, rootbuf);
/*
* swap root write lock for read lock. There is no danger of anyone
* else accessing the new root page while it's unlocked, since no one
@@ -271,8 +275,8 @@ _bt_getroot(Relation rel, int access)
LockBuffer(rootbuf, BUFFER_LOCK_UNLOCK);
LockBuffer(rootbuf, BT_READ);
/* okay, metadata is correct, write and release it */
_bt_wrtbuf(rel, metabuf);
/* okay, metadata is correct, release lock on it */
_bt_relbuf(rel, metabuf);
}
else
{
@@ -581,49 +585,12 @@ _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
/*
* _bt_relbuf() -- release a locked buffer.
*
* Lock and pin (refcount) are both dropped. Note that either read or
* write lock can be dropped this way, but if we modified the buffer,
* this is NOT the right way to release a write lock.
* Lock and pin (refcount) are both dropped.
*/
void
_bt_relbuf(Relation rel, Buffer buf)
{
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buf);
}
/*
* _bt_wrtbuf() -- write a btree page to disk.
*
* This routine releases the lock held on the buffer and our refcount
* for it. It is an error to call _bt_wrtbuf() without a write lock
* and a pin on the buffer.
*
* NOTE: actually, the buffer manager just marks the shared buffer page
* dirty here; the real I/O happens later. This is okay since we are not
* relying on write ordering anyway. The WAL mechanism is responsible for
* guaranteeing correctness after a crash.
*/
void
_bt_wrtbuf(Relation rel, Buffer buf)
{
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
WriteBuffer(buf);
}
/*
* _bt_wrtnorelbuf() -- write a btree page to disk, but do not release
* our reference or lock.
*
* It is an error to call _bt_wrtnorelbuf() without a write lock
* and a pin on the buffer.
*
* See above NOTE.
*/
void
_bt_wrtnorelbuf(Relation rel, Buffer buf)
{
WriteNoReleaseBuffer(buf);
UnlockReleaseBuffer(buf);
}
/*
@@ -676,9 +643,8 @@ _bt_page_recyclable(Page page)
* non-leaf page has to be done as part of an atomic action that includes
* deleting the page it points to.
*
* This routine assumes that the caller has pinned and locked the buffer,
* and will write the buffer afterwards. Also, the given itemnos *must*
* appear in increasing order in the array.
* This routine assumes that the caller has pinned and locked the buffer.
* Also, the given itemnos *must* appear in increasing order in the array.
*/
void
_bt_delitems(Relation rel, Buffer buf,
@@ -692,6 +658,8 @@ _bt_delitems(Relation rel, Buffer buf,
/* Fix the page */
PageIndexMultiDelete(page, itemnos, nitems);
MarkBufferDirty(buf);
/* XLOG stuff */
if (!rel->rd_istemp)
{
@@ -1053,8 +1021,16 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
{
metad->btm_fastroot = rightsib;
metad->btm_fastlevel = targetlevel;
MarkBufferDirty(metabuf);
}
/* Must mark buffers dirty before XLogInsert */
MarkBufferDirty(pbuf);
MarkBufferDirty(rbuf);
MarkBufferDirty(buf);
if (BufferIsValid(lbuf))
MarkBufferDirty(lbuf);
/* XLOG stuff */
if (!rel->rd_istemp)
{
@@ -1143,14 +1119,14 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
END_CRIT_SECTION();
/* Write and release buffers */
/* release buffers */
if (BufferIsValid(metabuf))
_bt_wrtbuf(rel, metabuf);
_bt_wrtbuf(rel, pbuf);
_bt_wrtbuf(rel, rbuf);
_bt_wrtbuf(rel, buf);
_bt_relbuf(rel, metabuf);
_bt_relbuf(rel, pbuf);
_bt_relbuf(rel, rbuf);
_bt_relbuf(rel, buf);
if (BufferIsValid(lbuf))
_bt_wrtbuf(rel, lbuf);
_bt_relbuf(rel, lbuf);
/*
* If parent became half dead, recurse to try to delete it. Otherwise, if