diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index 9c7d72a7e4e..fe7c1069edc 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Id: hio.c,v 1.50 2003/09/25 06:57:57 petere Exp $ + * $Id: hio.c,v 1.50.2.1 2005/05/07 21:33:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -246,13 +246,6 @@ RelationGetBufferForTuple(Relation relation, Size len, */ buffer = ReadBuffer(relation, P_NEW); - /* - * Release the file-extension lock; it's now OK for someone else to - * extend the relation some more. - */ - if (needLock) - UnlockPage(relation, 0, ExclusiveLock); - /* * We can be certain that locking the otherBuffer first is OK, since * it must have a lower page number. @@ -261,9 +254,22 @@ RelationGetBufferForTuple(Relation relation, Size len, LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE); /* - * We need to initialize the empty new page. + * Now acquire lock on the new page. */ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + + /* + * Release the file-extension lock; it's now OK for someone else to + * extend the relation some more. Note that we cannot release this + * lock before we have buffer lock on the new page, or we risk a + * race condition against vacuumlazy.c --- see comments therein. + */ + if (needLock) + UnlockPage(relation, 0, ExclusiveLock); + + /* + * We need to initialize the empty new page. + */ pageHeader = (Page) BufferGetPage(buffer); Assert(PageIsNew((PageHeader) pageHeader)); PageInit(pageHeader, BufferGetPageSize(buffer), 0); diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 77ca04601db..0051888ad4f 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.72 2003/09/29 23:40:26 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.72.2.1 2005/05/07 21:33:21 tgl Exp $ * * NOTES * Postgres btree pages look like ordinary relation pages. The opaque @@ -473,18 +473,21 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) buf = ReadBuffer(rel, P_NEW); + /* Acquire buffer lock on new page */ + LockBuffer(buf, BT_WRITE); + /* - * Release the file-extension lock; it's now OK for someone else - * to extend the relation some more. + * Release the file-extension lock; it's now OK for someone else to + * extend the relation some more. Note that we cannot release this + * lock before we have buffer lock on the new page, or we risk a + * race condition against btvacuumcleanup --- see comments therein. */ if (needLock) UnlockPage(rel, 0, ExclusiveLock); - /* Acquire appropriate buffer lock on new page */ - LockBuffer(buf, access); - /* Initialize the new page before returning it */ page = BufferGetPage(buf); + Assert(PageIsNew((PageHeader) page)); _bt_pageinit(page, BufferGetPageSize(buf)); } diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 3979f79c358..07b3c8d6f68 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -12,7 +12,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.106 2003/09/29 23:40:26 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.106.2.1 2005/05/07 21:33:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -707,11 +707,35 @@ btvacuumcleanup(PG_FUNCTION_ARGS) BlockNumber pages_deleted = 0; MemoryContext mycontext; MemoryContext oldcontext; + bool needLock; Assert(stats != NULL); + /* + * First find out the number of pages in the index. We must acquire + * the relation-extension lock while doing this to avoid a race + * condition: if someone else is extending the relation, there is + * a window where bufmgr/smgr have created a new all-zero page but + * it hasn't yet been write-locked by _bt_getbuf(). If we manage to + * scan such a page here, we'll improperly assume it can be recycled. + * Taking the lock synchronizes things enough to prevent a problem: + * either num_pages won't include the new page, or _bt_getbuf already + * has write lock on the buffer and it will be fully initialized before + * we can examine it. (See also vacuumlazy.c, which has the same issue.) + * + * We can skip locking for new or temp relations, + * however, since no one else could be accessing them. + */ + needLock = !(rel->rd_isnew || rel->rd_istemp); + + if (needLock) + LockPage(rel, 0, ExclusiveLock); + num_pages = RelationGetNumberOfBlocks(rel); + if (needLock) + UnlockPage(rel, 0, ExclusiveLock); + /* No point in remembering more than MaxFSMPages pages */ maxFreePages = MaxFSMPages; if ((BlockNumber) maxFreePages > num_pages) diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index cc1852f65a1..891def15344 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -31,7 +31,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.32 2003/09/25 06:57:59 petere Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.32.2.1 2005/05/07 21:33:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -254,8 +254,30 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, if (PageIsNew(page)) { - /* Not sure we still need to handle this case, but... */ + /* + * An all-zeroes page could be left over if a backend extends + * the relation but crashes before initializing the page. + * Reclaim such pages for use. + * + * We have to be careful here because we could be looking at + * a page that someone has just added to the relation and not + * yet been able to initialize (see RelationGetBufferForTuple). + * To interlock against that, release the buffer read lock + * (which we must do anyway) and grab the relation extension + * lock before re-locking in exclusive mode. If the page is + * still uninitialized by then, it must be left over from a + * crashed backend, and we can initialize it. + * + * We don't really need the relation lock when this is a new + * or temp relation, but it's probably not worth the code space + * to check that, since this surely isn't a critical path. + * + * Note: the comparable code in vacuum.c need not do all this + * because it's got exclusive lock on the whole relation. + */ LockBuffer(buf, BUFFER_LOCK_UNLOCK); + LockPage(onerel, 0, ExclusiveLock); + UnlockPage(onerel, 0, ExclusiveLock); LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); if (PageIsNew(page)) {