mirror of
https://github.com/postgres/postgres.git
synced 2025-05-05 09:19:17 +03:00
Repair race condition introduced into heap_update() in 7.1 ---
PageGetFreeSpace() was being called while not holding the buffer lock. Not only could that yield a garbage answer, but even a correct answer could be stale, since there might be less space available by the time we reacquire the buffer lock. Also repair a potential deadlock introduced by my recent performance improvement in RelationGetBufferForTuple(): it was possible for two heap_updates to try to lock two buffers in opposite orders. The fix creates a global rule that buffers of a single heap relation should be locked in decreasing block number order. Currently, this only applies to heap_update; VACUUM can get away with ignoring the rule since it holds an exclusive lock on the whole relation anyway. However, if we try to implement a VACUUM that can run in parallel with other transactions, VACUUM will also have to obey the lock order rule.
This commit is contained in:
parent
ae38a1f556
commit
27336e4f7a
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.114 2001/05/12 19:58:27 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.115 2001/05/16 22:35:12 tgl Exp $
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* INTERFACE ROUTINES
|
* INTERFACE ROUTINES
|
||||||
@ -1317,7 +1317,7 @@ heap_insert(Relation relation, HeapTuple tup)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Find buffer for this tuple */
|
/* Find buffer for this tuple */
|
||||||
buffer = RelationGetBufferForTuple(relation, tup->t_len);
|
buffer = RelationGetBufferForTuple(relation, tup->t_len, 0);
|
||||||
|
|
||||||
/* NO ELOG(ERROR) from here till changes are logged */
|
/* NO ELOG(ERROR) from here till changes are logged */
|
||||||
START_CRIT_SECTION();
|
START_CRIT_SECTION();
|
||||||
@ -1578,6 +1578,8 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
|
|||||||
newbuf;
|
newbuf;
|
||||||
bool need_toast,
|
bool need_toast,
|
||||||
already_marked;
|
already_marked;
|
||||||
|
Size newtupsize,
|
||||||
|
pagefree;
|
||||||
int result;
|
int result;
|
||||||
|
|
||||||
/* increment access statistics */
|
/* increment access statistics */
|
||||||
@ -1685,8 +1687,10 @@ l2:
|
|||||||
HeapTupleHasExtended(newtup) ||
|
HeapTupleHasExtended(newtup) ||
|
||||||
(MAXALIGN(newtup->t_len) > TOAST_TUPLE_THRESHOLD));
|
(MAXALIGN(newtup->t_len) > TOAST_TUPLE_THRESHOLD));
|
||||||
|
|
||||||
if (need_toast ||
|
newtupsize = MAXALIGN(newtup->t_len);
|
||||||
(unsigned) MAXALIGN(newtup->t_len) > PageGetFreeSpace((Page) dp))
|
pagefree = PageGetFreeSpace((Page) dp);
|
||||||
|
|
||||||
|
if (need_toast || newtupsize > pagefree)
|
||||||
{
|
{
|
||||||
_locked_tuple_.node = relation->rd_node;
|
_locked_tuple_.node = relation->rd_node;
|
||||||
_locked_tuple_.tid = oldtup.t_self;
|
_locked_tuple_.tid = oldtup.t_self;
|
||||||
@ -1704,25 +1708,73 @@ l2:
|
|||||||
|
|
||||||
/* Let the toaster do its thing */
|
/* Let the toaster do its thing */
|
||||||
if (need_toast)
|
if (need_toast)
|
||||||
|
{
|
||||||
heap_tuple_toast_attrs(relation, newtup, &oldtup);
|
heap_tuple_toast_attrs(relation, newtup, &oldtup);
|
||||||
|
newtupsize = MAXALIGN(newtup->t_len);
|
||||||
|
}
|
||||||
|
|
||||||
/* Now, do we need a new page for the tuple, or not? */
|
/*
|
||||||
if ((unsigned) MAXALIGN(newtup->t_len) <= PageGetFreeSpace((Page) dp))
|
* Now, do we need a new page for the tuple, or not? This is a bit
|
||||||
newbuf = buffer;
|
* tricky since someone else could have added tuples to the page
|
||||||
else
|
* while we weren't looking. We have to recheck the available space
|
||||||
newbuf = RelationGetBufferForTuple(relation, newtup->t_len);
|
* after reacquiring the buffer lock. But don't bother to do that
|
||||||
|
* if the former amount of free space is still not enough; it's
|
||||||
/* Re-acquire the lock on the old tuple's page. */
|
* unlikely there's more free now than before.
|
||||||
/* this seems to be deadlock free... */
|
*
|
||||||
|
* What's more, if we need to get a new page, we will need to acquire
|
||||||
|
* buffer locks on both old and new pages. To avoid deadlock against
|
||||||
|
* some other backend trying to get the same two locks in the other
|
||||||
|
* order, we must be consistent about the order we get the locks in.
|
||||||
|
* We use the rule "lock the higher-numbered page of the relation
|
||||||
|
* first". To implement this, we must do RelationGetBufferForTuple
|
||||||
|
* while not holding the lock on the old page, and we must tell it
|
||||||
|
* to give us a page beyond the old page.
|
||||||
|
*/
|
||||||
|
if (newtupsize > pagefree)
|
||||||
|
{
|
||||||
|
/* Assume there's no chance to put newtup on same page. */
|
||||||
|
newbuf = RelationGetBufferForTuple(relation, newtup->t_len,
|
||||||
|
BufferGetBlockNumber(buffer) + 1);
|
||||||
|
/* Now reacquire lock on old tuple's page. */
|
||||||
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
|
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
|
/* Re-acquire the lock on the old tuple's page. */
|
||||||
|
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
|
||||||
|
/* Re-check using the up-to-date free space */
|
||||||
|
pagefree = PageGetFreeSpace((Page) dp);
|
||||||
|
if (newtupsize > pagefree)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Rats, it doesn't fit anymore. We must now unlock and
|
||||||
|
* relock to avoid deadlock. Fortunately, this path should
|
||||||
|
* seldom be taken.
|
||||||
|
*/
|
||||||
|
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
|
||||||
|
newbuf = RelationGetBufferForTuple(relation, newtup->t_len,
|
||||||
|
BufferGetBlockNumber(buffer) + 1);
|
||||||
|
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* OK, it fits here, so we're done. */
|
||||||
|
newbuf = buffer;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
/* No TOAST work needed, and it'll fit on same page */
|
/* No TOAST work needed, and it'll fit on same page */
|
||||||
already_marked = false;
|
already_marked = false;
|
||||||
newbuf = buffer;
|
newbuf = buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* At this point newbuf and buffer are both pinned and locked,
|
||||||
|
* and newbuf has enough space for the new tuple.
|
||||||
|
*/
|
||||||
|
|
||||||
/* NO ELOG(ERROR) from here till changes are logged */
|
/* NO ELOG(ERROR) from here till changes are logged */
|
||||||
START_CRIT_SECTION();
|
START_CRIT_SECTION();
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Id: hio.c,v 1.38 2001/05/12 19:58:27 tgl Exp $
|
* $Id: hio.c,v 1.39 2001/05/16 22:35:12 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -66,7 +66,13 @@ RelationPutHeapTuple(Relation relation,
|
|||||||
/*
|
/*
|
||||||
* RelationGetBufferForTuple
|
* RelationGetBufferForTuple
|
||||||
*
|
*
|
||||||
* Returns exclusive-locked buffer with free space >= given len.
|
* Returns exclusive-locked buffer with free space >= given len,
|
||||||
|
* being careful to select only a page at or beyond minblocknum
|
||||||
|
* in the relation.
|
||||||
|
*
|
||||||
|
* The minblocknum parameter is needed to prevent deadlock between
|
||||||
|
* concurrent heap_update operations; see heap_update for details.
|
||||||
|
* Pass zero if you don't particularly care which page you get.
|
||||||
*
|
*
|
||||||
* Note that we use LockPage to lock relation for extension. We can
|
* Note that we use LockPage to lock relation for extension. We can
|
||||||
* do this as long as in all other places we use page-level locking
|
* do this as long as in all other places we use page-level locking
|
||||||
@ -77,7 +83,8 @@ RelationPutHeapTuple(Relation relation,
|
|||||||
* before any (unlogged) changes are made in buffer pool.
|
* before any (unlogged) changes are made in buffer pool.
|
||||||
*/
|
*/
|
||||||
Buffer
|
Buffer
|
||||||
RelationGetBufferForTuple(Relation relation, Size len)
|
RelationGetBufferForTuple(Relation relation, Size len,
|
||||||
|
BlockNumber minblocknum)
|
||||||
{
|
{
|
||||||
Buffer buffer = InvalidBuffer;
|
Buffer buffer = InvalidBuffer;
|
||||||
Page pageHeader;
|
Page pageHeader;
|
||||||
@ -103,6 +110,8 @@ RelationGetBufferForTuple(Relation relation, Size len)
|
|||||||
if (relation->rd_nblocks > 0)
|
if (relation->rd_nblocks > 0)
|
||||||
{
|
{
|
||||||
lastblock = relation->rd_nblocks - 1;
|
lastblock = relation->rd_nblocks - 1;
|
||||||
|
if (lastblock >= minblocknum)
|
||||||
|
{
|
||||||
buffer = ReadBuffer(relation, lastblock);
|
buffer = ReadBuffer(relation, lastblock);
|
||||||
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
|
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
|
||||||
pageHeader = (Page) BufferGetPage(buffer);
|
pageHeader = (Page) BufferGetPage(buffer);
|
||||||
@ -114,6 +123,7 @@ RelationGetBufferForTuple(Relation relation, Size len)
|
|||||||
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
|
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
|
||||||
/* buffer release will happen below... */
|
/* buffer release will happen below... */
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Before extending relation, make sure no one else has done
|
* Before extending relation, make sure no one else has done
|
||||||
@ -137,13 +147,15 @@ RelationGetBufferForTuple(Relation relation, Size len)
|
|||||||
*/
|
*/
|
||||||
relation->rd_nblocks = RelationGetNumberOfBlocks(relation);
|
relation->rd_nblocks = RelationGetNumberOfBlocks(relation);
|
||||||
|
|
||||||
if (relation->rd_nblocks > oldnblocks)
|
if ((BlockNumber) relation->rd_nblocks > oldnblocks)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Someone else has indeed extended the relation recently.
|
* Someone else has indeed extended the relation recently.
|
||||||
* Try to fit our tuple into the new last page.
|
* Try to fit our tuple into the new last page.
|
||||||
*/
|
*/
|
||||||
lastblock = relation->rd_nblocks - 1;
|
lastblock = relation->rd_nblocks - 1;
|
||||||
|
if (lastblock >= minblocknum)
|
||||||
|
{
|
||||||
buffer = ReleaseAndReadBuffer(buffer, relation, lastblock, false);
|
buffer = ReleaseAndReadBuffer(buffer, relation, lastblock, false);
|
||||||
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
|
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
|
||||||
pageHeader = (Page) BufferGetPage(buffer);
|
pageHeader = (Page) BufferGetPage(buffer);
|
||||||
@ -160,9 +172,13 @@ RelationGetBufferForTuple(Relation relation, Size len)
|
|||||||
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
|
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
|
||||||
/* buffer release will happen below... */
|
/* buffer release will happen below... */
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Extend the relation by one page and update rd_nblocks for next time.
|
* Extend the relation by one page and update rd_nblocks for next time.
|
||||||
|
*
|
||||||
|
* Note: at this point minblocknum is ignored; we won't extend by more
|
||||||
|
* than one block...
|
||||||
*/
|
*/
|
||||||
lastblock = relation->rd_nblocks;
|
lastblock = relation->rd_nblocks;
|
||||||
buffer = ReleaseAndReadBuffer(buffer, relation, lastblock, true);
|
buffer = ReleaseAndReadBuffer(buffer, relation, lastblock, true);
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $Id: hio.h,v 1.17 2001/01/24 19:43:19 momjian Exp $
|
* $Id: hio.h,v 1.18 2001/05/16 22:35:12 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -18,6 +18,7 @@
|
|||||||
|
|
||||||
extern void RelationPutHeapTuple(Relation relation, Buffer buffer,
|
extern void RelationPutHeapTuple(Relation relation, Buffer buffer,
|
||||||
HeapTuple tuple);
|
HeapTuple tuple);
|
||||||
extern Buffer RelationGetBufferForTuple(Relation relation, Size len);
|
extern Buffer RelationGetBufferForTuple(Relation relation, Size len,
|
||||||
|
BlockNumber minblocknum);
|
||||||
|
|
||||||
#endif /* HIO_H */
|
#endif /* HIO_H */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user