1
0
mirror of https://github.com/postgres/postgres.git synced 2025-09-03 15:22:11 +03:00

Fix race condition between hot standby and restoring a full-page image.

There was a window in RestoreBackupBlock where a page would be zeroed out,
but not yet locked. If a backend pinned and locked the page in that window,
it saw the zeroed page instead of the old page or new page contents, which
could lead to missing rows in a result set, or errors.

To fix, replace RBM_ZERO with RBM_ZERO_AND_LOCK, which atomically pins,
zeroes, and locks the page, if it's not in the buffer cache already.

In stable branches, the old RBM_ZERO constant is renamed to RBM_DO_NOT_USE,
to avoid breaking any 3rd party extensions that might use RBM_ZERO. More
importantly, this avoids renumbering the other enum values, which would
cause even bigger confusion in extensions that use ReadBufferExtended, but
haven't been recompiled.

Backpatch to all supported versions; this has been racy since hot standby
was introduced.
This commit is contained in:
Heikki Linnakangas
2014-11-13 19:47:44 +02:00
parent d47fff3d72
commit 7eab804c22
6 changed files with 66 additions and 25 deletions

View File

@@ -155,9 +155,8 @@ _hash_getinitbuf(Relation rel, BlockNumber blkno)
if (blkno == P_NEW)
elog(ERROR, "hash AM does not use P_NEW");
buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_ZERO, NULL);
LockBuffer(buf, HASH_WRITE);
buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_ZERO_AND_LOCK,
NULL);
/* ref count and lock type are correct */
@@ -198,11 +197,13 @@ _hash_getnewbuf(Relation rel, BlockNumber blkno, ForkNumber forkNum)
if (BufferGetBlockNumber(buf) != blkno)
elog(ERROR, "unexpected hash relation size: %u, should be %u",
BufferGetBlockNumber(buf), blkno);
LockBuffer(buf, HASH_WRITE);
}
else
buf = ReadBufferExtended(rel, forkNum, blkno, RBM_ZERO, NULL);
LockBuffer(buf, HASH_WRITE);
{
buf = ReadBufferExtended(rel, forkNum, blkno, RBM_ZERO_AND_LOCK,
NULL);
}
/* ref count and lock type are correct */

View File

@@ -4884,9 +4884,8 @@ heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record)
* not do anything that assumes we are touching a heap.
*/
buffer = XLogReadBufferExtended(xlrec->node, xlrec->forknum, xlrec->blkno,
RBM_ZERO);
RBM_ZERO_AND_LOCK);
Assert(BufferIsValid(buffer));
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
page = (Page) BufferGetPage(buffer);
Assert(record->xl_len == SizeOfHeapNewpage + BLCKSZ);

View File

@@ -3813,12 +3813,8 @@ RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record, int block_index,
{
/* Found it, apply the update */
buffer = XLogReadBufferExtended(bkpb.node, bkpb.fork, bkpb.block,
RBM_ZERO);
get_cleanup_lock ? RBM_ZERO_AND_CLEANUP_LOCK : RBM_ZERO_AND_LOCK);
Assert(BufferIsValid(buffer));
if (get_cleanup_lock)
LockBufferForCleanup(buffer);
else
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
page = (Page) BufferGetPage(buffer);

View File

@@ -257,7 +257,8 @@ XLogCheckInvalidPages(void)
* The returned buffer is exclusively-locked.
*
* For historical reasons, instead of a ReadBufferMode argument, this only
* supports RBM_ZERO (init == true) and RBM_NORMAL (init == false) modes.
* supports RBM_ZERO_AND_LOCK (init == true) and RBM_NORMAL (init == false)
* modes.
*/
Buffer
XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
@@ -265,8 +266,8 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
Buffer buf;
buf = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno,
init ? RBM_ZERO : RBM_NORMAL);
if (BufferIsValid(buf))
init ? RBM_ZERO_AND_LOCK : RBM_NORMAL);
if (BufferIsValid(buf) && !init)
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
return buf;
@@ -285,8 +286,8 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
* dropped or truncated. If we don't see evidence of that later in the WAL
* sequence, we'll complain at the end of WAL replay.)
*
* In RBM_ZERO and RBM_ZERO_ON_ERROR modes, if the page doesn't exist, the
* relation is extended with all-zeroes pages up to the given block number.
* In RBM_ZERO_* modes, if the page doesn't exist, the relation is extended
* with all-zeroes pages up to the given block number.
*
* In RBM_NORMAL_NO_LOG mode, we return InvalidBuffer if the page doesn't
* exist, and we don't check for all-zeroes. Thus, no log entry is made
@@ -340,7 +341,11 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
do
{
if (buffer != InvalidBuffer)
{
if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
}
buffer = ReadBufferWithoutRelcache(rnode, forknum,
P_NEW, mode, NULL);
}
@@ -348,6 +353,8 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
/* Handle the corner case that P_NEW returns non-consecutive pages */
if (BufferGetBlockNumber(buffer) != blkno)
{
if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno,
mode, NULL);