mirror of
https://github.com/postgres/postgres.git
synced 2025-08-24 09:27:52 +03:00
Fix race condition between hot standby and restoring a full-page image.
There was a window in RestoreBackupBlock where a page would be zeroed out, but not yet locked. If a backend pinned and locked the page in that window, it saw the zeroed page instead of the old page or new page contents, which could lead to missing rows in a result set, or errors. To fix, replace RBM_ZERO with RBM_ZERO_AND_LOCK, which atomically pins, zeroes, and locks the page, if it's not in the buffer cache already. In stable branches, the old RBM_ZERO constant is renamed to RBM_DO_NOT_USE, to avoid breaking any 3rd party extensions that might use RBM_ZERO. More importantly, this avoids renumbering the other enum values, which would cause even bigger confusion in extensions that use ReadBufferExtended, but haven't been recompiled. Backpatch to all supported versions; this has been racy since hot standby was introduced.
This commit is contained in:
@@ -287,9 +287,13 @@ XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record, int block_index,
|
||||
* XLogReadBufferForRedoExtended
|
||||
* Like XLogReadBufferForRedo, but with extra options.
|
||||
*
|
||||
* If mode is RBM_ZERO or RBM_ZERO_ON_ERROR, if the page doesn't exist, the
|
||||
* relation is extended with all-zeroes pages up to the referenced block
|
||||
* number. In RBM_ZERO mode, the return value is always BLK_NEEDS_REDO.
|
||||
* In RBM_ZERO_* modes, if the page doesn't exist, the relation is extended
|
||||
* with all-zeroes pages up to the referenced block number. In
|
||||
* RBM_ZERO_AND_LOCK and RBM_ZERO_AND_CLEANUP_LOCK modes, the return value
|
||||
* is always BLK_NEEDS_REDO.
|
||||
*
|
||||
* (The RBM_ZERO_AND_CLEANUP_LOCK mode is redundant with the get_cleanup_lock
|
||||
* parameter. Do not use an inconsistent combination!)
|
||||
*
|
||||
* If 'get_cleanup_lock' is true, a "cleanup lock" is acquired on the buffer
|
||||
* using LockBufferForCleanup(), instead of a regular exclusive lock.
|
||||
@@ -312,10 +316,13 @@ XLogReadBufferForRedoExtended(XLogRecPtr lsn, XLogRecord *record,
|
||||
*buf = XLogReadBufferExtended(rnode, forkno, blkno, mode);
|
||||
if (BufferIsValid(*buf))
|
||||
{
|
||||
if (get_cleanup_lock)
|
||||
LockBufferForCleanup(*buf);
|
||||
else
|
||||
LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);
|
||||
if (mode != RBM_ZERO_AND_LOCK && mode != RBM_ZERO_AND_CLEANUP_LOCK)
|
||||
{
|
||||
if (get_cleanup_lock)
|
||||
LockBufferForCleanup(*buf);
|
||||
else
|
||||
LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);
|
||||
}
|
||||
if (lsn <= PageGetLSN(BufferGetPage(*buf)))
|
||||
return BLK_DONE;
|
||||
else
|
||||
@@ -341,7 +348,8 @@ XLogReadBufferForRedoExtended(XLogRecPtr lsn, XLogRecord *record,
|
||||
* The returned buffer is exclusively-locked.
|
||||
*
|
||||
* For historical reasons, instead of a ReadBufferMode argument, this only
|
||||
* supports RBM_ZERO (init == true) and RBM_NORMAL (init == false) modes.
|
||||
* supports RBM_ZERO_AND_LOCK (init == true) and RBM_NORMAL (init == false)
|
||||
* modes.
|
||||
*/
|
||||
Buffer
|
||||
XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
|
||||
@@ -349,8 +357,8 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
|
||||
Buffer buf;
|
||||
|
||||
buf = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno,
|
||||
init ? RBM_ZERO : RBM_NORMAL);
|
||||
if (BufferIsValid(buf))
|
||||
init ? RBM_ZERO_AND_LOCK : RBM_NORMAL);
|
||||
if (BufferIsValid(buf) && !init)
|
||||
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
|
||||
|
||||
return buf;
|
||||
@@ -369,8 +377,8 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
|
||||
* dropped or truncated. If we don't see evidence of that later in the WAL
|
||||
* sequence, we'll complain at the end of WAL replay.)
|
||||
*
|
||||
* In RBM_ZERO and RBM_ZERO_ON_ERROR modes, if the page doesn't exist, the
|
||||
* relation is extended with all-zeroes pages up to the given block number.
|
||||
* In RBM_ZERO_* modes, if the page doesn't exist, the relation is extended
|
||||
* with all-zeroes pages up to the given block number.
|
||||
*
|
||||
* In RBM_NORMAL_NO_LOG mode, we return InvalidBuffer if the page doesn't
|
||||
* exist, and we don't check for all-zeroes. Thus, no log entry is made
|
||||
@@ -424,7 +432,11 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
|
||||
do
|
||||
{
|
||||
if (buffer != InvalidBuffer)
|
||||
{
|
||||
if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
|
||||
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
|
||||
ReleaseBuffer(buffer);
|
||||
}
|
||||
buffer = ReadBufferWithoutRelcache(rnode, forknum,
|
||||
P_NEW, mode, NULL);
|
||||
}
|
||||
@@ -432,6 +444,8 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
|
||||
/* Handle the corner case that P_NEW returns non-consecutive pages */
|
||||
if (BufferGetBlockNumber(buffer) != blkno)
|
||||
{
|
||||
if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
|
||||
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
|
||||
ReleaseBuffer(buffer);
|
||||
buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno,
|
||||
mode, NULL);
|
||||
@@ -537,12 +551,8 @@ RestoreBackupBlockContents(XLogRecPtr lsn, BkpBlock bkpb, char *blk,
|
||||
Page page;
|
||||
|
||||
buffer = XLogReadBufferExtended(bkpb.node, bkpb.fork, bkpb.block,
|
||||
RBM_ZERO);
|
||||
get_cleanup_lock ? RBM_ZERO_AND_CLEANUP_LOCK : RBM_ZERO_AND_LOCK);
|
||||
Assert(BufferIsValid(buffer));
|
||||
if (get_cleanup_lock)
|
||||
LockBufferForCleanup(buffer);
|
||||
else
|
||||
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
|
||||
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
|
Reference in New Issue
Block a user