mirror of
https://github.com/postgres/postgres.git
synced 2025-07-15 19:21:59 +03:00
During WAL recovery, when reading a page that we intend to overwrite completely
from the WAL data, don't bother to physically read it; just have bufmgr.c return a zeroed-out buffer instead. This speeds recovery significantly, and also avoids unnecessary failures when a page-to-be-overwritten has corrupt page headers on disk. This replaces a former kluge that accomplished the latter by pretending zero_damaged_pages was always ON during WAL recovery, which was OK when the kluge was put in, but is unsafe when restoring from WAL that was written with full_page_writes off. Heikki Linnakangas
This commit is contained in:
@ -11,7 +11,7 @@
|
|||||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.49 2007/01/05 22:19:24 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.50 2007/05/02 23:18:03 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -206,7 +206,9 @@ XLogCheckInvalidPages(void)
|
|||||||
* If "init" is true then the caller intends to rewrite the page fully
|
* If "init" is true then the caller intends to rewrite the page fully
|
||||||
* using the info in the XLOG record. In this case we will extend the
|
* using the info in the XLOG record. In this case we will extend the
|
||||||
* relation if needed to make the page exist, and we will not complain about
|
* relation if needed to make the page exist, and we will not complain about
|
||||||
* the page being "new" (all zeroes).
|
* the page being "new" (all zeroes); in fact, we usually will supply a
|
||||||
|
* zeroed buffer without reading the page at all, so as to avoid unnecessary
|
||||||
|
* failure if the page is present on disk but has corrupt headers.
|
||||||
*
|
*
|
||||||
* If "init" is false then the caller needs the page to be valid already.
|
* If "init" is false then the caller needs the page to be valid already.
|
||||||
* If the page doesn't exist or contains zeroes, we return InvalidBuffer.
|
* If the page doesn't exist or contains zeroes, we return InvalidBuffer.
|
||||||
@ -226,7 +228,10 @@ XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
|
|||||||
if (blkno < lastblock)
|
if (blkno < lastblock)
|
||||||
{
|
{
|
||||||
/* page exists in file */
|
/* page exists in file */
|
||||||
buffer = ReadBuffer(reln, blkno);
|
if (init)
|
||||||
|
buffer = ReadOrZeroBuffer(reln, blkno);
|
||||||
|
else
|
||||||
|
buffer = ReadBuffer(reln, blkno);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.216 2007/03/30 18:34:55 mha Exp $
|
* $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.217 2007/05/02 23:18:03 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -17,6 +17,12 @@
|
|||||||
* and pin it so that no one can destroy it while this process
|
* and pin it so that no one can destroy it while this process
|
||||||
* is using it.
|
* is using it.
|
||||||
*
|
*
|
||||||
|
* ReadOrZeroBuffer() -- like ReadBuffer, but if the page is not already in
|
||||||
|
* cache we don't read it, but just return a zeroed-out buffer. Useful
|
||||||
|
* when the caller intends to fill the page from scratch, since this
|
||||||
|
* saves I/O and avoids unnecessary failure if the page-on-disk has
|
||||||
|
* corrupt page headers.
|
||||||
|
*
|
||||||
* ReleaseBuffer() -- unpin a buffer
|
* ReleaseBuffer() -- unpin a buffer
|
||||||
*
|
*
|
||||||
* MarkBufferDirty() -- mark a pinned buffer's contents as "dirty".
|
* MarkBufferDirty() -- mark a pinned buffer's contents as "dirty".
|
||||||
@ -87,6 +93,8 @@ static volatile BufferDesc *PinCountWaitBuf = NULL;
|
|||||||
extern PgStat_MsgBgWriter BgWriterStats;
|
extern PgStat_MsgBgWriter BgWriterStats;
|
||||||
|
|
||||||
|
|
||||||
|
static Buffer ReadBuffer_common(Relation reln, BlockNumber blockNum,
|
||||||
|
bool zeroPage);
|
||||||
static bool PinBuffer(volatile BufferDesc *buf);
|
static bool PinBuffer(volatile BufferDesc *buf);
|
||||||
static void PinBuffer_Locked(volatile BufferDesc *buf);
|
static void PinBuffer_Locked(volatile BufferDesc *buf);
|
||||||
static void UnpinBuffer(volatile BufferDesc *buf,
|
static void UnpinBuffer(volatile BufferDesc *buf,
|
||||||
@ -120,6 +128,27 @@ static void AtProcExit_Buffers(int code, Datum arg);
|
|||||||
*/
|
*/
|
||||||
Buffer
|
Buffer
|
||||||
ReadBuffer(Relation reln, BlockNumber blockNum)
|
ReadBuffer(Relation reln, BlockNumber blockNum)
|
||||||
|
{
|
||||||
|
return ReadBuffer_common(reln, blockNum, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ReadOrZeroBuffer -- like ReadBuffer, but if the page isn't in buffer
|
||||||
|
* cache already, it's filled with zeros instead of reading it from
|
||||||
|
* disk. The caller is expected to overwrite the whole buffer,
|
||||||
|
* so that the current page contents are not interesting.
|
||||||
|
*/
|
||||||
|
Buffer
|
||||||
|
ReadOrZeroBuffer(Relation reln, BlockNumber blockNum)
|
||||||
|
{
|
||||||
|
return ReadBuffer_common(reln, blockNum, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ReadBuffer_common -- common logic for ReadBuffer and ReadOrZeroBuffer
|
||||||
|
*/
|
||||||
|
static Buffer
|
||||||
|
ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage)
|
||||||
{
|
{
|
||||||
volatile BufferDesc *bufHdr;
|
volatile BufferDesc *bufHdr;
|
||||||
Block bufBlock;
|
Block bufBlock;
|
||||||
@ -253,17 +282,18 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
smgrread(reln->rd_smgr, blockNum, (char *) bufBlock);
|
/*
|
||||||
|
* Read in the page, unless the caller intends to overwrite it
|
||||||
|
* and just wants us to allocate a buffer.
|
||||||
|
*/
|
||||||
|
if (zeroPage)
|
||||||
|
MemSet((char *) bufBlock, 0, BLCKSZ);
|
||||||
|
else
|
||||||
|
smgrread(reln->rd_smgr, blockNum, (char *) bufBlock);
|
||||||
/* check for garbage data */
|
/* check for garbage data */
|
||||||
if (!PageHeaderIsValid((PageHeader) bufBlock))
|
if (!PageHeaderIsValid((PageHeader) bufBlock))
|
||||||
{
|
{
|
||||||
/*
|
if (zero_damaged_pages)
|
||||||
* During WAL recovery, the first access to any data page should
|
|
||||||
* overwrite the whole page from the WAL; so a clobbered page
|
|
||||||
* header is not reason to fail. Hence, when InRecovery we may
|
|
||||||
* always act as though zero_damaged_pages is ON.
|
|
||||||
*/
|
|
||||||
if (zero_damaged_pages || InRecovery)
|
|
||||||
{
|
{
|
||||||
ereport(WARNING,
|
ereport(WARNING,
|
||||||
(errcode(ERRCODE_DATA_CORRUPTED),
|
(errcode(ERRCODE_DATA_CORRUPTED),
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.102 2007/01/05 22:19:57 momjian Exp $
|
* $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.103 2007/05/02 23:18:03 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -111,6 +111,7 @@ extern DLLIMPORT int32 *LocalRefCount;
|
|||||||
* prototypes for functions in bufmgr.c
|
* prototypes for functions in bufmgr.c
|
||||||
*/
|
*/
|
||||||
extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
|
extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
|
||||||
|
extern Buffer ReadOrZeroBuffer(Relation reln, BlockNumber blockNum);
|
||||||
extern void ReleaseBuffer(Buffer buffer);
|
extern void ReleaseBuffer(Buffer buffer);
|
||||||
extern void UnlockReleaseBuffer(Buffer buffer);
|
extern void UnlockReleaseBuffer(Buffer buffer);
|
||||||
extern void MarkBufferDirty(Buffer buffer);
|
extern void MarkBufferDirty(Buffer buffer);
|
||||||
|
Reference in New Issue
Block a user