mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	During WAL recovery, when reading a page that we intend to overwrite completely
from the WAL data, don't bother to physically read it; just have bufmgr.c return a zeroed-out buffer instead. This speeds recovery significantly, and also avoids unnecessary failures when a page-to-be-overwritten has corrupt page headers on disk. This replaces a former kluge that accomplished the latter by pretending zero_damaged_pages was always ON during WAL recovery, which was OK when the kluge was put in but is unsafe when restoring WAL that was written with full_page_writes off. (Heikki Linnakangas)
This commit is contained in:
		@@ -11,7 +11,7 @@
 | 
				
			|||||||
 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 | 
					 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 | 
				
			||||||
 * Portions Copyright (c) 1994, Regents of the University of California
 | 
					 * Portions Copyright (c) 1994, Regents of the University of California
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.49 2007/01/05 22:19:24 momjian Exp $
 | 
					 * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.50 2007/05/02 23:18:03 tgl Exp $
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 *-------------------------------------------------------------------------
 | 
					 *-------------------------------------------------------------------------
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
@@ -206,7 +206,9 @@ XLogCheckInvalidPages(void)
 | 
				
			|||||||
 * If "init" is true then the caller intends to rewrite the page fully
 | 
					 * If "init" is true then the caller intends to rewrite the page fully
 | 
				
			||||||
 * using the info in the XLOG record.  In this case we will extend the
 | 
					 * using the info in the XLOG record.  In this case we will extend the
 | 
				
			||||||
 * relation if needed to make the page exist, and we will not complain about
 | 
					 * relation if needed to make the page exist, and we will not complain about
 | 
				
			||||||
 * the page being "new" (all zeroes).
 | 
					 * the page being "new" (all zeroes); in fact, we usually will supply a
 | 
				
			||||||
 | 
					 * zeroed buffer without reading the page at all, so as to avoid unnecessary
 | 
				
			||||||
 | 
					 * failure if the page is present on disk but has corrupt headers.
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * If "init" is false then the caller needs the page to be valid already.
 | 
					 * If "init" is false then the caller needs the page to be valid already.
 | 
				
			||||||
 * If the page doesn't exist or contains zeroes, we return InvalidBuffer.
 | 
					 * If the page doesn't exist or contains zeroes, we return InvalidBuffer.
 | 
				
			||||||
@@ -226,6 +228,9 @@ XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
 | 
				
			|||||||
	if (blkno < lastblock)
 | 
						if (blkno < lastblock)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		/* page exists in file */
 | 
							/* page exists in file */
 | 
				
			||||||
 | 
							if (init)
 | 
				
			||||||
 | 
								buffer = ReadOrZeroBuffer(reln, blkno);
 | 
				
			||||||
 | 
							else
 | 
				
			||||||
			buffer = ReadBuffer(reln, blkno);
 | 
								buffer = ReadBuffer(reln, blkno);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	else
 | 
						else
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -8,7 +8,7 @@
 | 
				
			|||||||
 *
 | 
					 *
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * IDENTIFICATION
 | 
					 * IDENTIFICATION
 | 
				
			||||||
 *	  $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.216 2007/03/30 18:34:55 mha Exp $
 | 
					 *	  $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.217 2007/05/02 23:18:03 tgl Exp $
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 *-------------------------------------------------------------------------
 | 
					 *-------------------------------------------------------------------------
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
@@ -17,6 +17,12 @@
 | 
				
			|||||||
 *		and pin it so that no one can destroy it while this process
 | 
					 *		and pin it so that no one can destroy it while this process
 | 
				
			||||||
 *		is using it.
 | 
					 *		is using it.
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 | 
					 * ReadOrZeroBuffer() -- like ReadBuffer, but if the page is not already in
 | 
				
			||||||
 | 
					 *		cache we don't read it, but just return a zeroed-out buffer.  Useful
 | 
				
			||||||
 | 
					 *		when the caller intends to fill the page from scratch, since this
 | 
				
			||||||
 | 
					 *		saves I/O and avoids unnecessary failure if the page-on-disk has
 | 
				
			||||||
 | 
					 *		corrupt page headers.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 * ReleaseBuffer() -- unpin a buffer
 | 
					 * ReleaseBuffer() -- unpin a buffer
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * MarkBufferDirty() -- mark a pinned buffer's contents as "dirty".
 | 
					 * MarkBufferDirty() -- mark a pinned buffer's contents as "dirty".
 | 
				
			||||||
@@ -87,6 +93,8 @@ static volatile BufferDesc *PinCountWaitBuf = NULL;
 | 
				
			|||||||
extern PgStat_MsgBgWriter BgWriterStats;
 | 
					extern PgStat_MsgBgWriter BgWriterStats;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static Buffer ReadBuffer_common(Relation reln, BlockNumber blockNum,
 | 
				
			||||||
 | 
													bool zeroPage);
 | 
				
			||||||
static bool PinBuffer(volatile BufferDesc *buf);
 | 
					static bool PinBuffer(volatile BufferDesc *buf);
 | 
				
			||||||
static void PinBuffer_Locked(volatile BufferDesc *buf);
 | 
					static void PinBuffer_Locked(volatile BufferDesc *buf);
 | 
				
			||||||
static void UnpinBuffer(volatile BufferDesc *buf,
 | 
					static void UnpinBuffer(volatile BufferDesc *buf,
 | 
				
			||||||
@@ -120,6 +128,27 @@ static void AtProcExit_Buffers(int code, Datum arg);
 | 
				
			|||||||
 */
 | 
					 */
 | 
				
			||||||
Buffer
 | 
					Buffer
 | 
				
			||||||
ReadBuffer(Relation reln, BlockNumber blockNum)
 | 
					ReadBuffer(Relation reln, BlockNumber blockNum)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return ReadBuffer_common(reln, blockNum, false);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * ReadOrZeroBuffer -- like ReadBuffer, but if the page isn't in buffer
 | 
				
			||||||
 | 
					 *		cache already, it's filled with zeros instead of reading it from
 | 
				
			||||||
 | 
					 *		disk. The caller is expected to overwrite the whole buffer,
 | 
				
			||||||
 | 
					 *		so that the current page contents are not interesting.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					Buffer
 | 
				
			||||||
 | 
					ReadOrZeroBuffer(Relation reln, BlockNumber blockNum)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return ReadBuffer_common(reln, blockNum, true);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * ReadBuffer_common -- common logic for ReadBuffer and ReadOrZeroBuffer
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static Buffer
 | 
				
			||||||
 | 
					ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	volatile BufferDesc *bufHdr;
 | 
						volatile BufferDesc *bufHdr;
 | 
				
			||||||
	Block		bufBlock;
 | 
						Block		bufBlock;
 | 
				
			||||||
@@ -253,17 +282,18 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
	else
 | 
						else
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
 | 
							/* 
 | 
				
			||||||
 | 
							 * Read in the page, unless the caller intends to overwrite it
 | 
				
			||||||
 | 
							 * and just wants us to allocate a buffer.
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							if (zeroPage)
 | 
				
			||||||
 | 
								MemSet((char *) bufBlock, 0, BLCKSZ);
 | 
				
			||||||
 | 
							else
 | 
				
			||||||
			smgrread(reln->rd_smgr, blockNum, (char *) bufBlock);
 | 
								smgrread(reln->rd_smgr, blockNum, (char *) bufBlock);
 | 
				
			||||||
		/* check for garbage data */
 | 
							/* check for garbage data */
 | 
				
			||||||
		if (!PageHeaderIsValid((PageHeader) bufBlock))
 | 
							if (!PageHeaderIsValid((PageHeader) bufBlock))
 | 
				
			||||||
		{
 | 
							{
 | 
				
			||||||
			/*
 | 
								if (zero_damaged_pages)
 | 
				
			||||||
			 * During WAL recovery, the first access to any data page should
 | 
					 | 
				
			||||||
			 * overwrite the whole page from the WAL; so a clobbered page
 | 
					 | 
				
			||||||
			 * header is not reason to fail.  Hence, when InRecovery we may
 | 
					 | 
				
			||||||
			 * always act as though zero_damaged_pages is ON.
 | 
					 | 
				
			||||||
			 */
 | 
					 | 
				
			||||||
			if (zero_damaged_pages || InRecovery)
 | 
					 | 
				
			||||||
			{
 | 
								{
 | 
				
			||||||
				ereport(WARNING,
 | 
									ereport(WARNING,
 | 
				
			||||||
						(errcode(ERRCODE_DATA_CORRUPTED),
 | 
											(errcode(ERRCODE_DATA_CORRUPTED),
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -7,7 +7,7 @@
 | 
				
			|||||||
 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 | 
					 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 | 
				
			||||||
 * Portions Copyright (c) 1994, Regents of the University of California
 | 
					 * Portions Copyright (c) 1994, Regents of the University of California
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.102 2007/01/05 22:19:57 momjian Exp $
 | 
					 * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.103 2007/05/02 23:18:03 tgl Exp $
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 *-------------------------------------------------------------------------
 | 
					 *-------------------------------------------------------------------------
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
@@ -111,6 +111,7 @@ extern DLLIMPORT int32 *LocalRefCount;
 | 
				
			|||||||
 * prototypes for functions in bufmgr.c
 | 
					 * prototypes for functions in bufmgr.c
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
 | 
					extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
 | 
				
			||||||
 | 
					extern Buffer ReadOrZeroBuffer(Relation reln, BlockNumber blockNum);
 | 
				
			||||||
extern void ReleaseBuffer(Buffer buffer);
 | 
					extern void ReleaseBuffer(Buffer buffer);
 | 
				
			||||||
extern void UnlockReleaseBuffer(Buffer buffer);
 | 
					extern void UnlockReleaseBuffer(Buffer buffer);
 | 
				
			||||||
extern void MarkBufferDirty(Buffer buffer);
 | 
					extern void MarkBufferDirty(Buffer buffer);
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user