During WAL recovery, when reading a page that we intend to overwrite completely

from the WAL data, don't bother to physically read it; just have bufmgr.c return a zeroed-out buffer instead. This speeds recovery significantly, and also avoids unnecessary failures when a page-to-be-overwritten has corrupt page headers on disk. This replaces a former kluge that accomplished the latter by pretending zero_damaged_pages was always ON during WAL recovery, which was OK when the kluge was put in but is unsafe when restoring a WAL log that was written with full_page_writes off. Heikki Linnakangas
2025-12-21 05:21:08 +03:00 · 2007-05-02 23:18:03 +00:00
parent 8ec943856a
commit 8c3cc86e7b
3 changed files with 49 additions and 13 deletions
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.216 2007/03/30 18:34:55 mha Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.217 2007/05/02 23:18:03 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -17,6 +17,12 @@
 *		and pin it so that no one can destroy it while this process
 *		is using it.
 *
+ * ReadOrZeroBuffer() -- like ReadBuffer, but if the page is not already in
+ *		cache we don't read it, but just return a zeroed-out buffer.  Useful
+ *		when the caller intends to fill the page from scratch, since this
+ *		saves I/O and avoids unnecessary failure if the page-on-disk has
+ *		corrupt page headers.
+ *
 * ReleaseBuffer() -- unpin a buffer
 *
 * MarkBufferDirty() -- mark a pinned buffer's contents as "dirty".
@@ -87,6 +93,8 @@ static volatile BufferDesc *PinCountWaitBuf = NULL;
 extern PgStat_MsgBgWriter BgWriterStats;


+static Buffer ReadBuffer_common(Relation reln, BlockNumber blockNum,
+								bool zeroPage);
 static bool PinBuffer(volatile BufferDesc *buf);
 static void PinBuffer_Locked(volatile BufferDesc *buf);
 static void UnpinBuffer(volatile BufferDesc *buf,
@@ -120,6 +128,27 @@ static void AtProcExit_Buffers(int code, Datum arg);
 */
 Buffer
 ReadBuffer(Relation reln, BlockNumber blockNum)
+{
+	return ReadBuffer_common(reln, blockNum, false);	/* zeroPage = false: do not skip the disk read */
+}
+
+/*
+ * ReadOrZeroBuffer -- like ReadBuffer, but if the page isn't already in
+ *		the buffer cache, the buffer is zero-filled instead of being read
+ *		from disk.  The caller is expected to overwrite the whole page,
+ *		so its previous on-disk contents are of no interest.
+ */
+Buffer
+ReadOrZeroBuffer(Relation reln, BlockNumber blockNum)
+{
+	return ReadBuffer_common(reln, blockNum, true);	/* zeroPage = true: zero-fill instead of reading */
+}
+
+/*
+ * ReadBuffer_common -- common logic for ReadBuffer and ReadOrZeroBuffer
+ */
+static Buffer
+ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage)
 {
 	volatile BufferDesc *bufHdr;
 	Block		bufBlock;
@@ -253,17 +282,18 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
 	}
 	else
 	{
-		smgrread(reln->rd_smgr, blockNum, (char *) bufBlock);
+		/* 
+		 * Read in the page, unless the caller intends to overwrite it
+		 * and just wants us to allocate a buffer.
+		 */
+		if (zeroPage)
+			MemSet((char *) bufBlock, 0, BLCKSZ);
+		else
+			smgrread(reln->rd_smgr, blockNum, (char *) bufBlock);
 		/* check for garbage data */
 		if (!PageHeaderIsValid((PageHeader) bufBlock))
 		{
-			/*
-			 * During WAL recovery, the first access to any data page should
-			 * overwrite the whole page from the WAL; so a clobbered page
-			 * header is not reason to fail.  Hence, when InRecovery we may
-			 * always act as though zero_damaged_pages is ON.
-			 */
-			if (zero_damaged_pages || InRecovery)
+			if (zero_damaged_pages)
 			{
 				ereport(WARNING,
 						(errcode(ERRCODE_DATA_CORRUPTED),