
Revert "Skip WAL for new relfilenodes, under wal_level=minimal."

This reverts commit cb2fd7eac2.  Per
numerous buildfarm members, it was incompatible with parallel query, and
a test case assumed LP64.  Back-patch to 9.5 (all supported versions).

Discussion: https://postgr.es/m/20200321224920.GB1763544@rfd.leadboat.com
Author: Noah Misch
Date: 2020-03-22 09:24:09 -07:00
parent d0587f52b3
commit de9396326e
51 changed files with 362 additions and 1438 deletions
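For context, "LP64" in the message above is the data model in which long and pointers are both 64 bits wide. A purely illustrative sketch, not the actual test case from the reverted commit, of the kind of assumption that breaks on 32-bit or LLP64 (64-bit Windows) builds:

    #include <assert.h>

    int
    main(void)
    {
        /* Holds only on LP64 platforms; sizeof(long) is 4 on ILP32 and LLP64. */
        assert(sizeof(long) == 8 && sizeof(void *) == 8);
        return 0;
    }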

src/backend/storage/buffer/bufmgr.c

@@ -66,7 +66,7 @@
#define BUF_WRITTEN 0x01
#define BUF_REUSABLE 0x02
- #define RELS_BSEARCH_THRESHOLD 20
+ #define DROP_RELS_BSEARCH_THRESHOLD 20
typedef struct PrivateRefCountEntry
{
@@ -105,19 +105,6 @@ typedef struct CkptTsStatus
int index;
} CkptTsStatus;
- /*
- * Type for array used to sort SMgrRelations
- *
- * FlushRelationsAllBuffers shares the same comparator function with
- * DropRelFileNodesAllBuffers. Pointer to this struct and RelFileNode must be
- * compatible.
- */
- typedef struct SMgrSortArray
- {
- RelFileNode rnode; /* This must be the first member */
- SMgrRelation srel;
- } SMgrSortArray;
/* GUC variables */
bool zero_damaged_pages = false;
int bgwriter_lru_maxpages = 100;
@@ -3003,7 +2990,7 @@ DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
* an exactly determined value, as it depends on many factors (CPU and RAM
* speeds, amount of shared buffers etc.).
*/
- use_bsearch = n > RELS_BSEARCH_THRESHOLD;
+ use_bsearch = n > DROP_RELS_BSEARCH_THRESHOLD;
/* sort the list of rnodes if necessary */
if (use_bsearch)
@@ -3253,104 +3240,6 @@ FlushRelationBuffers(Relation rel)
}
}
- /* ---------------------------------------------------------------------
- * FlushRelationsAllBuffers
- *
- * This function flushes out of the buffer pool all the pages of all
- * forks of the specified smgr relations. It's equivalent to calling
- * FlushRelationBuffers once per fork per relation. The relations are
- * assumed not to use local buffers.
- * --------------------------------------------------------------------
- */
- void
- FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
- {
- int i;
- SMgrSortArray *srels;
- bool use_bsearch;
- if (nrels == 0)
- return;
- /* fill-in array for qsort */
- srels = palloc(sizeof(SMgrSortArray) * nrels);
- for (i = 0; i < nrels; i++)
- {
- Assert(!RelFileNodeBackendIsTemp(smgrs[i]->smgr_rnode));
- srels[i].rnode = smgrs[i]->smgr_rnode.node;
- srels[i].srel = smgrs[i];
- }
- /*
- * Save the bsearch overhead for low number of relations to sync. See
- * DropRelFileNodesAllBuffers for details.
- */
- use_bsearch = nrels > RELS_BSEARCH_THRESHOLD;
- /* sort the list of SMgrRelations if necessary */
- if (use_bsearch)
- pg_qsort(srels, nrels, sizeof(SMgrSortArray), rnode_comparator);
- /* Make sure we can handle the pin inside the loop */
- ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
- for (i = 0; i < NBuffers; i++)
- {
- SMgrSortArray *srelent = NULL;
- BufferDesc *bufHdr = GetBufferDescriptor(i);
- uint32 buf_state;
- /*
- * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
- * and saves some cycles.
- */
- if (!use_bsearch)
- {
- int j;
- for (j = 0; j < nrels; j++)
- {
- if (RelFileNodeEquals(bufHdr->tag.rnode, srels[j].rnode))
- {
- srelent = &srels[j];
- break;
- }
- }
- }
- else
- {
- srelent = bsearch((const void *) &(bufHdr->tag.rnode),
- srels, nrels, sizeof(SMgrSortArray),
- rnode_comparator);
- }
- /* buffer doesn't belong to any of the given relfilenodes; skip it */
- if (srelent == NULL)
- continue;
- ReservePrivateRefCountEntry();
- buf_state = LockBufHdr(bufHdr);
- if (RelFileNodeEquals(bufHdr->tag.rnode, srelent->rnode) &&
- (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
- {
- PinBuffer_Locked(bufHdr);
- LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
- FlushBuffer(bufHdr, srelent->srel);
- LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
- UnpinBuffer(bufHdr, true);
- }
- else
- UnlockBufHdr(bufHdr, buf_state);
- }
- pfree(srels);
- }
/* ---------------------------------------------------------------------
* FlushDatabaseBuffers
*
@@ -3552,15 +3441,13 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
(pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT))
{
/*
- * If we must not write WAL, due to a relfilenode-specific
- * condition or being in recovery, don't dirty the page. We can
- * set the hint, just not dirty the page as a result so the hint
- * is lost when we evict the page or shutdown.
+ * If we're in recovery we cannot dirty a page because of a hint.
+ * We can set the hint, just not dirty the page as a result so the
+ * hint is lost when we evict the page or shutdown.
*
* See src/backend/storage/page/README for longer discussion.
*/
- if (RecoveryInProgress() ||
- RelFileNodeSkippingWAL(bufHdr->tag.rnode))
+ if (RecoveryInProgress())
return;
/*

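The SMgrSortArray comment in the hunk above relies on a standard C idiom: because the key (RelFileNode) is the first member of the struct, a pointer to the struct is also a valid pointer to the key, so one comparator serves both qsort over the struct array and bsearch probes with a bare key; the linear scan is kept for small arrays to avoid the sorting overhead. A minimal standalone sketch of that pattern, using made-up stand-in types rather than PostgreSQL's definitions:

    #include <stdio.h>
    #include <stdlib.h>

    typedef struct Key { int db; int rel; } Key;   /* stand-in for RelFileNode */

    typedef struct Entry
    {
        Key   key;       /* must be the first member */
        void *payload;   /* stand-in for the SMgrRelation pointer */
    } Entry;

    /* Comparator written against Key; also valid for Entry because the key
     * sits at offset 0. */
    static int
    key_cmp(const void *a, const void *b)
    {
        const Key *ka = (const Key *) a;
        const Key *kb = (const Key *) b;

        if (ka->db != kb->db)
            return (ka->db < kb->db) ? -1 : 1;
        if (ka->rel != kb->rel)
            return (ka->rel < kb->rel) ? -1 : 1;
        return 0;
    }

    int
    main(void)
    {
        Entry  entries[] = { {{2, 20}, NULL}, {{1, 10}, NULL}, {{1, 5}, NULL} };
        Key    probe = {1, 10};
        Entry *hit;

        /* Sort the wrapper array with the key comparator... */
        qsort(entries, 3, sizeof(Entry), key_cmp);

        /* ...then probe it with a bare key, as the removed code probed
         * srels with a buffer tag's RelFileNode. */
        hit = bsearch(&probe, entries, 3, sizeof(Entry), key_cmp);
        printf("probe %s\n", hit ? "found" : "not found");
        return 0;
    }
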
src/backend/storage/lmgr/lock.c

@@ -614,18 +614,6 @@ LockHeldByMe(const LOCKTAG *locktag, LOCKMODE lockmode)
return (locallock && locallock->nLocks > 0);
}
- #ifdef USE_ASSERT_CHECKING
- /*
- * GetLockMethodLocalHash -- return the hash of local locks, for modules that
- * evaluate assertions based on all locks held.
- */
- HTAB *
- GetLockMethodLocalHash(void)
- {
- return LockMethodLocalHash;
- }
- #endif
/*
* LockHasWaiters -- look up 'locktag' and check if releasing this
* lock would wake up other processes waiting for it.

src/backend/storage/smgr/md.c

@@ -248,10 +248,11 @@ mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
* During replay, we would delete the file and then recreate it, which is fine
* if the contents of the file were repopulated by subsequent WAL entries.
* But if we didn't WAL-log insertions, but instead relied on fsyncing the
- file after populating it (as we do at wal_level=minimal), the contents of
- the file would be lost forever. By leaving the empty file until after the
- next checkpoint, we prevent reassignment of the relfilenode number until
- it's safe, because relfilenode assignment skips over any existing file.
+ file after populating it (as for instance CLUSTER and CREATE INDEX do),
+ the contents of the file would be lost forever. By leaving the empty file
+ until after the next checkpoint, we prevent reassignment of the relfilenode
+ number until it's safe, because relfilenode assignment skips over any
+ existing file.
*
* We do not need to go through this dance for temp relations, though, because
* we never make WAL entries for temp rels, and so a temp rel poses no threat
@@ -876,18 +877,12 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
* mdimmedsync() -- Immediately sync a relation to stable storage.
*
* Note that only writes already issued are synced; this routine knows
- nothing of dirty buffers that may exist inside the buffer manager. We
- sync active and inactive segments; smgrDoPendingSyncs() relies on this.
- Consider a relation skipping WAL. Suppose a checkpoint syncs blocks of
- some segment, then mdtruncate() renders that segment inactive. If we
- crash before the next checkpoint syncs the newly-inactive segment, that
- segment may survive recovery, reintroducing unwanted data into the table.
+ nothing of dirty buffers that may exist inside the buffer manager.
*/
void
mdimmedsync(SMgrRelation reln, ForkNumber forknum)
{
int segno;
- int min_inactive_seg;
/*
* NOTE: mdnblocks makes sure we have opened all active segments, so that
@@ -895,16 +890,7 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
*/
mdnblocks(reln, forknum);
- min_inactive_seg = segno = reln->md_num_open_segs[forknum];
- /*
- * Temporarily open inactive segments, then close them after sync. There
- * may be some inactive segments left opened after fsync() error, but that
- * is harmless. We don't bother to clean them up and take a risk of
- * further trouble. The next mdclose() will soon close them.
- */
- while (_mdfd_openseg(reln, forknum, segno, 0) != NULL)
- segno++;
+ segno = reln->md_num_open_segs[forknum];
while (segno > 0)
{
@@ -915,14 +901,6 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m",
FilePathName(v->mdfd_vfd))));
- /* Close inactive segments immediately */
- if (segno > min_inactive_seg)
- {
- FileClose(v->mdfd_vfd);
- _fdvec_resize(reln, forknum, segno - 1);
- }
segno--;
}
}

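The removed mdimmedsync() code opens segment files beyond those currently open so that inactive segments are fsync'd too, then closes the temporarily opened ones right away. A rough standalone sketch of that idea with plain POSIX calls and a hypothetical "base", "base.1", "base.2", ... naming scheme (not md.c's actual segment handling):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Fsync every existing segment of a relation-like file set.  Returns 0 on
     * success, -1 on fsync failure (caller reports the error). */
    static int
    sync_all_segments(const char *base)
    {
        for (int segno = 0;; segno++)
        {
            char path[1024];
            int  fd;

            if (segno == 0)
                snprintf(path, sizeof(path), "%s", base);
            else
                snprintf(path, sizeof(path), "%s.%d", base, segno);

            fd = open(path, O_RDWR);
            if (fd < 0)
                return 0;        /* no further segment: done */

            if (fsync(fd) < 0)
            {
                close(fd);
                return -1;
            }
            close(fd);           /* the "inactive" segment is closed immediately */
        }
    }

    int
    main(void)
    {
        return sync_all_segments("relfile") == 0 ? 0 : 1;
    }
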
src/backend/storage/smgr/smgr.c

@@ -388,41 +388,6 @@ smgrdounlink(SMgrRelation reln, bool isRedo)
smgrsw[which].smgr_unlink(rnode, InvalidForkNumber, isRedo);
}
- /*
- * smgrdosyncall() -- Immediately sync all forks of all given relations
- *
- * All forks of all given relations are synced out to the store.
- *
- * This is equivalent to FlushRelationBuffers() for each smgr relation,
- * then calling smgrimmedsync() for all forks of each relation, but it's
- * significantly quicker so should be preferred when possible.
- */
- void
- smgrdosyncall(SMgrRelation *rels, int nrels)
- {
- int i = 0;
- ForkNumber forknum;
- if (nrels == 0)
- return;
- FlushRelationsAllBuffers(rels, nrels);
- /*
- * Sync the physical file(s).
- */
- for (i = 0; i < nrels; i++)
- {
- int which = rels[i]->smgr_which;
- for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
- {
- if (smgrsw[which].smgr_exists(rels[i], forknum))
- smgrsw[which].smgr_immedsync(rels[i], forknum);
- }
- }
- }
/*
* smgrdounlinkall() -- Immediately unlink all forks of all given relations
*