Refactor XLogOpenRelation() and XLogReadBuffer() in preparation for relation

forks. XLogOpenRelation() and the associated light-weight relation cache in xlogutils.c is gone, and XLogReadBuffer() now takes a RelFileNode as argument, instead of Relation. For functions that still need a Relation struct during WAL replay, there's a new function called CreateFakeRelcacheEntry() that returns a fake entry like XLogOpenRelation() used to.
2025-07-14 08:21:07 +03:00 · 2008-06-12 09:12:31 +00:00
parent c4f2a0458d
commit a213f1ee6c
20 changed files with 295 additions and 427 deletions
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@ -11,7 +11,7 @@
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.54 2008/06/08 22:00:47 alvherre Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.55 2008/06/12 09:12:30 heikki Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -190,6 +190,9 @@ XLogCheckInvalidPages(void)

 	if (foundone)
 		elog(PANIC, "WAL contains references to invalid pages");
+
+	hash_destroy(invalid_page_tab);
+	invalid_page_tab = NULL;
 }


@ -218,27 +221,40 @@ XLogCheckInvalidPages(void)
 * at the end of WAL replay.)
 */
 Buffer
-XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
+XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
 {
-	BlockNumber lastblock = RelationGetNumberOfBlocks(reln);
+	BlockNumber lastblock;
 	Buffer		buffer;
+	SMgrRelation smgr;

 	Assert(blkno != P_NEW);

+	/* Open the relation at smgr level */
+	smgr = smgropen(rnode);
+
+	/*
+	 * Create the target file if it doesn't already exist.  This lets us cope
+	 * if the replay sequence contains writes to a relation that is later
+	 * deleted.  (The original coding of this routine would instead suppress
+	 * the writes, but that seems like it risks losing valuable data if the
+	 * filesystem loses an inode during a crash.  Better to write the data
+	 * until we are actually told to delete the file.)
+	 */
+	smgrcreate(smgr, false, true);
+
+	lastblock = smgrnblocks(smgr);
+
 	if (blkno < lastblock)
 	{
 		/* page exists in file */
-		if (init)
-			buffer = ReadOrZeroBuffer(reln, blkno);
-		else
-			buffer = ReadBuffer(reln, blkno);
+		buffer = ReadBufferWithoutRelcache(rnode, false, blkno, init);
 	}
 	else
 	{
 		/* hm, page doesn't exist in file */
 		if (!init)
 		{
-			log_invalid_page(reln->rd_node, blkno, false);
+			log_invalid_page(rnode, blkno, false);
 			return InvalidBuffer;
 		}
 		/* OK to extend the file */
@ -249,7 +265,7 @@ XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
 		{
 			if (buffer != InvalidBuffer)
 				ReleaseBuffer(buffer);
-			buffer = ReadBuffer(reln, P_NEW);
+			buffer = ReadBufferWithoutRelcache(rnode, false, P_NEW, false);
 			lastblock++;
 		}
 		Assert(BufferGetBlockNumber(buffer) == blkno);
@ -265,7 +281,7 @@ XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
 		if (PageIsNew((PageHeader) page))
 		{
 			UnlockReleaseBuffer(buffer);
-			log_invalid_page(reln->rd_node, blkno, true);
+			log_invalid_page(rnode, blkno, true);
 			return InvalidBuffer;
 		}
 	}
@ -275,226 +291,81 @@ XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)


 /*
- * Lightweight "Relation" cache --- this substitutes for the normal relcache
- * during XLOG replay.
+ * Struct actually returned by XLogFakeRelcacheEntry, though the declared
+ * return type is Relation.
 */
-
-typedef struct XLogRelDesc
+typedef struct
 {
-	RelationData reldata;
-	struct XLogRelDesc *lessRecently;
-	struct XLogRelDesc *moreRecently;
-} XLogRelDesc;
+	RelationData		reldata;	/* Note: this must be first */
+	FormData_pg_class	pgc;
+} FakeRelCacheEntryData;

-typedef struct XLogRelCacheEntry
+typedef FakeRelCacheEntryData *FakeRelCacheEntry;
+
+/*
+ * Create a fake relation cache entry for a physical relation
+ *
+ * It's often convenient to use the same functions in XLOG replay as in the
+ * main codepath, but those functions typically work with a relcache entry. 
+ * We don't have a working relation cache during XLOG replay, but this 
+ * function can be used to create a fake relcache entry instead. Only the 
+ * fields related to physical storage, like rd_rel, are initialized, so the 
+ * fake entry is only usable in low-level operations like ReadBuffer().
+ *
+ * Caller must free the returned entry with FreeFakeRelcacheEntry().
+ */
+Relation
+CreateFakeRelcacheEntry(RelFileNode rnode)
 {
-	RelFileNode rnode;
-	XLogRelDesc *rdesc;
-} XLogRelCacheEntry;
+	FakeRelCacheEntry fakeentry;
+	Relation rel;

-static HTAB *_xlrelcache;
-static XLogRelDesc *_xlrelarr = NULL;
-static Form_pg_class _xlpgcarr = NULL;
-static int	_xlast = 0;
-static int	_xlcnt = 0;
+	/* Allocate the Relation struct and all related space in one block. */
+	fakeentry = palloc0(sizeof(FakeRelCacheEntryData));
+	rel = (Relation) fakeentry;

-#define _XLOG_RELCACHESIZE	512
+	rel->rd_rel = &fakeentry->pgc;
+	rel->rd_node = rnode;

-static void
-_xl_init_rel_cache(void)
-{
-	HASHCTL		ctl;
+	/* We don't know the name of the relation; use relfilenode instead */
+	sprintf(RelationGetRelationName(rel), "%u", rnode.relNode);

-	_xlcnt = _XLOG_RELCACHESIZE;
-	_xlast = 0;
-	_xlrelarr = (XLogRelDesc *) malloc(sizeof(XLogRelDesc) * _xlcnt);
-	memset(_xlrelarr, 0, sizeof(XLogRelDesc) * _xlcnt);
-	_xlpgcarr = (Form_pg_class) malloc(sizeof(FormData_pg_class) * _xlcnt);
-	memset(_xlpgcarr, 0, sizeof(FormData_pg_class) * _xlcnt);
+	/*
+	 * We set up the lockRelId in case anything tries to lock the dummy
+	 * relation.  Note that this is fairly bogus since relNode may be
+	 * different from the relation's OID.  It shouldn't really matter
+	 * though, since we are presumably running by ourselves and can't have
+	 * any lock conflicts ...
+	 */
+	rel->rd_lockInfo.lockRelId.dbId = rnode.dbNode;
+	rel->rd_lockInfo.lockRelId.relId = rnode.relNode;

-	_xlrelarr[0].moreRecently = &(_xlrelarr[0]);
-	_xlrelarr[0].lessRecently = &(_xlrelarr[0]);
+	rel->rd_targblock = InvalidBlockNumber;
+	rel->rd_smgr = NULL;

-	memset(&ctl, 0, sizeof(ctl));
-	ctl.keysize = sizeof(RelFileNode);
-	ctl.entrysize = sizeof(XLogRelCacheEntry);
-	ctl.hash = tag_hash;
-
-	_xlrelcache = hash_create("XLOG relcache", _XLOG_RELCACHESIZE,
-							  &ctl, HASH_ELEM | HASH_FUNCTION);
-}
-
-static void
-_xl_remove_hash_entry(XLogRelDesc *rdesc)
-{
-	Form_pg_class tpgc = rdesc->reldata.rd_rel;
-	XLogRelCacheEntry *hentry;
-
-	rdesc->lessRecently->moreRecently = rdesc->moreRecently;
-	rdesc->moreRecently->lessRecently = rdesc->lessRecently;
-
-	hentry = (XLogRelCacheEntry *) hash_search(_xlrelcache,
-					  (void *) &(rdesc->reldata.rd_node), HASH_REMOVE, NULL);
-	if (hentry == NULL)
-		elog(PANIC, "_xl_remove_hash_entry: file was not found in cache");
-
-	RelationCloseSmgr(&(rdesc->reldata));
-
-	memset(rdesc, 0, sizeof(XLogRelDesc));
-	memset(tpgc, 0, sizeof(FormData_pg_class));
-	rdesc->reldata.rd_rel = tpgc;
-}
-
-static XLogRelDesc *
-_xl_new_reldesc(void)
-{
-	XLogRelDesc *res;
-
-	_xlast++;
-	if (_xlast < _xlcnt)
-	{
-		_xlrelarr[_xlast].reldata.rd_rel = &(_xlpgcarr[_xlast]);
-		return &(_xlrelarr[_xlast]);
-	}
-
-	/* reuse */
-	res = _xlrelarr[0].moreRecently;
-
-	_xl_remove_hash_entry(res);
-
-	_xlast--;
-	return res;
-}
-
-
-void
-XLogInitRelationCache(void)
-{
-	_xl_init_rel_cache();
-	invalid_page_tab = NULL;
-}
-
-void
-XLogCloseRelationCache(void)
-{
-	HASH_SEQ_STATUS status;
-	XLogRelCacheEntry *hentry;
-
-	if (!_xlrelarr)
-		return;
-
-	hash_seq_init(&status, _xlrelcache);
-
-	while ((hentry = (XLogRelCacheEntry *) hash_seq_search(&status)) != NULL)
-		_xl_remove_hash_entry(hentry->rdesc);
-
-	hash_destroy(_xlrelcache);
-
-	free(_xlrelarr);
-	free(_xlpgcarr);
-
-	_xlrelarr = NULL;
+	return rel;
 }

 /*
- * Open a relation during XLOG replay
- *
- * Note: this once had an API that allowed NULL return on failure, but it
- * no longer does; any failure results in elog().
+ * Free a fake relation cache entry.
 */
-Relation
-XLogOpenRelation(RelFileNode rnode)
+void
+FreeFakeRelcacheEntry(Relation fakerel)
 {
-	XLogRelDesc *res;
-	XLogRelCacheEntry *hentry;
-	bool		found;
-
-	hentry = (XLogRelCacheEntry *)
-		hash_search(_xlrelcache, (void *) &rnode, HASH_FIND, NULL);
-
-	if (hentry)
-	{
-		res = hentry->rdesc;
-
-		res->lessRecently->moreRecently = res->moreRecently;
-		res->moreRecently->lessRecently = res->lessRecently;
-	}
-	else
-	{
-		res = _xl_new_reldesc();
-
-		sprintf(RelationGetRelationName(&(res->reldata)), "%u", rnode.relNode);
-
-		res->reldata.rd_node = rnode;
-
-		/*
-		 * We set up the lockRelId in case anything tries to lock the dummy
-		 * relation.  Note that this is fairly bogus since relNode may be
-		 * different from the relation's OID.  It shouldn't really matter
-		 * though, since we are presumably running by ourselves and can't have
-		 * any lock conflicts ...
-		 */
-		res->reldata.rd_lockInfo.lockRelId.dbId = rnode.dbNode;
-		res->reldata.rd_lockInfo.lockRelId.relId = rnode.relNode;
-
-		hentry = (XLogRelCacheEntry *)
-			hash_search(_xlrelcache, (void *) &rnode, HASH_ENTER, &found);
-
-		if (found)
-			elog(PANIC, "xlog relation already present on insert into cache");
-
-		hentry->rdesc = res;
-
-		res->reldata.rd_targblock = InvalidBlockNumber;
-		res->reldata.rd_smgr = NULL;
-		RelationOpenSmgr(&(res->reldata));
-
-		/*
-		 * Create the target file if it doesn't already exist.  This lets us
-		 * cope if the replay sequence contains writes to a relation that is
-		 * later deleted.  (The original coding of this routine would instead
-		 * return NULL, causing the writes to be suppressed. But that seems
-		 * like it risks losing valuable data if the filesystem loses an inode
-		 * during a crash.	Better to write the data until we are actually
-		 * told to delete the file.)
-		 */
-		smgrcreate(res->reldata.rd_smgr, res->reldata.rd_istemp, true);
-	}
-
-	res->moreRecently = &(_xlrelarr[0]);
-	res->lessRecently = _xlrelarr[0].lessRecently;
-	_xlrelarr[0].lessRecently = res;
-	res->lessRecently->moreRecently = res;
-
-	return &(res->reldata);
+	pfree(fakerel);
 }

 /*
 * Drop a relation during XLOG replay
 *
- * This is called when the relation is about to be deleted; we need to ensure
- * that there is no dangling smgr reference in the xlog relation cache.
- *
- * Currently, we don't bother to physically remove the relation from the
- * cache, we just let it age out normally.
- *
- * This also takes care of removing any open "invalid-page" records for
- * the relation.
+ * This is called when the relation is about to be deleted; we need to remove
+ * any open "invalid-page" records for the relation.
 */
 void
 XLogDropRelation(RelFileNode rnode)
 {
-	XLogRelCacheEntry *hentry;
-
-	hentry = (XLogRelCacheEntry *)
-		hash_search(_xlrelcache, (void *) &rnode, HASH_FIND, NULL);
-
-	if (hentry)
-	{
-		XLogRelDesc *rdesc = hentry->rdesc;
-
-		RelationCloseSmgr(&(rdesc->reldata));
-	}
+	/* Tell smgr to forget about this relation as well */
+	smgrclosenode(rnode);

 	forget_invalid_pages(rnode, 0);
 }
@ -507,18 +378,14 @@ XLogDropRelation(RelFileNode rnode)
 void
 XLogDropDatabase(Oid dbid)
 {
-	HASH_SEQ_STATUS status;
-	XLogRelCacheEntry *hentry;
-
-	hash_seq_init(&status, _xlrelcache);
-
-	while ((hentry = (XLogRelCacheEntry *) hash_seq_search(&status)) != NULL)
-	{
-		XLogRelDesc *rdesc = hentry->rdesc;
-
-		if (hentry->rnode.dbNode == dbid)
-			RelationCloseSmgr(&(rdesc->reldata));
-	}
+	/*
+	 * This is unnecessarily heavy-handed, as it will close SMgrRelation
+	 * objects for other databases as well. DROP DATABASE occurs seldom
+	 * enough that it's not worth introducing a variant of smgrclose for
+	 * just this purpose. XXX: Or should we rather leave the smgr entries
+	 * dangling?
+	 */
+	smgrcloseall();

 	forget_invalid_pages_db(dbid);
 }
@ -526,8 +393,7 @@ XLogDropDatabase(Oid dbid)
 /*
 * Truncate a relation during XLOG replay
 *
- * We don't need to do anything to the fake relcache, but we do need to
- * clean up any open "invalid-page" records for the dropped pages.
+ * We need to clean up any open "invalid-page" records for the dropped pages.
 */
 void
 XLogTruncateRelation(RelFileNode rnode, BlockNumber nblocks)