Mirror of https://github.com/postgres/postgres.git, synced 2025-11-12 05:01:15 +03:00
Port single-page btree vacuum logic to hash indexes.
This is advantageous for hash indexes for the same reasons it's good for btrees: it accelerates space recycling, reducing bloat.

Ashutosh Sharma, reviewed by Amit Kapila and by me. A bit of additional hacking by me.

Discussion: http://postgr.es/m/CAE9k0PkRSyzx8dOnokEpUi2A-RFZK72WN0h9DEMv_ut9q6bPRw@mail.gmail.com
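For context, the reclamation logic below keys off two page-level hints kept in each hash page's special space. Here is a paraphrased sketch of the relevant definitions, modeled on src/include/access/hash.h as this commit leaves it; treat the exact bit positions as assumptions rather than a quote:

    /* Sketch of the hash page flag bits (paraphrased; bit values assumed). */
    #define LH_OVERFLOW_PAGE        (1 << 0)
    #define LH_BUCKET_PAGE          (1 << 1)
    #define LH_BITMAP_PAGE          (1 << 2)
    #define LH_META_PAGE            (1 << 3)
    #define LH_PAGE_HAS_DEAD_TUPLES (1 << 7)    /* page holds LP_DEAD items */

    /* Mask out hint bits to recover the bare page type. */
    #define LH_PAGE_TYPE \
        (LH_OVERFLOW_PAGE | LH_BUCKET_PAGE | LH_BITMAP_PAGE | LH_META_PAGE)

    /* Test used by _hash_doinsert() below. */
    #define H_HAS_DEAD_TUPLES(opaque) \
        ((opaque)->hasho_flag & LH_PAGE_HAS_DEAD_TUPLES)

Because hasho_flag can now carry LH_PAGE_HAS_DEAD_TUPLES alongside the page type, code that compared the flag word for equality must mask with LH_PAGE_TYPE first, which is why one Assert changes in the diff below.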
src/backend/access/hash/hashinsert.c
@@ -17,9 +17,14 @@
 #include "access/hash.h"
+#include "access/hash_xlog.h"
+#include "access/heapam.h"
 #include "miscadmin.h"
 #include "utils/rel.h"
+#include "storage/lwlock.h"
+#include "storage/buf_internals.h"
+
+static void _hash_vacuum_one_page(Relation rel, Buffer metabuf, Buffer buf,
+                                  RelFileNode hnode);
 
 /*
  * _hash_doinsert() -- Handle insertion of a single index tuple.
@@ -28,7 +33,7 @@
  *    and hashinsert.  By here, itup is completely filled in.
  */
 void
-_hash_doinsert(Relation rel, IndexTuple itup)
+_hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel)
 {
     Buffer      buf = InvalidBuffer;
     Buffer      bucket_buf;
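The extra heapRel parameter has to come from somewhere. The aminsert entry point already receives the heap relation, so the hash AM's insert callback can simply pass it through. A minimal sketch of that caller, assuming the standard aminsert signature (the real hashinsert() in hash.c is not shown in this excerpt and differs in details):

    bool
    hashinsert(Relation rel, Datum *values, bool *isnull,
               ItemPointer ht_ctid, Relation heapRel,
               IndexUniqueCheck checkUnique)
    {
        Datum       index_values[1];
        bool        index_isnull[1];
        IndexTuple  itup;

        /* Convert the datum to a hash key; if that fails, index nothing. */
        if (!_hash_convert_tuple(rel, values, isnull,
                                 index_values, index_isnull))
            return false;

        /* Form an index tuple and point it at the heap tuple. */
        itup = index_form_tuple(RelationGetDescr(rel),
                                index_values, index_isnull);
        itup->t_tid = *ht_ctid;

        _hash_doinsert(rel, itup, heapRel);    /* heapRel is the new argument */

        pfree(itup);
        return false;    /* hash indexes never enforce uniqueness */
    }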
@@ -118,10 +123,30 @@ restart_insert:
     /* Do the insertion */
     while (PageGetFreeSpace(page) < itemsz)
     {
+        BlockNumber nextblkno;
+
+        /*
+         * Check if the current page has any DEAD tuples.  If yes, delete
+         * those tuples and see if we can get space for the new item before
+         * moving to the next page in the bucket chain.
+         */
+        if (H_HAS_DEAD_TUPLES(pageopaque))
+        {
+            if (IsBufferCleanupOK(buf))
+            {
+                _hash_vacuum_one_page(rel, metabuf, buf, heapRel->rd_node);
+
+                if (PageGetFreeSpace(page) >= itemsz)
+                    break;      /* OK, now we have enough space */
+            }
+        }
+
         /*
          * no space on this page; check for an overflow page
          */
-        BlockNumber nextblkno = pageopaque->hasho_nextblkno;
+        nextblkno = pageopaque->hasho_nextblkno;
 
         if (BlockNumberIsValid(nextblkno))
         {
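For completeness, the LP_DEAD bits consumed above have to be set somewhere: during index scans, items are marked dead when the executor reports that the referenced heap tuple is no longer visible to any transaction (the kill_prior_tuple mechanism). A hedged sketch of that marking step, assuming the page is already locked; the tree's actual implementation is _hash_kill_items() in hashutil.c and differs in details:

    static void
    mark_item_dead_sketch(Buffer buf, OffsetNumber offnum)
    {
        Page            page = BufferGetPage(buf);
        ItemId          itemId = PageGetItemId(page, offnum);
        HashPageOpaque  opaque = (HashPageOpaque) PageGetSpecialPointer(page);

        ItemIdMarkDead(itemId);                         /* LP_DEAD hint bit */
        opaque->hasho_flag |= LH_PAGE_HAS_DEAD_TUPLES;  /* page-level hint */

        /* Hint-bit changes need no WAL; marking the buffer hint-dirty suffices. */
        MarkBufferDirtyHint(buf, true);
    }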
@@ -157,7 +182,7 @@ restart_insert:
             Assert(PageGetFreeSpace(page) >= itemsz);
         }
         pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
-        Assert(pageopaque->hasho_flag == LH_OVERFLOW_PAGE);
+        Assert((pageopaque->hasho_flag & LH_PAGE_TYPE) == LH_OVERFLOW_PAGE);
         Assert(pageopaque->hasho_bucket == bucket);
     }
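Why this Assert changes: once LH_PAGE_HAS_DEAD_TUPLES can be OR'd into hasho_flag, the flag word is no longer equal to a bare page-type constant. A hypothetical two-line illustration:

    uint16 flags = LH_OVERFLOW_PAGE | LH_PAGE_HAS_DEAD_TUPLES;

    Assert(flags == LH_OVERFLOW_PAGE);                   /* would now fail */
    Assert((flags & LH_PAGE_TYPE) == LH_OVERFLOW_PAGE);  /* masked: passes */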
@@ -300,3 +325,93 @@ _hash_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups,
                  RelationGetRelationName(rel));
     }
 }
+
+/*
+ * _hash_vacuum_one_page - vacuum just one index page.
+ *
+ * Try to remove LP_DEAD items from the given page.  We must acquire a
+ * cleanup lock on the page being modified before calling this function.
+ */
+static void
+_hash_vacuum_one_page(Relation rel, Buffer metabuf, Buffer buf,
+                      RelFileNode hnode)
+{
+    OffsetNumber deletable[MaxOffsetNumber];
+    int          ndeletable = 0;
+    OffsetNumber offnum,
+                 maxoff;
+    Page         page = BufferGetPage(buf);
+    HashPageOpaque pageopaque;
+    HashMetaPage metap;
+    double       tuples_removed = 0;
+
+    /* Scan each tuple in the page to see if it is marked as LP_DEAD */
+    maxoff = PageGetMaxOffsetNumber(page);
+    for (offnum = FirstOffsetNumber;
+         offnum <= maxoff;
+         offnum = OffsetNumberNext(offnum))
+    {
+        ItemId      itemId = PageGetItemId(page, offnum);
+
+        if (ItemIdIsDead(itemId))
+        {
+            deletable[ndeletable++] = offnum;
+            tuples_removed += 1;
+        }
+    }
+
+    if (ndeletable > 0)
+    {
+        /*
+         * Write-lock the meta page so that we can decrement the tuple count.
+         */
+        LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
+
+        /* No ereport(ERROR) until changes are logged */
+        START_CRIT_SECTION();
+
+        PageIndexMultiDelete(page, deletable, ndeletable);
+
+        pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
+        pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
+
+        metap = HashPageGetMeta(BufferGetPage(metabuf));
+        metap->hashm_ntuples -= tuples_removed;
+
+        MarkBufferDirty(buf);
+        MarkBufferDirty(metabuf);
+
+        /* XLOG stuff */
+        if (RelationNeedsWAL(rel))
+        {
+            xl_hash_vacuum_one_page xlrec;
+            XLogRecPtr  recptr;
+
+            xlrec.hnode = hnode;
+            xlrec.ntuples = tuples_removed;
+
+            XLogBeginInsert();
+            XLogRegisterData((char *) &xlrec, SizeOfHashVacuumOnePage);
+
+            XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+            XLogRegisterBufData(0, (char *) deletable,
+                                ndeletable * sizeof(OffsetNumber));
+
+            XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);
+
+            recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_VACUUM_ONE_PAGE);
+
+            PageSetLSN(BufferGetPage(buf), recptr);
+            PageSetLSN(BufferGetPage(metabuf), recptr);
+        }
+
+        END_CRIT_SECTION();
+
+        /*
+         * Release the write lock on the meta page now that we have updated
+         * the tuple count.
+         */
+        LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
+    }
+}
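The WAL record registered above carries the heap relfilenode (hnode) so that a standby can resolve recovery conflicts against the right relation, plus the deleted offsets as block data. A hedged sketch of what the matching redo logic must do, assuming the usual redo conventions; the real routine is hash_xlog_vacuum_one_page() in hash_xlog.c and additionally handles standby conflict resolution and replay of the meta-page tuple count (block 1 of the record), which this sketch omits:

    static void
    hash_xlog_vacuum_one_page_sketch(XLogReaderState *record)
    {
        XLogRecPtr  lsn = record->EndRecPtr;
        Buffer      buffer;

        if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
        {
            Page    page = BufferGetPage(buffer);
            Size    len;
            char   *ptr = XLogRecGetBlockData(record, 0, &len);

            if (len > 0)
            {
                /* Re-delete exactly the offsets logged at do time. */
                OffsetNumber *deletable = (OffsetNumber *) ptr;

                PageIndexMultiDelete(page, deletable,
                                     len / sizeof(OffsetNumber));
            }

            /* Mirror the do-time clearing of the dead-tuples hint. */
            ((HashPageOpaque) PageGetSpecialPointer(page))->hasho_flag &=
                ~LH_PAGE_HAS_DEAD_TUPLES;

            PageSetLSN(page, lsn);
            MarkBufferDirty(buffer);
        }
        if (BufferIsValid(buffer))
            UnlockReleaseBuffer(buffer);
    }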