1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-18 12:22:09 +03:00

Fix aboriginal mistake in lazy VACUUM's code for truncating away

no-longer-needed pages at the end of a table.  We thought we could throw away
pages containing HEAPTUPLE_DEAD tuples; but this is not so, because such
tuples very likely have index entries pointing at them, and we wouldn't have
removed the index entries.  The problem only emerges in a somewhat unlikely
race condition: the dead tuples have to have been inserted by a transaction
that later aborted, and this has to have happened between VACUUM's initial
scan of the page and then rechecking it for empty in count_nondeletable_pages.
But that timespan will include an index-cleaning pass, so it's not all that
hard to hit.  This seems to explain a couple of previously unsolved bug
reports.
This commit is contained in:
Tom Lane
2007-09-16 02:38:14 +00:00
parent b2f6211d19
commit 60e3aafbbf

View File

@@ -31,7 +31,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.50.4.4 2007/09/12 02:05:53 alvherre Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.50.4.5 2007/09/16 02:38:14 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -842,7 +842,7 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
 }
 /*
- * Rescan end pages to verify that they are (still) empty of needed tuples.
+ * Rescan end pages to verify that they are (still) empty of tuples.
  *
  * Returns number of nondeletable pages (last nonempty page + 1).
  */
@@ -850,7 +850,6 @@
 static BlockNumber
 count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
 {
 	BlockNumber blkno;
-	HeapTupleData tuple;
 	/* Strange coding of loop control is needed because blkno is unsigned */
 	blkno = vacrelstats->rel_pages;
@@ -860,8 +859,7 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
 	Page		page;
 	OffsetNumber offnum,
 				maxoff;
-	bool		tupgone,
-				hastup;
+	bool		hastup;
 	/*
 	 * We don't insert a vacuum delay point here, because we have an
@@ -898,43 +896,13 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
 			itemid = PageGetItemId(page, offnum);
-			if (!ItemIdIsUsed(itemid))
-				continue;
-			tuple.t_datamcxt = NULL;
-			tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
-			tuple.t_len = ItemIdGetLength(itemid);
-			ItemPointerSet(&(tuple.t_self), blkno, offnum);
-			tupgone = false;
-			switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
-			{
-				case HEAPTUPLE_DEAD:
-					tupgone = true;	/* we can delete the tuple */
-					break;
-				case HEAPTUPLE_LIVE:
-					/* Shouldn't be necessary to re-freeze anything */
-					break;
-				case HEAPTUPLE_RECENTLY_DEAD:
-					/*
-					 * If tuple is recently deleted then we must not
-					 * remove it from relation.
-					 */
-					break;
-				case HEAPTUPLE_INSERT_IN_PROGRESS:
-					/* This is an expected case during concurrent vacuum */
-					break;
-				case HEAPTUPLE_DELETE_IN_PROGRESS:
-					/* This is an expected case during concurrent vacuum */
-					break;
-				default:
-					elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
-					break;
-			}
-			if (!tupgone)
+			/*
+			 * Note: any non-unused item should be taken as a reason to keep
+			 * this page.  We formerly thought that DEAD tuples could be
+			 * thrown away, but that's not so, because we'd not have cleaned
+			 * out their index entries.
+			 */
+			if (ItemIdIsUsed(itemid))
 			{
 				hastup = true;
 				break;			/* can stop scanning */