diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c index a0453b36cde..a276eb020b5 100644 --- a/src/backend/access/gin/ginvacuum.c +++ b/src/backend/access/gin/ginvacuum.c @@ -231,6 +231,7 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn END_CRIT_SECTION(); + gvs->result->pages_newly_deleted++; gvs->result->pages_deleted++; } diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c index ddecb8ab18e..0663193531a 100644 --- a/src/backend/access/gist/gistvacuum.c +++ b/src/backend/access/gist/gistvacuum.c @@ -133,9 +133,21 @@ gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, MemoryContext oldctx; /* - * Reset counts that will be incremented during the scan; needed in case - * of multiple scans during a single VACUUM command. + * Reset fields that track information about the entire index now. This + * avoids double-counting in the case where a single VACUUM command + * requires multiple scans of the index. + * + * Avoid resetting the tuples_removed and pages_newly_deleted fields here, + * since they track information about the VACUUM command, and so must last + * across each call to gistvacuumscan(). + * + * (Note that pages_free is treated as state about the whole index, not + * the current VACUUM. This is appropriate because RecordFreeIndexPage() + * calls are idempotent, and get repeated for the same deleted pages in + * some scenarios. The point for us is to track the number of recyclable + * pages in the index at the end of the VACUUM command.) */ + stats->num_pages = 0; stats->estimated_count = false; stats->num_index_tuples = 0; stats->pages_deleted = 0; @@ -281,8 +293,8 @@ restart: { /* Okay to recycle this page */ RecordFreeIndexPage(rel, blkno); - vstate->stats->pages_free++; vstate->stats->pages_deleted++; + vstate->stats->pages_free++; } else if (GistPageIsDeleted(page)) { @@ -636,6 +648,7 @@ gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, /* mark the page as deleted */ MarkBufferDirty(leafBuffer); GistPageSetDeleted(leafPage, txid); + stats->pages_newly_deleted++; stats->pages_deleted++; /* remove the downlink from the parent */ diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 0bb78162f54..d8f847b0e66 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -2521,9 +2521,11 @@ lazy_cleanup_index(Relation indrel, (*stats)->num_index_tuples, (*stats)->num_pages), errdetail("%.0f index row versions were removed.\n" - "%u index pages have been deleted, %u are currently reusable.\n" + "%u index pages were newly deleted.\n" + "%u index pages are currently deleted, of which %u are currently reusable.\n" "%s.", (*stats)->tuples_removed, + (*stats)->pages_newly_deleted, (*stats)->pages_deleted, (*stats)->pages_free, pg_rusage_show(&ru0)))); } diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index a43805a7b09..629a23628ef 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -50,7 +50,7 @@ static bool _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno, bool *rightsib_empty, - uint32 *ndeleted); + BTVacState *vstate); static bool _bt_lock_subtree_parent(Relation rel, BlockNumber child, BTStack stack, Buffer *subtreeparent, @@ -1760,20 +1760,22 @@ _bt_rightsib_halfdeadflag(Relation rel, BlockNumber leafrightsib) * should never pass a buffer containing an existing deleted page here. The * lock and pin on caller's buffer will be dropped before we return. * - * Returns the number of pages successfully deleted (zero if page cannot - * be deleted now; could be more than one if parent or right sibling pages - * were deleted too). Note that this does not include pages that we delete - * that the btvacuumscan scan has yet to reach; they'll get counted later - * instead. + * Maintains bulk delete stats for caller, which are taken from vstate. We + * need to cooperate closely with caller here so that whole VACUUM operation + * reliably avoids any double counting of subsidiary-to-leafbuf pages that we + * delete in passing. If such pages happen to be from a block number that is + * ahead of the current scanblkno position, then caller is expected to count + * them directly later on. It's simpler for us to understand caller's + * requirements than it would be for caller to understand when or how a + * deleted page became deleted after the fact. * * NOTE: this leaks memory. Rather than trying to clean up everything * carefully, it's better to run it in a temp context that can be reset * frequently. */ -uint32 -_bt_pagedel(Relation rel, Buffer leafbuf) +void +_bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate) { - uint32 ndeleted = 0; BlockNumber rightsib; bool rightsib_empty; Page page; @@ -1781,7 +1783,8 @@ _bt_pagedel(Relation rel, Buffer leafbuf) /* * Save original leafbuf block number from caller. Only deleted blocks - * that are <= scanblkno get counted in ndeleted return value. + * that are <= scanblkno are added to bulk delete stat's pages_deleted + * count. */ BlockNumber scanblkno = BufferGetBlockNumber(leafbuf); @@ -1843,7 +1846,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf) RelationGetRelationName(rel)))); _bt_relbuf(rel, leafbuf); - return ndeleted; + return; } /* @@ -1873,7 +1876,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf) Assert(!P_ISHALFDEAD(opaque)); _bt_relbuf(rel, leafbuf); - return ndeleted; + return; } /* @@ -1922,8 +1925,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf) if (_bt_leftsib_splitflag(rel, leftsib, leafblkno)) { ReleaseBuffer(leafbuf); - Assert(ndeleted == 0); - return ndeleted; + return; } /* we need an insertion scan key for the search, so build one */ @@ -1964,7 +1966,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf) if (!_bt_mark_page_halfdead(rel, leafbuf, stack)) { _bt_relbuf(rel, leafbuf); - return ndeleted; + return; } } @@ -1979,7 +1981,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf) { /* Check for interrupts in _bt_unlink_halfdead_page */ if (!_bt_unlink_halfdead_page(rel, leafbuf, scanblkno, - &rightsib_empty, &ndeleted)) + &rightsib_empty, vstate)) { /* * _bt_unlink_halfdead_page should never fail, since we @@ -1990,7 +1992,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf) * lock and pin on leafbuf for us. */ Assert(false); - return ndeleted; + return; } } @@ -2026,8 +2028,6 @@ _bt_pagedel(Relation rel, Buffer leafbuf) leafbuf = _bt_getbuf(rel, rightsib, BT_WRITE); } - - return ndeleted; } /* @@ -2262,9 +2262,10 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack) */ static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno, - bool *rightsib_empty, uint32 *ndeleted) + bool *rightsib_empty, BTVacState *vstate) { BlockNumber leafblkno = BufferGetBlockNumber(leafbuf); + IndexBulkDeleteResult *stats = vstate->stats; BlockNumber leafleftsib; BlockNumber leafrightsib; BlockNumber target; @@ -2674,12 +2675,17 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno, _bt_relbuf(rel, buf); /* - * If btvacuumscan won't revisit this page in a future btvacuumpage call - * and count it as deleted then, we count it as deleted by current - * btvacuumpage call + * Maintain pages_newly_deleted, which is simply the number of pages + * deleted by the ongoing VACUUM operation. + * + * Maintain pages_deleted in a way that takes into account how + * btvacuumpage() will count deleted pages that have yet to become + * scanblkno -- only count page when it's not going to get that treatment + * later on. */ + stats->pages_newly_deleted++; if (target <= scanblkno) - (*ndeleted)++; + stats->pages_deleted++; return true; } diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 3b2e0aa5cb7..504f5bef17a 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -38,17 +38,6 @@ #include "utils/memutils.h" -/* Working state needed by btvacuumpage */ -typedef struct -{ - IndexVacuumInfo *info; - IndexBulkDeleteResult *stats; - IndexBulkDeleteCallback callback; - void *callback_state; - BTCycleId cycleid; - MemoryContext pagedelcontext; -} BTVacState; - /* * BTPARALLEL_NOT_INITIALIZED indicates that the scan has not started. * @@ -1016,9 +1005,9 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, * avoids double-counting in the case where a single VACUUM command * requires multiple scans of the index. * - * Avoid resetting the tuples_removed field here, since it tracks - * information about the VACUUM command, and so must last across each call - * to btvacuumscan(). + * Avoid resetting the tuples_removed and pages_newly_deleted fields here, + * since they track information about the VACUUM command, and so must last + * across each call to btvacuumscan(). * * (Note that pages_free is treated as state about the whole index, not * the current VACUUM. This is appropriate because RecordFreeIndexPage() @@ -1237,11 +1226,13 @@ backtrack: } else if (P_ISHALFDEAD(opaque)) { - /* - * Half-dead leaf page. Try to delete now. Might update - * pages_deleted below. - */ + /* Half-dead leaf page (from interrupted VACUUM) -- finish deleting */ attempt_pagedel = true; + + /* + * _bt_pagedel() will increment both pages_newly_deleted and + * pages_deleted stats in all cases (barring corruption) + */ } else if (P_ISLEAF(opaque)) { @@ -1451,12 +1442,12 @@ backtrack: oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext); /* - * We trust the _bt_pagedel return value because it does not include - * any page that a future call here from btvacuumscan is expected to - * count. There will be no double-counting. + * _bt_pagedel maintains the bulk delete stats on our behalf; + * pages_newly_deleted and pages_deleted are likely to be incremented + * during call */ Assert(blkno == scanblkno); - stats->pages_deleted += _bt_pagedel(rel, buf); + _bt_pagedel(rel, buf, vstate); MemoryContextSwitchTo(oldcontext); /* pagedel released buffer, so we shouldn't */ diff --git a/src/backend/access/spgist/spgvacuum.c b/src/backend/access/spgist/spgvacuum.c index 0d02a02222e..a9ffca5183b 100644 --- a/src/backend/access/spgist/spgvacuum.c +++ b/src/backend/access/spgist/spgvacuum.c @@ -891,6 +891,7 @@ spgvacuumscan(spgBulkDeleteState *bds) /* Report final stats */ bds->stats->num_pages = num_pages; + bds->stats->pages_newly_deleted = bds->stats->pages_deleted; bds->stats->pages_free = bds->stats->pages_deleted; } diff --git a/src/include/access/genam.h b/src/include/access/genam.h index ffa1a4c80db..4515401869f 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -63,8 +63,11 @@ typedef struct IndexVacuumInfo * of which this is just the first field; this provides a way for ambulkdelete * to communicate additional private data to amvacuumcleanup. * - * Note: pages_deleted and pages_free refer to free space within the index - * file. Some index AMs may compute num_index_tuples by reference to + * Note: pages_newly_deleted is the number of pages in the index that were + * deleted by the current vacuum operation. pages_deleted and pages_free + * refer to free space within the index file. + * + * Note: Some index AMs may compute num_index_tuples by reference to * num_heap_tuples, in which case they should copy the estimated_count field * from IndexVacuumInfo. */ @@ -74,7 +77,8 @@ typedef struct IndexBulkDeleteResult bool estimated_count; /* num_index_tuples is an estimate */ double num_index_tuples; /* tuples remaining */ double tuples_removed; /* # removed during vacuum operation */ - BlockNumber pages_deleted; /* # unused pages in index */ + BlockNumber pages_newly_deleted; /* # pages marked deleted by us */ + BlockNumber pages_deleted; /* # pages marked deleted (could be by us) */ BlockNumber pages_free; /* # pages available for reuse */ } IndexBulkDeleteResult; diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 9ac90d74398..b56b7b7868e 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -312,6 +312,20 @@ BTPageIsRecyclable(Page page) return false; } +/* + * BTVacState is private nbtree.c state used during VACUUM. It is exported + * for use by page deletion related code in nbtpage.c. + */ +typedef struct BTVacState +{ + IndexVacuumInfo *info; + IndexBulkDeleteResult *stats; + IndexBulkDeleteCallback callback; + void *callback_state; + BTCycleId cycleid; + MemoryContext pagedelcontext; +} BTVacState; + /* * Lehman and Yao's algorithm requires a ``high key'' on every non-rightmost * page. The high key is not a tuple that is used to visit the heap. It is @@ -1181,7 +1195,7 @@ extern void _bt_delitems_vacuum(Relation rel, Buffer buf, extern void _bt_delitems_delete_check(Relation rel, Buffer buf, Relation heapRel, TM_IndexDeleteOp *delstate); -extern uint32 _bt_pagedel(Relation rel, Buffer leafbuf); +extern void _bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate); /* * prototypes for functions in nbtsearch.c