
Set relfrozenxid to oldest extant XID seen by VACUUM.

When VACUUM set relfrozenxid before now, it set it to whatever value was
used to determine which tuples to freeze -- the FreezeLimit cutoff.
This approach was very naive.  The relfrozenxid invariant only requires
that new relfrozenxid values be <= the oldest extant XID remaining in
the table (at the point that the VACUUM operation ends), which in
general might be much more recent than FreezeLimit.
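
The invariant is simple to state in code.  The sketch below is illustrative
only (the function name and both argument names are made up; it is not code
from this commit):

    static inline void
    check_relfrozenxid_invariant(TransactionId candidate_relfrozenxid,
                                 TransactionId oldest_extant_xid)
    {
        /* The candidate may lag behind the oldest extant XID, never lead it */
        Assert(TransactionIdPrecedesOrEquals(candidate_relfrozenxid,
                                             oldest_extant_xid));
    }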

VACUUM now carefully tracks the oldest remaining XID/MultiXactId as it
goes (the oldest remaining values _after_ lazy_scan_prune processing).
The final values are set as the table's new relfrozenxid and new
relminmxid in pg_class at the end of each VACUUM.  The oldest XID might
come from a tuple's xmin, xmax, or xvac fields.  It might even come from
one of the table's remaining MultiXacts.
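
Conceptually the tracking is just a running minimum over every XID that is
left behind.  A simplified sketch of the idea (the helper below is
hypothetical; in the patch the equivalent work happens inside
heap_prepare_freeze_tuple and heap_tuple_would_freeze):

    static inline void
    fold_in_extant_xid(TransactionId xid, TransactionId *NewRelfrozenXid)
    {
        /* Ratchet the running "oldest extant XID" back if this XID is older */
        if (TransactionIdIsNormal(xid) &&
            TransactionIdPrecedes(xid, *NewRelfrozenXid))
            *NewRelfrozenXid = xid;
    }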

Final relfrozenxid values must still be >= FreezeLimit in an aggressive
VACUUM (FreezeLimit still acts as a lower bound on the final value that
aggressive VACUUM can set relfrozenxid to).  Since standard VACUUMs
still make no guarantees about advancing relfrozenxid, they might as
well set relfrozenxid to a value from well before FreezeLimit when the
opportunity presents itself.  In general standard VACUUMs may now set
relfrozenxid to any value > the original relfrozenxid and <= OldestXmin.
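
As a concrete (made-up) example: if the table's relfrozenxid was 1000 going
in, FreezeLimit is 5000, and OldestXmin is 9000, then an aggressive VACUUM
must finish with relfrozenxid somewhere in [5000, 9000], while a standard
VACUUM may finish anywhere in (1000, 9000] -- for instance 3000, if that
happens to be the oldest XID still present when it ends.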

Credit for the general idea of using the oldest extant XID to set
pg_class.relfrozenxid at the end of VACUUM goes to Andres Freund.

Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Andres Freund <andres@anarazel.de>
Reviewed-By: Robert Haas <robertmhaas@gmail.com>
Discussion: https://postgr.es/m/CAH2-WzkymFbz6D_vL+jmqSn_5q1wsFvFrE+37yLgL_Rkfd6Gzg@mail.gmail.com
Peter Geoghegan
2022-04-03 09:57:21 -07:00
parent 05023a237c
commit 0b018fabaa
13 changed files with 717 additions and 305 deletions

src/backend/access/heap/vacuumlazy.c

@@ -144,7 +144,7 @@ typedef struct LVRelState
Relation *indrels;
int nindexes;
/* Aggressive VACUUM (scan all unfrozen pages)? */
/* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
bool aggressive;
/* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
bool skipwithvm;
@@ -173,8 +173,9 @@ typedef struct LVRelState
/* VACUUM operation's target cutoffs for freezing XIDs and MultiXactIds */
TransactionId FreezeLimit;
MultiXactId MultiXactCutoff;
/* Are FreezeLimit/MultiXactCutoff still valid? */
bool freeze_cutoffs_valid;
/* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
TransactionId NewRelfrozenXid;
MultiXactId NewRelminMxid;
/* Error reporting state */
char *relnamespace;
@@ -313,7 +314,8 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
minmulti_updated;
TransactionId OldestXmin,
FreezeLimit;
MultiXactId MultiXactCutoff;
MultiXactId OldestMxact,
MultiXactCutoff;
BlockNumber orig_rel_pages,
new_rel_pages,
new_rel_allvisible;
@@ -345,20 +347,17 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
/*
* Get OldestXmin cutoff, which is used to determine which deleted tuples
* are considered DEAD, not just RECENTLY_DEAD. Also get related cutoffs
* used to determine which XIDs/MultiXactIds will be frozen.
*
* If this is an aggressive VACUUM, then we're strictly required to freeze
* any and all XIDs from before FreezeLimit, so that we will be able to
* safely advance relfrozenxid up to FreezeLimit below (we must be able to
* advance relminmxid up to MultiXactCutoff, too).
* used to determine which XIDs/MultiXactIds will be frozen. If this is
* an aggressive VACUUM then lazy_scan_heap cannot leave behind unfrozen
* XIDs < FreezeLimit (all MXIDs < MultiXactCutoff also need to go away).
*/
aggressive = vacuum_set_xid_limits(rel,
params->freeze_min_age,
params->freeze_table_age,
params->multixact_freeze_min_age,
params->multixact_freeze_table_age,
&OldestXmin, &FreezeLimit,
&MultiXactCutoff);
&OldestXmin, &OldestMxact,
&FreezeLimit, &MultiXactCutoff);
skipwithvm = true;
if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
@@ -505,10 +504,11 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
vacrel->vistest = GlobalVisTestFor(rel);
/* FreezeLimit controls XID freezing (always <= OldestXmin) */
vacrel->FreezeLimit = FreezeLimit;
/* MultiXactCutoff controls MXID freezing */
/* MultiXactCutoff controls MXID freezing (always <= OldestMxact) */
vacrel->MultiXactCutoff = MultiXactCutoff;
/* Track if cutoffs became invalid (possible in !aggressive case only) */
vacrel->freeze_cutoffs_valid = true;
/* Initialize state used to track oldest extant XID/MXID */
vacrel->NewRelfrozenXid = OldestXmin;
vacrel->NewRelminMxid = OldestMxact;
/*
* Call lazy_scan_heap to perform all required heap pruning, index
@@ -542,13 +542,33 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
/*
* Prepare to update rel's pg_class entry.
*
* In principle new_live_tuples could be -1 indicating that we (still)
* don't know the tuple count. In practice that probably can't happen,
* since we'd surely have scanned some pages if the table is new and
* nonempty.
*
* Aggressive VACUUMs must always be able to advance relfrozenxid to a
* value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
* Non-aggressive VACUUMs may advance them by any amount, or not at all.
*/
Assert(vacrel->NewRelfrozenXid == OldestXmin ||
TransactionIdPrecedesOrEquals(aggressive ? FreezeLimit :
vacrel->relfrozenxid,
vacrel->NewRelfrozenXid));
Assert(vacrel->NewRelminMxid == OldestMxact ||
MultiXactIdPrecedesOrEquals(aggressive ? MultiXactCutoff :
vacrel->relminmxid,
vacrel->NewRelminMxid));
if (vacrel->scanned_pages + vacrel->frozenskipped_pages < orig_rel_pages)
{
/*
* Must keep original relfrozenxid in a non-aggressive VACUUM that
* had to skip an all-visible page. The state that tracks new
* values will have missed unfrozen XIDs from the pages we skipped.
*/
Assert(!aggressive);
vacrel->NewRelfrozenXid = InvalidTransactionId;
vacrel->NewRelminMxid = InvalidMultiXactId;
}
/*
* For safety, clamp relallvisible to be not more than what we're setting
* relpages to.
* pg_class.relpages to
*/
new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
visibilitymap_count(rel, &new_rel_allvisible, NULL);
@@ -558,33 +578,14 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
/*
* Now actually update rel's pg_class entry.
*
* Aggressive VACUUM must reliably advance relfrozenxid (and relminmxid).
* We are able to advance relfrozenxid in a non-aggressive VACUUM too,
* provided we didn't skip any all-visible (not all-frozen) pages using
* the visibility map, and assuming that we didn't fail to get a cleanup
* lock that made it unsafe with respect to FreezeLimit (or perhaps our
* MultiXactCutoff) established for VACUUM operation.
* In principle new_live_tuples could be -1 indicating that we (still)
* don't know the tuple count. In practice that can't happen, since we
* scan every page that isn't skipped using the visibility map.
*/
if (vacrel->scanned_pages + vacrel->frozenskipped_pages < orig_rel_pages ||
!vacrel->freeze_cutoffs_valid)
{
/* Cannot advance relfrozenxid/relminmxid */
Assert(!aggressive);
frozenxid_updated = minmulti_updated = false;
vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
new_rel_allvisible, vacrel->nindexes > 0,
InvalidTransactionId, InvalidMultiXactId,
NULL, NULL, false);
}
else
{
Assert(vacrel->scanned_pages + vacrel->frozenskipped_pages ==
orig_rel_pages);
vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
new_rel_allvisible, vacrel->nindexes > 0,
FreezeLimit, MultiXactCutoff,
&frozenxid_updated, &minmulti_updated, false);
}
vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
new_rel_allvisible, vacrel->nindexes > 0,
vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
&frozenxid_updated, &minmulti_updated, false);
/*
* Report results to the stats collector, too.
@@ -692,17 +693,17 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
OldestXmin, diff);
if (frozenxid_updated)
{
diff = (int32) (FreezeLimit - vacrel->relfrozenxid);
diff = (int32) (vacrel->NewRelfrozenXid - vacrel->relfrozenxid);
appendStringInfo(&buf,
_("new relfrozenxid: %u, which is %d xids ahead of previous value\n"),
FreezeLimit, diff);
vacrel->NewRelfrozenXid, diff);
}
if (minmulti_updated)
{
diff = (int32) (MultiXactCutoff - vacrel->relminmxid);
diff = (int32) (vacrel->NewRelminMxid - vacrel->relminmxid);
appendStringInfo(&buf,
_("new relminmxid: %u, which is %d mxids ahead of previous value\n"),
MultiXactCutoff, diff);
vacrel->NewRelminMxid, diff);
}
if (orig_rel_pages > 0)
{
@@ -1582,6 +1583,8 @@ lazy_scan_prune(LVRelState *vacrel,
recently_dead_tuples;
int nnewlpdead;
int nfrozen;
TransactionId NewRelfrozenXid;
MultiXactId NewRelminMxid;
OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
xl_heap_freeze_tuple frozen[MaxHeapTuplesPerPage];
@@ -1591,7 +1594,9 @@ lazy_scan_prune(LVRelState *vacrel,
retry:
/* Initialize (or reset) page-level counters */
/* Initialize (or reset) page-level state */
NewRelfrozenXid = vacrel->NewRelfrozenXid;
NewRelminMxid = vacrel->NewRelminMxid;
tuples_deleted = 0;
lpdead_items = 0;
live_tuples = 0;
@@ -1798,8 +1803,8 @@ retry:
vacrel->relminmxid,
vacrel->FreezeLimit,
vacrel->MultiXactCutoff,
&frozen[nfrozen],
&tuple_totally_frozen))
&frozen[nfrozen], &tuple_totally_frozen,
&NewRelfrozenXid, &NewRelminMxid))
{
/* Will execute freeze below */
frozen[nfrozen++].offset = offnum;
@@ -1813,13 +1818,16 @@ retry:
prunestate->all_frozen = false;
}
vacrel->offnum = InvalidOffsetNumber;
/*
* We have now divided every item on the page into either an LP_DEAD item
* that will need to be vacuumed in indexes later, or a LP_NORMAL tuple
* that remains and needs to be considered for freezing now (LP_UNUSED and
* LP_REDIRECT items also remain, but are of no further interest to us).
*/
vacrel->offnum = InvalidOffsetNumber;
vacrel->NewRelfrozenXid = NewRelfrozenXid;
vacrel->NewRelminMxid = NewRelminMxid;
/*
* Consider the need to freeze any items with tuple storage from the page
@@ -1969,6 +1977,8 @@ lazy_scan_noprune(LVRelState *vacrel,
recently_dead_tuples,
missed_dead_tuples;
HeapTupleHeader tupleheader;
TransactionId NewRelfrozenXid = vacrel->NewRelfrozenXid;
MultiXactId NewRelminMxid = vacrel->NewRelminMxid;
OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
Assert(BufferGetBlockNumber(buf) == blkno);
@@ -2013,22 +2023,37 @@ lazy_scan_noprune(LVRelState *vacrel,
*hastup = true; /* page prevents rel truncation */
tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
if (heap_tuple_needs_freeze(tupleheader,
if (heap_tuple_would_freeze(tupleheader,
vacrel->FreezeLimit,
vacrel->MultiXactCutoff))
vacrel->MultiXactCutoff,
&NewRelfrozenXid, &NewRelminMxid))
{
/* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
if (vacrel->aggressive)
{
/* Going to have to get cleanup lock for lazy_scan_prune */
/*
* Aggressive VACUUMs must always be able to advance rel's
* relfrozenxid to a value >= FreezeLimit (and be able to
* advance rel's relminmxid to a value >= MultiXactCutoff).
* The ongoing aggressive VACUUM won't be able to do that
* unless it can freeze an XID (or MXID) from this tuple now.
*
* The only safe option is to have caller perform processing
* of this page using lazy_scan_prune. Caller might have to
* wait a while for a cleanup lock, but it can't be helped.
*/
vacrel->offnum = InvalidOffsetNumber;
return false;
}
/*
* Current non-aggressive VACUUM operation definitely won't be
* able to advance relfrozenxid or relminmxid
* Non-aggressive VACUUMs are under no obligation to advance
* relfrozenxid (even by one XID). We can be much laxer here.
*
* Currently we always just accept an older final relfrozenxid
* and/or relminmxid value. We never make caller wait or work a
* little harder, even when it likely makes sense to do so.
*/
vacrel->freeze_cutoffs_valid = false;
}
ItemPointerSet(&(tuple.t_self), blkno, offnum);
@@ -2078,9 +2103,14 @@ lazy_scan_noprune(LVRelState *vacrel,
vacrel->offnum = InvalidOffsetNumber;
/*
* Now save details of the LP_DEAD items from the page in vacrel (though
* only when VACUUM uses two-pass strategy)
* By here we know for sure that caller can put off freezing and pruning
* this particular page until the next VACUUM. Remember its details now.
* (lazy_scan_prune expects a clean slate, so we have to do this last.)
*/
vacrel->NewRelfrozenXid = NewRelfrozenXid;
vacrel->NewRelminMxid = NewRelminMxid;
/* Save any LP_DEAD items found on the page in dead_items array */
if (vacrel->nindexes == 0)
{
/* Using one-pass strategy (since table has no indexes) */