diff --git a/doc/src/sgml/maintenance.sgml b/doc/src/sgml/maintenance.sgml
index 84fba9dcb10..2e09fee5aeb 100644
--- a/doc/src/sgml/maintenance.sgml
+++ b/doc/src/sgml/maintenance.sgml
@@ -586,11 +586,11 @@
statistics in the system tables pg_class and
pg_database. In particular,
the relfrozenxid column of a table's
- pg_class row contains the freeze cutoff XID that was used
- by the last aggressive VACUUM for that table. All rows
- inserted by transactions with XIDs older than this cutoff XID are
- guaranteed to have been frozen. Similarly,
- the datfrozenxid column of a database's
+ pg_class row contains the oldest remaining unfrozen
+ XID at the end of the most recent VACUUM that successfully
+ advanced relfrozenxid (typically the most recent
+ aggressive VACUUM). Similarly, the
+ datfrozenxid column of a database's
pg_database row is a lower bound on the unfrozen XIDs
appearing in that database — it is just the minimum of the
per-table relfrozenxid values within the database.
@@ -638,7 +638,11 @@ SELECT datname, age(datfrozenxid) FROM pg_database;
set age(relfrozenxid) to a value just a little more than the
vacuum_freeze_min_age setting
that was used (more by the number of transactions started since the
- VACUUM started). If no relfrozenxid-advancing
+ VACUUM started). VACUUM
+ will set relfrozenxid to the oldest XID
+ that remains in the table, so it's possible that the final value
+ will be much more recent than strictly required.
+ If no relfrozenxid-advancing
VACUUM is issued on the table until
autovacuum_freeze_max_age is reached, an autovacuum will soon
be forced for the table.
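+    For example (using mytbl as a stand-in for any table name), the
+    effect described here can be observed by comparing
+    age(relfrozenxid) before and after a VACUUM:
+
+SELECT age(relfrozenxid) FROM pg_class WHERE relname = 'mytbl';
+VACUUM mytbl;
+SELECT age(relfrozenxid) FROM pg_class WHERE relname = 'mytbl';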
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 74ad445e59b..1ee985f6330 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -6079,10 +6079,12 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
* Determine what to do during freezing when a tuple is marked by a
* MultiXactId.
*
- * NB -- this might have the side-effect of creating a new MultiXactId!
- *
* "flags" is an output value; it's used to tell caller what to do on return.
- * Possible flags are:
+ *
+ * "mxid_oldest_xid_out" is an output value; it's used to track the oldest
+ * extant Xid within any MultiXactId that will remain after freezing executes.
+ *
+ * Possible values that we can set in "flags":
* FRM_NOOP
* don't do anything -- keep existing Xmax
* FRM_INVALIDATE_XMAX
@@ -6094,12 +6096,17 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
* FRM_RETURN_IS_MULTI
* The return value is a new MultiXactId to set as new Xmax.
* (caller must obtain proper infomask bits using GetMultiXactIdHintBits)
+ *
+ * "mxid_oldest_xid_out" is only set when "flags" contains either FRM_NOOP or
+ * FRM_RETURN_IS_MULTI, since we only leave behind a MultiXactId for these.
+ *
+ * NB: Creates a _new_ MultiXactId when FRM_RETURN_IS_MULTI is set in "flags".
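+ *
+ * A hypothetical worked example: if the multi's members are XIDs 100 and
+ * 205, and cutoff_xid is 150, then member 100 forces a second pass over
+ * the multi.  If 205 turns out to be an updater Xid that must be retained
+ * in a replacement multi, it can push "mxid_oldest_xid_out" back to 205,
+ * though only when 205 precedes the value that caller passed in.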
*/
static TransactionId
FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
TransactionId relfrozenxid, TransactionId relminmxid,
TransactionId cutoff_xid, MultiXactId cutoff_multi,
- uint16 *flags)
+ uint16 *flags, TransactionId *mxid_oldest_xid_out)
{
TransactionId xid = InvalidTransactionId;
int i;
@@ -6111,6 +6118,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
bool has_lockers;
TransactionId update_xid;
bool update_committed;
+ TransactionId temp_xid_out;
*flags = 0;
@@ -6147,7 +6155,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
if (HEAP_XMAX_IS_LOCKED_ONLY(t_infomask))
{
*flags |= FRM_INVALIDATE_XMAX;
- xid = InvalidTransactionId; /* not strictly necessary */
+ xid = InvalidTransactionId;
}
else
{
@@ -6174,7 +6182,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("cannot freeze committed update xid %u", xid)));
*flags |= FRM_INVALIDATE_XMAX;
- xid = InvalidTransactionId; /* not strictly necessary */
+ xid = InvalidTransactionId;
}
else
{
@@ -6182,6 +6190,10 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
}
}
+ /*
+ * Don't push back mxid_oldest_xid_out when the FRM_RETURN_IS_XID Xid is
+ * used, or when no Xids will remain.
+ */
return xid;
}
@@ -6205,6 +6217,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
/* is there anything older than the cutoff? */
need_replace = false;
+ temp_xid_out = *mxid_oldest_xid_out; /* init for FRM_NOOP */
for (i = 0; i < nmembers; i++)
{
if (TransactionIdPrecedes(members[i].xid, cutoff_xid))
@@ -6212,28 +6225,38 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
need_replace = true;
break;
}
+ if (TransactionIdPrecedes(members[i].xid, temp_xid_out))
+ temp_xid_out = members[i].xid;
}
/*
* In the simplest case, there is no member older than the cutoff; we can
- * keep the existing MultiXactId as is.
+ * keep the existing MultiXactId as-is, avoiding a more expensive second
+ * pass over the multi.
*/
if (!need_replace)
{
+ /*
+ * When mxid_oldest_xid_out gets pushed back here, it's likely that the
+ * update Xid was the oldest member, but we don't rely on that.
+ */
*flags |= FRM_NOOP;
+ *mxid_oldest_xid_out = temp_xid_out;
pfree(members);
- return InvalidTransactionId;
+ return multi;
}
/*
- * If the multi needs to be updated, figure out which members do we need
- * to keep.
+ * Do a more thorough second pass over the multi to figure out which
+ * member XIDs actually need to be kept. Checking the precise status of
+ * individual members might even show that we don't need to keep anything.
*/
nnewmembers = 0;
newmembers = palloc(sizeof(MultiXactMember) * nmembers);
has_lockers = false;
update_xid = InvalidTransactionId;
update_committed = false;
+ temp_xid_out = *mxid_oldest_xid_out; /* init for FRM_RETURN_IS_MULTI */
for (i = 0; i < nmembers; i++)
{
@@ -6289,7 +6312,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
}
/*
- * Since the tuple wasn't marked HEAPTUPLE_DEAD by vacuum, the
+ * Since the tuple wasn't totally removed when vacuum pruned, the
* update Xid cannot possibly be older than the xid cutoff. The
* presence of such a tuple would cause corruption, so be paranoid
* and check.
@@ -6302,15 +6325,20 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
update_xid, cutoff_xid)));
/*
- * If we determined that it's an Xid corresponding to an update
- * that must be retained, additionally add it to the list of
- * members of the new Multi, in case we end up using that. (We
- * might still decide to use only an update Xid and not a multi,
- * but it's easier to maintain the list as we walk the old members
- * list.)
+ * We determined that this is an Xid corresponding to an update
+ * that must be retained -- add it to the new members list for later.
+ *
+ * Also consider pushing back temp_xid_out, which is needed when
+ * we later conclude that a new multi is required (i.e. when we go
+ * on to set FRM_RETURN_IS_MULTI for our caller because we also
+ * need to retain a locker that's still running).
*/
if (TransactionIdIsValid(update_xid))
+ {
newmembers[nnewmembers++] = members[i];
+ if (TransactionIdPrecedes(members[i].xid, temp_xid_out))
+ temp_xid_out = members[i].xid;
+ }
}
else
{
@@ -6318,8 +6346,18 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
if (TransactionIdIsCurrentTransactionId(members[i].xid) ||
TransactionIdIsInProgress(members[i].xid))
{
- /* running locker cannot possibly be older than the cutoff */
+ /*
+ * Running locker cannot possibly be older than the cutoff.
+ *
+ * The cutoff is <= VACUUM's OldestXmin, which is also the
+ * initial value used for top-level relfrozenxid_out tracking
+ * state. A running locker cannot be older than VACUUM's
+ * OldestXmin, either, so we don't need a temp_xid_out step.
+ */
+ Assert(TransactionIdIsNormal(members[i].xid));
Assert(!TransactionIdPrecedes(members[i].xid, cutoff_xid));
+ Assert(!TransactionIdPrecedes(members[i].xid,
+ *mxid_oldest_xid_out));
newmembers[nnewmembers++] = members[i];
has_lockers = true;
}
@@ -6328,11 +6366,16 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
pfree(members);
+ /*
+ * Determine what to do with caller's multi based on information gathered
+ * during our second pass.
+ */
if (nnewmembers == 0)
{
/* nothing worth keeping!? Tell caller to remove the whole thing */
*flags |= FRM_INVALIDATE_XMAX;
xid = InvalidTransactionId;
+ /* Don't push back mxid_oldest_xid_out -- no Xids will remain */
}
else if (TransactionIdIsValid(update_xid) && !has_lockers)
{
@@ -6348,15 +6391,18 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
if (update_committed)
*flags |= FRM_MARK_COMMITTED;
xid = update_xid;
+ /* Don't push back mxid_oldest_xid_out when FRM_RETURN_IS_XID Xid is used */
}
else
{
/*
* Create a new multixact with the surviving members of the previous
- * one, to set as new Xmax in the tuple.
+ * one, to set as new Xmax in the tuple. The oldest surviving member
+ * might push back mxid_oldest_xid_out.
*/
xid = MultiXactIdCreateFromMembers(nnewmembers, newmembers);
*flags |= FRM_RETURN_IS_MULTI;
+ *mxid_oldest_xid_out = temp_xid_out;
}
pfree(newmembers);
@@ -6375,31 +6421,41 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
* will be totally frozen after these operations are performed and false if
* more freezing will eventually be required.
*
- * Caller is responsible for setting the offset field, if appropriate.
+ * Caller must set frz->offset itself, before the heap_execute_freeze_tuple call.
*
* It is assumed that the caller has checked the tuple with
* HeapTupleSatisfiesVacuum() and determined that it is not HEAPTUPLE_DEAD
* (else we should be removing the tuple, not freezing it).
*
- * NB: cutoff_xid *must* be <= the current global xmin, to ensure that any
+ * The *relfrozenxid_out and *relminmxid_out arguments are the current target
+ * relfrozenxid and relminmxid for the VACUUM caller's heap rel. Any and all
+ * unfrozen XIDs or MXIDs that remain in the caller's rel after VACUUM
+ * finishes _must_ have values >= the final relfrozenxid/relminmxid values in
+ * pg_class. This includes XIDs that remain as MultiXact members from any
+ * tuple's xmax. Each call here pushes back *relfrozenxid_out and/or
+ * *relminmxid_out as needed to avoid unsafe final values in the rel's
+ * authoritative pg_class tuple.
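+ *
+ * To illustrate with hypothetical values: if a tuple's xmin is XID 1000,
+ * cutoff_xid is 900, and *relfrozenxid_out is 1500 on entry, then xmin is
+ * left unfrozen, and *relfrozenxid_out is pushed back to 1000, so that the
+ * final relfrozenxid value cannot unsafely pass the tuple's xmin.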
+ *
+ * NB: cutoff_xid *must* be <= VACUUM's OldestXmin, to ensure that any
* XID older than it could neither be running nor seen as running by any
* open transaction. This ensures that the replacement will not change
* anyone's idea of the tuple state.
- * Similarly, cutoff_multi must be less than or equal to the smallest
- * MultiXactId used by any transaction currently open.
+ * Similarly, cutoff_multi must be <= VACUUM's OldestMxact.
*
- * If the tuple is in a shared buffer, caller must hold an exclusive lock on
- * that buffer.
+ * NB: This function has side effects: it might allocate a new MultiXactId.
+ * It will be set as the tuple's new xmax when our *frz output is processed
+ * within heap_execute_freeze_tuple later on. If the tuple is in a shared
+ * buffer then the caller had better have an exclusive lock on it already.
*
- * NB: It is not enough to set hint bits to indicate something is
- * committed/invalid -- they might not be set on a standby, or after crash
- * recovery. We really need to remove old xids.
+ * NB: It is not enough to set hint bits to indicate an XID committed/aborted.
+ * The *frz WAL record we output completely removes all old XIDs during REDO.
*/
bool
heap_prepare_freeze_tuple(HeapTupleHeader tuple,
TransactionId relfrozenxid, TransactionId relminmxid,
TransactionId cutoff_xid, TransactionId cutoff_multi,
- xl_heap_freeze_tuple *frz, bool *totally_frozen)
+ xl_heap_freeze_tuple *frz, bool *totally_frozen,
+ TransactionId *relfrozenxid_out,
+ MultiXactId *relminmxid_out)
{
bool changed = false;
bool xmax_already_frozen = false;
@@ -6418,7 +6474,9 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
* already a permanent value), while in the block below it is set true to
* mean "xmin won't need freezing after what we do to it here" (false
* otherwise). In both cases we're allowed to set totally_frozen, as far
- * as xmin is concerned.
+ * as xmin is concerned. Neither case requires relfrozenxid_out
+ * handling, since either way the tuple's xmin will be a permanent value
+ * once we're done with it.
*/
xid = HeapTupleHeaderGetXmin(tuple);
if (!TransactionIdIsNormal(xid))
@@ -6443,6 +6501,12 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
frz->t_infomask |= HEAP_XMIN_FROZEN;
changed = true;
}
+ else
+ {
+ /* xmin to remain unfrozen. Could push back relfrozenxid_out. */
+ if (TransactionIdPrecedes(xid, *relfrozenxid_out))
+ *relfrozenxid_out = xid;
+ }
}
/*
@@ -6452,7 +6516,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
* freezing, too. Also, if a multi needs freezing, we cannot simply take
* it out --- if there's a live updater Xid, it needs to be kept.
*
- * Make sure to keep heap_tuple_needs_freeze in sync with this.
+ * Make sure to keep heap_tuple_would_freeze in sync with this.
*/
xid = HeapTupleHeaderGetRawXmax(tuple);
@@ -6460,15 +6524,28 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
{
TransactionId newxmax;
uint16 flags;
+ TransactionId mxid_oldest_xid_out = *relfrozenxid_out;
newxmax = FreezeMultiXactId(xid, tuple->t_infomask,
relfrozenxid, relminmxid,
- cutoff_xid, cutoff_multi, &flags);
+ cutoff_xid, cutoff_multi,
+ &flags, &mxid_oldest_xid_out);
freeze_xmax = (flags & FRM_INVALIDATE_XMAX);
if (flags & FRM_RETURN_IS_XID)
{
+ /*
+ * xmax will become an updater Xid (original MultiXact's updater
+ * member Xid will be carried forward as a simple Xid in Xmax).
+ * Might have to ratchet back relfrozenxid_out here, though never
+ * relminmxid_out.
+ */
+ Assert(!freeze_xmax);
+ Assert(TransactionIdIsValid(newxmax));
+ if (TransactionIdPrecedes(newxmax, *relfrozenxid_out))
+ *relfrozenxid_out = newxmax;
+
/*
* NB -- some of these transformations are only valid because we
* know the return Xid is a tuple updater (i.e. not merely a
@@ -6487,6 +6564,19 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
uint16 newbits;
uint16 newbits2;
+ /*
+ * xmax is an old MultiXactId that we have to replace with a new
+ * MultiXactId, to carry forward two or more original member XIDs.
+ * Might have to ratchet back relfrozenxid_out here, though never
+ * relminmxid_out.
+ */
+ Assert(!freeze_xmax);
+ Assert(MultiXactIdIsValid(newxmax));
+ Assert(!MultiXactIdPrecedes(newxmax, *relminmxid_out));
+ Assert(TransactionIdPrecedesOrEquals(mxid_oldest_xid_out,
+ *relfrozenxid_out));
+ *relfrozenxid_out = mxid_oldest_xid_out;
+
/*
* We can't use GetMultiXactIdHintBits directly on the new multi
* here; that routine initializes the masks to all zeroes, which
@@ -6503,6 +6593,30 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
changed = true;
}
+ else if (flags & FRM_NOOP)
+ {
+ /*
+ * xmax is a MultiXactId, and nothing about it changes for now.
+ * Might have to ratchet back relminmxid_out, relfrozenxid_out, or
+ * both together.
+ */
+ Assert(!freeze_xmax);
+ Assert(MultiXactIdIsValid(newxmax) && xid == newxmax);
+ Assert(TransactionIdPrecedesOrEquals(mxid_oldest_xid_out,
+ *relfrozenxid_out));
+ if (MultiXactIdPrecedes(xid, *relminmxid_out))
+ *relminmxid_out = xid;
+ *relfrozenxid_out = mxid_oldest_xid_out;
+ }
+ else
+ {
+ /*
+ * Keeping nothing (neither an Xid nor a MultiXactId) in xmax.
+ * Won't have to ratchet back relminmxid_out or relfrozenxid_out.
+ */
+ Assert(freeze_xmax);
+ Assert(!TransactionIdIsValid(newxmax));
+ }
}
else if (TransactionIdIsNormal(xid))
{
@@ -6527,15 +6641,21 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
errmsg_internal("cannot freeze committed xmax %u",
xid)));
freeze_xmax = true;
+ /* No need for relfrozenxid_out handling, since we'll freeze xmax */
}
else
+ {
freeze_xmax = false;
+ if (TransactionIdPrecedes(xid, *relfrozenxid_out))
+ *relfrozenxid_out = xid;
+ }
}
else if ((tuple->t_infomask & HEAP_XMAX_INVALID) ||
!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple)))
{
freeze_xmax = false;
xmax_already_frozen = true;
+ /* No need for relfrozenxid_out handling for already-frozen xmax */
}
else
ereport(ERROR,
@@ -6576,6 +6696,8 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
* was removed in PostgreSQL 9.0. Note that if we were to respect
* cutoff_xid here, we'd need to make sure to clear totally_frozen
* when we skipped freezing on that basis.
+ *
+ * No need for relfrozenxid_out handling, since we always freeze xvac.
*/
if (TransactionIdIsNormal(xid))
{
@@ -6653,11 +6775,14 @@ heap_freeze_tuple(HeapTupleHeader tuple,
xl_heap_freeze_tuple frz;
bool do_freeze;
bool tuple_totally_frozen;
+ TransactionId relfrozenxid_out = cutoff_xid;
+ MultiXactId relminmxid_out = cutoff_multi;
do_freeze = heap_prepare_freeze_tuple(tuple,
relfrozenxid, relminmxid,
cutoff_xid, cutoff_multi,
- &frz, &tuple_totally_frozen);
+ &frz, &tuple_totally_frozen,
+ &relfrozenxid_out, &relminmxid_out);
/*
* Note that because this is not a WAL-logged operation, we don't need to
@@ -7036,9 +7161,7 @@ ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status,
* heap_tuple_needs_eventual_freeze
*
* Check to see whether any of the XID fields of a tuple (xmin, xmax, xvac)
- * will eventually require freezing. Similar to heap_tuple_needs_freeze,
- * but there's no cutoff, since we're trying to figure out whether freezing
- * will ever be needed, not whether it's needed now.
+ * will eventually require freezing (if the tuple isn't removed by pruning first).
*/
bool
heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
@@ -7082,87 +7205,106 @@ heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
}
/*
- * heap_tuple_needs_freeze
+ * heap_tuple_would_freeze
*
- * Check to see whether any of the XID fields of a tuple (xmin, xmax, xvac)
- * are older than the specified cutoff XID or MultiXactId. If so, return true.
+ * Return value indicates whether our sibling function,
+ * heap_prepare_freeze_tuple, would freeze any of the XID/MXID fields from
+ * the tuple, given the same cutoffs.
+ * We must also deal with dead tuples here, since (xmin, xmax, xvac) fields
+ * could be processed by pruning away the whole tuple instead of freezing.
*
- * It doesn't matter whether the tuple is alive or dead, we are checking
- * to see if a tuple needs to be removed or frozen to avoid wraparound.
- *
- * NB: Cannot rely on hint bits here, they might not be set after a crash or
- * on a standby.
+ * The *relfrozenxid_out and *relminmxid_out input/output arguments work just
+ * like the heap_prepare_freeze_tuple arguments that they're based on. We
+ * never freeze here, which makes tracking the oldest extant XID/MXID simple.
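+ *
+ * For example (hypothetical values): given cutoff_xid = 500 and a tuple
+ * whose xmin is 450 and whose xmax is 600, we return true (xmin would be
+ * frozen by heap_prepare_freeze_tuple), while also pushing
+ * *relfrozenxid_out back to 450 if it had a newer value on entry.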
*/
bool
-heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
- MultiXactId cutoff_multi)
+heap_tuple_would_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
+ MultiXactId cutoff_multi,
+ TransactionId *relfrozenxid_out,
+ MultiXactId *relminmxid_out)
{
TransactionId xid;
+ MultiXactId multi;
+ bool would_freeze = false;
+ /* First deal with xmin */
xid = HeapTupleHeaderGetXmin(tuple);
- if (TransactionIdIsNormal(xid) &&
- TransactionIdPrecedes(xid, cutoff_xid))
- return true;
-
- /*
- * The considerations for multixacts are complicated; look at
- * heap_prepare_freeze_tuple for justifications. This routine had better
- * be in sync with that one!
- */
- if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
+ if (TransactionIdIsNormal(xid))
{
- MultiXactId multi;
+ if (TransactionIdPrecedes(xid, *relfrozenxid_out))
+ *relfrozenxid_out = xid;
+ if (TransactionIdPrecedes(xid, cutoff_xid))
+ would_freeze = true;
+ }
+ /* Now deal with xmax */
+ xid = InvalidTransactionId;
+ multi = InvalidMultiXactId;
+ if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
multi = HeapTupleHeaderGetRawXmax(tuple);
- if (!MultiXactIdIsValid(multi))
- {
- /* no xmax set, ignore */
- ;
- }
- else if (HEAP_LOCKED_UPGRADED(tuple->t_infomask))
- return true;
- else if (MultiXactIdPrecedes(multi, cutoff_multi))
- return true;
- else
- {
- MultiXactMember *members;
- int nmembers;
- int i;
+ else
+ xid = HeapTupleHeaderGetRawXmax(tuple);
- /* need to check whether any member of the mxact is too old */
-
- nmembers = GetMultiXactIdMembers(multi, &members, false,
- HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
-
- for (i = 0; i < nmembers; i++)
- {
- if (TransactionIdPrecedes(members[i].xid, cutoff_xid))
- {
- pfree(members);
- return true;
- }
- }
- if (nmembers > 0)
- pfree(members);
- }
+ if (TransactionIdIsNormal(xid))
+ {
+ /* xmax is a non-permanent XID */
+ if (TransactionIdPrecedes(xid, *relfrozenxid_out))
+ *relfrozenxid_out = xid;
+ if (TransactionIdPrecedes(xid, cutoff_xid))
+ would_freeze = true;
+ }
+ else if (!MultiXactIdIsValid(multi))
+ {
+ /* xmax is a permanent XID or invalid MultiXactId/XID */
+ }
+ else if (HEAP_LOCKED_UPGRADED(tuple->t_infomask))
+ {
+ /* xmax is a pg_upgrade'd MultiXact, which can't have an updater XID */
+ if (MultiXactIdPrecedes(multi, *relminmxid_out))
+ *relminmxid_out = multi;
+ /* heap_prepare_freeze_tuple always freezes pg_upgrade'd xmax */
+ would_freeze = true;
}
else
{
- xid = HeapTupleHeaderGetRawXmax(tuple);
- if (TransactionIdIsNormal(xid) &&
- TransactionIdPrecedes(xid, cutoff_xid))
- return true;
+ /* xmax is a MultiXactId that may have an updater XID */
+ MultiXactMember *members;
+ int nmembers;
+
+ if (MultiXactIdPrecedes(multi, *relminmxid_out))
+ *relminmxid_out = multi;
+ if (MultiXactIdPrecedes(multi, cutoff_multi))
+ would_freeze = true;
+
+ /* need to check whether any member of the mxact is old */
+ nmembers = GetMultiXactIdMembers(multi, &members, false,
+ HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
+
+ for (int i = 0; i < nmembers; i++)
+ {
+ xid = members[i].xid;
+ Assert(TransactionIdIsNormal(xid));
+ if (TransactionIdPrecedes(xid, *relfrozenxid_out))
+ *relfrozenxid_out = xid;
+ if (TransactionIdPrecedes(xid, cutoff_xid))
+ would_freeze = true;
+ }
+ if (nmembers > 0)
+ pfree(members);
}
if (tuple->t_infomask & HEAP_MOVED)
{
xid = HeapTupleHeaderGetXvac(tuple);
- if (TransactionIdIsNormal(xid) &&
- TransactionIdPrecedes(xid, cutoff_xid))
- return true;
+ if (TransactionIdIsNormal(xid))
+ {
+ if (TransactionIdPrecedes(xid, *relfrozenxid_out))
+ *relfrozenxid_out = xid;
+ /* heap_prepare_freeze_tuple always freezes xvac */
+ would_freeze = true;
+ }
}
- return false;
+ return would_freeze;
}
/*
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 06b523a01fa..826982f70b7 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -144,7 +144,7 @@ typedef struct LVRelState
Relation *indrels;
int nindexes;
- /* Aggressive VACUUM (scan all unfrozen pages)? */
+ /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
bool aggressive;
/* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
bool skipwithvm;
@@ -173,8 +173,9 @@ typedef struct LVRelState
/* VACUUM operation's target cutoffs for freezing XIDs and MultiXactIds */
TransactionId FreezeLimit;
MultiXactId MultiXactCutoff;
- /* Are FreezeLimit/MultiXactCutoff still valid? */
- bool freeze_cutoffs_valid;
+ /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
+ TransactionId NewRelfrozenXid;
+ MultiXactId NewRelminMxid;
/* Error reporting state */
char *relnamespace;
@@ -313,7 +314,8 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
minmulti_updated;
TransactionId OldestXmin,
FreezeLimit;
- MultiXactId MultiXactCutoff;
+ MultiXactId OldestMxact,
+ MultiXactCutoff;
BlockNumber orig_rel_pages,
new_rel_pages,
new_rel_allvisible;
@@ -345,20 +347,17 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
/*
* Get OldestXmin cutoff, which is used to determine which deleted tuples
* are considered DEAD, not just RECENTLY_DEAD. Also get related cutoffs
- * used to determine which XIDs/MultiXactIds will be frozen.
- *
- * If this is an aggressive VACUUM, then we're strictly required to freeze
- * any and all XIDs from before FreezeLimit, so that we will be able to
- * safely advance relfrozenxid up to FreezeLimit below (we must be able to
- * advance relminmxid up to MultiXactCutoff, too).
+ * used to determine which XIDs/MultiXactIds will be frozen. If this is
+ * an aggressive VACUUM then lazy_scan_heap cannot leave behind unfrozen
+ * XIDs < FreezeLimit (all MXIDs < MultiXactCutoff also need to go away).
*/
aggressive = vacuum_set_xid_limits(rel,
params->freeze_min_age,
params->freeze_table_age,
params->multixact_freeze_min_age,
params->multixact_freeze_table_age,
- &OldestXmin, &FreezeLimit,
- &MultiXactCutoff);
+ &OldestXmin, &OldestMxact,
+ &FreezeLimit, &MultiXactCutoff);
skipwithvm = true;
if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
@@ -505,10 +504,11 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
vacrel->vistest = GlobalVisTestFor(rel);
/* FreezeLimit controls XID freezing (always <= OldestXmin) */
vacrel->FreezeLimit = FreezeLimit;
- /* MultiXactCutoff controls MXID freezing */
+ /* MultiXactCutoff controls MXID freezing (always <= OldestMxact) */
vacrel->MultiXactCutoff = MultiXactCutoff;
- /* Track if cutoffs became invalid (possible in !aggressive case only) */
- vacrel->freeze_cutoffs_valid = true;
+ /* Initialize state used to track oldest extant XID/MXID */
+ vacrel->NewRelfrozenXid = OldestXmin;
+ vacrel->NewRelminMxid = OldestMxact;
/*
* Call lazy_scan_heap to perform all required heap pruning, index
@@ -542,13 +542,33 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
/*
* Prepare to update rel's pg_class entry.
*
- * In principle new_live_tuples could be -1 indicating that we (still)
- * don't know the tuple count. In practice that probably can't happen,
- * since we'd surely have scanned some pages if the table is new and
- * nonempty.
- *
+ * Aggressive VACUUMs must always be able to advance relfrozenxid to a
+ * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
+ * Non-aggressive VACUUMs may advance them by any amount, or not at all.
+ */
+ Assert(vacrel->NewRelfrozenXid == OldestXmin ||
+ TransactionIdPrecedesOrEquals(aggressive ? FreezeLimit :
+ vacrel->relfrozenxid,
+ vacrel->NewRelfrozenXid));
+ Assert(vacrel->NewRelminMxid == OldestMxact ||
+ MultiXactIdPrecedesOrEquals(aggressive ? MultiXactCutoff :
+ vacrel->relminmxid,
+ vacrel->NewRelminMxid));
+ if (vacrel->scanned_pages + vacrel->frozenskipped_pages < orig_rel_pages)
+ {
+ /*
+ * Must keep original relfrozenxid in a non-aggressive VACUUM that
+ * had to skip an all-visible page. The state that tracks new
+ * values will have missed unfrozen XIDs from the pages we skipped.
+ */
+ Assert(!aggressive);
+ vacrel->NewRelfrozenXid = InvalidTransactionId;
+ vacrel->NewRelminMxid = InvalidMultiXactId;
+ }
+
+ /*
* For safety, clamp relallvisible to be not more than what we're setting
- * relpages to.
+ * pg_class.relpages to.
*/
new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
visibilitymap_count(rel, &new_rel_allvisible, NULL);
@@ -558,33 +578,14 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
/*
* Now actually update rel's pg_class entry.
*
- * Aggressive VACUUM must reliably advance relfrozenxid (and relminmxid).
- * We are able to advance relfrozenxid in a non-aggressive VACUUM too,
- * provided we didn't skip any all-visible (not all-frozen) pages using
- * the visibility map, and assuming that we didn't fail to get a cleanup
- * lock that made it unsafe with respect to FreezeLimit (or perhaps our
- * MultiXactCutoff) established for VACUUM operation.
+ * In principle new_live_tuples could be -1 indicating that we (still)
+ * don't know the tuple count. In practice that can't happen, since we
+ * scan every page that isn't skipped using the visibility map.
*/
- if (vacrel->scanned_pages + vacrel->frozenskipped_pages < orig_rel_pages ||
- !vacrel->freeze_cutoffs_valid)
- {
- /* Cannot advance relfrozenxid/relminmxid */
- Assert(!aggressive);
- frozenxid_updated = minmulti_updated = false;
- vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
- new_rel_allvisible, vacrel->nindexes > 0,
- InvalidTransactionId, InvalidMultiXactId,
- NULL, NULL, false);
- }
- else
- {
- Assert(vacrel->scanned_pages + vacrel->frozenskipped_pages ==
- orig_rel_pages);
- vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
- new_rel_allvisible, vacrel->nindexes > 0,
- FreezeLimit, MultiXactCutoff,
- &frozenxid_updated, &minmulti_updated, false);
- }
+ vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
+ new_rel_allvisible, vacrel->nindexes > 0,
+ vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
+ &frozenxid_updated, &minmulti_updated, false);
/*
* Report results to the stats collector, too.
@@ -692,17 +693,17 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
OldestXmin, diff);
if (frozenxid_updated)
{
- diff = (int32) (FreezeLimit - vacrel->relfrozenxid);
+ diff = (int32) (vacrel->NewRelfrozenXid - vacrel->relfrozenxid);
appendStringInfo(&buf,
_("new relfrozenxid: %u, which is %d xids ahead of previous value\n"),
- FreezeLimit, diff);
+ vacrel->NewRelfrozenXid, diff);
}
if (minmulti_updated)
{
- diff = (int32) (MultiXactCutoff - vacrel->relminmxid);
+ diff = (int32) (vacrel->NewRelminMxid - vacrel->relminmxid);
appendStringInfo(&buf,
_("new relminmxid: %u, which is %d mxids ahead of previous value\n"),
- MultiXactCutoff, diff);
+ vacrel->NewRelminMxid, diff);
}
if (orig_rel_pages > 0)
{
@@ -1582,6 +1583,8 @@ lazy_scan_prune(LVRelState *vacrel,
recently_dead_tuples;
int nnewlpdead;
int nfrozen;
+ TransactionId NewRelfrozenXid;
+ MultiXactId NewRelminMxid;
OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
xl_heap_freeze_tuple frozen[MaxHeapTuplesPerPage];
@@ -1591,7 +1594,9 @@ lazy_scan_prune(LVRelState *vacrel,
retry:
- /* Initialize (or reset) page-level counters */
+ /* Initialize (or reset) page-level state */
+ NewRelfrozenXid = vacrel->NewRelfrozenXid;
+ NewRelminMxid = vacrel->NewRelminMxid;
tuples_deleted = 0;
lpdead_items = 0;
live_tuples = 0;
@@ -1798,8 +1803,8 @@ retry:
vacrel->relminmxid,
vacrel->FreezeLimit,
vacrel->MultiXactCutoff,
- &frozen[nfrozen],
- &tuple_totally_frozen))
+ &frozen[nfrozen], &tuple_totally_frozen,
+ &NewRelfrozenXid, &NewRelminMxid))
{
/* Will execute freeze below */
frozen[nfrozen++].offset = offnum;
@@ -1813,13 +1818,16 @@ retry:
prunestate->all_frozen = false;
}
+ vacrel->offnum = InvalidOffsetNumber;
+
/*
* We have now divided every item on the page into either an LP_DEAD item
* that will need to be vacuumed in indexes later, or a LP_NORMAL tuple
* that remains and needs to be considered for freezing now (LP_UNUSED and
* LP_REDIRECT items also remain, but are of no further interest to us).
*/
- vacrel->offnum = InvalidOffsetNumber;
+ vacrel->NewRelfrozenXid = NewRelfrozenXid;
+ vacrel->NewRelminMxid = NewRelminMxid;
/*
* Consider the need to freeze any items with tuple storage from the page
@@ -1969,6 +1977,8 @@ lazy_scan_noprune(LVRelState *vacrel,
recently_dead_tuples,
missed_dead_tuples;
HeapTupleHeader tupleheader;
+ TransactionId NewRelfrozenXid = vacrel->NewRelfrozenXid;
+ MultiXactId NewRelminMxid = vacrel->NewRelminMxid;
OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
Assert(BufferGetBlockNumber(buf) == blkno);
@@ -2013,22 +2023,37 @@ lazy_scan_noprune(LVRelState *vacrel,
*hastup = true; /* page prevents rel truncation */
tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
- if (heap_tuple_needs_freeze(tupleheader,
+ if (heap_tuple_would_freeze(tupleheader,
vacrel->FreezeLimit,
- vacrel->MultiXactCutoff))
+ vacrel->MultiXactCutoff,
+ &NewRelfrozenXid, &NewRelminMxid))
{
+ /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
if (vacrel->aggressive)
{
- /* Going to have to get cleanup lock for lazy_scan_prune */
+ /*
+ * Aggressive VACUUMs must always be able to advance rel's
+ * relfrozenxid to a value >= FreezeLimit (and be able to
+ * advance rel's relminmxid to a value >= MultiXactCutoff).
+ * The ongoing aggressive VACUUM won't be able to do that
+ * unless it can freeze an XID (or MXID) from this tuple now.
+ *
+ * The only safe option is to have caller perform processing
+ * of this page using lazy_scan_prune. Caller might have to
+ * wait a while for a cleanup lock, but it can't be helped.
+ */
vacrel->offnum = InvalidOffsetNumber;
return false;
}
/*
- * Current non-aggressive VACUUM operation definitely won't be
- * able to advance relfrozenxid or relminmxid
+ * Non-aggressive VACUUMs are under no obligation to advance
+ * relfrozenxid (even by one XID). We can be much laxer here.
+ *
+ * Currently we always just accept an older final relfrozenxid
+ * and/or relminmxid value. We never make caller wait or work a
+ * little harder, even when it likely makes sense to do so.
*/
- vacrel->freeze_cutoffs_valid = false;
}
ItemPointerSet(&(tuple.t_self), blkno, offnum);
@@ -2078,9 +2103,14 @@ lazy_scan_noprune(LVRelState *vacrel,
vacrel->offnum = InvalidOffsetNumber;
/*
- * Now save details of the LP_DEAD items from the page in vacrel (though
- * only when VACUUM uses two-pass strategy)
+ * By now we know for sure that the caller can put off freezing and pruning
+ * this particular page until the next VACUUM. Remember its details now.
+ * (lazy_scan_prune expects a clean slate, so we have to do this last.)
*/
+ vacrel->NewRelfrozenXid = NewRelfrozenXid;
+ vacrel->NewRelminMxid = NewRelminMxid;
+
+ /* Save any LP_DEAD items found on the page in dead_items array */
if (vacrel->nindexes == 0)
{
/* Using one-pass strategy (since table has no indexes) */
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index cd19e35319e..322d6bb2f18 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -806,9 +806,10 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
Form_pg_class relform;
TupleDesc oldTupDesc PG_USED_FOR_ASSERTS_ONLY;
TupleDesc newTupDesc PG_USED_FOR_ASSERTS_ONLY;
- TransactionId OldestXmin;
- TransactionId FreezeXid;
- MultiXactId MultiXactCutoff;
+ TransactionId OldestXmin,
+ FreezeXid;
+ MultiXactId OldestMxact,
+ MultiXactCutoff;
bool use_sort;
double num_tuples = 0,
tups_vacuumed = 0,
@@ -896,8 +897,8 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
* Since we're going to rewrite the whole table anyway, there's no reason
* not to be aggressive about this.
*/
- vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0,
- &OldestXmin, &FreezeXid, &MultiXactCutoff);
+ vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0, &OldestXmin, &OldestMxact,
+ &FreezeXid, &MultiXactCutoff);
/*
* FreezeXid will become the table's new relfrozenxid, and that mustn't go
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 50a4a612e58..deec4887bec 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -945,14 +945,22 @@ get_all_vacuum_rels(int options)
* The output parameters are:
* - oldestXmin is the Xid below which tuples deleted by any xact (that
* committed) should be considered DEAD, not just RECENTLY_DEAD.
- * - freezeLimit is the Xid below which all Xids are replaced by
- * FrozenTransactionId during vacuum.
- * - multiXactCutoff is the value below which all MultiXactIds are removed
- * from Xmax.
+ * - oldestMxact is the MXID below which MultiXacts are definitely not
+ * seen as visible by any running transaction.
+ * - freezeLimit is the Xid below which all Xids are definitely replaced by
+ * FrozenTransactionId during aggressive vacuums.
+ * - multiXactCutoff is the value below which all MultiXactIds are definitely
+ * removed from Xmax during aggressive vacuums.
*
* Return value indicates if vacuumlazy.c caller should make its VACUUM
* operation aggressive. An aggressive VACUUM must advance relfrozenxid up to
- * FreezeLimit, and relminmxid up to multiXactCutoff.
+ * FreezeLimit (at a minimum), and relminmxid up to multiXactCutoff (at a
+ * minimum).
+ *
+ * oldestXmin and oldestMxact are the most recent values that can ever be
+ * passed to vac_update_relstats() as the frozenxid and minmulti arguments
+ * by our vacuumlazy.c caller later on. These values should be passed when
+ * it turns out that VACUUM will leave no unfrozen XIDs/MXIDs behind in the
+ * table.
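+ *
+ * As a hypothetical example: with vacuum_freeze_min_age set to 50 million
+ * and an oldestXmin of 200 million, freezeLimit ordinarily comes out at
+ * 150 million -- oldestXmin minus the effective freeze min age, clamped
+ * to a valid normal XID.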
*/
bool
vacuum_set_xid_limits(Relation rel,
@@ -961,6 +969,7 @@ vacuum_set_xid_limits(Relation rel,
int multixact_freeze_min_age,
int multixact_freeze_table_age,
TransactionId *oldestXmin,
+ MultiXactId *oldestMxact,
TransactionId *freezeLimit,
MultiXactId *multiXactCutoff)
{
@@ -969,7 +978,6 @@ vacuum_set_xid_limits(Relation rel,
int effective_multixact_freeze_max_age;
TransactionId limit;
TransactionId safeLimit;
- MultiXactId oldestMxact;
MultiXactId mxactLimit;
MultiXactId safeMxactLimit;
int freezetable;
@@ -1065,9 +1073,11 @@ vacuum_set_xid_limits(Relation rel,
effective_multixact_freeze_max_age / 2);
Assert(mxid_freezemin >= 0);
+ /* Remember for caller */
+ *oldestMxact = GetOldestMultiXactId();
+
/* compute the cutoff multi, being careful to generate a valid value */
- oldestMxact = GetOldestMultiXactId();
- mxactLimit = oldestMxact - mxid_freezemin;
+ mxactLimit = *oldestMxact - mxid_freezemin;
if (mxactLimit < FirstMultiXactId)
mxactLimit = FirstMultiXactId;
@@ -1082,8 +1092,8 @@ vacuum_set_xid_limits(Relation rel,
(errmsg("oldest multixact is far in the past"),
errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
/* Use the safe limit, unless an older mxact is still running */
- if (MultiXactIdPrecedes(oldestMxact, safeMxactLimit))
- mxactLimit = oldestMxact;
+ if (MultiXactIdPrecedes(*oldestMxact, safeMxactLimit))
+ mxactLimit = *oldestMxact;
else
mxactLimit = safeMxactLimit;
}
@@ -1390,12 +1400,9 @@ vac_update_relstats(Relation relation,
* Update relfrozenxid, unless caller passed InvalidTransactionId
* indicating it has no new data.
*
- * Ordinarily, we don't let relfrozenxid go backwards: if things are
- * working correctly, the only way the new frozenxid could be older would
- * be if a previous VACUUM was done with a tighter freeze_min_age, in
- * which case we don't want to forget the work it already did. However,
- * if the stored relfrozenxid is "in the future", then it must be corrupt
- * and it seems best to overwrite it with the cutoff we used this time.
+ * Ordinarily, we don't let relfrozenxid go backwards. However, if the
+ * stored relfrozenxid is "in the future", then it seems best to assume
+ * it's corrupt, and overwrite it with the oldest remaining XID in the table.
* This should match vac_update_datfrozenxid() concerning what we consider
* to be "in the future".
*/
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index b46ab7d7390..4403f01e130 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -167,8 +167,10 @@ extern void heap_inplace_update(Relation relation, HeapTuple tuple);
extern bool heap_freeze_tuple(HeapTupleHeader tuple,
TransactionId relfrozenxid, TransactionId relminmxid,
TransactionId cutoff_xid, TransactionId cutoff_multi);
-extern bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
- MultiXactId cutoff_multi);
+extern bool heap_tuple_would_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
+ MultiXactId cutoff_multi,
+ TransactionId *relfrozenxid_out,
+ MultiXactId *relminmxid_out);
extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple);
extern void simple_heap_insert(Relation relation, HeapTuple tup);
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index 5c47fdcec80..2d8a7f62706 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -410,7 +410,9 @@ extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
TransactionId cutoff_xid,
TransactionId cutoff_multi,
xl_heap_freeze_tuple *frz,
- bool *totally_frozen);
+ bool *totally_frozen,
+ TransactionId *relfrozenxid_out,
+ MultiXactId *relminmxid_out);
extern void heap_execute_freeze_tuple(HeapTupleHeader tuple,
xl_heap_freeze_tuple *xlrec_tp);
extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer,
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index d64f6268f23..ead88edda76 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -291,6 +291,7 @@ extern bool vacuum_set_xid_limits(Relation rel,
int multixact_freeze_min_age,
int multixact_freeze_table_age,
TransactionId *oldestXmin,
+ MultiXactId *oldestMxact,
TransactionId *freezeLimit,
MultiXactId *multiXactCutoff);
extern bool vacuum_xid_failsafe_check(TransactionId relfrozenxid,
diff --git a/src/test/isolation/expected/vacuum-no-cleanup-lock.out b/src/test/isolation/expected/vacuum-no-cleanup-lock.out
new file mode 100644
index 00000000000..f7bc93e8f1c
--- /dev/null
+++ b/src/test/isolation/expected/vacuum-no-cleanup-lock.out
@@ -0,0 +1,189 @@
+Parsed test spec with 4 sessions
+
+starting permutation: vacuumer_pg_class_stats dml_insert vacuumer_nonaggressive_vacuum vacuumer_pg_class_stats
+step vacuumer_pg_class_stats:
+ SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass;
+
+relpages|reltuples
+--------+---------
+ 1| 20
+(1 row)
+
+step dml_insert:
+ INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl;
+
+step vacuumer_nonaggressive_vacuum:
+ VACUUM smalltbl;
+
+step vacuumer_pg_class_stats:
+ SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass;
+
+relpages|reltuples
+--------+---------
+ 1| 21
+(1 row)
+
+
+starting permutation: vacuumer_pg_class_stats dml_insert pinholder_cursor vacuumer_nonaggressive_vacuum vacuumer_pg_class_stats pinholder_commit
+step vacuumer_pg_class_stats:
+ SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass;
+
+relpages|reltuples
+--------+---------
+ 1| 20
+(1 row)
+
+step dml_insert:
+ INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl;
+
+step pinholder_cursor:
+ BEGIN;
+ DECLARE c1 CURSOR FOR SELECT 1 AS dummy FROM smalltbl;
+ FETCH NEXT FROM c1;
+
+dummy
+-----
+ 1
+(1 row)
+
+step vacuumer_nonaggressive_vacuum:
+ VACUUM smalltbl;
+
+step vacuumer_pg_class_stats:
+ SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass;
+
+relpages|reltuples
+--------+---------
+ 1| 21
+(1 row)
+
+step pinholder_commit:
+ COMMIT;
+
+
+starting permutation: vacuumer_pg_class_stats pinholder_cursor dml_insert dml_delete dml_insert vacuumer_nonaggressive_vacuum vacuumer_pg_class_stats pinholder_commit
+step vacuumer_pg_class_stats:
+ SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass;
+
+relpages|reltuples
+--------+---------
+ 1| 20
+(1 row)
+
+step pinholder_cursor:
+ BEGIN;
+ DECLARE c1 CURSOR FOR SELECT 1 AS dummy FROM smalltbl;
+ FETCH NEXT FROM c1;
+
+dummy
+-----
+ 1
+(1 row)
+
+step dml_insert:
+ INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl;
+
+step dml_delete:
+ DELETE FROM smalltbl WHERE id = (SELECT min(id) FROM smalltbl);
+
+step dml_insert:
+ INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl;
+
+step vacuumer_nonaggressive_vacuum:
+ VACUUM smalltbl;
+
+step vacuumer_pg_class_stats:
+ SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass;
+
+relpages|reltuples
+--------+---------
+ 1| 21
+(1 row)
+
+step pinholder_commit:
+ COMMIT;
+
+
+starting permutation: vacuumer_pg_class_stats dml_insert dml_delete pinholder_cursor dml_insert vacuumer_nonaggressive_vacuum vacuumer_pg_class_stats pinholder_commit
+step vacuumer_pg_class_stats:
+ SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass;
+
+relpages|reltuples
+--------+---------
+ 1| 20
+(1 row)
+
+step dml_insert:
+ INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl;
+
+step dml_delete:
+ DELETE FROM smalltbl WHERE id = (SELECT min(id) FROM smalltbl);
+
+step pinholder_cursor:
+ BEGIN;
+ DECLARE c1 CURSOR FOR SELECT 1 AS dummy FROM smalltbl;
+ FETCH NEXT FROM c1;
+
+dummy
+-----
+ 1
+(1 row)
+
+step dml_insert:
+ INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl;
+
+step vacuumer_nonaggressive_vacuum:
+ VACUUM smalltbl;
+
+step vacuumer_pg_class_stats:
+ SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass;
+
+relpages|reltuples
+--------+---------
+ 1| 21
+(1 row)
+
+step pinholder_commit:
+ COMMIT;
+
+
+starting permutation: dml_begin dml_other_begin dml_key_share dml_other_key_share vacuumer_nonaggressive_vacuum pinholder_cursor dml_other_update dml_commit dml_other_commit vacuumer_nonaggressive_vacuum pinholder_commit vacuumer_nonaggressive_vacuum
+step dml_begin: BEGIN;
+step dml_other_begin: BEGIN;
+step dml_key_share: SELECT id FROM smalltbl WHERE id = 3 FOR KEY SHARE;
+id
+--
+ 3
+(1 row)
+
+step dml_other_key_share: SELECT id FROM smalltbl WHERE id = 3 FOR KEY SHARE;
+id
+--
+ 3
+(1 row)
+
+step vacuumer_nonaggressive_vacuum:
+ VACUUM smalltbl;
+
+step pinholder_cursor:
+ BEGIN;
+ DECLARE c1 CURSOR FOR SELECT 1 AS dummy FROM smalltbl;
+ FETCH NEXT FROM c1;
+
+dummy
+-----
+ 1
+(1 row)
+
+step dml_other_update: UPDATE smalltbl SET t = 'u' WHERE id = 3;
+step dml_commit: COMMIT;
+step dml_other_commit: COMMIT;
+step vacuumer_nonaggressive_vacuum:
+ VACUUM smalltbl;
+
+step pinholder_commit:
+ COMMIT;
+
+step vacuumer_nonaggressive_vacuum:
+ VACUUM smalltbl;
+
diff --git a/src/test/isolation/expected/vacuum-reltuples.out b/src/test/isolation/expected/vacuum-reltuples.out
deleted file mode 100644
index ce55376e7f2..00000000000
--- a/src/test/isolation/expected/vacuum-reltuples.out
+++ /dev/null
@@ -1,67 +0,0 @@
-Parsed test spec with 2 sessions
-
-starting permutation: modify vac stats
-step modify:
- insert into smalltbl select max(id)+1 from smalltbl;
-
-step vac:
- vacuum smalltbl;
-
-step stats:
- select relpages, reltuples from pg_class
- where oid='smalltbl'::regclass;
-
-relpages|reltuples
---------+---------
- 1| 21
-(1 row)
-
-
-starting permutation: modify open fetch1 vac close stats
-step modify:
- insert into smalltbl select max(id)+1 from smalltbl;
-
-step open:
- begin;
- declare c1 cursor for select 1 as dummy from smalltbl;
-
-step fetch1:
- fetch next from c1;
-
-dummy
------
- 1
-(1 row)
-
-step vac:
- vacuum smalltbl;
-
-step close:
- commit;
-
-step stats:
- select relpages, reltuples from pg_class
- where oid='smalltbl'::regclass;
-
-relpages|reltuples
---------+---------
- 1| 21
-(1 row)
-
-
-starting permutation: modify vac stats
-step modify:
- insert into smalltbl select max(id)+1 from smalltbl;
-
-step vac:
- vacuum smalltbl;
-
-step stats:
- select relpages, reltuples from pg_class
- where oid='smalltbl'::regclass;
-
-relpages|reltuples
---------+---------
- 1| 21
-(1 row)
-
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index 00749a40bdd..a48caae228e 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -84,7 +84,7 @@ test: alter-table-4
test: create-trigger
test: sequence-ddl
test: async-notify
-test: vacuum-reltuples
+test: vacuum-no-cleanup-lock
test: timeouts
test: vacuum-concurrent-drop
test: vacuum-conflict
diff --git a/src/test/isolation/specs/vacuum-no-cleanup-lock.spec b/src/test/isolation/specs/vacuum-no-cleanup-lock.spec
new file mode 100644
index 00000000000..a88be66de5f
--- /dev/null
+++ b/src/test/isolation/specs/vacuum-no-cleanup-lock.spec
@@ -0,0 +1,150 @@
+# Test for vacuum's reduced processing of heap pages (used for any heap page
+# where a cleanup lock isn't immediately available)
+#
+# Debugging tip: Change VACUUM to VACUUM VERBOSE to get feedback on what's
+# really going on
+
+# Use name type here to avoid TOAST table:
+setup
+{
+ CREATE TABLE smalltbl AS SELECT i AS id, 't'::name AS t FROM generate_series(1,20) i;
+ ALTER TABLE smalltbl SET (autovacuum_enabled = off);
+ ALTER TABLE smalltbl ADD PRIMARY KEY (id);
+}
+setup
+{
+ VACUUM ANALYZE smalltbl;
+}
+
+teardown
+{
+ DROP TABLE smalltbl;
+}
+
+# This session holds a pin on smalltbl's only heap page:
+session pinholder
+step pinholder_cursor
+{
+ BEGIN;
+ DECLARE c1 CURSOR FOR SELECT 1 AS dummy FROM smalltbl;
+ FETCH NEXT FROM c1;
+}
+step pinholder_commit
+{
+ COMMIT;
+}
+
+# This session inserts and deletes tuples, potentially affecting reltuples:
+session dml
+step dml_insert
+{
+ INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl;
+}
+step dml_delete
+{
+ DELETE FROM smalltbl WHERE id = (SELECT min(id) FROM smalltbl);
+}
+step dml_begin { BEGIN; }
+step dml_key_share { SELECT id FROM smalltbl WHERE id = 3 FOR KEY SHARE; }
+step dml_commit { COMMIT; }
+
+# Needed for Multixact test:
+session dml_other
+step dml_other_begin { BEGIN; }
+step dml_other_key_share { SELECT id FROM smalltbl WHERE id = 3 FOR KEY SHARE; }
+step dml_other_update { UPDATE smalltbl SET t = 'u' WHERE id = 3; }
+step dml_other_commit { COMMIT; }
+
+# This session runs non-aggressive VACUUM, but with maximally aggressive
+# cutoffs for tuple freezing (e.g., FreezeLimit == OldestXmin):
+session vacuumer
+setup
+{
+ SET vacuum_freeze_min_age = 0;
+ SET vacuum_multixact_freeze_min_age = 0;
+}
+step vacuumer_nonaggressive_vacuum
+{
+ VACUUM smalltbl;
+}
+step vacuumer_pg_class_stats
+{
+ SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass;
+}
+
+# Test VACUUM's reltuples counting mechanism.
+#
+# Final pg_class.reltuples should never be affected by VACUUM's inability to
+# get a cleanup lock on any page, except to the extent that any cleanup lock
+# contention changes the number of tuples that remain ("missed dead" tuples
+# are counted in reltuples, much like "recently dead" tuples).
+
+# Easy case:
+permutation
+ vacuumer_pg_class_stats # Start with 20 tuples
+ dml_insert
+ vacuumer_nonaggressive_vacuum
+ vacuumer_pg_class_stats # End with 21 tuples
+
+# Harder case -- count 21 tuples at the end (like last time), but with cleanup
+# lock contention this time:
+permutation
+ vacuumer_pg_class_stats # Start with 20 tuples
+ dml_insert
+ pinholder_cursor
+ vacuumer_nonaggressive_vacuum
+ vacuumer_pg_class_stats # End with 21 tuples
+ pinholder_commit # order doesn't matter
+
+# Same as "harder case", but vary the order, and delete an inserted row:
+permutation
+ vacuumer_pg_class_stats # Start with 20 tuples
+ pinholder_cursor
+ dml_insert
+ dml_delete
+ dml_insert
+ vacuumer_nonaggressive_vacuum
+ # reltuples is 21 here again -- "recently dead" tuple won't be included in
+ # count here:
+ vacuumer_pg_class_stats
+ pinholder_commit # order doesn't matter
+
+# Same as "harder case", but initial insert and delete before cursor:
+permutation
+ vacuumer_pg_class_stats # Start with 20 tuples
+ dml_insert
+ dml_delete
+ pinholder_cursor
+ dml_insert
+ vacuumer_nonaggressive_vacuum
+ # reltuples is 21 here again -- "missed dead" tuple ("recently dead" when
+ # concurrent activity held back VACUUM's OldestXmin) won't be included in
+ # count here:
+ vacuumer_pg_class_stats
+ pinholder_commit # order doesn't matter
+
+# Test VACUUM's mechanism for skipping MultiXact freezing.
+#
+# This provides test coverage for code paths that are only hit when we need to
+# freeze, but an inability to acquire a cleanup lock on a heap page makes
+# freezing some XIDs/MXIDs < FreezeLimit/MultiXactCutoff impossible (without
+# waiting for a cleanup lock, which non-aggressive VACUUM is unwilling to do).
+permutation
+ dml_begin
+ dml_other_begin
+ dml_key_share
+ dml_other_key_share
+ # Will get cleanup lock, can't advance relminmxid yet:
+ # (though will usually advance relfrozenxid by ~2 XIDs)
+ vacuumer_nonaggressive_vacuum
+ pinholder_cursor
+ dml_other_update
+ dml_commit
+ dml_other_commit
+ # Can't cleanup lock, so still can't advance relminmxid here:
+ # (relfrozenxid held back by XIDs in MultiXact too)
+ vacuumer_nonaggressive_vacuum
+ pinholder_commit
+ # Pin was dropped, so will advance relminmxid, at long last:
+ # (ditto for relfrozenxid advancement)
+ vacuumer_nonaggressive_vacuum
diff --git a/src/test/isolation/specs/vacuum-reltuples.spec b/src/test/isolation/specs/vacuum-reltuples.spec
deleted file mode 100644
index a2a461f2f5d..00000000000
--- a/src/test/isolation/specs/vacuum-reltuples.spec
+++ /dev/null
@@ -1,49 +0,0 @@
-# Test for vacuum's handling of reltuples when pages are skipped due
-# to page pins. We absolutely need to avoid setting reltuples=0 in
-# such cases, since that interferes badly with planning.
-#
-# Expected result for all three permutation is 21 tuples, including
-# the second permutation. VACUUM is able to count the concurrently
-# inserted tuple in its final reltuples, even when a cleanup lock
-# cannot be acquired on the affected heap page.
-
-setup {
- create table smalltbl
- as select i as id from generate_series(1,20) i;
- alter table smalltbl set (autovacuum_enabled = off);
-}
-setup {
- vacuum analyze smalltbl;
-}
-
-teardown {
- drop table smalltbl;
-}
-
-session worker
-step open {
- begin;
- declare c1 cursor for select 1 as dummy from smalltbl;
-}
-step fetch1 {
- fetch next from c1;
-}
-step close {
- commit;
-}
-step stats {
- select relpages, reltuples from pg_class
- where oid='smalltbl'::regclass;
-}
-
-session vacuumer
-step vac {
- vacuum smalltbl;
-}
-step modify {
- insert into smalltbl select max(id)+1 from smalltbl;
-}
-
-permutation modify vac stats
-permutation modify open fetch1 vac close stats
-permutation modify vac stats