diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index e29c5ad0868..d20f0381f3b 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2118,6 +2118,9 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, * If we can't see it, maybe no one else can either. At caller * request, check whether all chain members are dead to all * transactions. + * + * Note: if you change the criterion here for what is "dead", fix the + * planner's get_actual_variable_range() function to match. */ if (all_dead && *all_dead && !HeapTupleIsSurelyDead(heapTuple, RecentGlobalXmin)) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 23e5526a8e1..81b0bc37d27 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -142,6 +142,7 @@ #include "utils/pg_locale.h" #include "utils/rel.h" #include "utils/selfuncs.h" +#include "utils/snapmgr.h" #include "utils/spccache.h" #include "utils/syscache.h" #include "utils/timestamp.h" @@ -5328,7 +5329,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, HeapTuple tup; Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; - SnapshotData SnapshotDirty; + SnapshotData SnapshotNonVacuumable; estate = CreateExecutorState(); econtext = GetPerTupleExprContext(estate); @@ -5351,7 +5352,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRel)); econtext->ecxt_scantuple = slot; get_typlenbyval(vardata->atttype, &typLen, &typByVal); - InitDirtySnapshot(SnapshotDirty); + InitNonVacuumableSnapshot(SnapshotNonVacuumable, RecentGlobalXmin); /* set up an IS NOT NULL scan key so that we ignore nulls */ ScanKeyEntryInitialize(&scankeys[0], @@ -5373,17 +5374,29 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, * active snapshot, which is the best approximation we've got * to what the query will 
see when executed. But that won't * be exact if a new snap is taken before running the query, - * and it can be very expensive if a lot of uncommitted rows - * exist at the end of the index (because we'll laboriously - * fetch each one and reject it). What seems like a good - * compromise is to use SnapshotDirty. That will accept - * uncommitted rows, and thus avoid fetching multiple heap - * tuples in this scenario. On the other hand, it will reject - * known-dead rows, and thus not give a bogus answer when the - * extreme value has been deleted; that case motivates not - * using SnapshotAny here. + * and it can be very expensive if a lot of recently-dead or + * uncommitted rows exist at the beginning or end of the index + * (because we'll laboriously fetch each one and reject it). + * Instead, we use SnapshotNonVacuumable. That will accept + * recently-dead and uncommitted rows as well as normal + * visible rows. On the other hand, it will reject known-dead + * rows, and thus not give a bogus answer when the extreme + * value has been deleted (unless the deletion was quite + * recent); that case motivates not using SnapshotAny here. + * + * A crucial point here is that SnapshotNonVacuumable, with + * RecentGlobalXmin as horizon, yields the inverse of the + * condition that the indexscan will use to decide that index + * entries are killable (see heap_hot_search_buffer()). + * Therefore, if the snapshot rejects a tuple and we have to + * continue scanning past it, we know that the indexscan will + * mark that index entry killed. That means that the next + * get_actual_variable_range() call will not have to visit + * that heap entry. In this way we avoid repetitive work when + * this function is used a lot during planning. 
*/ - index_scan = index_beginscan(heapRel, indexRel, &SnapshotDirty, + index_scan = index_beginscan(heapRel, indexRel, + &SnapshotNonVacuumable, 1, 0); index_rescan(index_scan, scankeys, 1, NULL, 0); @@ -5415,7 +5428,8 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, /* If max is requested, and we didn't find the index is empty */ if (max && have_data) { - index_scan = index_beginscan(heapRel, indexRel, &SnapshotDirty, + index_scan = index_beginscan(heapRel, indexRel, + &SnapshotNonVacuumable, 1, 0); index_rescan(index_scan, scankeys, 1, NULL, 0); diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c index f9da9e17f52..bbac4083c98 100644 --- a/src/backend/utils/time/tqual.c +++ b/src/backend/utils/time/tqual.c @@ -45,6 +45,8 @@ * like HeapTupleSatisfiesSelf(), but includes open transactions * HeapTupleSatisfiesVacuum() * visible to any running transaction, used by VACUUM + * HeapTupleSatisfiesNonVacuumable() + * Snapshot-style API for HeapTupleSatisfiesVacuum * HeapTupleSatisfiesToast() * visible unless part of interrupted vacuum, used for TOAST * HeapTupleSatisfiesAny() @@ -1392,6 +1394,26 @@ HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, return HEAPTUPLE_DEAD; } + +/* + * HeapTupleSatisfiesNonVacuumable + * + * True if tuple might be visible to some transaction; false if it's + * surely dead to everyone, i.e., vacuumable (HEAPTUPLE_DEAD). + * + * This is an interface to HeapTupleSatisfiesVacuum that meets the + * SnapshotSatisfiesFunc API, so it can be used through a Snapshot. + * snapshot->xmin must have been set up with the xmin horizon to use.
+ */ +bool +HeapTupleSatisfiesNonVacuumable(HeapTuple htup, Snapshot snapshot, + Buffer buffer) +{ + return HeapTupleSatisfiesVacuum(htup, snapshot->xmin, buffer) + != HEAPTUPLE_DEAD; +} + + /* * HeapTupleIsSurelyDead * diff --git a/src/include/utils/snapshot.h b/src/include/utils/snapshot.h index 074cc818649..bf519778df4 100644 --- a/src/include/utils/snapshot.h +++ b/src/include/utils/snapshot.h @@ -41,6 +41,7 @@ typedef bool (*SnapshotSatisfiesFunc) (HeapTuple htup, * * MVCC snapshots taken during recovery (in Hot-Standby mode) * * Historic MVCC snapshots used during logical decoding * * snapshots passed to HeapTupleSatisfiesDirty() + * * snapshots passed to HeapTupleSatisfiesNonVacuumable() * * snapshots used for SatisfiesAny, Toast, Self where no members are * accessed. * @@ -56,7 +57,8 @@ typedef struct SnapshotData /* * The remaining fields are used only for MVCC snapshots, and are normally * just zeroes in special snapshots. (But xmin and xmax are used - * specially by HeapTupleSatisfiesDirty.) + * specially by HeapTupleSatisfiesDirty, and xmin is used specially by + * HeapTupleSatisfiesNonVacuumable.) * * An MVCC snapshot can never see the effects of XIDs >= xmax. It can see * the effects of all older XIDs except those listed in the snapshot.
xmin diff --git a/src/include/utils/tqual.h b/src/include/utils/tqual.h index 036d9898d69..9a3b56e5f03 100644 --- a/src/include/utils/tqual.h +++ b/src/include/utils/tqual.h @@ -66,6 +66,8 @@ extern bool HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot, Buffer buffer); extern bool HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, Buffer buffer); +extern bool HeapTupleSatisfiesNonVacuumable(HeapTuple htup, + Snapshot snapshot, Buffer buffer); extern bool HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer); @@ -100,6 +102,14 @@ extern bool ResolveCminCmaxDuringDecoding(struct HTAB *tuplecid_data, #define InitDirtySnapshot(snapshotdata) \ ((snapshotdata).satisfies = HeapTupleSatisfiesDirty) +/* + * Similarly, a NonVacuumable snapshot needs its satisfies callback set, plus + * xmin set to the caller-supplied xmin horizon (e.g., RecentGlobalXmin). + */ +#define InitNonVacuumableSnapshot(snapshotdata, xmin_horizon) \ + ((snapshotdata).satisfies = HeapTupleSatisfiesNonVacuumable, \ + (snapshotdata).xmin = (xmin_horizon)) + /* * Similarly, some initialization is required for SnapshotToast. We need * to set lsn and whenTaken correctly to support snapshot_too_old.