mirror of
https://github.com/postgres/postgres.git
synced 2025-10-21 02:52:47 +03:00
snapshot scalability: Don't compute global horizons while building snapshots.
To make GetSnapshotData() more scalable, it cannot look at each proc's xmin: While snapshot contents do not need to change whenever a read-only transaction commits or a snapshot is released, a proc's xmin is modified in those cases. The frequency of xmin modifications leads to, particularly on higher core count systems, many cache misses inside GetSnapshotData(), despite the data underlying a snapshot not changing. That is the most significant source of GetSnapshotData() scaling poorly on larger systems. Without accessing xmins, GetSnapshotData() cannot calculate accurate horizons / thresholds as it has so far. But we don't really have to: The horizons don't actually change that much between GetSnapshotData() calls. Nor are the horizons actually used every time a snapshot is built. The trick this commit introduces is to delay computation of accurate horizons until their use, and to use horizon boundaries to determine whether accurate horizons need to be computed. The use of RecentGlobal[Data]Xmin to decide whether a row version could be removed has been replaced with new GlobalVisTest* functions. These use two thresholds to determine whether a row can be pruned: 1) definitely_needed, indicating that rows deleted by XIDs >= definitely_needed are definitely still visible. 2) maybe_needed, indicating that rows deleted by XIDs < maybe_needed can definitely be removed. GetSnapshotData() updates definitely_needed to be the xmin of the computed snapshot. When testing whether a row can be removed (with GlobalVisTestIsRemovableXid()) and the tested XID falls in between the two (i.e. XID >= maybe_needed && XID < definitely_needed) the boundaries can be recomputed to be more accurate. As it is not cheap to compute accurate boundaries, we limit the number of times that happens in short succession. 
As the boundaries used by GlobalVisTestIsRemovableXid() are never reset (with maybe_needed updated by GetSnapshotData()), it is likely that further tests can benefit from an earlier computation of accurate horizons. To avoid regressing performance when old_snapshot_threshold is set (as that requires an accurate horizon to be computed), heap_page_prune_opt() doesn't unconditionally call TransactionIdLimitedForOldSnapshots() anymore. Both the computation of the limited horizon and the triggering of errors (with SetOldSnapshotThresholdTimestamp()) are now only done when necessary to remove tuples. This commit just removes the accesses to PGXACT->xmin from GetSnapshotData(), but other members of PGXACT residing in the same cache line are accessed. Therefore this in itself does not result in a significant improvement. Subsequent commits will take advantage of the fact that GetSnapshotData() now does not need to access xmins anymore. Note: This contains a workaround in heap_page_prune_opt() to keep the snapshot_too_old tests working. While that workaround is ugly, the tests currently are not meaningful, and it seems best to address them separately. Author: Andres Freund <andres@anarazel.de> Reviewed-By: Robert Haas <robertmhaas@gmail.com> Reviewed-By: Thomas Munro <thomas.munro@gmail.com> Reviewed-By: David Rowley <dgrowleyml@gmail.com> Discussion: https://postgr.es/m/20200301083601.ews6hz5dduc3w2se@alap3.anarazel.de
This commit is contained in:
@@ -12,6 +12,7 @@
|
||||
|
||||
#include "access/transam.h"
|
||||
#include "storage/block.h"
|
||||
#include "storage/bufpage.h"
|
||||
#include "storage/itemptr.h"
|
||||
#include "storage/off.h"
|
||||
|
||||
@@ -134,8 +135,7 @@ typedef struct GinMetaPageData
|
||||
*/
|
||||
#define GinPageGetDeleteXid(page) ( ((PageHeader) (page))->pd_prune_xid )
|
||||
#define GinPageSetDeleteXid(page, xid) ( ((PageHeader) (page))->pd_prune_xid = xid)
|
||||
#define GinPageIsRecyclable(page) ( PageIsNew(page) || (GinPageIsDeleted(page) \
|
||||
&& TransactionIdPrecedes(GinPageGetDeleteXid(page), RecentGlobalXmin)))
|
||||
extern bool GinPageIsRecyclable(Page page);
|
||||
|
||||
/*
|
||||
* We use our own ItemPointerGet(BlockNumber|OffsetNumber)
|
||||
|
@@ -172,9 +172,12 @@ extern TransactionId heap_compute_xid_horizon_for_tuples(Relation rel,
|
||||
int nitems);
|
||||
|
||||
/* in heap/pruneheap.c */
|
||||
struct GlobalVisState;
|
||||
extern void heap_page_prune_opt(Relation relation, Buffer buffer);
|
||||
extern int heap_page_prune(Relation relation, Buffer buffer,
|
||||
TransactionId OldestXmin,
|
||||
struct GlobalVisState *vistest,
|
||||
TransactionId limited_oldest_xmin,
|
||||
TimestampTz limited_oldest_ts,
|
||||
bool report_stats, TransactionId *latestRemovedXid);
|
||||
extern void heap_page_prune_execute(Buffer buffer,
|
||||
OffsetNumber *redirected, int nredirected,
|
||||
@@ -195,11 +198,14 @@ extern TM_Result HeapTupleSatisfiesUpdate(HeapTuple stup, CommandId curcid,
|
||||
Buffer buffer);
|
||||
extern HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple stup, TransactionId OldestXmin,
|
||||
Buffer buffer);
|
||||
extern HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple stup, Buffer buffer,
|
||||
TransactionId *dead_after);
|
||||
extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
|
||||
uint16 infomask, TransactionId xid);
|
||||
extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple);
|
||||
extern bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot);
|
||||
extern bool HeapTupleIsSurelyDead(HeapTuple htup, TransactionId OldestXmin);
|
||||
extern bool HeapTupleIsSurelyDead(HeapTuple htup,
|
||||
struct GlobalVisState *vistest);
|
||||
|
||||
/*
|
||||
* To avoid leaking too much knowledge about reorderbuffer implementation
|
||||
|
@@ -95,15 +95,6 @@ FullTransactionIdFromU64(uint64 value)
|
||||
(dest) = FirstNormalTransactionId; \
|
||||
} while(0)
|
||||
|
||||
/* advance a FullTransactionId variable, stepping over special XIDs */
|
||||
static inline void
|
||||
FullTransactionIdAdvance(FullTransactionId *dest)
|
||||
{
|
||||
dest->value++;
|
||||
while (XidFromFullTransactionId(*dest) < FirstNormalTransactionId)
|
||||
dest->value++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Retreat a FullTransactionId variable, stepping over xids that would appear
|
||||
* to be special only when viewed as 32bit XIDs.
|
||||
@@ -129,6 +120,23 @@ FullTransactionIdRetreat(FullTransactionId *dest)
|
||||
dest->value--;
|
||||
}
|
||||
|
||||
/*
|
||||
* Advance a FullTransactionId variable, stepping over xids that would appear
|
||||
* to be special only when viewed as 32bit XIDs.
|
||||
*/
|
||||
static inline void
|
||||
FullTransactionIdAdvance(FullTransactionId *dest)
|
||||
{
|
||||
dest->value++;
|
||||
|
||||
/* see FullTransactionIdAdvance() */
|
||||
if (FullTransactionIdPrecedes(*dest, FirstNormalFullTransactionId))
|
||||
return;
|
||||
|
||||
while (XidFromFullTransactionId(*dest) < FirstNormalTransactionId)
|
||||
dest->value++;
|
||||
}
|
||||
|
||||
/* back up a transaction ID variable, handling wraparound correctly */
|
||||
#define TransactionIdRetreat(dest) \
|
||||
do { \
|
||||
@@ -293,6 +301,59 @@ ReadNewTransactionId(void)
|
||||
return XidFromFullTransactionId(ReadNextFullTransactionId());
|
||||
}
|
||||
|
||||
/* return transaction ID backed up by amount, handling wraparound correctly */
|
||||
static inline TransactionId
|
||||
TransactionIdRetreatedBy(TransactionId xid, uint32 amount)
|
||||
{
|
||||
xid -= amount;
|
||||
|
||||
while (xid < FirstNormalTransactionId)
|
||||
xid--;
|
||||
|
||||
return xid;
|
||||
}
|
||||
|
||||
/* return the older of the two IDs */
|
||||
static inline TransactionId
|
||||
TransactionIdOlder(TransactionId a, TransactionId b)
|
||||
{
|
||||
if (!TransactionIdIsValid(a))
|
||||
return b;
|
||||
|
||||
if (!TransactionIdIsValid(b))
|
||||
return a;
|
||||
|
||||
if (TransactionIdPrecedes(a, b))
|
||||
return a;
|
||||
return b;
|
||||
}
|
||||
|
||||
/* return the older of the two IDs, assuming they're both normal */
|
||||
static inline TransactionId
|
||||
NormalTransactionIdOlder(TransactionId a, TransactionId b)
|
||||
{
|
||||
Assert(TransactionIdIsNormal(a));
|
||||
Assert(TransactionIdIsNormal(b));
|
||||
if (NormalTransactionIdPrecedes(a, b))
|
||||
return a;
|
||||
return b;
|
||||
}
|
||||
|
||||
/* return the newer of the two IDs */
|
||||
static inline FullTransactionId
|
||||
FullTransactionIdNewer(FullTransactionId a, FullTransactionId b)
|
||||
{
|
||||
if (!FullTransactionIdIsValid(a))
|
||||
return b;
|
||||
|
||||
if (!FullTransactionIdIsValid(b))
|
||||
return a;
|
||||
|
||||
if (FullTransactionIdFollows(a, b))
|
||||
return a;
|
||||
return b;
|
||||
}
|
||||
|
||||
#endif /* FRONTEND */
|
||||
|
||||
#endif /* TRANSAM_H */
|
||||
|
Reference in New Issue
Block a user