mirror of
https://github.com/postgres/postgres.git
synced 2025-11-13 16:22:44 +03:00
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of concurrent updates, the scan can fail to see either the old or the new versions of the row. In many cases, we work around this by requiring DDL operations to hold AccessExclusiveLock on the object being modified; in some cases, the existing locking is inadequate and random failures occur as a result. This commit doesn't change anything related to locking, but will hopefully pave the way to allowing lock strength reductions in the future. The major issue that has held us back from making this change in the past is that taking an MVCC snapshot is significantly more expensive than using a static special snapshot such as SnapshotNow. However, testing of various worst-case scenarios reveals that this problem is not severe except under fairly extreme workloads. To mitigate those problems, we avoid retaking the MVCC snapshot for each new scan; instead, we take a new snapshot only when invalidation messages have been processed. The catcache machinery already requires that invalidation messages be sent before releasing the related heavyweight lock; else other backends might rely on locally-cached data rather than scanning the catalog at all. Thus, making snapshot reuse dependent on the same guarantees shouldn't break anything that wasn't already subtly broken. Patch by me. Review by Michael Paquier and Andres Freund.
This commit is contained in:
@@ -80,7 +80,7 @@ static HeapScanDesc heap_beginscan_internal(Relation relation,
|
||||
Snapshot snapshot,
|
||||
int nkeys, ScanKey key,
|
||||
bool allow_strat, bool allow_sync,
|
||||
bool is_bitmapscan);
|
||||
bool is_bitmapscan, bool temp_snap);
|
||||
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
|
||||
TransactionId xid, CommandId cid, int options);
|
||||
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf,
|
||||
@@ -1286,7 +1286,17 @@ heap_beginscan(Relation relation, Snapshot snapshot,
|
||||
int nkeys, ScanKey key)
|
||||
{
|
||||
return heap_beginscan_internal(relation, snapshot, nkeys, key,
|
||||
true, true, false);
|
||||
true, true, false, false);
|
||||
}
|
||||
|
||||
HeapScanDesc
|
||||
heap_beginscan_catalog(Relation relation, int nkeys, ScanKey key)
|
||||
{
|
||||
Oid relid = RelationGetRelid(relation);
|
||||
Snapshot snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
|
||||
|
||||
return heap_beginscan_internal(relation, snapshot, nkeys, key,
|
||||
true, true, false, true);
|
||||
}
|
||||
|
||||
HeapScanDesc
|
||||
@@ -1295,7 +1305,7 @@ heap_beginscan_strat(Relation relation, Snapshot snapshot,
|
||||
bool allow_strat, bool allow_sync)
|
||||
{
|
||||
return heap_beginscan_internal(relation, snapshot, nkeys, key,
|
||||
allow_strat, allow_sync, false);
|
||||
allow_strat, allow_sync, false, false);
|
||||
}
|
||||
|
||||
HeapScanDesc
|
||||
@@ -1303,14 +1313,14 @@ heap_beginscan_bm(Relation relation, Snapshot snapshot,
|
||||
int nkeys, ScanKey key)
|
||||
{
|
||||
return heap_beginscan_internal(relation, snapshot, nkeys, key,
|
||||
false, false, true);
|
||||
false, false, true, false);
|
||||
}
|
||||
|
||||
static HeapScanDesc
|
||||
heap_beginscan_internal(Relation relation, Snapshot snapshot,
|
||||
int nkeys, ScanKey key,
|
||||
bool allow_strat, bool allow_sync,
|
||||
bool is_bitmapscan)
|
||||
bool is_bitmapscan, bool temp_snap)
|
||||
{
|
||||
HeapScanDesc scan;
|
||||
|
||||
@@ -1335,6 +1345,7 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot,
|
||||
scan->rs_strategy = NULL; /* set in initscan */
|
||||
scan->rs_allow_strat = allow_strat;
|
||||
scan->rs_allow_sync = allow_sync;
|
||||
scan->rs_temp_snap = temp_snap;
|
||||
|
||||
/*
|
||||
* we can use page-at-a-time mode if it's an MVCC-safe snapshot
|
||||
@@ -1421,6 +1432,9 @@ heap_endscan(HeapScanDesc scan)
|
||||
if (scan->rs_strategy != NULL)
|
||||
FreeAccessStrategy(scan->rs_strategy);
|
||||
|
||||
if (scan->rs_temp_snap)
|
||||
UnregisterSnapshot(scan->rs_snapshot);
|
||||
|
||||
pfree(scan);
|
||||
}
|
||||
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/rel.h"
|
||||
#include "utils/snapmgr.h"
|
||||
#include "utils/tqual.h"
|
||||
|
||||
|
||||
@@ -231,7 +232,7 @@ BuildIndexValueDescription(Relation indexRelation,
|
||||
* rel: catalog to scan, already opened and suitably locked
|
||||
* indexId: OID of index to conditionally use
|
||||
* indexOK: if false, forces a heap scan (see notes below)
|
||||
* snapshot: time qual to use (usually should be SnapshotNow)
|
||||
* snapshot: time qual to use (NULL for a recent catalog snapshot)
|
||||
* nkeys, key: scan keys
|
||||
*
|
||||
* The attribute numbers in the scan key should be set for the heap case.
|
||||
@@ -266,6 +267,19 @@ systable_beginscan(Relation heapRelation,
|
||||
sysscan->heap_rel = heapRelation;
|
||||
sysscan->irel = irel;
|
||||
|
||||
if (snapshot == NULL)
|
||||
{
|
||||
Oid relid = RelationGetRelid(heapRelation);
|
||||
|
||||
snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
|
||||
sysscan->snapshot = snapshot;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Caller is responsible for any snapshot. */
|
||||
sysscan->snapshot = NULL;
|
||||
}
|
||||
|
||||
if (irel)
|
||||
{
|
||||
int i;
|
||||
@@ -401,6 +415,9 @@ systable_endscan(SysScanDesc sysscan)
|
||||
else
|
||||
heap_endscan(sysscan->scan);
|
||||
|
||||
if (sysscan->snapshot)
|
||||
UnregisterSnapshot(sysscan->snapshot);
|
||||
|
||||
pfree(sysscan);
|
||||
}
|
||||
|
||||
@@ -444,6 +461,19 @@ systable_beginscan_ordered(Relation heapRelation,
|
||||
sysscan->heap_rel = heapRelation;
|
||||
sysscan->irel = indexRelation;
|
||||
|
||||
if (snapshot == NULL)
|
||||
{
|
||||
Oid relid = RelationGetRelid(heapRelation);
|
||||
|
||||
snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
|
||||
sysscan->snapshot = snapshot;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Caller is responsible for any snapshot. */
|
||||
sysscan->snapshot = NULL;
|
||||
}
|
||||
|
||||
/* Change attribute numbers to be index column numbers. */
|
||||
for (i = 0; i < nkeys; i++)
|
||||
{
|
||||
@@ -494,5 +524,7 @@ systable_endscan_ordered(SysScanDesc sysscan)
|
||||
{
|
||||
Assert(sysscan->irel);
|
||||
index_endscan(sysscan->iscan);
|
||||
if (sysscan->snapshot)
|
||||
UnregisterSnapshot(sysscan->snapshot);
|
||||
pfree(sysscan);
|
||||
}
|
||||
|
||||
@@ -141,9 +141,10 @@ deletes index entries before deleting tuples, the super-exclusive lock
|
||||
guarantees that VACUUM can't delete any heap tuple that an indexscanning
|
||||
process might be about to visit. (This guarantee works only for simple
|
||||
indexscans that visit the heap in sync with the index scan, not for bitmap
|
||||
scans. We only need the guarantee when using non-MVCC snapshot rules such
|
||||
as SnapshotNow, so in practice this is only important for system catalog
|
||||
accesses.)
|
||||
scans. We only need the guarantee when using non-MVCC snapshot rules; in
|
||||
an MVCC snapshot, it wouldn't matter if the heap tuple were replaced with
|
||||
an unrelated tuple at the same TID, because the new tuple wouldn't be
|
||||
visible to our scan anyway.)
|
||||
|
||||
Because a page can be split even while someone holds a pin on it, it is
|
||||
possible that an indexscan will return items that are no longer stored on
|
||||
|
||||
@@ -69,11 +69,14 @@ xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec)
|
||||
appendStringInfo(buf, " catalog %u", msg->cat.catId);
|
||||
else if (msg->id == SHAREDINVALRELCACHE_ID)
|
||||
appendStringInfo(buf, " relcache %u", msg->rc.relId);
|
||||
/* remaining cases not expected, but print something anyway */
|
||||
/* not expected, but print something anyway */
|
||||
else if (msg->id == SHAREDINVALSMGR_ID)
|
||||
appendStringInfo(buf, " smgr");
|
||||
/* not expected, but print something anyway */
|
||||
else if (msg->id == SHAREDINVALRELMAP_ID)
|
||||
appendStringInfo(buf, " relmap");
|
||||
else if (msg->id == SHAREDINVALSNAPSHOT_ID)
|
||||
appendStringInfo(buf, " snapshot %u", msg->sn.relId);
|
||||
else
|
||||
appendStringInfo(buf, " unknown id %d", msg->id);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user