mirror of
https://github.com/postgres/postgres.git
synced 2025-11-18 02:02:55 +03:00
For inplace update durability, make heap_update() callers wait.
The previous commit fixed some ways of losing an inplace update. It
remained possible to lose one when a backend working toward a
heap_update() copied a tuple into memory just before inplace update of
that tuple. In catalogs eligible for inplace update, use LOCKTAG_TUPLE
to govern admission to the steps of copying an old tuple, modifying it,
and issuing heap_update(). This includes MERGE commands. To avoid
changing most of the pg_class DDL, don't require LOCKTAG_TUPLE when
holding a relation lock sufficient to exclude inplace updaters.
Back-patch to v12 (all supported versions). In v13 and v12, "UPDATE
pg_class" or "UPDATE pg_database" can still lose an inplace update. The
v14+ UPDATE fix needs commit 86dc90056d,
and it wasn't worth reimplementing that fix without such infrastructure.
Reviewed by Nitin Motiani and (in earlier versions) Heikki Linnakangas.
Discussion: https://postgr.es/m/20231027214946.79.nmisch@google.com
This commit is contained in:
9
src/backend/utils/cache/relcache.c
vendored
9
src/backend/utils/cache/relcache.c
vendored
@@ -3768,6 +3768,7 @@ RelationSetNewRelfilenumber(Relation relation, char persistence)
|
||||
{
|
||||
RelFileNumber newrelfilenumber;
|
||||
Relation pg_class;
|
||||
ItemPointerData otid;
|
||||
HeapTuple tuple;
|
||||
Form_pg_class classform;
|
||||
MultiXactId minmulti = InvalidMultiXactId;
|
||||
@@ -3810,11 +3811,12 @@ RelationSetNewRelfilenumber(Relation relation, char persistence)
|
||||
*/
|
||||
pg_class = table_open(RelationRelationId, RowExclusiveLock);
|
||||
|
||||
tuple = SearchSysCacheCopy1(RELOID,
|
||||
ObjectIdGetDatum(RelationGetRelid(relation)));
|
||||
tuple = SearchSysCacheLockedCopy1(RELOID,
|
||||
ObjectIdGetDatum(RelationGetRelid(relation)));
|
||||
if (!HeapTupleIsValid(tuple))
|
||||
elog(ERROR, "could not find tuple for relation %u",
|
||||
RelationGetRelid(relation));
|
||||
otid = tuple->t_self;
|
||||
classform = (Form_pg_class) GETSTRUCT(tuple);
|
||||
|
||||
/*
|
||||
@@ -3934,9 +3936,10 @@ RelationSetNewRelfilenumber(Relation relation, char persistence)
|
||||
classform->relminmxid = minmulti;
|
||||
classform->relpersistence = persistence;
|
||||
|
||||
CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);
|
||||
CatalogTupleUpdate(pg_class, &otid, tuple);
|
||||
}
|
||||
|
||||
UnlockTuple(pg_class, &otid, InplaceUpdateTupleLock);
|
||||
heap_freetuple(tuple);
|
||||
|
||||
table_close(pg_class, RowExclusiveLock);
|
||||
|
||||
117
src/backend/utils/cache/syscache.c
vendored
117
src/backend/utils/cache/syscache.c
vendored
@@ -30,7 +30,10 @@
|
||||
#include "catalog/pg_shseclabel_d.h"
|
||||
#include "common/int.h"
|
||||
#include "lib/qunique.h"
|
||||
#include "miscadmin.h"
|
||||
#include "storage/lmgr.h"
|
||||
#include "utils/catcache.h"
|
||||
#include "utils/inval.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/rel.h"
|
||||
#include "utils/syscache.h"
|
||||
@@ -268,6 +271,98 @@ ReleaseSysCache(HeapTuple tuple)
|
||||
ReleaseCatCache(tuple);
|
||||
}
|
||||
|
||||
/*
 * SearchSysCacheLocked1
 *
 * Combine SearchSysCache1() with acquiring a LOCKTAG_TUPLE at mode
 * InplaceUpdateTupleLock.  This is a tool for complying with the
 * README.tuplock section "Locking to write inplace-updated tables".  After
 * the caller's heap_update(), it should UnlockTuple(InplaceUpdateTupleLock)
 * and ReleaseSysCache().
 *
 * The returned tuple may be the subject of an uncommitted update, so this
 * doesn't prevent the "tuple concurrently updated" error.
 */
HeapTuple
SearchSysCacheLocked1(int cacheId,
					  Datum key1)
{
	ItemPointerData tid;
	LOCKTAG		tag;
	/* Shared catalogs use InvalidOid as the locktag's database field. */
	Oid			dboid =
		SysCache[cacheId]->cc_relisshared ? InvalidOid : MyDatabaseId;
	Oid			reloid = cacheinfo[cacheId].reloid;

	/*----------
	 * Since inplace updates may happen just before our LockTuple(), we must
	 * return content acquired after LockTuple() of the TID we return.  If we
	 * just fetched twice instead of looping, the following sequence would
	 * defeat our locking:
	 *
	 * GRANT:   SearchSysCache1() = TID (1,5)
	 * GRANT:   LockTuple(pg_class, (1,5))
	 * [no more inplace update of (1,5) until we release the lock]
	 * CLUSTER: SearchSysCache1() = TID (1,5)
	 * CLUSTER: heap_update() = TID (1,8)
	 * CLUSTER: COMMIT
	 * GRANT:   SearchSysCache1() = TID (1,8)
	 * GRANT:   return (1,8) from SearchSysCacheLocked1()
	 * VACUUM:  SearchSysCache1() = TID (1,8)
	 * VACUUM:  LockTuple(pg_class, (1,8))  # two TIDs now locked for one rel
	 * VACUUM:  inplace update
	 * GRANT:   heap_update() = (1,9)  # lose inplace update
	 *
	 * In the happy case, this takes two fetches, one to determine the TID to
	 * lock and another to get the content and confirm the TID didn't change.
	 *
	 * This is valid even if the row gets updated to a new TID, the old TID
	 * becomes LP_UNUSED, and the row gets updated back to its old TID.  We'd
	 * still hold the right LOCKTAG_TUPLE and a copy of the row captured after
	 * the LOCKTAG_TUPLE.
	 */
	ItemPointerSetInvalid(&tid);
	for (;;)
	{
		HeapTuple	tuple;
		LOCKMODE	lockmode = InplaceUpdateTupleLock;

		tuple = SearchSysCache1(cacheId, key1);
		if (ItemPointerIsValid(&tid))
		{
			/*
			 * Second (or later) iteration: we already hold the tuple lock
			 * taken below.  Keep it only if the re-fetched TID matches.
			 */
			if (!HeapTupleIsValid(tuple))
			{
				/* Row vanished; drop the now-useless lock and report miss. */
				LockRelease(&tag, lockmode, false);
				return tuple;
			}
			if (ItemPointerEquals(&tid, &tuple->t_self))
				return tuple;	/* TID stable across the lock: success */
			/* TID moved under us; release and retry with the new TID. */
			LockRelease(&tag, lockmode, false);
		}
		else if (!HeapTupleIsValid(tuple))
			return tuple;		/* first fetch found nothing; no lock held */

		tid = tuple->t_self;
		ReleaseSysCache(tuple);
		/* like: LockTuple(rel, &tid, lockmode) */
		SET_LOCKTAG_TUPLE(tag, dboid, reloid,
						  ItemPointerGetBlockNumber(&tid),
						  ItemPointerGetOffsetNumber(&tid));
		(void) LockAcquire(&tag, lockmode, false, false);

		/*
		 * If an inplace update just finished, ensure we process the syscache
		 * inval.  XXX this is insufficient: the inplace updater may not yet
		 * have reached AtEOXact_Inval().  See test at inplace-inval.spec.
		 *
		 * If a heap_update() call just released its LOCKTAG_TUPLE, we'll
		 * probably find the old tuple and reach "tuple concurrently updated".
		 * If that heap_update() aborts, our LOCKTAG_TUPLE blocks inplace
		 * updates while our caller works.
		 */
		AcceptInvalidationMessages();
	}
}
|
||||
|
||||
/*
|
||||
* SearchSysCacheCopy
|
||||
*
|
||||
@@ -294,6 +389,28 @@ SearchSysCacheCopy(int cacheId,
|
||||
return newtuple;
|
||||
}
|
||||
|
||||
/*
 * SearchSysCacheLockedCopy1
 *
 * Meld SearchSysCacheLocked1 with SearchSysCacheCopy().  After the
 * caller's heap_update(), it should UnlockTuple(InplaceUpdateTupleLock) and
 * heap_freetuple().
 *
 * Returns NULL (without a tuple lock held) if the lookup finds nothing;
 * otherwise returns a palloc'd copy the caller owns, with the LOCKTAG_TUPLE
 * still held.
 */
HeapTuple
SearchSysCacheLockedCopy1(int cacheId,
						  Datum key1)
{
	HeapTuple	tuple,
				newtuple;

	tuple = SearchSysCacheLocked1(cacheId, key1);
	if (!HeapTupleIsValid(tuple))
		return tuple;
	/* Copy before releasing the cache pin; the lock outlives the pin. */
	newtuple = heap_copytuple(tuple);
	ReleaseSysCache(tuple);
	return newtuple;
}
|
||||
|
||||
/*
|
||||
* SearchSysCacheExists
|
||||
*
|
||||
|
||||
Reference in New Issue
Block a user