1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-24 00:23:06 +03:00

Fix data loss at inplace update after heap_update().

As previously-added tests demonstrated, heap_inplace_update() could
instead update an unrelated tuple of the same catalog.  It could lose
the update.  Losing relhasindex=t was a source of index corruption.
Inplace-updating commands like VACUUM will now wait for heap_update()
commands like GRANT TABLE and GRANT DATABASE.  That isn't ideal, but a
long-running GRANT already hurts VACUUM progress more just by keeping an
XID running.  The VACUUM will behave like a DELETE or UPDATE waiting for
the uncommitted change.

For implementation details, start at the systable_inplace_update_begin()
header comment and README.tuplock.  Back-patch to v12 (all supported
versions).  In back branches, retain a deprecated heap_inplace_update(),
for extensions.

Reported by Smolkin Grigory.  Reviewed by Nitin Motiani, (in earlier
versions) Heikki Linnakangas, and (in earlier versions) Alexander
Lakhin.

Discussion: https://postgr.es/m/CAMp+ueZQz3yDk7qg42hk6-9gxniYbp-=bG2mgqecErqR5gGGOA@mail.gmail.com
This commit is contained in:
Noah Misch
2024-09-24 15:25:18 -07:00
parent dbf3f974ee
commit a07e03fd8f
16 changed files with 801 additions and 155 deletions

View File

@@ -20,6 +20,7 @@
#include "postgres.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/relscan.h"
#include "access/tableam.h"
#include "access/transam.h"
@@ -29,6 +30,7 @@
#include "storage/bufmgr.h"
#include "storage/procarray.h"
#include "utils/acl.h"
#include "utils/injection_point.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "utils/rls.h"
@@ -757,3 +759,138 @@ systable_endscan_ordered(SysScanDesc sysscan)
UnregisterSnapshot(sysscan->snapshot);
pfree(sysscan);
}
/*
 * systable_inplace_update_begin --- update a row "in place" (overwrite it)
 *
 * Overwriting violates both MVCC and transactional safety, so the uses of
 * this function in Postgres are extremely limited.  Nonetheless we find some
 * places to use it.  Standard flow:
 *
 * ... [any slow preparation not requiring oldtup] ...
 * systable_inplace_update_begin([...], &tup, &inplace_state);
 * if (!HeapTupleIsValid(tup))
 *	elog(ERROR, [...]);
 * ... [buffer is exclusive-locked; mutate "tup"] ...
 * if (dirty)
 *	systable_inplace_update_finish(inplace_state, tup);
 * else
 *	systable_inplace_update_cancel(inplace_state);
 *
 * The first several params duplicate the systable_beginscan() param list.
 * "oldtupcopy" is an output parameter, assigned NULL if the key ceases to
 * find a live tuple.  (In PROC_IN_VACUUM, that is a low-probability transient
 * condition.)  If "oldtupcopy" gets non-NULL, you must pass output parameter
 * "state" to systable_inplace_update_finish() or
 * systable_inplace_update_cancel().
 */
void
systable_inplace_update_begin(Relation relation,
							  Oid indexId,
							  bool indexOK,
							  Snapshot snapshot,
							  int nkeys, const ScanKeyData *key,
							  HeapTuple *oldtupcopy,
							  void **state)
{
	int			retries = 0;
	SysScanDesc scan;
	HeapTuple	oldtup;

	/*
	 * For now, we don't allow parallel updates.  Unlike a regular update,
	 * this should never create a combo CID, so it might be possible to relax
	 * this restriction, but not without more thought and testing.  It's not
	 * clear that it would be useful, anyway.
	 */
	if (IsInParallelMode())
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TRANSACTION_STATE),
				 errmsg("cannot update tuples during a parallel operation")));

	/*
	 * Accept a snapshot argument, for symmetry, but this function advances
	 * its snapshot as needed to reach the tail of the updated tuple chain.
	 */
	Assert(snapshot == NULL);

	Assert(IsInplaceUpdateRelation(relation) || !IsSystemRelation(relation));

	/* Loop for an exclusive-locked buffer of a non-updated tuple. */
	for (;;)
	{
		TupleTableSlot *slot;
		BufferHeapTupleTableSlot *bslot;

		CHECK_FOR_INTERRUPTS();

		/*
		 * Processes issuing heap_update (e.g. GRANT) at maximum speed could
		 * drive us to this error.  A hostile table owner has stronger ways to
		 * damage their own table, so that's minor.
		 */
		if (retries++ > 10000)
			elog(ERROR, "giving up after too many tries to overwrite row");

		INJECTION_POINT("inplace-before-pin");
		scan = systable_beginscan(relation, indexId, indexOK, snapshot,
								  nkeys, unconstify(ScanKeyData *, key));
		oldtup = systable_getnext(scan);
		if (!HeapTupleIsValid(oldtup))
		{
			/* Key no longer finds a live tuple; report that to the caller. */
			systable_endscan(scan);
			*oldtupcopy = NULL;
			return;
		}

		slot = scan->slot;
		Assert(TTS_IS_BUFFERTUPLE(slot));
		bslot = (BufferHeapTupleTableSlot *) slot;
		if (heap_inplace_lock(scan->heap_rel,
							  bslot->base.tuple, bslot->buffer))
			break;				/* buffer is now exclusive-locked */
		/* Tuple was concurrently updated; drop the scan and retry. */
		systable_endscan(scan);
	}

	*oldtupcopy = heap_copytuple(oldtup);
	*state = scan;
}
/*
 * systable_inplace_update_finish --- second phase of inplace update
 *
 * The tuple cannot change size, and therefore its header fields and null
 * bitmap (if any) don't change either.
 */
void
systable_inplace_update_finish(void *state, HeapTuple tuple)
{
	SysScanDesc scan = (SysScanDesc) state;
	BufferHeapTupleTableSlot *bslot =
		(BufferHeapTupleTableSlot *) scan->slot;

	/* Overwrite the locked tuple in place, then release buffer lock/pin. */
	heap_inplace_update_and_unlock(scan->heap_rel, bslot->base.tuple,
								   tuple, bslot->buffer);
	systable_endscan(scan);
}
/*
 * systable_inplace_update_cancel --- abandon inplace update
 *
 * This is an alternative to making a no-op update.
 */
void
systable_inplace_update_cancel(void *state)
{
	SysScanDesc scan = (SysScanDesc) state;
	BufferHeapTupleTableSlot *bslot =
		(BufferHeapTupleTableSlot *) scan->slot;

	/* Release the buffer lock taken by systable_inplace_update_begin(). */
	heap_inplace_unlock(scan->heap_rel, bslot->base.tuple, bslot->buffer);
	systable_endscan(scan);
}