Allow locking updated tuples in tuple_update() and tuple_delete()

Currently, in read committed transaction isolation mode (the default), we have the following sequence of actions when tuple_update()/tuple_delete() finds that the tuple has been updated by a concurrent transaction. 1. Attempt to update/delete the tuple with tuple_update()/tuple_delete(), which returns TM_Updated. 2. Lock the tuple with tuple_lock(). 3. Re-evaluate the plan qual (recheck whether we still need to update/delete, and calculate the new tuple for update). 4. Make a second attempt to update/delete the tuple with tuple_update()/tuple_delete(). This attempt should be successful, since the tuple was previously locked. This patch eliminates step 2 by taking the lock during the first tuple_update()/tuple_delete() call. The heap table access method saves some effort by checking the updated tuple once instead of twice. Future undo-based table access methods, which will start from the latest row version, can immediately place a lock there. The code in nodeModifyTable.c is simplified by removing the nested switch/case. Discussion: https://postgr.es/m/CAPpHfdua-YFw3XTprfutzGp28xXLigFtzNbuFY8yPhqeq6X5kg%40mail.gmail.com Reviewed-by: Aleksander Alekseev, Pavel Borisov, Vignesh C, Mason Sharp Reviewed-by: Andres Freund, Chris Travers
2025-09-03 15:22:11 +03:00 · 2023-03-23 00:13:37 +03:00
parent 764da7710b
commit 11470f544e
6 changed files with 285 additions and 186 deletions
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -45,6 +45,12 @@
 #include "utils/builtins.h"
 #include "utils/rel.h"

+static TM_Result heapam_tuple_lock_internal(Relation relation, ItemPointer tid,
+											Snapshot snapshot, TupleTableSlot *slot,
+											CommandId cid, LockTupleMode mode,
+											LockWaitPolicy wait_policy, uint8 flags,
+											TM_FailureData *tmfd, bool updated);
+
 static void reform_and_rewrite_tuple(HeapTuple tuple,
 									 Relation OldHeap, Relation NewHeap,
 									 Datum *values, bool *isnull, RewriteState rwstate);
@@ -299,14 +305,46 @@ heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
 static TM_Result
 heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
 					Snapshot snapshot, Snapshot crosscheck, bool wait,
-					TM_FailureData *tmfd, bool changingPart)
+					TM_FailureData *tmfd, bool changingPart,
+					LazyTupleTableSlot *lockedSlot)
 {
+	TM_Result	result;
+
 	/*
 	 * Currently Deleting of index tuples are handled at vacuum, in case if
 	 * the storage itself is cleaning the dead tuples by itself, it is the
 	 * time to call the index tuple deletion also.
 	 */
-	return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
+	result = heap_delete(relation, tid, cid, crosscheck, wait,
+						 tmfd, changingPart);
+
+	/*
+	 * If the tuple has been concurrently updated, then get the lock on it.
+	 * (Do this only if the caller asked for that by providing a 'lockedSlot'.)
+	 * With the lock held, a retry of the delete should succeed even if there
+	 * are more concurrent update attempts.
+	 */
+	if (result == TM_Updated && lockedSlot)
+	{
+		TupleTableSlot *evalSlot;
+
+		Assert(wait);
+
+		evalSlot = LAZY_TTS_EVAL(lockedSlot);
+		result = heapam_tuple_lock_internal(relation, tid, snapshot,
+											evalSlot, cid, LockTupleExclusive,
+											LockWaitBlock,
+											TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
+											tmfd, true);
+
+		if (result == TM_Ok)
+		{
+			tmfd->traversed = true;
+			return TM_Updated;
+		}
+	}
+
+	return result;
 }


@@ -314,7 +352,8 @@ static TM_Result
 heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
 					CommandId cid, Snapshot snapshot, Snapshot crosscheck,
 					bool wait, TM_FailureData *tmfd,
-					LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
+					LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes,
+					LazyTupleTableSlot *lockedSlot)
 {
 	bool		shouldFree = true;
 	HeapTuple	tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
@@ -352,6 +391,32 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
 	if (shouldFree)
 		pfree(tuple);

+	/*
+	 * If the tuple has been concurrently updated, then get the lock on it.
+	 * (Do this only if the caller asked for that by providing a 'lockedSlot'.)
+	 * With the lock held, a retry of the update should succeed even if there
+	 * are more concurrent update attempts.
+	 */
+	if (result == TM_Updated && lockedSlot)
+	{
+		TupleTableSlot *evalSlot;
+
+		Assert(wait);
+
+		evalSlot = LAZY_TTS_EVAL(lockedSlot);
+		result = heapam_tuple_lock_internal(relation, otid, snapshot,
+											evalSlot, cid, *lockmode,
+											LockWaitBlock,
+											TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
+											tmfd, true);
+
+		if (result == TM_Ok)
+		{
+			tmfd->traversed = true;
+			return TM_Updated;
+		}
+	}
+
 	return result;
 }

@@ -360,10 +425,26 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
 				  TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
 				  LockWaitPolicy wait_policy, uint8 flags,
 				  TM_FailureData *tmfd)
+{
+	return heapam_tuple_lock_internal(relation, tid, snapshot, slot, cid,
+									  mode, wait_policy, flags, tmfd, false);
+}
+
+/*
+ * This routine does the work for heapam_tuple_lock(), but also supports the
+ * `updated` argument, which re-uses the work heapam_tuple_update() or
+ * heapam_tuple_delete() already did to detect a concurrent tuple update.
+ */
+static TM_Result
+heapam_tuple_lock_internal(Relation relation, ItemPointer tid,
+						   Snapshot snapshot, TupleTableSlot *slot,
+						   CommandId cid, LockTupleMode mode,
+						   LockWaitPolicy wait_policy, uint8 flags,
+						   TM_FailureData *tmfd, bool updated)
 {
 	BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
 	TM_Result	result;
-	Buffer		buffer;
+	Buffer		buffer = InvalidBuffer;
 	HeapTuple	tuple = &bslot->base.tupdata;
 	bool		follow_updates;

@@ -374,16 +455,26 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,

 tuple_lock_retry:
 	tuple->t_self = *tid;
-	result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
-							 follow_updates, &buffer, tmfd);
+	if (!updated)
+		result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
+								 follow_updates, &buffer, tmfd);
+	else
+		result = TM_Updated;

 	if (result == TM_Updated &&
 		(flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
 	{
-		/* Should not encounter speculative tuple on recheck */
-		Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
+		if (!updated)
+		{
+			/* Should not encounter speculative tuple on recheck */
+			Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));

-		ReleaseBuffer(buffer);
+			ReleaseBuffer(buffer);
+		}
+		else
+		{
+			updated = false;
+		}

 		if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
 		{
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -306,7 +306,8 @@ simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot)
 								GetCurrentCommandId(true),
 								snapshot, InvalidSnapshot,
 								true /* wait for commit */ ,
-								&tmfd, false /* changingPart */ );
+								&tmfd, false /* changingPart */ ,
+								NULL);

 	switch (result)
 	{
@@ -355,7 +356,8 @@ simple_table_tuple_update(Relation rel, ItemPointer otid,
 								GetCurrentCommandId(true),
 								snapshot, InvalidSnapshot,
 								true /* wait for commit */ ,
-								&tmfd, &lockmode, update_indexes);
+								&tmfd, &lockmode, update_indexes,
+								NULL);

 	switch (result)
 	{