1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-19 15:49:24 +03:00

Allow locking updated tuples in tuple_update() and tuple_delete()

Currently, in read committed transaction isolation mode (the default), we have
the following sequence of actions when tuple_update()/tuple_delete() finds
that the tuple has been updated by a concurrent transaction.

1. Attempt to update/delete tuple with tuple_update()/tuple_delete(), which
   returns TM_Updated.
2. Lock tuple with tuple_lock().
3. Re-evaluate plan qual (recheck if we still need to update/delete and
   calculate the new tuple for update).
4. Second attempt to update/delete tuple with tuple_update()/tuple_delete().
   This attempt should be successful, since the tuple was previously locked.

This commit eliminates step 2 by taking the lock during the first
tuple_update()/tuple_delete() call.  The heap table access method saves some
effort by checking the updated tuple once instead of twice.  Future
undo-based table access methods, which will start from the latest row version,
can immediately place a lock there.

Also, this commit makes tuple_update()/tuple_delete() optionally save the old
tuple into a dedicated slot.  That saves the effort of re-fetching tuples in
certain cases.

The code in nodeModifyTable.c is simplified by removing the nested switch/case.

Discussion: https://postgr.es/m/CAPpHfdua-YFw3XTprfutzGp28xXLigFtzNbuFY8yPhqeq6X5kg%40mail.gmail.com
Reviewed-by: Aleksander Alekseev, Pavel Borisov, Vignesh C, Mason Sharp
Reviewed-by: Andres Freund, Chris Travers
This commit is contained in:
Alexander Korotkov
2024-03-26 01:27:56 +02:00
parent c7076ba6ad
commit 87985cc925
9 changed files with 502 additions and 346 deletions

View File

@@ -284,19 +284,22 @@ extern void heap_multi_insert(Relation relation, struct TupleTableSlot **slots,
int ntuples, CommandId cid, int options,
BulkInsertState bistate);
extern TM_Result heap_delete(Relation relation, ItemPointer tid,
CommandId cid, Snapshot crosscheck, bool wait,
struct TM_FailureData *tmfd, bool changingPart);
CommandId cid, Snapshot crosscheck, int options,
struct TM_FailureData *tmfd, bool changingPart,
TupleTableSlot *oldSlot);
extern void heap_finish_speculative(Relation relation, ItemPointer tid);
extern void heap_abort_speculative(Relation relation, ItemPointer tid);
extern TM_Result heap_update(Relation relation, ItemPointer otid,
HeapTuple newtup,
CommandId cid, Snapshot crosscheck, bool wait,
CommandId cid, Snapshot crosscheck, int options,
struct TM_FailureData *tmfd, LockTupleMode *lockmode,
TU_UpdateIndexes *update_indexes);
extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
bool follow_updates,
Buffer *buffer, struct TM_FailureData *tmfd);
TU_UpdateIndexes *update_indexes,
TupleTableSlot *oldSlot);
extern TM_Result heap_lock_tuple(Relation relation, ItemPointer tid,
TupleTableSlot *slot,
CommandId cid, LockTupleMode mode,
LockWaitPolicy wait_policy, bool follow_updates,
struct TM_FailureData *tmfd);
extern void heap_inplace_update(Relation relation, HeapTuple tuple);
extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,

View File

@@ -259,6 +259,15 @@ typedef struct TM_IndexDeleteOp
/* Follow update chain and lock latest version of tuple */
#define TUPLE_LOCK_FLAG_FIND_LAST_VERSION (1 << 1)
/*
* "options" flag bits for table_tuple_update and table_tuple_delete
*/
/* Wait for any conflicting update to commit/abort */
#define TABLE_MODIFY_WAIT 0x0001
/* Fetch the existing tuple into a dedicated slot */
#define TABLE_MODIFY_FETCH_OLD_TUPLE 0x0002
/* On concurrent update, follow the update chain and lock latest version of tuple */
#define TABLE_MODIFY_LOCK_UPDATED 0x0004
/* Typedef for callback function for table_index_build_scan */
typedef void (*IndexBuildCallback) (Relation index,
@@ -528,9 +537,10 @@ typedef struct TableAmRoutine
CommandId cid,
Snapshot snapshot,
Snapshot crosscheck,
bool wait,
int options,
TM_FailureData *tmfd,
bool changingPart);
bool changingPart,
TupleTableSlot *oldSlot);
/* see table_tuple_update() for reference about parameters */
TM_Result (*tuple_update) (Relation rel,
@@ -539,10 +549,11 @@ typedef struct TableAmRoutine
CommandId cid,
Snapshot snapshot,
Snapshot crosscheck,
bool wait,
int options,
TM_FailureData *tmfd,
LockTupleMode *lockmode,
TU_UpdateIndexes *update_indexes);
TU_UpdateIndexes *update_indexes,
TupleTableSlot *oldSlot);
/* see table_tuple_lock() for reference about parameters */
TM_Result (*tuple_lock) (Relation rel,
@@ -1463,7 +1474,7 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
}
/*
* Delete a tuple.
* Delete a tuple (and optionally lock the last tuple version).
*
* NB: do not call this directly unless prepared to deal with
* concurrent-update conditions. Use simple_table_tuple_delete instead.
@@ -1474,11 +1485,21 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
* cid - delete command ID (used for visibility test, and stored into
* cmax if successful)
* crosscheck - if not InvalidSnapshot, also check tuple against this
* wait - true if should wait for any conflicting update to commit/abort
* options:
* If TABLE_MODIFY_WAIT, wait for any conflicting update to commit/abort.
* If TABLE_MODIFY_FETCH_OLD_TUPLE option is given, the existing tuple is
* fetched into oldSlot when the delete is successful.
* If TABLE_MODIFY_LOCK_UPDATED option is given and the tuple is
* concurrently updated, then the last tuple version is locked and fetched
* into oldSlot.
*
* Output parameters:
* tmfd - filled in failure cases (see below)
* changingPart - true iff the tuple is being moved to another partition
* table due to an update of the partition key. Otherwise, false.
* oldSlot - slot to save the deleted or locked tuple. Can be NULL if neither
* the TABLE_MODIFY_FETCH_OLD_TUPLE nor the TABLE_MODIFY_LOCK_UPDATED option
* is specified.
*
* Normal, successful return value is TM_Ok, which means we did actually
* delete it. Failure return codes are TM_SelfModified, TM_Updated, and
@@ -1490,16 +1511,18 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
*/
static inline TM_Result
table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
Snapshot snapshot, Snapshot crosscheck, bool wait,
TM_FailureData *tmfd, bool changingPart)
Snapshot snapshot, Snapshot crosscheck, int options,
TM_FailureData *tmfd, bool changingPart,
TupleTableSlot *oldSlot)
{
return rel->rd_tableam->tuple_delete(rel, tid, cid,
snapshot, crosscheck,
wait, tmfd, changingPart);
options, tmfd, changingPart,
oldSlot);
}
/*
* Update a tuple.
* Update a tuple (and optionally lock the last tuple version).
*
* NB: do not call this directly unless you are prepared to deal with
* concurrent-update conditions. Use simple_table_tuple_update instead.
@@ -1511,13 +1534,23 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
* cid - update command ID (used for visibility test, and stored into
* cmax/cmin if successful)
* crosscheck - if not InvalidSnapshot, also check old tuple against this
* wait - true if should wait for any conflicting update to commit/abort
* options:
* If TABLE_MODIFY_WAIT, wait for any conflicting update to commit/abort.
* If TABLE_MODIFY_FETCH_OLD_TUPLE option is given, the existing tuple is
* fetched into oldSlot when the update is successful.
* If TABLE_MODIFY_LOCK_UPDATED option is given and the tuple is
* concurrently updated, then the last tuple version is locked and fetched
* into oldSlot.
*
* Output parameters:
* tmfd - filled in failure cases (see below)
* lockmode - filled with lock mode acquired on tuple
* update_indexes - in success cases this is set to true if new index entries
* are required for this tuple
*
* oldSlot - slot to save the old (pre-update) or locked tuple. Can be NULL if
* neither the TABLE_MODIFY_FETCH_OLD_TUPLE nor the TABLE_MODIFY_LOCK_UPDATED
* option is specified.
* Normal, successful return value is TM_Ok, which means we did actually
* update it. Failure return codes are TM_SelfModified, TM_Updated, and
* TM_BeingModified (the last only possible if TABLE_MODIFY_WAIT is not set
* in options).
@@ -1535,13 +1568,15 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
static inline TM_Result
table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
CommandId cid, Snapshot snapshot, Snapshot crosscheck,
bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode,
TU_UpdateIndexes *update_indexes)
int options, TM_FailureData *tmfd, LockTupleMode *lockmode,
TU_UpdateIndexes *update_indexes,
TupleTableSlot *oldSlot)
{
return rel->rd_tableam->tuple_update(rel, otid, slot,
cid, snapshot, crosscheck,
wait, tmfd,
lockmode, update_indexes);
options, tmfd,
lockmode, update_indexes,
oldSlot);
}
/*
@@ -2083,10 +2118,12 @@ table_scan_sample_next_tuple(TableScanDesc scan,
extern void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot);
extern void simple_table_tuple_delete(Relation rel, ItemPointer tid,
Snapshot snapshot);
Snapshot snapshot,
TupleTableSlot *oldSlot);
extern void simple_table_tuple_update(Relation rel, ItemPointer otid,
TupleTableSlot *slot, Snapshot snapshot,
TU_UpdateIndexes *update_indexes);
TU_UpdateIndexes *update_indexes,
TupleTableSlot *oldSlot);
/* ----------------------------------------------------------------------------