mirror of
https://github.com/postgres/postgres.git
synced 2025-10-19 15:49:24 +03:00
Allow locking updated tuples in tuple_update() and tuple_delete()
Currently, in read committed transaction isolation mode (default), we have the following sequence of actions when tuple_update()/tuple_delete() finds the tuple updated by a concurrent transaction. 1. Attempt to update/delete the tuple with tuple_update()/tuple_delete(), which returns TM_Updated. 2. Lock the tuple with tuple_lock(). 3. Re-evaluate plan qual (recheck if we still need to update/delete and calculate the new tuple for update). 4. Second attempt to update/delete the tuple with tuple_update()/tuple_delete(). This attempt should be successful, since the tuple was previously locked. This commit eliminates step 2 by taking the lock during the first tuple_update()/tuple_delete() call. The heap table access method saves some effort by checking the updated tuple once instead of twice. Future undo-based table access methods, which will start from the latest row version, can immediately place a lock there. Also, this commit makes tuple_update()/tuple_delete() optionally save the old tuple into a dedicated slot. That saves the effort of re-fetching tuples in certain cases. The code in nodeModifyTable.c is simplified by removing the nested switch/case. Discussion: https://postgr.es/m/CAPpHfdua-YFw3XTprfutzGp28xXLigFtzNbuFY8yPhqeq6X5kg%40mail.gmail.com Reviewed-by: Aleksander Alekseev, Pavel Borisov, Vignesh C, Mason Sharp Reviewed-by: Andres Freund, Chris Travers
This commit is contained in:
@@ -284,19 +284,22 @@ extern void heap_multi_insert(Relation relation, struct TupleTableSlot **slots,
|
||||
int ntuples, CommandId cid, int options,
|
||||
BulkInsertState bistate);
|
||||
extern TM_Result heap_delete(Relation relation, ItemPointer tid,
|
||||
CommandId cid, Snapshot crosscheck, bool wait,
|
||||
struct TM_FailureData *tmfd, bool changingPart);
|
||||
CommandId cid, Snapshot crosscheck, int options,
|
||||
struct TM_FailureData *tmfd, bool changingPart,
|
||||
TupleTableSlot *oldSlot);
|
||||
extern void heap_finish_speculative(Relation relation, ItemPointer tid);
|
||||
extern void heap_abort_speculative(Relation relation, ItemPointer tid);
|
||||
extern TM_Result heap_update(Relation relation, ItemPointer otid,
|
||||
HeapTuple newtup,
|
||||
CommandId cid, Snapshot crosscheck, bool wait,
|
||||
CommandId cid, Snapshot crosscheck, int options,
|
||||
struct TM_FailureData *tmfd, LockTupleMode *lockmode,
|
||||
TU_UpdateIndexes *update_indexes);
|
||||
extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
|
||||
CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
|
||||
bool follow_updates,
|
||||
Buffer *buffer, struct TM_FailureData *tmfd);
|
||||
TU_UpdateIndexes *update_indexes,
|
||||
TupleTableSlot *oldSlot);
|
||||
extern TM_Result heap_lock_tuple(Relation relation, ItemPointer tid,
|
||||
TupleTableSlot *slot,
|
||||
CommandId cid, LockTupleMode mode,
|
||||
LockWaitPolicy wait_policy, bool follow_updates,
|
||||
struct TM_FailureData *tmfd);
|
||||
|
||||
extern void heap_inplace_update(Relation relation, HeapTuple tuple);
|
||||
extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
|
||||
|
@@ -259,6 +259,15 @@ typedef struct TM_IndexDeleteOp
|
||||
/* Follow update chain and lock latest version of tuple */
|
||||
#define TUPLE_LOCK_FLAG_FIND_LAST_VERSION (1 << 1)
|
||||
|
||||
/*
|
||||
* "options" flag bits for table_tuple_update and table_tuple_delete.
|
||||
*/
|
||||
/* Wait for any conflicting update to commit/abort */
|
||||
#define TABLE_MODIFY_WAIT 0x0001
|
||||
/* Fetch the existing tuple into a dedicated slot */
|
||||
#define TABLE_MODIFY_FETCH_OLD_TUPLE 0x0002
|
||||
/* On concurrent update, follow the update chain and lock latest version of tuple */
|
||||
#define TABLE_MODIFY_LOCK_UPDATED 0x0004
|
||||
|
||||
|
||||
/* Typedef for callback function for table_index_build_scan */
|
||||
typedef void (*IndexBuildCallback) (Relation index,
|
||||
@@ -528,9 +537,10 @@ typedef struct TableAmRoutine
|
||||
CommandId cid,
|
||||
Snapshot snapshot,
|
||||
Snapshot crosscheck,
|
||||
bool wait,
|
||||
int options,
|
||||
TM_FailureData *tmfd,
|
||||
bool changingPart);
|
||||
bool changingPart,
|
||||
TupleTableSlot *oldSlot);
|
||||
|
||||
/* see table_tuple_update() for reference about parameters */
|
||||
TM_Result (*tuple_update) (Relation rel,
|
||||
@@ -539,10 +549,11 @@ typedef struct TableAmRoutine
|
||||
CommandId cid,
|
||||
Snapshot snapshot,
|
||||
Snapshot crosscheck,
|
||||
bool wait,
|
||||
int options,
|
||||
TM_FailureData *tmfd,
|
||||
LockTupleMode *lockmode,
|
||||
TU_UpdateIndexes *update_indexes);
|
||||
TU_UpdateIndexes *update_indexes,
|
||||
TupleTableSlot *oldSlot);
|
||||
|
||||
/* see table_tuple_lock() for reference about parameters */
|
||||
TM_Result (*tuple_lock) (Relation rel,
|
||||
@@ -1463,7 +1474,7 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
|
||||
}
|
||||
|
||||
/*
|
||||
* Delete a tuple.
|
||||
* Delete a tuple (and optionally lock the last tuple version).
|
||||
*
|
||||
* NB: do not call this directly unless prepared to deal with
|
||||
* concurrent-update conditions. Use simple_table_tuple_delete instead.
|
||||
@@ -1474,11 +1485,21 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
|
||||
* cid - delete command ID (used for visibility test, and stored into
|
||||
* cmax if successful)
|
||||
* crosscheck - if not InvalidSnapshot, also check tuple against this
|
||||
* wait - true if should wait for any conflicting update to commit/abort
|
||||
* options:
|
||||
* If TABLE_MODIFY_WAIT, wait for any conflicting update to commit/abort.
|
||||
* If TABLE_MODIFY_FETCH_OLD_TUPLE option is given, the existing tuple is
|
||||
* fetched into oldSlot when the delete is successful.
|
||||
* If TABLE_MODIFY_LOCK_UPDATED option is given and the tuple is
|
||||
* concurrently updated, then the last tuple version is locked and fetched
|
||||
* into oldSlot.
|
||||
*
|
||||
* Output parameters:
|
||||
* tmfd - filled in failure cases (see below)
|
||||
* changingPart - true iff the tuple is being moved to another partition
|
||||
* table due to an update of the partition key. Otherwise, false.
|
||||
* oldSlot - slot to save the deleted or locked tuple. Can be NULL if none of
|
||||
* TABLE_MODIFY_FETCH_OLD_TUPLE or TABLE_MODIFY_LOCK_UPDATED options
|
||||
* is specified.
|
||||
*
|
||||
* Normal, successful return value is TM_Ok, which means we did actually
|
||||
* delete it. Failure return codes are TM_SelfModified, TM_Updated, and
|
||||
@@ -1490,16 +1511,18 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
|
||||
*/
|
||||
static inline TM_Result
|
||||
table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
|
||||
Snapshot snapshot, Snapshot crosscheck, bool wait,
|
||||
TM_FailureData *tmfd, bool changingPart)
|
||||
Snapshot snapshot, Snapshot crosscheck, int options,
|
||||
TM_FailureData *tmfd, bool changingPart,
|
||||
TupleTableSlot *oldSlot)
|
||||
{
|
||||
return rel->rd_tableam->tuple_delete(rel, tid, cid,
|
||||
snapshot, crosscheck,
|
||||
wait, tmfd, changingPart);
|
||||
options, tmfd, changingPart,
|
||||
oldSlot);
|
||||
}
|
||||
|
||||
/*
|
||||
* Update a tuple.
|
||||
* Update a tuple (and optionally lock the last tuple version).
|
||||
*
|
||||
* NB: do not call this directly unless you are prepared to deal with
|
||||
* concurrent-update conditions. Use simple_table_tuple_update instead.
|
||||
@@ -1511,13 +1534,23 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
|
||||
* cid - update command ID (used for visibility test, and stored into
|
||||
* cmax/cmin if successful)
|
||||
* crosscheck - if not InvalidSnapshot, also check old tuple against this
|
||||
* wait - true if should wait for any conflicting update to commit/abort
|
||||
* options:
|
||||
* If TABLE_MODIFY_WAIT, wait for any conflicting update to commit/abort.
|
||||
* If TABLE_MODIFY_FETCH_OLD_TUPLE option is given, the existing tuple is
|
||||
* fetched into oldSlot when the update is successful.
|
||||
* If TABLE_MODIFY_LOCK_UPDATED option is given and the tuple is
|
||||
* concurrently updated, then the last tuple version is locked and fetched
|
||||
* into oldSlot.
|
||||
*
|
||||
* Output parameters:
|
||||
* tmfd - filled in failure cases (see below)
|
||||
* lockmode - filled with lock mode acquired on tuple
|
||||
* update_indexes - in success cases this is set to true if new index entries
|
||||
* are required for this tuple
|
||||
*
|
||||
* oldSlot - slot to save the existing (pre-update) or locked tuple. Can be NULL if none of
|
||||
* TABLE_MODIFY_FETCH_OLD_TUPLE or TABLE_MODIFY_LOCK_UPDATED options
|
||||
* is specified.
|
||||
|
||||
* Normal, successful return value is TM_Ok, which means we did actually
|
||||
* update it. Failure return codes are TM_SelfModified, TM_Updated, and
|
||||
* TM_BeingModified (the last only possible if TABLE_MODIFY_WAIT is not set in options).
|
||||
@@ -1535,13 +1568,15 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
|
||||
static inline TM_Result
|
||||
table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
|
||||
CommandId cid, Snapshot snapshot, Snapshot crosscheck,
|
||||
bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode,
|
||||
TU_UpdateIndexes *update_indexes)
|
||||
int options, TM_FailureData *tmfd, LockTupleMode *lockmode,
|
||||
TU_UpdateIndexes *update_indexes,
|
||||
TupleTableSlot *oldSlot)
|
||||
{
|
||||
return rel->rd_tableam->tuple_update(rel, otid, slot,
|
||||
cid, snapshot, crosscheck,
|
||||
wait, tmfd,
|
||||
lockmode, update_indexes);
|
||||
options, tmfd,
|
||||
lockmode, update_indexes,
|
||||
oldSlot);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2083,10 +2118,12 @@ table_scan_sample_next_tuple(TableScanDesc scan,
|
||||
|
||||
extern void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot);
|
||||
extern void simple_table_tuple_delete(Relation rel, ItemPointer tid,
|
||||
Snapshot snapshot);
|
||||
Snapshot snapshot,
|
||||
TupleTableSlot *oldSlot);
|
||||
extern void simple_table_tuple_update(Relation rel, ItemPointer otid,
|
||||
TupleTableSlot *slot, Snapshot snapshot,
|
||||
TU_UpdateIndexes *update_indexes);
|
||||
TU_UpdateIndexes *update_indexes,
|
||||
TupleTableSlot *oldSlot);
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
|
Reference in New Issue
Block a user