1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-27 23:21:58 +03:00

Allow locking updated tuples in tuple_update() and tuple_delete()

Currently, in read committed transaction isolation mode (the default), we have the
following sequence of actions when tuple_update()/tuple_delete() finds that
the tuple has been updated by a concurrent transaction.

1. Attempt to update/delete tuple with tuple_update()/tuple_delete(), which
   returns TM_Updated.
2. Lock tuple with tuple_lock().
3. Re-evaluate plan qual (recheck if we still need to update/delete and
   calculate the new tuple for update).
4. Second attempt to update/delete tuple with tuple_update()/tuple_delete().
   This attempt should be successful, since the tuple was previously locked.

This commit eliminates step 2 by taking the lock during the first
tuple_update()/tuple_delete() call.  The heap table access method saves some
effort by checking the updated tuple once instead of twice.  Future
undo-based table access methods, which will start from the latest row version,
can immediately place a lock there.

Also, this commit makes tuple_update()/tuple_delete() optionally save the old
tuple into a dedicated slot.  That saves the effort of re-fetching tuples in
certain cases.

The code in nodeModifyTable.c is simplified by removing the nested switch/case.

Discussion: https://postgr.es/m/CAPpHfdua-YFw3XTprfutzGp28xXLigFtzNbuFY8yPhqeq6X5kg%40mail.gmail.com
Reviewed-by: Aleksander Alekseev, Pavel Borisov, Vignesh C, Mason Sharp
Reviewed-by: Andres Freund, Chris Travers
This commit is contained in:
Alexander Korotkov
2024-03-26 01:27:56 +02:00
parent c7076ba6ad
commit 87985cc925
9 changed files with 502 additions and 346 deletions

View File

@ -566,6 +566,15 @@ ExecInitInsertProjection(ModifyTableState *mtstate,
table_slot_create(resultRelInfo->ri_RelationDesc,
&estate->es_tupleTable);
/*
* In the ON CONFLICT UPDATE case, we will also need a slot for the old
* tuple, which serves as the basis for calculating the updated tuple.
*/
if (node->onConflictAction == ONCONFLICT_UPDATE)
resultRelInfo->ri_oldTupleSlot =
table_slot_create(resultRelInfo->ri_RelationDesc,
&estate->es_tupleTable);
/* Build ProjectionInfo if needed (it probably isn't). */
if (need_projection)
{
@ -1154,7 +1163,7 @@ ExecInsert(ModifyTableContext *context,
ExecARUpdateTriggers(estate, resultRelInfo,
NULL, NULL,
NULL,
NULL,
resultRelInfo->ri_oldTupleSlot,
slot,
NULL,
mtstate->mt_transition_capture,
@ -1334,7 +1343,8 @@ ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/
static TM_Result
ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ItemPointer tupleid, bool changingPart)
ItemPointer tupleid, bool changingPart, int options,
TupleTableSlot *oldSlot)
{
EState *estate = context->estate;
@ -1342,9 +1352,10 @@ ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
estate->es_output_cid,
estate->es_snapshot,
estate->es_crosscheck_snapshot,
true /* wait for commit */ ,
options,
&context->tmfd,
changingPart);
changingPart,
oldSlot);
}
/*
@ -1353,10 +1364,15 @@ ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
* Closing steps of tuple deletion; this invokes AFTER FOR EACH ROW triggers,
* including the UPDATE triggers if the deletion is being done as part of a
* cross-partition tuple move.
*
* The old tuple is already fetched into slot for regular tables. For FDW,
* the old tuple is given as 'oldtuple' and is to be stored in 'slot' when
* needed.
*/
static void
ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ItemPointer tupleid, HeapTuple oldtuple, bool changingPart)
ItemPointer tupleid, HeapTuple oldtuple,
TupleTableSlot *slot, bool changingPart)
{
ModifyTableState *mtstate = context->mtstate;
EState *estate = context->estate;
@ -1374,8 +1390,8 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
{
ExecARUpdateTriggers(estate, resultRelInfo,
NULL, NULL,
tupleid, oldtuple,
NULL, NULL, mtstate->mt_transition_capture,
oldtuple,
slot, NULL, NULL, mtstate->mt_transition_capture,
false);
/*
@ -1386,10 +1402,30 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
}
/* AFTER ROW DELETE Triggers */
ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple,
ExecARDeleteTriggers(estate, resultRelInfo, oldtuple, slot,
ar_delete_trig_tcs, changingPart);
}
/*
 * ExecInitDeleteTupleSlot
 *		Set up ri_oldTupleSlot of the given result relation for a DELETE.
 *
 * The slot is created for the result relation and registered in the
 * executor state's tuple table.  'ri_projectNewInfoValid' is set afterwards
 * even though no projection is built here.
 */
static void
ExecInitDeleteTupleSlot(ModifyTableState *mtstate,
                        ResultRelInfo *resultRelInfo)
{
    /* Must not be called for a result relation that is already marked valid. */
    Assert(!resultRelInfo->ri_projectNewInfoValid);

    resultRelInfo->ri_oldTupleSlot =
        table_slot_create(resultRelInfo->ri_RelationDesc,
                          &mtstate->ps.state->es_tupleTable);

    resultRelInfo->ri_projectNewInfoValid = true;
}
/* ----------------------------------------------------------------
* ExecDelete
*
@ -1409,7 +1445,8 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
* part of an UPDATE of partition-key, then the slot returned by
* EvalPlanQual() is passed back using output parameter epqreturnslot.
*
* Returns RETURNING result if any, otherwise NULL.
* Returns RETURNING result if any, otherwise NULL. The deleted tuple
* is stored into oldslot independently of that.
* ----------------------------------------------------------------
*/
static TupleTableSlot *
@ -1417,6 +1454,7 @@ ExecDelete(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
ItemPointer tupleid,
HeapTuple oldtuple,
TupleTableSlot *oldslot,
bool processReturning,
bool changingPart,
bool canSetTag,
@ -1480,6 +1518,15 @@ ExecDelete(ModifyTableContext *context,
}
else
{
int options = TABLE_MODIFY_WAIT | TABLE_MODIFY_FETCH_OLD_TUPLE;
/*
* Specify that we need to lock and fetch the last tuple version for
* EPQ on appropriate transaction isolation levels.
*/
if (!IsolationUsesXactSnapshot())
options |= TABLE_MODIFY_LOCK_UPDATED;
/*
* delete the tuple
*
@ -1490,7 +1537,8 @@ ExecDelete(ModifyTableContext *context,
* transaction-snapshot mode transactions.
*/
ldelete:
result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart);
result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart,
options, oldslot);
if (tmresult)
*tmresult = result;
@ -1537,7 +1585,6 @@ ldelete:
case TM_Updated:
{
TupleTableSlot *inputslot;
TupleTableSlot *epqslot;
if (IsolationUsesXactSnapshot())
@ -1546,87 +1593,29 @@ ldelete:
errmsg("could not serialize access due to concurrent update")));
/*
* Already know that we're going to need to do EPQ, so
* fetch tuple directly into the right slot.
* We need to do EPQ. The latest tuple is already found
* and locked as a result of TABLE_MODIFY_LOCK_UPDATED.
*/
EvalPlanQualBegin(context->epqstate);
inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
resultRelInfo->ri_RangeTableIndex);
Assert(context->tmfd.traversed);
epqslot = EvalPlanQual(context->epqstate,
resultRelationDesc,
resultRelInfo->ri_RangeTableIndex,
oldslot);
if (TupIsNull(epqslot))
/* Tuple not passing quals anymore, exiting... */
return NULL;
result = table_tuple_lock(resultRelationDesc, tupleid,
estate->es_snapshot,
inputslot, estate->es_output_cid,
LockTupleExclusive, LockWaitBlock,
TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
&context->tmfd);
switch (result)
/*
* If requested, skip delete and pass back the updated
* row.
*/
if (epqreturnslot)
{
case TM_Ok:
Assert(context->tmfd.traversed);
epqslot = EvalPlanQual(context->epqstate,
resultRelationDesc,
resultRelInfo->ri_RangeTableIndex,
inputslot);
if (TupIsNull(epqslot))
/* Tuple not passing quals anymore, exiting... */
return NULL;
/*
* If requested, skip delete and pass back the
* updated row.
*/
if (epqreturnslot)
{
*epqreturnslot = epqslot;
return NULL;
}
else
goto ldelete;
case TM_SelfModified:
/*
* This can be reached when following an update
* chain from a tuple updated by another session,
* reaching a tuple that was already updated in
* this transaction. If previously updated by this
* command, ignore the delete, otherwise error
* out.
*
* See also TM_SelfModified response to
* table_tuple_delete() above.
*/
if (context->tmfd.cmax != estate->es_output_cid)
ereport(ERROR,
(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
return NULL;
case TM_Deleted:
/* tuple already deleted; nothing to do */
return NULL;
default:
/*
* TM_Invisible should be impossible because we're
* waiting for updated row versions, and would
* already have errored out if the first version
* is invisible.
*
* TM_Updated should be impossible, because we're
* locking the latest version via
* TUPLE_LOCK_FLAG_FIND_LAST_VERSION.
*/
elog(ERROR, "unexpected table_tuple_lock status: %u",
result);
return NULL;
*epqreturnslot = epqslot;
return NULL;
}
Assert(false);
break;
else
goto ldelete;
}
case TM_Deleted:
@ -1660,7 +1649,8 @@ ldelete:
if (tupleDeleted)
*tupleDeleted = true;
ExecDeleteEpilogue(context, resultRelInfo, tupleid, oldtuple, changingPart);
ExecDeleteEpilogue(context, resultRelInfo, tupleid, oldtuple,
oldslot, changingPart);
/* Process RETURNING if present and if requested */
if (processReturning && resultRelInfo->ri_projectReturning)
@ -1678,17 +1668,13 @@ ldelete:
}
else
{
/* Copy old tuple to the returning slot */
slot = ExecGetReturningSlot(estate, resultRelInfo);
if (oldtuple != NULL)
{
ExecForceStoreHeapTuple(oldtuple, slot, false);
}
else
{
if (!table_tuple_fetch_row_version(resultRelationDesc, tupleid,
SnapshotAny, slot))
elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");
}
ExecCopySlot(slot, oldslot);
Assert(!TupIsNull(slot));
}
rslot = ExecProcessReturning(resultRelInfo, slot, context->planSlot);
@ -1788,12 +1774,19 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
MemoryContextSwitchTo(oldcxt);
}
/*
* Make sure ri_oldTupleSlot is initialized. The old tuple is to be saved
* there by ExecDelete() to save effort on further re-fetching.
*/
if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
ExecInitUpdateProjection(mtstate, resultRelInfo);
/*
* Row movement, part 1. Delete the tuple, but skip RETURNING processing.
* We want to return rows from INSERT.
*/
ExecDelete(context, resultRelInfo,
tupleid, oldtuple,
tupleid, oldtuple, resultRelInfo->ri_oldTupleSlot,
false, /* processReturning */
true, /* changingPart */
false, /* canSetTag */
@ -1834,21 +1827,13 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
return true;
else
{
/* Fetch the most recent version of old tuple. */
TupleTableSlot *oldSlot;
/* ... but first, make sure ri_oldTupleSlot is initialized. */
if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
ExecInitUpdateProjection(mtstate, resultRelInfo);
oldSlot = resultRelInfo->ri_oldTupleSlot;
if (!table_tuple_fetch_row_version(resultRelInfo->ri_RelationDesc,
tupleid,
SnapshotAny,
oldSlot))
elog(ERROR, "failed to fetch tuple being updated");
/* and project the new tuple to retry the UPDATE with */
/*
* ExecDelete already fetches the most recent version of old tuple
* to resultRelInfo->ri_oldTupleSlot. So, just project the new
* tuple to retry the UPDATE with.
*/
*retry_slot = ExecGetUpdateNewTuple(resultRelInfo, epqslot,
oldSlot);
resultRelInfo->ri_oldTupleSlot);
return false;
}
}
@ -1967,7 +1952,8 @@ ExecUpdatePrepareSlot(ResultRelInfo *resultRelInfo,
static TM_Result
ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
bool canSetTag, UpdateContext *updateCxt)
bool canSetTag, int options, TupleTableSlot *oldSlot,
UpdateContext *updateCxt)
{
EState *estate = context->estate;
Relation resultRelationDesc = resultRelInfo->ri_RelationDesc;
@ -2059,7 +2045,8 @@ lreplace:
ExecCrossPartitionUpdateForeignKey(context,
resultRelInfo,
insert_destrel,
tupleid, slot,
tupleid,
resultRelInfo->ri_oldTupleSlot,
inserted_tuple);
return TM_Ok;
@ -2102,9 +2089,10 @@ lreplace:
estate->es_output_cid,
estate->es_snapshot,
estate->es_crosscheck_snapshot,
true /* wait for commit */ ,
options /* wait for commit */ ,
&context->tmfd, &updateCxt->lockmode,
&updateCxt->updateIndexes);
&updateCxt->updateIndexes,
oldSlot);
return result;
}
@ -2118,7 +2106,8 @@ lreplace:
static void
ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt,
ResultRelInfo *resultRelInfo, ItemPointer tupleid,
HeapTuple oldtuple, TupleTableSlot *slot)
HeapTuple oldtuple, TupleTableSlot *slot,
TupleTableSlot *oldslot)
{
ModifyTableState *mtstate = context->mtstate;
List *recheckIndexes = NIL;
@ -2134,7 +2123,7 @@ ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt,
/* AFTER ROW UPDATE Triggers */
ExecARUpdateTriggers(context->estate, resultRelInfo,
NULL, NULL,
tupleid, oldtuple, slot,
oldtuple, oldslot, slot,
recheckIndexes,
mtstate->operation == CMD_INSERT ?
mtstate->mt_oc_transition_capture :
@ -2223,7 +2212,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
/* Perform the root table's triggers. */
ExecARUpdateTriggers(context->estate,
rootRelInfo, sourcePartInfo, destPartInfo,
tupleid, NULL, newslot, NIL, NULL, true);
NULL, oldslot, newslot, NIL, NULL, true);
}
/* ----------------------------------------------------------------
@ -2246,6 +2235,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
* no relevant triggers.
*
* slot contains the new tuple value to be stored.
* oldslot is the slot to store the old tuple.
* planSlot is the output of the ModifyTable's subplan; we use it
* to access values from other input tables (for RETURNING),
* row-ID junk columns, etc.
@ -2256,7 +2246,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
static TupleTableSlot *
ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
bool canSetTag)
TupleTableSlot *oldslot, bool canSetTag, bool locked)
{
EState *estate = context->estate;
Relation resultRelationDesc = resultRelInfo->ri_RelationDesc;
@ -2309,6 +2299,16 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
}
else
{
int options = TABLE_MODIFY_WAIT | TABLE_MODIFY_FETCH_OLD_TUPLE;
/*
* Specify that we need to lock and fetch the last tuple version for
* EPQ on appropriate transaction isolation levels if the tuple isn't
* locked already.
*/
if (!locked && !IsolationUsesXactSnapshot())
options |= TABLE_MODIFY_LOCK_UPDATED;
/*
* If we generate a new candidate tuple after EvalPlanQual testing, we
* must loop back here to try again. (We don't need to redo triggers,
@ -2318,7 +2318,7 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/
redo_act:
result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, slot,
canSetTag, &updateCxt);
canSetTag, options, oldslot, &updateCxt);
/*
* If ExecUpdateAct reports that a cross-partition update was done,
@ -2369,88 +2369,32 @@ redo_act:
case TM_Updated:
{
TupleTableSlot *inputslot;
TupleTableSlot *epqslot;
TupleTableSlot *oldSlot;
if (IsolationUsesXactSnapshot())
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
/* Shouldn't get here if the tuple was previously locked */
Assert(!locked);
/*
* Already know that we're going to need to do EPQ, so
* fetch tuple directly into the right slot.
* We need to do EPQ. The latest tuple is already found
* and locked as a result of TABLE_MODIFY_LOCK_UPDATED.
*/
inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
resultRelInfo->ri_RangeTableIndex);
result = table_tuple_lock(resultRelationDesc, tupleid,
estate->es_snapshot,
inputslot, estate->es_output_cid,
updateCxt.lockmode, LockWaitBlock,
TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
&context->tmfd);
switch (result)
{
case TM_Ok:
Assert(context->tmfd.traversed);
epqslot = EvalPlanQual(context->epqstate,
resultRelationDesc,
resultRelInfo->ri_RangeTableIndex,
inputslot);
if (TupIsNull(epqslot))
/* Tuple not passing quals anymore, exiting... */
return NULL;
/* Make sure ri_oldTupleSlot is initialized. */
if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
ExecInitUpdateProjection(context->mtstate,
resultRelInfo);
/* Fetch the most recent version of old tuple. */
oldSlot = resultRelInfo->ri_oldTupleSlot;
if (!table_tuple_fetch_row_version(resultRelationDesc,
tupleid,
SnapshotAny,
oldSlot))
elog(ERROR, "failed to fetch tuple being updated");
slot = ExecGetUpdateNewTuple(resultRelInfo,
epqslot, oldSlot);
goto redo_act;
case TM_Deleted:
/* tuple already deleted; nothing to do */
return NULL;
case TM_SelfModified:
/*
* This can be reached when following an update
* chain from a tuple updated by another session,
* reaching a tuple that was already updated in
* this transaction. If previously modified by
* this command, ignore the redundant update,
* otherwise error out.
*
* See also TM_SelfModified response to
* table_tuple_update() above.
*/
if (context->tmfd.cmax != estate->es_output_cid)
ereport(ERROR,
(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
return NULL;
default:
/* see table_tuple_lock call in ExecDelete() */
elog(ERROR, "unexpected table_tuple_lock status: %u",
result);
return NULL;
}
Assert(context->tmfd.traversed);
epqslot = EvalPlanQual(context->epqstate,
resultRelationDesc,
resultRelInfo->ri_RangeTableIndex,
oldslot);
if (TupIsNull(epqslot))
/* Tuple not passing quals anymore, exiting... */
return NULL;
slot = ExecGetUpdateNewTuple(resultRelInfo,
epqslot,
oldslot);
goto redo_act;
}
break;
@ -2474,7 +2418,7 @@ redo_act:
(estate->es_processed)++;
ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, tupleid, oldtuple,
slot);
slot, oldslot);
/* Process RETURNING if present */
if (resultRelInfo->ri_projectReturning)
@ -2692,7 +2636,8 @@ ExecOnConflictUpdate(ModifyTableContext *context,
*returning = ExecUpdate(context, resultRelInfo,
conflictTid, NULL,
resultRelInfo->ri_onConflict->oc_ProjSlot,
canSetTag);
existing,
canSetTag, true);
/*
* Clear out existing tuple, as there might not be another conflict among
@ -2934,6 +2879,7 @@ lmerge_matched:
{
result = ExecUpdateAct(context, resultRelInfo, tupleid,
NULL, newslot, canSetTag,
TABLE_MODIFY_WAIT, NULL,
&updateCxt);
/*
@ -2956,7 +2902,8 @@ lmerge_matched:
if (result == TM_Ok)
{
ExecUpdateEpilogue(context, &updateCxt, resultRelInfo,
tupleid, NULL, newslot);
tupleid, NULL, newslot,
resultRelInfo->ri_oldTupleSlot);
mtstate->mt_merge_updated += 1;
}
break;
@ -2987,12 +2934,12 @@ lmerge_matched:
}
else
result = ExecDeleteAct(context, resultRelInfo, tupleid,
false);
false, TABLE_MODIFY_WAIT, NULL);
if (result == TM_Ok)
{
ExecDeleteEpilogue(context, resultRelInfo, tupleid, NULL,
false);
resultRelInfo->ri_oldTupleSlot, false);
mtstate->mt_merge_deleted += 1;
}
break;
@ -4006,12 +3953,18 @@ ExecModifyTable(PlanState *pstate)
/* Now apply the update. */
slot = ExecUpdate(&context, resultRelInfo, tupleid, oldtuple,
slot, node->canSetTag);
slot, resultRelInfo->ri_oldTupleSlot,
node->canSetTag, false);
break;
case CMD_DELETE:
/* Initialize slot for DELETE to fetch the old tuple */
if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
ExecInitDeleteTupleSlot(node, resultRelInfo);
slot = ExecDelete(&context, resultRelInfo, tupleid, oldtuple,
true, false, node->canSetTag, NULL, NULL, NULL);
resultRelInfo->ri_oldTupleSlot, true, false,
node->canSetTag, NULL, NULL, NULL);
break;
case CMD_MERGE: