1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-27 23:21:58 +03:00

Allow locking updated tuples in tuple_update() and tuple_delete()

Currently, in the read committed transaction isolation mode (the default), we
have the following sequence of actions when tuple_update()/tuple_delete() finds
that the tuple has been updated by a concurrent transaction.

1. Attempt to update/delete tuple with tuple_update()/tuple_delete(), which
   returns TM_Updated.
2. Lock tuple with tuple_lock().
3. Re-evaluate plan qual (recheck if we still need to update/delete and
   calculate the new tuple for update).
4. Second attempt to update/delete tuple with tuple_update()/tuple_delete().
   This attempt should be successful, since the tuple was previously locked.

This patch eliminates step 2 by taking the lock during the first
tuple_update()/tuple_delete() call.  The heap table access method saves some
effort by checking the updated tuple once instead of twice.  Future
undo-based table access methods, which will start from the latest row version,
can immediately place a lock there.

The code in nodeModifyTable.c is simplified by removing the nested switch/case.

Discussion: https://postgr.es/m/CAPpHfdua-YFw3XTprfutzGp28xXLigFtzNbuFY8yPhqeq6X5kg%40mail.gmail.com
Reviewed-by: Aleksander Alekseev, Pavel Borisov, Vignesh C, Mason Sharp
Reviewed-by: Andres Freund, Chris Travers
This commit is contained in:
Alexander Korotkov
2023-03-23 00:13:37 +03:00
parent 764da7710b
commit 11470f544e
6 changed files with 285 additions and 186 deletions

View File

@ -1324,26 +1324,62 @@ ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
return true;
}
/*
* Argument bundle for GetEPQSlot(), the callback behind the
* LazyTupleTableSlot wrapper for the EPQ slot that is passed to
* table_tuple_update()/table_tuple_delete().
*
* Callers initialize this struct positionally as {epqstate, resultRelInfo},
* so the order of the fields must not be changed.
*/
typedef struct
{
EPQState *epqstate;	/* EPQ state handed to EvalPlanQualSlot() */
ResultRelInfo *resultRelInfo;	/* supplies ri_RelationDesc and ri_RangeTableIndex */
} GetEPQSlotArg;
/*
 * GetEPQSlot -- callback used to build a LazyTupleTableSlot for the EPQ slot
 *
 * 'arg' must point to a GetEPQSlotArg.  The EPQ slot for the result relation
 * described there is looked up with EvalPlanQualSlot() and returned.
 */
static TupleTableSlot *
GetEPQSlot(void *arg)
{
	GetEPQSlotArg *epqArg = arg;
	TupleTableSlot *epqSlot;

	epqSlot = EvalPlanQualSlot(epqArg->epqstate,
							   epqArg->resultRelInfo->ri_RelationDesc,
							   epqArg->resultRelInfo->ri_RangeTableIndex);

	return epqSlot;
}
/*
* ExecDeleteAct -- subroutine for ExecDelete
*
* Actually delete the tuple from a plain table.
*
* If the 'lockUpdated' flag is set and the target tuple is updated, then
* the latest version gets locked and fetched into the EPQ slot.
*
* When 'lockUpdated' is false, a NULL lazy slot pointer is passed to
* table_tuple_delete() instead.
*
* The return value is whatever table_tuple_delete() returns; context->tmfd
* is handed to it by address.
*
* Caller is in charge of doing EvalPlanQual as necessary
*
* NOTE(review): this listing appears to be a diff rendering whose +/- markers
* were lost, so a few pre-patch lines sit next to their replacements below.
* They are flagged individually; confirm against the real source file.
*/
static TM_Result
ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ItemPointer tupleid, bool changingPart)	/* NOTE(review): looks like the pre-patch signature line -- confirm */
ItemPointer tupleid, bool changingPart, bool lockUpdated)
{
EState *estate = context->estate;
/* Positional initializer: {epqstate, resultRelInfo}. */
GetEPQSlotArg slotArg = {context->epqstate, resultRelInfo};
LazyTupleTableSlot lazyEPQSlot,
*lazyEPQSlotPtr;
if (lockUpdated)
{
/*
* Bind GetEPQSlot() and its argument into the lazy slot; presumably the
* EPQ slot is then only materialized if the callee asks for it -- TODO
* confirm against the MAKE_LAZY_TTS definition.
*/
MAKE_LAZY_TTS(&lazyEPQSlot, GetEPQSlot, &slotArg);
lazyEPQSlotPtr = &lazyEPQSlot;
}
else
{
/* No locking of an updated tuple requested: pass no lazy slot. */
lazyEPQSlotPtr = NULL;
}
return table_tuple_delete(resultRelInfo->ri_RelationDesc, tupleid,
estate->es_output_cid,
estate->es_snapshot,
estate->es_crosscheck_snapshot,
true /* wait for commit */ ,
&context->tmfd,
changingPart);	/* NOTE(review): looks like the pre-patch final argument line -- confirm */
changingPart,
lazyEPQSlotPtr);
}
/*
@ -1488,7 +1524,8 @@ ExecDelete(ModifyTableContext *context,
* transaction-snapshot mode transactions.
*/
ldelete:
result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart);
result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart,
!IsolationUsesXactSnapshot());
switch (result)
{
@ -1541,103 +1578,49 @@ ldelete:
errmsg("could not serialize access due to concurrent update")));
/*
* Already know that we're going to need to do EPQ, so
* fetch tuple directly into the right slot.
* ExecDeleteAct() has already locked the old tuple for
* us. Now we need to copy it to the right slot.
*/
EvalPlanQualBegin(context->epqstate);
inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
resultRelInfo->ri_RangeTableIndex);
result = table_tuple_lock(resultRelationDesc, tupleid,
estate->es_snapshot,
inputslot, estate->es_output_cid,
LockTupleExclusive, LockWaitBlock,
TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
&context->tmfd);
switch (result)
/*
* Save the locked tuple for further processing of the RETURNING
* clause.
*/
if (processReturning &&
resultRelInfo->ri_projectReturning &&
!resultRelInfo->ri_FdwRoutine)
{
case TM_Ok:
Assert(context->tmfd.traversed);
TupleTableSlot *returningSlot;
/*
* Save locked tuple for further processing of
* RETURNING clause.
*/
if (processReturning &&
resultRelInfo->ri_projectReturning &&
!resultRelInfo->ri_FdwRoutine)
{
TupleTableSlot *returningSlot;
returningSlot = ExecGetReturningSlot(estate, resultRelInfo);
ExecCopySlot(returningSlot, inputslot);
ExecMaterializeSlot(returningSlot);
}
epqslot = EvalPlanQual(context->epqstate,
resultRelationDesc,
resultRelInfo->ri_RangeTableIndex,
inputslot);
if (TupIsNull(epqslot))
/* Tuple not passing quals anymore, exiting... */
return NULL;
/*
* If requested, skip delete and pass back the
* updated row.
*/
if (epqreturnslot)
{
*epqreturnslot = epqslot;
return NULL;
}
else
goto ldelete;
case TM_SelfModified:
/*
* This can be reached when following an update
* chain from a tuple updated by another session,
* reaching a tuple that was already updated in
* this transaction. If previously updated by this
* command, ignore the delete, otherwise error
* out.
*
* See also TM_SelfModified response to
* table_tuple_delete() above.
*/
if (context->tmfd.cmax != estate->es_output_cid)
ereport(ERROR,
(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
return NULL;
case TM_Deleted:
/* tuple already deleted; nothing to do */
return NULL;
default:
/*
* TM_Invisible should be impossible because we're
* waiting for updated row versions, and would
* already have errored out if the first version
* is invisible.
*
* TM_Updated should be impossible, because we're
* locking the latest version via
* TUPLE_LOCK_FLAG_FIND_LAST_VERSION.
*/
elog(ERROR, "unexpected table_tuple_lock status: %u",
result);
return NULL;
returningSlot = ExecGetReturningSlot(estate,
resultRelInfo);
ExecCopySlot(returningSlot, inputslot);
ExecMaterializeSlot(returningSlot);
}
Assert(false);
break;
Assert(context->tmfd.traversed);
epqslot = EvalPlanQual(context->epqstate,
resultRelationDesc,
resultRelInfo->ri_RangeTableIndex,
inputslot);
if (TupIsNull(epqslot))
/* Tuple not passing quals anymore, exiting... */
return NULL;
/*
* If requested, skip delete and pass back the updated
* row.
*/
if (epqreturnslot)
{
*epqreturnslot = epqslot;
return NULL;
}
else
goto ldelete;
}
case TM_Deleted:
@ -1982,12 +1965,15 @@ ExecUpdatePrepareSlot(ResultRelInfo *resultRelInfo,
static TM_Result
ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
bool canSetTag, UpdateContext *updateCxt)
bool canSetTag, bool lockUpdated, UpdateContext *updateCxt)
{
EState *estate = context->estate;
Relation resultRelationDesc = resultRelInfo->ri_RelationDesc;
bool partition_constraint_failed;
TM_Result result;
GetEPQSlotArg slotArg = {context->epqstate, resultRelInfo};
LazyTupleTableSlot lazyEPQSlot,
*lazyEPQSlotPtr;
updateCxt->crossPartUpdate = false;
@ -2113,13 +2099,23 @@ lreplace:
* for referential integrity updates in transaction-snapshot mode
* transactions.
*/
if (lockUpdated)
{
MAKE_LAZY_TTS(&lazyEPQSlot, GetEPQSlot, &slotArg);
lazyEPQSlotPtr = &lazyEPQSlot;
}
else
{
lazyEPQSlotPtr = NULL;
}
result = table_tuple_update(resultRelationDesc, tupleid, slot,
estate->es_output_cid,
estate->es_snapshot,
estate->es_crosscheck_snapshot,
true /* wait for commit */ ,
&context->tmfd, &updateCxt->lockmode,
&updateCxt->updateIndexes);
&updateCxt->updateIndexes,
lazyEPQSlotPtr);
if (result == TM_Ok)
updateCxt->updated = true;
@ -2273,7 +2269,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
static TupleTableSlot *
ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
bool canSetTag)
bool canSetTag, bool locked)
{
EState *estate = context->estate;
Relation resultRelationDesc = resultRelInfo->ri_RelationDesc;
@ -2335,7 +2331,8 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/
redo_act:
result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, slot,
canSetTag, &updateCxt);
canSetTag, !IsolationUsesXactSnapshot(),
&updateCxt);
/*
* If ExecUpdateAct reports that a cross-partition update was done,
@ -2394,81 +2391,39 @@ redo_act:
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
Assert(!locked);
/*
* Already know that we're going to need to do EPQ, so
* fetch tuple directly into the right slot.
* ExecUpdateAct() has already locked the old tuple for
* us. Now we need to copy it to the right slot.
*/
inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
resultRelInfo->ri_RangeTableIndex);
result = table_tuple_lock(resultRelationDesc, tupleid,
estate->es_snapshot,
inputslot, estate->es_output_cid,
updateCxt.lockmode, LockWaitBlock,
TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
&context->tmfd);
/* Make sure ri_oldTupleSlot is initialized. */
if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
ExecInitUpdateProjection(context->mtstate,
resultRelInfo);
switch (result)
{
case TM_Ok:
Assert(context->tmfd.traversed);
/*
* Save the locked tuple for further calculation of the
* new tuple.
*/
oldSlot = resultRelInfo->ri_oldTupleSlot;
ExecCopySlot(oldSlot, inputslot);
ExecMaterializeSlot(oldSlot);
Assert(context->tmfd.traversed);
/* Make sure ri_oldTupleSlot is initialized. */
if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
ExecInitUpdateProjection(context->mtstate,
resultRelInfo);
/*
* Save the locked tuple for further calculation
* of the new tuple.
*/
oldSlot = resultRelInfo->ri_oldTupleSlot;
ExecCopySlot(oldSlot, inputslot);
ExecMaterializeSlot(oldSlot);
epqslot = EvalPlanQual(context->epqstate,
resultRelationDesc,
resultRelInfo->ri_RangeTableIndex,
inputslot);
if (TupIsNull(epqslot))
/* Tuple not passing quals anymore, exiting... */
return NULL;
slot = ExecGetUpdateNewTuple(resultRelInfo,
epqslot, oldSlot);
goto redo_act;
case TM_Deleted:
/* tuple already deleted; nothing to do */
return NULL;
case TM_SelfModified:
/*
* This can be reached when following an update
* chain from a tuple updated by another session,
* reaching a tuple that was already updated in
* this transaction. If previously modified by
* this command, ignore the redundant update,
* otherwise error out.
*
* See also TM_SelfModified response to
* table_tuple_update() above.
*/
if (context->tmfd.cmax != estate->es_output_cid)
ereport(ERROR,
(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
return NULL;
default:
/* see table_tuple_lock call in ExecDelete() */
elog(ERROR, "unexpected table_tuple_lock status: %u",
result);
return NULL;
}
epqslot = EvalPlanQual(context->epqstate,
resultRelationDesc,
resultRelInfo->ri_RangeTableIndex,
inputslot);
if (TupIsNull(epqslot))
/* Tuple not passing quals anymore, exiting... */
return NULL;
slot = ExecGetUpdateNewTuple(resultRelInfo,
epqslot, oldSlot);
goto redo_act;
}
break;
@ -2710,7 +2665,7 @@ ExecOnConflictUpdate(ModifyTableContext *context,
*returning = ExecUpdate(context, resultRelInfo,
conflictTid, NULL,
resultRelInfo->ri_onConflict->oc_ProjSlot,
canSetTag);
canSetTag, true);
/*
* Clear out existing tuple, as there might not be another conflict among
@ -2913,7 +2868,7 @@ lmerge_matched:
break; /* concurrent update/delete */
}
result = ExecUpdateAct(context, resultRelInfo, tupleid, NULL,
newslot, false, &updateCxt);
newslot, false, false, &updateCxt);
if (result == TM_Ok && updateCxt.updated)
{
ExecUpdateEpilogue(context, &updateCxt, resultRelInfo,
@ -2931,7 +2886,8 @@ lmerge_matched:
return true; /* "do nothing" */
break; /* concurrent update/delete */
}
result = ExecDeleteAct(context, resultRelInfo, tupleid, false);
result = ExecDeleteAct(context, resultRelInfo, tupleid,
false, false);
if (result == TM_Ok)
{
ExecDeleteEpilogue(context, resultRelInfo, tupleid, NULL,
@ -3837,7 +3793,7 @@ ExecModifyTable(PlanState *pstate)
/* Now apply the update. */
slot = ExecUpdate(&context, resultRelInfo, tupleid, oldtuple,
slot, node->canSetTag);
slot, node->canSetTag, false);
break;
case CMD_DELETE: