mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	Fix concurrent update issue with MERGE.
When executing a MERGE UPDATE action, if there is more than one concurrent update of the target row, the lock-and-retry code would sometimes incorrectly identify the latest version of the target tuple, leading to incorrect results. This was caused by using the ctid field from the TM_FailureData returned by table_tuple_lock() in a case where the result was TM_Ok, which is unsafe because the TM_FailureData struct is not guaranteed to be fully populated in that case. Instead, it should use the tupleid passed to (and updated by) table_tuple_lock(). To reduce the chances of similar errors in the future, improve the commentary for table_tuple_lock() and TM_FailureData to make it clearer that table_tuple_lock() updates the tid passed to it, and most fields of TM_FailureData should not be relied on in non-failure cases. An exception to this is the "traversed" field, which is set in both success and failure cases. Reported-by: Dmitry <dsy.075@yandex.ru> Author: Yugo Nagata <nagata@sraoss.co.jp> Reviewed-by: Dean Rasheed <dean.a.rasheed@gmail.com> Reviewed-by: Chao Li <li.evan.chao@gmail.com> Discussion: https://postgr.es/m/1570d30e-2b95-4239-b9c3-f7bf2f2f8556@yandex.ru Backpatch-through: 15
This commit is contained in:
		@@ -3226,7 +3226,7 @@ lmerge_matched:
 | 
			
		||||
							 * the tuple moved, and setting our current
 | 
			
		||||
							 * resultRelInfo to that.
 | 
			
		||||
							 */
 | 
			
		||||
							if (ItemPointerIndicatesMovedPartitions(&context->tmfd.ctid))
 | 
			
		||||
							if (ItemPointerIndicatesMovedPartitions(tupleid))
 | 
			
		||||
								ereport(ERROR,
 | 
			
		||||
										(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 | 
			
		||||
										 errmsg("tuple to be merged was already moved to another partition due to concurrent update")));
 | 
			
		||||
@@ -3274,12 +3274,13 @@ lmerge_matched:
 | 
			
		||||
									if (ItemPointerIsValid(&lockedtid))
 | 
			
		||||
										UnlockTuple(resultRelInfo->ri_RelationDesc, &lockedtid,
 | 
			
		||||
													InplaceUpdateTupleLock);
 | 
			
		||||
									LockTuple(resultRelInfo->ri_RelationDesc, &context->tmfd.ctid,
 | 
			
		||||
									LockTuple(resultRelInfo->ri_RelationDesc, tupleid,
 | 
			
		||||
											  InplaceUpdateTupleLock);
 | 
			
		||||
									lockedtid = context->tmfd.ctid;
 | 
			
		||||
									lockedtid = *tupleid;
 | 
			
		||||
								}
 | 
			
		||||
 | 
			
		||||
								if (!table_tuple_fetch_row_version(resultRelationDesc,
 | 
			
		||||
																   &context->tmfd.ctid,
 | 
			
		||||
																   tupleid,
 | 
			
		||||
																   SnapshotAny,
 | 
			
		||||
																   resultRelInfo->ri_oldTupleSlot))
 | 
			
		||||
									elog(ERROR, "failed to fetch the target tuple");
 | 
			
		||||
 
 | 
			
		||||
@@ -129,7 +129,9 @@ typedef enum TU_UpdateIndexes
 | 
			
		||||
/*
 | 
			
		||||
 * When table_tuple_update, table_tuple_delete, or table_tuple_lock fail
 | 
			
		||||
 * because the target tuple is already outdated, they fill in this struct to
 | 
			
		||||
 * provide information to the caller about what happened.
 | 
			
		||||
 * provide information to the caller about what happened. When those functions
 | 
			
		||||
 * succeed, the contents of this struct should not be relied upon, except for
 | 
			
		||||
 * `traversed`, which may be set in both success and failure cases.
 | 
			
		||||
 *
 | 
			
		||||
 * ctid is the target's ctid link: it is the same as the target's TID if the
 | 
			
		||||
 * target was deleted, or the location of the replacement tuple if the target
 | 
			
		||||
@@ -145,6 +147,9 @@ typedef enum TU_UpdateIndexes
 | 
			
		||||
 * tuple); otherwise cmax is zero.  (We make this restriction because
 | 
			
		||||
 * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other
 | 
			
		||||
 * transactions.)
 | 
			
		||||
 *
 | 
			
		||||
 * traversed indicates if an update chain was followed in order to try to lock
 | 
			
		||||
 * the target tuple.  (This may be set in both success and failure cases.)
 | 
			
		||||
 */
 | 
			
		||||
typedef struct TM_FailureData
 | 
			
		||||
{
 | 
			
		||||
@@ -1549,7 +1554,7 @@ table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
 | 
			
		||||
 *
 | 
			
		||||
 * Input parameters:
 | 
			
		||||
 *	relation: relation containing tuple (caller must hold suitable lock)
 | 
			
		||||
 *	tid: TID of tuple to lock
 | 
			
		||||
 *	tid: TID of tuple to lock (updated if an update chain was followed)
 | 
			
		||||
 *	snapshot: snapshot to use for visibility determinations
 | 
			
		||||
 *	cid: current command ID (used for visibility test, and stored into
 | 
			
		||||
 *		tuple's cmax if lock is successful)
 | 
			
		||||
@@ -1574,8 +1579,10 @@ table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
 | 
			
		||||
 *	TM_WouldBlock: lock couldn't be acquired and wait_policy is skip
 | 
			
		||||
 *
 | 
			
		||||
 * In the failure cases other than TM_Invisible and TM_Deleted, the routine
 | 
			
		||||
 * fills *tmfd with the tuple's t_ctid, t_xmax, and, if possible, t_cmax.  See
 | 
			
		||||
 * comments for struct TM_FailureData for additional info.
 | 
			
		||||
 * fills *tmfd with the tuple's t_ctid, t_xmax, and, if possible, t_cmax.
 | 
			
		||||
 * Additionally, in both success and failure cases, tmfd->traversed is set if
 | 
			
		||||
 * an update chain was followed.  See comments for struct TM_FailureData for
 | 
			
		||||
 * additional info.
 | 
			
		||||
 */
 | 
			
		||||
static inline TM_Result
 | 
			
		||||
table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot,
 | 
			
		||||
 
 | 
			
		||||
@@ -271,6 +271,151 @@ key|balance|status|val
 | 
			
		||||
 | 
			
		||||
step c1: COMMIT;
 | 
			
		||||
 | 
			
		||||
starting permutation: update1 update6 merge_bal c2 select1 c1
 | 
			
		||||
step update1: UPDATE target t SET balance = balance + 10, val = t.val || ' updated by update1' WHERE t.key = 1;
 | 
			
		||||
step update6: UPDATE target t SET balance = balance - 100, val = t.val || ' updated by update6' WHERE t.key = 1;
 | 
			
		||||
step merge_bal: 
 | 
			
		||||
  MERGE INTO target t
 | 
			
		||||
  USING (SELECT 1 as key) s
 | 
			
		||||
  ON s.key = t.key
 | 
			
		||||
  WHEN MATCHED AND balance < 100 THEN
 | 
			
		||||
	UPDATE SET balance = balance * 2, val = t.val || ' when1'
 | 
			
		||||
  WHEN MATCHED AND balance < 200 THEN
 | 
			
		||||
	UPDATE SET balance = balance * 4, val = t.val || ' when2'
 | 
			
		||||
  WHEN MATCHED AND balance < 300 THEN
 | 
			
		||||
	UPDATE SET balance = balance * 8, val = t.val || ' when3';
 | 
			
		||||
 <waiting ...>
 | 
			
		||||
step c2: COMMIT;
 | 
			
		||||
step merge_bal: <... completed>
 | 
			
		||||
step select1: SELECT * FROM target;
 | 
			
		||||
key|balance|status|val                                              
 | 
			
		||||
---+-------+------+-------------------------------------------------
 | 
			
		||||
  1|    140|s1    |setup updated by update1 updated by update6 when1
 | 
			
		||||
(1 row)
 | 
			
		||||
 | 
			
		||||
step c1: COMMIT;
 | 
			
		||||
 | 
			
		||||
starting permutation: update1_pa update6_pa merge_bal_pa c2 select1_pa c1
 | 
			
		||||
step update1_pa: UPDATE target_pa t SET balance = balance + 10, val = t.val || ' updated by update1_pa' WHERE t.key = 1;
 | 
			
		||||
step update6_pa: UPDATE target_pa t SET balance = balance - 100, val = t.val || ' updated by update6_pa' WHERE t.key = 1;
 | 
			
		||||
step merge_bal_pa: 
 | 
			
		||||
  MERGE INTO target_pa t
 | 
			
		||||
  USING (SELECT 1 as key) s
 | 
			
		||||
  ON s.key = t.key
 | 
			
		||||
  WHEN MATCHED AND balance < 100 THEN
 | 
			
		||||
	UPDATE SET balance = balance * 2, val = t.val || ' when1'
 | 
			
		||||
  WHEN MATCHED AND balance < 200 THEN
 | 
			
		||||
	UPDATE SET balance = balance * 4, val = t.val || ' when2'
 | 
			
		||||
  WHEN MATCHED AND balance < 300 THEN
 | 
			
		||||
	UPDATE SET balance = balance * 8, val = t.val || ' when3';
 | 
			
		||||
 <waiting ...>
 | 
			
		||||
step c2: COMMIT;
 | 
			
		||||
step merge_bal_pa: <... completed>
 | 
			
		||||
step select1_pa: SELECT * FROM target_pa;
 | 
			
		||||
key|balance|status|val                                                    
 | 
			
		||||
---+-------+------+-------------------------------------------------------
 | 
			
		||||
  1|    140|s1    |setup updated by update1_pa updated by update6_pa when1
 | 
			
		||||
(1 row)
 | 
			
		||||
 | 
			
		||||
step c1: COMMIT;
 | 
			
		||||
 | 
			
		||||
starting permutation: update1_tg update6_tg merge_bal_tg c2 select1_tg c1
 | 
			
		||||
s2: NOTICE:  Update: (1,160,s1,setup) -> (1,170,s1,"setup updated by update1_tg")
 | 
			
		||||
step update1_tg: UPDATE target_tg t SET balance = balance + 10, val = t.val || ' updated by update1_tg' WHERE t.key = 1;
 | 
			
		||||
s2: NOTICE:  Update: (1,170,s1,"setup updated by update1_tg") -> (1,70,s1,"setup updated by update1_tg updated by update6_tg")
 | 
			
		||||
step update6_tg: UPDATE target_tg t SET balance = balance - 100, val = t.val || ' updated by update6_tg' WHERE t.key = 1;
 | 
			
		||||
step merge_bal_tg: 
 | 
			
		||||
  WITH t AS (
 | 
			
		||||
    MERGE INTO target_tg t
 | 
			
		||||
    USING (SELECT 1 as key) s
 | 
			
		||||
    ON s.key = t.key
 | 
			
		||||
    WHEN MATCHED AND balance < 100 THEN
 | 
			
		||||
      UPDATE SET balance = balance * 2, val = t.val || ' when1'
 | 
			
		||||
    WHEN MATCHED AND balance < 200 THEN
 | 
			
		||||
      UPDATE SET balance = balance * 4, val = t.val || ' when2'
 | 
			
		||||
    WHEN MATCHED AND balance < 300 THEN
 | 
			
		||||
      UPDATE SET balance = balance * 8, val = t.val || ' when3'
 | 
			
		||||
    RETURNING t.*
 | 
			
		||||
  )
 | 
			
		||||
  SELECT * FROM t;
 | 
			
		||||
 <waiting ...>
 | 
			
		||||
step c2: COMMIT;
 | 
			
		||||
s1: NOTICE:  Update: (1,70,s1,"setup updated by update1_tg updated by update6_tg") -> (1,140,s1,"setup updated by update1_tg updated by update6_tg when1")
 | 
			
		||||
step merge_bal_tg: <... completed>
 | 
			
		||||
key|balance|status|val                                                    
 | 
			
		||||
---+-------+------+-------------------------------------------------------
 | 
			
		||||
  1|    140|s1    |setup updated by update1_tg updated by update6_tg when1
 | 
			
		||||
(1 row)
 | 
			
		||||
 | 
			
		||||
step select1_tg: SELECT * FROM target_tg;
 | 
			
		||||
key|balance|status|val                                                    
 | 
			
		||||
---+-------+------+-------------------------------------------------------
 | 
			
		||||
  1|    140|s1    |setup updated by update1_tg updated by update6_tg when1
 | 
			
		||||
(1 row)
 | 
			
		||||
 | 
			
		||||
step c1: COMMIT;
 | 
			
		||||
 | 
			
		||||
starting permutation: update7 update6 merge_bal c2 select1 c1
 | 
			
		||||
step update7: UPDATE target t SET balance = 350, val = t.val || ' updated by update7' WHERE t.key = 1;
 | 
			
		||||
step update6: UPDATE target t SET balance = balance - 100, val = t.val || ' updated by update6' WHERE t.key = 1;
 | 
			
		||||
step merge_bal: 
 | 
			
		||||
  MERGE INTO target t
 | 
			
		||||
  USING (SELECT 1 as key) s
 | 
			
		||||
  ON s.key = t.key
 | 
			
		||||
  WHEN MATCHED AND balance < 100 THEN
 | 
			
		||||
	UPDATE SET balance = balance * 2, val = t.val || ' when1'
 | 
			
		||||
  WHEN MATCHED AND balance < 200 THEN
 | 
			
		||||
	UPDATE SET balance = balance * 4, val = t.val || ' when2'
 | 
			
		||||
  WHEN MATCHED AND balance < 300 THEN
 | 
			
		||||
	UPDATE SET balance = balance * 8, val = t.val || ' when3';
 | 
			
		||||
 <waiting ...>
 | 
			
		||||
step c2: COMMIT;
 | 
			
		||||
step merge_bal: <... completed>
 | 
			
		||||
step select1: SELECT * FROM target;
 | 
			
		||||
key|balance|status|val                                              
 | 
			
		||||
---+-------+------+-------------------------------------------------
 | 
			
		||||
  1|   2000|s1    |setup updated by update7 updated by update6 when3
 | 
			
		||||
(1 row)
 | 
			
		||||
 | 
			
		||||
step c1: COMMIT;
 | 
			
		||||
 | 
			
		||||
starting permutation: update1_pa_move merge_bal_pa c2 c1
 | 
			
		||||
step update1_pa_move: UPDATE target_pa t SET balance = 210, val = t.val || ' updated by update1_pa_move' WHERE t.key = 1;
 | 
			
		||||
step merge_bal_pa: 
 | 
			
		||||
  MERGE INTO target_pa t
 | 
			
		||||
  USING (SELECT 1 as key) s
 | 
			
		||||
  ON s.key = t.key
 | 
			
		||||
  WHEN MATCHED AND balance < 100 THEN
 | 
			
		||||
	UPDATE SET balance = balance * 2, val = t.val || ' when1'
 | 
			
		||||
  WHEN MATCHED AND balance < 200 THEN
 | 
			
		||||
	UPDATE SET balance = balance * 4, val = t.val || ' when2'
 | 
			
		||||
  WHEN MATCHED AND balance < 300 THEN
 | 
			
		||||
	UPDATE SET balance = balance * 8, val = t.val || ' when3';
 | 
			
		||||
 <waiting ...>
 | 
			
		||||
step c2: COMMIT;
 | 
			
		||||
step merge_bal_pa: <... completed>
 | 
			
		||||
ERROR:  tuple to be locked was already moved to another partition due to concurrent update
 | 
			
		||||
step c1: COMMIT;
 | 
			
		||||
 | 
			
		||||
starting permutation: update1_pa update1_pa_move merge_bal_pa c2 c1
 | 
			
		||||
step update1_pa: UPDATE target_pa t SET balance = balance + 10, val = t.val || ' updated by update1_pa' WHERE t.key = 1;
 | 
			
		||||
step update1_pa_move: UPDATE target_pa t SET balance = 210, val = t.val || ' updated by update1_pa_move' WHERE t.key = 1;
 | 
			
		||||
step merge_bal_pa: 
 | 
			
		||||
  MERGE INTO target_pa t
 | 
			
		||||
  USING (SELECT 1 as key) s
 | 
			
		||||
  ON s.key = t.key
 | 
			
		||||
  WHEN MATCHED AND balance < 100 THEN
 | 
			
		||||
	UPDATE SET balance = balance * 2, val = t.val || ' when1'
 | 
			
		||||
  WHEN MATCHED AND balance < 200 THEN
 | 
			
		||||
	UPDATE SET balance = balance * 4, val = t.val || ' when2'
 | 
			
		||||
  WHEN MATCHED AND balance < 300 THEN
 | 
			
		||||
	UPDATE SET balance = balance * 8, val = t.val || ' when3';
 | 
			
		||||
 <waiting ...>
 | 
			
		||||
step c2: COMMIT;
 | 
			
		||||
step merge_bal_pa: <... completed>
 | 
			
		||||
ERROR:  tuple to be locked was already moved to another partition due to concurrent update
 | 
			
		||||
step c1: COMMIT;
 | 
			
		||||
 | 
			
		||||
starting permutation: update1 merge_delete c2 select1 c1
 | 
			
		||||
step update1: UPDATE target t SET balance = balance + 10, val = t.val || ' updated by update1' WHERE t.key = 1;
 | 
			
		||||
step merge_delete: 
 | 
			
		||||
 
 | 
			
		||||
@@ -146,6 +146,8 @@ setup
 | 
			
		||||
  BEGIN ISOLATION LEVEL READ COMMITTED;
 | 
			
		||||
}
 | 
			
		||||
step "update1" { UPDATE target t SET balance = balance + 10, val = t.val || ' updated by update1' WHERE t.key = 1; }
 | 
			
		||||
step "update1_pa" { UPDATE target_pa t SET balance = balance + 10, val = t.val || ' updated by update1_pa' WHERE t.key = 1; }
 | 
			
		||||
step "update1_pa_move" { UPDATE target_pa t SET balance = 210, val = t.val || ' updated by update1_pa_move' WHERE t.key = 1; }
 | 
			
		||||
step "update1_tg" { UPDATE target_tg t SET balance = balance + 10, val = t.val || ' updated by update1_tg' WHERE t.key = 1; }
 | 
			
		||||
step "update2" { UPDATE target t SET status = 's2', val = t.val || ' updated by update2' WHERE t.key = 1; }
 | 
			
		||||
step "update2_tg" { UPDATE target_tg t SET status = 's2', val = t.val || ' updated by update2_tg' WHERE t.key = 1; }
 | 
			
		||||
@@ -153,6 +155,10 @@ step "update3" { UPDATE target t SET status = 's3', val = t.val || ' updated by
 | 
			
		||||
step "update3_tg" { UPDATE target_tg t SET status = 's3', val = t.val || ' updated by update3_tg' WHERE t.key = 1; }
 | 
			
		||||
step "update5" { UPDATE target t SET status = 's5', val = t.val || ' updated by update5' WHERE t.key = 1; }
 | 
			
		||||
step "update5_tg" { UPDATE target_tg t SET status = 's5', val = t.val || ' updated by update5_tg' WHERE t.key = 1; }
 | 
			
		||||
step "update6" { UPDATE target t SET balance = balance - 100, val = t.val || ' updated by update6' WHERE t.key = 1; }
 | 
			
		||||
step "update6_pa" { UPDATE target_pa t SET balance = balance - 100, val = t.val || ' updated by update6_pa' WHERE t.key = 1; }
 | 
			
		||||
step "update6_tg" { UPDATE target_tg t SET balance = balance - 100, val = t.val || ' updated by update6_tg' WHERE t.key = 1; }
 | 
			
		||||
step "update7" { UPDATE target t SET balance = 350, val = t.val || ' updated by update7' WHERE t.key = 1; }
 | 
			
		||||
step "update_bal1" { UPDATE target t SET balance = 50, val = t.val || ' updated by update_bal1' WHERE t.key = 1; }
 | 
			
		||||
step "update_bal1_pa" { UPDATE target_pa t SET balance = 50, val = t.val || ' updated by update_bal1_pa' WHERE t.key = 1; }
 | 
			
		||||
step "update_bal1_tg" { UPDATE target_tg t SET balance = 50, val = t.val || ' updated by update_bal1_tg' WHERE t.key = 1; }
 | 
			
		||||
@@ -179,6 +185,18 @@ permutation "update_bal1" "merge_bal" "c2" "select1" "c1"
 | 
			
		||||
permutation "update_bal1_pa" "merge_bal_pa" "c2" "select1_pa" "c1"
 | 
			
		||||
permutation "update_bal1_tg" "merge_bal_tg" "c2" "select1_tg" "c1"
 | 
			
		||||
 | 
			
		||||
# merge_bal sees row concurrently updated twice and rechecks WHEN conditions, different check passes, so final balance = 140
 | 
			
		||||
permutation "update1" "update6" "merge_bal" "c2" "select1" "c1"
 | 
			
		||||
permutation "update1_pa" "update6_pa" "merge_bal_pa" "c2" "select1_pa" "c1"
 | 
			
		||||
permutation "update1_tg" "update6_tg" "merge_bal_tg" "c2" "select1_tg" "c1"
 | 
			
		||||
 | 
			
		||||
# merge_bal sees row concurrently updated twice, first update would cause all checks to fail, second update causes different check to pass, so final balance = 2000
 | 
			
		||||
permutation "update7" "update6" "merge_bal" "c2" "select1" "c1"
 | 
			
		||||
 | 
			
		||||
# merge_bal sees concurrently updated row moved to new partition, so fails
 | 
			
		||||
permutation "update1_pa_move" "merge_bal_pa" "c2" "c1"
 | 
			
		||||
permutation "update1_pa" "update1_pa_move" "merge_bal_pa" "c2" "c1"
 | 
			
		||||
 | 
			
		||||
# merge_delete sees concurrently updated row and rechecks WHEN conditions, but recheck passes and row is deleted
 | 
			
		||||
permutation "update1" "merge_delete" "c2" "select1" "c1"
 | 
			
		||||
permutation "update1_tg" "merge_delete_tg" "c2" "select1_tg" "c1"
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user