mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-31 10:30:33 +03:00 
			
		
		
		
	Fix concurrent update issue with MERGE.
When executing a MERGE UPDATE action, if there is more than one concurrent update of the target row, the lock-and-retry code would sometimes incorrectly identify the latest version of the target tuple, leading to incorrect results. This was caused by using the ctid field from the TM_FailureData returned by table_tuple_lock() in a case where the result was TM_Ok, which is unsafe because the TM_FailureData struct is not guaranteed to be fully populated in that case. Instead, it should use the tupleid passed to (and updated by) table_tuple_lock(). To reduce the chances of similar errors in the future, improve the commentary for table_tuple_lock() and TM_FailureData to make it clearer that table_tuple_lock() updates the tid passed to it, and most fields of TM_FailureData should not be relied on in non-failure cases. An exception to this is the "traversed" field, which is set in both success and failure cases. Reported-by: Dmitry <dsy.075@yandex.ru> Author: Yugo Nagata <nagata@sraoss.co.jp> Reviewed-by: Dean Rasheed <dean.a.rasheed@gmail.com> Reviewed-by: Chao Li <li.evan.chao@gmail.com> Discussion: https://postgr.es/m/1570d30e-2b95-4239-b9c3-f7bf2f2f8556@yandex.ru Backpatch-through: 15
This commit is contained in:
		| @@ -3127,7 +3127,7 @@ lmerge_matched:; | ||||
| 							 * the tuple moved, and setting our current | ||||
| 							 * resultRelInfo to that. | ||||
| 							 */ | ||||
| 							if (ItemPointerIndicatesMovedPartitions(&context->tmfd.ctid)) | ||||
| 							if (ItemPointerIndicatesMovedPartitions(tupleid)) | ||||
| 								ereport(ERROR, | ||||
| 										(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), | ||||
| 										 errmsg("tuple to be deleted was already moved to another partition due to concurrent update"))); | ||||
| @@ -3139,14 +3139,14 @@ lmerge_matched:; | ||||
| 							 * that the first qualifying WHEN MATCHED action | ||||
| 							 * is executed. | ||||
| 							 * | ||||
| 							 * Update tupleid to that of the new tuple, for | ||||
| 							 * the refetch we do at the top. | ||||
| 							 * tupleid has been updated to that of the new | ||||
| 							 * tuple, as required for the refetch we do at the | ||||
| 							 * top. | ||||
| 							 */ | ||||
| 							if (resultRelInfo->ri_needLockTagTuple) | ||||
| 								UnlockTuple(resultRelInfo->ri_RelationDesc, | ||||
| 											&lockedtid, | ||||
| 											InplaceUpdateTupleLock); | ||||
| 							ItemPointerCopy(&context->tmfd.ctid, tupleid); | ||||
| 							goto lmerge_matched; | ||||
|  | ||||
| 						case TM_Deleted: | ||||
|   | ||||
| @@ -105,7 +105,9 @@ typedef enum TM_Result | ||||
| /* | ||||
|  * When table_tuple_update, table_tuple_delete, or table_tuple_lock fail | ||||
|  * because the target tuple is already outdated, they fill in this struct to | ||||
|  * provide information to the caller about what happened. | ||||
|  * provide information to the caller about what happened. When those functions | ||||
|  * succeed, the contents of this struct should not be relied upon, except for | ||||
|  * `traversed`, which may be set in both success and failure cases. | ||||
|  * | ||||
|  * ctid is the target's ctid link: it is the same as the target's TID if the | ||||
|  * target was deleted, or the location of the replacement tuple if the target | ||||
| @@ -121,6 +123,9 @@ typedef enum TM_Result | ||||
|  * tuple); otherwise cmax is zero.  (We make this restriction because | ||||
|  * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other | ||||
|  * transactions.) | ||||
|  * | ||||
|  * traversed indicates if an update chain was followed in order to try to lock | ||||
|  * the target tuple.  (This may be set in both success and failure cases.) | ||||
|  */ | ||||
| typedef struct TM_FailureData | ||||
| { | ||||
| @@ -1520,7 +1525,7 @@ table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, | ||||
|  * | ||||
|  * Input parameters: | ||||
|  *	relation: relation containing tuple (caller must hold suitable lock) | ||||
|  *	tid: TID of tuple to lock | ||||
|  *	tid: TID of tuple to lock (updated if an update chain was followed) | ||||
|  *	snapshot: snapshot to use for visibility determinations | ||||
|  *	cid: current command ID (used for visibility test, and stored into | ||||
|  *		tuple's cmax if lock is successful) | ||||
| @@ -1545,8 +1550,10 @@ table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, | ||||
|  *	TM_WouldBlock: lock couldn't be acquired and wait_policy is skip | ||||
|  * | ||||
|  * In the failure cases other than TM_Invisible and TM_Deleted, the routine | ||||
|  * fills *tmfd with the tuple's t_ctid, t_xmax, and, if possible, t_cmax.  See | ||||
|  * comments for struct TM_FailureData for additional info. | ||||
|  * fills *tmfd with the tuple's t_ctid, t_xmax, and, if possible, t_cmax. | ||||
|  * Additionally, in both success and failure cases, tmfd->traversed is set if | ||||
|  * an update chain was followed.  See comments for struct TM_FailureData for | ||||
|  * additional info. | ||||
|  */ | ||||
| static inline TM_Result | ||||
| table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot, | ||||
|   | ||||
| @@ -262,6 +262,142 @@ key|balance|status|val | ||||
|  | ||||
| step c1: COMMIT; | ||||
|  | ||||
| starting permutation: update1 update6 merge_bal c2 select1 c1 | ||||
| step update1: UPDATE target t SET balance = balance + 10, val = t.val || ' updated by update1' WHERE t.key = 1; | ||||
| step update6: UPDATE target t SET balance = balance - 100, val = t.val || ' updated by update6' WHERE t.key = 1; | ||||
| step merge_bal:  | ||||
|   MERGE INTO target t | ||||
|   USING (SELECT 1 as key) s | ||||
|   ON s.key = t.key | ||||
|   WHEN MATCHED AND balance < 100 THEN | ||||
| 	UPDATE SET balance = balance * 2, val = t.val || ' when1' | ||||
|   WHEN MATCHED AND balance < 200 THEN | ||||
| 	UPDATE SET balance = balance * 4, val = t.val || ' when2' | ||||
|   WHEN MATCHED AND balance < 300 THEN | ||||
| 	UPDATE SET balance = balance * 8, val = t.val || ' when3'; | ||||
|  <waiting ...> | ||||
| step c2: COMMIT; | ||||
| step merge_bal: <... completed> | ||||
| step select1: SELECT * FROM target; | ||||
| key|balance|status|val                                               | ||||
| ---+-------+------+------------------------------------------------- | ||||
|   1|    140|s1    |setup updated by update1 updated by update6 when1 | ||||
| (1 row) | ||||
|  | ||||
| step c1: COMMIT; | ||||
|  | ||||
| starting permutation: update1_pa update6_pa merge_bal_pa c2 select1_pa c1 | ||||
| step update1_pa: UPDATE target_pa t SET balance = balance + 10, val = t.val || ' updated by update1_pa' WHERE t.key = 1; | ||||
| step update6_pa: UPDATE target_pa t SET balance = balance - 100, val = t.val || ' updated by update6_pa' WHERE t.key = 1; | ||||
| step merge_bal_pa:  | ||||
|   MERGE INTO target_pa t | ||||
|   USING (SELECT 1 as key) s | ||||
|   ON s.key = t.key | ||||
|   WHEN MATCHED AND balance < 100 THEN | ||||
| 	UPDATE SET balance = balance * 2, val = t.val || ' when1' | ||||
|   WHEN MATCHED AND balance < 200 THEN | ||||
| 	UPDATE SET balance = balance * 4, val = t.val || ' when2' | ||||
|   WHEN MATCHED AND balance < 300 THEN | ||||
| 	UPDATE SET balance = balance * 8, val = t.val || ' when3'; | ||||
|  <waiting ...> | ||||
| step c2: COMMIT; | ||||
| step merge_bal_pa: <... completed> | ||||
| step select1_pa: SELECT * FROM target_pa; | ||||
| key|balance|status|val                                                     | ||||
| ---+-------+------+------------------------------------------------------- | ||||
|   1|    140|s1    |setup updated by update1_pa updated by update6_pa when1 | ||||
| (1 row) | ||||
|  | ||||
| step c1: COMMIT; | ||||
|  | ||||
| starting permutation: update1_tg update6_tg merge_bal_tg c2 select1_tg c1 | ||||
| s2: NOTICE:  Update: (1,160,s1,setup) -> (1,170,s1,"setup updated by update1_tg") | ||||
| step update1_tg: UPDATE target_tg t SET balance = balance + 10, val = t.val || ' updated by update1_tg' WHERE t.key = 1; | ||||
| s2: NOTICE:  Update: (1,170,s1,"setup updated by update1_tg") -> (1,70,s1,"setup updated by update1_tg updated by update6_tg") | ||||
| step update6_tg: UPDATE target_tg t SET balance = balance - 100, val = t.val || ' updated by update6_tg' WHERE t.key = 1; | ||||
| step merge_bal_tg:  | ||||
|   MERGE INTO target_tg t | ||||
|   USING (SELECT 1 as key) s | ||||
|   ON s.key = t.key | ||||
|   WHEN MATCHED AND balance < 100 THEN | ||||
| 	UPDATE SET balance = balance * 2, val = t.val || ' when1' | ||||
|   WHEN MATCHED AND balance < 200 THEN | ||||
| 	UPDATE SET balance = balance * 4, val = t.val || ' when2' | ||||
|   WHEN MATCHED AND balance < 300 THEN | ||||
| 	UPDATE SET balance = balance * 8, val = t.val || ' when3'; | ||||
|  <waiting ...> | ||||
| step c2: COMMIT; | ||||
| s1: NOTICE:  Update: (1,70,s1,"setup updated by update1_tg updated by update6_tg") -> (1,140,s1,"setup updated by update1_tg updated by update6_tg when1") | ||||
| step merge_bal_tg: <... completed> | ||||
| step select1_tg: SELECT * FROM target_tg; | ||||
| key|balance|status|val                                                     | ||||
| ---+-------+------+------------------------------------------------------- | ||||
|   1|    140|s1    |setup updated by update1_tg updated by update6_tg when1 | ||||
| (1 row) | ||||
|  | ||||
| step c1: COMMIT; | ||||
|  | ||||
| starting permutation: update7 update6 merge_bal c2 select1 c1 | ||||
| step update7: UPDATE target t SET balance = 350, val = t.val || ' updated by update7' WHERE t.key = 1; | ||||
| step update6: UPDATE target t SET balance = balance - 100, val = t.val || ' updated by update6' WHERE t.key = 1; | ||||
| step merge_bal:  | ||||
|   MERGE INTO target t | ||||
|   USING (SELECT 1 as key) s | ||||
|   ON s.key = t.key | ||||
|   WHEN MATCHED AND balance < 100 THEN | ||||
| 	UPDATE SET balance = balance * 2, val = t.val || ' when1' | ||||
|   WHEN MATCHED AND balance < 200 THEN | ||||
| 	UPDATE SET balance = balance * 4, val = t.val || ' when2' | ||||
|   WHEN MATCHED AND balance < 300 THEN | ||||
| 	UPDATE SET balance = balance * 8, val = t.val || ' when3'; | ||||
|  <waiting ...> | ||||
| step c2: COMMIT; | ||||
| step merge_bal: <... completed> | ||||
| step select1: SELECT * FROM target; | ||||
| key|balance|status|val                                               | ||||
| ---+-------+------+------------------------------------------------- | ||||
|   1|   2000|s1    |setup updated by update7 updated by update6 when3 | ||||
| (1 row) | ||||
|  | ||||
| step c1: COMMIT; | ||||
|  | ||||
| starting permutation: update1_pa_move merge_bal_pa c2 c1 | ||||
| step update1_pa_move: UPDATE target_pa t SET balance = 210, val = t.val || ' updated by update1_pa_move' WHERE t.key = 1; | ||||
| step merge_bal_pa:  | ||||
|   MERGE INTO target_pa t | ||||
|   USING (SELECT 1 as key) s | ||||
|   ON s.key = t.key | ||||
|   WHEN MATCHED AND balance < 100 THEN | ||||
| 	UPDATE SET balance = balance * 2, val = t.val || ' when1' | ||||
|   WHEN MATCHED AND balance < 200 THEN | ||||
| 	UPDATE SET balance = balance * 4, val = t.val || ' when2' | ||||
|   WHEN MATCHED AND balance < 300 THEN | ||||
| 	UPDATE SET balance = balance * 8, val = t.val || ' when3'; | ||||
|  <waiting ...> | ||||
| step c2: COMMIT; | ||||
| step merge_bal_pa: <... completed> | ||||
| ERROR:  tuple to be locked was already moved to another partition due to concurrent update | ||||
| step c1: COMMIT; | ||||
|  | ||||
| starting permutation: update1_pa update1_pa_move merge_bal_pa c2 c1 | ||||
| step update1_pa: UPDATE target_pa t SET balance = balance + 10, val = t.val || ' updated by update1_pa' WHERE t.key = 1; | ||||
| step update1_pa_move: UPDATE target_pa t SET balance = 210, val = t.val || ' updated by update1_pa_move' WHERE t.key = 1; | ||||
| step merge_bal_pa:  | ||||
|   MERGE INTO target_pa t | ||||
|   USING (SELECT 1 as key) s | ||||
|   ON s.key = t.key | ||||
|   WHEN MATCHED AND balance < 100 THEN | ||||
| 	UPDATE SET balance = balance * 2, val = t.val || ' when1' | ||||
|   WHEN MATCHED AND balance < 200 THEN | ||||
| 	UPDATE SET balance = balance * 4, val = t.val || ' when2' | ||||
|   WHEN MATCHED AND balance < 300 THEN | ||||
| 	UPDATE SET balance = balance * 8, val = t.val || ' when3'; | ||||
|  <waiting ...> | ||||
| step c2: COMMIT; | ||||
| step merge_bal_pa: <... completed> | ||||
| ERROR:  tuple to be locked was already moved to another partition due to concurrent update | ||||
| step c1: COMMIT; | ||||
|  | ||||
| starting permutation: update1 merge_delete c2 select1 c1 | ||||
| step update1: UPDATE target t SET balance = balance + 10, val = t.val || ' updated by update1' WHERE t.key = 1; | ||||
| step merge_delete:  | ||||
|   | ||||
| @@ -142,6 +142,8 @@ setup | ||||
|   BEGIN ISOLATION LEVEL READ COMMITTED; | ||||
| } | ||||
| step "update1" { UPDATE target t SET balance = balance + 10, val = t.val || ' updated by update1' WHERE t.key = 1; } | ||||
| step "update1_pa" { UPDATE target_pa t SET balance = balance + 10, val = t.val || ' updated by update1_pa' WHERE t.key = 1; } | ||||
| step "update1_pa_move" { UPDATE target_pa t SET balance = 210, val = t.val || ' updated by update1_pa_move' WHERE t.key = 1; } | ||||
| step "update1_tg" { UPDATE target_tg t SET balance = balance + 10, val = t.val || ' updated by update1_tg' WHERE t.key = 1; } | ||||
| step "update2" { UPDATE target t SET status = 's2', val = t.val || ' updated by update2' WHERE t.key = 1; } | ||||
| step "update2_tg" { UPDATE target_tg t SET status = 's2', val = t.val || ' updated by update2_tg' WHERE t.key = 1; } | ||||
| @@ -149,6 +151,10 @@ step "update3" { UPDATE target t SET status = 's3', val = t.val || ' updated by | ||||
| step "update3_tg" { UPDATE target_tg t SET status = 's3', val = t.val || ' updated by update3_tg' WHERE t.key = 1; } | ||||
| step "update5" { UPDATE target t SET status = 's5', val = t.val || ' updated by update5' WHERE t.key = 1; } | ||||
| step "update5_tg" { UPDATE target_tg t SET status = 's5', val = t.val || ' updated by update5_tg' WHERE t.key = 1; } | ||||
| step "update6" { UPDATE target t SET balance = balance - 100, val = t.val || ' updated by update6' WHERE t.key = 1; } | ||||
| step "update6_pa" { UPDATE target_pa t SET balance = balance - 100, val = t.val || ' updated by update6_pa' WHERE t.key = 1; } | ||||
| step "update6_tg" { UPDATE target_tg t SET balance = balance - 100, val = t.val || ' updated by update6_tg' WHERE t.key = 1; } | ||||
| step "update7" { UPDATE target t SET balance = 350, val = t.val || ' updated by update7' WHERE t.key = 1; } | ||||
| step "update_bal1" { UPDATE target t SET balance = 50, val = t.val || ' updated by update_bal1' WHERE t.key = 1; } | ||||
| step "update_bal1_pa" { UPDATE target_pa t SET balance = 50, val = t.val || ' updated by update_bal1_pa' WHERE t.key = 1; } | ||||
| step "update_bal1_tg" { UPDATE target_tg t SET balance = 50, val = t.val || ' updated by update_bal1_tg' WHERE t.key = 1; } | ||||
| @@ -175,6 +181,18 @@ permutation "update_bal1" "merge_bal" "c2" "select1" "c1" | ||||
| permutation "update_bal1_pa" "merge_bal_pa" "c2" "select1_pa" "c1" | ||||
| permutation "update_bal1_tg" "merge_bal_tg" "c2" "select1_tg" "c1" | ||||
|  | ||||
| # merge_bal sees row concurrently updated twice and rechecks WHEN conditions, different check passes, so final balance = 140 | ||||
| permutation "update1" "update6" "merge_bal" "c2" "select1" "c1" | ||||
| permutation "update1_pa" "update6_pa" "merge_bal_pa" "c2" "select1_pa" "c1" | ||||
| permutation "update1_tg" "update6_tg" "merge_bal_tg" "c2" "select1_tg" "c1" | ||||
|  | ||||
| # merge_bal sees row concurrently updated twice, first update would cause all checks to fail, second update causes different check to pass, so final balance = 2000 | ||||
| permutation "update7" "update6" "merge_bal" "c2" "select1" "c1" | ||||
|  | ||||
| # merge_bal sees concurrently updated row moved to new partition, so fails | ||||
| permutation "update1_pa_move" "merge_bal_pa" "c2" "c1" | ||||
| permutation "update1_pa" "update1_pa_move" "merge_bal_pa" "c2" "c1" | ||||
|  | ||||
| # merge_delete sees concurrently updated row and rechecks WHEN conditions, but recheck passes and row is deleted | ||||
| permutation "update1" "merge_delete" "c2" "select1" "c1" | ||||
| permutation "update1_tg" "merge_delete_tg" "c2" "select1_tg" "c1" | ||||
|   | ||||
		Reference in New Issue
	
	Block a user