diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 6daf4a87dec..98759e3bf70 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -85,8 +85,11 @@ static void compute_new_xmax_infomask(TransactionId xmax, uint16 old_infomask,
 									  LockTupleMode mode, bool is_update,
 									  TransactionId *result_xmax, uint16 *result_infomask,
 									  uint16 *result_infomask2);
-static TM_Result heap_lock_updated_tuple(Relation rel, HeapTuple tuple,
-										 const ItemPointerData *ctid, TransactionId xid,
+static TM_Result heap_lock_updated_tuple(Relation rel,
+										 uint16 prior_infomask,
+										 TransactionId prior_raw_xmax,
+										 const ItemPointerData *prior_ctid,
+										 TransactionId xid,
 										 LockTupleMode mode);
 static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask,
 								   uint16 *new_infomask2);
@@ -4816,11 +4819,13 @@ l3:
 				 * If there are updates, follow the update chain; bail out if
 				 * that cannot be done.
 				 */
-				if (follow_updates && updated)
+				if (follow_updates && updated &&
+					!ItemPointerEquals(&tuple->t_self, &t_ctid))
 				{
 					TM_Result	res;
 
-					res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
+					res = heap_lock_updated_tuple(relation,
+												  infomask, xwait, &t_ctid,
 												  GetCurrentTransactionId(),
 												  mode);
 					if (res != TM_Ok)
@@ -5063,11 +5068,13 @@ l3:
 			}
 
 			/* if there are updates, follow the update chain */
-			if (follow_updates && !HEAP_XMAX_IS_LOCKED_ONLY(infomask))
+			if (follow_updates && !HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
+				!ItemPointerEquals(&tuple->t_self, &t_ctid))
 			{
 				TM_Result	res;
 
-				res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
+				res = heap_lock_updated_tuple(relation,
+											  infomask, xwait, &t_ctid,
 											  GetCurrentTransactionId(),
 											  mode);
 				if (res != TM_Ok)
@@ -5721,7 +5728,8 @@ test_lockmode_for_conflict(MultiXactStatus status, TransactionId xid,
  * version as well.
  */
 static TM_Result
-heap_lock_updated_tuple_rec(Relation rel, const ItemPointerData *tid, TransactionId xid,
+heap_lock_updated_tuple_rec(Relation rel, TransactionId priorXmax,
+							const ItemPointerData *tid, TransactionId xid,
 							LockTupleMode mode)
 {
 	TM_Result	result;
@@ -5734,7 +5742,6 @@ heap_lock_updated_tuple_rec(Relation rel, const ItemPointerData *tid, Transactio
 				old_infomask2;
 	TransactionId xmax,
 				new_xmax;
-	TransactionId priorXmax = InvalidTransactionId;
 	bool		cleared_all_frozen = false;
 	bool		pinned_desired_page;
 	Buffer		vmbuffer = InvalidBuffer;
@@ -6048,7 +6055,10 @@ out_unlocked:
  * Follow update chain when locking an updated tuple, acquiring locks (row
  * marks) on the updated versions.
  *
- * The initial tuple is assumed to be already locked.
+ * 'prior_infomask', 'prior_raw_xmax' and 'prior_ctid' are the corresponding
+ * fields from the initial tuple. We will lock the tuples starting from the
+ * one that 'prior_ctid' points to. Note: This function does not lock the
+ * initial tuple itself.
 *
 * This function doesn't check visibility, it just unconditionally marks the
 * tuple(s) as locked.  If any tuple in the updated chain is being deleted
@@ -6066,16 +6076,22 @@ out_unlocked:
 * levels, because that would lead to a serializability failure.
 */
 static TM_Result
-heap_lock_updated_tuple(Relation rel, HeapTuple tuple, const ItemPointerData *ctid,
+heap_lock_updated_tuple(Relation rel,
+						uint16 prior_infomask,
+						TransactionId prior_raw_xmax,
+						const ItemPointerData *prior_ctid,
 						TransactionId xid, LockTupleMode mode)
 {
+	INJECTION_POINT("heap_lock_updated_tuple", NULL);
+
 	/*
-	 * If the tuple has not been updated, or has moved into another partition
-	 * (effectively a delete) stop here.
+	 * If the tuple has moved into another partition (effectively a delete)
+	 * stop here.
 	 */
-	if (!HeapTupleHeaderIndicatesMovedPartitions(tuple->t_data) &&
-		!ItemPointerEquals(&tuple->t_self, ctid))
+	if (!ItemPointerIndicatesMovedPartitions(prior_ctid))
 	{
+		TransactionId prior_xmax;
+
 		/*
 		 * If this is the first possibly-multixact-able operation in the
 		 * current transaction, set my per-backend OldestMemberMXactId
@@ -6087,7 +6103,9 @@ heap_lock_updated_tuple(Relation rel, HeapTuple tuple, const ItemPointerData *ct
 		 */
 		MultiXactIdSetOldestMember();
 
-		return heap_lock_updated_tuple_rec(rel, ctid, xid, mode);
+		prior_xmax = (prior_infomask & HEAP_XMAX_IS_MULTI) ?
+			MultiXactIdGetUpdateXid(prior_raw_xmax, prior_infomask) : prior_raw_xmax;
+		return heap_lock_updated_tuple_rec(rel, prior_xmax, prior_ctid, xid, mode);
 	}
 
 	/* nothing to lock */
diff --git a/src/test/modules/injection_points/Makefile b/src/test/modules/injection_points/Makefile
index bfdb3f53377..cc9be6dcdd2 100644
--- a/src/test/modules/injection_points/Makefile
+++ b/src/test/modules/injection_points/Makefile
@@ -14,7 +14,8 @@ REGRESS_OPTS = --dlpath=$(top_builddir)/src/test/regress
 
 ISOLATION = basic \
 	inplace \
-	syscache-update-pruned
+	syscache-update-pruned \
+	heap_lock_update
 
 # Temporarily disabled because of flakiness
 #ISOLATION +=
diff --git a/src/test/modules/injection_points/expected/heap_lock_update.out b/src/test/modules/injection_points/expected/heap_lock_update.out
new file mode 100644
index 00000000000..1ec8d876414
--- /dev/null
+++ b/src/test/modules/injection_points/expected/heap_lock_update.out
@@ -0,0 +1,83 @@
+Parsed test spec with 2 sessions
+
+starting permutation: s1begin s1update s2lock s1abort vacuum reinsert wake
+step s1begin: BEGIN;
+step s1update: UPDATE t SET id = 10000 WHERE id = 1 RETURNING ctid;
+ctid 
+-----
+(1,2)
+(1 row)
+
+step s2lock: select * from t where id = 1 for update; <waiting ...>
+step s1abort: ABORT;
+step vacuum: VACUUM t;
+step reinsert: 
+	INSERT INTO t VALUES (10001) RETURNING ctid;
+	UPDATE t SET id = 10002 WHERE id = 10001 RETURNING ctid;
+
+ctid 
+-----
+(1,2)
+(1 row)
+
+ctid 
+-----
+(1,3)
+(1 row)
+
+step wake: 
+	SELECT FROM injection_points_detach('heap_lock_updated_tuple');
+	SELECT FROM injection_points_wakeup('heap_lock_updated_tuple');
+ <waiting ...>
+step s2lock: <... completed>
+id
+--
+ 1
+(1 row)
+
+step wake: <... completed>
+
+starting permutation: s1begin s1update s2lock s1abort vacuum reinsert_and_lock wake
+step s1begin: BEGIN;
+step s1update: UPDATE t SET id = 10000 WHERE id = 1 RETURNING ctid;
+ctid 
+-----
+(1,2)
+(1 row)
+
+step s2lock: select * from t where id = 1 for update; <waiting ...>
+step s1abort: ABORT;
+step vacuum: VACUUM t;
+step reinsert_and_lock: 
+	BEGIN;
+	INSERT INTO t VALUES (10001) RETURNING ctid;
+	SELECT ctid, * FROM t WHERE id = 1 FOR UPDATE;
+	COMMIT;
+	UPDATE t SET id = 10002 WHERE id = 10001 returning ctid;
+
+ctid 
+-----
+(1,2)
+(1 row)
+
+ctid |id
+-----+--
+(0,1)| 1
+(1 row)
+
+ctid 
+-----
+(1,3)
+(1 row)
+
+step wake: 
+	SELECT FROM injection_points_detach('heap_lock_updated_tuple');
+	SELECT FROM injection_points_wakeup('heap_lock_updated_tuple');
+ <waiting ...>
+step s2lock: <... completed>
+id
+--
+ 1
+(1 row)
+
+step wake: <... completed>
diff --git a/src/test/modules/injection_points/meson.build b/src/test/modules/injection_points/meson.build
index 493e11053dc..c9186ae04d1 100644
--- a/src/test/modules/injection_points/meson.build
+++ b/src/test/modules/injection_points/meson.build
@@ -46,6 +46,7 @@ tests += {
       'basic',
       'inplace',
       'syscache-update-pruned',
+      'heap_lock_update',
       # temporarily disabled because of flakiness
       # 'index-concurrently-upsert',
       # 'index-concurrently-upsert-predicate',
diff --git a/src/test/modules/injection_points/specs/heap_lock_update.spec b/src/test/modules/injection_points/specs/heap_lock_update.spec
new file mode 100644
index 00000000000..b3992a1eb7a
--- /dev/null
+++ b/src/test/modules/injection_points/specs/heap_lock_update.spec
@@ -0,0 +1,117 @@
+# Test race condition in tuple locking
+#
+# This is a reproducer for the bug reported at:
+# https://www.postgresql.org/message-id/CAOG%2BRQ74x0q%3DkgBBQ%3DmezuvOeZBfSxM1qu_o0V28bwDz3dHxLw%40mail.gmail.com
+#
+# The bug was that when following an update chain while locking tuples,
+# we sometimes failed to check that the xmin on the next tuple matched
+# the prior's xmax. If the updated tuple version was vacuumed away and
+# the slot was reused for an unrelated tuple, we'd incorrectly follow
+# and lock the unrelated tuple.
+
+
+# Set up a test table with enough rows to fill a page. We need the
+# UPDATE used in the test to put the new tuple on a different page,
+# because otherwise the VACUUM cannot remove the aborted tuple because
+# we hold a pin on the first page.
+#
+# The exact number of rows inserted doesn't otherwise matter, but we
+# arrange things in a deterministic fashion so that the last inserted
+# tuple goes to (1,1), and the updated and aborted tuple goes to
+# (1,2). That way we can just memorize those ctids in the expected
+# output, to verify that the test exercises the scenario we want.
+setup
+{
+	CREATE EXTENSION injection_points;
+
+	CREATE TABLE t (id int PRIMARY KEY);
+	do $$
+	DECLARE
+	  i int;
+	  tid tid;
+	BEGIN
+	  FOR i IN 1..5000 LOOP
+	    INSERT INTO t VALUES (i) RETURNING ctid INTO tid;
+	    IF tid = '(1,1)' THEN
+	      RETURN;
+	    END IF;
+	  END LOOP;
+	  RAISE 'expected to insert tuple to (1,1)';
+	END;
+	$$;
+}
+teardown
+{
+	DROP TABLE t;
+	DROP EXTENSION injection_points;
+}
+
+session s1
+step s1begin { BEGIN; }
+step s1update { UPDATE t SET id = 10000 WHERE id = 1 RETURNING ctid; }
+step s1abort { ABORT; }
+step vacuum { VACUUM t; }
+
+# Insert a new tuple, and update it.
+step reinsert {
+	INSERT INTO t VALUES (10001) RETURNING ctid;
+	UPDATE t SET id = 10002 WHERE id = 10001 RETURNING ctid;
+}
+
+# Same as the 'reinsert' step, but for extra confusion, we also stamp
+# the original tuple with the same 'xmax' as the re-inserted one.
+step reinsert_and_lock {
+	BEGIN;
+	INSERT INTO t VALUES (10001) RETURNING ctid;
+	SELECT ctid, * FROM t WHERE id = 1 FOR UPDATE;
+	COMMIT;
+	UPDATE t SET id = 10002 WHERE id = 10001 returning ctid;
+}
+
+step wake {
+	SELECT FROM injection_points_detach('heap_lock_updated_tuple');
+	SELECT FROM injection_points_wakeup('heap_lock_updated_tuple');
+}
+
+session s2
+setup {
+	SELECT FROM injection_points_set_local();
+	SELECT FROM injection_points_attach('heap_lock_updated_tuple', 'wait');
+}
+step s2lock { select * from t where id = 1 for update; }
+
+permutation
+	# Begin transaction, update a row. Because of how we set up the
+	# test table, the updated tuple lands at (1,2)
+	s1begin
+	s1update
+
+	# While the updating transaction is open, start a new session that
+	# tries to lock the row. This blocks on the open transaction.
+	s2lock
+
+	# Abort the updating transaction. This unblocks session 2, but it
+	# will immediately hit the injection point and block on that.
+	s1abort
+	# Vacuum away the updated, aborted tuple.
+	vacuum
+
+	# Insert a new tuple. It lands at the same location where the
+	# updated tuple was.
+	reinsert
+
+	# Let the locking transaction continue. It should lock the
+	# original tuple, ignoring the re-inserted tuple.
+	wake(s2lock)
+
+# Variant where the re-inserted tuple is also locked by the inserting
+# transaction. This failed an earlier version of the fix during
+# development.
+permutation
+	s1begin
+	s1update
+	s2lock
+	s1abort
+	vacuum
+	reinsert_and_lock
+	wake(s2lock)
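
Not part of the patch itself: the small standalone C program below is a sketch of the invariant the fix depends on, namely that a tuple is only the next version in an update chain if its xmin matches the prior version's xmax; if the old version was vacuumed away and its line pointer reused, the xmin will not match and the walk has to stop. The ToyTuple struct and lock_updated_versions() function are invented for illustration and do not exist in PostgreSQL; in the real code, heap_lock_updated_tuple_rec() applies the equivalent check against the priorXmax value that the patched heap_lock_updated_tuple() now passes in.

#include <stdio.h>

typedef unsigned int TransactionId;

/* Toy model of a heap tuple: just the fields the chain walk cares about. */
typedef struct ToyTuple
{
	TransactionId xmin;			/* inserting transaction */
	TransactionId xmax;			/* updating transaction, or 0 if none */
	int			next;			/* slot of the newer version ("ctid"), or -1 */
} ToyTuple;

/*
 * Walk the update chain starting at the slot the initial tuple's ctid
 * points to.  'prior_xmax' is the xmax of the already-locked initial
 * tuple; each hop is followed only if the next tuple's xmin matches it.
 */
static void
lock_updated_versions(ToyTuple *heap, int slot, TransactionId prior_xmax)
{
	while (slot >= 0)
	{
		ToyTuple   *tup = &heap[slot];

		if (tup->xmin != prior_xmax)
		{
			/* slot was reused by an unrelated tuple: the chain ends here */
			printf("slot %d: xmin %u does not match prior xmax %u, stopping\n",
				   slot, tup->xmin, prior_xmax);
			return;
		}
		printf("locking slot %d (xmin %u)\n", slot, tup->xmin);
		prior_xmax = tup->xmax;
		slot = tup->next;
	}
}

int
main(void)
{
	/*
	 * Slot 0 is the original row, updated by transaction 200 which later
	 * aborted.  Slot 1, where the aborted new version used to live, has
	 * been vacuumed and reused by an unrelated insert (xmin 300), so the
	 * walk must not lock it.
	 */
	ToyTuple	heap[2] = {
		{100, 200, 1},			/* original tuple, ctid -> slot 1 */
		{300, 0, -1},			/* unrelated re-inserted tuple */
	};

	lock_updated_versions(heap, heap[0].next, heap[0].xmax);
	return 0;
}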