From 4aa92911c7edc99d98dd4f86c93cba2ed8fb77c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 9 Apr 2024 12:50:24 +0300 Subject: [PATCH] MDEV-33802 Weird read view after ROLLBACK of another transaction Even after commit b8a671988954870b7db22e20d1a1409fd40f8e3d there is an anomaly where a locking read could return inconsistent results. If a locking read would have to wait for a record lock, then by the definition of a read view, the modifications made by the current lock holder cannot be visible in the read view. This is because the read view must exclude any transactions that had not been committed at the time when the read view was created. lock_rec_convert_impl_to_expl_for_trx(), lock_rec_convert_impl_to_expl(): Return an unsafe-to-dereference pointer to a transaction that holds or held the lock, or nullptr if the lock was available. lock_clust_rec_modify_check_and_lock(), lock_sec_rec_read_check_and_lock(), lock_clust_rec_read_check_and_lock(): Return DB_RECORD_CHANGED if innodb_strict_isolation=ON and the lock was being held by another transaction. The test case, which is based on a bug report by Zhuang Liu, covers the function lock_sec_rec_read_check_and_lock(). Reviewed by: Vladislav Lesin --- .../suite/innodb/r/lock_isolation.result | 33 ++++- mysql-test/suite/innodb/t/lock_isolation.test | 43 +++++- storage/innobase/lock/lock0lock.cc | 134 ++++++++++-------- 3 files changed, 148 insertions(+), 62 deletions(-) diff --git a/mysql-test/suite/innodb/r/lock_isolation.result b/mysql-test/suite/innodb/r/lock_isolation.result index 88a2ad9326e..eaba6b899a1 100644 --- a/mysql-test/suite/innodb/r/lock_isolation.result +++ b/mysql-test/suite/innodb/r/lock_isolation.result @@ -82,7 +82,6 @@ SELECT * FROM t; a b 10 20 10 20 -disconnect consistent; connection default; TRUNCATE TABLE t; INSERT INTO t VALUES(NULL, 1), (2, 2); @@ -99,10 +98,40 @@ a b COMMIT; connection con_weird; COMMIT; -disconnect con_weird; connection default; SELECT * FROM t; a b 10 1 10 20 DROP TABLE t; +# +# MDEV-33802 Weird read view after ROLLBACK of other transactions +# +CREATE TABLE t(a INT PRIMARY KEY, b INT UNIQUE) ENGINE=InnoDB; +INSERT INTO t SET a=1; +BEGIN; +INSERT INTO t SET a=2; +connection consistent; +START TRANSACTION WITH CONSISTENT SNAPSHOT; +SELECT * FROM t FORCE INDEX (b) FOR UPDATE; +ERROR HY000: Record has changed since last read in table 't' +connection con_weird; +START TRANSACTION WITH CONSISTENT SNAPSHOT; +SELECT * FROM t FORCE INDEX (b) FOR UPDATE; +connection default; +ROLLBACK; +connection con_weird; +a b +1 NULL +1 NULL +SELECT * FROM t FORCE INDEX (b) FOR UPDATE; +a b +1 NULL +disconnect con_weird; +connection consistent; +SELECT * FROM t FORCE INDEX (b) FOR UPDATE; +a b +1 NULL +disconnect consistent; +connection default; +DROP TABLE t; diff --git a/mysql-test/suite/innodb/t/lock_isolation.test b/mysql-test/suite/innodb/t/lock_isolation.test index 30d2978f4f2..5c60f6e707c 100644 --- a/mysql-test/suite/innodb/t/lock_isolation.test +++ b/mysql-test/suite/innodb/t/lock_isolation.test @@ -79,7 +79,6 @@ COMMIT; --connection consistent --reap SELECT * FROM t; ---disconnect consistent --connection default TRUNCATE TABLE t; @@ -103,8 +102,48 @@ COMMIT; --connection con_weird --reap COMMIT; ---disconnect con_weird --connection default SELECT * FROM t; DROP TABLE t; + +--echo # +--echo # MDEV-33802 Weird read view after ROLLBACK of other transactions +--echo # + +CREATE TABLE t(a INT PRIMARY KEY, b INT UNIQUE) ENGINE=InnoDB; +INSERT INTO t SET a=1; + +BEGIN; INSERT INTO t SET a=2; + +--connection consistent +START TRANSACTION WITH CONSISTENT SNAPSHOT; +--disable_ps2_protocol +--error ER_CHECKREAD +SELECT * FROM t FORCE INDEX (b) FOR UPDATE; +--enable_ps2_protocol + +--connection con_weird +START TRANSACTION WITH CONSISTENT SNAPSHOT; +send +SELECT * FROM t FORCE INDEX (b) FOR UPDATE; + +--connection default +let $wait_condition= + select count(*) = 1 from information_schema.processlist + where state = 'Sending data' + and info LIKE 'SELECT * FROM t %'; +--source include/wait_condition.inc +ROLLBACK; + +--connection con_weird +--reap +SELECT * FROM t FORCE INDEX (b) FOR UPDATE; +--disconnect con_weird + +--connection consistent +SELECT * FROM t FORCE INDEX (b) FOR UPDATE; +--disconnect consistent + +--connection default +DROP TABLE t; diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 69b3dd1f5c2..a210700727c 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -5586,47 +5586,43 @@ lock_rec_insert_check_and_lock( return err; } -/*********************************************************************//** -Creates an explicit record lock for a running transaction that currently only -has an implicit lock on the record. The transaction instance must have a -reference count > 0 so that it can't be committed and freed before this -function has completed. */ -static -bool -lock_rec_convert_impl_to_expl_for_trx( -/*==================================*/ - trx_t* trx, /*!< in/out: active transaction */ - const page_id_t id, /*!< in: page identifier */ - const rec_t* rec, /*!< in: user record on page */ - dict_index_t* index) /*!< in: index of record */ +/** Create an explicit record lock for a transaction that currently only +has an implicit lock on the record. +@param trx referenced, active transaction, or nullptr +@param id page identifier +@param rec record in the page +@param index the index B-tree that the record belongs to +@return trx, with the reference released */ +static trx_t *lock_rec_convert_impl_to_expl_for_trx(trx_t *trx, + const page_id_t id, + const rec_t *rec, + dict_index_t *index) { - if (!trx) - return false; - - ut_ad(trx->is_referenced()); - ut_ad(page_rec_is_leaf(rec)); - ut_ad(!rec_is_metadata(rec, *index)); - - DEBUG_SYNC_C("before_lock_rec_convert_impl_to_expl_for_trx"); - ulint heap_no= page_rec_get_heap_no(rec); - + if (trx) { - LockGuard g{lock_sys.rec_hash, id}; - trx->mutex_lock(); - ut_ad(!trx_state_eq(trx, TRX_STATE_NOT_STARTED)); + ut_ad(trx->is_referenced()); + ut_ad(page_rec_is_leaf(rec)); + ut_ad(!rec_is_metadata(rec, *index)); - if (!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY) && - !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, g.cell(), id, heap_no, - trx)) - lock_rec_add_to_queue(LOCK_X | LOCK_REC_NOT_GAP, g.cell(), id, - page_align(rec), heap_no, index, trx, true); + ulint heap_no= page_rec_get_heap_no(rec); + + { + LockGuard g{lock_sys.rec_hash, id}; + trx->mutex_lock(); + ut_ad(!trx_state_eq(trx, TRX_STATE_NOT_STARTED)); + + if (!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY) && + !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, g.cell(), id, heap_no, + trx)) + lock_rec_add_to_queue(LOCK_X | LOCK_REC_NOT_GAP, g.cell(), id, + page_align(rec), heap_no, index, trx, true); + } + + trx->release_reference(); + trx->mutex_unlock(); } - trx->mutex_unlock(); - trx->release_reference(); - - DEBUG_SYNC_C("after_lock_rec_convert_impl_to_expl_for_trx"); - return false; + return trx; } @@ -5717,10 +5713,11 @@ should be created. @param[in] rec record on the leaf page @param[in] index the index of the record @param[in] offsets rec_get_offsets(rec,index) -@return whether caller_trx already holds an exclusive lock on rec */ +@return unsafe pointer to a transaction that held an exclusive lock on rec +@retval nullptr if no transaction held an exclusive lock */ template static -bool +const trx_t * lock_rec_convert_impl_to_expl( trx_t* caller_trx, page_id_t id, @@ -5744,10 +5741,10 @@ lock_rec_convert_impl_to_expl( trx_id = lock_clust_rec_some_has_impl(rec, index, offsets); if (trx_id == 0) { - return false; + return nullptr; } if (UNIV_UNLIKELY(trx_id == caller_trx->id)) { - return true; + return caller_trx; } trx = trx_sys.find(caller_trx, trx_id); @@ -5758,7 +5755,7 @@ lock_rec_convert_impl_to_expl( offsets); if (trx == caller_trx) { trx->release_reference(); - return true; + return trx; } ut_d(lock_rec_other_trx_holds_expl(caller_trx, trx, rec, id)); @@ -5803,11 +5800,18 @@ lock_clust_rec_modify_check_and_lock( /* If a transaction has no explicit x-lock set on the record, set one for it */ - if (lock_rec_convert_impl_to_expl(thr_get_trx(thr), - block->page.id(), + trx_t *trx = thr_get_trx(thr); + if (const trx_t *owner = + lock_rec_convert_impl_to_expl(trx, block->page.id(), rec, index, offsets)) { - /* We already hold an implicit exclusive lock. */ - return DB_SUCCESS; + if (owner == trx) { + /* We already hold an exclusive lock. */ + return DB_SUCCESS; + } + + if (trx->snapshot_isolation && trx->read_view.is_open()) { + return DB_RECORD_CHANGED; + } } err = lock_rec_lock(true, LOCK_X | LOCK_REC_NOT_GAP, @@ -5970,12 +5974,19 @@ lock_sec_rec_read_check_and_lock( return DB_SUCCESS; } - if (!page_rec_is_supremum(rec) - && lock_rec_convert_impl_to_expl( - trx, block->page.id(), rec, index, offsets) - && gap_mode == LOCK_REC_NOT_GAP) { - /* We already hold an implicit exclusive lock. */ - return DB_SUCCESS; + if (page_rec_is_supremum(rec)) { + } else if (const trx_t *owner = + lock_rec_convert_impl_to_expl(trx, block->page.id(), + rec, index, offsets)) { + if (owner == trx) { + if (gap_mode == LOCK_REC_NOT_GAP) { + /* We already hold an exclusive lock. */ + return DB_SUCCESS; + } + } else if (trx->snapshot_isolation + && trx->read_view.is_open()) { + return DB_RECORD_CHANGED; + } } #ifdef WITH_WSREP @@ -6055,13 +6066,20 @@ lock_clust_rec_read_check_and_lock( ulint heap_no = page_rec_get_heap_no(rec); trx_t *trx = thr_get_trx(thr); - if (!lock_table_has(trx, index->table, LOCK_X) - && heap_no != PAGE_HEAP_NO_SUPREMUM - && lock_rec_convert_impl_to_expl(trx, id, - rec, index, offsets) - && gap_mode == LOCK_REC_NOT_GAP) { - /* We already hold an implicit exclusive lock. */ - return DB_SUCCESS; + if (lock_table_has(trx, index->table, LOCK_X) + || heap_no == PAGE_HEAP_NO_SUPREMUM) { + } else if (const trx_t *owner = + lock_rec_convert_impl_to_expl(trx, id, + rec, index, offsets)) { + if (owner == trx) { + if (gap_mode == LOCK_REC_NOT_GAP) { + /* We already hold an exclusive lock. */ + return DB_SUCCESS; + } + } else if (trx->snapshot_isolation + && trx->read_view.is_open()) { + return DB_RECORD_CHANGED; + } } if (heap_no > PAGE_HEAP_NO_SUPREMUM && gap_mode != LOCK_GAP