diff --git a/mysql-test/suite/innodb/r/lock_wait_conflict.result b/mysql-test/suite/innodb/r/lock_wait_conflict.result
new file mode 100644
index 00000000000..25d18c03ea1
--- /dev/null
+++ b/mysql-test/suite/innodb/r/lock_wait_conflict.result
@@ -0,0 +1,27 @@
+#
+# MDEV-27025 insert-intention lock conflicts with waiting ORDINARY lock
+#
+CREATE TABLE t (a INT PRIMARY KEY, b INT NOT NULL UNIQUE) ENGINE=InnoDB;
+connect prevent_purge,localhost,root,,;
+start transaction with consistent snapshot;
+connection default;
+INSERT INTO t VALUES (20,20);
+DELETE FROM t WHERE b = 20;
+connect con_ins,localhost,root,,;
+SET DEBUG_SYNC = 'row_ins_sec_index_entry_dup_locks_created SIGNAL ins_set_locks WAIT_FOR ins_cont';
+INSERT INTO t VALUES(10, 20);
+connect con_del,localhost,root,,;
+SET DEBUG_SYNC = 'now WAIT_FOR ins_set_locks';
+SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL del_locked';
+DELETE FROM t WHERE b = 20;
+connection default;
+SET DEBUG_SYNC = 'now WAIT_FOR del_locked';
+SET DEBUG_SYNC = 'now SIGNAL ins_cont';
+connection con_ins;
+disconnect con_ins;
+connection con_del;
+disconnect con_del;
+disconnect prevent_purge;
+connection default;
+SET DEBUG_SYNC = 'RESET';
+DROP TABLE t;
diff --git a/mysql-test/suite/innodb/t/lock_wait_conflict.test b/mysql-test/suite/innodb/t/lock_wait_conflict.test
new file mode 100644
index 00000000000..46a29e14b43
--- /dev/null
+++ b/mysql-test/suite/innodb/t/lock_wait_conflict.test
@@ -0,0 +1,60 @@
+--source include/have_innodb.inc
+--source include/count_sessions.inc
+--source include/have_debug.inc
+--source include/have_debug_sync.inc
+
+--echo #
+--echo # MDEV-27025 insert-intention lock conflicts with waiting ORDINARY lock
+--echo #
+
+# The test checks the ability to acquire an exclusive record lock when the
+# acquiring transaction already holds a shared lock on the record and another
+# transaction is waiting for a lock.
+
+CREATE TABLE t (a INT PRIMARY KEY, b INT NOT NULL UNIQUE) ENGINE=InnoDB;
+
+--connect(prevent_purge,localhost,root,,)
+start transaction with consistent snapshot;
+
+--connection default
+INSERT INTO t VALUES (20,20);
+DELETE FROM t WHERE b = 20;
+
+--connect(con_ins,localhost,root,,)
+SET DEBUG_SYNC = 'row_ins_sec_index_entry_dup_locks_created SIGNAL ins_set_locks WAIT_FOR ins_cont';
+send
+INSERT INTO t VALUES(10, 20);
+
+--connect(con_del,localhost,root,,)
+SET DEBUG_SYNC = 'now WAIT_FOR ins_set_locks';
+SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL del_locked';
+###############################################################################
+# This DELETE creates a waiting ORDINARY X-lock for heap_no 2 because the
+# record is delete-marked; that lock conflicts with the ORDINARY S-lock set
+# by the last INSERT. After the last INSERT creates its insert-intention lock
+# on heap_no 2, that lock conflicts with the waiting ORDINARY X-lock of this
+# DELETE, which causes a DEADLOCK error for this DELETE.
###############################################################################
+send
+DELETE FROM t WHERE b = 20;
+
+--connection default
+SET DEBUG_SYNC = 'now WAIT_FOR del_locked';
+SET DEBUG_SYNC = 'now SIGNAL ins_cont';
+
+--connection con_ins
+--reap
+--disconnect con_ins
+
+--connection con_del
+# Without the fix, ER_LOCK_DEADLOCK would be reported here.
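+# With the fix, the INSERT's insert-intention lock request ignores the waiting
+# ORDINARY X-lock of this DELETE (the DELETE's transaction is already waiting
+# for the INSERT's transaction), so this DELETE is not chosen as a deadlock
+# victim.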
+--reap
+--disconnect con_del
+
+--disconnect prevent_purge
+
+--connection default
+
+SET DEBUG_SYNC = 'RESET';
+DROP TABLE t;
+--source include/wait_until_count_sessions.inc
diff --git a/mysql-test/suite/versioning/r/update.result b/mysql-test/suite/versioning/r/update.result
index d123331cc8c..db47b0357a0 100644
--- a/mysql-test/suite/versioning/r/update.result
+++ b/mysql-test/suite/versioning/r/update.result
@@ -283,7 +283,6 @@ connection default;
 update t1 set b = 'foo';
 connection con1;
 update t1 set a = 'bar';
-ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
 disconnect con1;
 connection default;
 drop table t1;
diff --git a/mysql-test/suite/versioning/t/update.test b/mysql-test/suite/versioning/t/update.test
index 058d2f4c865..f496a287697 100644
--- a/mysql-test/suite/versioning/t/update.test
+++ b/mysql-test/suite/versioning/t/update.test
@@ -186,7 +186,9 @@ send
 update t1 set b = 'foo';
 connection con1;
 let $wait_condition= select count(*) from information_schema.innodb_lock_waits;
 source include/wait_condition.inc;
-error ER_LOCK_DEADLOCK;
+# There must be no DEADLOCK here: the con1 transaction already holds its locks
+# and the default connection's lock request is waiting, so the locks of the
+# following "UPDATE" must not conflict with that waiting lock.
 update t1 set a = 'bar';
 disconnect con1;
 connection default;
 drop table t1;
diff --git a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h
index e2565c62169..696c58cf1c7 100644
--- a/storage/innobase/include/hash0hash.h
+++ b/storage/innobase/include/hash0hash.h
@@ -1,7 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, MariaDB Corporation.
+Copyright (c) 2018, 2022, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -31,7 +31,31 @@ Created 5/20/1997 Heikki Tuuri
 #include "sync0rw.h"
 
 struct hash_table_t;
-struct hash_cell_t;
+
+struct hash_cell_t
+{
+  /** singly-linked, nullptr terminated list of hash buckets */
+  void *node;
+
+  /** Insert an element after another.
+  @tparam T type of the element
+  @param after the element after which to insert
+  @param insert the being-inserted element
+  @param next the next-element pointer in T */
+  template<typename T>
+  void insert_after(T &after, T &insert, T *T::*next)
+  {
+#ifdef UNIV_DEBUG
+    for (const T *c= static_cast<const T*>(node); c; c= c->*next)
+      if (c == &after)
+        goto found;
+    ut_error;
+  found:
+#endif
+    insert.*next= after.*next;
+    after.*next= &insert;
+  }
+};
 
 typedef void*	hash_node_t;
 
@@ -477,10 +501,6 @@ hash_unlock_x_all_but(
 	hash_table_t*	table,		/*!< in: hash table */
 	rw_lock_t*	keep_lock);	/*!< in: lock to keep */
 
-struct hash_cell_t{
-	void*	node;	/*!< hash chain node, NULL if none */
-};
-
 /* The hash table structure */
 struct hash_table_t {
	enum hash_table_sync_t	type;	/*mutex
+@param[in]	insert_before_waiting	if true, the new B-tree record lock is
+inserted just after the last non-waiting lock of the current transaction that
+precedes the first lock waiting for the current transaction; otherwise the
+lock is inserted at the end of the queue
 @return created lock */
 lock_t*
 lock_rec_create_low(
+	lock_t*		c_lock,
 #ifdef WITH_WSREP
-	lock_t*		c_lock,	/*!< conflicting lock */
 	que_thr_t*	thr,	/*!< thread owning trx */
 #endif
 	ulint		type_mode,
@@ -943,9 +951,12 @@ lock_rec_create_low(
 	ulint		heap_no,
 	dict_index_t*	index,
 	trx_t*		trx,
-	bool		holds_trx_mutex);
+	bool		holds_trx_mutex,
+	bool		insert_before_waiting = false);
+
 /** Enqueue a waiting request for a lock which cannot be granted immediately.
 Check for deadlocks.
+@param[in]	c_lock		conflicting lock
 @param[in]	type_mode	the requested lock mode (LOCK_S or LOCK_X)
 			possibly ORed with LOCK_GAP or
 			LOCK_REC_NOT_GAP, ORed with
@@ -964,9 +975,7 @@ Check for deadlocks.
 			(or it happened to commit) */
 dberr_t
 lock_rec_enqueue_waiting(
-#ifdef WITH_WSREP
-	lock_t*		c_lock,	/*!< conflicting lock */
-#endif
+	lock_t*		c_lock,
 	ulint		type_mode,
 	const buf_block_t*	block,
 	ulint		heap_no,
diff --git a/storage/innobase/include/lock0lock.inl b/storage/innobase/include/lock0lock.inl
index abe5052627b..c6719946b79 100644
--- a/storage/innobase/include/lock0lock.inl
+++ b/storage/innobase/include/lock0lock.inl
@@ -101,34 +101,37 @@ lock_hash_get(
 /*********************************************************************//**
 Creates a new record lock and inserts it to the lock queue. Does NOT check
 for deadlocks or lock compatibility!
+@param[in] c_lock conflicting lock +@param[in] thr thread owning trx +@param[in] type_mode lock mode and wait flag, type is ignored and replaced by +LOCK_REC +@param[in] block buffer block containing the record +@param[in] heap_no heap number of the record +@param[in] index index of record +@param[in,out] trx transaction +@param[in] caller_owns_trx_mutex TRUE if caller owns trx mutex +@param[in] insert_before_waiting if true, inserts new B-tree record lock +just after the last non-waiting lock of the current transaction which is +located before the first waiting for the current transaction lock, otherwise +the lock is inserted at the end of the queue @return created lock */ UNIV_INLINE -lock_t* -lock_rec_create( -/*============*/ +lock_t *lock_rec_create(lock_t *c_lock, #ifdef WITH_WSREP - lock_t* c_lock, /*!< conflicting lock */ - que_thr_t* thr, /*!< thread owning trx */ + que_thr_t *thr, #endif - ulint type_mode,/*!< in: lock mode and wait - flag, type is ignored and - replaced by LOCK_REC */ - const buf_block_t* block, /*!< in: buffer block containing - the record */ - ulint heap_no,/*!< in: heap number of the record */ - dict_index_t* index, /*!< in: index of record */ - trx_t* trx, /*!< in,out: transaction */ - bool caller_owns_trx_mutex) - /*!< in: TRUE if caller owns - trx mutex */ + ulint type_mode, const buf_block_t *block, + ulint heap_no, dict_index_t *index, trx_t *trx, + bool caller_owns_trx_mutex, + bool insert_before_waiting) { btr_assert_not_corrupted(block, index); - return lock_rec_create_low( + return lock_rec_create_low(c_lock, #ifdef WITH_WSREP - c_lock, thr, + thr, #endif type_mode, block->page.id.space(), block->page.id.page_no(), - block->frame, heap_no, - index, trx, caller_owns_trx_mutex); + block->frame, heap_no, index, trx, + caller_owns_trx_mutex, insert_before_waiting); } diff --git a/storage/innobase/include/lock0priv.h b/storage/innobase/include/lock0priv.h index b7dcbfa2b86..db3689a2281 100644 --- a/storage/innobase/include/lock0priv.h +++ b/storage/innobase/include/lock0priv.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2018, MariaDB Corporation. +Copyright (c) 2015, 2018, 2022 MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -585,6 +585,9 @@ lock_rec_get_next_const( /*********************************************************************//** Gets the first explicit lock request on a record. +@param[in] hash hash chain the lock on +@param[in] page_id page id +@param[in] heap_no heap number of the record @return first lock, NULL if none exists */ UNIV_INLINE lock_t* @@ -660,15 +663,26 @@ lock_table_has( /** Set the wait status of a lock. 
@param[in,out] lock lock that will be waited for -@param[in,out] trx transaction that will wait for the lock */ -inline void lock_set_lock_and_trx_wait(lock_t* lock, trx_t* trx) +@param[in,out] trx transaction that will wait for the lock +@param[in] c_lock conflicting lock */ +inline void lock_set_lock_and_trx_wait(lock_t* lock, trx_t* trx, + const lock_t *c_lock) { ut_ad(lock); ut_ad(lock->trx == trx); - ut_ad(trx->lock.wait_lock == NULL); ut_ad(lock_mutex_own()); ut_ad(trx_mutex_own(trx)); + if (trx->lock.wait_trx) { + ut_ad(!c_lock || trx->lock.wait_trx == c_lock->trx); + ut_ad(trx->lock.wait_lock); + ut_ad((*trx->lock.wait_lock).trx == trx); + } else { + ut_ad(c_lock); + trx->lock.wait_trx = c_lock->trx; + ut_ad(!trx->lock.wait_lock); + } + trx->lock.wait_lock = lock; lock->type_mode |= LOCK_WAIT; } @@ -681,6 +695,7 @@ inline void lock_reset_lock_and_trx_wait(lock_t* lock) ut_ad(lock_mutex_own()); ut_ad(lock->trx->lock.wait_lock == NULL || lock->trx->lock.wait_lock == lock); + lock->trx->lock.wait_trx= NULL; lock->trx->lock.wait_lock = NULL; lock->type_mode &= ~LOCK_WAIT; } diff --git a/storage/innobase/include/lock0priv.inl b/storage/innobase/include/lock0priv.inl index 8bb145e41fc..61e8ff18ab1 100644 --- a/storage/innobase/include/lock0priv.inl +++ b/storage/innobase/include/lock0priv.inl @@ -145,22 +145,19 @@ lock_rec_get_first_on_page_addr( return(NULL); } -/*********************************************************************//** -Gets the first record lock on a page, where the page is identified by a +/** Gets the first record lock on a page, where the page is identified by a pointer to it. +@param[in] lock_hash lock hash table +@param[in] space page's space id +@param[in] page_no page number +@param[in] hash page's hash value in records hash table @return first lock, NULL if none exists */ UNIV_INLINE -lock_t* -lock_rec_get_first_on_page( -/*=======================*/ - hash_table_t* lock_hash, /*!< in: lock hash table */ - const buf_block_t* block) /*!< in: buffer block */ +lock_t *lock_rec_get_first_on_page(hash_table_t *lock_hash, ulint space, + ulint page_no, ulint hash) { ut_ad(lock_mutex_own()); - ulint space = block->page.id.space(); - ulint page_no = block->page.id.page_no(); - ulint hash = buf_block_get_lock_hash_val(block); for (lock_t* lock = static_cast( HASH_GET_FIRST(lock_hash, hash)); @@ -177,6 +174,20 @@ lock_rec_get_first_on_page( return(NULL); } +/** Gets the first record lock on a page, where the page is identified by a +pointer to it. +@param[in] lock_hash lock hash table +@param[in] block buffer block +@return first lock, NULL if none exists */ +UNIV_INLINE +lock_t *lock_rec_get_first_on_page(hash_table_t *lock_hash, + const buf_block_t *block) +{ + return lock_rec_get_first_on_page(lock_hash, block->page.id.space(), + block->page.id.page_no(), + buf_block_get_lock_hash_val(block)); +} + /*********************************************************************//** Gets the next explicit lock request on a record. @return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */ @@ -210,21 +221,21 @@ lock_rec_get_next_const( return(lock_rec_get_next(heap_no, (lock_t*) lock)); } -/*********************************************************************//** -Gets the first explicit lock request on a record. +/** Gets the first explicit lock request on a record. 
+@param[in] hash hash chain the lock on +@param[in] space record's space id +@param[in] page_no record's page number +@param[in] lock_hash_val page's hash value in records hash table +@param[in] heap_no heap number of the record @return first lock, NULL if none exists */ UNIV_INLINE -lock_t* -lock_rec_get_first( -/*===============*/ - hash_table_t* hash, /*!< in: hash chain the lock on */ - const buf_block_t* block, /*!< in: block containing the record */ - ulint heap_no)/*!< in: heap number of the record */ +lock_t *lock_rec_get_first(hash_table_t *hash, ulint space, ulint page_no, + uint32_t lock_hash_val, ulint heap_no) { ut_ad(lock_mutex_own()); - for (lock_t* lock = lock_rec_get_first_on_page(hash, block); lock; - lock = lock_rec_get_next_on_page(lock)) { + for (lock_t* lock = lock_rec_get_first_on_page(hash, space, page_no, + lock_hash_val); lock; lock = lock_rec_get_next_on_page(lock)) { if (lock_rec_get_nth_bit(lock, heap_no)) { return(lock); } @@ -233,6 +244,20 @@ lock_rec_get_first( return(NULL); } +/** Gets the first explicit lock request on a record. +@param[in] hash hash chain the lock on +@param[in] block block containing the record +@param[in] heap_no heap number of the record +@return first lock, NULL if none exists */ +UNIV_INLINE +lock_t *lock_rec_get_first(hash_table_t *hash, const buf_block_t *block, + ulint heap_no) +{ + return lock_rec_get_first(hash, block->page.id.space(), + block->page.id.page_no(), + buf_block_get_lock_hash_val(block), heap_no); +} + /*********************************************************************//** Gets the nth bit of a record lock. @return TRUE if bit set also if i == ULINT_UNDEFINED return FALSE*/ diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index d75095ed048..1d77297b0e7 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -436,7 +436,9 @@ struct trx_lock_t { trx_que_t que_state; /*!< valid when trx->state == TRX_STATE_ACTIVE: TRX_QUE_RUNNING, TRX_QUE_LOCK_WAIT, ... */ - + /** Transaction being waited for; protected by the same mutexes as + wait_lock */ + trx_t* wait_trx; lock_t* wait_lock; /*!< if trx execution state is TRX_QUE_LOCK_WAIT, this points to the lock request, otherwise this is diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 37e23b56dfc..77e98e966c4 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2014, 2021, MariaDB Corporation. +Copyright (c) 2014, 2021, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1144,19 +1144,18 @@ static void wsrep_kill_victim(const trx_t * const trx, const lock_t *lock) /*********************************************************************//** Checks if some other transaction has a conflicting explicit lock request in the queue, so that we have to wait. 
+@param[in]	mode	LOCK_S or LOCK_X, possibly ORed with LOCK_GAP or
+LOCK_REC_NOT_GAP, LOCK_INSERT_INTENTION
+@param[in]	block	buffer block containing the record
+@param[in]	heap_no	heap number of the record
+@param[in]	trx	our transaction
+@param[out]	was_ignored	true if conflicting locks waiting for the current
+transaction were ignored
 @return lock or NULL */
-static
-lock_t*
-lock_rec_other_has_conflicting(
-/*===========================*/
-	ulint			mode,	/*!< in: LOCK_S or LOCK_X,
-					possibly ORed to LOCK_GAP or
-					LOC_REC_NOT_GAP,
-					LOCK_INSERT_INTENTION */
-	const buf_block_t*	block,	/*!< in: buffer block containing
-					the record */
-	ulint			heap_no,/*!< in: heap number of the record */
-	const trx_t*		trx)	/*!< in: our transaction */
+static lock_t *lock_rec_other_has_conflicting(ulint mode,
+                                              const buf_block_t *block,
+                                              ulint heap_no, const trx_t *trx,
+                                              bool *was_ignored= NULL)
 {
 	lock_t*		lock;
 
@@ -1168,6 +1167,16 @@ lock_rec_other_has_conflicting(
 	     lock != NULL;
 	     lock = lock_rec_get_next(heap_no, lock)) {
 
+		/* There can't be lock loops for one record, because all
+		waiting locks of the record will always wait for the same
+		lock of the record in the cell array, and the check for a
+		conflicting lock always starts from the first lock for the
+		heap_no and proceeds in the same order (the order of the
+		locks in the cell array). */
+		if (lock_get_wait(lock) && lock->trx->lock.wait_trx == trx) {
+			if (was_ignored) *was_ignored= true;
+			continue;
+		}
 		if (lock_rec_has_to_wait(true, trx, mode, lock, is_supremum)) {
 #ifdef WITH_WSREP
 			if (trx->is_wsrep()) {
@@ -1324,6 +1333,7 @@ static void check_trx_state(const trx_t *trx)
 
 /** Create a new record lock and inserts it to the lock queue,
 without checking for deadlocks or conflicts.
+@param[in]	c_lock		conflicting lock
 @param[in]	type_mode	lock mode and wait flag; type will be
 				replaced with LOCK_REC
 @param[in]	space		tablespace id
@@ -1333,11 +1343,15 @@ without checking for deadlocks or conflicts.
 @param[in]	index		the index tree
 @param[in,out]	trx		transaction
 @param[in]	holds_trx_mutex	whether the caller holds trx->mutex
+@param[in]	insert_before_waiting	if true, the new B-tree record lock is
+inserted just after the last non-waiting lock of the current transaction that
+precedes the first lock waiting for the current transaction; otherwise the
+lock is inserted at the end of the queue
 @return created lock */
 lock_t*
 lock_rec_create_low(
+	lock_t*		c_lock,
 #ifdef WITH_WSREP
-	lock_t*		c_lock,	/*!< conflicting lock */
 	que_thr_t*	thr,	/*!< thread owning trx */
 #endif
 	ulint		type_mode,
@@ -1347,7 +1361,8 @@ lock_rec_create_low(
 	ulint		heap_no,
 	dict_index_t*	index,
 	trx_t*		trx,
-	bool		holds_trx_mutex)
+	bool		holds_trx_mutex,
+	bool		insert_before_waiting)
 {
 	lock_t*		lock;
 	ulint		n_bits;
@@ -1457,7 +1472,7 @@ lock_rec_create_low(
 		}
 
 		trx->lock.que_state = TRX_QUE_LOCK_WAIT;
-		lock_set_lock_and_trx_wait(lock, trx);
+		lock_set_lock_and_trx_wait(lock, trx, c_lock);
 		UT_LIST_ADD_LAST(trx->lock.trx_locks, lock);
 
 		trx->lock.wait_thr = thr;
@@ -1485,15 +1500,46 @@ lock_rec_create_low(
 		trx_mutex_exit(c_lock->trx);
 	} else
 #endif /* WITH_WSREP */
-	if (!(type_mode & (LOCK_WAIT | LOCK_PREDICATE | LOCK_PRDT_PAGE))
-	    && innodb_lock_schedule_algorithm
-	    == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
-	    && !thd_is_replication_slave_thread(trx->mysql_thd)) {
-		HASH_PREPEND(lock_t, hash, lock_sys.rec_hash,
-			     lock_rec_fold(space, page_no), lock);
-	} else {
-		HASH_INSERT(lock_t, hash, lock_hash_get(type_mode),
-			    lock_rec_fold(space, page_no), lock);
+	if (insert_before_waiting
+	    && !(type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE))) {
+		/* Try to insert the lock just after the last non-waiting
+		lock of the current transaction which immediately
+		precedes the first waiting lock request. */
+		uint32_t lock_hash_val = lock_rec_hash(space, page_no);
+		hash_cell_t& cell = lock_sys.rec_hash->array[lock_hash_val];
+
+		lock_t* last_non_waiting = NULL;
+
+		for (lock_t* l = lock_rec_get_first(lock_sys.rec_hash, space,
+			page_no, lock_hash_val, heap_no); l;
+			l = lock_rec_get_next(heap_no, l)) {
+			if (lock_get_wait(l)
+			    && l->trx->lock.wait_trx == trx) {
+				break;
+			}
+			if (l->trx == trx) {
+				last_non_waiting = l;
+			}
+		}
+
+		if (!last_non_waiting) {
+			goto append_last;
+		}
+
+		cell.insert_after(*last_non_waiting, *lock, &lock_t::hash);
+	}
+	else {
+append_last:
+		if (!(type_mode & (LOCK_WAIT | LOCK_PREDICATE | LOCK_PRDT_PAGE))
+		    && innodb_lock_schedule_algorithm
+		    == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
+		    && !thd_is_replication_slave_thread(trx->mysql_thd)) {
+			HASH_PREPEND(lock_t, hash, lock_sys.rec_hash,
+				     lock_rec_fold(space, page_no), lock);
+		} else {
+			HASH_INSERT(lock_t, hash, lock_hash_get(type_mode),
+				    lock_rec_fold(space, page_no), lock);
+		}
 	}
 
 	if (!holds_trx_mutex) {
@@ -1501,7 +1547,7 @@ lock_rec_create_low(
 	}
 	ut_ad(trx_mutex_own(trx));
 	if (type_mode & LOCK_WAIT) {
-		lock_set_lock_and_trx_wait(lock, trx);
+		lock_set_lock_and_trx_wait(lock, trx, c_lock);
 	}
 	UT_LIST_ADD_LAST(trx->lock.trx_locks, lock);
 	if (!holds_trx_mutex) {
@@ -1661,6 +1707,7 @@ lock_rec_insert_to_head(
 
 /** Enqueue a waiting request for a lock which cannot be granted immediately.
 Check for deadlocks.
+@param[in]	c_lock		conflicting lock
 @param[in]	type_mode	the requested lock mode (LOCK_S or LOCK_X)
 			possibly ORed with LOCK_GAP or
 			LOCK_REC_NOT_GAP, ORed with
@@ -1679,9 +1726,7 @@ Check for deadlocks.
(or it happened to commit) */ dberr_t lock_rec_enqueue_waiting( -#ifdef WITH_WSREP - lock_t* c_lock, /*!< conflicting lock */ -#endif + lock_t* c_lock, ulint type_mode, const buf_block_t* block, ulint heap_no, @@ -1719,9 +1764,9 @@ lock_rec_enqueue_waiting( /* Enqueue the lock request that will wait to be granted, note that we already own the trx mutex. */ - lock_t* lock = lock_rec_create( + lock_t* lock = lock_rec_create(c_lock, #ifdef WITH_WSREP - c_lock, thr, + thr, #endif type_mode | LOCK_WAIT, block, heap_no, index, trx, TRUE); @@ -1785,22 +1830,20 @@ on the record, and the request to be added is not a waiting request, we can reuse a suitable record lock object already existing on the same page, just setting the appropriate bit in its bitmap. This is a low-level function which does NOT check for deadlocks or lock compatibility! +@param[in] type_mode lock mode, wait, gap etc. flags; type is ignored and +replaced by LOCK_REC +@param[in] block buffer block containing the record +@param[in] heap_no heap number of the record +@param[in] index index of record +@param[in,out] trx transaction +@param[in] caller_owns_trx_mutex, TRUE if caller owns the transaction mutex +@param[in] insert_before_waiting true=insert B-tree record lock right before +a waiting lock request; false=insert the lock at the end of the queue @return lock where the bit was set */ -static -void -lock_rec_add_to_queue( -/*==================*/ - ulint type_mode,/*!< in: lock mode, wait, gap - etc. flags; type is ignored - and replaced by LOCK_REC */ - const buf_block_t* block, /*!< in: buffer block containing - the record */ - ulint heap_no,/*!< in: heap number of the record */ - dict_index_t* index, /*!< in: index of record */ - trx_t* trx, /*!< in/out: transaction */ - bool caller_owns_trx_mutex) - /*!< in: TRUE if caller owns the - transaction mutex */ +static void lock_rec_add_to_queue(ulint type_mode, const buf_block_t *block, + ulint heap_no, dict_index_t *index, + trx_t *trx, bool caller_owns_trx_mutex, + bool insert_before_waiting= false) { #ifdef UNIV_DEBUG ut_ad(lock_mutex_own()); @@ -1889,11 +1932,16 @@ lock_rec_add_to_queue( } } - lock_rec_create( + /* Note: We will not pass any conflicting lock to lock_rec_create(), + because we should be moving an existing waiting lock request. */ + ut_ad(!(type_mode & LOCK_WAIT) || trx->lock.wait_trx); + + lock_rec_create(NULL, #ifdef WITH_WSREP - NULL, NULL, + NULL, #endif - type_mode, block, heap_no, index, trx, caller_owns_trx_mutex); + type_mode, block, heap_no, index, trx, caller_owns_trx_mutex, + insert_before_waiting); } /*********************************************************************//** @@ -1949,28 +1997,23 @@ lock_rec_lock( /* Do nothing if the trx already has a strong enough lock on rec */ if (!lock_rec_has_expl(mode, block, heap_no, trx)) { - if ( -#ifdef WITH_WSREP - lock_t *c_lock= -#endif - lock_rec_other_has_conflicting(mode, block, heap_no, trx)) + bool was_ignored = false; + if (lock_t *c_lock= lock_rec_other_has_conflicting( + mode, block, heap_no, trx, &was_ignored)) { /* If another transaction has a non-gap conflicting request in the queue, as this transaction does not have a lock strong enough already granted on the record, we have to wait. */ - err = lock_rec_enqueue_waiting( -#ifdef WITH_WSREP - c_lock, -#endif /* WITH_WSREP */ - mode, block, heap_no, index, thr, NULL); + err = lock_rec_enqueue_waiting(c_lock, mode, block, heap_no, index, + thr, NULL); } else if (!impl) { /* Set the requested lock on the record. 
*/ lock_rec_add_to_queue(LOCK_REC | mode, block, heap_no, index, trx, - true); + true, was_ignored); err= DB_SUCCESS_LOCKED_REC; } } @@ -1996,9 +2039,9 @@ lock_rec_lock( Note that we don't own the trx mutex. */ if (!impl) - lock_rec_create( + lock_rec_create(NULL, #ifdef WITH_WSREP - NULL, NULL, + NULL, #endif mode, block, heap_no, index, trx, false); @@ -2237,8 +2280,17 @@ static void lock_rec_dequeue_from_page(lock_t* in_lock) if (!lock_get_wait(lock)) { continue; } - const lock_t* c = lock_rec_has_to_wait_in_queue(lock); - if (!c) { + + ut_ad(lock->trx->lock.wait_trx); + ut_ad(lock->trx->lock.wait_lock); + + if (const lock_t* c = lock_rec_has_to_wait_in_queue( + lock)) { + trx_mutex_enter(lock->trx); + lock->trx->lock.wait_trx = c->trx; + trx_mutex_exit(lock->trx); + } + else { /* Grant the lock */ ut_ad(lock->trx != in_lock->trx); lock_grant(lock); @@ -2512,7 +2564,8 @@ lock_rec_move_low( lock_rec_reset_nth_bit(lock, donator_heap_no); if (type_mode & LOCK_WAIT) { - lock_reset_lock_and_trx_wait(lock); + ut_ad(lock->trx->lock.wait_lock == lock); + lock->type_mode &= ~LOCK_WAIT; } /* Note that we FIRST reset the bit, and then set the lock: @@ -2629,8 +2682,8 @@ lock_move_reorganize_page( lock_rec_bitmap_reset(lock); if (lock_get_wait(lock)) { - - lock_reset_lock_and_trx_wait(lock); + ut_ad(lock->trx->lock.wait_lock == lock); + lock->type_mode&= ~LOCK_WAIT; } lock = lock_rec_get_next_on_page(lock); @@ -2805,7 +2858,9 @@ lock_move_rec_list_end( ut_ad(!page_rec_is_metadata(orec)); if (type_mode & LOCK_WAIT) { - lock_reset_lock_and_trx_wait(lock); + ut_ad(lock->trx->lock.wait_lock == + lock); + lock->type_mode&= ~LOCK_WAIT; } lock_rec_add_to_queue( @@ -2902,7 +2957,9 @@ lock_move_rec_list_start( ut_ad(!page_rec_is_metadata(prev)); if (type_mode & LOCK_WAIT) { - lock_reset_lock_and_trx_wait(lock); + ut_ad(lock->trx->lock.wait_lock + == lock); + lock->type_mode&= ~LOCK_WAIT; } lock_rec_add_to_queue( @@ -2997,7 +3054,9 @@ lock_rtr_move_rec_list( if (rec1_heap_no < lock->un_member.rec_lock.n_bits && lock_rec_reset_nth_bit(lock, rec1_heap_no)) { if (type_mode & LOCK_WAIT) { - lock_reset_lock_and_trx_wait(lock); + ut_ad(lock->trx->lock.wait_lock + == lock); + lock->type_mode&= ~LOCK_WAIT; } lock_rec_add_to_queue( @@ -3451,10 +3510,8 @@ lock_table_create( in dictionary cache */ ulint type_mode,/*!< in: lock mode possibly ORed with LOCK_WAIT */ - trx_t* trx /*!< in: trx */ -#ifdef WITH_WSREP - , lock_t* c_lock = NULL /*!< in: conflicting lock */ -#endif + trx_t* trx, /*!< in: trx */ + lock_t* c_lock = NULL /*!< in: conflicting lock */ ) { lock_t* lock; @@ -3537,8 +3594,7 @@ lock_table_create( ut_list_append(table->locks, lock, TableLockGetNode()); if (type_mode & LOCK_WAIT) { - - lock_set_lock_and_trx_wait(lock, trx); + lock_set_lock_and_trx_wait(lock, trx, c_lock); } lock->trx->lock.table_locks.push_back(lock); @@ -3693,10 +3749,8 @@ lock_table_enqueue_waiting( ulint mode, /*!< in: lock mode this transaction is requesting */ dict_table_t* table, /*!< in/out: table */ - que_thr_t* thr /*!< in: query thread */ -#ifdef WITH_WSREP - , lock_t* c_lock /*!< in: conflicting lock or NULL */ -#endif + que_thr_t* thr, /*!< in: query thread */ + lock_t* c_lock /*!< in: conflicting lock or NULL */ ) { trx_t* trx; @@ -3727,11 +3781,7 @@ lock_table_enqueue_waiting( #endif /* WITH_WSREP */ /* Enqueue the lock request that will wait to be granted */ - lock = lock_table_create(table, ulint(mode) | LOCK_WAIT, trx -#ifdef WITH_WSREP - , c_lock -#endif - ); + lock = lock_table_create(table, ulint(mode) | LOCK_WAIT, trx, 
c_lock);
 
 	const trx_t*	victim_trx = DeadlockChecker::check_and_resolve(lock, trx);
@@ -3887,11 +3937,7 @@ lock_table(
 
 	if (wait_for != NULL) {
 		err = lock_table_enqueue_waiting(ulint(mode) | flags, table,
-						 thr
-#ifdef WITH_WSREP
-						 , wait_for
-#endif
-						 );
+						 thr, wait_for);
 	} else {
 		lock_table_create(table, ulint(mode) | flags, trx);
 
@@ -3939,7 +3985,7 @@ lock_table_ix_resurrect(
 Checks if a waiting table lock request still has to wait in a queue.
 @return TRUE if still has to wait */
 static
-bool
+const lock_t*
 lock_table_has_to_wait_in_queue(
 /*============================*/
 	const lock_t*	wait_lock)	/*!< in: waiting table lock */
@@ -3958,11 +4004,11 @@ lock_table_has_to_wait_in_queue(
 
 		if (lock_has_to_wait(wait_lock, lock)) {
 
-			return(true);
+			return(lock);
 		}
 	}
 
-	return(false);
+	return(NULL);
 }
 
 /*************************************************************//**
@@ -3991,9 +4037,17 @@ lock_table_dequeue(
 	     lock != NULL;
 	     lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
 
-		if (lock_get_wait(lock)
-		    && !lock_table_has_to_wait_in_queue(lock)) {
+		if (!lock_get_wait(lock))
+			continue;
+		ut_ad(lock->trx->lock.wait_trx);
+		ut_ad(lock->trx->lock.wait_lock);
+
+		if (const lock_t *c = lock_table_has_to_wait_in_queue(lock)) {
+			trx_mutex_enter(lock->trx);
+			lock->trx->lock.wait_trx = c->trx;
+			trx_mutex_exit(lock->trx);
+		} else {
 			/* Grant the lock */
 			ut_ad(in_lock->trx != lock->trx);
 			lock_grant(lock);
@@ -4189,8 +4243,16 @@ released:
 			if (!lock_get_wait(lock)) {
 				continue;
 			}
-			const lock_t* c = lock_rec_has_to_wait_in_queue(lock);
-			if (!c) {
+			ut_ad(lock->trx->lock.wait_trx);
+			ut_ad(lock->trx->lock.wait_lock);
+			if (const lock_t* c = lock_rec_has_to_wait_in_queue(
+				    lock)) {
+				if (lock->trx != trx)
+					trx_mutex_enter(lock->trx);
+				lock->trx->lock.wait_trx = c->trx;
+				if (lock->trx != trx)
+					trx_mutex_exit(lock->trx);
+			} else {
 				/* Grant the lock */
 				ut_ad(trx != lock->trx);
 				lock_grant(lock);
@@ -4921,7 +4983,7 @@ func_exit:
 				wsrep_report_bf_lock_wait(impl_trx->mysql_thd, impl_trx->id);
 				wsrep_report_bf_lock_wait(other_lock->trx->mysql_thd, other_lock->trx->id);
 
-				if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
+				if (!lock_rec_has_expl(LOCK_S | LOCK_REC_NOT_GAP,
						       block, heap_no, impl_trx)) {
 					ib::info() << "WSREP impl BF lock conflict";
@@ -4930,7 +4992,20 @@ func_exit:
 #endif /* WITH_WSREP */
 			{
 				ut_ad(lock_get_wait(other_lock));
+				/* After the MDEV-27025 fix the following case
+				is possible:
+				1. trx 1 acquires S-lock;
+				2. trx 2 creates X-lock waiting for trx 1;
+				3. trx 1 creates an implicit lock; as
+				lock_rec_other_has_conflicting() returns no
+				conflicting trx 2 X-lock, the explicit lock
+				will not be created;
+				4. trx 3 creates a waiting X-lock, and it
+				will wait for the S-lock of trx 1.
+				That is why we relax the condition here and
+				check only for the S-lock.
+				*/
-				ut_ad(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
+				ut_ad(lock_rec_has_expl(LOCK_S | LOCK_REC_NOT_GAP,
						block, heap_no, impl_trx));
 			}
 		}
@@ -5336,19 +5411,13 @@ lock_rec_insert_check_and_lock(
 	const ulint	type_mode = LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION;
 
-	if (
-#ifdef WITH_WSREP
-	    lock_t* c_lock =
-#endif /* WITH_WSREP */
+	if (lock_t* c_lock =
 	    lock_rec_other_has_conflicting(type_mode, block, heap_no, trx)) {
 
 		/* Note that we may get DB_SUCCESS also here!
*/ trx_mutex_enter(trx); - err = lock_rec_enqueue_waiting( -#ifdef WITH_WSREP - c_lock, -#endif /* WITH_WSREP */ - type_mode, block, heap_no, index, thr, NULL); + err = lock_rec_enqueue_waiting(c_lock, type_mode, block, + heap_no, index, thr, NULL); trx_mutex_exit(trx); } else { @@ -5425,7 +5494,7 @@ lock_rec_convert_impl_to_expl_for_trx( && !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block, heap_no, trx)) { lock_rec_add_to_queue(LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP, - block, heap_no, index, trx, true); + block, heap_no, index, trx, true, true); } lock_mutex_exit(); diff --git a/storage/innobase/lock/lock0prdt.cc b/storage/innobase/lock/lock0prdt.cc index 9827243177d..15624cf79af 100644 --- a/storage/innobase/lock/lock0prdt.cc +++ b/storage/innobase/lock/lock0prdt.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, MariaDB Corporation. +Copyright (c) 2018, 2022 MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -487,9 +487,13 @@ lock_prdt_add_to_queue( } } - lock = lock_rec_create( + /* Note: We will not pass any conflicting lock to lock_rec_create(), + because we should be moving an existing waiting lock request. */ + ut_ad(!(type_mode & LOCK_WAIT) || trx->lock.wait_trx); + + lock = lock_rec_create(NULL, #ifdef WITH_WSREP - NULL, NULL, /* FIXME: replicate SPATIAL INDEX locks */ + NULL, /* FIXME: replicate SPATIAL INDEX locks */ #endif type_mode, block, PRDT_HEAPNO, index, trx, caller_owns_trx_mutex); @@ -579,9 +583,7 @@ lock_prdt_insert_check_and_lock( trx_mutex_enter(trx); err = lock_rec_enqueue_waiting( -#ifdef WITH_WSREP NULL, /* FIXME: replicate SPATIAL INDEX locks */ -#endif LOCK_X | LOCK_PREDICATE | LOCK_INSERT_INTENTION, block, PRDT_HEAPNO, index, thr, prdt); @@ -829,9 +831,9 @@ lock_prdt_lock( lock_t* lock = lock_rec_get_first_on_page(hash, block); if (lock == NULL) { - lock = lock_rec_create( + lock = lock_rec_create(NULL, #ifdef WITH_WSREP - NULL, NULL, /* FIXME: replicate SPATIAL INDEX locks */ + NULL, /* FIXME: replicate SPATIAL INDEX locks */ #endif ulint(mode) | type_mode, block, PRDT_HEAPNO, index, trx, FALSE); @@ -861,10 +863,8 @@ lock_prdt_lock( if (wait_for != NULL) { err = lock_rec_enqueue_waiting( -#ifdef WITH_WSREP NULL, /* FIXME: replicate SPATIAL INDEX locks */ -#endif ulint(mode) | type_mode, block, PRDT_HEAPNO, index, thr, prdt); @@ -948,9 +948,9 @@ lock_place_prdt_page_lock( } if (lock == NULL) { - lock = lock_rec_create_low( + lock = lock_rec_create_low(NULL, #ifdef WITH_WSREP - NULL, NULL, /* FIXME: replicate SPATIAL INDEX locks */ + NULL, /* FIXME: replicate SPATIAL INDEX locks */ #endif mode, space, page_no, NULL, PRDT_HEAPNO, index, trx, FALSE);
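The core idea behind hash_cell_t::insert_after() and the insert_before_waiting flag in the patch above is that a lock granted to a transaction which already holds a lock on the record can be linked into the hash cell ahead of the waiting requests instead of being appended at the tail. The following self-contained C++ sketch is not part of the patch; Lock and Cell are simplified, hypothetical stand-ins for lock_t and hash_cell_t, shown only to illustrate the list manipulation:

// Standalone sketch: a singly-linked hash cell where a newly granted lock is
// linked right after the requesting transaction's last granted lock instead
// of behind the waiting requests.
#include <iostream>

struct Lock
{
  int trx_id;   // owning transaction (simplified)
  bool waiting; // true if this request is still waiting
  Lock *hash;   // next lock in the same hash cell
};

struct Cell
{
  void *node= nullptr; // head of the singly-linked list of locks

  // Mirrors hash_cell_t::insert_after(): link `insert` right after `after`.
  template<typename T>
  void insert_after(T &after, T &insert, T *T::*next)
  {
    insert.*next= after.*next;
    after.*next= &insert;
  }

  // Append a lock at the end of the cell (the default queue position).
  void append(Lock &l)
  {
    if (!node) { node= &l; return; }
    Lock *tail= static_cast<Lock*>(node);
    while (tail->hash) tail= tail->hash;
    tail->hash= &l;
  }
};

int main()
{
  Cell cell;
  Lock granted_s{1, false, nullptr}; // trx 1 already holds a granted S lock
  Lock waiting_x{2, true, nullptr};  // trx 2 waits behind it
  cell.append(granted_s);
  cell.append(waiting_x);

  // trx 1 now needs another lock on the same record: insert it after its
  // last granted lock, i.e. before the waiting request of trx 2.
  Lock new_lock{1, false, nullptr};
  cell.insert_after(granted_s, new_lock, &Lock::hash);

  for (Lock *l= static_cast<Lock*>(cell.node); l; l= l->hash)
    std::cout << "trx " << l->trx_id
              << (l->waiting ? " (waiting)\n" : " (granted)\n");
  // Prints: trx 1 (granted), trx 1 (granted), trx 2 (waiting)
}

In the real code the insertion point is found by scanning the cell with lock_rec_get_first()/lock_rec_get_next() and stopping at the first waiting lock whose transaction waits for the current one (trx->lock.wait_trx), which is exactly what lock_rec_create_low() does when insert_before_waiting is set.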