diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 6433786a079..1af3b9a9cca 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -4598,19 +4598,20 @@ extern "C" int thd_rpl_is_parallel(const MYSQL_THD thd) } /* - This function can optionally be called to check if thd_report_wait_for() + This function can optionally be called to check if thd_rpl_deadlock_check() needs to be called for waits done by a given transaction. - If this function returns false for a given thd, there is no need to do any - calls to thd_report_wait_for() on that thd. + If this function returns false for a given thd, there is no need to do + any calls to thd_rpl_deadlock_check() on that thd. - This call is optional; it is safe to call thd_report_wait_for() in any case. - This call can be used to save some redundant calls to thd_report_wait_for() - if desired. (This is unlikely to matter much unless there are _lots_ of - waits to report, as the overhead of thd_report_wait_for() is small). + This call is optional; it is safe to call thd_rpl_deadlock_check() in + any case. This call can be used to save some redundant calls to + thd_rpl_deadlock_check() if desired. (This is unlikely to matter much + unless there are _lots_ of waits to report, as the overhead of + thd_rpl_deadlock_check() is small). */ extern "C" int -thd_need_wait_for(const MYSQL_THD thd) +thd_need_wait_reports(const MYSQL_THD thd) { rpl_group_info *rgi; @@ -4625,75 +4626,9 @@ thd_need_wait_for(const MYSQL_THD thd) } /* - Used by InnoDB/XtraDB to report that one transaction THD is about to go to - wait for a transactional lock held by another transactions OTHER_THD. - - This is used for parallel replication, where transactions are required to - commit in the same order on the slave as they did on the master. If the - transactions on the slave encounters lock conflicts on the slave that did - not exist on the master, this can cause deadlocks. - - Normally, such conflicts will not occur, because the same conflict would - have prevented the two transactions from committing in parallel on the - master, thus preventing them from running in parallel on the slave in the - first place. However, it is possible in case when the optimizer chooses a - different plan on the slave than on the master (eg. table scan instead of - index scan). - - InnoDB/XtraDB reports lock waits using this call. If a lock wait causes a - deadlock with the pre-determined commit order, we kill the later transaction, - and later re-try it, to resolve the deadlock. - - This call need only receive reports about waits for locks that will remain - until the holding transaction commits. InnoDB/XtraDB auto-increment locks - are released earlier, and so need not be reported. (Such false positives are - not harmful, but could lead to unnecessary kill and retry, so best avoided). -*/ -extern "C" void -thd_report_wait_for(MYSQL_THD thd, MYSQL_THD other_thd) -{ - rpl_group_info *rgi; - rpl_group_info *other_rgi; - - if (!thd) - return; - DEBUG_SYNC(thd, "thd_report_wait_for"); - thd->transaction.stmt.mark_trans_did_wait(); - if (!other_thd) - return; - binlog_report_wait_for(thd, other_thd); - rgi= thd->rgi_slave; - other_rgi= other_thd->rgi_slave; - if (!rgi || !other_rgi) - return; - if (!rgi->is_parallel_exec) - return; - if (rgi->rli != other_rgi->rli) - return; - if (!rgi->gtid_sub_id || !other_rgi->gtid_sub_id) - return; - if (rgi->current_gtid.domain_id != other_rgi->current_gtid.domain_id) - return; - if (rgi->gtid_sub_id > other_rgi->gtid_sub_id) - return; - /* - This transaction is about to wait for another transaction that is required - by replication binlog order to commit after. This would cause a deadlock. - - So send a kill to the other transaction, with a temporary error; this will - cause replication to rollback (and later re-try) the other transaction, - releasing the lock for this transaction so replication can proceed. - */ - other_rgi->killed_for_retry= rpl_group_info::RETRY_KILL_KILLED; - mysql_mutex_lock(&other_thd->LOCK_thd_data); - other_thd->awake(KILL_CONNECTION); - mysql_mutex_unlock(&other_thd->LOCK_thd_data); -} - -/* - Used by storage engines (currently TokuDB) to report that one transaction - THD is about to go to wait for a transactional lock held by another - transactions OTHER_THD. + Used by storage engines (currently TokuDB and InnoDB/XtraDB) to report that + one transaction THD is about to go to wait for a transactional lock held by + another transactions OTHER_THD. This is used for parallel replication, where transactions are required to commit in the same order on the slave as they did on the master. If the @@ -4708,9 +4643,9 @@ thd_report_wait_for(MYSQL_THD thd, MYSQL_THD other_thd) chooses a different plan on the slave than on the master (eg. table scan instead of index scan). - InnoDB/XtraDB reports lock waits using this call. If a lock wait causes a - deadlock with the pre-determined commit order, we kill the later transaction, - and later re-try it, to resolve the deadlock. + Storage engines report lock waits using this call. If a lock wait causes a + deadlock with the pre-determined commit order, we kill the later + transaction, and later re-try it, to resolve the deadlock. This call need only receive reports about waits for locks that will remain until the holding transaction commits. InnoDB/XtraDB auto-increment locks, @@ -4801,8 +4736,8 @@ thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd) Calling this function is just an optimisation to avoid unnecessary deadlocks. If it was not used, a gap lock would be set that could eventually - cause a deadlock; the deadlock would be caught by thd_report_wait_for() and - the transaction T2 killed and rolled back (and later re-tried). + cause a deadlock; the deadlock would be caught by thd_rpl_deadlock_check() + and the transaction T2 killed and rolled back (and later re-tried). */ extern "C" int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd) diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 604beec6f78..7734eb162fb 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -5685,8 +5685,7 @@ innobase_kill_query( wsrep_thd_is_BF(current_thd, FALSE)); } - if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE) && - trx->abort_type == TRX_SERVER_ABORT) { + if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE)) { ut_ad(!lock_mutex_own()); lock_mutex_enter(); lock_mutex_taken = true; diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 839c3d057e7..42d59cb396f 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -842,8 +842,7 @@ lock_sys->mutex and sometimes by trx->mutex. */ typedef enum { TRX_SERVER_ABORT = 0, - TRX_WSREP_ABORT = 1, - TRX_REPLICATION_ABORT = 2 + TRX_WSREP_ABORT = 1 } trx_abort_t; diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 8f65966f1cb..6adb1c99041 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -70,15 +70,8 @@ static const ulint TABLE_LOCK_CACHE = 8; /** Size in bytes, of the table lock instance */ static const ulint TABLE_LOCK_SIZE = sizeof(ib_lock_t); -/* Buffer to collect THDs to report waits for. */ -struct thd_wait_reports { - struct thd_wait_reports *next; /*!< List link */ - ulint used; /*!< How many elements in waitees[] */ - trx_t *waitees[64]; /*!< Trxs for thd_report_wait_for() */ -}; - -extern "C" void thd_report_wait_for(MYSQL_THD thd, MYSQL_THD other_thd); -extern "C" int thd_need_wait_for(const MYSQL_THD thd); +extern "C" void thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd); +extern "C" int thd_need_wait_reports(const MYSQL_THD thd); extern "C" int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd); /** Deadlock checker. */ @@ -109,7 +102,7 @@ private: const trx_t* trx, const lock_t* wait_lock, ib_uint64_t mark_start, - struct thd_wait_reports* waitee_buf_ptr) + bool report_waiters) : m_cost(), m_start(trx), @@ -117,7 +110,7 @@ private: m_wait_lock(wait_lock), m_mark_start(mark_start), m_n_elems(), - m_waitee_ptr(waitee_buf_ptr) + m_report_waiters(report_waiters) { } @@ -276,8 +269,8 @@ private: /** This is to avoid malloc/free calls. */ static state_t s_states[MAX_STACK_SIZE]; - /** Buffer to collect THDs to report waits for. */ - struct thd_wait_reports* m_waitee_ptr; + /** Set if thd_rpl_deadlock_check() should be called for waits. */ + bool m_report_waiters; }; /** Counter to mark visited nodes during deadlock search. */ @@ -286,11 +279,6 @@ ib_uint64_t DeadlockChecker::s_lock_mark_counter = 0; /** The stack used for deadlock searches. */ DeadlockChecker::state_t DeadlockChecker::s_states[MAX_STACK_SIZE]; -extern "C" void thd_report_wait_for(MYSQL_THD thd, MYSQL_THD other_thd); -extern "C" int thd_need_wait_for(const MYSQL_THD thd); -extern "C" -int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd); - #ifdef UNIV_DEBUG /*********************************************************************//** Validates the lock system. @@ -2074,14 +2062,6 @@ RecLock::add_to_waitq(const lock_t* wait_for, const lock_prdt_t* prdt) dberr_t err = deadlock_check(lock); ut_ad(trx_mutex_own(m_trx)); - - /* m_trx->mysql_thd is NULL if it's an internal trx. So current_thd is used */ - if (err == DB_LOCK_WAIT) { - ut_ad(wait_for && wait_for->trx); - wait_for->trx->abort_type = TRX_REPLICATION_ABORT; - thd_report_wait_for(current_thd, wait_for->trx->mysql_thd); - wait_for->trx->abort_type = TRX_SERVER_ABORT; - } return(err); } @@ -7968,27 +7948,11 @@ DeadlockChecker::search() layer. These locks are released before commit, so they can not cause deadlocks with binlog-fixed commit order. */ - if (m_waitee_ptr && + if (m_report_waiters && (lock_get_type_low(lock) != LOCK_TABLE || lock_get_mode(lock) != LOCK_AUTO_INC)) { - if (m_waitee_ptr->used == - sizeof(m_waitee_ptr->waitees) / - sizeof(m_waitee_ptr->waitees[0])) { - m_waitee_ptr->next = - (struct thd_wait_reports *) - ut_malloc_nokey(sizeof(*m_waitee_ptr)); - m_waitee_ptr = m_waitee_ptr->next; - - if (!m_waitee_ptr) { - m_too_deep = true; - return (m_start); - } - - m_waitee_ptr->next = NULL; - m_waitee_ptr->used = 0; - } - - m_waitee_ptr->waitees[m_waitee_ptr->used++] = lock->trx; + thd_rpl_deadlock_check(m_start->mysql_thd, + lock->trx->mysql_thd); } if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) { @@ -8068,47 +8032,6 @@ DeadlockChecker::trx_rollback() trx_mutex_exit(trx); } -static -void -lock_report_waiters_to_mysql( -/*=======================*/ - struct thd_wait_reports* waitee_buf_ptr, /*!< in: set of trxs */ - THD* mysql_thd, /*!< in: THD */ - const trx_t* victim_trx) /*!< in: Trx selected - as deadlock victim, if - any */ -{ - struct thd_wait_reports* p; - struct thd_wait_reports* q; - ulint i; - - p = waitee_buf_ptr; - while (p) { - i = 0; - while (i < p->used) { - trx_t *w_trx = p->waitees[i]; - /* There is no need to report waits to a trx already - selected as a victim. */ - if (w_trx != victim_trx) { - /* If thd_report_wait_for() decides to kill the - transaction, then we will get a call back into - innobase_kill_query. We mark this by setting - current_lock_mutex_owner, so we can avoid trying - to recursively take lock_sys->mutex. */ - w_trx->abort_type = TRX_REPLICATION_ABORT; - thd_report_wait_for(mysql_thd, w_trx->mysql_thd); - w_trx->abort_type = TRX_SERVER_ABORT; - } - ++i; - } - q = p->next; - if (p != waitee_buf_ptr) { - ut_free(p); - } - p = q; - } -} - /** Checks if a joining lock request results in a deadlock. If a deadlock is found this function will resolve the deadlock by choosing a victim transaction and rolling it back. It will attempt to resolve all deadlocks. The returned @@ -8127,36 +8050,20 @@ DeadlockChecker::check_and_resolve(const lock_t* lock, const trx_t* trx) ut_ad(!srv_read_only_mode); const trx_t* victim_trx; - struct thd_wait_reports waitee_buf; - struct thd_wait_reports*waitee_buf_ptr; - THD* start_mysql_thd; + THD* start_mysql_thd; + bool report_waits = false; start_mysql_thd = trx->mysql_thd; - if (start_mysql_thd && thd_need_wait_for(start_mysql_thd)) { - waitee_buf_ptr = &waitee_buf; - } else { - waitee_buf_ptr = NULL; - } + if (start_mysql_thd && thd_need_wait_reports(start_mysql_thd)) + report_waits = true; /* Try and resolve as many deadlocks as possible. */ do { - if (waitee_buf_ptr) { - waitee_buf_ptr->next = NULL; - waitee_buf_ptr->used = 0; - } - - DeadlockChecker checker(trx, lock, s_lock_mark_counter, waitee_buf_ptr); + DeadlockChecker checker(trx, lock, s_lock_mark_counter, report_waits); victim_trx = checker.search(); - /* Report waits to upper layer, as needed. */ - if (waitee_buf_ptr) { - lock_report_waiters_to_mysql(waitee_buf_ptr, - start_mysql_thd, - victim_trx); - } - /* Search too deep, we rollback the joining transaction only if it is possible to rollback. Otherwise we rollback the transaction that is holding the lock that the joining diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index aa54a5aff5d..900d520d242 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -5441,8 +5441,7 @@ innobase_kill_connection( wsrep_thd_is_BF(current_thd, FALSE), lock_get_info(trx->lock.wait_lock).c_str()); - if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE) && - trx->abort_type == TRX_SERVER_ABORT) { + if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE)) { ut_ad(!lock_mutex_own()); lock_mutex_enter(); } @@ -5462,8 +5461,7 @@ innobase_kill_connection( trx_mutex_exit(trx); } - if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE) && - trx->abort_type == TRX_SERVER_ABORT) { + if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE)) { lock_mutex_exit(); } } diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h index ec810405b5a..44ff8f3ec98 100644 --- a/storage/xtradb/include/trx0trx.h +++ b/storage/xtradb/include/trx0trx.h @@ -707,8 +707,7 @@ lock_sys->mutex and sometimes by trx->mutex. */ typedef enum { TRX_SERVER_ABORT = 0, - TRX_WSREP_ABORT = 1, - TRX_REPLICATION_ABORT = 2 + TRX_WSREP_ABORT = 1 } trx_abort_t; struct trx_t{ diff --git a/storage/xtradb/lock/lock0lock.cc b/storage/xtradb/lock/lock0lock.cc index ecf43f71b21..341c452cd15 100644 --- a/storage/xtradb/lock/lock0lock.cc +++ b/storage/xtradb/lock/lock0lock.cc @@ -380,8 +380,8 @@ struct lock_stack_t { ulint heap_no; /*!< heap number if rec lock */ }; -extern "C" void thd_report_wait_for(MYSQL_THD thd, MYSQL_THD other_thd); -extern "C" int thd_need_wait_for(const MYSQL_THD thd); +extern "C" void thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd); +extern "C" int thd_need_wait_reports(const MYSQL_THD thd); extern "C" int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd); @@ -406,7 +406,7 @@ UNIV_INTERN mysql_pfs_key_t lock_sys_wait_mutex_key; struct thd_wait_reports { struct thd_wait_reports *next; /*!< List link */ ulint used; /*!< How many elements in waitees[] */ - trx_t *waitees[64]; /*!< Trxs for thd_report_wait_for() */ + trx_t *waitees[64]; /*!< Trxs for thd_rpl_deadlock_check() */ }; @@ -4489,14 +4489,7 @@ lock_report_waiters_to_mysql( /* There is no need to report waits to a trx already selected as a victim. */ if (w_trx->id != victim_trx_id) { - /* If thd_report_wait_for() decides to kill the - transaction, then we will get a call back into - innobase_kill_query. We mark this by setting - current_lock_mutex_owner, so we can avoid trying - to recursively take lock_sys->mutex. */ - w_trx->abort_type = TRX_REPLICATION_ABORT; - thd_report_wait_for(mysql_thd, w_trx->mysql_thd); - w_trx->abort_type = TRX_SERVER_ABORT; + thd_rpl_deadlock_check(mysql_thd, w_trx->mysql_thd); } ++i; } @@ -4535,7 +4528,7 @@ lock_deadlock_check_and_resolve( assert_trx_in_list(trx); start_mysql_thd = trx->mysql_thd; - if (start_mysql_thd && thd_need_wait_for(start_mysql_thd)) { + if (start_mysql_thd && thd_need_wait_reports(start_mysql_thd)) { waitee_buf_ptr = &waitee_buf; } else { waitee_buf_ptr = NULL;