From 29bbcac0ee841faaa68eeb09c86ff825eabbe6b6 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Wed, 20 Jan 2021 18:22:38 +0100 Subject: [PATCH] MDEV-23328 Server hang due to Galera lock conflict resolution mutex order violation here. when wsrep bf thread kills a conflicting trx, the stack is wsrep_thd_LOCK() wsrep_kill_victim() lock_rec_other_has_conflicting() lock_clust_rec_read_check_and_lock() row_search_mvcc() ha_innobase::index_read() ha_innobase::rnd_pos() handler::ha_rnd_pos() handler::rnd_pos_by_record() handler::ha_rnd_pos_by_record() Rows_log_event::find_row() Update_rows_log_event::do_exec_row() Rows_log_event::do_apply_event() Log_event::apply_event() wsrep_apply_events() and mutexes are taken in the order lock_sys->mutex -> victim_trx->mutex -> victim_thread->LOCK_thd_data When a normal KILL statement is executed, the stack is innobase_kill_query() kill_handlerton() plugin_foreach_with_mask() ha_kill_query() THD::awake() kill_one_thread() and mutexes are victim_thread->LOCK_thd_data -> lock_sys->mutex -> victim_trx->mutex To fix the mutex order violation we kill the victim thd asynchronously, from the manager thread --- sql/wsrep_mysqld.cc | 2 - storage/innobase/handler/ha_innodb.cc | 171 +++++++++++++++----------- 2 files changed, 100 insertions(+), 73 deletions(-) diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index d392d1c2a61..c033f7e1464 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -2762,9 +2762,7 @@ extern "C" void wsrep_thd_awake(THD *thd, my_bool signal) { if (signal) { - mysql_mutex_lock(&thd->LOCK_thd_data); thd->awake(KILL_QUERY); - mysql_mutex_unlock(&thd->LOCK_thd_data); } else { diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index e475852cb90..58a07e46be6 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -60,6 +60,7 @@ this program; if not, write to the Free Software Foundation, Inc., #include #include +#include /* Include necessary InnoDB headers */ #include "btr0btr.h" @@ -19501,61 +19502,57 @@ wsrep_abort_slave_trx( (long long)bf_seqno, (long long)victim_seqno); abort(); } -/*******************************************************************//** -This function is used to kill one transaction in BF. */ -UNIV_INTERN -void -wsrep_innobase_kill_one_trx( -/*========================*/ - MYSQL_THD const bf_thd, - const trx_t * const bf_trx, - trx_t *victim_trx, - ibool signal) + +struct bg_wsrep_kill_trx_arg { + my_thread_id thd_id; + trx_id_t trx_id; + int64_t bf_seqno; + ibool signal; +}; + +static void bg_wsrep_kill_trx( + void *void_arg) { - ut_ad(bf_thd); - ut_ad(victim_trx); - ut_ad(lock_mutex_own()); - ut_ad(trx_mutex_own(victim_trx)); + bg_wsrep_kill_trx_arg *arg = (bg_wsrep_kill_trx_arg*)void_arg; + THD *thd = find_thread_by_id(arg->thd_id, false); + trx_t *victim_trx = NULL; + bool awake = false; + DBUG_ENTER("bg_wsrep_kill_trx"); - DBUG_ENTER("wsrep_innobase_kill_one_trx"); - THD *thd = (THD *) victim_trx->mysql_thd; - int64_t bf_seqno = wsrep_thd_trx_seqno(bf_thd); - - if (!thd) { - DBUG_PRINT("wsrep", ("no thd for conflicting lock")); - WSREP_WARN("no THD for trx: " TRX_ID_FMT, victim_trx->id); - DBUG_VOID_RETURN; + if (thd) { + victim_trx = thd_to_trx(thd); + lock_mutex_enter(); + trx_mutex_enter(victim_trx); + if (victim_trx->id != arg->trx_id) + { + trx_mutex_exit(victim_trx); + lock_mutex_exit(); + wsrep_thd_UNLOCK(thd); + victim_trx = NULL; + } } - WSREP_LOG_CONFLICT(bf_thd, thd, TRUE); + if (!victim_trx) { + /* it can happen that trx_id was meanwhile rolled back */ + DBUG_PRINT("wsrep", ("no thd for conflicting lock")); + goto ret; + } WSREP_DEBUG("BF kill (" ULINTPF ", seqno: " INT64PF "), victim: (%lu) trx: " TRX_ID_FMT, - signal, bf_seqno, + arg->signal, arg->bf_seqno, thd_get_thread_id(thd), victim_trx->id); WSREP_DEBUG("Aborting query: %s conf %d trx: %" PRId64, - (thd && wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void", + (wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void", wsrep_thd_conflict_state(thd, FALSE), wsrep_thd_ws_handle(thd)->trx_id); - wsrep_thd_LOCK(thd); - DBUG_EXECUTE_IF("sync.wsrep_after_BF_victim_lock", - { - const char act[]= - "now " - "wait_for signal.wsrep_after_BF_victim_lock"; - DBUG_ASSERT(!debug_sync_set_action(bf_thd, - STRING_WITH_LEN(act))); - };); - - if (wsrep_thd_query_state(thd) == QUERY_EXITING) { WSREP_DEBUG("kill trx EXITING for " TRX_ID_FMT, victim_trx->id); - wsrep_thd_UNLOCK(thd); - DBUG_VOID_RETURN; + goto ret_unlock; } if (wsrep_thd_exec_mode(thd) != LOCAL_STATE) { @@ -19571,18 +19568,13 @@ wsrep_innobase_kill_one_trx( case MUST_ABORT: WSREP_DEBUG("victim " TRX_ID_FMT " in MUST ABORT state", victim_trx->id); - wsrep_thd_UNLOCK(thd); - wsrep_thd_awake(thd, signal); - DBUG_VOID_RETURN; - break; + goto ret_awake; case ABORTED: case ABORTING: // fall through default: WSREP_DEBUG("victim " TRX_ID_FMT " in state %d", victim_trx->id, wsrep_thd_get_conflict_state(thd)); - wsrep_thd_UNLOCK(thd); - DBUG_VOID_RETURN; - break; + goto ret_unlock; } switch (wsrep_thd_query_state(thd)) { @@ -19595,12 +19587,12 @@ wsrep_innobase_kill_one_trx( victim_trx->id); if (wsrep_thd_exec_mode(thd) == REPL_RECV) { - wsrep_abort_slave_trx(bf_seqno, + wsrep_abort_slave_trx(arg->bf_seqno, wsrep_thd_trx_seqno(thd)); } else { wsrep_t *wsrep= get_wsrep(); rcode = wsrep->abort_pre_commit( - wsrep, bf_seqno, + wsrep, arg->bf_seqno, (wsrep_trx_id_t)wsrep_thd_ws_handle(thd)->trx_id ); @@ -19609,10 +19601,7 @@ wsrep_innobase_kill_one_trx( WSREP_DEBUG("cancel commit warning: " TRX_ID_FMT, victim_trx->id); - wsrep_thd_UNLOCK(thd); - wsrep_thd_awake(thd, signal); - DBUG_VOID_RETURN; - break; + goto ret_awake; case WSREP_OK: break; default: @@ -19625,12 +19614,9 @@ wsrep_innobase_kill_one_trx( * kill the lock holder first. */ abort(); - break; } } - wsrep_thd_UNLOCK(thd); - wsrep_thd_awake(thd, signal); - break; + goto ret_awake; case QUERY_EXEC: /* it is possible that victim trx is itself waiting for some * other lock. We need to cancel this waiting @@ -19651,26 +19637,20 @@ wsrep_innobase_kill_one_trx( lock_cancel_waiting_and_release(wait_lock); } - wsrep_thd_UNLOCK(thd); - wsrep_thd_awake(thd, signal); } else { /* abort currently executing query */ DBUG_PRINT("wsrep",("sending KILL_QUERY to: %lu", thd_get_thread_id(thd))); WSREP_DEBUG("kill query for: %ld", thd_get_thread_id(thd)); - /* Note that innobase_kill_query will take lock_mutex - and trx_mutex */ - wsrep_thd_UNLOCK(thd); - wsrep_thd_awake(thd, signal); /* for BF thd, we need to prevent him from committing */ if (wsrep_thd_exec_mode(thd) == REPL_RECV) { - wsrep_abort_slave_trx(bf_seqno, + wsrep_abort_slave_trx(arg->bf_seqno, wsrep_thd_trx_seqno(thd)); } } - break; + goto ret_awake; case QUERY_IDLE: { WSREP_DEBUG("kill IDLE for " TRX_ID_FMT, victim_trx->id); @@ -19678,10 +19658,9 @@ wsrep_innobase_kill_one_trx( if (wsrep_thd_exec_mode(thd) == REPL_RECV) { WSREP_DEBUG("kill BF IDLE, seqno: %lld", (long long)wsrep_thd_trx_seqno(thd)); - wsrep_thd_UNLOCK(thd); - wsrep_abort_slave_trx(bf_seqno, + wsrep_abort_slave_trx(arg->bf_seqno, wsrep_thd_trx_seqno(thd)); - DBUG_VOID_RETURN; + goto ret_unlock; } /* This will lock thd from proceeding after net_read() */ wsrep_thd_set_conflict_state(thd, ABORTING); @@ -19702,17 +19681,67 @@ wsrep_innobase_kill_one_trx( DBUG_PRINT("wsrep",("signalling wsrep rollbacker")); WSREP_DEBUG("signaling aborter"); wsrep_unlock_rollback(); - wsrep_thd_UNLOCK(thd); - - break; + goto ret_unlock; } default: WSREP_WARN("bad wsrep query state: %d", wsrep_thd_query_state(thd)); - wsrep_thd_UNLOCK(thd); - break; + goto ret_unlock; } +ret_awake: + awake = true; + +ret_unlock: + trx_mutex_exit(victim_trx); + lock_mutex_exit(); + if (awake) + wsrep_thd_awake(thd, arg->signal); + wsrep_thd_UNLOCK(thd); + +ret: + free(arg); + DBUG_VOID_RETURN; + +} + +/*******************************************************************//** +This function is used to kill one transaction in BF. */ +UNIV_INTERN +void +wsrep_innobase_kill_one_trx( +/*========================*/ + MYSQL_THD const bf_thd, + const trx_t * const bf_trx, + trx_t *victim_trx, + ibool signal) +{ + ut_ad(bf_thd); + ut_ad(victim_trx); + ut_ad(lock_mutex_own()); + ut_ad(trx_mutex_own(victim_trx)); + + bg_wsrep_kill_trx_arg *arg = (bg_wsrep_kill_trx_arg*)malloc(sizeof(*arg)); + arg->thd_id = thd_get_thread_id(victim_trx->mysql_thd); + arg->trx_id = victim_trx->id; + arg->bf_seqno = wsrep_thd_trx_seqno((THD*)bf_thd); + arg->signal = signal; + + DBUG_ENTER("wsrep_innobase_kill_one_trx"); + + WSREP_LOG_CONFLICT(bf_thd, victim_trx->mysql_thd, TRUE); + + DBUG_EXECUTE_IF("sync.wsrep_after_BF_victim_lock", + { + const char act[]= + "now " + "wait_for signal.wsrep_after_BF_victim_lock"; + DBUG_ASSERT(!debug_sync_set_action(bf_thd, + STRING_WITH_LEN(act))); + };); + + + mysql_manager_submit(bg_wsrep_kill_trx, arg); DBUG_VOID_RETURN; }