mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-24 07:13:33 +03:00 
			
		
		
		
	MDEV-23328 Server hang due to Galera lock conflict resolution
mutex order violation here. when wsrep bf thread kills a conflicting trx, the stack is wsrep_thd_LOCK() wsrep_kill_victim() lock_rec_other_has_conflicting() lock_clust_rec_read_check_and_lock() row_search_mvcc() ha_innobase::index_read() ha_innobase::rnd_pos() handler::ha_rnd_pos() handler::rnd_pos_by_record() handler::ha_rnd_pos_by_record() Rows_log_event::find_row() Update_rows_log_event::do_exec_row() Rows_log_event::do_apply_event() Log_event::apply_event() wsrep_apply_events() and mutexes are taken in the order lock_sys->mutex -> victim_trx->mutex -> victim_thread->LOCK_thd_data When a normal KILL statement is executed, the stack is innobase_kill_query() kill_handlerton() plugin_foreach_with_mask() ha_kill_query() THD::awake() kill_one_thread() and mutexes are victim_thread->LOCK_thd_data -> lock_sys->mutex -> victim_trx->mutex To fix the mutex order violation we kill the victim thd asynchronously, from the manager thread
This commit is contained in:
		| @@ -2762,9 +2762,7 @@ extern "C" void wsrep_thd_awake(THD *thd, my_bool signal) | ||||
| { | ||||
|   if (signal) | ||||
|   { | ||||
|     mysql_mutex_lock(&thd->LOCK_thd_data); | ||||
|     thd->awake(KILL_QUERY); | ||||
|     mysql_mutex_unlock(&thd->LOCK_thd_data); | ||||
|   } | ||||
|   else | ||||
|   { | ||||
|   | ||||
| @@ -60,6 +60,7 @@ this program; if not, write to the Free Software Foundation, Inc., | ||||
| 
 | ||||
| #include <my_service_manager.h> | ||||
| #include <key.h> | ||||
| #include <sql_manager.h> | ||||
| 
 | ||||
| /* Include necessary InnoDB headers */ | ||||
| #include "btr0btr.h" | ||||
| @@ -19501,61 +19502,57 @@ wsrep_abort_slave_trx( | ||||
| 		(long long)bf_seqno, (long long)victim_seqno); | ||||
| 	abort(); | ||||
| } | ||||
| /*******************************************************************//**
 | ||||
| This function is used to kill one transaction in BF. */ | ||||
| UNIV_INTERN | ||||
| void | ||||
| wsrep_innobase_kill_one_trx( | ||||
| /*========================*/ | ||||
| 	MYSQL_THD const bf_thd, | ||||
| 	const trx_t * const bf_trx, | ||||
| 	trx_t *victim_trx, | ||||
| 	ibool signal) | ||||
| 
 | ||||
| struct bg_wsrep_kill_trx_arg { | ||||
| 	my_thread_id	thd_id; | ||||
| 	trx_id_t	trx_id; | ||||
| 	int64_t		bf_seqno; | ||||
| 	ibool		signal; | ||||
| }; | ||||
| 
 | ||||
| static void bg_wsrep_kill_trx( | ||||
| 	void *void_arg) | ||||
| { | ||||
|         ut_ad(bf_thd); | ||||
|         ut_ad(victim_trx); | ||||
|         ut_ad(lock_mutex_own()); | ||||
|         ut_ad(trx_mutex_own(victim_trx)); | ||||
| 	bg_wsrep_kill_trx_arg *arg = (bg_wsrep_kill_trx_arg*)void_arg; | ||||
| 	THD *thd 		   = find_thread_by_id(arg->thd_id, false); | ||||
| 	trx_t *victim_trx 	   = NULL; | ||||
| 	bool awake 		   = false; | ||||
| 	DBUG_ENTER("bg_wsrep_kill_trx"); | ||||
| 
 | ||||
| 	DBUG_ENTER("wsrep_innobase_kill_one_trx"); | ||||
| 	THD *thd          = (THD *) victim_trx->mysql_thd; | ||||
| 	int64_t bf_seqno  = wsrep_thd_trx_seqno(bf_thd); | ||||
| 
 | ||||
| 	if (!thd) { | ||||
| 		DBUG_PRINT("wsrep", ("no thd for conflicting lock")); | ||||
| 		WSREP_WARN("no THD for trx: " TRX_ID_FMT, victim_trx->id); | ||||
| 		DBUG_VOID_RETURN; | ||||
| 	if (thd) { | ||||
| 		victim_trx = thd_to_trx(thd); | ||||
| 		lock_mutex_enter(); | ||||
| 		trx_mutex_enter(victim_trx); | ||||
| 		if (victim_trx->id != arg->trx_id) | ||||
| 		{ | ||||
| 			trx_mutex_exit(victim_trx); | ||||
| 			lock_mutex_exit(); | ||||
| 			wsrep_thd_UNLOCK(thd); | ||||
| 			victim_trx = NULL; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	WSREP_LOG_CONFLICT(bf_thd, thd, TRUE); | ||||
| 	if (!victim_trx) { | ||||
| 		/* it can happen that trx_id was meanwhile rolled back */ | ||||
| 		DBUG_PRINT("wsrep", ("no thd for conflicting lock")); | ||||
| 		goto ret; | ||||
| 	} | ||||
| 
 | ||||
| 	WSREP_DEBUG("BF kill (" ULINTPF ", seqno: " INT64PF | ||||
| 		    "), victim: (%lu) trx: " TRX_ID_FMT, | ||||
| 		    signal, bf_seqno, | ||||
| 		    arg->signal, arg->bf_seqno, | ||||
| 		    thd_get_thread_id(thd), | ||||
| 		    victim_trx->id); | ||||
| 
 | ||||
| 	WSREP_DEBUG("Aborting query: %s conf %d trx: %" PRId64, | ||||
| 		    (thd && wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void", | ||||
| 		    (wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void", | ||||
| 		    wsrep_thd_conflict_state(thd, FALSE), | ||||
| 		    wsrep_thd_ws_handle(thd)->trx_id); | ||||
| 
 | ||||
| 	wsrep_thd_LOCK(thd); | ||||
|         DBUG_EXECUTE_IF("sync.wsrep_after_BF_victim_lock", | ||||
|                  { | ||||
|                    const char act[]= | ||||
|                      "now " | ||||
|                      "wait_for signal.wsrep_after_BF_victim_lock"; | ||||
|                    DBUG_ASSERT(!debug_sync_set_action(bf_thd, | ||||
|                                                       STRING_WITH_LEN(act))); | ||||
|                  };); | ||||
| 
 | ||||
| 
 | ||||
| 	if (wsrep_thd_query_state(thd) == QUERY_EXITING) { | ||||
| 		WSREP_DEBUG("kill trx EXITING for " TRX_ID_FMT, | ||||
| 			    victim_trx->id); | ||||
| 		wsrep_thd_UNLOCK(thd); | ||||
| 		DBUG_VOID_RETURN; | ||||
| 		goto ret_unlock; | ||||
| 	} | ||||
| 
 | ||||
| 	if (wsrep_thd_exec_mode(thd) != LOCAL_STATE) { | ||||
| @@ -19571,18 +19568,13 @@ wsrep_innobase_kill_one_trx( | ||||
|         case MUST_ABORT: | ||||
| 		WSREP_DEBUG("victim " TRX_ID_FMT " in MUST ABORT state", | ||||
| 			    victim_trx->id); | ||||
| 		wsrep_thd_UNLOCK(thd); | ||||
| 		wsrep_thd_awake(thd, signal); | ||||
| 		DBUG_VOID_RETURN; | ||||
| 		break; | ||||
| 		goto ret_awake; | ||||
| 	case ABORTED: | ||||
| 	case ABORTING: // fall through
 | ||||
| 	default: | ||||
| 		WSREP_DEBUG("victim " TRX_ID_FMT " in state %d", | ||||
| 			    victim_trx->id, wsrep_thd_get_conflict_state(thd)); | ||||
| 		wsrep_thd_UNLOCK(thd); | ||||
| 		DBUG_VOID_RETURN; | ||||
| 		break; | ||||
| 		goto ret_unlock; | ||||
| 	} | ||||
| 
 | ||||
| 	switch (wsrep_thd_query_state(thd)) { | ||||
| @@ -19595,12 +19587,12 @@ wsrep_innobase_kill_one_trx( | ||||
| 			    victim_trx->id); | ||||
| 
 | ||||
| 		if (wsrep_thd_exec_mode(thd) == REPL_RECV) { | ||||
| 			wsrep_abort_slave_trx(bf_seqno, | ||||
| 			wsrep_abort_slave_trx(arg->bf_seqno, | ||||
| 					      wsrep_thd_trx_seqno(thd)); | ||||
| 		} else { | ||||
| 			wsrep_t *wsrep= get_wsrep(); | ||||
| 			rcode = wsrep->abort_pre_commit( | ||||
| 				wsrep, bf_seqno, | ||||
| 				wsrep, arg->bf_seqno, | ||||
| 				(wsrep_trx_id_t)wsrep_thd_ws_handle(thd)->trx_id | ||||
| 			); | ||||
| 
 | ||||
| @@ -19609,10 +19601,7 @@ wsrep_innobase_kill_one_trx( | ||||
| 				WSREP_DEBUG("cancel commit warning: " | ||||
| 					    TRX_ID_FMT, | ||||
| 					    victim_trx->id); | ||||
| 				wsrep_thd_UNLOCK(thd); | ||||
| 				wsrep_thd_awake(thd, signal); | ||||
| 				DBUG_VOID_RETURN; | ||||
| 				break; | ||||
| 				goto ret_awake; | ||||
| 			case WSREP_OK: | ||||
| 				break; | ||||
| 			default: | ||||
| @@ -19625,12 +19614,9 @@ wsrep_innobase_kill_one_trx( | ||||
| 				 * kill the lock holder first. | ||||
| 				 */ | ||||
| 				abort(); | ||||
| 				break; | ||||
| 			} | ||||
| 		} | ||||
| 		wsrep_thd_UNLOCK(thd); | ||||
| 		wsrep_thd_awake(thd, signal); | ||||
| 		break; | ||||
| 		goto ret_awake; | ||||
| 	case QUERY_EXEC: | ||||
| 		/* it is possible that victim trx is itself waiting for some
 | ||||
| 		 * other lock. We need to cancel this waiting | ||||
| @@ -19651,26 +19637,20 @@ wsrep_innobase_kill_one_trx( | ||||
| 				lock_cancel_waiting_and_release(wait_lock); | ||||
| 			} | ||||
| 
 | ||||
| 			wsrep_thd_UNLOCK(thd); | ||||
| 			wsrep_thd_awake(thd, signal); | ||||
| 		} else { | ||||
| 			/* abort currently executing query */ | ||||
| 			DBUG_PRINT("wsrep",("sending KILL_QUERY to: %lu", | ||||
|                                             thd_get_thread_id(thd))); | ||||
| 			WSREP_DEBUG("kill query for: %ld", | ||||
| 				thd_get_thread_id(thd)); | ||||
| 			/* Note that innobase_kill_query will take lock_mutex
 | ||||
| 			and trx_mutex */ | ||||
| 			wsrep_thd_UNLOCK(thd); | ||||
| 			wsrep_thd_awake(thd, signal); | ||||
| 
 | ||||
| 			/* for BF thd, we need to prevent him from committing */ | ||||
| 			if (wsrep_thd_exec_mode(thd) == REPL_RECV) { | ||||
| 				wsrep_abort_slave_trx(bf_seqno, | ||||
| 				wsrep_abort_slave_trx(arg->bf_seqno, | ||||
| 						    wsrep_thd_trx_seqno(thd)); | ||||
| 			} | ||||
| 		} | ||||
| 		break; | ||||
| 		goto ret_awake; | ||||
| 	case QUERY_IDLE: | ||||
| 	{ | ||||
| 		WSREP_DEBUG("kill IDLE for " TRX_ID_FMT, victim_trx->id); | ||||
| @@ -19678,10 +19658,9 @@ wsrep_innobase_kill_one_trx( | ||||
| 		if (wsrep_thd_exec_mode(thd) == REPL_RECV) { | ||||
| 			WSREP_DEBUG("kill BF IDLE, seqno: %lld", | ||||
| 				    (long long)wsrep_thd_trx_seqno(thd)); | ||||
| 			wsrep_thd_UNLOCK(thd); | ||||
| 			wsrep_abort_slave_trx(bf_seqno, | ||||
| 			wsrep_abort_slave_trx(arg->bf_seqno, | ||||
| 					      wsrep_thd_trx_seqno(thd)); | ||||
| 			DBUG_VOID_RETURN; | ||||
| 			goto ret_unlock; | ||||
| 		} | ||||
|                 /* This will lock thd from proceeding after net_read() */ | ||||
| 		wsrep_thd_set_conflict_state(thd, ABORTING); | ||||
| @@ -19702,17 +19681,67 @@ wsrep_innobase_kill_one_trx( | ||||
| 		DBUG_PRINT("wsrep",("signalling wsrep rollbacker")); | ||||
| 		WSREP_DEBUG("signaling aborter"); | ||||
| 		wsrep_unlock_rollback(); | ||||
| 		wsrep_thd_UNLOCK(thd); | ||||
| 
 | ||||
| 		break; | ||||
| 		goto ret_unlock; | ||||
| 	} | ||||
| 	default: | ||||
| 		WSREP_WARN("bad wsrep query state: %d", | ||||
| 			  wsrep_thd_query_state(thd)); | ||||
| 		wsrep_thd_UNLOCK(thd); | ||||
| 		break; | ||||
| 		goto ret_unlock; | ||||
| 	} | ||||
| 
 | ||||
| ret_awake: | ||||
| 	awake = true; | ||||
| 
 | ||||
| ret_unlock: | ||||
| 	trx_mutex_exit(victim_trx); | ||||
| 	lock_mutex_exit(); | ||||
| 	if (awake) | ||||
| 		wsrep_thd_awake(thd, arg->signal); | ||||
| 	wsrep_thd_UNLOCK(thd); | ||||
| 
 | ||||
| ret: | ||||
| 	free(arg); | ||||
| 	DBUG_VOID_RETURN; | ||||
| 
 | ||||
| } | ||||
| 
 | ||||
| /*******************************************************************//**
 | ||||
| This function is used to kill one transaction in BF. */ | ||||
| UNIV_INTERN | ||||
| void | ||||
| wsrep_innobase_kill_one_trx( | ||||
| /*========================*/ | ||||
| 	MYSQL_THD const bf_thd, | ||||
| 	const trx_t * const bf_trx, | ||||
| 	trx_t *victim_trx, | ||||
| 	ibool signal) | ||||
| { | ||||
| 	ut_ad(bf_thd); | ||||
| 	ut_ad(victim_trx); | ||||
| 	ut_ad(lock_mutex_own()); | ||||
| 	ut_ad(trx_mutex_own(victim_trx)); | ||||
| 
 | ||||
| 	bg_wsrep_kill_trx_arg *arg = (bg_wsrep_kill_trx_arg*)malloc(sizeof(*arg)); | ||||
| 	arg->thd_id	= thd_get_thread_id(victim_trx->mysql_thd); | ||||
| 	arg->trx_id	= victim_trx->id; | ||||
| 	arg->bf_seqno	= wsrep_thd_trx_seqno((THD*)bf_thd); | ||||
| 	arg->signal	= signal; | ||||
| 
 | ||||
| 	DBUG_ENTER("wsrep_innobase_kill_one_trx"); | ||||
| 
 | ||||
| 	WSREP_LOG_CONFLICT(bf_thd, victim_trx->mysql_thd, TRUE); | ||||
| 
 | ||||
| 	DBUG_EXECUTE_IF("sync.wsrep_after_BF_victim_lock", | ||||
| 		{ | ||||
| 		  const char act[]= | ||||
| 		    "now " | ||||
| 		    "wait_for signal.wsrep_after_BF_victim_lock"; | ||||
| 		  DBUG_ASSERT(!debug_sync_set_action(bf_thd, | ||||
| 						     STRING_WITH_LEN(act))); | ||||
| 		};); | ||||
| 
 | ||||
| 
 | ||||
| 	mysql_manager_submit(bg_wsrep_kill_trx, arg); | ||||
| 	DBUG_VOID_RETURN; | ||||
| } | ||||
| 
 | ||||
|   | ||||
		Reference in New Issue
	
	Block a user