mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
MDEV-23328 Server hang due to Galera lock conflict resolution
Mutex order violation when wsrep bf thread kills a conflicting trx, the stack is wsrep_thd_LOCK() wsrep_kill_victim() lock_rec_other_has_conflicting() lock_clust_rec_read_check_and_lock() row_search_mvcc() ha_innobase::index_read() ha_innobase::rnd_pos() handler::ha_rnd_pos() handler::rnd_pos_by_record() handler::ha_rnd_pos_by_record() Rows_log_event::find_row() Update_rows_log_event::do_exec_row() Rows_log_event::do_apply_event() Log_event::apply_event() wsrep_apply_events() and mutexes are taken in the order lock_sys->mutex -> victim_trx->mutex -> victim_thread->LOCK_thd_data When a normal KILL statement is executed, the stack is innobase_kill_query() kill_handlerton() plugin_foreach_with_mask() ha_kill_query() THD::awake() kill_one_thread() and mutexes are victim_thread->LOCK_thd_data -> lock_sys->mutex -> victim_trx->mutex This patch is the plan D variant for fixing potetial mutex locking order exercised by BF aborting and KILL command execution. In this approach, KILL command is replicated as TOI operation. This guarantees total isolation for the KILL command execution in the first node: there is no concurrent replication applying and no concurrent DDL executing. Therefore there is no risk of BF aborting to happen in parallel with KILL command execution either. Potential mutex deadlocks between the different mutex access paths with KILL command execution and BF aborting cannot therefore happen. TOI replication is used, in this approach, purely as means to provide isolated KILL command execution in the first node. KILL command should not (and must not) be applied in secondary nodes. In this patch, we make this sure by skipping KILL execution in secondary nodes, in applying phase, where we bail out if applier thread is trying to execute KILL command. This is effective, but skipping the applying of KILL command could happen much earlier as well. This also fixed unprotected calls to wsrep_thd_abort that will use wsrep_abort_transaction. This is fixed by holding THD::LOCK_thd_data while we abort transaction. Reviewed-by: Jan Lindström <jan.lindstrom@mariadb.com>
This commit is contained in:
@ -1,4 +1,4 @@
|
||||
/* Copyright 2008-2015 Codership Oy <http://www.codership.com>
|
||||
/* Copyright 2008-2021 Codership Oy <http://www.codership.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -835,13 +835,25 @@ void wsrep_thr_init()
|
||||
DBUG_VOID_RETURN;
|
||||
}
|
||||
|
||||
/* This is wrapper for wsrep_break_lock in thr_lock.c */
|
||||
static int wsrep_thr_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal)
|
||||
{
|
||||
THD* victim_thd= (THD *) victim_thd_ptr;
|
||||
/* We need to lock THD::LOCK_thd_data to protect victim
|
||||
from concurrent usage or disconnect or delete. */
|
||||
mysql_mutex_lock(&victim_thd->LOCK_thd_data);
|
||||
int res= wsrep_abort_thd(bf_thd_ptr, victim_thd_ptr, signal);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
void wsrep_init_startup (bool first)
|
||||
{
|
||||
if (wsrep_init()) unireg_abort(1);
|
||||
|
||||
wsrep_thr_lock_init(
|
||||
(wsrep_thd_is_brute_force_fun)wsrep_thd_is_BF,
|
||||
(wsrep_abort_thd_fun)wsrep_abort_thd,
|
||||
(wsrep_abort_thd_fun)wsrep_thr_abort_thd,
|
||||
wsrep_debug, wsrep_convert_LOCK_to_trx,
|
||||
(wsrep_on_fun)wsrep_on);
|
||||
|
||||
@ -1694,6 +1706,11 @@ static int wsrep_TOI_begin(THD *thd, char *db_, char *table_,
|
||||
case SQLCOM_DROP_TABLE:
|
||||
buf_err= wsrep_drop_table_query(thd, &buf, &buf_len);
|
||||
break;
|
||||
case SQLCOM_KILL:
|
||||
WSREP_DEBUG("KILL as TOI: %s", thd->query());
|
||||
buf_err= wsrep_to_buf_helper(thd, thd->query(), thd->query_length(),
|
||||
&buf, &buf_len);
|
||||
break;
|
||||
case SQLCOM_CREATE_ROLE:
|
||||
if (sp_process_definer(thd))
|
||||
{
|
||||
@ -2058,8 +2075,13 @@ bool wsrep_grant_mdl_exception(MDL_context *requestor_ctx,
|
||||
ticket->wsrep_report(true);
|
||||
}
|
||||
|
||||
mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
|
||||
wsrep_abort_thd((void *) request_thd, (void *) granted_thd, 1);
|
||||
/* This will call wsrep_abort_transaction so we should hold
|
||||
THD::LOCK_thd_data to protect victim from concurrent usage
|
||||
or disconnect or delete. */
|
||||
if (request_thd->wsrep_exec_mode == REPL_RECV)
|
||||
DEBUG_SYNC(request_thd, "wsrep_after_granted_lock");
|
||||
|
||||
wsrep_abort_thd((void *) request_thd, (void *) granted_thd, true);
|
||||
ret= false;
|
||||
}
|
||||
}
|
||||
@ -2241,6 +2263,7 @@ error:
|
||||
static bool abort_replicated(THD *thd)
|
||||
{
|
||||
bool ret_code= false;
|
||||
mysql_mutex_lock(&thd->LOCK_thd_data);
|
||||
if (thd->wsrep_query_state== QUERY_COMMITTING)
|
||||
{
|
||||
WSREP_DEBUG("aborting replicated trx: %llu", (ulonglong)(thd->real_id));
|
||||
@ -2248,6 +2271,8 @@ static bool abort_replicated(THD *thd)
|
||||
(void)wsrep_abort_thd(thd, thd, TRUE);
|
||||
ret_code= true;
|
||||
}
|
||||
else
|
||||
mysql_mutex_unlock(&thd->LOCK_thd_data);
|
||||
return ret_code;
|
||||
}
|
||||
|
||||
@ -2294,6 +2319,8 @@ static bool have_client_connections()
|
||||
(longlong) tmp->thread_id));
|
||||
if (is_client_connection(tmp) && tmp->killed == KILL_CONNECTION)
|
||||
{
|
||||
WSREP_DEBUG("Informing thread %lld that it's time to die",
|
||||
(longlong)tmp->thread_id);
|
||||
(void)abort_replicated(tmp);
|
||||
return true;
|
||||
}
|
||||
@ -2378,6 +2405,8 @@ void wsrep_close_client_connections(my_bool wait_to_end, THD *except_caller_thd)
|
||||
{
|
||||
DBUG_PRINT("quit",("Informing thread %lld that it's time to die",
|
||||
(longlong) tmp->thread_id));
|
||||
WSREP_DEBUG("Informing thread %lld that it's time to die",
|
||||
(longlong)tmp->thread_id);
|
||||
/* We skip slave threads & scheduler on this first loop through. */
|
||||
if (!is_client_connection(tmp))
|
||||
continue;
|
||||
@ -2394,15 +2423,18 @@ void wsrep_close_client_connections(my_bool wait_to_end, THD *except_caller_thd)
|
||||
continue;
|
||||
}
|
||||
|
||||
/* replicated transactions must be skipped */
|
||||
/* replicated transactions must be skipped and aborted
|
||||
with wsrep_abort_thd. */
|
||||
if (abort_replicated(tmp))
|
||||
continue;
|
||||
|
||||
WSREP_DEBUG("closing connection %lld", (longlong) tmp->thread_id);
|
||||
|
||||
/*
|
||||
instead of wsrep_close_thread() we do now soft kill by THD::awake
|
||||
*/
|
||||
instead of wsrep_close_thread() we do now soft kill by
|
||||
THD::awake(). Here also victim needs to be protected from
|
||||
concurrent usage or disconnect or delete.
|
||||
*/
|
||||
mysql_mutex_lock(&tmp->LOCK_thd_data);
|
||||
|
||||
tmp->awake(KILL_CONNECTION);
|
||||
@ -2423,7 +2455,6 @@ void wsrep_close_client_connections(my_bool wait_to_end, THD *except_caller_thd)
|
||||
I_List_iterator<THD> it2(threads);
|
||||
while ((tmp=it2++))
|
||||
{
|
||||
#ifndef __bsdi__ // Bug in BSDI kernel
|
||||
if (is_client_connection(tmp) &&
|
||||
!abort_replicated(tmp) &&
|
||||
!is_replaying_connection(tmp) &&
|
||||
@ -2432,7 +2463,6 @@ void wsrep_close_client_connections(my_bool wait_to_end, THD *except_caller_thd)
|
||||
WSREP_INFO("killing local connection: %lld", (longlong) tmp->thread_id);
|
||||
close_connection(tmp,0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
DBUG_PRINT("quit",("Waiting for threads to die (count=%u)",thread_count));
|
||||
@ -2621,7 +2651,8 @@ extern "C" void wsrep_thd_set_query_state(
|
||||
|
||||
void wsrep_thd_set_conflict_state(THD *thd, enum wsrep_conflict_state state)
|
||||
{
|
||||
if (WSREP(thd)) thd->wsrep_conflict_state= state;
|
||||
mysql_mutex_assert_owner(&thd->LOCK_thd_data);
|
||||
thd->wsrep_conflict_state= state;
|
||||
}
|
||||
|
||||
|
||||
@ -2762,9 +2793,10 @@ extern "C" void wsrep_thd_awake(THD *thd, my_bool signal)
|
||||
{
|
||||
if (signal)
|
||||
{
|
||||
mysql_mutex_lock(&thd->LOCK_thd_data);
|
||||
/* Here we should hold THD::LOCK_thd_data to
|
||||
protect from concurrent usage. */
|
||||
mysql_mutex_assert_owner(&thd->LOCK_thd_data);
|
||||
thd->awake(KILL_QUERY);
|
||||
mysql_mutex_unlock(&thd->LOCK_thd_data);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
Reference in New Issue
Block a user