diff --git a/mysql-test/suite/rpl/r/rpl_start_slave_deadlock_sys_vars.result b/mysql-test/suite/rpl/r/rpl_start_slave_deadlock_sys_vars.result new file mode 100644 index 00000000000..f69a323f980 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_start_slave_deadlock_sys_vars.result @@ -0,0 +1,31 @@ +include/master-slave.inc +[connection master] +# connection: slave +SET @save_slave_net_timeout = @@GLOBAL.slave_net_timeout; +STOP SLAVE; +include/wait_for_slave_to_stop.inc +# open an extra connection to the slave +# connection: slave2 +# set debug synchronization point +SET DEBUG_SYNC='fix_slave_net_timeout SIGNAL parked WAIT_FOR go'; +# attempt to set slave_net_timeout, will wait on sync point +SET @@GLOBAL.slave_net_timeout = 100; +# connection: slave +SET DEBUG_SYNC='now WAIT_FOR parked'; +# connection: slave1 +# attempt to start the SQL thread +START SLAVE SQL_THREAD; +# connection: slave +# wait until SQL thread has been started +# sleep a bit so that the SQL thread THD handle is initialized +# signal the set slave_net_timeout to continue +SET DEBUG_SYNC='now SIGNAL go'; +# connection: slave2 +# reap result of set slave_net_timeout +# connection: slave1 +# reap result of starting the SQL thread +# disconnect: slave2 +# connection: slave +# cleanup +SET @@GLOBAL.slave_net_timeout = @save_slave_net_timeout; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_start_slave_deadlock_sys_vars.test b/mysql-test/suite/rpl/t/rpl_start_slave_deadlock_sys_vars.test new file mode 100644 index 00000000000..dfcec6758b3 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_start_slave_deadlock_sys_vars.test @@ -0,0 +1,57 @@ +source include/master-slave.inc; +source include/have_debug_sync.inc; + +--echo # connection: slave +connection slave; +SET @save_slave_net_timeout = @@GLOBAL.slave_net_timeout; +STOP SLAVE; +source include/wait_for_slave_to_stop.inc; + +--echo # open an extra connection to the slave +connect(slave2,127.0.0.1,root,,test,$SLAVE_MYPORT,); +--echo # connection: slave2 +--echo # set debug synchronization point +SET DEBUG_SYNC='fix_slave_net_timeout SIGNAL parked WAIT_FOR go'; +--echo # attempt to set slave_net_timeout, will wait on sync point +--send SET @@GLOBAL.slave_net_timeout = 100 + +--echo # connection: slave +connection slave; +SET DEBUG_SYNC='now WAIT_FOR parked'; + +--echo # connection: slave1 +connection slave1; +--echo # attempt to start the SQL thread +--send START SLAVE SQL_THREAD + +--echo # connection: slave +connection slave; +--echo # wait until SQL thread has been started +let $wait_condition= + select count(*) = 1 from information_schema.processlist + where state = "Waiting for slave thread to start" and info = "START SLAVE SQL_THREAD"; +--source include/wait_condition.inc +--echo # sleep a bit so that the SQL thread THD handle is initialized +sleep 2; +--echo # signal the set slave_net_timeout to continue +SET DEBUG_SYNC='now SIGNAL go'; + +--echo # connection: slave2 +connection slave2; +--echo # reap result of set slave_net_timeout +--reap + +--echo # connection: slave1 +connection slave1; +--echo # reap result of starting the SQL thread +--reap + +--echo # disconnect: slave2 +disconnect slave2; + +--echo # connection: slave +connection slave; +--echo # cleanup +SET @@GLOBAL.slave_net_timeout = @save_slave_net_timeout; + +source include/rpl_end.inc; diff --git a/sql/slave.cc b/sql/slave.cc index 9149cd82d5e..7b7fd12e267 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -3005,6 +3005,8 @@ pthread_handler_t handle_slave_io(void *arg) mysql= NULL ; retry_count= 0; + thd= new THD; // note that contructor of THD uses DBUG_ ! + mysql_mutex_lock(&mi->run_lock); /* Inform waiting threads that slave has started */ mi->slave_run_id++; @@ -3013,7 +3015,6 @@ pthread_handler_t handle_slave_io(void *arg) mi->events_till_disconnect = disconnect_slave_event_count; #endif - thd= new THD; // note that contructor of THD uses DBUG_ ! THD_CHECK_SENTRY(thd); mi->io_thd = thd; diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 6cc9c9577ee..2fdba7fda82 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -47,6 +47,7 @@ #include "sql_base.h" // close_cached_tables #include #include "log_slow.h" +#include "debug_sync.h" // DEBUG_SYNC #include "log_event.h" #ifdef WITH_PERFSCHEMA_STORAGE_ENGINE @@ -3256,6 +3257,13 @@ bool Sys_var_rpl_filter::do_check(THD *thd, set_var *var) { bool status; + /* + We must not be holding LOCK_global_system_variables here, otherwise we can + deadlock with THD::init() which is invoked from within the slave threads + with opposite locking order. + */ + mysql_mutex_assert_not_owner(&LOCK_global_system_variables); + mysql_mutex_lock(&LOCK_active_mi); mysql_mutex_lock(&active_mi->rli.run_lock); @@ -3272,22 +3280,43 @@ bool Sys_var_rpl_filter::do_check(THD *thd, set_var *var) return status; } +void Sys_var_rpl_filter::lock(void) +{ + /* + Starting a slave thread causes the new thread to attempt to + acquire LOCK_global_system_variables (in THD::init) while + LOCK_active_mi is being held by the thread that initiated + the process. In order to not violate the lock order, unlock + LOCK_global_system_variables before grabbing LOCK_active_mi. + */ + mysql_mutex_unlock(&LOCK_global_system_variables); + + mysql_mutex_lock(&LOCK_active_mi); + mysql_mutex_lock(&active_mi->rli.run_lock); +} + +void Sys_var_rpl_filter::unlock(void) +{ + mysql_mutex_unlock(&active_mi->rli.run_lock); + mysql_mutex_unlock(&LOCK_active_mi); + + mysql_mutex_lock(&LOCK_global_system_variables); +} + bool Sys_var_rpl_filter::global_update(THD *thd, set_var *var) { bool slave_running, status= false; - mysql_mutex_lock(&LOCK_active_mi); - mysql_mutex_lock(&active_mi->rli.run_lock); + lock(); if (! (slave_running= active_mi->rli.slave_running)) status= set_filter_value(var->save_result.string_value.str); - mysql_mutex_unlock(&active_mi->rli.run_lock); - mysql_mutex_unlock(&LOCK_active_mi); - if (slave_running) my_error(ER_SLAVE_MUST_STOP, MYF(0)); + unlock(); + return slave_running || status; } @@ -3326,8 +3355,7 @@ uchar *Sys_var_rpl_filter::global_value_ptr(THD *thd, LEX_STRING *base) tmp.length(0); - mysql_mutex_lock(&LOCK_active_mi); - mysql_mutex_lock(&active_mi->rli.run_lock); + lock(); switch (opt_id) { case OPT_REPLICATE_DO_DB: @@ -3350,8 +3378,7 @@ uchar *Sys_var_rpl_filter::global_value_ptr(THD *thd, LEX_STRING *base) break; } - mysql_mutex_unlock(&active_mi->rli.run_lock); - mysql_mutex_unlock(&LOCK_active_mi); + unlock(); return (uchar *) thd->strmake(tmp.ptr(), tmp.length()); } @@ -3404,6 +3431,9 @@ static Sys_var_charptr Sys_slave_load_tmpdir( static bool fix_slave_net_timeout(sys_var *self, THD *thd, enum_var_type type) { + DEBUG_SYNC(thd, "fix_slave_net_timeout"); + + mysql_mutex_unlock(&LOCK_global_system_variables); mysql_mutex_lock(&LOCK_active_mi); DBUG_PRINT("info", ("slave_net_timeout=%u mi->heartbeat_period=%.3f", slave_net_timeout, @@ -3413,6 +3443,7 @@ static bool fix_slave_net_timeout(sys_var *self, THD *thd, enum_var_type type) ER_SLAVE_HEARTBEAT_VALUE_OUT_OF_RANGE_MAX, ER(ER_SLAVE_HEARTBEAT_VALUE_OUT_OF_RANGE_MAX)); mysql_mutex_unlock(&LOCK_active_mi); + mysql_mutex_lock(&LOCK_global_system_variables); return false; } static Sys_var_uint Sys_slave_net_timeout( @@ -3439,6 +3470,7 @@ static bool check_slave_skip_counter(sys_var *self, THD *thd, set_var *var) } static bool fix_slave_skip_counter(sys_var *self, THD *thd, enum_var_type type) { + mysql_mutex_unlock(&LOCK_global_system_variables); mysql_mutex_lock(&LOCK_active_mi); mysql_mutex_lock(&active_mi->rli.run_lock); /* @@ -3454,6 +3486,7 @@ static bool fix_slave_skip_counter(sys_var *self, THD *thd, enum_var_type type) } mysql_mutex_unlock(&active_mi->rli.run_lock); mysql_mutex_unlock(&LOCK_active_mi); + mysql_mutex_lock(&LOCK_global_system_variables); return 0; } static Sys_var_uint Sys_slave_skip_counter( diff --git a/sql/sys_vars.h b/sql/sys_vars.h index 4ea6b1f036e..5bec56a78cc 100644 --- a/sql/sys_vars.h +++ b/sql/sys_vars.h @@ -588,6 +588,8 @@ public: protected: uchar *global_value_ptr(THD *thd, LEX_STRING *base); bool set_filter_value(const char *value); + void lock(void); + void unlock(void); }; /**