mirror of
https://github.com/MariaDB/server.git
synced 2025-08-01 03:47:19 +03:00
MDEV-450: Deadlock between starting a slave and reading system variables
Starting the SQL thread might deadlock with reading the values of the replication filtering options. The deadlock is due to a lock order violation when the variables are read or set. For example, reading replicate_ignore_table first acquires LOCK_global_system_variables in sys_var::value_ptr and later acquires LOCK_active_mi in Sys_var_rpl_filter::global_value_ptr. This violates the order established when starting a SQL thread, where LOCK_active_mi is acquired before start_slave, and ends up creating a thread (handle_slave_sql) that allocates a THD handle whose constructor acquires LOCK_global_system_variables in THD::init. The solution is to unlock LOCK_global_system_variables before the replication filtering options are set or read. This way the lock order is preserved and the data being read/set is still protected given that it acquires LOCK_active_mi.
This commit is contained in:
@ -0,0 +1,31 @@
|
|||||||
|
include/master-slave.inc
|
||||||
|
[connection master]
|
||||||
|
# connection: slave
|
||||||
|
SET @save_slave_net_timeout = @@GLOBAL.slave_net_timeout;
|
||||||
|
STOP SLAVE;
|
||||||
|
include/wait_for_slave_to_stop.inc
|
||||||
|
# open an extra connection to the slave
|
||||||
|
# connection: slave2
|
||||||
|
# set debug synchronization point
|
||||||
|
SET DEBUG_SYNC='fix_slave_net_timeout SIGNAL parked WAIT_FOR go';
|
||||||
|
# attempt to set slave_net_timeout, will wait on sync point
|
||||||
|
SET @@GLOBAL.slave_net_timeout = 100;
|
||||||
|
# connection: slave
|
||||||
|
SET DEBUG_SYNC='now WAIT_FOR parked';
|
||||||
|
# connection: slave1
|
||||||
|
# attempt to start the SQL thread
|
||||||
|
START SLAVE SQL_THREAD;
|
||||||
|
# connection: slave
|
||||||
|
# wait until SQL thread has been started
|
||||||
|
# sleep a bit so that the SQL thread THD handle is initialized
|
||||||
|
# signal the set slave_net_timeout to continue
|
||||||
|
SET DEBUG_SYNC='now SIGNAL go';
|
||||||
|
# connection: slave2
|
||||||
|
# reap result of set slave_net_timeout
|
||||||
|
# connection: slave1
|
||||||
|
# reap result of starting the SQL thread
|
||||||
|
# disconnect: slave2
|
||||||
|
# connection: slave
|
||||||
|
# cleanup
|
||||||
|
SET @@GLOBAL.slave_net_timeout = @save_slave_net_timeout;
|
||||||
|
include/rpl_end.inc
|
@ -0,0 +1,57 @@
|
|||||||
|
source include/master-slave.inc;
|
||||||
|
source include/have_debug_sync.inc;
|
||||||
|
|
||||||
|
--echo # connection: slave
|
||||||
|
connection slave;
|
||||||
|
SET @save_slave_net_timeout = @@GLOBAL.slave_net_timeout;
|
||||||
|
STOP SLAVE;
|
||||||
|
source include/wait_for_slave_to_stop.inc;
|
||||||
|
|
||||||
|
--echo # open an extra connection to the slave
|
||||||
|
connect(slave2,127.0.0.1,root,,test,$SLAVE_MYPORT,);
|
||||||
|
--echo # connection: slave2
|
||||||
|
--echo # set debug synchronization point
|
||||||
|
SET DEBUG_SYNC='fix_slave_net_timeout SIGNAL parked WAIT_FOR go';
|
||||||
|
--echo # attempt to set slave_net_timeout, will wait on sync point
|
||||||
|
--send SET @@GLOBAL.slave_net_timeout = 100
|
||||||
|
|
||||||
|
--echo # connection: slave
|
||||||
|
connection slave;
|
||||||
|
SET DEBUG_SYNC='now WAIT_FOR parked';
|
||||||
|
|
||||||
|
--echo # connection: slave1
|
||||||
|
connection slave1;
|
||||||
|
--echo # attempt to start the SQL thread
|
||||||
|
--send START SLAVE SQL_THREAD
|
||||||
|
|
||||||
|
--echo # connection: slave
|
||||||
|
connection slave;
|
||||||
|
--echo # wait until SQL thread has been started
|
||||||
|
let $wait_condition=
|
||||||
|
select count(*) = 1 from information_schema.processlist
|
||||||
|
where state = "Waiting for slave thread to start" and info = "START SLAVE SQL_THREAD";
|
||||||
|
--source include/wait_condition.inc
|
||||||
|
--echo # sleep a bit so that the SQL thread THD handle is initialized
|
||||||
|
sleep 2;
|
||||||
|
--echo # signal the set slave_net_timeout to continue
|
||||||
|
SET DEBUG_SYNC='now SIGNAL go';
|
||||||
|
|
||||||
|
--echo # connection: slave2
|
||||||
|
connection slave2;
|
||||||
|
--echo # reap result of set slave_net_timeout
|
||||||
|
--reap
|
||||||
|
|
||||||
|
--echo # connection: slave1
|
||||||
|
connection slave1;
|
||||||
|
--echo # reap result of starting the SQL thread
|
||||||
|
--reap
|
||||||
|
|
||||||
|
--echo # disconnect: slave2
|
||||||
|
disconnect slave2;
|
||||||
|
|
||||||
|
--echo # connection: slave
|
||||||
|
connection slave;
|
||||||
|
--echo # cleanup
|
||||||
|
SET @@GLOBAL.slave_net_timeout = @save_slave_net_timeout;
|
||||||
|
|
||||||
|
source include/rpl_end.inc;
|
@ -3005,6 +3005,8 @@ pthread_handler_t handle_slave_io(void *arg)
|
|||||||
mysql= NULL ;
|
mysql= NULL ;
|
||||||
retry_count= 0;
|
retry_count= 0;
|
||||||
|
|
||||||
|
thd= new THD; // note that contructor of THD uses DBUG_ !
|
||||||
|
|
||||||
mysql_mutex_lock(&mi->run_lock);
|
mysql_mutex_lock(&mi->run_lock);
|
||||||
/* Inform waiting threads that slave has started */
|
/* Inform waiting threads that slave has started */
|
||||||
mi->slave_run_id++;
|
mi->slave_run_id++;
|
||||||
@ -3013,7 +3015,6 @@ pthread_handler_t handle_slave_io(void *arg)
|
|||||||
mi->events_till_disconnect = disconnect_slave_event_count;
|
mi->events_till_disconnect = disconnect_slave_event_count;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
thd= new THD; // note that contructor of THD uses DBUG_ !
|
|
||||||
THD_CHECK_SENTRY(thd);
|
THD_CHECK_SENTRY(thd);
|
||||||
mi->io_thd = thd;
|
mi->io_thd = thd;
|
||||||
|
|
||||||
|
@ -47,6 +47,7 @@
|
|||||||
#include "sql_base.h" // close_cached_tables
|
#include "sql_base.h" // close_cached_tables
|
||||||
#include <myisam.h>
|
#include <myisam.h>
|
||||||
#include "log_slow.h"
|
#include "log_slow.h"
|
||||||
|
#include "debug_sync.h" // DEBUG_SYNC
|
||||||
|
|
||||||
#include "log_event.h"
|
#include "log_event.h"
|
||||||
#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE
|
#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE
|
||||||
@ -3256,6 +3257,13 @@ bool Sys_var_rpl_filter::do_check(THD *thd, set_var *var)
|
|||||||
{
|
{
|
||||||
bool status;
|
bool status;
|
||||||
|
|
||||||
|
/*
|
||||||
|
We must not be holding LOCK_global_system_variables here, otherwise we can
|
||||||
|
deadlock with THD::init() which is invoked from within the slave threads
|
||||||
|
with opposite locking order.
|
||||||
|
*/
|
||||||
|
mysql_mutex_assert_not_owner(&LOCK_global_system_variables);
|
||||||
|
|
||||||
mysql_mutex_lock(&LOCK_active_mi);
|
mysql_mutex_lock(&LOCK_active_mi);
|
||||||
mysql_mutex_lock(&active_mi->rli.run_lock);
|
mysql_mutex_lock(&active_mi->rli.run_lock);
|
||||||
|
|
||||||
@ -3272,22 +3280,43 @@ bool Sys_var_rpl_filter::do_check(THD *thd, set_var *var)
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Sys_var_rpl_filter::lock(void)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
Starting a slave thread causes the new thread to attempt to
|
||||||
|
acquire LOCK_global_system_variables (in THD::init) while
|
||||||
|
LOCK_active_mi is being held by the thread that initiated
|
||||||
|
the process. In order to not violate the lock order, unlock
|
||||||
|
LOCK_global_system_variables before grabbing LOCK_active_mi.
|
||||||
|
*/
|
||||||
|
mysql_mutex_unlock(&LOCK_global_system_variables);
|
||||||
|
|
||||||
|
mysql_mutex_lock(&LOCK_active_mi);
|
||||||
|
mysql_mutex_lock(&active_mi->rli.run_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Sys_var_rpl_filter::unlock(void)
|
||||||
|
{
|
||||||
|
mysql_mutex_unlock(&active_mi->rli.run_lock);
|
||||||
|
mysql_mutex_unlock(&LOCK_active_mi);
|
||||||
|
|
||||||
|
mysql_mutex_lock(&LOCK_global_system_variables);
|
||||||
|
}
|
||||||
|
|
||||||
bool Sys_var_rpl_filter::global_update(THD *thd, set_var *var)
|
bool Sys_var_rpl_filter::global_update(THD *thd, set_var *var)
|
||||||
{
|
{
|
||||||
bool slave_running, status= false;
|
bool slave_running, status= false;
|
||||||
|
|
||||||
mysql_mutex_lock(&LOCK_active_mi);
|
lock();
|
||||||
mysql_mutex_lock(&active_mi->rli.run_lock);
|
|
||||||
|
|
||||||
if (! (slave_running= active_mi->rli.slave_running))
|
if (! (slave_running= active_mi->rli.slave_running))
|
||||||
status= set_filter_value(var->save_result.string_value.str);
|
status= set_filter_value(var->save_result.string_value.str);
|
||||||
|
|
||||||
mysql_mutex_unlock(&active_mi->rli.run_lock);
|
|
||||||
mysql_mutex_unlock(&LOCK_active_mi);
|
|
||||||
|
|
||||||
if (slave_running)
|
if (slave_running)
|
||||||
my_error(ER_SLAVE_MUST_STOP, MYF(0));
|
my_error(ER_SLAVE_MUST_STOP, MYF(0));
|
||||||
|
|
||||||
|
unlock();
|
||||||
|
|
||||||
return slave_running || status;
|
return slave_running || status;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3326,8 +3355,7 @@ uchar *Sys_var_rpl_filter::global_value_ptr(THD *thd, LEX_STRING *base)
|
|||||||
|
|
||||||
tmp.length(0);
|
tmp.length(0);
|
||||||
|
|
||||||
mysql_mutex_lock(&LOCK_active_mi);
|
lock();
|
||||||
mysql_mutex_lock(&active_mi->rli.run_lock);
|
|
||||||
|
|
||||||
switch (opt_id) {
|
switch (opt_id) {
|
||||||
case OPT_REPLICATE_DO_DB:
|
case OPT_REPLICATE_DO_DB:
|
||||||
@ -3350,8 +3378,7 @@ uchar *Sys_var_rpl_filter::global_value_ptr(THD *thd, LEX_STRING *base)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
mysql_mutex_unlock(&active_mi->rli.run_lock);
|
unlock();
|
||||||
mysql_mutex_unlock(&LOCK_active_mi);
|
|
||||||
|
|
||||||
return (uchar *) thd->strmake(tmp.ptr(), tmp.length());
|
return (uchar *) thd->strmake(tmp.ptr(), tmp.length());
|
||||||
}
|
}
|
||||||
@ -3404,6 +3431,9 @@ static Sys_var_charptr Sys_slave_load_tmpdir(
|
|||||||
|
|
||||||
static bool fix_slave_net_timeout(sys_var *self, THD *thd, enum_var_type type)
|
static bool fix_slave_net_timeout(sys_var *self, THD *thd, enum_var_type type)
|
||||||
{
|
{
|
||||||
|
DEBUG_SYNC(thd, "fix_slave_net_timeout");
|
||||||
|
|
||||||
|
mysql_mutex_unlock(&LOCK_global_system_variables);
|
||||||
mysql_mutex_lock(&LOCK_active_mi);
|
mysql_mutex_lock(&LOCK_active_mi);
|
||||||
DBUG_PRINT("info", ("slave_net_timeout=%u mi->heartbeat_period=%.3f",
|
DBUG_PRINT("info", ("slave_net_timeout=%u mi->heartbeat_period=%.3f",
|
||||||
slave_net_timeout,
|
slave_net_timeout,
|
||||||
@ -3413,6 +3443,7 @@ static bool fix_slave_net_timeout(sys_var *self, THD *thd, enum_var_type type)
|
|||||||
ER_SLAVE_HEARTBEAT_VALUE_OUT_OF_RANGE_MAX,
|
ER_SLAVE_HEARTBEAT_VALUE_OUT_OF_RANGE_MAX,
|
||||||
ER(ER_SLAVE_HEARTBEAT_VALUE_OUT_OF_RANGE_MAX));
|
ER(ER_SLAVE_HEARTBEAT_VALUE_OUT_OF_RANGE_MAX));
|
||||||
mysql_mutex_unlock(&LOCK_active_mi);
|
mysql_mutex_unlock(&LOCK_active_mi);
|
||||||
|
mysql_mutex_lock(&LOCK_global_system_variables);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
static Sys_var_uint Sys_slave_net_timeout(
|
static Sys_var_uint Sys_slave_net_timeout(
|
||||||
@ -3439,6 +3470,7 @@ static bool check_slave_skip_counter(sys_var *self, THD *thd, set_var *var)
|
|||||||
}
|
}
|
||||||
static bool fix_slave_skip_counter(sys_var *self, THD *thd, enum_var_type type)
|
static bool fix_slave_skip_counter(sys_var *self, THD *thd, enum_var_type type)
|
||||||
{
|
{
|
||||||
|
mysql_mutex_unlock(&LOCK_global_system_variables);
|
||||||
mysql_mutex_lock(&LOCK_active_mi);
|
mysql_mutex_lock(&LOCK_active_mi);
|
||||||
mysql_mutex_lock(&active_mi->rli.run_lock);
|
mysql_mutex_lock(&active_mi->rli.run_lock);
|
||||||
/*
|
/*
|
||||||
@ -3454,6 +3486,7 @@ static bool fix_slave_skip_counter(sys_var *self, THD *thd, enum_var_type type)
|
|||||||
}
|
}
|
||||||
mysql_mutex_unlock(&active_mi->rli.run_lock);
|
mysql_mutex_unlock(&active_mi->rli.run_lock);
|
||||||
mysql_mutex_unlock(&LOCK_active_mi);
|
mysql_mutex_unlock(&LOCK_active_mi);
|
||||||
|
mysql_mutex_lock(&LOCK_global_system_variables);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
static Sys_var_uint Sys_slave_skip_counter(
|
static Sys_var_uint Sys_slave_skip_counter(
|
||||||
|
@ -588,6 +588,8 @@ public:
|
|||||||
protected:
|
protected:
|
||||||
uchar *global_value_ptr(THD *thd, LEX_STRING *base);
|
uchar *global_value_ptr(THD *thd, LEX_STRING *base);
|
||||||
bool set_filter_value(const char *value);
|
bool set_filter_value(const char *value);
|
||||||
|
void lock(void);
|
||||||
|
void unlock(void);
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Reference in New Issue
Block a user