mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
MDEV-9573 'Stop slave' hangs on replication slave
The reason for this is that STOP SLAVE takes LOCK_active_mi over the whole operation, while some slave operations also need LOCK_active_mi, which causes deadlocks. Fixed by introducing object counting for Master_info and not taking LOCK_active_mi over stop slave or even stop_all_slaves().
Another benefit of this approach is that it allows:
- Multiple threads can run SHOW SLAVE STATUS at the same time
- START/STOP/RESET/SLAVE STATUS on a slave will not block other slaves
- Simpler interface for handling get_master_info()
Other changes in this commit:
- Added some missing unlocks of 'log_lock' in error conditions
- Moved rpl_parallel_inactivate_pool(&global_rpl_thread_pool) to the end of stop_slave() to not have to use LOCK_active_mi inside terminate_slave_threads()
- Changed the argument for remove_master_info() to Master_info, as we always have this available
- Fixed a core dump when doing FLUSH TABLES WITH READ LOCK and parallel replication. The problem was that waiting for pause_for_ftwrl was not done when deleting rpt->current_owner after a force_abort.
This commit is contained in:
@@ -1312,6 +1312,29 @@ handle_rpl_parallel_thread(void *arg)
|
||||
}
|
||||
if (!in_event_group)
|
||||
{
|
||||
/* If we are in a FLUSH TABLES WITH READ LOCK, wait for it */
|
||||
while (rpt->current_entry && rpt->pause_for_ftwrl)
|
||||
{
|
||||
/*
|
||||
We are currently in the delicate process of pausing parallel
|
||||
replication while FLUSH TABLES WITH READ LOCK is starting. We must
|
||||
not de-allocate the thread (setting rpt->current_owner= NULL) until
|
||||
rpl_unpause_after_ftwrl() has woken us up.
|
||||
*/
|
||||
rpl_parallel_entry *e= rpt->current_entry;
|
||||
/*
|
||||
Ensure that we will unblock rpl_pause_for_ftwrl()
|
||||
e->pause_sub_id may be ULONGLONG_MAX if rpt->current_entry has changed
|
||||
*/
|
||||
DBUG_ASSERT(e->pause_sub_id == (uint64)ULONGLONG_MAX ||
|
||||
e->last_committed_sub_id >= e->pause_sub_id);
|
||||
mysql_mutex_lock(&e->LOCK_parallel_entry);
|
||||
mysql_mutex_unlock(&rpt->LOCK_rpl_thread);
|
||||
if (rpt->pause_for_ftwrl)
|
||||
mysql_cond_wait(&e->COND_parallel_entry, &e->LOCK_parallel_entry);
|
||||
mysql_mutex_unlock(&e->LOCK_parallel_entry);
|
||||
mysql_mutex_lock(&rpt->LOCK_rpl_thread);
|
||||
}
|
||||
rpt->current_owner= NULL;
|
||||
/* Tell wait_for_done() that we are done, if it is waiting. */
|
||||
if (likely(rpt->current_entry) &&
|
||||
@@ -1369,6 +1392,28 @@ rpl_parallel_change_thread_count(rpl_parallel_thread_pool *pool,
|
||||
if ((res= pool_mark_busy(pool, current_thd)))
|
||||
return res;
|
||||
|
||||
/* Protect against parallel pool resizes */
|
||||
if (pool->count == new_count)
|
||||
{
|
||||
pool_mark_not_busy(pool);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
If we are about to delete pool, do an extra check that there are no new
|
||||
slave threads running since we marked pool busy
|
||||
*/
|
||||
if (!new_count)
|
||||
{
|
||||
if (any_slave_sql_running())
|
||||
{
|
||||
DBUG_PRINT("warning",
|
||||
("SQL threads running while trying to reset parallel pool"));
|
||||
pool_mark_not_busy(pool);
|
||||
return 0; // Ok to not resize pool
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Allocate the new list of threads up-front.
|
||||
That way, if we fail half-way, we only need to free whatever we managed
|
||||
@@ -1382,7 +1427,7 @@ rpl_parallel_change_thread_count(rpl_parallel_thread_pool *pool,
|
||||
{
|
||||
my_error(ER_OUTOFMEMORY, MYF(0), (int(new_count*sizeof(*new_list) +
|
||||
new_count*sizeof(*rpt_array))));
|
||||
goto err;;
|
||||
goto err;
|
||||
}
|
||||
|
||||
for (i= 0; i < new_count; ++i)
|
||||
@@ -1503,6 +1548,20 @@ err:
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
Deactivate the parallel replication thread pool, if there are now no more
|
||||
SQL threads running.
|
||||
*/
|
||||
|
||||
int rpl_parallel_resize_pool_if_no_slaves(void)
|
||||
{
|
||||
/* master_info_index is set to NULL on shutdown */
|
||||
if (opt_slave_parallel_threads > 0 && !any_slave_sql_running() &&
|
||||
master_info_index)
|
||||
return rpl_parallel_inactivate_pool(&global_rpl_thread_pool);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
rpl_parallel_activate_pool(rpl_parallel_thread_pool *pool)
|
||||
@@ -1814,6 +1873,7 @@ rpl_parallel_thread_pool::get_thread(rpl_parallel_thread **owner,
|
||||
{
|
||||
rpl_parallel_thread *rpt;
|
||||
|
||||
DBUG_ASSERT(count > 0);
|
||||
mysql_mutex_lock(&LOCK_rpl_thread_pool);
|
||||
while (unlikely(busy) || !(rpt= free_list))
|
||||
mysql_cond_wait(&COND_rpl_thread_pool, &LOCK_rpl_thread_pool);
|
||||
|
Reference in New Issue
Block a user