mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
MDEV-17346 parallel slave start and stop races to workers disappeared
The bug appears as a slave SQL thread hanging in rpl_parallel_thread_pool::get_thread() while there are no slave worker threads to wake it. The reason for the hang is that, at parallel slave worker pool activation, the SQL thread being started could read the worker pool size concurrently with pool deactivation. When reading, the SQL thread did not employ the necessary protection against the race. Fixed by making the SQL thread, at pool activation, first grab the same lock that a potential deactivator also takes before accessing the pool size.
This commit is contained in:
@@ -1617,13 +1617,32 @@ int rpl_parallel_resize_pool_if_no_slaves(void)
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Pool activation is preceded by taking a "lock" of pool_mark_busy
|
||||
which guarantees the number of running slaves drops to zero atomically
|
||||
with the number of pool workers.
|
||||
This resolves race between the function caller thread and one
|
||||
that may be attempting to deactivate the pool.
|
||||
*/
|
||||
int
|
||||
rpl_parallel_activate_pool(rpl_parallel_thread_pool *pool)
|
||||
{
|
||||
int rc= 0;
|
||||
|
||||
if ((rc= pool_mark_busy(pool, current_thd)))
|
||||
return rc; // killed
|
||||
|
||||
if (!pool->count)
|
||||
return rpl_parallel_change_thread_count(pool, opt_slave_parallel_threads,
|
||||
0);
|
||||
return 0;
|
||||
{
|
||||
pool_mark_not_busy(pool);
|
||||
rc= rpl_parallel_change_thread_count(pool, opt_slave_parallel_threads,
|
||||
0);
|
||||
}
|
||||
else
|
||||
{
|
||||
pool_mark_not_busy(pool);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user