mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
MDEV-10653: SHOW SLAVE STATUS Can Deadlock an Errored Slave
AKA rpl.rpl_parallel, binlog_encryption.rpl_parallel fails in buildbot with timeout in include.

A replication parallel worker thread can deadlock with another connection running SHOW SLAVE STATUS. That is, if the replication worker thread is in do_gco_wait() and is killed, it will already hold the LOCK_parallel_entry, and during error reporting, try to grab the err_lock. SHOW SLAVE STATUS, however, grabs these locks in reverse order. It will initially grab the err_lock, and then try to grab LOCK_parallel_entry. This leads to a deadlock when both threads have grabbed their first lock without the second.

This patch implements the MDEV-31894 proposed fix to optimize the workers_idle() check to compare the last in-use relay log’s queued_count==dequeued_count for idleness. This removes the need for workers_idle() to grab LOCK_parallel_entry, as these values are atomically updated.

Huge thanks to Kristian Nielsen for diagnosing the problem!

Reviewed By:
============
Kristian Nielsen <knielsen@knielsen-hq.org>
Andrei Elkin <andrei.elkin@mariadb.com>
This commit is contained in:
11
sql/slave.cc
11
sql/slave.cc
@@ -3123,6 +3123,14 @@ static bool send_show_master_info_data(THD *thd, Master_info *mi, bool full,
|
||||
mysql_mutex_lock(&mi->err_lock);
|
||||
/* err_lock is to protect mi->rli.last_error() */
|
||||
mysql_mutex_lock(&mi->rli.err_lock);
|
||||
|
||||
DBUG_EXECUTE_IF("hold_sss_with_err_lock", {
|
||||
DBUG_ASSERT(!debug_sync_set_action(
|
||||
thd, STRING_WITH_LEN("now SIGNAL sss_got_err_lock "
|
||||
"WAIT_FOR sss_continue")));
|
||||
DBUG_SET("-d,hold_sss_with_err_lock");
|
||||
});
|
||||
|
||||
protocol->store(mi->host, &my_charset_bin);
|
||||
protocol->store(mi->user, &my_charset_bin);
|
||||
protocol->store((uint32) mi->port);
|
||||
@@ -3197,7 +3205,8 @@ static bool send_show_master_info_data(THD *thd, Master_info *mi, bool full,
|
||||
while the slave is processing ignored events, such as those skipped
|
||||
due to slave_skip_counter.
|
||||
*/
|
||||
if (mi->using_parallel() && idle && !mi->rli.parallel.workers_idle())
|
||||
if (mi->using_parallel() && idle &&
|
||||
!rpl_parallel::workers_idle(&mi->rli))
|
||||
idle= false;
|
||||
}
|
||||
if (idle)
|
||||
|
Reference in New Issue
Block a user