1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-30 16:24:05 +03:00

MDEV-29974: Missed kill waiting for worker queues to drain

When the SQL driver thread goes to wait for room in the parallel slave
worker queue, there was a race where a kill at the right moment could
be ignored and the wait proceed uninterrupted by the kill.

Fix by moving the THD::check_killed() to occur _after_ doing ENTER_COND().

This bug was seen as sporadic failure of the testcase rpl.rpl_parallel
(rpl.rpl_parallel_gco_wait_kill since 10.5), with "Slave stopped with
wrong error code".

Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
This commit is contained in:
Kristian Nielsen
2023-08-16 11:57:34 +02:00
parent 900c4d6920
commit 34e8585437

View File

@ -2281,21 +2281,6 @@ rpl_parallel_entry::choose_thread(rpl_group_info *rgi, bool *did_enter_cond,
/* The thread is ready to queue into. */
break;
}
else if (unlikely(rli->sql_driver_thd->check_killed(1)))
{
unlock_or_exit_cond(rli->sql_driver_thd, &thr->LOCK_rpl_thread,
did_enter_cond, old_stage);
my_error(ER_CONNECTION_KILLED, MYF(0));
#ifdef ENABLED_DEBUG_SYNC
DBUG_EXECUTE_IF("rpl_parallel_wait_queue_max",
{
debug_sync_set_action(rli->sql_driver_thd,
STRING_WITH_LEN("now SIGNAL wait_queue_killed"));
};);
#endif
slave_output_error_info(rgi, rli->sql_driver_thd);
return NULL;
}
else
{
/*
@ -2323,6 +2308,23 @@ rpl_parallel_entry::choose_thread(rpl_group_info *rgi, bool *did_enter_cond,
old_stage);
*did_enter_cond= true;
}
if (unlikely(rli->sql_driver_thd->check_killed(1)))
{
unlock_or_exit_cond(rli->sql_driver_thd, &thr->LOCK_rpl_thread,
did_enter_cond, old_stage);
my_error(ER_CONNECTION_KILLED, MYF(0));
#ifdef ENABLED_DEBUG_SYNC
DBUG_EXECUTE_IF("rpl_parallel_wait_queue_max",
{
debug_sync_set_action(rli->sql_driver_thd,
STRING_WITH_LEN("now SIGNAL wait_queue_killed"));
};);
#endif
slave_output_error_info(rgi, rli->sql_driver_thd);
return NULL;
}
mysql_cond_wait(&thr->COND_rpl_thread_queue, &thr->LOCK_rpl_thread);
}
}