From 34e8585437abc862f31aeb1a4022ef278a4af6f0 Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Wed, 16 Aug 2023 11:57:34 +0200 Subject: [PATCH] MDEV-29974: Missed kill waiting for worker queues to drain When the SQL driver thread goes to wait for room in the parallel slave worker queue, there was a race where a kill at the right moment could be ignored and the wait proceed uninterrupted by the kill. Fix by moving the THD::check_killed() to occur _after_ doing ENTER_COND(). This bug was seen as sporadic failure of the testcase rpl.rpl_parallel (rpl.rpl_parallel_gco_wait_kill since 10.5), with "Slave stopped with wrong error code". Signed-off-by: Kristian Nielsen --- sql/rpl_parallel.cc | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index ba5cf54e673..3bd27c73932 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -2281,21 +2281,6 @@ rpl_parallel_entry::choose_thread(rpl_group_info *rgi, bool *did_enter_cond, /* The thread is ready to queue into. */ break; } - else if (unlikely(rli->sql_driver_thd->check_killed(1))) - { - unlock_or_exit_cond(rli->sql_driver_thd, &thr->LOCK_rpl_thread, - did_enter_cond, old_stage); - my_error(ER_CONNECTION_KILLED, MYF(0)); -#ifdef ENABLED_DEBUG_SYNC - DBUG_EXECUTE_IF("rpl_parallel_wait_queue_max", - { - debug_sync_set_action(rli->sql_driver_thd, - STRING_WITH_LEN("now SIGNAL wait_queue_killed")); - };); -#endif - slave_output_error_info(rgi, rli->sql_driver_thd); - return NULL; - } else { /* @@ -2323,6 +2308,23 @@ rpl_parallel_entry::choose_thread(rpl_group_info *rgi, bool *did_enter_cond, old_stage); *did_enter_cond= true; } + + if (unlikely(rli->sql_driver_thd->check_killed(1))) + { + unlock_or_exit_cond(rli->sql_driver_thd, &thr->LOCK_rpl_thread, + did_enter_cond, old_stage); + my_error(ER_CONNECTION_KILLED, MYF(0)); +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("rpl_parallel_wait_queue_max", + { + debug_sync_set_action(rli->sql_driver_thd, + STRING_WITH_LEN("now SIGNAL wait_queue_killed")); + };); +#endif + slave_output_error_info(rgi, rli->sql_driver_thd); + return NULL; + } + mysql_cond_wait(&thr->COND_rpl_thread_queue, &thr->LOCK_rpl_thread); } }