From 596921dab8e53c56dc72d0e0f6cb4f03d59e4825 Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Tue, 30 Apr 2024 19:21:24 +0200 Subject: [PATCH] MDEV-34042: Deadlock kill of XA PREPARE can break replication / rpl.rpl_parallel_multi_domain_xa sporadic failure Clear any pending deadlock kill after completing XA PREPARE, and before updating the mysql.gtid_slave_pos table in a separate transaction. Reviewed-by: Andrei Elkin Signed-off-by: Kristian Nielsen --- sql/log_event_server.cc | 13 +++++++++++++ sql/rpl_parallel.cc | 2 +- sql/rpl_parallel.h | 1 + 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc index ebbd52fcd74..5b0fbf8ced0 100644 --- a/sql/log_event_server.cc +++ b/sql/log_event_server.cc @@ -4209,6 +4209,19 @@ int XA_prepare_log_event::do_commit() else res= trans_xa_commit(thd); + if (thd->rgi_slave->is_parallel_exec) + { + /* + Since the transaction is prepared/committed without updating the GTID pos + (MDEV-32020...), we need to clear any pending deadlock kill here. + Otherwise, if the kill happened after the prepare/commit completed, it + might end up killing the subsequent GTID position update, causing the + slave to fail with an error. + */ + wait_for_pending_deadlock_kill(thd, thd->rgi_slave); + thd->reset_killed(); + } + return res; } #endif // HAVE_REPLICATION diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 3dff9bf8ade..64819dff2c4 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -131,7 +131,7 @@ handle_queued_pos_update(THD *thd, rpl_parallel_thread::queued_event *qev) asynchronously, we need to be sure they will be completed before starting a new transaction. Otherwise the new transaction might suffer a spurious kill. 
*/ -static void +void wait_for_pending_deadlock_kill(THD *thd, rpl_group_info *rgi) { PSI_stage_info old_stage; diff --git a/sql/rpl_parallel.h b/sql/rpl_parallel.h index a605b977473..9ef5405f39b 100644 --- a/sql/rpl_parallel.h +++ b/sql/rpl_parallel.h @@ -494,6 +494,7 @@ struct rpl_parallel { extern struct rpl_parallel_thread_pool global_rpl_thread_pool; +extern void wait_for_pending_deadlock_kill(THD *thd, rpl_group_info *rgi); extern int rpl_parallel_resize_pool_if_no_slaves(void); extern int rpl_parallel_activate_pool(rpl_parallel_thread_pool *pool); extern int rpl_parallel_inactivate_pool(rpl_parallel_thread_pool *pool);