mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
MDEV-31448: Killing a replica thread awaiting its GCO can hang/crash a parallel replica
The problem is that when a worker thread is killed by a user while in wait_for_prior_commit(), its event group may complete out of order, because the wait for the prior commit was aborted by the kill.

This fix ensures that event groups always complete in order, even in the error case. This is done in finish_event_group() by doing an extra wait_for_prior_commit(), if necessary, that ignores kills.

This fix supersedes the fix for MDEV-30780, so that earlier fix is reverted in this patch.

Also fix that an error from wait_for_prior_commit() inside finish_event_group() was not signalled to wakeup_subsequent_commits().

Based on earlier work by Brandon Nesterenko and Andrei Elkin, with some changes to simplify the semantics of wait_for_prior_commit() and make the code more robust to future changes.

Reviewed-by: Andrei Elkin <andrei.elkin@mariadb.com>
Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
This commit is contained in:
@@ -2144,14 +2144,14 @@ struct wait_for_commit
|
||||
bool commit_started;
|
||||
|
||||
void register_wait_for_prior_commit(wait_for_commit *waitee);
|
||||
int wait_for_prior_commit(THD *thd)
|
||||
int wait_for_prior_commit(THD *thd, bool allow_kill=true)
|
||||
{
|
||||
/*
|
||||
Quick inline check, to avoid function call and locking in the common case
|
||||
where no wakeup is registered, or a registered wait was already signalled.
|
||||
*/
|
||||
if (waitee.load(std::memory_order_acquire))
|
||||
return wait_for_prior_commit2(thd);
|
||||
return wait_for_prior_commit2(thd, allow_kill);
|
||||
else
|
||||
{
|
||||
if (wakeup_error)
|
||||
@@ -2205,7 +2205,7 @@ struct wait_for_commit
|
||||
|
||||
void wakeup(int wakeup_error);
|
||||
|
||||
int wait_for_prior_commit2(THD *thd);
|
||||
int wait_for_prior_commit2(THD *thd, bool allow_kill);
|
||||
void wakeup_subsequent_commits2(int wakeup_error);
|
||||
void unregister_wait_for_prior_commit2();
|
||||
|
||||
@@ -4726,10 +4726,10 @@ public:
|
||||
}
|
||||
|
||||
wait_for_commit *wait_for_commit_ptr;
|
||||
int wait_for_prior_commit()
|
||||
int wait_for_prior_commit(bool allow_kill=true)
|
||||
{
|
||||
if (wait_for_commit_ptr)
|
||||
return wait_for_commit_ptr->wait_for_prior_commit(this);
|
||||
return wait_for_commit_ptr->wait_for_prior_commit(this, allow_kill);
|
||||
return 0;
|
||||
}
|
||||
void wakeup_subsequent_commits(int wakeup_error)
|
||||
|
Reference in New Issue
Block a user