mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
MDEV-21910 Deadlock between BF abort and manual KILL command
When high priority replication slave applier encounters lock conflict in innodb, it will force the conflicting lock holder transaction (victim) to rollback. This is a must in multi-master sychronous replication model to avoid cluster lock-up. This high priority victim abort (aka "brute force" (BF) abort), is started from innodb lock manager while holding the victim's transaction's (trx) mutex. Depending on the execution state of the victim transaction, it may happen that the BF abort will call for THD::awake() to wake up the victim transaction for the rollback. Now, if BF abort requires THD::awake() to be called, then the applier thread executed locking protocol of: victim trx mutex -> victim THD::LOCK_thd_data If, at the same time another DBMS super user issues KILL command to abort the same victim, it will execute locking protocol of: victim THD::LOCK_thd_data -> victim trx mutex. These two locking protocol acquire mutexes in opposite order, hence unresolvable mutex locking deadlock may occur. The fix in this commit adds THD::wsrep_aborter flag to synchronize who can kill the victim This flag is set both when BF is called for from innodb and by KILL command. Either path of victim killing will bail out if victim's wsrep_killed is already set to avoid mutex conflicts with the other aborter execution. THD::wsrep_aborter records the aborter THD's ID. This is needed to preserve the right to kill the victim from different locations for the same aborter thread. It is also good error logging, to see who is reponsible for the abort. A new test case was added in galera.galera_bf_kill_debug.test for scenario where wsrep applier thread and manual KILL command try to kill same idle victim
This commit is contained in:
@@ -84,6 +84,7 @@ extern struct wsrep_service_st {
|
||||
my_bool (*wsrep_get_debug_func)();
|
||||
void (*wsrep_commit_ordered_func)(MYSQL_THD thd);
|
||||
my_bool (*wsrep_thd_is_applying_func)(const MYSQL_THD thd);
|
||||
bool (*wsrep_thd_set_wsrep_aborter_func)(MYSQL_THD bf_thd, MYSQL_THD thd);
|
||||
} *wsrep_service;
|
||||
|
||||
#define MYSQL_SERVICE_WSREP_INCLUDED
|
||||
@@ -124,6 +125,7 @@ extern struct wsrep_service_st {
|
||||
#define wsrep_get_debug() wsrep_service->wsrep_get_debug_func()
|
||||
#define wsrep_commit_ordered(T) wsrep_service->wsrep_commit_ordered_func(T)
|
||||
#define wsrep_thd_is_applying(T) wsrep_service->wsrep_thd_is_applying_func(T)
|
||||
#define wsrep_thd_set_wsrep_aborter(T) wsrep_service->wsrep_thd_set_wsrep_aborter_func(T1, T2)
|
||||
|
||||
#else
|
||||
|
||||
@@ -176,6 +178,8 @@ extern "C" my_bool wsrep_thd_is_local(const MYSQL_THD thd);
|
||||
/* Return true if thd is in high priority mode */
|
||||
/* todo: rename to is_high_priority() */
|
||||
extern "C" my_bool wsrep_thd_is_applying(const MYSQL_THD thd);
|
||||
/* set wsrep_aborter for the target THD */
|
||||
extern "C" bool wsrep_thd_set_wsrep_aborter(MYSQL_THD bf_thd, MYSQL_THD victim_thd);
|
||||
/* Return true if thd is in TOI mode */
|
||||
extern "C" my_bool wsrep_thd_is_toi(const MYSQL_THD thd);
|
||||
/* Return true if thd is in replicating TOI mode */
|
||||
@@ -216,6 +220,7 @@ extern "C" my_bool wsrep_get_debug();
|
||||
|
||||
extern "C" void wsrep_commit_ordered(MYSQL_THD thd);
|
||||
extern "C" my_bool wsrep_thd_is_applying(const MYSQL_THD thd);
|
||||
extern "C" bool wsrep_thd_set_wsrep_aborter(MYSQL_THD bf_thd, MYSQL_THD victim_thd);
|
||||
|
||||
#endif
|
||||
#endif /* MYSQL_SERVICE_WSREP_INCLUDED */
|
||||
|
Reference in New Issue
Block a user