mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-24 07:13:33 +03:00 
			
		
		
		
	MDEV-23328 Server hang due to Galera lock conflict resolution
Mutex order violation when wsrep bf thread kills a conflicting trx,
the stack is
          wsrep_thd_LOCK()
          wsrep_kill_victim()
          lock_rec_other_has_conflicting()
          lock_clust_rec_read_check_and_lock()
          row_search_mvcc()
          ha_innobase::index_read()
          ha_innobase::rnd_pos()
          handler::ha_rnd_pos()
          handler::rnd_pos_by_record()
          handler::ha_rnd_pos_by_record()
          Rows_log_event::find_row()
          Update_rows_log_event::do_exec_row()
          Rows_log_event::do_apply_event()
          Log_event::apply_event()
          wsrep_apply_events()
and mutexes are taken in the order
          lock_sys->mutex -> victim_trx->mutex -> victim_thread->LOCK_thd_data
When a normal KILL statement is executed, the stack is
          innobase_kill_query()
          kill_handlerton()
          plugin_foreach_with_mask()
          ha_kill_query()
          THD::awake()
          kill_one_thread()
        and mutexes are
          victim_thread->LOCK_thd_data -> lock_sys->mutex -> victim_trx->mutex
This patch is the plan D variant for fixing the potential mutex locking
order violation exercised by BF aborting and KILL command execution.
In this approach, KILL command is replicated as TOI operation.
This guarantees total isolation for the KILL command execution
in the first node: there is no concurrent replication applying
and no concurrent DDL executing. Therefore there is no risk of
BF aborting to happen in parallel with KILL command execution
either. Potential mutex deadlocks between the different mutex
access paths with KILL command execution and BF aborting cannot
therefore happen.
TOI replication is used, in this approach, purely as a means
to provide isolated KILL command execution in the first node.
KILL command should not (and must not) be applied in secondary
nodes. In this patch, we ensure this by skipping KILL
execution in secondary nodes, in the applying phase, where we
bail out if an applier thread is trying to execute a KILL command.
This is effective, but skipping the applying of KILL command
could happen much earlier as well.
This also fixes unprotected calls to wsrep_thd_abort
that will use wsrep_abort_transaction. This is fixed
by holding THD::LOCK_thd_data while we abort the transaction.
Reviewed-by: Jan Lindström <jan.lindstrom@mariadb.com>
			
			
This commit is contained in:
		
				
					committed by
					
						 Oleksandr Byelkin
						Oleksandr Byelkin
					
				
			
			
				
	
			
			
			
						parent
						
							d5bc05798f
						
					
				
				
					commit
					ef2dbb8dbc
				
			| @@ -68,9 +68,9 @@ f1	f2	f3 | ||||
| 10	10	0 | ||||
| INSERT INTO t1 VALUES (7,7,7); | ||||
| INSERT INTO t1 VALUES (8,8,8); | ||||
| DROP TABLE t1; | ||||
| test scenario 2 | ||||
| connection node_1; | ||||
| SELECT COUNT(*) FROM t1; | ||||
| COUNT(*) | ||||
| 7 | ||||
| CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 int, f3 int,  unique key keyj (f2)); | ||||
| INSERT INTO t1 VALUES (1, 1, 0); | ||||
| INSERT INTO t1 VALUES (3, 3, 0); | ||||
| @@ -92,9 +92,9 @@ SET SESSION wsrep_on = 1; | ||||
| SET GLOBAL wsrep_provider_options = 'dbug='; | ||||
| SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync'; | ||||
| connection node_1; | ||||
| COMMIT; | ||||
| connection node_1a; | ||||
| SET SESSION wsrep_on = 0; | ||||
| SELECT COUNT(*) FROM t1; | ||||
| COUNT(*) | ||||
| 7 | ||||
| SET SESSION wsrep_on = 1; | ||||
| SET GLOBAL wsrep_provider_options = 'dbug='; | ||||
| SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_replay_cb"; | ||||
| @@ -125,6 +125,7 @@ f1	f2	f3 | ||||
| 3	3	1 | ||||
| 4	4	2 | ||||
| 5	5	2 | ||||
| 8	8	8 | ||||
| 10	10	0 | ||||
| INSERT INTO t1 VALUES (7,7,7); | ||||
| INSERT INTO t1 VALUES (8,8,8); | ||||
|   | ||||
| @@ -1,54 +0,0 @@ | ||||
| connection node_2; | ||||
| connection node_1; | ||||
| connection node_2; | ||||
| CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB; | ||||
| insert into t1 values (NULL,1); | ||||
| connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; | ||||
| connection node_2a; | ||||
| truncate t1; | ||||
| insert into t1 values (1,0); | ||||
| begin; | ||||
| update t1 set b=2 where a=1; | ||||
| connection node_2; | ||||
| set session wsrep_sync_wait=0; | ||||
| connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2; | ||||
| connection node_2b; | ||||
| SET GLOBAL debug_dbug = "d,sync.before_wsrep_thd_abort"; | ||||
| connection node_1; | ||||
| select * from t1; | ||||
| a	b | ||||
| 1	0 | ||||
| update t1 set b= 1 where a=1; | ||||
| connection node_2b; | ||||
| SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.before_wsrep_thd_abort_reached"; | ||||
| connection node_2; | ||||
| SET DEBUG_SYNC= 'before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill'; | ||||
| connection node_2b; | ||||
| SET DEBUG_SYNC='now WAIT_FOR awake_reached'; | ||||
| SET GLOBAL debug_dbug = ""; | ||||
| SET DEBUG_SYNC = "now SIGNAL signal.before_wsrep_thd_abort"; | ||||
| SET DEBUG_SYNC = "now SIGNAL continue_kill"; | ||||
| connection node_2; | ||||
| connection node_2a; | ||||
| select * from t1; | ||||
| connection node_2; | ||||
| SET DEBUG_SYNC = "RESET"; | ||||
| drop table t1; | ||||
| disconnect node_2a; | ||||
| connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; | ||||
| connection node_2a; | ||||
| CREATE TABLE t1 (i int primary key); | ||||
| SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue"; | ||||
| INSERT INTO t1 VALUES (1); | ||||
| connection node_2; | ||||
| SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached"; | ||||
| SET DEBUG_SYNC = "now SIGNAL bwoc_continue"; | ||||
| SET DEBUG_SYNC='RESET'; | ||||
| connection node_2a; | ||||
| connection node_2; | ||||
| select * from t1; | ||||
| i | ||||
| 1 | ||||
| disconnect node_2a; | ||||
| connection node_1; | ||||
| drop table t1; | ||||
| @@ -23,22 +23,6 @@ connection node_1a; | ||||
| connection node_1b; | ||||
| connection node_2; | ||||
| connection node_2a; | ||||
| connection node_1; | ||||
| SET SESSION wsrep_sync_wait=15; | ||||
| SELECT COUNT(*) FROM parent; | ||||
| COUNT(*) | ||||
| 20001 | ||||
| SELECT COUNT(*) FROM child; | ||||
| COUNT(*) | ||||
| 10000 | ||||
| connection node_2; | ||||
| SET SESSION wsrep_sync_wait=15; | ||||
| SELECT COUNT(*) FROM parent; | ||||
| COUNT(*) | ||||
| 20001 | ||||
| SELECT COUNT(*) FROM child; | ||||
| COUNT(*) | ||||
| 10000 | ||||
| DROP TABLE child; | ||||
| DROP TABLE parent; | ||||
| DROP TABLE ten; | ||||
|   | ||||
| @@ -140,6 +140,14 @@ SELECT * FROM t1; | ||||
| # original state in node 1 | ||||
| INSERT INTO t1 VALUES (7,7,7); | ||||
| INSERT INTO t1 VALUES (8,8,8); | ||||
| SELECT COUNT(*) FROM t1; | ||||
| SELECT * FROM t1; | ||||
|  | ||||
| --connection node_1 | ||||
| --let $wait_condition = SELECT COUNT(*) = 7 FROM t1 | ||||
| --source include/wait_condition.inc | ||||
| SELECT COUNT(*) FROM t1; | ||||
| SELECT * FROM t1; | ||||
|  | ||||
| DROP TABLE t1; | ||||
|  | ||||
| @@ -199,9 +207,9 @@ INSERT INTO t1 VALUES (5, 5, 2); | ||||
| --source include/galera_set_sync_point.inc | ||||
|  | ||||
| --connection node_1 | ||||
| --send COMMIT | ||||
|  | ||||
| --connection node_1a | ||||
| --let $wait_condition = SELECT COUNT(*) = 7 FROM t1 | ||||
| --source include/wait_condition.inc | ||||
| SELECT COUNT(*) FROM t1; | ||||
| # wait for the local commit to enter in commit monitor wait state | ||||
| --let $galera_sync_point = apply_monitor_slave_enter_sync commit_monitor_master_enter_sync | ||||
| --source include/galera_wait_sync_point.inc | ||||
| @@ -273,4 +281,13 @@ SELECT * FROM t1; | ||||
| INSERT INTO t1 VALUES (7,7,7); | ||||
| INSERT INTO t1 VALUES (8,8,8); | ||||
|  | ||||
| SELECT COUNT(*) FROM t1; | ||||
| SELECT * FROM t1; | ||||
|  | ||||
| --connection node_1 | ||||
| --let $wait_condition = SELECT COUNT(*) = 7 FROM t1 | ||||
| --source include/wait_condition.inc | ||||
| SELECT COUNT(*) FROM t1; | ||||
| SELECT * FROM t1; | ||||
|  | ||||
| DROP TABLE t1; | ||||
|   | ||||
| @@ -1,7 +0,0 @@ | ||||
| !include ../galera_2nodes.cnf | ||||
|  | ||||
| [mysqld.1] | ||||
| wsrep-debug=SERVER | ||||
|  | ||||
| [mysqld.2] | ||||
| wsrep-debug=SERVER | ||||
| @@ -1,140 +0,0 @@ | ||||
| --source include/galera_cluster.inc | ||||
| --source include/have_innodb.inc | ||||
| --source include/have_debug.inc | ||||
| --source include/have_debug_sync.inc | ||||
|  | ||||
| # | ||||
| # Test case 7: | ||||
| # 1. Start a transaction on node_2, | ||||
| #    and leave it pending while holding a row locked | ||||
| # 2. set sync point pause applier | ||||
| # 3. send a conflicting write on node_1, it will pause | ||||
| #    at the sync point | ||||
| # 4. though another connection to node_2, kill the local | ||||
| #    transaction | ||||
| # | ||||
|  | ||||
| --connection node_2 | ||||
| CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB; | ||||
| insert into t1 values (NULL,1); | ||||
|  | ||||
| # | ||||
| # connection node_2a runs a local transaction, that is victim of BF abort | ||||
| # and victim of KILL command by connection node_2 | ||||
| # | ||||
| --connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 | ||||
| --connection node_2a | ||||
| truncate t1; | ||||
| insert into t1 values (1,0); | ||||
|  | ||||
| # start a transaction that will conflict with later applier | ||||
| begin; | ||||
| update t1 set b=2 where a=1; | ||||
|  | ||||
| --connection node_2 | ||||
| set session wsrep_sync_wait=0; | ||||
| --let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1 | ||||
| --source include/wait_condition.inc | ||||
|  | ||||
| --let $k_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1` | ||||
|  | ||||
| # connection node_2b is for controlling debug syn points | ||||
| # first set a sync point for applier, to pause during BF aborting | ||||
| # and before THD::awake would be called | ||||
| # | ||||
| --connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2 | ||||
| --connection node_2b | ||||
| SET GLOBAL debug_dbug = "d,sync.before_wsrep_thd_abort"; | ||||
|  | ||||
| # | ||||
| # replicate an update, which will BF abort the victim node_2a | ||||
| # however, while applier in node 2 is handling the abort, | ||||
| # it will pause in sync point set by node_2b | ||||
| # | ||||
| --connection node_1 | ||||
| select * from t1; | ||||
| update t1 set b= 1 where a=1; | ||||
|  | ||||
| # | ||||
| # wait until the applying of above update has reached the sync point | ||||
| # in node 2 | ||||
| # | ||||
| --connection node_2b | ||||
| SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.before_wsrep_thd_abort_reached"; | ||||
|  | ||||
| --connection node_2 | ||||
| # | ||||
| # pause KILL execution before awake | ||||
| # | ||||
| SET DEBUG_SYNC= 'before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill'; | ||||
| --disable_query_log | ||||
| --send_eval KILL $k_thread | ||||
| --enable_query_log | ||||
|  | ||||
|  | ||||
| --connection node_2b | ||||
| SET DEBUG_SYNC='now WAIT_FOR awake_reached'; | ||||
|  | ||||
| # release applier and KILL operator | ||||
| SET GLOBAL debug_dbug = ""; | ||||
| SET DEBUG_SYNC = "now SIGNAL signal.before_wsrep_thd_abort"; | ||||
| SET DEBUG_SYNC = "now SIGNAL continue_kill"; | ||||
|  | ||||
| --connection node_2 | ||||
| --reap | ||||
|  | ||||
| --connection node_2a | ||||
| --error 0,1213 | ||||
| select * from t1; | ||||
|  | ||||
| --connection node_2 | ||||
| SET DEBUG_SYNC = "RESET"; | ||||
|  | ||||
| drop table t1; | ||||
|  | ||||
| --disconnect node_2a | ||||
| # | ||||
| # Test case 7: | ||||
| # run a transaction in node 2, and set a sync point to pause the transaction | ||||
| # in commit phase. | ||||
| # Through another connection to node 2, kill the committing transaction by | ||||
| # KILL QUERY command | ||||
| # | ||||
|  | ||||
| --connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 | ||||
| --connection node_2a | ||||
| --let $connection_id = `SELECT CONNECTION_ID()` | ||||
|  | ||||
| CREATE TABLE t1 (i int primary key); | ||||
|  | ||||
| # Set up sync point | ||||
| SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue"; | ||||
|  | ||||
| # Send insert which will block in the sync point above | ||||
| --send INSERT INTO t1 VALUES (1) | ||||
|  | ||||
| --connection node_2 | ||||
| SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached"; | ||||
|  | ||||
| --disable_query_log | ||||
| --disable_result_log | ||||
| # victim has passed the point of no return, kill is not possible anymore | ||||
| --eval KILL QUERY $connection_id | ||||
| --enable_result_log | ||||
| --enable_query_log | ||||
|  | ||||
| SET DEBUG_SYNC = "now SIGNAL bwoc_continue"; | ||||
| SET DEBUG_SYNC='RESET'; | ||||
| --connection node_2a | ||||
| --error 0,1213 | ||||
| --reap | ||||
|  | ||||
| --connection node_2 | ||||
| # victim was able to complete the INSERT | ||||
| select * from t1; | ||||
|  | ||||
| --disconnect node_2a | ||||
|  | ||||
| --connection node_1 | ||||
| drop table t1; | ||||
|  | ||||
| @@ -54,15 +54,11 @@ INSERT INTO parent VALUES (1, 0); | ||||
| --connection node_2a | ||||
| --reap | ||||
|  | ||||
| --connection node_1 | ||||
| SET SESSION wsrep_sync_wait=15; | ||||
| SELECT COUNT(*) FROM parent; | ||||
| SELECT COUNT(*) FROM child; | ||||
|  | ||||
| --connection node_2 | ||||
| SET SESSION wsrep_sync_wait=15; | ||||
| SELECT COUNT(*) FROM parent; | ||||
| SELECT COUNT(*) FROM child; | ||||
| # | ||||
| # ALTER TABLE could bf kill one or more of INSERTs to parent, so | ||||
| # the actual number of rows in PARENT depends on whether | ||||
| # the INSERT is committed before ALTER TABLE is executed | ||||
| # | ||||
|  | ||||
| DROP TABLE child; | ||||
| DROP TABLE parent; | ||||
|   | ||||
| @@ -94,11 +94,13 @@ SELECT * FROM t1; | ||||
| --eval SET GLOBAL wsrep_auto_increment_control = $auto_increment_control_orig | ||||
| --eval SET GLOBAL auto_increment_increment = $auto_increment_increment_node1 | ||||
| --eval SET GLOBAL auto_increment_offset = $auto_increment_offset_node1 | ||||
| --disconnect node_1a | ||||
|  | ||||
| --connection node_2 | ||||
| --eval SET GLOBAL wsrep_auto_increment_control = $auto_increment_control_orig | ||||
| --eval SET GLOBAL auto_increment_increment = $auto_increment_increment_node2 | ||||
| --eval SET GLOBAL auto_increment_offset = $auto_increment_offset_node2 | ||||
| --disconnect node_2a | ||||
|  | ||||
| --enable_query_log | ||||
|  | ||||
|   | ||||
| @@ -913,7 +913,7 @@ static my_bool kill_handlerton(THD *thd, plugin_ref plugin, | ||||
| { | ||||
|   handlerton *hton= plugin_hton(plugin); | ||||
| 
 | ||||
|   mysql_mutex_assert_owner(&thd->LOCK_thd_data); | ||||
|   mysql_mutex_assert_owner(&thd->LOCK_thd_kill); | ||||
|   if (hton->kill_query && thd_get_ha_data(thd, hton)) | ||||
|     hton->kill_query(hton, thd, *(enum thd_kill_levels *) level); | ||||
|   return FALSE; | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| /* Copyright 2018 Codership Oy <info@codership.com>
 | ||||
| /* Copyright 2018-2021 Codership Oy <info@codership.com>
 | ||||
| 
 | ||||
|    This program is free software; you can redistribute it and/or modify | ||||
|    it under the terms of the GNU General Public License as published by | ||||
| @@ -29,12 +29,14 @@ extern "C" my_bool wsrep_on(const THD *thd) | ||||
| 
 | ||||
| extern "C" void wsrep_thd_LOCK(const THD *thd) | ||||
| { | ||||
|   mysql_mutex_lock(&thd->LOCK_thd_kill); | ||||
|   mysql_mutex_lock(&thd->LOCK_thd_data); | ||||
| } | ||||
| 
 | ||||
| extern "C" void wsrep_thd_UNLOCK(const THD *thd) | ||||
| { | ||||
|   mysql_mutex_unlock(&thd->LOCK_thd_data); | ||||
|   mysql_mutex_unlock(&thd->LOCK_thd_kill); | ||||
| } | ||||
| 
 | ||||
| extern "C" void wsrep_thd_kill_LOCK(const THD *thd) | ||||
| @@ -188,6 +190,8 @@ extern "C" void wsrep_handle_SR_rollback(THD *bf_thd, | ||||
|   DBUG_ASSERT(wsrep_thd_is_SR(victim_thd)); | ||||
|   if (!victim_thd || !wsrep_on(bf_thd)) return; | ||||
| 
 | ||||
|   wsrep_thd_LOCK(victim_thd); | ||||
| 
 | ||||
|   WSREP_DEBUG("handle rollback, for deadlock: thd %llu trx_id %" PRIu64 " frags %zu conf %s", | ||||
|               victim_thd->thread_id, | ||||
|               victim_thd->wsrep_trx_id(), | ||||
| @@ -208,6 +212,9 @@ extern "C" void wsrep_handle_SR_rollback(THD *bf_thd, | ||||
|   { | ||||
|     wsrep_thd_self_abort(victim_thd); | ||||
|   } | ||||
| 
 | ||||
|   wsrep_thd_UNLOCK(victim_thd); | ||||
| 
 | ||||
|   if (bf_thd) | ||||
|   { | ||||
|     wsrep_store_threadvars(bf_thd); | ||||
| @@ -217,6 +224,9 @@ extern "C" void wsrep_handle_SR_rollback(THD *bf_thd, | ||||
| extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd, | ||||
|                                       my_bool signal) | ||||
| { | ||||
|   mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill); | ||||
|   mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); | ||||
| 
 | ||||
|   DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort", | ||||
|                  { | ||||
|                    const char act[]= | ||||
| @@ -233,28 +243,26 @@ extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd, | ||||
|     have wsrep on. Note that this should never interrupt RSU | ||||
|     as RSU has paused the provider. | ||||
|    */ | ||||
|   mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); | ||||
|   mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill); | ||||
| 
 | ||||
|   if ((ret || !wsrep_on(victim_thd)) && signal) | ||||
|   { | ||||
|     mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_data); | ||||
|     mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_kill); | ||||
|     mysql_mutex_lock(&victim_thd->LOCK_thd_data); | ||||
| 
 | ||||
|     if (victim_thd->wsrep_aborter && victim_thd->wsrep_aborter != bf_thd->thread_id) | ||||
|     { | ||||
|       WSREP_DEBUG("victim is killed already by %llu, skipping awake", | ||||
|                   victim_thd->wsrep_aborter); | ||||
|       mysql_mutex_unlock(&victim_thd->LOCK_thd_data); | ||||
|       wsrep_thd_UNLOCK(victim_thd); | ||||
|       return false; | ||||
|     } | ||||
| 
 | ||||
|     mysql_mutex_lock(&victim_thd->LOCK_thd_kill); | ||||
|     victim_thd->wsrep_aborter= bf_thd->thread_id; | ||||
|     victim_thd->awake_no_mutex(KILL_QUERY); | ||||
|     mysql_mutex_unlock(&victim_thd->LOCK_thd_kill); | ||||
|     mysql_mutex_unlock(&victim_thd->LOCK_thd_data); | ||||
|   } else { | ||||
|     WSREP_DEBUG("wsrep_thd_bf_abort skipped awake"); | ||||
|   } | ||||
|   else | ||||
|     WSREP_DEBUG("wsrep_thd_bf_abort skipped awake for %llu", thd_get_thread_id(victim_thd)); | ||||
| 
 | ||||
|   wsrep_thd_UNLOCK(victim_thd); | ||||
|   return ret; | ||||
| } | ||||
| 
 | ||||
| @@ -279,8 +287,6 @@ extern "C" my_bool wsrep_thd_order_before(const THD *left, const THD *right) | ||||
| 
 | ||||
| extern "C" my_bool wsrep_thd_is_aborting(const MYSQL_THD thd) | ||||
| { | ||||
|   mysql_mutex_assert_owner(&thd->LOCK_thd_data); | ||||
| 
 | ||||
|   const wsrep::client_state& cs(thd->wsrep_cs()); | ||||
|   const enum wsrep::transaction::state tx_state(cs.transaction().state()); | ||||
|   switch (tx_state) | ||||
| @@ -294,8 +300,6 @@ extern "C" my_bool wsrep_thd_is_aborting(const MYSQL_THD thd) | ||||
|     default: | ||||
|       return false; | ||||
|   } | ||||
| 
 | ||||
|   return false; | ||||
| } | ||||
| 
 | ||||
| static inline enum wsrep::key::type | ||||
|   | ||||
| @@ -1072,8 +1072,8 @@ terminate_slave_thread(THD *thd, | ||||
|     int error __attribute__((unused)); | ||||
|     DBUG_PRINT("loop", ("killing slave thread")); | ||||
| 
 | ||||
|     mysql_mutex_lock(&thd->LOCK_thd_data); | ||||
|     mysql_mutex_lock(&thd->LOCK_thd_kill); | ||||
|     mysql_mutex_lock(&thd->LOCK_thd_data); | ||||
| #ifndef DONT_USE_THR_ALARM | ||||
|     /*
 | ||||
|       Error codes from pthread_kill are: | ||||
|   | ||||
| @@ -826,6 +826,7 @@ THD::THD(my_thread_id id, bool is_wsrep_applier) | ||||
|   mysql_mutex_init(key_LOCK_wakeup_ready, &LOCK_wakeup_ready, MY_MUTEX_INIT_FAST); | ||||
|   mysql_mutex_init(key_LOCK_thd_kill, &LOCK_thd_kill, MY_MUTEX_INIT_FAST); | ||||
|   mysql_cond_init(key_COND_wakeup_ready, &COND_wakeup_ready, 0); | ||||
|   mysql_mutex_record_order(&LOCK_thd_kill, &LOCK_thd_data); | ||||
| 
 | ||||
|   /* Variables with default values */ | ||||
|   proc_info="login"; | ||||
| @@ -1883,7 +1884,6 @@ void THD::awake_no_mutex(killed_state state_to_set) | ||||
|   DBUG_PRINT("enter", ("this: %p current_thd: %p  state: %d", | ||||
|                        this, current_thd, (int) state_to_set)); | ||||
|   THD_CHECK_SENTRY(this); | ||||
|   mysql_mutex_assert_owner(&LOCK_thd_data); | ||||
|   mysql_mutex_assert_owner(&LOCK_thd_kill); | ||||
| 
 | ||||
|   print_aborted_warning(3, "KILLED"); | ||||
| @@ -2048,6 +2048,8 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use, | ||||
| 
 | ||||
|   if (needs_thr_lock_abort) | ||||
|   { | ||||
|     bool mutex_released= false; | ||||
|     mysql_mutex_lock(&in_use->LOCK_thd_kill); | ||||
|     mysql_mutex_lock(&in_use->LOCK_thd_data); | ||||
|     /* If not already dying */ | ||||
|     if (in_use->killed != KILL_CONNECTION_HARD) | ||||
| @@ -2064,12 +2066,25 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use, | ||||
|           thread can see those instances (e.g. see partitioning code). | ||||
|         */ | ||||
|         if (!thd_table->needs_reopen()) | ||||
|         { | ||||
|           signalled|= mysql_lock_abort_for_thread(this, thd_table); | ||||
|         } | ||||
|       } | ||||
| #ifdef WITH_WSREP | ||||
|       if (WSREP(this) && wsrep_thd_is_BF(this, false)) | ||||
|       { | ||||
|         WSREP_DEBUG("notify_shared_lock: BF thread %llu query %s" | ||||
|                     " victim %llu query %s", | ||||
|                     this->real_id, wsrep_thd_query(this), | ||||
|                     in_use->real_id, wsrep_thd_query(in_use)); | ||||
|         wsrep_abort_thd(this, in_use, false); | ||||
|         mutex_released= true; | ||||
|       } | ||||
| #endif /* WITH_WSREP */ | ||||
|     } | ||||
|     if (!mutex_released) | ||||
|     { | ||||
|       mysql_mutex_unlock(&in_use->LOCK_thd_data); | ||||
|       mysql_mutex_unlock(&in_use->LOCK_thd_kill); | ||||
|     } | ||||
|     mysql_mutex_unlock(&in_use->LOCK_thd_data); | ||||
|   } | ||||
|   DBUG_RETURN(signalled); | ||||
| } | ||||
| @@ -5288,11 +5303,14 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd) | ||||
| #ifdef WITH_WSREP | ||||
|   /* wsrep applier, replayer and TOI processing threads are ordered
 | ||||
|      by replication provider, relaxed GAP locking protocol can be used | ||||
|      between high priority wsrep threads | ||||
|      between high priority wsrep threads. | ||||
|      Note that wsrep_thd_is_BF() doesn't take LOCK_thd_data for either thd, | ||||
|      the caller should guarantee that the BF state won't change. | ||||
|      (e.g. InnoDB does it by keeping lock_sys.mutex locked) | ||||
|   */ | ||||
|   if (WSREP_ON && | ||||
|       wsrep_thd_is_BF(const_cast<THD *>(thd), false) && | ||||
|       wsrep_thd_is_BF(const_cast<THD *>(other_thd), true)) | ||||
|       wsrep_thd_is_BF(const_cast<THD *>(other_thd), false)) | ||||
|     return 0; | ||||
| #endif /* WITH_WSREP */ | ||||
|   rgi= thd->rgi_slave; | ||||
|   | ||||
| @@ -3470,11 +3470,11 @@ public: | ||||
|   void awake_no_mutex(killed_state state_to_set); | ||||
|   void awake(killed_state state_to_set) | ||||
|   { | ||||
|     mysql_mutex_lock(&LOCK_thd_data); | ||||
|     mysql_mutex_lock(&LOCK_thd_kill); | ||||
|     mysql_mutex_lock(&LOCK_thd_data); | ||||
|     awake_no_mutex(state_to_set); | ||||
|     mysql_mutex_unlock(&LOCK_thd_kill); | ||||
|     mysql_mutex_unlock(&LOCK_thd_data); | ||||
|     mysql_mutex_unlock(&LOCK_thd_kill); | ||||
|   } | ||||
|   void abort_current_cond_wait(bool force); | ||||
|   | ||||
|   | ||||
| @@ -9248,7 +9248,7 @@ THD *find_thread_by_id(longlong id, bool query_id) | ||||
|   return arg.thd; | ||||
| } | ||||
| 
 | ||||
|     mysql_mutex_lock(&thd->LOCK_thd_data); | ||||
| 
 | ||||
| /**
 | ||||
|   kill one thread. | ||||
| 
 | ||||
| @@ -9292,7 +9292,8 @@ kill_one_thread(THD *thd, longlong id, killed_state kill_signal, killed_type typ | ||||
|       faster and do a harder kill than KILL_SYSTEM_THREAD; | ||||
|     */ | ||||
| 
 | ||||
|     mysql_mutex_lock(&tmp->LOCK_thd_data); // for various wsrep* checks below
 | ||||
|     mysql_mutex_lock(&tmp->LOCK_thd_data); // Lock from concurrent usage
 | ||||
| 
 | ||||
| #ifdef WITH_WSREP | ||||
|     if (((thd->security_ctx->master_access & PRIV_KILL_OTHER_USER_PROCESS) || | ||||
|         thd->security_ctx->user_matches(tmp->security_ctx)) && | ||||
| @@ -9307,23 +9308,23 @@ kill_one_thread(THD *thd, longlong id, killed_state kill_signal, killed_type typ | ||||
|       if (tmp->wsrep_aborter && tmp->wsrep_aborter != thd->thread_id) | ||||
|       { | ||||
|         /* victim is in hit list already, bail out */ | ||||
| 	WSREP_DEBUG("victim has wsrep aborter: %lu, skipping awake()", | ||||
|                     tmp->wsrep_aborter); | ||||
| 	WSREP_DEBUG("victim %llu has wsrep aborter: %lu, skipping awake()", | ||||
| 		    id, tmp->wsrep_aborter); | ||||
|         error= 0; | ||||
|       } | ||||
|       else | ||||
| #endif /* WITH_WSREP */ | ||||
|       { | ||||
|         WSREP_DEBUG("kill_one_thread %llu, victim: %llu wsrep_aborter %llu by signal %d", | ||||
|                     thd->thread_id, id, tmp->wsrep_aborter, kill_signal); | ||||
|         WSREP_DEBUG("kill_one_thread victim: %llu wsrep_aborter %lu by signal %d", | ||||
|                     id, tmp->wsrep_aborter, kill_signal); | ||||
|         tmp->awake_no_mutex(kill_signal); | ||||
|         WSREP_DEBUG("victim: %llu taken care of", id); | ||||
|         error= 0; | ||||
|       } | ||||
|     } | ||||
|     else | ||||
|       error= (type == KILL_TYPE_QUERY ? ER_KILL_QUERY_DENIED_ERROR : | ||||
|                                         ER_KILL_DENIED_ERROR); | ||||
| 
 | ||||
|     mysql_mutex_unlock(&tmp->LOCK_thd_data); | ||||
|   } | ||||
|   mysql_mutex_unlock(&tmp->LOCK_thd_kill); | ||||
| @@ -9438,6 +9439,18 @@ static | ||||
| void sql_kill(THD *thd, longlong id, killed_state state, killed_type type) | ||||
| { | ||||
|   uint error; | ||||
| #ifdef WITH_WSREP | ||||
|   if (WSREP(thd)) | ||||
|   { | ||||
|     WSREP_DEBUG("sql_kill called"); | ||||
|     if (thd->wsrep_applier) | ||||
|     { | ||||
|       WSREP_DEBUG("KILL in applying, bailing out here"); | ||||
|       return; | ||||
|     } | ||||
|     WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) | ||||
|   } | ||||
| #endif /* WITH_WSREP */ | ||||
|   if (likely(!(error= kill_one_thread(thd, id, state, type)))) | ||||
|   { | ||||
|     if (!thd->killed) | ||||
| @@ -9447,6 +9460,11 @@ void sql_kill(THD *thd, longlong id, killed_state state, killed_type type) | ||||
|   } | ||||
|   else | ||||
|     my_error(error, MYF(0), id); | ||||
| #ifdef WITH_WSREP | ||||
|   return; | ||||
|  wsrep_error_label: | ||||
|   my_error(ER_CANNOT_USER, MYF(0), wsrep_thd_query(thd)); | ||||
| #endif /* WITH_WSREP */ | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| @@ -9455,6 +9473,18 @@ sql_kill_user(THD *thd, LEX_USER *user, killed_state state) | ||||
| { | ||||
|   uint error; | ||||
|   ha_rows rows; | ||||
| #ifdef WITH_WSREP | ||||
|   if (WSREP(thd)) | ||||
|   { | ||||
|     WSREP_DEBUG("sql_kill_user called"); | ||||
|     if (thd->wsrep_applier) | ||||
|     { | ||||
|       WSREP_DEBUG("KILL in applying, bailing out here"); | ||||
|       return; | ||||
|     } | ||||
|     WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) | ||||
|   } | ||||
| #endif /* WITH_WSREP */ | ||||
|   if (likely(!(error= kill_threads_for_user(thd, user, state, &rows)))) | ||||
|     my_ok(thd, rows); | ||||
|   else | ||||
| @@ -9465,6 +9495,11 @@ sql_kill_user(THD *thd, LEX_USER *user, killed_state state) | ||||
|     */ | ||||
|     my_error(error, MYF(0), user->host.str, user->user.str); | ||||
|   } | ||||
| #ifdef WITH_WSREP | ||||
|   return; | ||||
|  wsrep_error_label: | ||||
|   my_error(ER_CANNOT_USER, MYF(0), user->user.str); | ||||
| #endif /* WITH_WSREP */ | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
|   | ||||
| @@ -3498,8 +3498,8 @@ static my_bool kill_callback(THD *thd, kill_callback_arg *arg) | ||||
|       thd->variables.server_id == arg->slave_server_id) | ||||
|   { | ||||
|     arg->thd= thd; | ||||
|     mysql_mutex_lock(&thd->LOCK_thd_data); | ||||
|     mysql_mutex_lock(&thd->LOCK_thd_kill);    // Lock from delete
 | ||||
|     mysql_mutex_lock(&thd->LOCK_thd_data); | ||||
|     return 1; | ||||
|   } | ||||
|   return 0; | ||||
|   | ||||
| @@ -68,20 +68,15 @@ bool Wsrep_client_service::interrupted( | ||||
|   wsrep::unique_lock<wsrep::mutex>& lock WSREP_UNUSED) const | ||||
| { | ||||
|   DBUG_ASSERT(m_thd == current_thd); | ||||
|   /* Underlying mutex in lock object points to LOCK_thd_data, which
 | ||||
|      protects m_thd->wsrep_trx(), LOCK_thd_kill protects m_thd->killed. | ||||
|      Locking order is: | ||||
|      1) LOCK_thd_data | ||||
|      2) LOCK_thd_kill */ | ||||
|   /* Underlying mutex in lock object points to THD::LOCK_thd_data, which
 | ||||
|   protects m_thd->wsrep_trx() and protects us from thd delete. */ | ||||
|   mysql_mutex_assert_owner(static_cast<mysql_mutex_t*>(lock.mutex()->native())); | ||||
|   mysql_mutex_lock(&m_thd->LOCK_thd_kill); | ||||
|   bool ret= (m_thd->killed != NOT_KILLED); | ||||
|   if (ret) | ||||
|   { | ||||
|     WSREP_DEBUG("wsrep state is interrupted, THD::killed %d trx state %d", | ||||
|                 m_thd->killed,  m_thd->wsrep_trx().state()); | ||||
|   } | ||||
|   mysql_mutex_unlock(&m_thd->LOCK_thd_kill); | ||||
|   return ret; | ||||
| } | ||||
| 
 | ||||
|   | ||||
| @@ -2134,6 +2134,11 @@ static int wsrep_TOI_event_buf(THD* thd, uchar** buf, size_t* buf_len) | ||||
|   case SQLCOM_DROP_TABLE: | ||||
|     err= wsrep_drop_table_query(thd, buf, buf_len); | ||||
|     break; | ||||
|   case SQLCOM_KILL: | ||||
|     WSREP_DEBUG("KILL as TOI: %s", thd->query()); | ||||
|     err= wsrep_to_buf_helper(thd, thd->query(), thd->query_length(), | ||||
|                              buf, buf_len); | ||||
|     break; | ||||
|   case SQLCOM_CREATE_ROLE: | ||||
|     if (sp_process_definer(thd)) | ||||
|     { | ||||
| @@ -2547,7 +2552,11 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, | ||||
|                   request_thd, granted_thd); | ||||
|     ticket->wsrep_report(wsrep_debug); | ||||
| 
 | ||||
|     mysql_mutex_lock(&granted_thd->LOCK_thd_data); | ||||
|     /* Here we will call wsrep_abort_transaction so we should hold
 | ||||
|     THD::LOCK_thd_data to protect victim from concurrent usage | ||||
|     and THD::LOCK_thd_kill to protect from disconnect or delete. */ | ||||
|     wsrep_thd_LOCK(granted_thd); | ||||
| 
 | ||||
|     if (wsrep_thd_is_toi(granted_thd) || | ||||
|         wsrep_thd_is_applying(granted_thd)) | ||||
|     { | ||||
| @@ -2555,21 +2564,22 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, | ||||
|       { | ||||
|         WSREP_DEBUG("BF thread waiting for SR in aborting state"); | ||||
|         ticket->wsrep_report(wsrep_debug); | ||||
|         mysql_mutex_unlock(&granted_thd->LOCK_thd_data); | ||||
|         wsrep_thd_UNLOCK(granted_thd); | ||||
|       } | ||||
|       else if (wsrep_thd_is_SR(granted_thd) && !wsrep_thd_is_SR(request_thd)) | ||||
|       { | ||||
|         WSREP_MDL_LOG(INFO, "MDL conflict, DDL vs SR",  | ||||
|         WSREP_MDL_LOG(INFO, "MDL conflict, DDL vs SR", | ||||
|                       schema, schema_len, request_thd, granted_thd); | ||||
|         mysql_mutex_unlock(&granted_thd->LOCK_thd_data); | ||||
|         wsrep_abort_thd(request_thd, granted_thd, 1); | ||||
|         mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data); | ||||
|         mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill); | ||||
|       } | ||||
|       else | ||||
|       { | ||||
|         WSREP_MDL_LOG(INFO, "MDL BF-BF conflict", schema, schema_len, | ||||
|                       request_thd, granted_thd); | ||||
|         ticket->wsrep_report(true); | ||||
|         mysql_mutex_unlock(&granted_thd->LOCK_thd_data); | ||||
|         wsrep_thd_UNLOCK(granted_thd); | ||||
|         unireg_abort(1); | ||||
|       } | ||||
|     } | ||||
| @@ -2578,15 +2588,16 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, | ||||
|     { | ||||
|       WSREP_DEBUG("BF thread waiting for FLUSH"); | ||||
|       ticket->wsrep_report(wsrep_debug); | ||||
|       mysql_mutex_unlock(&granted_thd->LOCK_thd_data); | ||||
|       wsrep_thd_UNLOCK(granted_thd); | ||||
|     } | ||||
|     else if (request_thd->lex->sql_command == SQLCOM_DROP_TABLE) | ||||
|     { | ||||
|       WSREP_DEBUG("DROP caused BF abort, conf %s", | ||||
|                   wsrep_thd_transaction_state_str(granted_thd)); | ||||
|       ticket->wsrep_report(wsrep_debug); | ||||
|       mysql_mutex_unlock(&granted_thd->LOCK_thd_data); | ||||
|       wsrep_abort_thd(request_thd, granted_thd, 1); | ||||
|       mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data); | ||||
|       mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill); | ||||
|     } | ||||
|     else | ||||
|     { | ||||
| @@ -2595,8 +2606,9 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, | ||||
|       ticket->wsrep_report(wsrep_debug); | ||||
|       if (granted_thd->wsrep_trx().active()) | ||||
|       { | ||||
|         mysql_mutex_unlock(&granted_thd->LOCK_thd_data); | ||||
|         wsrep_abort_thd(request_thd, granted_thd, 1); | ||||
|         wsrep_abort_thd(request_thd, granted_thd, true); | ||||
|         mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data); | ||||
|         mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill); | ||||
|       } | ||||
|       else | ||||
|       { | ||||
| @@ -2604,10 +2616,11 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, | ||||
|           Granted_thd is likely executing with wsrep_on=0. If the requesting | ||||
|           thd is BF, BF abort and wait. | ||||
|         */ | ||||
|         mysql_mutex_unlock(&granted_thd->LOCK_thd_data); | ||||
|         if (wsrep_thd_is_BF(request_thd, FALSE)) | ||||
|         { | ||||
|           ha_abort_transaction(request_thd, granted_thd, TRUE); | ||||
|           mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data); | ||||
|           mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill); | ||||
|         } | ||||
|         else | ||||
|         { | ||||
| @@ -2629,6 +2642,7 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, | ||||
| static bool abort_replicated(THD *thd) | ||||
| { | ||||
|   bool ret_code= false; | ||||
|   wsrep_thd_LOCK(thd); | ||||
|   if (thd->wsrep_trx().state() == wsrep::transaction::s_committing) | ||||
|   { | ||||
|     WSREP_DEBUG("aborting replicated trx: %llu", (ulonglong)(thd->real_id)); | ||||
| @@ -2636,6 +2650,9 @@ static bool abort_replicated(THD *thd) | ||||
|     (void)wsrep_abort_thd(thd, thd, TRUE); | ||||
|     ret_code= true; | ||||
|   } | ||||
|   else | ||||
|     wsrep_thd_UNLOCK(thd); | ||||
| 
 | ||||
|   return ret_code; | ||||
| } | ||||
| 
 | ||||
| @@ -2673,8 +2690,10 @@ static my_bool have_client_connections(THD *thd, void*) | ||||
|                      (longlong) thd->thread_id)); | ||||
|   if (is_client_connection(thd) && thd->killed == KILL_CONNECTION) | ||||
|   { | ||||
|     WSREP_DEBUG("Informing thread %lld that it's time to die", | ||||
|                 thd->thread_id); | ||||
|     (void)abort_replicated(thd); | ||||
|     return 1; | ||||
|     return true; | ||||
|   } | ||||
|   return 0; | ||||
| } | ||||
| @@ -2711,6 +2730,8 @@ static my_bool kill_all_threads(THD *thd, THD *caller_thd) | ||||
| { | ||||
|   DBUG_PRINT("quit", ("Informing thread %lld that it's time to die", | ||||
|                       (longlong) thd->thread_id)); | ||||
|   WSREP_DEBUG("Informing thread %lld that it's time to die", | ||||
| 	      thd->thread_id); | ||||
|   /* We skip slave threads & scheduler on this first loop through. */ | ||||
|   if (is_client_connection(thd) && thd != caller_thd) | ||||
|   { | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| /* Copyright (C) 2013 Codership Oy <info@codership.com>
 | ||||
| /* Copyright (C) 2013-2021 Codership Oy <info@codership.com>
 | ||||
| 
 | ||||
|    This program is free software; you can redistribute it and/or modify | ||||
|    it under the terms of the GNU General Public License as published by | ||||
| @@ -314,7 +314,8 @@ int wsrep_abort_thd(THD *bf_thd_ptr, THD *victim_thd_ptr, my_bool signal) | ||||
|   THD *victim_thd= (THD *) victim_thd_ptr; | ||||
|   THD *bf_thd= (THD *) bf_thd_ptr; | ||||
| 
 | ||||
|   mysql_mutex_lock(&victim_thd->LOCK_thd_data); | ||||
|   mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); | ||||
|   mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill); | ||||
| 
 | ||||
|   /* Note that when you use RSU node is desynced from cluster, thus WSREP(thd)
 | ||||
|   might not be true. | ||||
| @@ -327,16 +328,14 @@ int wsrep_abort_thd(THD *bf_thd_ptr, THD *victim_thd_ptr, my_bool signal) | ||||
|   { | ||||
|       WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", (bf_thd) ? | ||||
|                   (long long)bf_thd->real_id : 0, (long long)victim_thd->real_id); | ||||
|       mysql_mutex_unlock(&victim_thd->LOCK_thd_data); | ||||
|       ha_abort_transaction(bf_thd, victim_thd, signal); | ||||
|       mysql_mutex_lock(&victim_thd->LOCK_thd_data); | ||||
|   } | ||||
|   else | ||||
|   { | ||||
|     WSREP_DEBUG("wsrep_abort_thd not effective: %p %p", bf_thd, victim_thd); | ||||
|     wsrep_thd_UNLOCK(victim_thd); | ||||
|   } | ||||
| 
 | ||||
|   mysql_mutex_unlock(&victim_thd->LOCK_thd_data); | ||||
|   DBUG_RETURN(1); | ||||
| } | ||||
| 
 | ||||
| @@ -345,6 +344,9 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd) | ||||
|   WSREP_LOG_THD(bf_thd, "BF aborter before"); | ||||
|   WSREP_LOG_THD(victim_thd, "victim before"); | ||||
| 
 | ||||
|   mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data); | ||||
|   mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill); | ||||
| 
 | ||||
|   DBUG_EXECUTE_IF("sync.wsrep_bf_abort", | ||||
|                   { | ||||
|                     const char act[]= | ||||
| @@ -358,7 +360,7 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd) | ||||
|   if (WSREP(victim_thd) && !victim_thd->wsrep_trx().active()) | ||||
|   { | ||||
|     WSREP_DEBUG("wsrep_bf_abort, BF abort for non active transaction"); | ||||
|     switch (victim_thd->wsrep_trx().state())  | ||||
|     switch (victim_thd->wsrep_trx().state()) | ||||
|     { | ||||
|     case wsrep::transaction::s_aborting: /* fall through */ | ||||
|     case wsrep::transaction::s_aborted: | ||||
| @@ -367,7 +369,13 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd) | ||||
|     default: | ||||
|       break; | ||||
|     } | ||||
|     /* Test: galera_create_table_as_select. Here we enter wsrep-lib
 | ||||
|     where LOCK_thd_data will be acquired, thus we need to release it. | ||||
|     However, we can still hold LOCK_thd_kill to protect from | ||||
|     disconnect or delete. */ | ||||
|     mysql_mutex_unlock(&victim_thd->LOCK_thd_data); | ||||
|     wsrep_start_transaction(victim_thd, victim_thd->wsrep_next_trx_id()); | ||||
|     mysql_mutex_lock(&victim_thd->LOCK_thd_data); | ||||
|   } | ||||
| 
 | ||||
|   bool ret; | ||||
| @@ -375,11 +383,21 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd) | ||||
| 
 | ||||
|   if (wsrep_thd_is_toi(bf_thd)) | ||||
|   { | ||||
|     /* Here we enter wsrep-lib where LOCK_thd_data will be acquired,
 | ||||
|     thus we need to release it. However, we can still hold | ||||
|     LOCK_thd_kill to protect from disconnect or delete. */ | ||||
|     mysql_mutex_unlock(&victim_thd->LOCK_thd_data); | ||||
|     ret= victim_thd->wsrep_cs().total_order_bf_abort(bf_seqno); | ||||
|     mysql_mutex_lock(&victim_thd->LOCK_thd_data); | ||||
|   } | ||||
|   else | ||||
|   { | ||||
|     /* Test: mysql-wsrep-features#165. Here we enter wsrep-lib
 | ||||
|     where LOCK_thd_data will be acquired and later LOCK_thd_kill, | ||||
|     thus we need to release them. */ | ||||
|     wsrep_thd_UNLOCK(victim_thd); | ||||
|     ret= victim_thd->wsrep_cs().bf_abort(bf_seqno); | ||||
|     wsrep_thd_LOCK(victim_thd); | ||||
|   } | ||||
|   if (ret) | ||||
|   { | ||||
|   | ||||
| @@ -4761,7 +4761,7 @@ static void innobase_kill_query(handlerton*, THD *thd, enum thd_kill_levels) | ||||
|   { | ||||
|     ut_ad(trx->mysql_thd == thd); | ||||
| #ifdef WITH_WSREP | ||||
|     if (trx->is_wsrep() && wsrep_thd_is_aborting(thd)) | ||||
|     if (wsrep_thd_is_aborting(thd) || trx->lock.was_chosen_as_wsrep_victim) | ||||
|       /* if victim has been signaled by BF thread and/or aborting is already
 | ||||
|       progressing, following query aborting is not necessary any more. | ||||
|       Also, BF thread should own trx mutex for the victim. */ | ||||
| @@ -4771,6 +4771,8 @@ static void innobase_kill_query(handlerton*, THD *thd, enum thd_kill_levels) | ||||
|     if (lock_t *lock= trx->lock.wait_lock) | ||||
|     { | ||||
|       trx_mutex_enter(trx); | ||||
|       if (trx->is_wsrep() && wsrep_thd_is_aborting(thd)) | ||||
|         trx->lock.was_chosen_as_deadlock_victim= TRUE; | ||||
|       lock_cancel_waiting_and_release(lock); | ||||
|       trx_mutex_exit(trx); | ||||
|     } | ||||
| @@ -18639,6 +18641,40 @@ static struct st_mysql_storage_engine innobase_storage_engine= | ||||
| 
 | ||||
| #ifdef WITH_WSREP | ||||
| 
 | ||||
| static | ||||
| void | ||||
| wsrep_kill_victim( | ||||
| 	MYSQL_THD const bf_thd, | ||||
| 	MYSQL_THD thd, | ||||
| 	trx_t* victim_trx, | ||||
| 	my_bool signal) | ||||
| { | ||||
|   DBUG_ENTER("wsrep_kill_victim"); | ||||
| 
 | ||||
|   /* Mark transaction as a victim for Galera abort */ | ||||
|   victim_trx->lock.was_chosen_as_wsrep_victim= true; | ||||
|   if (wsrep_thd_set_wsrep_aborter(bf_thd, thd)) | ||||
|   { | ||||
|     WSREP_DEBUG("innodb kill transaction skipped due to wsrep_aborter set"); | ||||
|     wsrep_thd_UNLOCK(thd); | ||||
|     DBUG_VOID_RETURN; | ||||
|   } | ||||
| 
 | ||||
|   if (wsrep_thd_bf_abort(bf_thd, thd, signal)) | ||||
|   { | ||||
|     lock_t*  wait_lock= victim_trx->lock.wait_lock; | ||||
|     if (wait_lock) | ||||
|     { | ||||
|       DBUG_ASSERT(victim_trx->is_wsrep()); | ||||
|       WSREP_DEBUG("victim has wait flag: %lu", thd_get_thread_id(thd)); | ||||
|       victim_trx->lock.was_chosen_as_deadlock_victim= TRUE; | ||||
|       lock_cancel_waiting_and_release(wait_lock); | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   DBUG_VOID_RETURN; | ||||
| } | ||||
| 
 | ||||
| /** This function is used to kill one transaction.
 | ||||
| 
 | ||||
| This transaction was open on this node (not-yet-committed), and a | ||||
| @@ -18662,87 +18698,65 @@ comparison as in the local certification failure. | ||||
| @param[in]	bf_thd		Brute force (BF) thread | ||||
| @param[in,out]	victim_trx	Victim trx to be killed | ||||
| @param[in]	signal		Should victim be signaled */ | ||||
| UNIV_INTERN | ||||
| void | ||||
| wsrep_innobase_kill_one_trx( | ||||
| 	THD* bf_thd, | ||||
| 	MYSQL_THD const bf_thd, | ||||
| 	trx_t *victim_trx, | ||||
| 	bool signal) | ||||
| 	my_bool signal) | ||||
| { | ||||
| 	ut_ad(bf_thd); | ||||
| 	ut_ad(victim_trx); | ||||
| 	ut_ad(lock_mutex_own()); | ||||
| 	ut_ad(trx_mutex_own(victim_trx)); | ||||
|   ut_ad(bf_thd); | ||||
|   ut_ad(victim_trx); | ||||
|   ut_ad(lock_mutex_own()); | ||||
|   ut_ad(trx_mutex_own(victim_trx)); | ||||
| 
 | ||||
| 	DBUG_ENTER("wsrep_innobase_kill_one_trx"); | ||||
|   DBUG_ENTER("wsrep_innobase_kill_one_trx"); | ||||
|   THD *thd= (THD *) victim_trx->mysql_thd; | ||||
|   /* Note that bf_trx might not exist here e.g. on MDL conflict
 | ||||
|   case (test: galera_concurrent_ctas).*/ | ||||
|   trx_t* bf_trx= (trx_t*)thd_to_trx(bf_thd); | ||||
| 
 | ||||
| 	THD *thd= (THD *) victim_trx->mysql_thd; | ||||
| 	ut_ad(thd); | ||||
| 	/* Note that bf_trx might not exist here e.g. on MDL conflict
 | ||||
| 	case (test: galera_concurrent_ctas). Similarly, BF thread | ||||
| 	could be also acquiring MDL-lock causing victim to be | ||||
| 	aborted. However, we have not yet called innobase_trx_init() | ||||
| 	for BF transaction (test: galera_many_columns)*/ | ||||
| 	trx_t* bf_trx= thd_to_trx(bf_thd); | ||||
| 	DBUG_ASSERT(wsrep_on(bf_thd)); | ||||
|   if (!thd) | ||||
|   { | ||||
|     WSREP_WARN("no THD for trx: " TRX_ID_FMT, victim_trx->id); | ||||
|     DBUG_VOID_RETURN; | ||||
|   } | ||||
| 
 | ||||
| 	wsrep_thd_LOCK(thd); | ||||
|   /* Here we need to lock THD::LOCK_thd_data to protect from
 | ||||
|   concurrent usage or disconnect or delete. */ | ||||
|   DEBUG_SYNC(bf_thd, "wsrep_before_BF_victim_lock"); | ||||
|   wsrep_thd_LOCK(thd); | ||||
|   DEBUG_SYNC(bf_thd, "wsrep_after_BF_victim_lock"); | ||||
| 
 | ||||
| 	WSREP_LOG_CONFLICT(bf_thd, thd, TRUE); | ||||
|   WSREP_LOG_CONFLICT(bf_thd, thd, TRUE); | ||||
| 
 | ||||
| 	WSREP_DEBUG("Aborter %s trx_id: " TRX_ID_FMT " thread: %ld " | ||||
| 		"seqno: %lld client_state: %s client_mode: %s transaction_mode: %s " | ||||
| 		"query: %s", | ||||
| 		wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal", | ||||
| 		bf_trx ? bf_trx->id : TRX_ID_MAX, | ||||
| 		thd_get_thread_id(bf_thd), | ||||
| 		wsrep_thd_trx_seqno(bf_thd), | ||||
| 		wsrep_thd_client_state_str(bf_thd), | ||||
| 		wsrep_thd_client_mode_str(bf_thd), | ||||
| 		wsrep_thd_transaction_state_str(bf_thd), | ||||
| 		wsrep_thd_query(bf_thd)); | ||||
|   WSREP_DEBUG("wsrep_innobase_kill_one_trx: Aborter %s " | ||||
| 	      "trx_id: " TRX_ID_FMT " thread: %ld " | ||||
| 	      "seqno: %lld client_state: %s client_mode: %s " | ||||
| 	      "trx_state %s query: %s", | ||||
| 	      wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal", | ||||
| 	      bf_trx ? bf_trx->id : TRX_ID_MAX, | ||||
| 	      thd_get_thread_id(bf_thd), | ||||
| 	      wsrep_thd_trx_seqno(bf_thd), | ||||
| 	      wsrep_thd_client_state_str(bf_thd), | ||||
| 	      wsrep_thd_client_mode_str(bf_thd), | ||||
| 	      wsrep_thd_transaction_state_str(bf_thd), | ||||
| 	      wsrep_thd_query(bf_thd)); | ||||
| 
 | ||||
| 	WSREP_DEBUG("Victim %s trx_id: " TRX_ID_FMT " thread: %ld " | ||||
| 		"seqno: %lld client_state: %s  client_mode: %s transaction_mode: %s " | ||||
| 		"query: %s", | ||||
| 		wsrep_thd_is_BF(thd, false) ? "BF" : "normal", | ||||
| 		victim_trx->id, | ||||
| 		thd_get_thread_id(thd), | ||||
| 		wsrep_thd_trx_seqno(thd), | ||||
| 		wsrep_thd_client_state_str(thd), | ||||
| 		wsrep_thd_client_mode_str(thd), | ||||
| 		wsrep_thd_transaction_state_str(thd), | ||||
| 		wsrep_thd_query(thd)); | ||||
|   WSREP_DEBUG("wsrep_innobase_kill_one_trx: Victim %s " | ||||
| 	      "trx_id: " TRX_ID_FMT " thread: %ld " | ||||
| 	      "seqno: %lld client_state: %s client_mode: %s " | ||||
| 	      "trx_state %s query: %s", | ||||
| 	      wsrep_thd_is_BF(thd, false) ? "BF" : "normal", | ||||
| 	      victim_trx->id, | ||||
| 	      thd_get_thread_id(thd), | ||||
| 	      wsrep_thd_trx_seqno(thd), | ||||
| 	      wsrep_thd_client_state_str(thd), | ||||
| 	      wsrep_thd_client_mode_str(thd), | ||||
| 	      wsrep_thd_transaction_state_str(thd), | ||||
| 	      wsrep_thd_query(thd)); | ||||
| 
 | ||||
| 	/* Mark transaction as a victim for Galera abort */ | ||||
| 	victim_trx->lock.was_chosen_as_wsrep_victim= true; | ||||
| 	if (wsrep_thd_set_wsrep_aborter(bf_thd, thd)) | ||||
| 	{ | ||||
| 	  WSREP_DEBUG("innodb kill transaction skipped due to wsrep_aborter set"); | ||||
| 	  wsrep_thd_UNLOCK(thd); | ||||
| 	  DBUG_VOID_RETURN; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Note that we need to release this as it will be acquired
 | ||||
| 	below in wsrep-lib */ | ||||
| 	wsrep_thd_UNLOCK(thd); | ||||
| 	DEBUG_SYNC(bf_thd, "before_wsrep_thd_abort"); | ||||
| 
 | ||||
| 	if (wsrep_thd_bf_abort(bf_thd, thd, signal)) | ||||
| 	{ | ||||
| 		lock_t*  wait_lock = victim_trx->lock.wait_lock; | ||||
| 		if (wait_lock) { | ||||
| 			DBUG_ASSERT(victim_trx->is_wsrep()); | ||||
| 			WSREP_DEBUG("victim has wait flag: %lu", | ||||
| 				    thd_get_thread_id(thd)); | ||||
| 
 | ||||
| 			WSREP_DEBUG("canceling wait lock"); | ||||
| 			victim_trx->lock.was_chosen_as_deadlock_victim= TRUE; | ||||
| 			lock_cancel_waiting_and_release(wait_lock); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	DBUG_VOID_RETURN; | ||||
|   wsrep_kill_victim(bf_thd, thd, victim_trx, signal); | ||||
|   DBUG_VOID_RETURN; | ||||
| } | ||||
| 
 | ||||
| /** This function forces the victim transaction to abort. Aborting the
 | ||||
| @@ -18762,29 +18776,42 @@ wsrep_abort_transaction( | ||||
| 	THD *victim_thd, | ||||
| 	my_bool signal) | ||||
| { | ||||
| 	DBUG_ENTER("wsrep_abort_transaction"); | ||||
| 	ut_ad(bf_thd); | ||||
| 	ut_ad(victim_thd); | ||||
|   /* Note that victim thd is protected with
 | ||||
|   THD::LOCK_thd_data and THD::LOCK_thd_kill here. */ | ||||
|   trx_t* victim_trx= thd_to_trx(victim_thd); | ||||
|   trx_t* bf_trx= thd_to_trx(bf_thd); | ||||
|   WSREP_DEBUG("wsrep_abort_transaction: BF:" | ||||
| 	      " thread %ld client_state %s client_mode %s" | ||||
| 	      " trans_state %s query %s trx " TRX_ID_FMT, | ||||
| 	      thd_get_thread_id(bf_thd), | ||||
| 	      wsrep_thd_client_state_str(bf_thd), | ||||
| 	      wsrep_thd_client_mode_str(bf_thd), | ||||
| 	      wsrep_thd_transaction_state_str(bf_thd), | ||||
| 	      wsrep_thd_query(bf_thd), | ||||
| 	      bf_trx ? bf_trx->id : 0); | ||||
| 
 | ||||
| 	trx_t* victim_trx	= thd_to_trx(victim_thd); | ||||
|   WSREP_DEBUG("wsrep_abort_transaction: victim:" | ||||
| 	      " thread %ld client_state %s client_mode %s" | ||||
| 	      " trans_state %s query %s trx " TRX_ID_FMT, | ||||
| 	      thd_get_thread_id(victim_thd), | ||||
| 	      wsrep_thd_client_state_str(victim_thd), | ||||
| 	      wsrep_thd_client_mode_str(victim_thd), | ||||
| 	      wsrep_thd_transaction_state_str(victim_thd), | ||||
| 	      wsrep_thd_query(victim_thd), | ||||
| 	      victim_trx ? victim_trx->id : 0); | ||||
| 
 | ||||
| 	WSREP_DEBUG("abort transaction: BF: %s victim: %s victim conf: %s", | ||||
| 			wsrep_thd_query(bf_thd), | ||||
| 			wsrep_thd_query(victim_thd), | ||||
| 			wsrep_thd_transaction_state_str(victim_thd)); | ||||
| 
 | ||||
| 	if (victim_trx) { | ||||
| 		lock_mutex_enter(); | ||||
| 		trx_mutex_enter(victim_trx); | ||||
| 		wsrep_innobase_kill_one_trx(bf_thd, victim_trx, signal); | ||||
| 		trx_mutex_exit(victim_trx); | ||||
| 		lock_mutex_exit(); | ||||
| 		DBUG_VOID_RETURN; | ||||
| 	} else { | ||||
| 		wsrep_thd_bf_abort(bf_thd, victim_thd, signal); | ||||
| 	} | ||||
| 
 | ||||
| 	DBUG_VOID_RETURN; | ||||
|   if (victim_trx) | ||||
|   { | ||||
|     lock_mutex_enter(); | ||||
|     trx_mutex_enter(victim_trx); | ||||
|     wsrep_kill_victim(bf_thd, victim_thd, victim_trx, signal); | ||||
|     lock_mutex_exit(); | ||||
|     trx_mutex_exit(victim_trx); | ||||
|   } | ||||
|   else | ||||
|   { | ||||
|     wsrep_thd_bf_abort(bf_thd, victim_thd, signal); | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| static | ||||
|   | ||||
| @@ -208,7 +208,11 @@ innobase_casedn_str( | ||||
| 	char*	a);	/*!< in/out: string to put in lower case */ | ||||
| 
 | ||||
| #ifdef WITH_WSREP | ||||
| void wsrep_innobase_kill_one_trx(THD *bf_thd, trx_t *victim_trx, bool signal); | ||||
| void | ||||
| wsrep_innobase_kill_one_trx( | ||||
| 	THD* bf_thd, | ||||
| 	trx_t *victim_trx, | ||||
| 	my_bool signal); | ||||
| ulint wsrep_innobase_mysql_sort(int mysql_type, uint charset_number, | ||||
|                              unsigned char* str, ulint str_length, | ||||
|                              ulint buf_length); | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| /*****************************************************************************
 | ||||
| 
 | ||||
| Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. | ||||
| Copyright (c) 2014, 2020, MariaDB Corporation. | ||||
| Copyright (c) 2014, 2021, MariaDB Corporation. | ||||
| 
 | ||||
| This program is free software; you can redistribute it and/or modify it under | ||||
| the terms of the GNU General Public License as published by the Free Software | ||||
| @@ -185,13 +185,11 @@ lock_wait_table_reserve_slot( | ||||
| check if lock timeout was for priority thread, | ||||
| as a side effect trigger lock monitor | ||||
| @param[in]    trx    transaction owning the lock | ||||
| @param[in]    locked true if trx and lock_sys.mutex are owned | ||||
| @return	false for regular lock timeout */ | ||||
| static | ||||
| bool | ||||
| wsrep_is_BF_lock_timeout( | ||||
| 	const trx_t*	trx, | ||||
| 	bool		locked = true) | ||||
| 	const trx_t*	trx) | ||||
| { | ||||
| 	bool long_wait= (trx->error_state != DB_DEADLOCK && | ||||
| 			 srv_monitor_timer && trx->is_wsrep() && | ||||
| @@ -205,21 +203,10 @@ wsrep_is_BF_lock_timeout( | ||||
| 		ib::info() << "WSREP: BF lock wait long for trx:" << trx->id | ||||
| 			   << " query: " << wsrep_thd_query(trx->mysql_thd); | ||||
| 
 | ||||
| 		if (!locked) | ||||
| 			lock_mutex_enter(); | ||||
| 
 | ||||
| 		ut_ad(lock_mutex_own()); | ||||
| 
 | ||||
| 		trx_print_latched(stderr, trx, 3000); | ||||
| 		/* Note this will release lock_sys mutex */ | ||||
| 		lock_print_info_all_transactions(stderr); | ||||
| 
 | ||||
| 		if (locked) | ||||
| 			lock_mutex_enter(); | ||||
| 
 | ||||
| 		return was_wait; | ||||
| 	} else | ||||
| 	} else { | ||||
| 		return false; | ||||
| 	} | ||||
| } | ||||
| #endif /* WITH_WSREP */ | ||||
| 
 | ||||
| @@ -388,7 +375,7 @@ lock_wait_suspend_thread( | ||||
| 	    && wait_time > (double) lock_wait_timeout | ||||
| #ifdef WITH_WSREP | ||||
| 	    && (!trx->is_wsrep() | ||||
| 		|| (!wsrep_is_BF_lock_timeout(trx, false) | ||||
| 		|| (!wsrep_is_BF_lock_timeout(trx) | ||||
| 		    && trx->error_state != DB_DEADLOCK)) | ||||
| #endif /* WITH_WSREP */ | ||||
| 	    ) { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user