1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-11 15:22:09 +03:00

MDEV-23328 Server hang due to Galera lock conflict resolution

A mutex order violation occurs when a wsrep BF thread kills a conflicting trx;
the stack is

          wsrep_thd_LOCK()
          wsrep_kill_victim()
          lock_rec_other_has_conflicting()
          lock_clust_rec_read_check_and_lock()
          row_search_mvcc()
          ha_innobase::index_read()
          ha_innobase::rnd_pos()
          handler::ha_rnd_pos()
          handler::rnd_pos_by_record()
          handler::ha_rnd_pos_by_record()
          Rows_log_event::find_row()
          Update_rows_log_event::do_exec_row()
          Rows_log_event::do_apply_event()
          Log_event::apply_event()
          wsrep_apply_events()

and mutexes are taken in the order

          lock_sys->mutex -> victim_trx->mutex -> victim_thread->LOCK_thd_data

When a normal KILL statement is executed, the stack is

          innobase_kill_query()
          kill_handlerton()
          plugin_foreach_with_mask()
          ha_kill_query()
          THD::awake()
          kill_one_thread()

and mutexes are taken in the order

          victim_thread->LOCK_thd_data -> lock_sys->mutex -> victim_trx->mutex

This patch is the plan D variant for fixing the potential mutex locking
order violation exercised by BF aborting and KILL command execution.

In this approach, the KILL command is replicated as a TOI operation.
This guarantees total isolation for the KILL command execution
in the first node: there is no concurrent replication applying
and no concurrent DDL executing. Therefore there is no risk of
BF aborting happening in parallel with KILL command execution
either. Potential mutex deadlocks between the different mutex
access paths of KILL command execution and BF aborting therefore
cannot happen.

In this approach, TOI replication is used purely as a means
to provide isolated KILL command execution in the first node.
The KILL command should not (and must not) be applied in secondary
nodes. In this patch, we ensure this by skipping KILL
execution in secondary nodes, in the applying phase, where we
bail out if an applier thread is trying to execute a KILL command.
This is effective, but skipping the applying of the KILL command
could happen much earlier as well.

This patch also fixes unprotected calls to wsrep_thd_abort,
which will use wsrep_abort_transaction. This is fixed
by holding THD::LOCK_thd_data while we abort the transaction.

Reviewed-by: Jan Lindström <jan.lindstrom@mariadb.com>
This commit is contained in:
sjaakola
2021-10-21 14:49:51 +03:00
committed by Oleksandr Byelkin
parent d5bc05798f
commit ef2dbb8dbc
21 changed files with 311 additions and 403 deletions

View File

@ -68,9 +68,9 @@ f1 f2 f3
10 10 0 10 10 0
INSERT INTO t1 VALUES (7,7,7); INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8); INSERT INTO t1 VALUES (8,8,8);
DROP TABLE t1; SELECT COUNT(*) FROM t1;
test scenario 2 COUNT(*)
connection node_1; 7
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 int, f3 int, unique key keyj (f2)); CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 int, f3 int, unique key keyj (f2));
INSERT INTO t1 VALUES (1, 1, 0); INSERT INTO t1 VALUES (1, 1, 0);
INSERT INTO t1 VALUES (3, 3, 0); INSERT INTO t1 VALUES (3, 3, 0);
@ -92,9 +92,9 @@ SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug='; SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync'; SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync';
connection node_1; connection node_1;
COMMIT; SELECT COUNT(*) FROM t1;
connection node_1a; COUNT(*)
SET SESSION wsrep_on = 0; 7
SET SESSION wsrep_on = 1; SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug='; SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_replay_cb"; SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_replay_cb";
@ -125,6 +125,7 @@ f1 f2 f3
3 3 1 3 3 1
4 4 2 4 4 2
5 5 2 5 5 2
8 8 8
10 10 0 10 10 0
INSERT INTO t1 VALUES (7,7,7); INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8); INSERT INTO t1 VALUES (8,8,8);

View File

@ -1,54 +0,0 @@
connection node_2;
connection node_1;
connection node_2;
CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB;
insert into t1 values (NULL,1);
connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
connection node_2a;
truncate t1;
insert into t1 values (1,0);
begin;
update t1 set b=2 where a=1;
connection node_2;
set session wsrep_sync_wait=0;
connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2;
connection node_2b;
SET GLOBAL debug_dbug = "d,sync.before_wsrep_thd_abort";
connection node_1;
select * from t1;
a b
1 0
update t1 set b= 1 where a=1;
connection node_2b;
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.before_wsrep_thd_abort_reached";
connection node_2;
SET DEBUG_SYNC= 'before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill';
connection node_2b;
SET DEBUG_SYNC='now WAIT_FOR awake_reached';
SET GLOBAL debug_dbug = "";
SET DEBUG_SYNC = "now SIGNAL signal.before_wsrep_thd_abort";
SET DEBUG_SYNC = "now SIGNAL continue_kill";
connection node_2;
connection node_2a;
select * from t1;
connection node_2;
SET DEBUG_SYNC = "RESET";
drop table t1;
disconnect node_2a;
connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
connection node_2a;
CREATE TABLE t1 (i int primary key);
SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
INSERT INTO t1 VALUES (1);
connection node_2;
SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
SET DEBUG_SYNC='RESET';
connection node_2a;
connection node_2;
select * from t1;
i
1
disconnect node_2a;
connection node_1;
drop table t1;

View File

@ -23,22 +23,6 @@ connection node_1a;
connection node_1b; connection node_1b;
connection node_2; connection node_2;
connection node_2a; connection node_2a;
connection node_1;
SET SESSION wsrep_sync_wait=15;
SELECT COUNT(*) FROM parent;
COUNT(*)
20001
SELECT COUNT(*) FROM child;
COUNT(*)
10000
connection node_2;
SET SESSION wsrep_sync_wait=15;
SELECT COUNT(*) FROM parent;
COUNT(*)
20001
SELECT COUNT(*) FROM child;
COUNT(*)
10000
DROP TABLE child; DROP TABLE child;
DROP TABLE parent; DROP TABLE parent;
DROP TABLE ten; DROP TABLE ten;

View File

@ -140,6 +140,14 @@ SELECT * FROM t1;
# original state in node 1 # original state in node 1
INSERT INTO t1 VALUES (7,7,7); INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8); INSERT INTO t1 VALUES (8,8,8);
SELECT COUNT(*) FROM t1;
SELECT * FROM t1;
--connection node_1
--let $wait_condition = SELECT COUNT(*) = 7 FROM t1
--source include/wait_condition.inc
SELECT COUNT(*) FROM t1;
SELECT * FROM t1;
DROP TABLE t1; DROP TABLE t1;
@ -199,9 +207,9 @@ INSERT INTO t1 VALUES (5, 5, 2);
--source include/galera_set_sync_point.inc --source include/galera_set_sync_point.inc
--connection node_1 --connection node_1
--send COMMIT --let $wait_condition = SELECT COUNT(*) = 7 FROM t1
--source include/wait_condition.inc
--connection node_1a SELECT COUNT(*) FROM t1;
# wait for the local commit to enter in commit monitor wait state # wait for the local commit to enter in commit monitor wait state
--let $galera_sync_point = apply_monitor_slave_enter_sync commit_monitor_master_enter_sync --let $galera_sync_point = apply_monitor_slave_enter_sync commit_monitor_master_enter_sync
--source include/galera_wait_sync_point.inc --source include/galera_wait_sync_point.inc
@ -273,4 +281,13 @@ SELECT * FROM t1;
INSERT INTO t1 VALUES (7,7,7); INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8); INSERT INTO t1 VALUES (8,8,8);
SELECT COUNT(*) FROM t1;
SELECT * FROM t1;
--connection node_1
--let $wait_condition = SELECT COUNT(*) = 7 FROM t1
--source include/wait_condition.inc
SELECT COUNT(*) FROM t1;
SELECT * FROM t1;
DROP TABLE t1; DROP TABLE t1;

View File

@ -1,7 +0,0 @@
!include ../galera_2nodes.cnf
[mysqld.1]
wsrep-debug=SERVER
[mysqld.2]
wsrep-debug=SERVER

View File

@ -1,140 +0,0 @@
--source include/galera_cluster.inc
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
#
# Test case 7:
# 1. Start a transaction on node_2,
# and leave it pending while holding a row locked
# 2. set sync point pause applier
# 3. send a conflicting write on node_1, it will pause
# at the sync point
# 4. through another connection to node_2, kill the local
# transaction
#
--connection node_2
CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB;
insert into t1 values (NULL,1);
#
# connection node_2a runs a local transaction, that is victim of BF abort
# and victim of KILL command by connection node_2
#
--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
--connection node_2a
truncate t1;
insert into t1 values (1,0);
# start a transaction that will conflict with later applier
begin;
update t1 set b=2 where a=1;
--connection node_2
set session wsrep_sync_wait=0;
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1
--source include/wait_condition.inc
--let $k_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1`
# connection node_2b is for controlling debug sync points
# first set a sync point for applier, to pause during BF aborting
# and before THD::awake would be called
#
--connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2
--connection node_2b
SET GLOBAL debug_dbug = "d,sync.before_wsrep_thd_abort";
#
# replicate an update, which will BF abort the victim node_2a
# however, while applier in node 2 is handling the abort,
# it will pause in sync point set by node_2b
#
--connection node_1
select * from t1;
update t1 set b= 1 where a=1;
#
# wait until the applying of above update has reached the sync point
# in node 2
#
--connection node_2b
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.before_wsrep_thd_abort_reached";
--connection node_2
#
# pause KILL execution before awake
#
SET DEBUG_SYNC= 'before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill';
--disable_query_log
--send_eval KILL $k_thread
--enable_query_log
--connection node_2b
SET DEBUG_SYNC='now WAIT_FOR awake_reached';
# release applier and KILL operator
SET GLOBAL debug_dbug = "";
SET DEBUG_SYNC = "now SIGNAL signal.before_wsrep_thd_abort";
SET DEBUG_SYNC = "now SIGNAL continue_kill";
--connection node_2
--reap
--connection node_2a
--error 0,1213
select * from t1;
--connection node_2
SET DEBUG_SYNC = "RESET";
drop table t1;
--disconnect node_2a
#
# Test case 7:
# run a transaction in node 2, and set a sync point to pause the transaction
# in commit phase.
# Through another connection to node 2, kill the committing transaction by
# KILL QUERY command
#
--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
--connection node_2a
--let $connection_id = `SELECT CONNECTION_ID()`
CREATE TABLE t1 (i int primary key);
# Set up sync point
SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
# Send insert which will block in the sync point above
--send INSERT INTO t1 VALUES (1)
--connection node_2
SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
--disable_query_log
--disable_result_log
# victim has passed the point of no return, kill is not possible anymore
--eval KILL QUERY $connection_id
--enable_result_log
--enable_query_log
SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
SET DEBUG_SYNC='RESET';
--connection node_2a
--error 0,1213
--reap
--connection node_2
# victim was able to complete the INSERT
select * from t1;
--disconnect node_2a
--connection node_1
drop table t1;

View File

@ -54,15 +54,11 @@ INSERT INTO parent VALUES (1, 0);
--connection node_2a --connection node_2a
--reap --reap
--connection node_1 #
SET SESSION wsrep_sync_wait=15; # ALTER TABLE could bf kill one or more of INSERTs to parent, so
SELECT COUNT(*) FROM parent; # the actual number of rows in PARENT depends on whether
SELECT COUNT(*) FROM child; # the INSERT is committed before ALTER TABLE is executed
#
--connection node_2
SET SESSION wsrep_sync_wait=15;
SELECT COUNT(*) FROM parent;
SELECT COUNT(*) FROM child;
DROP TABLE child; DROP TABLE child;
DROP TABLE parent; DROP TABLE parent;

View File

@ -94,11 +94,13 @@ SELECT * FROM t1;
--eval SET GLOBAL wsrep_auto_increment_control = $auto_increment_control_orig --eval SET GLOBAL wsrep_auto_increment_control = $auto_increment_control_orig
--eval SET GLOBAL auto_increment_increment = $auto_increment_increment_node1 --eval SET GLOBAL auto_increment_increment = $auto_increment_increment_node1
--eval SET GLOBAL auto_increment_offset = $auto_increment_offset_node1 --eval SET GLOBAL auto_increment_offset = $auto_increment_offset_node1
--disconnect node_1a
--connection node_2 --connection node_2
--eval SET GLOBAL wsrep_auto_increment_control = $auto_increment_control_orig --eval SET GLOBAL wsrep_auto_increment_control = $auto_increment_control_orig
--eval SET GLOBAL auto_increment_increment = $auto_increment_increment_node2 --eval SET GLOBAL auto_increment_increment = $auto_increment_increment_node2
--eval SET GLOBAL auto_increment_offset = $auto_increment_offset_node2 --eval SET GLOBAL auto_increment_offset = $auto_increment_offset_node2
--disconnect node_2a
--enable_query_log --enable_query_log

View File

@ -913,7 +913,7 @@ static my_bool kill_handlerton(THD *thd, plugin_ref plugin,
{ {
handlerton *hton= plugin_hton(plugin); handlerton *hton= plugin_hton(plugin);
mysql_mutex_assert_owner(&thd->LOCK_thd_data); mysql_mutex_assert_owner(&thd->LOCK_thd_kill);
if (hton->kill_query && thd_get_ha_data(thd, hton)) if (hton->kill_query && thd_get_ha_data(thd, hton))
hton->kill_query(hton, thd, *(enum thd_kill_levels *) level); hton->kill_query(hton, thd, *(enum thd_kill_levels *) level);
return FALSE; return FALSE;

View File

@ -1,4 +1,4 @@
/* Copyright 2018 Codership Oy <info@codership.com> /* Copyright 2018-2021 Codership Oy <info@codership.com>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -29,12 +29,14 @@ extern "C" my_bool wsrep_on(const THD *thd)
extern "C" void wsrep_thd_LOCK(const THD *thd) extern "C" void wsrep_thd_LOCK(const THD *thd)
{ {
mysql_mutex_lock(&thd->LOCK_thd_kill);
mysql_mutex_lock(&thd->LOCK_thd_data); mysql_mutex_lock(&thd->LOCK_thd_data);
} }
extern "C" void wsrep_thd_UNLOCK(const THD *thd) extern "C" void wsrep_thd_UNLOCK(const THD *thd)
{ {
mysql_mutex_unlock(&thd->LOCK_thd_data); mysql_mutex_unlock(&thd->LOCK_thd_data);
mysql_mutex_unlock(&thd->LOCK_thd_kill);
} }
extern "C" void wsrep_thd_kill_LOCK(const THD *thd) extern "C" void wsrep_thd_kill_LOCK(const THD *thd)
@ -188,6 +190,8 @@ extern "C" void wsrep_handle_SR_rollback(THD *bf_thd,
DBUG_ASSERT(wsrep_thd_is_SR(victim_thd)); DBUG_ASSERT(wsrep_thd_is_SR(victim_thd));
if (!victim_thd || !wsrep_on(bf_thd)) return; if (!victim_thd || !wsrep_on(bf_thd)) return;
wsrep_thd_LOCK(victim_thd);
WSREP_DEBUG("handle rollback, for deadlock: thd %llu trx_id %" PRIu64 " frags %zu conf %s", WSREP_DEBUG("handle rollback, for deadlock: thd %llu trx_id %" PRIu64 " frags %zu conf %s",
victim_thd->thread_id, victim_thd->thread_id,
victim_thd->wsrep_trx_id(), victim_thd->wsrep_trx_id(),
@ -208,6 +212,9 @@ extern "C" void wsrep_handle_SR_rollback(THD *bf_thd,
{ {
wsrep_thd_self_abort(victim_thd); wsrep_thd_self_abort(victim_thd);
} }
wsrep_thd_UNLOCK(victim_thd);
if (bf_thd) if (bf_thd)
{ {
wsrep_store_threadvars(bf_thd); wsrep_store_threadvars(bf_thd);
@ -217,6 +224,9 @@ extern "C" void wsrep_handle_SR_rollback(THD *bf_thd,
extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd, extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
my_bool signal) my_bool signal)
{ {
mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill);
mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data);
DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort", DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort",
{ {
const char act[]= const char act[]=
@ -233,28 +243,26 @@ extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
have wsrep on. Note that this should never interrupt RSU have wsrep on. Note that this should never interrupt RSU
as RSU has paused the provider. as RSU has paused the provider.
*/ */
mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data);
mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill);
if ((ret || !wsrep_on(victim_thd)) && signal) if ((ret || !wsrep_on(victim_thd)) && signal)
{ {
mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_data);
mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_kill);
mysql_mutex_lock(&victim_thd->LOCK_thd_data);
if (victim_thd->wsrep_aborter && victim_thd->wsrep_aborter != bf_thd->thread_id) if (victim_thd->wsrep_aborter && victim_thd->wsrep_aborter != bf_thd->thread_id)
{ {
WSREP_DEBUG("victim is killed already by %llu, skipping awake", WSREP_DEBUG("victim is killed already by %llu, skipping awake",
victim_thd->wsrep_aborter); victim_thd->wsrep_aborter);
mysql_mutex_unlock(&victim_thd->LOCK_thd_data); wsrep_thd_UNLOCK(victim_thd);
return false; return false;
} }
mysql_mutex_lock(&victim_thd->LOCK_thd_kill);
victim_thd->wsrep_aborter= bf_thd->thread_id; victim_thd->wsrep_aborter= bf_thd->thread_id;
victim_thd->awake_no_mutex(KILL_QUERY); victim_thd->awake_no_mutex(KILL_QUERY);
mysql_mutex_unlock(&victim_thd->LOCK_thd_kill);
mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
} else {
WSREP_DEBUG("wsrep_thd_bf_abort skipped awake");
} }
else
WSREP_DEBUG("wsrep_thd_bf_abort skipped awake for %llu", thd_get_thread_id(victim_thd));
wsrep_thd_UNLOCK(victim_thd);
return ret; return ret;
} }
@ -279,8 +287,6 @@ extern "C" my_bool wsrep_thd_order_before(const THD *left, const THD *right)
extern "C" my_bool wsrep_thd_is_aborting(const MYSQL_THD thd) extern "C" my_bool wsrep_thd_is_aborting(const MYSQL_THD thd)
{ {
mysql_mutex_assert_owner(&thd->LOCK_thd_data);
const wsrep::client_state& cs(thd->wsrep_cs()); const wsrep::client_state& cs(thd->wsrep_cs());
const enum wsrep::transaction::state tx_state(cs.transaction().state()); const enum wsrep::transaction::state tx_state(cs.transaction().state());
switch (tx_state) switch (tx_state)
@ -294,8 +300,6 @@ extern "C" my_bool wsrep_thd_is_aborting(const MYSQL_THD thd)
default: default:
return false; return false;
} }
return false;
} }
static inline enum wsrep::key::type static inline enum wsrep::key::type

View File

@ -1072,8 +1072,8 @@ terminate_slave_thread(THD *thd,
int error __attribute__((unused)); int error __attribute__((unused));
DBUG_PRINT("loop", ("killing slave thread")); DBUG_PRINT("loop", ("killing slave thread"));
mysql_mutex_lock(&thd->LOCK_thd_data);
mysql_mutex_lock(&thd->LOCK_thd_kill); mysql_mutex_lock(&thd->LOCK_thd_kill);
mysql_mutex_lock(&thd->LOCK_thd_data);
#ifndef DONT_USE_THR_ALARM #ifndef DONT_USE_THR_ALARM
/* /*
Error codes from pthread_kill are: Error codes from pthread_kill are:

View File

@ -826,6 +826,7 @@ THD::THD(my_thread_id id, bool is_wsrep_applier)
mysql_mutex_init(key_LOCK_wakeup_ready, &LOCK_wakeup_ready, MY_MUTEX_INIT_FAST); mysql_mutex_init(key_LOCK_wakeup_ready, &LOCK_wakeup_ready, MY_MUTEX_INIT_FAST);
mysql_mutex_init(key_LOCK_thd_kill, &LOCK_thd_kill, MY_MUTEX_INIT_FAST); mysql_mutex_init(key_LOCK_thd_kill, &LOCK_thd_kill, MY_MUTEX_INIT_FAST);
mysql_cond_init(key_COND_wakeup_ready, &COND_wakeup_ready, 0); mysql_cond_init(key_COND_wakeup_ready, &COND_wakeup_ready, 0);
mysql_mutex_record_order(&LOCK_thd_kill, &LOCK_thd_data);
/* Variables with default values */ /* Variables with default values */
proc_info="login"; proc_info="login";
@ -1883,7 +1884,6 @@ void THD::awake_no_mutex(killed_state state_to_set)
DBUG_PRINT("enter", ("this: %p current_thd: %p state: %d", DBUG_PRINT("enter", ("this: %p current_thd: %p state: %d",
this, current_thd, (int) state_to_set)); this, current_thd, (int) state_to_set));
THD_CHECK_SENTRY(this); THD_CHECK_SENTRY(this);
mysql_mutex_assert_owner(&LOCK_thd_data);
mysql_mutex_assert_owner(&LOCK_thd_kill); mysql_mutex_assert_owner(&LOCK_thd_kill);
print_aborted_warning(3, "KILLED"); print_aborted_warning(3, "KILLED");
@ -2048,6 +2048,8 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use,
if (needs_thr_lock_abort) if (needs_thr_lock_abort)
{ {
bool mutex_released= false;
mysql_mutex_lock(&in_use->LOCK_thd_kill);
mysql_mutex_lock(&in_use->LOCK_thd_data); mysql_mutex_lock(&in_use->LOCK_thd_data);
/* If not already dying */ /* If not already dying */
if (in_use->killed != KILL_CONNECTION_HARD) if (in_use->killed != KILL_CONNECTION_HARD)
@ -2064,12 +2066,25 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use,
thread can see those instances (e.g. see partitioning code). thread can see those instances (e.g. see partitioning code).
*/ */
if (!thd_table->needs_reopen()) if (!thd_table->needs_reopen())
{
signalled|= mysql_lock_abort_for_thread(this, thd_table); signalled|= mysql_lock_abort_for_thread(this, thd_table);
}
} }
#ifdef WITH_WSREP
if (WSREP(this) && wsrep_thd_is_BF(this, false))
{
WSREP_DEBUG("notify_shared_lock: BF thread %llu query %s"
" victim %llu query %s",
this->real_id, wsrep_thd_query(this),
in_use->real_id, wsrep_thd_query(in_use));
wsrep_abort_thd(this, in_use, false);
mutex_released= true;
}
#endif /* WITH_WSREP */
}
if (!mutex_released)
{
mysql_mutex_unlock(&in_use->LOCK_thd_data);
mysql_mutex_unlock(&in_use->LOCK_thd_kill);
} }
mysql_mutex_unlock(&in_use->LOCK_thd_data);
} }
DBUG_RETURN(signalled); DBUG_RETURN(signalled);
} }
@ -5288,11 +5303,14 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd)
#ifdef WITH_WSREP #ifdef WITH_WSREP
/* wsrep applier, replayer and TOI processing threads are ordered /* wsrep applier, replayer and TOI processing threads are ordered
by replication provider, relaxed GAP locking protocol can be used by replication provider, relaxed GAP locking protocol can be used
between high priority wsrep threads between high priority wsrep threads.
Note that wsrep_thd_is_BF() doesn't take LOCK_thd_data for either thd,
the caller should guarantee that the BF state won't change.
(e.g. InnoDB does it by keeping lock_sys.mutex locked)
*/ */
if (WSREP_ON && if (WSREP_ON &&
wsrep_thd_is_BF(const_cast<THD *>(thd), false) && wsrep_thd_is_BF(const_cast<THD *>(thd), false) &&
wsrep_thd_is_BF(const_cast<THD *>(other_thd), true)) wsrep_thd_is_BF(const_cast<THD *>(other_thd), false))
return 0; return 0;
#endif /* WITH_WSREP */ #endif /* WITH_WSREP */
rgi= thd->rgi_slave; rgi= thd->rgi_slave;

View File

@ -3470,11 +3470,11 @@ public:
void awake_no_mutex(killed_state state_to_set); void awake_no_mutex(killed_state state_to_set);
void awake(killed_state state_to_set) void awake(killed_state state_to_set)
{ {
mysql_mutex_lock(&LOCK_thd_data);
mysql_mutex_lock(&LOCK_thd_kill); mysql_mutex_lock(&LOCK_thd_kill);
mysql_mutex_lock(&LOCK_thd_data);
awake_no_mutex(state_to_set); awake_no_mutex(state_to_set);
mysql_mutex_unlock(&LOCK_thd_kill);
mysql_mutex_unlock(&LOCK_thd_data); mysql_mutex_unlock(&LOCK_thd_data);
mysql_mutex_unlock(&LOCK_thd_kill);
} }
void abort_current_cond_wait(bool force); void abort_current_cond_wait(bool force);

View File

@ -9248,7 +9248,7 @@ THD *find_thread_by_id(longlong id, bool query_id)
return arg.thd; return arg.thd;
} }
mysql_mutex_lock(&thd->LOCK_thd_data);
/** /**
kill one thread. kill one thread.
@ -9292,7 +9292,8 @@ kill_one_thread(THD *thd, longlong id, killed_state kill_signal, killed_type typ
faster and do a harder kill than KILL_SYSTEM_THREAD; faster and do a harder kill than KILL_SYSTEM_THREAD;
*/ */
mysql_mutex_lock(&tmp->LOCK_thd_data); // for various wsrep* checks below mysql_mutex_lock(&tmp->LOCK_thd_data); // Lock from concurrent usage
#ifdef WITH_WSREP #ifdef WITH_WSREP
if (((thd->security_ctx->master_access & PRIV_KILL_OTHER_USER_PROCESS) || if (((thd->security_ctx->master_access & PRIV_KILL_OTHER_USER_PROCESS) ||
thd->security_ctx->user_matches(tmp->security_ctx)) && thd->security_ctx->user_matches(tmp->security_ctx)) &&
@ -9307,23 +9308,23 @@ kill_one_thread(THD *thd, longlong id, killed_state kill_signal, killed_type typ
if (tmp->wsrep_aborter && tmp->wsrep_aborter != thd->thread_id) if (tmp->wsrep_aborter && tmp->wsrep_aborter != thd->thread_id)
{ {
/* victim is in hit list already, bail out */ /* victim is in hit list already, bail out */
WSREP_DEBUG("victim has wsrep aborter: %lu, skipping awake()", WSREP_DEBUG("victim %llu has wsrep aborter: %lu, skipping awake()",
tmp->wsrep_aborter); id, tmp->wsrep_aborter);
error= 0; error= 0;
} }
else else
#endif /* WITH_WSREP */ #endif /* WITH_WSREP */
{ {
WSREP_DEBUG("kill_one_thread %llu, victim: %llu wsrep_aborter %llu by signal %d", WSREP_DEBUG("kill_one_thread victim: %llu wsrep_aborter %lu by signal %d",
thd->thread_id, id, tmp->wsrep_aborter, kill_signal); id, tmp->wsrep_aborter, kill_signal);
tmp->awake_no_mutex(kill_signal); tmp->awake_no_mutex(kill_signal);
WSREP_DEBUG("victim: %llu taken care of", id);
error= 0; error= 0;
} }
} }
else else
error= (type == KILL_TYPE_QUERY ? ER_KILL_QUERY_DENIED_ERROR : error= (type == KILL_TYPE_QUERY ? ER_KILL_QUERY_DENIED_ERROR :
ER_KILL_DENIED_ERROR); ER_KILL_DENIED_ERROR);
mysql_mutex_unlock(&tmp->LOCK_thd_data); mysql_mutex_unlock(&tmp->LOCK_thd_data);
} }
mysql_mutex_unlock(&tmp->LOCK_thd_kill); mysql_mutex_unlock(&tmp->LOCK_thd_kill);
@ -9438,6 +9439,18 @@ static
void sql_kill(THD *thd, longlong id, killed_state state, killed_type type) void sql_kill(THD *thd, longlong id, killed_state state, killed_type type)
{ {
uint error; uint error;
#ifdef WITH_WSREP
if (WSREP(thd))
{
WSREP_DEBUG("sql_kill called");
if (thd->wsrep_applier)
{
WSREP_DEBUG("KILL in applying, bailing out here");
return;
}
WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
}
#endif /* WITH_WSREP */
if (likely(!(error= kill_one_thread(thd, id, state, type)))) if (likely(!(error= kill_one_thread(thd, id, state, type))))
{ {
if (!thd->killed) if (!thd->killed)
@ -9447,6 +9460,11 @@ void sql_kill(THD *thd, longlong id, killed_state state, killed_type type)
} }
else else
my_error(error, MYF(0), id); my_error(error, MYF(0), id);
#ifdef WITH_WSREP
return;
wsrep_error_label:
my_error(ER_CANNOT_USER, MYF(0), wsrep_thd_query(thd));
#endif /* WITH_WSREP */
} }
@ -9455,6 +9473,18 @@ sql_kill_user(THD *thd, LEX_USER *user, killed_state state)
{ {
uint error; uint error;
ha_rows rows; ha_rows rows;
#ifdef WITH_WSREP
if (WSREP(thd))
{
WSREP_DEBUG("sql_kill_user called");
if (thd->wsrep_applier)
{
WSREP_DEBUG("KILL in applying, bailing out here");
return;
}
WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
}
#endif /* WITH_WSREP */
if (likely(!(error= kill_threads_for_user(thd, user, state, &rows)))) if (likely(!(error= kill_threads_for_user(thd, user, state, &rows))))
my_ok(thd, rows); my_ok(thd, rows);
else else
@ -9465,6 +9495,11 @@ sql_kill_user(THD *thd, LEX_USER *user, killed_state state)
*/ */
my_error(error, MYF(0), user->host.str, user->user.str); my_error(error, MYF(0), user->host.str, user->user.str);
} }
#ifdef WITH_WSREP
return;
wsrep_error_label:
my_error(ER_CANNOT_USER, MYF(0), user->user.str);
#endif /* WITH_WSREP */
} }

View File

@ -3498,8 +3498,8 @@ static my_bool kill_callback(THD *thd, kill_callback_arg *arg)
thd->variables.server_id == arg->slave_server_id) thd->variables.server_id == arg->slave_server_id)
{ {
arg->thd= thd; arg->thd= thd;
mysql_mutex_lock(&thd->LOCK_thd_data);
mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete
mysql_mutex_lock(&thd->LOCK_thd_data);
return 1; return 1;
} }
return 0; return 0;

View File

@ -68,20 +68,15 @@ bool Wsrep_client_service::interrupted(
wsrep::unique_lock<wsrep::mutex>& lock WSREP_UNUSED) const wsrep::unique_lock<wsrep::mutex>& lock WSREP_UNUSED) const
{ {
DBUG_ASSERT(m_thd == current_thd); DBUG_ASSERT(m_thd == current_thd);
/* Underlying mutex in lock object points to LOCK_thd_data, which /* Underlying mutex in lock object points to THD::LOCK_thd_data, which
protects m_thd->wsrep_trx(), LOCK_thd_kill protects m_thd->killed. protects m_thd->wsrep_trx() and protects us from thd delete. */
Locking order is:
1) LOCK_thd_data
2) LOCK_thd_kill */
mysql_mutex_assert_owner(static_cast<mysql_mutex_t*>(lock.mutex()->native())); mysql_mutex_assert_owner(static_cast<mysql_mutex_t*>(lock.mutex()->native()));
mysql_mutex_lock(&m_thd->LOCK_thd_kill);
bool ret= (m_thd->killed != NOT_KILLED); bool ret= (m_thd->killed != NOT_KILLED);
if (ret) if (ret)
{ {
WSREP_DEBUG("wsrep state is interrupted, THD::killed %d trx state %d", WSREP_DEBUG("wsrep state is interrupted, THD::killed %d trx state %d",
m_thd->killed, m_thd->wsrep_trx().state()); m_thd->killed, m_thd->wsrep_trx().state());
} }
mysql_mutex_unlock(&m_thd->LOCK_thd_kill);
return ret; return ret;
} }

View File

@ -2134,6 +2134,11 @@ static int wsrep_TOI_event_buf(THD* thd, uchar** buf, size_t* buf_len)
case SQLCOM_DROP_TABLE: case SQLCOM_DROP_TABLE:
err= wsrep_drop_table_query(thd, buf, buf_len); err= wsrep_drop_table_query(thd, buf, buf_len);
break; break;
case SQLCOM_KILL:
WSREP_DEBUG("KILL as TOI: %s", thd->query());
err= wsrep_to_buf_helper(thd, thd->query(), thd->query_length(),
buf, buf_len);
break;
case SQLCOM_CREATE_ROLE: case SQLCOM_CREATE_ROLE:
if (sp_process_definer(thd)) if (sp_process_definer(thd))
{ {
@ -2547,7 +2552,11 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
request_thd, granted_thd); request_thd, granted_thd);
ticket->wsrep_report(wsrep_debug); ticket->wsrep_report(wsrep_debug);
mysql_mutex_lock(&granted_thd->LOCK_thd_data); /* Here we will call wsrep_abort_transaction so we should hold
THD::LOCK_thd_data to protect victim from concurrent usage
and THD::LOCK_thd_kill to protect from disconnect or delete. */
wsrep_thd_LOCK(granted_thd);
if (wsrep_thd_is_toi(granted_thd) || if (wsrep_thd_is_toi(granted_thd) ||
wsrep_thd_is_applying(granted_thd)) wsrep_thd_is_applying(granted_thd))
{ {
@ -2555,21 +2564,22 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
{ {
WSREP_DEBUG("BF thread waiting for SR in aborting state"); WSREP_DEBUG("BF thread waiting for SR in aborting state");
ticket->wsrep_report(wsrep_debug); ticket->wsrep_report(wsrep_debug);
mysql_mutex_unlock(&granted_thd->LOCK_thd_data); wsrep_thd_UNLOCK(granted_thd);
} }
else if (wsrep_thd_is_SR(granted_thd) && !wsrep_thd_is_SR(request_thd)) else if (wsrep_thd_is_SR(granted_thd) && !wsrep_thd_is_SR(request_thd))
{ {
WSREP_MDL_LOG(INFO, "MDL conflict, DDL vs SR", WSREP_MDL_LOG(INFO, "MDL conflict, DDL vs SR",
schema, schema_len, request_thd, granted_thd); schema, schema_len, request_thd, granted_thd);
mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
wsrep_abort_thd(request_thd, granted_thd, 1); wsrep_abort_thd(request_thd, granted_thd, 1);
mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data);
mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill);
} }
else else
{ {
WSREP_MDL_LOG(INFO, "MDL BF-BF conflict", schema, schema_len, WSREP_MDL_LOG(INFO, "MDL BF-BF conflict", schema, schema_len,
request_thd, granted_thd); request_thd, granted_thd);
ticket->wsrep_report(true); ticket->wsrep_report(true);
mysql_mutex_unlock(&granted_thd->LOCK_thd_data); wsrep_thd_UNLOCK(granted_thd);
unireg_abort(1); unireg_abort(1);
} }
} }
@ -2578,15 +2588,16 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
{ {
WSREP_DEBUG("BF thread waiting for FLUSH"); WSREP_DEBUG("BF thread waiting for FLUSH");
ticket->wsrep_report(wsrep_debug); ticket->wsrep_report(wsrep_debug);
mysql_mutex_unlock(&granted_thd->LOCK_thd_data); wsrep_thd_UNLOCK(granted_thd);
} }
else if (request_thd->lex->sql_command == SQLCOM_DROP_TABLE) else if (request_thd->lex->sql_command == SQLCOM_DROP_TABLE)
{ {
WSREP_DEBUG("DROP caused BF abort, conf %s", WSREP_DEBUG("DROP caused BF abort, conf %s",
wsrep_thd_transaction_state_str(granted_thd)); wsrep_thd_transaction_state_str(granted_thd));
ticket->wsrep_report(wsrep_debug); ticket->wsrep_report(wsrep_debug);
mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
wsrep_abort_thd(request_thd, granted_thd, 1); wsrep_abort_thd(request_thd, granted_thd, 1);
mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data);
mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill);
} }
else else
{ {
@ -2595,8 +2606,9 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
ticket->wsrep_report(wsrep_debug); ticket->wsrep_report(wsrep_debug);
if (granted_thd->wsrep_trx().active()) if (granted_thd->wsrep_trx().active())
{ {
mysql_mutex_unlock(&granted_thd->LOCK_thd_data); wsrep_abort_thd(request_thd, granted_thd, true);
wsrep_abort_thd(request_thd, granted_thd, 1); mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data);
mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill);
} }
else else
{ {
@ -2604,10 +2616,11 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
Granted_thd is likely executing with wsrep_on=0. If the requesting Granted_thd is likely executing with wsrep_on=0. If the requesting
thd is BF, BF abort and wait. thd is BF, BF abort and wait.
*/ */
mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
if (wsrep_thd_is_BF(request_thd, FALSE)) if (wsrep_thd_is_BF(request_thd, FALSE))
{ {
ha_abort_transaction(request_thd, granted_thd, TRUE); ha_abort_transaction(request_thd, granted_thd, TRUE);
mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data);
mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill);
} }
else else
{ {
@ -2629,6 +2642,7 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
static bool abort_replicated(THD *thd) static bool abort_replicated(THD *thd)
{ {
bool ret_code= false; bool ret_code= false;
wsrep_thd_LOCK(thd);
if (thd->wsrep_trx().state() == wsrep::transaction::s_committing) if (thd->wsrep_trx().state() == wsrep::transaction::s_committing)
{ {
WSREP_DEBUG("aborting replicated trx: %llu", (ulonglong)(thd->real_id)); WSREP_DEBUG("aborting replicated trx: %llu", (ulonglong)(thd->real_id));
@ -2636,6 +2650,9 @@ static bool abort_replicated(THD *thd)
(void)wsrep_abort_thd(thd, thd, TRUE); (void)wsrep_abort_thd(thd, thd, TRUE);
ret_code= true; ret_code= true;
} }
else
wsrep_thd_UNLOCK(thd);
return ret_code; return ret_code;
} }
@ -2673,8 +2690,10 @@ static my_bool have_client_connections(THD *thd, void*)
(longlong) thd->thread_id)); (longlong) thd->thread_id));
if (is_client_connection(thd) && thd->killed == KILL_CONNECTION) if (is_client_connection(thd) && thd->killed == KILL_CONNECTION)
{ {
WSREP_DEBUG("Informing thread %lld that it's time to die",
thd->thread_id);
(void)abort_replicated(thd); (void)abort_replicated(thd);
return 1; return true;
} }
return 0; return 0;
} }
@ -2711,6 +2730,8 @@ static my_bool kill_all_threads(THD *thd, THD *caller_thd)
{ {
DBUG_PRINT("quit", ("Informing thread %lld that it's time to die", DBUG_PRINT("quit", ("Informing thread %lld that it's time to die",
(longlong) thd->thread_id)); (longlong) thd->thread_id));
WSREP_DEBUG("Informing thread %lld that it's time to die",
thd->thread_id);
/* We skip slave threads & scheduler on this first loop through. */ /* We skip slave threads & scheduler on this first loop through. */
if (is_client_connection(thd) && thd != caller_thd) if (is_client_connection(thd) && thd != caller_thd)
{ {

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2013 Codership Oy <info@codership.com> /* Copyright (C) 2013-2021 Codership Oy <info@codership.com>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -314,7 +314,8 @@ int wsrep_abort_thd(THD *bf_thd_ptr, THD *victim_thd_ptr, my_bool signal)
THD *victim_thd= (THD *) victim_thd_ptr; THD *victim_thd= (THD *) victim_thd_ptr;
THD *bf_thd= (THD *) bf_thd_ptr; THD *bf_thd= (THD *) bf_thd_ptr;
mysql_mutex_lock(&victim_thd->LOCK_thd_data); mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data);
mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill);
/* Note that when you use RSU node is desynced from cluster, thus WSREP(thd) /* Note that when you use RSU node is desynced from cluster, thus WSREP(thd)
might not be true. might not be true.
@ -327,16 +328,14 @@ int wsrep_abort_thd(THD *bf_thd_ptr, THD *victim_thd_ptr, my_bool signal)
{ {
WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", (bf_thd) ? WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", (bf_thd) ?
(long long)bf_thd->real_id : 0, (long long)victim_thd->real_id); (long long)bf_thd->real_id : 0, (long long)victim_thd->real_id);
mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
ha_abort_transaction(bf_thd, victim_thd, signal); ha_abort_transaction(bf_thd, victim_thd, signal);
mysql_mutex_lock(&victim_thd->LOCK_thd_data);
} }
else else
{ {
WSREP_DEBUG("wsrep_abort_thd not effective: %p %p", bf_thd, victim_thd); WSREP_DEBUG("wsrep_abort_thd not effective: %p %p", bf_thd, victim_thd);
wsrep_thd_UNLOCK(victim_thd);
} }
mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
DBUG_RETURN(1); DBUG_RETURN(1);
} }
@ -345,6 +344,9 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd)
WSREP_LOG_THD(bf_thd, "BF aborter before"); WSREP_LOG_THD(bf_thd, "BF aborter before");
WSREP_LOG_THD(victim_thd, "victim before"); WSREP_LOG_THD(victim_thd, "victim before");
mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data);
mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill);
DBUG_EXECUTE_IF("sync.wsrep_bf_abort", DBUG_EXECUTE_IF("sync.wsrep_bf_abort",
{ {
const char act[]= const char act[]=
@ -358,7 +360,7 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd)
if (WSREP(victim_thd) && !victim_thd->wsrep_trx().active()) if (WSREP(victim_thd) && !victim_thd->wsrep_trx().active())
{ {
WSREP_DEBUG("wsrep_bf_abort, BF abort for non active transaction"); WSREP_DEBUG("wsrep_bf_abort, BF abort for non active transaction");
switch (victim_thd->wsrep_trx().state()) switch (victim_thd->wsrep_trx().state())
{ {
case wsrep::transaction::s_aborting: /* fall through */ case wsrep::transaction::s_aborting: /* fall through */
case wsrep::transaction::s_aborted: case wsrep::transaction::s_aborted:
@ -367,7 +369,13 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd)
default: default:
break; break;
} }
/* Test: galera_create_table_as_select. Here we enter wsrep-lib
where LOCK_thd_data will be acquired, thus we need to release it.
However, we can still hold LOCK_thd_kill to protect from
disconnect or delete. */
mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
wsrep_start_transaction(victim_thd, victim_thd->wsrep_next_trx_id()); wsrep_start_transaction(victim_thd, victim_thd->wsrep_next_trx_id());
mysql_mutex_lock(&victim_thd->LOCK_thd_data);
} }
bool ret; bool ret;
@ -375,11 +383,21 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd)
if (wsrep_thd_is_toi(bf_thd)) if (wsrep_thd_is_toi(bf_thd))
{ {
/* Here we enter wsrep-lib where LOCK_thd_data will be acquired,
thus we need to release it. However, we can still hold
LOCK_thd_kill to protect from disconnect or delete. */
mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
ret= victim_thd->wsrep_cs().total_order_bf_abort(bf_seqno); ret= victim_thd->wsrep_cs().total_order_bf_abort(bf_seqno);
mysql_mutex_lock(&victim_thd->LOCK_thd_data);
} }
else else
{ {
/* Test: mysql-wsrep-features#165. Here we enter wsrep-lib
where LOCK_thd_data will be acquired and later LOCK_thd_kill,
thus we need to release them. */
wsrep_thd_UNLOCK(victim_thd);
ret= victim_thd->wsrep_cs().bf_abort(bf_seqno); ret= victim_thd->wsrep_cs().bf_abort(bf_seqno);
wsrep_thd_LOCK(victim_thd);
} }
if (ret) if (ret)
{ {

View File

@ -4761,7 +4761,7 @@ static void innobase_kill_query(handlerton*, THD *thd, enum thd_kill_levels)
{ {
ut_ad(trx->mysql_thd == thd); ut_ad(trx->mysql_thd == thd);
#ifdef WITH_WSREP #ifdef WITH_WSREP
if (trx->is_wsrep() && wsrep_thd_is_aborting(thd)) if (wsrep_thd_is_aborting(thd) || trx->lock.was_chosen_as_wsrep_victim)
/* if victim has been signaled by BF thread and/or aborting is already /* if victim has been signaled by BF thread and/or aborting is already
progressing, following query aborting is not necessary any more. progressing, following query aborting is not necessary any more.
Also, BF thread should own trx mutex for the victim. */ Also, BF thread should own trx mutex for the victim. */
@ -4771,6 +4771,8 @@ static void innobase_kill_query(handlerton*, THD *thd, enum thd_kill_levels)
if (lock_t *lock= trx->lock.wait_lock) if (lock_t *lock= trx->lock.wait_lock)
{ {
trx_mutex_enter(trx); trx_mutex_enter(trx);
if (trx->is_wsrep() && wsrep_thd_is_aborting(thd))
trx->lock.was_chosen_as_deadlock_victim= TRUE;
lock_cancel_waiting_and_release(lock); lock_cancel_waiting_and_release(lock);
trx_mutex_exit(trx); trx_mutex_exit(trx);
} }
@ -18639,6 +18641,40 @@ static struct st_mysql_storage_engine innobase_storage_engine=
#ifdef WITH_WSREP #ifdef WITH_WSREP
static
void
wsrep_kill_victim(
MYSQL_THD const bf_thd,
MYSQL_THD thd,
trx_t* victim_trx,
my_bool signal)
{
DBUG_ENTER("wsrep_kill_victim");
/* Mark transaction as a victim for Galera abort */
victim_trx->lock.was_chosen_as_wsrep_victim= true;
if (wsrep_thd_set_wsrep_aborter(bf_thd, thd))
{
WSREP_DEBUG("innodb kill transaction skipped due to wsrep_aborter set");
wsrep_thd_UNLOCK(thd);
DBUG_VOID_RETURN;
}
if (wsrep_thd_bf_abort(bf_thd, thd, signal))
{
lock_t* wait_lock= victim_trx->lock.wait_lock;
if (wait_lock)
{
DBUG_ASSERT(victim_trx->is_wsrep());
WSREP_DEBUG("victim has wait flag: %lu", thd_get_thread_id(thd));
victim_trx->lock.was_chosen_as_deadlock_victim= TRUE;
lock_cancel_waiting_and_release(wait_lock);
}
}
DBUG_VOID_RETURN;
}
/** This function is used to kill one transaction. /** This function is used to kill one transaction.
This transaction was open on this node (not-yet-committed), and a This transaction was open on this node (not-yet-committed), and a
@ -18662,87 +18698,65 @@ comparison as in the local certification failure.
@param[in] bf_thd Brute force (BF) thread @param[in] bf_thd Brute force (BF) thread
@param[in,out] victim_trx Victim trx to be killed @param[in,out] victim_trx Victim trx to be killed
@param[in] signal Should victim be signaled */ @param[in] signal Should victim be signaled */
UNIV_INTERN
void void
wsrep_innobase_kill_one_trx( wsrep_innobase_kill_one_trx(
THD* bf_thd, MYSQL_THD const bf_thd,
trx_t *victim_trx, trx_t *victim_trx,
bool signal) my_bool signal)
{ {
ut_ad(bf_thd); ut_ad(bf_thd);
ut_ad(victim_trx); ut_ad(victim_trx);
ut_ad(lock_mutex_own()); ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(victim_trx)); ut_ad(trx_mutex_own(victim_trx));
DBUG_ENTER("wsrep_innobase_kill_one_trx"); DBUG_ENTER("wsrep_innobase_kill_one_trx");
THD *thd= (THD *) victim_trx->mysql_thd;
/* Note that bf_trx might not exist here e.g. on MDL conflict
case (test: galera_concurrent_ctas).*/
trx_t* bf_trx= (trx_t*)thd_to_trx(bf_thd);
THD *thd= (THD *) victim_trx->mysql_thd; if (!thd)
ut_ad(thd); {
/* Note that bf_trx might not exist here e.g. on MDL conflict WSREP_WARN("no THD for trx: " TRX_ID_FMT, victim_trx->id);
case (test: galera_concurrent_ctas). Similarly, BF thread DBUG_VOID_RETURN;
could be also acquiring MDL-lock causing victim to be }
aborted. However, we have not yet called innobase_trx_init()
for BF transaction (test: galera_many_columns)*/
trx_t* bf_trx= thd_to_trx(bf_thd);
DBUG_ASSERT(wsrep_on(bf_thd));
wsrep_thd_LOCK(thd); /* Here we need to lock THD::LOCK_thd_data to protect from
concurrent usage or disconnect or delete. */
DEBUG_SYNC(bf_thd, "wsrep_before_BF_victim_lock");
wsrep_thd_LOCK(thd);
DEBUG_SYNC(bf_thd, "wsrep_after_BF_victim_lock");
WSREP_LOG_CONFLICT(bf_thd, thd, TRUE); WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);
WSREP_DEBUG("Aborter %s trx_id: " TRX_ID_FMT " thread: %ld " WSREP_DEBUG("wsrep_innobase_kill_one_trx: Aborter %s "
"seqno: %lld client_state: %s client_mode: %s transaction_mode: %s " "trx_id: " TRX_ID_FMT " thread: %ld "
"query: %s", "seqno: %lld client_state: %s client_mode: %s "
wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal", "trx_state %s query: %s",
bf_trx ? bf_trx->id : TRX_ID_MAX, wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal",
thd_get_thread_id(bf_thd), bf_trx ? bf_trx->id : TRX_ID_MAX,
wsrep_thd_trx_seqno(bf_thd), thd_get_thread_id(bf_thd),
wsrep_thd_client_state_str(bf_thd), wsrep_thd_trx_seqno(bf_thd),
wsrep_thd_client_mode_str(bf_thd), wsrep_thd_client_state_str(bf_thd),
wsrep_thd_transaction_state_str(bf_thd), wsrep_thd_client_mode_str(bf_thd),
wsrep_thd_query(bf_thd)); wsrep_thd_transaction_state_str(bf_thd),
wsrep_thd_query(bf_thd));
WSREP_DEBUG("Victim %s trx_id: " TRX_ID_FMT " thread: %ld " WSREP_DEBUG("wsrep_innobase_kill_one_trx: Victim %s "
"seqno: %lld client_state: %s client_mode: %s transaction_mode: %s " "trx_id: " TRX_ID_FMT " thread: %ld "
"query: %s", "seqno: %lld client_state: %s client_mode: %s "
wsrep_thd_is_BF(thd, false) ? "BF" : "normal", "trx_state %s query: %s",
victim_trx->id, wsrep_thd_is_BF(thd, false) ? "BF" : "normal",
thd_get_thread_id(thd), victim_trx->id,
wsrep_thd_trx_seqno(thd), thd_get_thread_id(thd),
wsrep_thd_client_state_str(thd), wsrep_thd_trx_seqno(thd),
wsrep_thd_client_mode_str(thd), wsrep_thd_client_state_str(thd),
wsrep_thd_transaction_state_str(thd), wsrep_thd_client_mode_str(thd),
wsrep_thd_query(thd)); wsrep_thd_transaction_state_str(thd),
wsrep_thd_query(thd));
/* Mark transaction as a victim for Galera abort */ wsrep_kill_victim(bf_thd, thd, victim_trx, signal);
victim_trx->lock.was_chosen_as_wsrep_victim= true; DBUG_VOID_RETURN;
if (wsrep_thd_set_wsrep_aborter(bf_thd, thd))
{
WSREP_DEBUG("innodb kill transaction skipped due to wsrep_aborter set");
wsrep_thd_UNLOCK(thd);
DBUG_VOID_RETURN;
}
/* Note that we need to release this as it will be acquired
below in wsrep-lib */
wsrep_thd_UNLOCK(thd);
DEBUG_SYNC(bf_thd, "before_wsrep_thd_abort");
if (wsrep_thd_bf_abort(bf_thd, thd, signal))
{
lock_t* wait_lock = victim_trx->lock.wait_lock;
if (wait_lock) {
DBUG_ASSERT(victim_trx->is_wsrep());
WSREP_DEBUG("victim has wait flag: %lu",
thd_get_thread_id(thd));
WSREP_DEBUG("canceling wait lock");
victim_trx->lock.was_chosen_as_deadlock_victim= TRUE;
lock_cancel_waiting_and_release(wait_lock);
}
}
DBUG_VOID_RETURN;
} }
/** This function forces the victim transaction to abort. Aborting the /** This function forces the victim transaction to abort. Aborting the
@ -18762,29 +18776,42 @@ wsrep_abort_transaction(
THD *victim_thd, THD *victim_thd,
my_bool signal) my_bool signal)
{ {
DBUG_ENTER("wsrep_abort_transaction"); /* Note that victim thd is protected with
ut_ad(bf_thd); THD::LOCK_thd_data and THD::LOCK_thd_kill here. */
ut_ad(victim_thd); trx_t* victim_trx= thd_to_trx(victim_thd);
trx_t* bf_trx= thd_to_trx(bf_thd);
WSREP_DEBUG("wsrep_abort_transaction: BF:"
" thread %ld client_state %s client_mode %s"
" trans_state %s query %s trx " TRX_ID_FMT,
thd_get_thread_id(bf_thd),
wsrep_thd_client_state_str(bf_thd),
wsrep_thd_client_mode_str(bf_thd),
wsrep_thd_transaction_state_str(bf_thd),
wsrep_thd_query(bf_thd),
bf_trx ? bf_trx->id : 0);
trx_t* victim_trx = thd_to_trx(victim_thd); WSREP_DEBUG("wsrep_abort_transaction: victim:"
" thread %ld client_state %s client_mode %s"
" trans_state %s query %s trx " TRX_ID_FMT,
thd_get_thread_id(victim_thd),
wsrep_thd_client_state_str(victim_thd),
wsrep_thd_client_mode_str(victim_thd),
wsrep_thd_transaction_state_str(victim_thd),
wsrep_thd_query(victim_thd),
victim_trx ? victim_trx->id : 0);
WSREP_DEBUG("abort transaction: BF: %s victim: %s victim conf: %s", if (victim_trx)
wsrep_thd_query(bf_thd), {
wsrep_thd_query(victim_thd), lock_mutex_enter();
wsrep_thd_transaction_state_str(victim_thd)); trx_mutex_enter(victim_trx);
wsrep_kill_victim(bf_thd, victim_thd, victim_trx, signal);
if (victim_trx) { lock_mutex_exit();
lock_mutex_enter(); trx_mutex_exit(victim_trx);
trx_mutex_enter(victim_trx); }
wsrep_innobase_kill_one_trx(bf_thd, victim_trx, signal); else
trx_mutex_exit(victim_trx); {
lock_mutex_exit(); wsrep_thd_bf_abort(bf_thd, victim_thd, signal);
DBUG_VOID_RETURN; }
} else {
wsrep_thd_bf_abort(bf_thd, victim_thd, signal);
}
DBUG_VOID_RETURN;
} }
static static

View File

@ -208,7 +208,11 @@ innobase_casedn_str(
char* a); /*!< in/out: string to put in lower case */ char* a); /*!< in/out: string to put in lower case */
#ifdef WITH_WSREP #ifdef WITH_WSREP
void wsrep_innobase_kill_one_trx(THD *bf_thd, trx_t *victim_trx, bool signal); void
wsrep_innobase_kill_one_trx(
THD* bf_thd,
trx_t *victim_trx,
my_bool signal);
ulint wsrep_innobase_mysql_sort(int mysql_type, uint charset_number, ulint wsrep_innobase_mysql_sort(int mysql_type, uint charset_number,
unsigned char* str, ulint str_length, unsigned char* str, ulint str_length,
ulint buf_length); ulint buf_length);

View File

@ -1,7 +1,7 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2014, 2020, MariaDB Corporation. Copyright (c) 2014, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
@ -185,13 +185,11 @@ lock_wait_table_reserve_slot(
check if lock timeout was for priority thread, check if lock timeout was for priority thread,
as a side effect trigger lock monitor as a side effect trigger lock monitor
@param[in] trx transaction owning the lock @param[in] trx transaction owning the lock
@param[in] locked true if trx and lock_sys.mutex is ownd
@return false for regular lock timeout */ @return false for regular lock timeout */
static static
bool bool
wsrep_is_BF_lock_timeout( wsrep_is_BF_lock_timeout(
const trx_t* trx, const trx_t* trx)
bool locked = true)
{ {
bool long_wait= (trx->error_state != DB_DEADLOCK && bool long_wait= (trx->error_state != DB_DEADLOCK &&
srv_monitor_timer && trx->is_wsrep() && srv_monitor_timer && trx->is_wsrep() &&
@ -205,21 +203,10 @@ wsrep_is_BF_lock_timeout(
ib::info() << "WSREP: BF lock wait long for trx:" << trx->id ib::info() << "WSREP: BF lock wait long for trx:" << trx->id
<< " query: " << wsrep_thd_query(trx->mysql_thd); << " query: " << wsrep_thd_query(trx->mysql_thd);
if (!locked)
lock_mutex_enter();
ut_ad(lock_mutex_own());
trx_print_latched(stderr, trx, 3000);
/* Note this will release lock_sys mutex */
lock_print_info_all_transactions(stderr);
if (locked)
lock_mutex_enter();
return was_wait; return was_wait;
} else } else {
return false; return false;
}
} }
#endif /* WITH_WSREP */ #endif /* WITH_WSREP */
@ -388,7 +375,7 @@ lock_wait_suspend_thread(
&& wait_time > (double) lock_wait_timeout && wait_time > (double) lock_wait_timeout
#ifdef WITH_WSREP #ifdef WITH_WSREP
&& (!trx->is_wsrep() && (!trx->is_wsrep()
|| (!wsrep_is_BF_lock_timeout(trx, false) || (!wsrep_is_BF_lock_timeout(trx)
&& trx->error_state != DB_DEADLOCK)) && trx->error_state != DB_DEADLOCK))
#endif /* WITH_WSREP */ #endif /* WITH_WSREP */
) { ) {