From 4601e6e565dacd074e211108acf48c067f951b4f Mon Sep 17 00:00:00 2001 From: Teemu Ollakka Date: Thu, 29 Oct 2020 16:30:52 +0200 Subject: [PATCH] MDEV-24255 MTR test galera_bf_abort fails with --ps-protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Under ps-protocol, commandsl like COM_STMT_FETCH, COM_STMT_CLOSE and COM_STMT_SEND_LONG_DATA are not supposed to return errors. Therefore, if a transaction is BF aborted and the client is processing one of those commands, then we should not return a deadlock error immediately. Instead wait for the a subsequent client interaction which permits errors to be returned. To handle this, wsrep_before_command() now accepts parameter keep_command_error. If set true, keep_command_error will cause wsrep-lib side to skip result handling, and to keep the current error for the next interaction with the client. Reviewed-by: Jan Lindström --- mysql-test/suite/galera/r/galera#500.result | 2 - .../suite/galera/r/galera_bf_abort_ps.result | 16 ++++++ .../r/galera_bf_abort_ps_threadpool.result | 22 ++++++++ .../suite/galera/t/galera_bf_abort_ps.cnf | 3 ++ .../suite/galera/t/galera_bf_abort_ps.test | 34 ++++++++++++ .../t/galera_bf_abort_ps_threadpool.cnf | 7 +++ .../t/galera_bf_abort_ps_threadpool.test | 54 +++++++++++++++++++ sql/sql_parse.cc | 31 +++++++---- sql/wsrep_trans_observer.h | 10 +++- wsrep-lib | 2 +- 10 files changed, 166 insertions(+), 15 deletions(-) create mode 100644 mysql-test/suite/galera/r/galera_bf_abort_ps.result create mode 100644 mysql-test/suite/galera/r/galera_bf_abort_ps_threadpool.result create mode 100644 mysql-test/suite/galera/t/galera_bf_abort_ps.cnf create mode 100644 mysql-test/suite/galera/t/galera_bf_abort_ps.test create mode 100644 mysql-test/suite/galera/t/galera_bf_abort_ps_threadpool.cnf create mode 100644 mysql-test/suite/galera/t/galera_bf_abort_ps_threadpool.test diff --git a/mysql-test/suite/galera/r/galera#500.result b/mysql-test/suite/galera/r/galera#500.result index d983e844d76..a5ab0b19718 100644 --- a/mysql-test/suite/galera/r/galera#500.result +++ b/mysql-test/suite/galera/r/galera#500.result @@ -1,5 +1,3 @@ -connection node_1; -connection node_2; connection node_2; connection node_1; connection node_1; diff --git a/mysql-test/suite/galera/r/galera_bf_abort_ps.result b/mysql-test/suite/galera/r/galera_bf_abort_ps.result new file mode 100644 index 00000000000..42292cb20a0 --- /dev/null +++ b/mysql-test/suite/galera/r/galera_bf_abort_ps.result @@ -0,0 +1,16 @@ +connection node_2; +connection node_1; +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 CHAR(6)) ENGINE=InnoDB; +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_2; +START TRANSACTION; +INSERT INTO t1 VALUES (1,'node_2'); +connection node_1; +INSERT INTO t1 VALUES (1,'node_1'); +connection node_2a; +connection node_2; +INSERT INTO t1 VALUES (2, 'node_2'); +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +wsrep_local_aborts_increment +1 +DROP TABLE t1; diff --git a/mysql-test/suite/galera/r/galera_bf_abort_ps_threadpool.result b/mysql-test/suite/galera/r/galera_bf_abort_ps_threadpool.result new file mode 100644 index 00000000000..7482e76778e --- /dev/null +++ b/mysql-test/suite/galera/r/galera_bf_abort_ps_threadpool.result @@ -0,0 +1,22 @@ +connection node_2; +connection node_1; +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 CHAR(6)) ENGINE=InnoDB; +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_2; +START TRANSACTION; +INSERT INTO t1 VALUES (1,'node_2'); +connection node_2a; +SET GLOBAL debug_dbug = "+d,sync.wsrep_apply_cb"; +connection node_1; +INSERT INTO t1 VALUES (1,'node_1'); +connection node_2a; +SET DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached"; +connection node_2; +SET DEBUG_SYNC = "wsrep_before_before_command SIGNAL signal.wsrep_apply_cb WAIT_FOR bf_abort"; +INSERT INTO t1 VALUES (2, 'node_2'); +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +wsrep_local_aborts_increment +1 +SET DEBUG_SYNC = 'RESET'; +SET GLOBAL debug_dbug = DEFAULT; +DROP TABLE t1; diff --git a/mysql-test/suite/galera/t/galera_bf_abort_ps.cnf b/mysql-test/suite/galera/t/galera_bf_abort_ps.cnf new file mode 100644 index 00000000000..34c1a8cc3cf --- /dev/null +++ b/mysql-test/suite/galera/t/galera_bf_abort_ps.cnf @@ -0,0 +1,3 @@ +!include ../galera_2nodes.cnf +[mysqltest] +ps-protocol \ No newline at end of file diff --git a/mysql-test/suite/galera/t/galera_bf_abort_ps.test b/mysql-test/suite/galera/t/galera_bf_abort_ps.test new file mode 100644 index 00000000000..d2dfb92651e --- /dev/null +++ b/mysql-test/suite/galera/t/galera_bf_abort_ps.test @@ -0,0 +1,34 @@ +# +# MDEV-24255 +# Test BF abort of a transaction that has ps-protocol enabled +# + +--source include/galera_cluster.inc + +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 CHAR(6)) ENGINE=InnoDB; +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 + +--connection node_2 +--let $wsrep_local_bf_aborts_before = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_bf_aborts'` + +START TRANSACTION; +INSERT INTO t1 VALUES (1,'node_2'); + +--connection node_1 +INSERT INTO t1 VALUES (1,'node_1'); + +--connection node_2a +--let $wait_condition = SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'node_1' +--source include/wait_condition.inc + +--connection node_2 +--error ER_LOCK_DEADLOCK +INSERT INTO t1 VALUES (2, 'node_2'); + +--let $wsrep_local_bf_aborts_after = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_bf_aborts'` + +--disable_query_log +--eval SELECT $wsrep_local_bf_aborts_after - $wsrep_local_bf_aborts_before = 1 AS wsrep_local_aborts_increment; +--enable_query_log + +DROP TABLE t1; diff --git a/mysql-test/suite/galera/t/galera_bf_abort_ps_threadpool.cnf b/mysql-test/suite/galera/t/galera_bf_abort_ps_threadpool.cnf new file mode 100644 index 00000000000..83baa995c17 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_bf_abort_ps_threadpool.cnf @@ -0,0 +1,7 @@ +!include ../galera_2nodes.cnf + +[mysqld] +thread-handling=pool-of-threads + +[mysqltest] +ps-protocol diff --git a/mysql-test/suite/galera/t/galera_bf_abort_ps_threadpool.test b/mysql-test/suite/galera/t/galera_bf_abort_ps_threadpool.test new file mode 100644 index 00000000000..56348a6f527 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_bf_abort_ps_threadpool.test @@ -0,0 +1,54 @@ +# +# MDEV-24255 +# Test BF abort of a transaction that has ps-protocol enabled +# This test stresses the case where wsrep_before_command() +# finds the transaction in state s_must_abort. This only +# possible when the server is using the thread pool. +# + +--source include/galera_cluster.inc +--source include/have_debug_sync.inc + +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 CHAR(6)) ENGINE=InnoDB; + +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 + +--connection node_2 +--let $wsrep_local_bf_aborts_before = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_bf_aborts'` + +START TRANSACTION; +INSERT INTO t1 VALUES (1,'node_2'); + +--connection node_2a +SET GLOBAL debug_dbug = "+d,sync.wsrep_apply_cb"; + +--connection node_1 +INSERT INTO t1 VALUES (1,'node_1'); + +--connection node_2a +SET DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached"; + +--connection node_2 +SET DEBUG_SYNC = "wsrep_before_before_command SIGNAL signal.wsrep_apply_cb WAIT_FOR bf_abort"; + +# +# The following INSERT is expected to enter +# wsrep_before_command() and find its transaction +# in state s_must_abort. +# Notice that the test appears more complicated +# than it needs to... however we cannot use +# --send for this INSERT, otherwise mysqltest +# will not use ps-protocol +# +--error ER_LOCK_DEADLOCK +INSERT INTO t1 VALUES (2, 'node_2'); + +--let $wsrep_local_bf_aborts_after = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_bf_aborts'` + +--disable_query_log +--eval SELECT $wsrep_local_bf_aborts_after - $wsrep_local_bf_aborts_before = 1 AS wsrep_local_aborts_increment; +--enable_query_log + +SET DEBUG_SYNC = 'RESET'; +SET GLOBAL debug_dbug = DEFAULT; +DROP TABLE t1; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index e03b98177d0..209caa9460e 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -1164,6 +1164,14 @@ static bool wsrep_tables_accessible_when_detached(const TABLE_LIST *tables) } return true; } + +static bool wsrep_command_no_result(char command) +{ + return (command == COM_STMT_PREPARE || + command == COM_STMT_FETCH || + command == COM_STMT_SEND_LONG_DATA || + command == COM_STMT_CLOSE); +} #endif /* WITH_WSREP */ #ifndef EMBEDDED_LIBRARY @@ -1287,12 +1295,20 @@ bool do_command(THD *thd) #ifdef WITH_WSREP DEBUG_SYNC(thd, "wsrep_before_before_command"); /* - Aborted by background rollbacker thread. - Handle error here and jump straight to out + If this command does not return a result, then we + instruct wsrep_before_command() to skip result handling. + This causes BF aborted transaction to roll back but keep + the error state until next command which is able to return + a result to the client. */ - if (wsrep_before_command(thd)) + if (wsrep_before_command(thd, wsrep_command_no_result(command))) { - thd->store_globals(); + /* + Aborted by background rollbacker thread. + Handle error here and jump straight to out. + Notice that thd->store_globals() is called + in wsrep_before_command(). + */ WSREP_LOG_THD(thd, "enter found BF aborted"); DBUG_ASSERT(!thd->mdl_context.has_locks()); DBUG_ASSERT(!thd->get_stmt_da()->is_set()); @@ -2384,12 +2400,7 @@ dispatch_end: WSREP_DEBUG("THD is killed at dispatch_end"); } wsrep_after_command_before_result(thd); - if (wsrep_current_error(thd) && - !(command == COM_STMT_PREPARE || - command == COM_STMT_FETCH || - command == COM_STMT_SEND_LONG_DATA || - command == COM_STMT_CLOSE - )) + if (wsrep_current_error(thd) && !wsrep_command_no_result(command)) { /* todo: Pass wsrep client state current error to override */ wsrep_override_error(thd, wsrep_current_error(thd), diff --git a/sql/wsrep_trans_observer.h b/sql/wsrep_trans_observer.h index 05970e8b12f..dbe710a0256 100644 --- a/sql/wsrep_trans_observer.h +++ b/sql/wsrep_trans_observer.h @@ -442,11 +442,17 @@ wsrep_wait_rollback_complete_and_acquire_ownership(THD *thd) DBUG_VOID_RETURN; } -static inline int wsrep_before_command(THD* thd) +static inline int wsrep_before_command(THD* thd, bool keep_command_error) { return (thd->wsrep_cs().state() != wsrep::client_state::s_none ? - thd->wsrep_cs().before_command() : 0); + thd->wsrep_cs().before_command(keep_command_error) : 0); } + +static inline int wsrep_before_command(THD* thd) +{ + return wsrep_before_command(thd, false); +} + /* Called after each command. diff --git a/wsrep-lib b/wsrep-lib index 41a6e9dad79..dcf3ce91cdb 160000 --- a/wsrep-lib +++ b/wsrep-lib @@ -1 +1 @@ -Subproject commit 41a6e9dad79c921134e44cf974b6b7ca3b84e538 +Subproject commit dcf3ce91cdb9d254ae04ecc6a2f91f46b171280d