From 868c2ceb013e06c29ba37d4634f2d543b96539aa Mon Sep 17 00:00:00 2001 From: Nirbhay Choubey Date: Sun, 12 Jun 2016 19:28:56 -0400 Subject: [PATCH] MDEV-9083: Slave IO thread does not handle autoreconnect to restarting Galera Cluster node Cherry-picked commits from codership/mysql-wsrep. MW-284: Slave I/O retry on ER_COM_UNKNOWN_ERROR Slave would treat ER_UNKNOWN_COM_ERROR as a fatal error and stop. The fix here is to treat it as a network error and rely on the built-in mechanism to retry. MW-284: Add an MTR test --- mysql-test/suite/galera/r/MW-284.result | 13 ++++++ mysql-test/suite/galera/t/MW-284.cnf | 1 + mysql-test/suite/galera/t/MW-284.test | 57 +++++++++++++++++++++++++ sql/slave.cc | 4 ++ 4 files changed, 75 insertions(+) create mode 100644 mysql-test/suite/galera/r/MW-284.result create mode 100644 mysql-test/suite/galera/t/MW-284.cnf create mode 100644 mysql-test/suite/galera/t/MW-284.test diff --git a/mysql-test/suite/galera/r/MW-284.result b/mysql-test/suite/galera/r/MW-284.result new file mode 100644 index 00000000000..8b5119663ce --- /dev/null +++ b/mysql-test/suite/galera/r/MW-284.result @@ -0,0 +1,13 @@ +CREATE TABLE t1 (f1 INTEGER) ENGINE=InnoDB; +SET GLOBAL wsrep_provider_options='gmcast.isolate=1'; +SET SESSION wsrep_on = OFF; +SET SESSION wsrep_on = ON; +START SLAVE; +include/wait_for_slave_param.inc [Slave_IO_Running] +SET GLOBAL wsrep_provider_options='gmcast.isolate=0'; +include/wait_for_slave_to_start.inc +INSERT INTO t1 VALUES (1); +DROP TABLE t1; +STOP SLAVE; +RESET SLAVE ALL; +CALL mtr.add_suppression('failed registering on master'); diff --git a/mysql-test/suite/galera/t/MW-284.cnf b/mysql-test/suite/galera/t/MW-284.cnf new file mode 100644 index 00000000000..52fd3093931 --- /dev/null +++ b/mysql-test/suite/galera/t/MW-284.cnf @@ -0,0 +1 @@ +!include ../galera_2nodes_as_master.cnf diff --git a/mysql-test/suite/galera/t/MW-284.test b/mysql-test/suite/galera/t/MW-284.test new file mode 100644 index 00000000000..5998e22ed1e --- /dev/null +++ 
b/mysql-test/suite/galera/t/MW-284.test @@ -0,0 +1,57 @@ +# +# MW-284 Slave I/O retry on ER_COM_UNKNOWN_ERROR +# + +--source include/galera_cluster.inc +--source include/have_innodb.inc + +--connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 +--disable_query_log +--eval CHANGE MASTER TO MASTER_HOST='127.0.0.1', MASTER_PORT=$NODE_MYPORT_1, MASTER_USER='root', MASTER_CONNECT_RETRY=1; +--enable_query_log + +--connection node_1 +CREATE TABLE t1 (f1 INTEGER) ENGINE=InnoDB; +SET GLOBAL wsrep_provider_options='gmcast.isolate=1'; +SET SESSION wsrep_on = OFF; +--let $wait_condition = SELECT VARIABLE_VALUE = 'non-Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status' +--source include/wait_condition.inc +SET SESSION wsrep_on = ON; + +--connection node_3 +START SLAVE; +--sleep 1 +--let $slave_param= Slave_IO_Running +--let $slave_param_value= Connecting +--source include/wait_for_slave_param.inc + +--connection node_1 +SET GLOBAL wsrep_provider_options='gmcast.isolate=0'; + +# We expect the slave to reconnect and resume replication + +--connection node_3 +--source include/wait_for_slave_to_start.inc + +--connection node_1 +INSERT INTO t1 VALUES (1); + +--connection node_3 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1' +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) > 0 FROM t1 +--source include/wait_condition.inc + +# Cleanup + +--connection node_1 +DROP TABLE t1; + +--connection node_3 +--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1' +--source include/wait_condition.inc + +STOP SLAVE; +RESET SLAVE ALL; + +CALL mtr.add_suppression('failed registering on master'); diff --git a/sql/slave.cc b/sql/slave.cc index 4bc4cd4ab83..e9db77a0924 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -1361,6 +1361,10 @@ bool is_network_error(uint errorno) errorno == ER_NET_READ_INTERRUPTED || errorno == ER_SERVER_SHUTDOWN) 
return TRUE; +#ifdef WITH_WSREP + if (errorno == ER_UNKNOWN_COM_ERROR) + return TRUE; +#endif return FALSE; }