From 081926f3d8bc0b2ebaac87568a5e6ab6294ffbc0 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 17 Jun 2014 14:10:13 +0200 Subject: [PATCH] MDEV-6188: master_retry_count (ignored if disconnect happens on SET master_heartbeat_period) That particular part of slave connect to master was missing code to handle retry in case of network errors. The same problem is present in MySQL 5.5, but fixed in MySQL 5.6. Fixed with this patch, by adding the code (mostly identical to MySQL 5.6), and also adding a test case. I checked other queries done towards master during slave connect, and they now all seem to handle reconnect in case of network failures. --- .../suite/rpl/r/rpl_heartbeat_debug.result | 25 +++++++++ .../suite/rpl/t/rpl_heartbeat_debug.test | 52 +++++++++++++++++++ sql/slave.cc | 36 ++++++++++--- 3 files changed, 105 insertions(+), 8 deletions(-) create mode 100644 mysql-test/suite/rpl/r/rpl_heartbeat_debug.result create mode 100644 mysql-test/suite/rpl/t/rpl_heartbeat_debug.test diff --git a/mysql-test/suite/rpl/r/rpl_heartbeat_debug.result b/mysql-test/suite/rpl/r/rpl_heartbeat_debug.result new file mode 100644 index 00000000000..b9dec686e4a --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_heartbeat_debug.result @@ -0,0 +1,25 @@ +include/master-slave.inc +[connection master] +include/stop_slave.inc +set @restore_slave_net_timeout= @@global.slave_net_timeout; +set @@global.slave_net_timeout= 10; +show status like 'Slave_heartbeat_period';; +Variable_name Slave_heartbeat_period +Value 60.000 +SET @save_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug="+d,simulate_slave_heartbeat_network_error"; +CALL mtr.add_suppression('SET @master_heartbeat_period to master failed with error'); +CALL mtr.add_suppression('Master command COM_REGISTER_SLAVE failed: failed registering on master, reconnecting to try again'); +include/start_slave.inc +drop table if exists t1; +CREATE TABLE t1 (a INT PRIMARY KEY); +INSERT INTO t1 VALUES (1); +SELECT * FROM t1; +a +1 +drop table t1; +include/stop_slave.inc +SET GLOBAL debug_dbug=@save_dbug; +set @@global.slave_net_timeout= @restore_slave_net_timeout; +include/start_slave.inc +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_heartbeat_debug.test b/mysql-test/suite/rpl/t/rpl_heartbeat_debug.test new file mode 100644 index 00000000000..7cdf67d6532 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_heartbeat_debug.test @@ -0,0 +1,52 @@ +# Testing master to slave heartbeat protocol, test cases that need debug build. + +--source include/master-slave.inc +--source include/have_debug.inc + +connection slave; +--source include/stop_slave.inc +set @restore_slave_net_timeout= @@global.slave_net_timeout; +--disable_warnings +set @@global.slave_net_timeout= 10; +--enable_warnings + +### +### Checking the range +### + +# +# default period slave_net_timeout/2 +# +--query_vertical show status like 'Slave_heartbeat_period'; +SET @save_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug="+d,simulate_slave_heartbeat_network_error"; +CALL mtr.add_suppression('SET @master_heartbeat_period to master failed with error'); +CALL mtr.add_suppression('Master command COM_REGISTER_SLAVE failed: failed registering on master, reconnecting to try again'); +--source include/start_slave.inc + + +connection master; +--disable_warnings +drop table if exists t1; +--enable_warnings + +CREATE TABLE t1 (a INT PRIMARY KEY); +INSERT INTO t1 VALUES (1); + +sync_slave_with_master; + +--connection slave +SELECT * FROM t1; + +connection master; +drop table t1; + +connection slave; +--source include/stop_slave.inc +--disable_warnings +SET GLOBAL debug_dbug=@save_dbug; +set @@global.slave_net_timeout= @restore_slave_net_timeout; +--enable_warnings +--source include/start_slave.inc + +--source include/rpl_end.inc diff --git a/sql/slave.cc b/sql/slave.cc index 702b6d38cbe..78f152fd2fd 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -1630,15 +1630,35 @@ when it try to get the value of TIME_ZONE global variable from master."; llstr((ulonglong) (mi->heartbeat_period*1000000000UL), llbuf); sprintf(query, query_format, llbuf); - if (mysql_real_query(mysql, query, strlen(query)) - && !check_io_slave_killed(mi->io_thd, mi, NULL)) + DBUG_EXECUTE_IF("simulate_slave_heartbeat_network_error", + { static ulong dbug_count= 0; + if (++dbug_count < 3) + goto heartbeat_network_error; + }); + if (mysql_real_query(mysql, query, strlen(query))) { - errmsg= "The slave I/O thread stops because SET @master_heartbeat_period " - "on master failed."; - err_code= ER_SLAVE_FATAL_ERROR; - sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql)); - mysql_free_result(mysql_store_result(mysql)); - goto err; + if (check_io_slave_killed(mi->io_thd, mi, NULL)) + goto slave_killed_err; + + if (is_network_error(mysql_errno(mysql))) + { + IF_DBUG(heartbeat_network_error: , ) + mi->report(WARNING_LEVEL, mysql_errno(mysql), + "SET @master_heartbeat_period to master failed with error: %s", + mysql_error(mysql)); + mysql_free_result(mysql_store_result(mysql)); + goto network_err; + } + else + { + /* Fatal error */ + errmsg= "The slave I/O thread stops because a fatal error is encountered " + "when it tries to SET @master_heartbeat_period on master."; + err_code= ER_SLAVE_FATAL_ERROR; + sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql)); + mysql_free_result(mysql_store_result(mysql)); + goto err; + } } mysql_free_result(mysql_store_result(mysql)); }