From 6b97512b2170fc6a77288cef6e5a85cad773ae4e Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 24 Apr 2013 13:05:40 +0200 Subject: [PATCH] Add missing check for thd->killed in mysql_binlog_send(). The slave dump thread running on the master only checked thd->killed whenever it reached the end of a binlog file, not between events. This could unnecessarily delay server shutdown. This was found by code inspection while tracking down some occasional "forcing close of thread..." errors in Buildbot. Hopefully this will fix the failures, but the fix is correct in any case. Also increase the wait during server shutdown, 2 seconds is a bit tight in case of heavy I/O stall, and it seems better to delay shutdown a bit than force-kill threads unnecessarily. Also fix some races in test cases that restart the mysqld server. The .expect file should be changed with --append_file, --remove_file + --write_file creates a short window where mysqld can error out due to .expect file missing. --- mysql-test/suite/binlog/t/binlog_mdev342.test | 3 +-- .../suite/binlog/t/binlog_xa_recover.test | 9 +++----- mysql-test/suite/rpl/t/rpl_gtid_crash.test | 21 +++++++------------ .../suite/rpl/t/rpl_gtid_stop_start.test | 6 ++---- mysql-test/suite/rpl/t/rpl_mdev382.test | 3 +-- .../storage_engine/alter_tablespace.test | 3 +-- sql/mysqld.cc | 17 +++++++++++++-- sql/sql_repl.cc | 6 +++++- 8 files changed, 35 insertions(+), 33 deletions(-) diff --git a/mysql-test/suite/binlog/t/binlog_mdev342.test b/mysql-test/suite/binlog/t/binlog_mdev342.test index 02310b87a76..024fa884f91 100644 --- a/mysql-test/suite/binlog/t/binlog_mdev342.test +++ b/mysql-test/suite/binlog/t/binlog_mdev342.test @@ -44,8 +44,7 @@ connection con1; --error 2006,2013 reap; ---remove_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect restart-binlog_mdev342.test EOF diff --git a/mysql-test/suite/binlog/t/binlog_xa_recover.test b/mysql-test/suite/binlog/t/binlog_xa_recover.test index d3914e60b10..903044ca5bd 100644 --- a/mysql-test/suite/binlog/t/binlog_xa_recover.test +++ b/mysql-test/suite/binlog/t/binlog_xa_recover.test @@ -117,8 +117,7 @@ connection con4; --error 2006,2013 reap; ---remove_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect restart-group_commit_binlog_pos.test EOF @@ -212,8 +211,7 @@ SET SESSION debug_dbug="+d,crash_commit_after_log"; --error 2006,2013 INSERT INTO t1 VALUES (14, NULL); ---remove_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect restart-group_commit_binlog_pos.test EOF @@ -257,8 +255,7 @@ SET SESSION debug_dbug="+d,crash_before_write_checkpoint_event"; --error 2006,2013 INSERT INTO t1 VALUES (24, REPEAT("x", 4100)); ---remove_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect restart-group_commit_binlog_pos.test EOF diff --git a/mysql-test/suite/rpl/t/rpl_gtid_crash.test b/mysql-test/suite/rpl/t/rpl_gtid_crash.test index 51a81506ba7..d78001aa69b 100644 --- a/mysql-test/suite/rpl/t/rpl_gtid_crash.test +++ b/mysql-test/suite/rpl/t/rpl_gtid_crash.test @@ -52,8 +52,7 @@ SET SESSION debug_dbug="+d,crash_dispatch_command_before"; SELECT 1; --source include/wait_until_disconnected.inc ---remove_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect restart-rpl_gtid_crash.test EOF @@ -112,8 +111,7 @@ SET SESSION debug_dbug="+d,crash_dispatch_command_before"; SELECT 1; --source include/wait_until_disconnected.inc ---remove_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect restart EOF @@ -147,8 +145,7 @@ INSERT INTO t1 VALUES (4); --connection server_2 --source include/wait_until_disconnected.inc ---remove_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect restart: --skip-slave-start=0 EOF @@ -172,8 +169,7 @@ INSERT INTO t1 VALUES (5); --connection server_2 --source include/wait_until_disconnected.inc ---remove_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect restart: --skip-slave-start=0 EOF @@ -197,8 +193,7 @@ INSERT INTO t1 VALUES (6); --connection server_2 --source include/wait_until_disconnected.inc ---remove_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect restart: --skip-slave-start=0 EOF @@ -222,8 +217,7 @@ INSERT INTO t1 VALUES (7); --connection server_2 --source include/wait_until_disconnected.inc ---remove_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect restart: --skip-slave-start=0 EOF @@ -247,8 +241,7 @@ INSERT INTO t1 VALUES (8); --connection server_2 --source include/wait_until_disconnected.inc ---remove_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect restart: --skip-slave-start=0 EOF diff --git a/mysql-test/suite/rpl/t/rpl_gtid_stop_start.test b/mysql-test/suite/rpl/t/rpl_gtid_stop_start.test index a0665110792..262d86a294e 100644 --- a/mysql-test/suite/rpl/t/rpl_gtid_stop_start.test +++ b/mysql-test/suite/rpl/t/rpl_gtid_stop_start.test @@ -41,8 +41,7 @@ INSERT INTO t1 VALUES (3); --source include/show_binary_logs.inc # Let the slave mysqld server start again. ---remove_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect restart: --skip-slave-start=0 EOF @@ -72,8 +71,7 @@ EOF --shutdown_server 30 --source include/wait_until_disconnected.inc ---remove_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect restart EOF diff --git a/mysql-test/suite/rpl/t/rpl_mdev382.test b/mysql-test/suite/rpl/t/rpl_mdev382.test index d88eeaa463e..d6e799d674c 100644 --- a/mysql-test/suite/rpl/t/rpl_mdev382.test +++ b/mysql-test/suite/rpl/t/rpl_mdev382.test @@ -210,8 +210,7 @@ EOF --shutdown_server 30 ---remove_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect restart-rpl_mdev382.test EOF diff --git a/mysql-test/suite/storage_engine/alter_tablespace.test b/mysql-test/suite/storage_engine/alter_tablespace.test index 508639c7dc5..ac206b8e328 100644 --- a/mysql-test/suite/storage_engine/alter_tablespace.test +++ b/mysql-test/suite/storage_engine/alter_tablespace.test @@ -53,8 +53,7 @@ EOF --replace_result $datadir --copy_file $datadir/test/t1.ibd $datadir/test/t1.ibd.save - --remove_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect - --write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect + --append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect restart EOF diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 2d9923d3ef0..963f259f486 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -1494,8 +1494,21 @@ static void close_connections(void) Events::deinit(); end_slave(); - /* Give threads time to die. */ - for (int i= 0; *(volatile int32*) &thread_count && i < 100; i++) + /* + Give threads time to die. + + In 5.5, this was waiting 100 rounds @ 20 milliseconds/round, so as little + as 2 seconds, depending on thread scheduling. + + From 10.0, we increase this to 1000 rounds / 20 seconds. The rationale is + that on a server with heavy I/O load, it is quite possible for eg. an + fsync() of the binlog or whatever to cause something like LOCK_log to be + held for more than 2 seconds. We do not want to force kill threads in + such cases, if it can be avoided. Note that normally, the wait will be + much smaller than even 2 seconds, this is only a safety fallback against + stuck threads so server shutdown is not held up forever. + */ + for (int i= 0; *(volatile int32*) &thread_count && i < 1000; i++) my_sleep(20000); /* diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index d767fb50cae..550b78832ae 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -1727,13 +1727,15 @@ impossible position"; while (!net->error && net->vio != 0 && !thd->killed) { Log_event_type event_type= UNKNOWN_EVENT; + killed_state killed; /* reset the transmit packet for the event read from binary log file */ if (reset_transmit_packet(thd, flags, &ev_offset, &errmsg)) goto err; - while (!(error = Log_event::read_log_event(&log, packet, log_lock, + while (!(killed= thd->killed) && + !(error = Log_event::read_log_event(&log, packet, log_lock, current_checksum_alg))) { #ifndef DBUG_OFF @@ -1818,6 +1820,8 @@ impossible position"; if (reset_transmit_packet(thd, flags, &ev_offset, &errmsg)) goto err; } + if (killed) + goto end; /* TODO: now that we are logging the offset, check to make sure