diff --git a/mysql-test/suite/rpl/r/rpl_shutdown_sighup.result b/mysql-test/suite/rpl/r/rpl_shutdown_sighup.result new file mode 100644 index 00000000000..01b35f1dc58 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_shutdown_sighup.result @@ -0,0 +1,50 @@ +include/master-slave.inc +[connection master] +connection slave; +set statement sql_log_bin=0 for call mtr.add_suppression("Signal handler thread did not exit in a timely manner"); +# +# Main test +connection master; +create table t1 (a int); +insert into t1 values (1); +include/save_master_gtid.inc +connection slave; +include/sync_with_master_gtid.inc +set @@global.debug_dbug= "+d,hold_sighup_log_refresh"; +# Waiting for sighup to reach reload_acl_and_cache.. +set debug_sync="now wait_for in_reload_acl_and_cache"; +# Signalling signal handler to proceed to sleep before REFRESH_HOSTS +set debug_sync="now signal refresh_logs"; +# Starting shutdown (note this will take 3+ seconds due to DBUG my_sleep in reload_acl_and_cache) +shutdown; +connection server_2; +connection slave; +include/assert_grep.inc [Ensure Mariadbd did not segfault when shutting down] +connection master; +connection slave; +# +# Error testcase to ensure an error message is shown if the signal +# takes longer than the timeout while processing the SIGHUP +connection slave; +set @@global.debug_dbug= "+d,force_sighup_processing_timeout"; +set @@global.debug_dbug= "+d,hold_sighup_log_refresh"; +connection master; +insert into t1 values (1); +include/save_master_gtid.inc +connection slave; +include/sync_with_master_gtid.inc +# Waiting for sighup to reach reload_acl_and_cache.. +set debug_sync="now wait_for in_reload_acl_and_cache"; +# Signalling signal handler to proceed to sleep before REFRESH_HOSTS +set debug_sync="now signal refresh_logs"; +# Starting shutdown (note this will take 3+ seconds due to DBUG my_sleep in reload_acl_and_cache) +shutdown; +connection server_2; +connection slave; +include/assert_grep.inc [Ensure warning is issued that signal handler thread is still processing] +# +# Cleanup +connection master; +drop table t1; +include/rpl_end.inc +# End of rpl_shutdown_sighup.test diff --git a/mysql-test/suite/rpl/t/rpl_shutdown_sighup.test b/mysql-test/suite/rpl/t/rpl_shutdown_sighup.test new file mode 100644 index 00000000000..d1940f81a85 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_shutdown_sighup.test @@ -0,0 +1,154 @@ +# +# The signal handler thread can use various different runtime resources when +# processing a SIGHUP (e.g. master-info information), as the logic calls into +# reload_acl_and_cache(). This test ensures that SIGHUP processing, when +# concurrent with server shutdown, the shutdown logic must wait for the SIGHUP +# processing to finish before cleaning up any resources. +# +# Additionally, the error case is tested such that the signal handler thread +# takes too long processing a SIGHUP, and the main mysqld thread must skip its +# wait and output a warning. +# +# Note the SIGHUP is sent via the command-line kill program via a perl script. +# +# References: +# MDEV-30260: Slave crashed:reload_acl_and_cache during shutdown +# + +--source include/not_windows.inc +--source include/not_embedded.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc + +# Binlog format doesn't matter +--source include/have_binlog_format_statement.inc +--source include/master-slave.inc + +# For error test case which forces timeout +--connection slave +set statement sql_log_bin=0 for call mtr.add_suppression("Signal handler thread did not exit in a timely manner"); + + +--echo # +--echo # Main test +--connection master +create table t1 (a int); +insert into t1 values (1); +--source include/save_master_gtid.inc + +--connection slave +--source include/sync_with_master_gtid.inc + +# Make signal handler handle SIGHUP.. +set @@global.debug_dbug= "+d,hold_sighup_log_refresh"; +--let KILL_NODE_PIDFILE = `SELECT @@pid_file` +--perl + my $kill_sig = $ENV{'KILL_SIGNAL_VALUE'}; + my $pid_filename = $ENV{'KILL_NODE_PIDFILE'}; + my $mysqld_pid = `cat $pid_filename`; + chomp($mysqld_pid); + system("kill -HUP $mysqld_pid"); + exit(0); +EOF + +--echo # Waiting for sighup to reach reload_acl_and_cache.. +set debug_sync="now wait_for in_reload_acl_and_cache"; +--echo # Signalling signal handler to proceed to sleep before REFRESH_HOSTS +set debug_sync="now signal refresh_logs"; + +# ..while we are shutting down +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +wait +EOF +--echo # Starting shutdown (note this will take 3+ seconds due to DBUG my_sleep in reload_acl_and_cache) +shutdown; + +--source include/wait_until_disconnected.inc +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +restart: --skip-slave-start=0 +EOF + +--connection server_2 +--enable_reconnect +--source include/wait_until_connected_again.inc + +--connection slave +--enable_reconnect +--source include/wait_until_connected_again.inc + +--let $assert_text= Ensure Mariadbd did not segfault when shutting down +--let $assert_select= got signal 11 +--let $assert_file= $MYSQLTEST_VARDIR/log/mysqld.2.err +--let $assert_count= 0 +--let $assert_only_after = CURRENT_TEST: rpl.rpl_shutdown_sighup +--source include/assert_grep.inc + +--connection master +--sync_slave_with_master + + +--echo # +--echo # Error testcase to ensure an error message is shown if the signal +--echo # takes longer than the timeout while processing the SIGHUP + +--connection slave +set @@global.debug_dbug= "+d,force_sighup_processing_timeout"; +set @@global.debug_dbug= "+d,hold_sighup_log_refresh"; + +--connection master +insert into t1 values (1); +--source include/save_master_gtid.inc + +--connection slave +--source include/sync_with_master_gtid.inc + +# Make signal handler handle SIGHUP.. +--let KILL_NODE_PIDFILE = `SELECT @@pid_file` +--perl + my $kill_sig = $ENV{'KILL_SIGNAL_VALUE'}; + my $pid_filename = $ENV{'KILL_NODE_PIDFILE'}; + my $mysqld_pid = `cat $pid_filename`; + chomp($mysqld_pid); + system("kill -HUP $mysqld_pid"); + exit(0); +EOF +--echo # Waiting for sighup to reach reload_acl_and_cache.. +set debug_sync="now wait_for in_reload_acl_and_cache"; +--echo # Signalling signal handler to proceed to sleep before REFRESH_HOSTS +set debug_sync="now signal refresh_logs"; + +# ..while we are shutting down +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +wait +EOF +--echo # Starting shutdown (note this will take 3+ seconds due to DBUG my_sleep in reload_acl_and_cache) +shutdown; + +--source include/wait_until_disconnected.inc +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +restart: --skip-slave-start=0 +EOF + +--connection server_2 +--enable_reconnect +--source include/wait_until_connected_again.inc + +--connection slave +--enable_reconnect +--source include/wait_until_connected_again.inc + +--let $assert_text= Ensure warning is issued that signal handler thread is still processing +--let $assert_select= Signal handler thread did not exit in a timely manner. +--let $assert_file= $MYSQLTEST_VARDIR/log/mysqld.2.err +--let $assert_count= 1 +--let $assert_only_after = CURRENT_TEST: rpl.rpl_shutdown_sighup +--source include/assert_grep.inc + + +--echo # +--echo # Cleanup +--connection master +drop table t1; + +--source include/rpl_end.inc +--echo # End of rpl_shutdown_sighup.test diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 5d23a972cdc..5ba51bdfb74 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -1766,7 +1766,8 @@ static void close_connections(void) DBUG_EXECUTE_IF("delay_shutdown_phase_2_after_semisync_wait", my_sleep(500000);); - Events::deinit(); + if (Events::inited) + Events::stop(); slave_prepare_for_shutdown(); ack_receiver.stop(); @@ -1827,6 +1828,12 @@ static void close_connections(void) } /* End of kill phase 2 */ + /* + The signal thread can use server resources, e.g. when processing SIGHUP, + and it must end gracefully before clean_up() + */ + wait_for_signal_thread_to_end(); + DBUG_PRINT("quit",("close_connections thread")); DBUG_VOID_RETURN; } @@ -1930,14 +1937,8 @@ extern "C" void unireg_abort(int exit_code) static void mysqld_exit(int exit_code) { DBUG_ENTER("mysqld_exit"); - /* - Important note: we wait for the signal thread to end, - but if a kill -15 signal was sent, the signal thread did - spawn the kill_server_thread thread, which is running concurrently. - */ rpl_deinit_gtid_waiting(); rpl_deinit_gtid_slave_state(); - wait_for_signal_thread_to_end(); #ifdef WITH_WSREP wsrep_deinit_server(); wsrep_sst_auth_free(); @@ -2016,6 +2017,9 @@ static void clean_up(bool print_message) free_status_vars(); end_thr_alarm(1); /* Free allocated memory */ end_thr_timer(); +#ifndef EMBEDDED_LIBRARY + Events::deinit(); +#endif my_free_open_file_info(); if (defaults_argv) free_defaults(defaults_argv); @@ -2083,16 +2087,32 @@ static void clean_up(bool print_message) */ static void wait_for_signal_thread_to_end() { - uint i; + uint i, n_waits= DBUG_EVALUATE("force_sighup_processing_timeout", 5, 100); + int err= 0; /* Wait up to 10 seconds for signal thread to die. We use this mainly to avoid getting warnings that my_thread_end has not been called */ - for (i= 0 ; i < 100 && signal_thread_in_use; i++) + for (i= 0 ; i < n_waits && signal_thread_in_use; i++) { - if (pthread_kill(signal_thread, MYSQL_KILL_SIGNAL) == ESRCH) + err= pthread_kill(signal_thread, MYSQL_KILL_SIGNAL); + if (err) break; - my_sleep(100); // Give it time to die + my_sleep(100000); // Give it time to die, .1s per iteration + } + + if (err && err != ESRCH) + { + sql_print_error("Failed to send kill signal to signal handler thread, " + "pthread_kill() errno: %d", + err); + } + + if (i == n_waits && signal_thread_in_use) + { + sql_print_warning("Signal handler thread did not exit in a timely manner. " + "Continuing to wait for it to stop.."); + pthread_join(signal_thread, NULL); } } #endif /*EMBEDDED_LIBRARY*/ @@ -2916,7 +2936,6 @@ static void start_signal_handler(void) (void) pthread_attr_init(&thr_attr); pthread_attr_setscope(&thr_attr,PTHREAD_SCOPE_SYSTEM); - (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED); (void) my_setstacksize(&thr_attr,my_thread_stack_size); mysql_mutex_lock(&LOCK_start_thread); @@ -2936,18 +2955,6 @@ static void start_signal_handler(void) } -#if defined(USE_ONE_SIGNAL_HAND) -pthread_handler_t kill_server_thread(void *arg __attribute__((unused))) -{ - my_thread_init(); // Initialize new thread - break_connect_loop(); - my_thread_end(); - pthread_exit(0); - return 0; -} -#endif - - /** This threads handles all signals and alarms. */ /* ARGSUSED */ pthread_handler_t signal_hand(void *arg __attribute__((unused))) @@ -3004,7 +3011,7 @@ pthread_handler_t signal_hand(void *arg __attribute__((unused))) int origin; while ((error= my_sigwait(&set, &sig, &origin)) == EINTR) /* no-op */; - if (cleanup_done) + if (abort_loop) { DBUG_PRINT("quit",("signal_handler: calling my_thread_end()")); my_thread_end(); @@ -3027,18 +3034,8 @@ pthread_handler_t signal_hand(void *arg __attribute__((unused))) { /* Delete the instrumentation for the signal thread */ PSI_CALL_delete_current_thread(); -#ifdef USE_ONE_SIGNAL_HAND - pthread_t tmp; - if (unlikely((error= mysql_thread_create(0, /* Not instrumented */ - &tmp, &connection_attrib, - kill_server_thread, - (void*) &sig)))) - sql_print_error("Can't create thread to kill server (errno= %d)", - error); -#else my_sigset(sig, SIG_IGN); break_connect_loop(); // MIT THREAD has a alarm thread -#endif } break; case SIGHUP: diff --git a/sql/sql_reload.cc b/sql/sql_reload.cc index cd75cd45319..4f6356cff54 100644 --- a/sql/sql_reload.cc +++ b/sql/sql_reload.cc @@ -67,6 +67,15 @@ bool reload_acl_and_cache(THD *thd, unsigned long long options, bool result=0; select_errors=0; /* Write if more errors */ int tmp_write_to_binlog= *write_to_binlog= 1; +#ifndef DBUG_OFF + /* + When invoked for handling a SIGHUP by rpl_shutdown_sighup.test, we need to + force the signal handler to wait after REFRESH_TABLES, as that will check + for a killed server, and we need to call hostname_cache_refresh after + server cleanup has happened to trigger MDEV-30260. + */ + int do_dbug_sleep= 0; +#endif DBUG_ASSERT(!thd || !thd->in_sub_stmt); @@ -99,6 +108,15 @@ bool reload_acl_and_cache(THD *thd, unsigned long long options, */ my_error(ER_UNKNOWN_ERROR, MYF(0)); } + +#ifndef DBUG_OFF + DBUG_EXECUTE_IF("hold_sighup_log_refresh", { + DBUG_ASSERT(!debug_sync_set_action( + thd, STRING_WITH_LEN("now SIGNAL in_reload_acl_and_cache " + "WAIT_FOR refresh_logs"))); + do_dbug_sleep= 1; + }); +#endif } opt_noacl= 0; @@ -351,6 +369,11 @@ bool reload_acl_and_cache(THD *thd, unsigned long long options, } my_dbopt_cleanup(); } + +#ifndef DBUG_OFF + if (do_dbug_sleep) + my_sleep(3000000); // 3s +#endif if (options & REFRESH_HOSTS) hostname_cache_refresh(); if (thd && (options & REFRESH_STATUS))