diff --git a/mysql-test/suite/binlog/r/binlog_mdev342.result b/mysql-test/suite/binlog/r/binlog_mdev342.result new file mode 100644 index 00000000000..2346ccc2e3d --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_mdev342.result @@ -0,0 +1,29 @@ +SET GLOBAL max_binlog_size= 4096; +SET GLOBAL innodb_flush_log_at_trx_commit= 1; +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +SET DEBUG_SYNC= "binlog_open_before_update_index SIGNAL con1_ready WAIT_FOR con1_cont"; +SET SESSION debug_dbug="+d,crash_create_critical_before_update_index"; +INSERT INTO t1 VALUES (1, REPEAT("x", 4100)); +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +SET DEBUG_SYNC= "ha_commit_trans_after_prepare SIGNAL con2_ready"; +INSERT INTO t1 VALUES (2, NULL); +SET DEBUG_SYNC= "now WAIT_FOR con2_ready"; +SET DEBUG_SYNC= "now SIGNAL con1_cont"; +Got one of the listed errors +SELECT a FROM t1 ORDER BY a; +a +1 +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +show binlog events in 'master-bin.000001' from ; +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 # Format_desc # # SERVER_VERSION, BINLOG_VERSION +master-bin.000001 # Query # # use `test`; CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t1) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Rotate # # master-bin.000002;pos= +DROP TABLE t1; diff --git a/mysql-test/suite/binlog/t/binlog_mdev342-master.opt b/mysql-test/suite/binlog/t/binlog_mdev342-master.opt new file mode 100644 index 00000000000..425fda95086 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_mdev342-master.opt @@ -0,0 +1 @@ +--skip-stack-trace --skip-core-file diff --git a/mysql-test/suite/binlog/t/binlog_mdev342.test b/mysql-test/suite/binlog/t/binlog_mdev342.test new file mode 100644 index 00000000000..cae915b302b --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_mdev342.test @@ -0,0 +1,62 @@ +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/have_binlog_format_row.inc +# Valgrind does not work well with test that crashes the server +--source include/not_valgrind.inc + +# (We do not need to restore these settings, as we crash the server). +SET GLOBAL max_binlog_size= 4096; +SET GLOBAL innodb_flush_log_at_trx_commit= 1; + +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; + +# One connection does an insert that causes a binlog rotate. +# The rotate is paused after writing new file but before updating index. +connect(con1,localhost,root,,); +SET DEBUG_SYNC= "binlog_open_before_update_index SIGNAL con1_ready WAIT_FOR con1_cont"; +SET SESSION debug_dbug="+d,crash_create_critical_before_update_index"; +send INSERT INTO t1 VALUES (1, REPEAT("x", 4100)); + +connection default; +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; + +# Another connection creates a prepared transaction. +# After the transaction is prepared, it will hang waiting for LOCK_log. +connect(con2,localhost,root,,); +SET DEBUG_SYNC= "ha_commit_trans_after_prepare SIGNAL con2_ready"; +send INSERT INTO t1 VALUES (2, NULL); + +connection default; +SET DEBUG_SYNC= "now WAIT_FOR con2_ready"; + +# Now crash the server in con1, with old binlog closed, new binlog not yet in +# index, and one transaction in prepared-but-not-committed state. +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +wait-binlog_mdev342.test +EOF +SET DEBUG_SYNC= "now SIGNAL con1_cont"; +connection con1; +--error 2006,2013 +reap; + +--remove_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart-binlog_mdev342.test +EOF + +connection default; +--enable_reconnect +--source include/wait_until_connected_again.inc + +# Check that all transactions are recovered. +SELECT a FROM t1 ORDER BY a; + +--source include/show_binary_logs.inc +--let $binlog_file= master-bin.000001 +--let $binlog_start= 4 +--source include/show_binlog_events.inc + +# Cleanup +connection default; +DROP TABLE t1; diff --git a/sql/handler.cc b/sql/handler.cc index 2c9cc388006..b7d545a75dc 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -1276,6 +1276,7 @@ int ha_commit_trans(THD *thd, bool all) need_prepare_ordered|= (ht->prepare_ordered != NULL); need_commit_ordered|= (ht->commit_ordered != NULL); } + DEBUG_SYNC(thd, "ha_commit_trans_after_prepare"); DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE();); if (!is_real_trans) diff --git a/sql/log.cc b/sql/log.cc index e80a2aaec51..801d945f3f0 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -2464,9 +2464,10 @@ void MYSQL_LOG::init_pthread_objects() SYNOPSIS close() - exiting Bitmask. For the slow and general logs the only used bit is - LOG_CLOSE_TO_BE_OPENED. This is used if we intend to call - open at once after close. + exiting Bitmask. LOG_CLOSE_TO_BE_OPENED is used if we intend to call + open at once after close. LOG_CLOSE_DELAYED_CLOSE is used for + binlog rotation, to delay actual close of the old file until + we have successfully created the new file. NOTES One can do an open on the object at once after doing a close. @@ -2487,7 +2488,8 @@ void MYSQL_LOG::close(uint exiting) sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno); } - if (mysql_file_close(log_file.file, MYF(MY_WME)) && ! write_error) + if (!(exiting & LOG_CLOSE_DELAYED_CLOSE) && + mysql_file_close(log_file.file, MYF(MY_WME)) && ! write_error) { write_error= 1; sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno); @@ -3189,6 +3191,10 @@ bool MYSQL_BIN_LOG::open(const char *log_name, if (write_file_name_to_index_file) { #ifdef HAVE_REPLICATION +#ifdef ENABLED_DEBUG_SYNC + if (current_thd) + DEBUG_SYNC(current_thd, "binlog_open_before_update_index"); +#endif DBUG_EXECUTE_IF("crash_create_critical_before_update_index", DBUG_SUICIDE();); #endif @@ -4297,6 +4303,10 @@ int MYSQL_BIN_LOG::new_file_impl(bool need_lock) { int error= 0, close_on_error= FALSE; char new_name[FN_REFLEN], *new_name_ptr, *old_name, *file_to_open; + uint close_flag; + bool delay_close= false; + File old_file; + LINT_INIT(old_file); DBUG_ENTER("MYSQL_BIN_LOG::new_file_impl"); if (!is_open()) @@ -4380,7 +4390,20 @@ int MYSQL_BIN_LOG::new_file_impl(bool need_lock) } old_name=name; name=0; // Don't free name - close(LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX); + close_flag= LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX; + if (!is_relay_log) + { + /* + We need to keep the old binlog file open (and marked as in-use) until + the new one is fully created and synced to disk and index. Otherwise we + leave a window where if we crash, there is no binlog file marked as + crashed for server restart to detect the need for recovery. + */ + old_file= log_file.file; + close_flag|= LOG_CLOSE_DELAYED_CLOSE; + delay_close= true; + } + close(close_flag); if (log_type == LOG_BIN && checksum_alg_reset != BINLOG_CHECKSUM_ALG_UNDEF) { DBUG_ASSERT(!is_relay_log); @@ -4423,6 +4446,12 @@ int MYSQL_BIN_LOG::new_file_impl(bool need_lock) end: + if (delay_close) + { + clear_inuse_flag_when_closing(old_file); + mysql_file_close(old_file, MYF(MY_WME)); + } + if (error && close_on_error /* rotate or reopen failed */) { /* @@ -6275,6 +6304,8 @@ int MYSQL_BIN_LOG::wait_for_update_bin_log(THD* thd, - LOG_CLOSE_TO_BE_OPENED : if we intend to call open at once after close. - LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log + - LOG_CLOSE_DELAYED_CLOSE : do not yet close the file and clear the + LOG_EVENT_BINLOG_IN_USE_F flag @note One can do an open on the object at once after doing a close. @@ -6304,12 +6335,11 @@ void MYSQL_BIN_LOG::close(uint exiting) #endif /* HAVE_REPLICATION */ /* don't pwrite in a file opened with O_APPEND - it doesn't work */ - if (log_file.type == WRITE_CACHE && log_type == LOG_BIN) + if (log_file.type == WRITE_CACHE && log_type == LOG_BIN + && !(exiting & LOG_CLOSE_DELAYED_CLOSE)) { - my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET; my_off_t org_position= mysql_file_tell(log_file.file, MYF(0)); - uchar flags= 0; // clearing LOG_EVENT_BINLOG_IN_USE_F - mysql_file_pwrite(log_file.file, &flags, 1, offset, MYF(0)); + clear_inuse_flag_when_closing(log_file.file); /* Restore position so that anything we have in the IO_cache is written to the correct position. @@ -6344,6 +6374,18 @@ void MYSQL_BIN_LOG::close(uint exiting) } +/* + Clear the LOG_EVENT_BINLOG_IN_USE_F; this marks the binlog file as cleanly + closed and not needing crash recovery. +*/ +void MYSQL_BIN_LOG::clear_inuse_flag_when_closing(File file) +{ + my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET; + uchar flags= 0; // clearing LOG_EVENT_BINLOG_IN_USE_F + mysql_file_pwrite(file, &flags, 1, offset, MYF(0)); +} + + void MYSQL_BIN_LOG::set_max_size(ulong max_size_arg) { /* diff --git a/sql/log.h b/sql/log.h index 3ac7091a4e6..b350f3882fa 100644 --- a/sql/log.h +++ b/sql/log.h @@ -202,6 +202,7 @@ extern TC_LOG_DUMMY tc_log_dummy; #define LOG_CLOSE_INDEX 1 #define LOG_CLOSE_TO_BE_OPENED 2 #define LOG_CLOSE_STOP_EVENT 4 +#define LOG_CLOSE_DELAYED_CLOSE 8 /* Maximum unique log filename extension. @@ -665,6 +666,7 @@ public: bool need_mutex); bool reset_logs(THD* thd); void close(uint exiting); + void clear_inuse_flag_when_closing(File file); // iterating through the log index file int find_log_pos(LOG_INFO* linfo, const char* log_name,