diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h index 2050e5b45c0..ecd57bae47c 100644 --- a/innobase/include/srv0srv.h +++ b/innobase/include/srv0srv.h @@ -100,7 +100,10 @@ extern ulint srv_max_n_threads; extern lint srv_conc_n_threads; extern ibool srv_fast_shutdown; - +extern ibool srv_very_fast_shutdown; /* if this is TRUE, do not flush the + buffer pool to data files at the + shutdown; we effectively 'crash' + InnoDB */ extern ibool srv_innodb_status; extern ibool srv_use_doublewrite_buf; diff --git a/innobase/log/log0log.c b/innobase/log/log0log.c index b6357fa2c76..e08adb013b5 100644 --- a/innobase/log/log0log.c +++ b/innobase/log/log0log.c @@ -3048,13 +3048,25 @@ loop: #ifdef UNIV_LOG_ARCHIVE log_archive_all(); #endif /* UNIV_LOG_ARCHIVE */ - log_make_checkpoint_at(ut_dulint_max, TRUE); + + if (!srv_very_fast_shutdown) { + /* In a 'very fast' shutdown we do not flush the buffer pool: + it is essentially a 'crash' of the InnoDB server. */ + + log_make_checkpoint_at(ut_dulint_max, TRUE); + } else { + /* Make sure that the log is all flushed to disk, so that + we can recover all committed transactions in a crash + recovery */ + log_buffer_flush_to_disk(); + } mutex_enter(&(log_sys->mutex)); lsn = log_sys->lsn; - if (ut_dulint_cmp(lsn, log_sys->last_checkpoint_lsn) != 0 + if ((ut_dulint_cmp(lsn, log_sys->last_checkpoint_lsn) != 0 + && !srv_very_fast_shutdown) #ifdef UNIV_LOG_ARCHIVE || (srv_log_archive_on && ut_dulint_cmp(lsn, @@ -3098,11 +3110,12 @@ loop: fil_flush_file_spaces(FIL_TABLESPACE); fil_flush_file_spaces(FIL_LOG); - /* The next fil_write_... will pass the buffer pool: therefore - it is essential that the buffer pool has been completely flushed - to disk! */ + /* The call fil_write_flushed_lsn_to_data_files() will pass the buffer + pool: therefore it is essential that the buffer pool has been + completely flushed to disk! (We do not call fil_write... if the + 'very fast' shutdown is enabled.) 
*/ - if (!buf_all_freed()) { + if (!srv_very_fast_shutdown && !buf_all_freed()) { goto loop; } @@ -3125,7 +3138,7 @@ loop: /* Make some checks that the server really is quiet */ ut_a(srv_n_threads_active[SRV_MASTER] == 0); - ut_a(buf_all_freed()); + ut_a(srv_very_fast_shutdown || buf_all_freed()); ut_a(0 == ut_dulint_cmp(lsn, log_sys->lsn)); if (ut_dulint_cmp(lsn, srv_start_lsn) < 0) { @@ -3140,7 +3153,15 @@ loop: srv_shutdown_lsn = lsn; - fil_write_flushed_lsn_to_data_files(lsn, arch_log_no); + if (!srv_very_fast_shutdown) { + /* In a 'very fast' shutdown we do not flush the buffer pool: + it is essentially a 'crash' of the InnoDB server. Then we must + not write the lsn stamps to the data files, since at a + startup InnoDB deduces from the stamps if the previous + shutdown was clean. */ + + fil_write_flushed_lsn_to_data_files(lsn, arch_log_no); + } fil_flush_file_spaces(FIL_TABLESPACE); @@ -3148,7 +3169,7 @@ loop: /* Make some checks that the server really is quiet */ ut_a(srv_n_threads_active[SRV_MASTER] == 0); - ut_a(buf_all_freed()); + ut_a(srv_very_fast_shutdown || buf_all_freed()); ut_a(0 == ut_dulint_cmp(lsn, log_sys->lsn)); } diff --git a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c index adffe06ef0a..d913d77fdfc 100644 --- a/innobase/srv/srv0srv.c +++ b/innobase/srv/srv0srv.c @@ -249,6 +249,10 @@ merge to completion before shutdown */ ibool srv_fast_shutdown = FALSE; +ibool srv_very_fast_shutdown = FALSE; /* if this is TRUE, do not flush the + buffer pool to data files at the + shutdown; we effectively 'crash' + InnoDB */ /* Generate a innodb_status. file */ ibool srv_innodb_status = FALSE; @@ -2178,7 +2182,8 @@ loop: /*****************************************************************/ background_loop: /* ---- In this loop we run background operations when the server - is quiet from user activity */ + is quiet from user activity. Also in the case of a shutdown, we + loop here, flushing the buffer pool to the data files. 
*/ /* The server has been quiet for a while: start running background operations */ @@ -2251,7 +2256,16 @@ background_loop: flush_loop: srv_main_thread_op_info = "flushing buffer pool pages"; - n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max); + + if (!srv_very_fast_shutdown) { + n_pages_flushed = + buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max); + } else { + /* In a 'very fast' shutdown we do not flush the buffer pool + to data files: we set n_pages_flushed to 0 artificially. */ + + n_pages_flushed = 0; + } srv_main_thread_op_info = "reserving kernel mutex"; @@ -2304,7 +2318,10 @@ flush_loop: /* If we are doing a fast shutdown (= the default) we do not do purge or insert buffer merge. But we - flush the buffer pool completely to disk. */ + flush the buffer pool completely to disk. + In a 'very fast' shutdown we do not flush the buffer + pool to data files: we have set n_pages_flushed to + 0 artificially. */ goto background_loop; } diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc index f10731628da..d748c005d02 100644 --- a/sql/ha_innodb.cc +++ b/sql/ha_innodb.cc @@ -20,6 +20,9 @@ NOTE: You can only use noninlined InnoDB functions in this file, because we have disables the InnoDB inlining in this file. */ /* TODO list for the InnoDB handler in 4.1: + - Remove the flag innodb_active_trans from thd and replace it with a + function call innodb_active_trans(thd), which looks at the InnoDB + trx struct state field - Find out what kind of problems the OS X case-insensitivity causes to table and database names; should we 'normalize' the names like we do in Windows? 
@@ -114,6 +117,9 @@ uint innobase_flush_log_at_trx_commit = 1; my_bool innobase_log_archive = FALSE;/* unused */ my_bool innobase_use_native_aio = FALSE; my_bool innobase_fast_shutdown = TRUE; +my_bool innobase_very_fast_shutdown = FALSE; /* this can be set to + 1 just prior to calling + innobase_end() */ my_bool innobase_file_per_table = FALSE; my_bool innobase_locks_unsafe_for_binlog = FALSE; my_bool innobase_create_status_file = FALSE; @@ -799,6 +805,10 @@ ha_innobase::init_table_handle_for_HANDLER(void) trx_assign_read_view(prebuilt->trx); + /* Set the MySQL flag to mark that there is an active transaction */ + + current_thd->transaction.all.innodb_active_trans = 1; + /* We did the necessary inits in this function, no need to repeat them in row_search_for_mysql */ @@ -1059,6 +1069,15 @@ innobase_end(void) #endif if (innodb_inited) { + if (innobase_very_fast_shutdown) { + srv_very_fast_shutdown = TRUE; + fprintf(stderr, +"InnoDB: MySQL has requested a very fast shutdown without flushing\n" +"InnoDB: the InnoDB buffer pool to data files. At the next mysqld startup\n" +"InnoDB: InnoDB will do a crash recovery!\n"); + + } + innodb_inited= 0; if (innobase_shutdown_for_mysql() != DB_SUCCESS) err= 1; @@ -1115,6 +1134,48 @@ innobase_commit_low( trx_commit_for_mysql(trx); } +/********************************************************************* +Creates an InnoDB transaction struct for the thd if it does not yet have one. +Starts a new InnoDB transaction if a transaction is not yet started. And +assigns a new snapshot for a consistent read if the transaction does not yet +have one. 
*/ + +int +innobase_start_trx_and_assign_read_view( +/*====================================*/ + /* out: 0 */ + THD* thd) /* in: MySQL thread handle of the user for whom + the transaction should be started */ +{ + trx_t* trx; + + DBUG_ENTER("innobase_start_trx_and_assign_read_view"); + + /* Create a new trx struct for thd, if it does not yet have one */ + + trx = check_trx_exists(thd); + + /* This is just to play safe: release a possible FIFO ticket and + search latch. Since we will reserve the kernel mutex, we have to + release the search system latch first to obey the latching order. */ + + innobase_release_stat_resources(trx); + + /* If the transaction is not started yet, start it */ + + trx_start_if_not_started_noninline(trx); + + /* Assign a read view if the transaction does not have it yet */ + + trx_assign_read_view(trx); + + /* Set the MySQL flag to mark that thd has an active transaction */ + + thd->transaction.all.innodb_active_trans = 1; + + DBUG_RETURN(0); +} + /********************************************************************* Commits a transaction in an InnoDB database or marks an SQL statement ended. */ @@ -1146,8 +1207,10 @@ innobase_commit( 1. ::external_lock(), 2. ::start_stmt(), - 3. innobase_query_caching_of_table_permitted(), and + 3. innobase_query_caching_of_table_permitted(), 4. innobase_savepoint(), + 5. ::init_table_handle_for_HANDLER(), + 6. innobase_start_trx_and_assign_read_view() and it is only set to 0 in a commit or a rollback. If it is 0 we know there cannot be resources to be freed and we could return immediately. 
diff --git a/sql/ha_innodb.h b/sql/ha_innodb.h index d72f5a58fe4..7f7b000a100 100644 --- a/sql/ha_innodb.h +++ b/sql/ha_innodb.h @@ -191,6 +191,11 @@ extern my_bool innobase_log_archive, innobase_use_native_aio, innobase_fast_shutdown, innobase_file_per_table, innobase_locks_unsafe_for_binlog, innobase_create_status_file; +extern my_bool innobase_very_fast_shutdown; /* set this to 1 just before + calling innobase_end() if you want + InnoDB to shut down without + flushing the buffer pool: this + is equivalent to a 'crash' */ extern "C" { extern ulong srv_max_buf_pool_modified_pct; extern ulong srv_auto_extend_increment; @@ -231,3 +236,4 @@ void innobase_release_temporary_latches(void* innobase_tid); void innobase_store_binlog_offset_and_flush_log(char *binlog_name,longlong offset); +int innobase_start_trx_and_assign_read_view(THD* thd);