diff --git a/include/mysql_com.h b/include/mysql_com.h index affd24a4636..c85014a662b 100644 --- a/include/mysql_com.h +++ b/include/mysql_com.h @@ -136,6 +136,7 @@ enum enum_server_command #define REFRESH_QUERY_CACHE_FREE 0x20000L /* pack query cache */ #define REFRESH_DES_KEY_FILE 0x40000L #define REFRESH_USER_RESOURCES 0x80000L +#define REFRESH_CHECKPOINT 0x100000L /* Disable engine checkpoints */ #define CLIENT_LONG_PASSWORD 1 /* new more secure passwords */ #define CLIENT_FOUND_ROWS 2 /* Found instead of affected rows */ diff --git a/mysql-test/r/flush.result b/mysql-test/r/flush.result index b978304f59d..d575346a140 100644 --- a/mysql-test/r/flush.result +++ b/mysql-test/r/flush.result @@ -6,7 +6,7 @@ insert into t2 values(3); select * from t1; n 3 -flush tables with read lock; +flush tables with read lock and disable checkpoint; drop table t2; ERROR HY000: Can't execute the query because you have a conflicting read lock drop table t2; diff --git a/mysql-test/t/flush.test b/mysql-test/t/flush.test index f27d4cf2fad..b0d685e2b41 100644 --- a/mysql-test/t/flush.test +++ b/mysql-test/t/flush.test @@ -28,7 +28,7 @@ enable_query_log; connection con1; select * from t1; connection con2; -flush tables with read lock; +flush tables with read lock and disable checkpoint; --error 1223 drop table t2; connection con1; diff --git a/sql/handler.cc b/sql/handler.cc index df5acb98efe..e51ded64336 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -598,6 +598,23 @@ void ha_drop_database(char* path) } +static my_bool checkpoint_state_handlerton(THD *unused1, plugin_ref plugin, + void *disable) +{ + handlerton *hton= plugin_data(plugin, handlerton *); + if (hton->state == SHOW_OPTION_YES && hton->checkpoint_state) + hton->checkpoint_state(hton, (int) *(bool*) disable); + return FALSE; +} + + +void ha_checkpoint_state(bool disable) +{ + plugin_foreach(NULL, checkpoint_state_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &disable); +} + + + static my_bool closecon_handlerton(THD *thd, 
plugin_ref plugin, void *unused) { diff --git a/sql/handler.h b/sql/handler.h index 02b96ebefb8..f674a8a56bb 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -865,6 +865,19 @@ struct handlerton int (*recover)(handlerton *hton, XID *xid_list, uint len); int (*commit_by_xid)(handlerton *hton, XID *xid); int (*rollback_by_xid)(handlerton *hton, XID *xid); + /* + Disable or enable checkpointing internal to the storage engine. This is + used for FLUSH TABLES WITH READ LOCK AND DISABLE CHECKPOINT to ensure that + the engine will never start any recovery from a time between + FLUSH TABLES ... ; UNLOCK TABLES. + + While checkpointing is disabled, the engine should pause any background + write activity (such as tablespace checkpointing) that requires consistency + between different files (such as transaction log and tablespace files) for + crash recovery to succeed. The idea is to use this to make safe + multi-volume LVM snapshot backups. + */ + int (*checkpoint_state)(handlerton *hton, bool disabled); void *(*create_cursor_read_view)(handlerton *hton, THD *thd); void (*set_cursor_read_view)(handlerton *hton, THD *thd, void *read_view); void (*close_cursor_read_view)(handlerton *hton, THD *thd, void *read_view); @@ -2629,6 +2642,7 @@ int ha_panic(enum ha_panic_function flag); void ha_close_connection(THD* thd); bool ha_flush_logs(handlerton *db_type); void ha_drop_database(char* path); +void ha_checkpoint_state(bool disable); int ha_create_table(THD *thd, const char *path, const char *db, const char *table_name, HA_CREATE_INFO *create_info, diff --git a/sql/lex.h b/sql/lex.h index 7671f23682d..057b7d1ce96 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -104,6 +104,7 @@ static SYMBOL symbols[] = { { "CHARACTER", SYM(CHAR_SYM)}, { "CHARSET", SYM(CHARSET)}, { "CHECK", SYM(CHECK_SYM)}, + { "CHECKPOINT", SYM(CHECKPOINT_SYM)}, { "CHECKSUM", SYM(CHECKSUM_SYM)}, { "CIPHER", SYM(CIPHER_SYM)}, { "CLIENT", SYM(CLIENT_SYM)}, diff --git a/sql/lock.cc b/sql/lock.cc index 
740b54f9153..2dc6bc357f4 100644 --- a/sql/lock.cc +++ b/sql/lock.cc @@ -1449,7 +1449,7 @@ static void print_lock_error(int error, const char *table) ****************************************************************************/ -volatile uint global_read_lock=0; +volatile uint global_read_lock=0, global_disable_checkpoint= 0; volatile uint global_read_lock_blocks_commit=0; static volatile uint protect_against_global_read_lock=0; static volatile uint waiting_for_read_lock=0; @@ -1508,6 +1508,14 @@ void unlock_global_read_lock(THD *thd) tmp= --global_read_lock; if (thd->global_read_lock == MADE_GLOBAL_READ_LOCK_BLOCK_COMMIT) --global_read_lock_blocks_commit; + if (thd->global_disable_checkpoint) + { + thd->global_disable_checkpoint= 0; + if (!--global_disable_checkpoint) + { + ha_checkpoint_state(0); // Enable checkpoints + } + } pthread_mutex_unlock(&LOCK_global_read_lock); /* Send the signal outside the mutex to avoid a context switch */ if (!tmp) @@ -1629,6 +1637,24 @@ bool make_global_read_lock_block_commit(THD *thd) } +/** + Disable checkpoints for all handlers on behalf of this connection. + They are re-enabled in unlock_global_read_lock() once no connection has them disabled. +*/ + +void disable_checkpoints(THD *thd) +{ + pthread_mutex_lock(&LOCK_global_read_lock); + if (!thd->global_disable_checkpoint) + { + thd->global_disable_checkpoint= 1; + if (!global_disable_checkpoint++) + ha_checkpoint_state(1); // Disable checkpoints + } + pthread_mutex_unlock(&LOCK_global_read_lock); +} + + /** Broadcast COND_refresh and COND_global_read_lock. 
diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index b9bc99e9747..6901da60fbd 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -2094,7 +2094,7 @@ extern bool opt_ignore_builtin_innodb; extern my_bool opt_character_set_client_handshake; extern bool volatile abort_loop, shutdown_in_progress; extern bool in_bootstrap; -extern uint volatile thread_count, thread_running, global_read_lock; +extern uint volatile thread_count, thread_running, global_read_lock, global_disable_checkpoint; extern ulong thread_created; extern uint thread_handling; extern uint connection_count, extra_connection_count; @@ -2257,6 +2257,7 @@ bool wait_if_global_read_lock(THD *thd, bool abort_on_refresh, bool is_not_commit); void start_waiting_global_read_lock(THD *thd); bool make_global_read_lock_block_commit(THD *thd); +void disable_checkpoints(THD *thd); bool set_protect_against_global_read_lock(void); void unset_protect_against_global_read_lock(void); void broadcast_refresh(void); diff --git a/sql/sql_class.cc b/sql/sql_class.cc index f8905cd5f8c..a5c06d2aa77 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -674,6 +674,7 @@ THD::THD() stmt_depends_on_first_successful_insert_id_in_prev_stmt(FALSE), examined_row_count(0), global_read_lock(0), + global_disable_checkpoint(0), is_fatal_error(0), transaction_rollback_request(0), is_fatal_sub_stmt_error(0), diff --git a/sql/sql_class.h b/sql/sql_class.h index 9ea4e37e610..904ffda5c85 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -1920,7 +1920,7 @@ public: ulong query_plan_fsort_passes; pthread_t real_id; /* For debugging */ my_thread_id thread_id; - uint tmp_table, global_read_lock; + uint tmp_table, global_read_lock, global_disable_checkpoint; uint server_status,open_options; enum enum_thread_type system_thread; uint select_number; //number of select (used for EXPLAIN) diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 103cf72cf14..94d0436fba3 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -7058,6 +7058,8 @@ bool 
reload_acl_and_cache(THD *thd, ulong options, TABLE_LIST *tables, unlock_global_read_lock(thd); return 1; } + if (options & REFRESH_CHECKPOINT) + disable_checkpoints(thd); } else { diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index bf213b19bb9..27d8029cbbe 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -758,6 +758,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token CHANGED %token CHARSET %token CHAR_SYM /* SQL-2003-R */ +%token CHECKPOINT_SYM %token CHECKSUM_SYM %token CHECK_SYM /* SQL-2003-R */ %token CIPHER_SYM @@ -1336,6 +1337,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); opt_natural_language_mode opt_query_expansion opt_ev_status opt_ev_on_completion ev_on_completion opt_ev_comment ev_alter_on_schedule_completion opt_ev_rename_to opt_ev_sql_stmt + optional_flush_tables_arguments %type ulong_num real_ulong_num merge_insert_types @@ -10784,8 +10786,8 @@ flush_option: table_or_tables { Lex->type|= REFRESH_TABLES; } opt_table_list {} - | TABLES WITH READ_SYM LOCK_SYM - { Lex->type|= REFRESH_TABLES | REFRESH_READ_LOCK; } + | TABLES WITH READ_SYM LOCK_SYM optional_flush_tables_arguments + { Lex->type|= REFRESH_TABLES | REFRESH_READ_LOCK | $5; } | QUERY_SYM CACHE_SYM { Lex->type|= REFRESH_QUERY_CACHE_FREE; } | HOSTS_SYM @@ -10821,6 +10823,10 @@ opt_table_list: | table_list {} ; +optional_flush_tables_arguments: + /* empty: no extra refresh flag is added */ {$$= 0;} + | AND_SYM DISABLE_SYM CHECKPOINT_SYM {$$= REFRESH_CHECKPOINT; } + reset: RESET_SYM { @@ -11859,6 +11865,7 @@ keyword: | CACHE_SYM {} | CHARSET {} | CHECKSUM_SYM {} + | CHECKPOINT_SYM {} | CLOSE_SYM {} | COMMENT_SYM {} | COMMIT_SYM {} diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 4c7e8ecce9f..7811ccca65a 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -3167,6 +3167,14 @@ bool maria_flush_logs(handlerton *hton) } +int maria_checkpoint_state(handlerton *hton, bool disabled) +{ + maria_checkpoint_disabled= (my_bool) disabled; + 
return 0; +} + + + #define SHOW_MSG_LEN (FN_REFLEN + 20) /** @brief show status handler @@ -3355,6 +3363,7 @@ static int ha_maria_init(void *p) maria_hton->panic= maria_hton_panic; maria_hton->commit= maria_commit; maria_hton->rollback= maria_rollback; + maria_hton->checkpoint_state= maria_checkpoint_state; #ifdef MARIA_CANNOT_ROLLBACK maria_hton->commit= 0; #endif diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c index 79a9e790e70..7522514649b 100644 --- a/storage/maria/ma_checkpoint.c +++ b/storage/maria/ma_checkpoint.c @@ -576,6 +576,12 @@ pthread_handler_t ma_checkpoint_background(void *arg) switch (sleeps % interval) { case 0: + /* If checkpoints are disabled, wait 1 second and try again */ + if (maria_checkpoint_disabled) + { + sleep_time= 1; + break; + } /* With background flushing evenly distributed over the time between two checkpoints, we should have only little flushing to do @@ -590,6 +596,7 @@ pthread_handler_t ma_checkpoint_background(void *arg) want to checkpoint every minute, hence the positive checkpoint_min_activity. 
*/ + if (((translog_get_horizon() - log_horizon_at_last_checkpoint) + (maria_pagecache->global_cache_write - pagecache_flushes_at_last_checkpoint) * diff --git a/storage/maria/ma_static.c b/storage/maria/ma_static.c index a97f642547a..71094fb0343 100644 --- a/storage/maria/ma_static.c +++ b/storage/maria/ma_static.c @@ -39,6 +39,7 @@ my_bool maria_inited= FALSE; my_bool maria_in_ha_maria= FALSE; /* If used from ha_maria or not */ my_bool maria_recovery_changed_data= 0, maria_recovery_verbose= 0; my_bool maria_assert_if_crashed_table= 0; +my_bool maria_checkpoint_disabled= 0; /* If background checkpoints are paused */ pthread_mutex_t THR_LOCK_maria; #if defined(THREAD) && !defined(DONT_USE_RW_LOCKS) diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index 03f0641b023..eeb72c83287 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -820,7 +820,7 @@ extern uint maria_quick_table_bits; extern char *maria_data_root; extern uchar maria_zero_string[]; extern my_bool maria_inited, maria_in_ha_maria, maria_recovery_changed_data; -extern my_bool maria_recovery_verbose; +extern my_bool maria_recovery_verbose, maria_checkpoint_disabled; extern my_bool maria_assert_if_crashed_table; extern HASH maria_stored_state; extern int (*maria_create_trn_hook)(MARIA_HA *); diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 7389abc464d..14b93704748 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -496,6 +496,17 @@ bool innobase_show_status(handlerton *hton, THD* thd, stat_print_fn* stat_print, enum ha_stat_type stat_type); +/* Disable InnoDB checkpoints if disable is true, otherwise enable them again; always returns 0 */ +static int innobase_checkpoint_state(handlerton *hton, bool disable) +{ + if (disable) + (void) log_disable_checkpoint(); + else + log_enable_checkpoint(); + return 0; +} + + /*****************************************************************//** Commits a transaction in an InnoDB database. 
*/ static @@ -2065,6 +2076,7 @@ innobase_init( innobase_hton->recover=innobase_xa_recover; innobase_hton->commit_by_xid=innobase_commit_by_xid; innobase_hton->rollback_by_xid=innobase_rollback_by_xid; + innobase_hton->checkpoint_state= innobase_checkpoint_state; innobase_hton->create_cursor_read_view=innobase_create_cursor_view; innobase_hton->set_cursor_read_view=innobase_set_cursor_view; innobase_hton->close_cursor_read_view=innobase_close_cursor_view; diff --git a/storage/xtradb/include/log0log.h b/storage/xtradb/include/log0log.h index 2b4b34f2600..1bca9029648 100644 --- a/storage/xtradb/include/log0log.h +++ b/storage/xtradb/include/log0log.h @@ -249,12 +249,15 @@ log_checkpoint( /*===========*/ ibool sync, /*!< in: TRUE if synchronous operation is desired */ - ibool write_always); /*!< in: the function normally checks if the + ibool write_always, /*!< in: the function normally checks if the the new checkpoint would have a greater lsn than the previous one: if not, then no physical write is done; by setting this parameter TRUE, a physical write will always be made to log files */ + ibool safe_to_ignore);/*!< in: TRUE if the checkpoint can be skipped + when checkpoints are disabled */ + /****************************************************************//** Makes a checkpoint at a given lsn or later. */ UNIV_INTERN @@ -271,6 +274,18 @@ log_make_checkpoint_at( by setting this parameter TRUE, a physical write will always be made to log files */ +/****************************************************************//** +Disable checkpoints. 
This is used when doing a volume snapshot +to ensure that we do not get a checkpoint between snapshotting two +different volumes */ +UNIV_INTERN +ibool log_disable_checkpoint(void); + +/****************************************************************//** +Enable checkpoints that were disabled with log_disable_checkpoint() */ +UNIV_INTERN +void log_enable_checkpoint(void); + /****************************************************************//** Makes a checkpoint at the latest lsn and writes it to first page of each data file in the database, so that we know that the file spaces contain diff --git a/storage/xtradb/log/log0log.c b/storage/xtradb/log/log0log.c index c39f60bd4b9..82c6c3da23e 100644 --- a/storage/xtradb/log/log0log.c +++ b/storage/xtradb/log/log0log.c @@ -97,6 +97,8 @@ archive */ UNIV_INTERN byte log_archive_io; #endif /* UNIV_LOG_ARCHIVE */ +UNIV_INTERN ulint log_disable_checkpoint_active= 0; + /* A margin for free space in the log buffer before a log entry is catenated */ #define LOG_BUF_WRITE_MARGIN (4 * OS_FILE_LOG_BLOCK_SIZE) @@ -174,7 +176,7 @@ log_fsp_current_free_limit_set_and_checkpoint( success = FALSE; while (!success) { - success = log_checkpoint(TRUE, TRUE); + success = log_checkpoint(TRUE, TRUE, FALSE); } } @@ -1988,12 +1990,14 @@ log_checkpoint( /*===========*/ ibool sync, /*!< in: TRUE if synchronous operation is desired */ - ibool write_always) /*!< in: the function normally checks if the + ibool write_always, /*!< in: the function normally checks if the the new checkpoint would have a greater lsn than the previous one: if not, then no physical write is done; by setting this parameter TRUE, a physical write will always be made to log files */ + ibool safe_to_ignore) /*!< in: TRUE if the checkpoint can be skipped + when checkpoints are disabled */ { ib_uint64_t oldest_lsn; @@ -2024,14 +2028,27 @@ log_checkpoint( mutex_enter(&(log_sys->mutex)); + /* Return if this is not a forced checkpoint and either there is no + need for a checkpoint or if 
checkpoints are disabled */ if (!write_always - && log_sys->last_checkpoint_lsn >= oldest_lsn) { + && (log_sys->last_checkpoint_lsn >= oldest_lsn || + (safe_to_ignore && log_disable_checkpoint_active))) + { mutex_exit(&(log_sys->mutex)); return(TRUE); } + if (log_disable_checkpoint_active) + { + /* Wait until we are allowed to do a checkpoint */ + mutex_exit(&(log_sys->mutex)); + rw_lock_s_lock(&(log_sys->checkpoint_lock)); + rw_lock_s_unlock(&(log_sys->checkpoint_lock)); + mutex_enter(&(log_sys->mutex)); + } + ut_ad(log_sys->flushed_to_disk_lsn >= oldest_lsn); if (log_sys->n_pending_checkpoint_writes > 0) { @@ -2092,7 +2109,73 @@ log_make_checkpoint_at( while (!log_preflush_pool_modified_pages(lsn, TRUE)); - while (!log_checkpoint(TRUE, write_always)); + while (!log_checkpoint(TRUE, write_always, FALSE)); +} + +/****************************************************************//** +Disable checkpoints. This is used when doing a volume snapshot +to ensure that we do not get a checkpoint between snapshotting two +different volumes */ + +UNIV_INTERN +ibool log_disable_checkpoint(void) +{ + mutex_enter(&(log_sys->mutex)); + + /* + Wait if a checkpoint write is running. + This is the same code that is used in log_checkpoint() to ensure + that two checkpoints are not happening at the same time. + */ + while (log_sys->n_pending_checkpoint_writes > 0) + { + mutex_exit(&(log_sys->mutex)); + rw_lock_s_lock(&(log_sys->checkpoint_lock)); + rw_lock_s_unlock(&(log_sys->checkpoint_lock)); + mutex_enter(&(log_sys->mutex)); + } + /* + The following should never be true; it is here just in case of + wrong usage of this function. (Better safe than sorry). 
+ */ + + if (log_disable_checkpoint_active) + { + mutex_exit(&(log_sys->mutex)); + return 1; /* Already disabled */ + } + /* + Take the checkpoint lock to ensure we will not get any checkpoints + running + */ + rw_lock_x_lock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT); + log_disable_checkpoint_active= 1; + mutex_exit(&(log_sys->mutex)); + return 0; +} + + +/****************************************************************//** +Enable checkpoints that were disabled with log_disable_checkpoint(). +Must only be called while checkpoints are disabled, that is after an earlier +call to log_disable_checkpoint(). + +Note: We cannot take log_sys->mutex here, as a running log_checkpoint() +that is waiting for log_sys->checkpoint_lock may already hold it. +This is however safe to do without a mutex as log_disable_checkpoint +is protected by log_sys->checkpoint_lock. +*/ + +UNIV_INTERN +void log_enable_checkpoint(void) +{ + ut_ad(log_disable_checkpoint_active); + /* Test variable, mostly to protect against wrong usage */ + if (log_disable_checkpoint_active) + { + log_disable_checkpoint_active= 0; + rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT); + } } /****************************************************************//** @@ -2189,7 +2272,7 @@ loop: } if (do_checkpoint) { - log_checkpoint(checkpoint_sync, FALSE); + log_checkpoint(checkpoint_sync, FALSE, FALSE); if (checkpoint_sync) { @@ -3099,6 +3182,12 @@ logs_empty_and_mark_files_at_shutdown(void) ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Starting shutdown...\n"); } + + /* Enable checkpoints if someone had turned them off */ + if (log_disable_checkpoint_active) { + log_enable_checkpoint(); + } + /* Wait until the master thread and all other operations are idle: our algorithm only works if the server is idle at shutdown */ diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c index 4565cac41b9..1cd44b451eb 100644 --- a/storage/xtradb/srv/srv0srv.c +++ b/storage/xtradb/srv/srv0srv.c @@ -3232,7 +3232,7 @@ retry_flush_batch: /* Make a new 
checkpoint about once in 10 seconds */ - log_checkpoint(TRUE, FALSE); + log_checkpoint(TRUE, FALSE, TRUE); srv_main_thread_op_info = "reserving kernel mutex"; @@ -3353,7 +3353,7 @@ flush_loop: srv_main_thread_op_info = "making checkpoint"; - log_checkpoint(TRUE, FALSE); + log_checkpoint(TRUE, FALSE, TRUE); if (buf_get_modified_ratio_pct() > srv_max_buf_pool_modified_pct) {