From ffa7e0fabe50ceb00d2d64f4806fb87f9f607b3a Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 18 Oct 2010 11:25:06 +0200 Subject: [PATCH] MWL#116: Efficient group commit: Fix bug that binlog pos stored by XtraDB during commit was wrong when more than one commit in group. Now the actual binlog position for each commit is stored in THD, and XtraDB can fetch the correct value from within commit_ordered() or commit(). mysql-test/r/group_commit_binlog_pos.result: Test case for XtraDB binlog position. mysql-test/t/group_commit_binlog_pos-master.opt: Test case for XtraDB binlog position. mysql-test/t/group_commit_binlog_pos.test: Test case for XtraDB binlog position. sql/log.cc: Save binlog position corresponding to commit in THD, and make accessible to storage engine. sql/sql_parse.cc: Add generic crash point for use in test cases. storage/xtradb/handler/ha_innodb.cc: Update to use new method of getting current binlog position that works with group commit. storage/xtradb/handler/ha_innodb.h: Update to use new method of getting current binlog position that works with group commit. 
--- mysql-test/r/group_commit_binlog_pos.result | 35 ++++++++ .../t/group_commit_binlog_pos-master.opt | 1 + mysql-test/t/group_commit_binlog_pos.test | 83 +++++++++++++++++++ sql/log.cc | 41 ++++++++- sql/sql_parse.cc | 4 + storage/xtradb/handler/ha_innodb.cc | 21 ++--- storage/xtradb/handler/ha_innodb.h | 15 ++-- 7 files changed, 175 insertions(+), 25 deletions(-) create mode 100644 mysql-test/r/group_commit_binlog_pos.result create mode 100644 mysql-test/t/group_commit_binlog_pos-master.opt create mode 100644 mysql-test/t/group_commit_binlog_pos.test diff --git a/mysql-test/r/group_commit_binlog_pos.result b/mysql-test/r/group_commit_binlog_pos.result new file mode 100644 index 00000000000..0d7c23cbbbd --- /dev/null +++ b/mysql-test/r/group_commit_binlog_pos.result @@ -0,0 +1,35 @@ +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb; +INSERT INTO t1 VALUES (0); +SET DEBUG_SYNC= "commit_after_get_LOCK_group_commit SIGNAL con1_waiting WAIT_FOR con3_queued"; +SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont EXECUTE 3"; +INSERT INTO t1 VALUES (1); +SET DEBUG_SYNC= "now WAIT_FOR con1_waiting"; +SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con2_queued"; +INSERT INTO t1 VALUES (2); +SET DEBUG_SYNC= "now WAIT_FOR con2_queued"; +SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con3_queued"; +INSERT INTO t1 VALUES (3); +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; +SET DEBUG_SYNC= "now SIGNAL con1_loop_cont"; +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; +SET DEBUG_SYNC= "now SIGNAL con1_loop_cont"; +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; +SELECT * FROM t1 ORDER BY a; +a +0 +1 +2 +SET SESSION debug="+d,crash_dispatch_command_before"; +SELECT 1; +ERROR HY000: Lost connection to MySQL server during query +ERROR HY000: Lost connection to MySQL server during query +ERROR HY000: Lost connection to MySQL server during query +SELECT * FROM t1 ORDER BY a; +a +0 +1 +2 +3 +InnoDB: Last MySQL binlog file position 0 767, file name 
./master-bin.000001 +SET DEBUG_SYNC= 'RESET'; +DROP TABLE t1; diff --git a/mysql-test/t/group_commit_binlog_pos-master.opt b/mysql-test/t/group_commit_binlog_pos-master.opt new file mode 100644 index 00000000000..425fda95086 --- /dev/null +++ b/mysql-test/t/group_commit_binlog_pos-master.opt @@ -0,0 +1 @@ +--skip-stack-trace --skip-core-file diff --git a/mysql-test/t/group_commit_binlog_pos.test b/mysql-test/t/group_commit_binlog_pos.test new file mode 100644 index 00000000000..ad1a0e4b508 --- /dev/null +++ b/mysql-test/t/group_commit_binlog_pos.test @@ -0,0 +1,83 @@ +--source include/have_debug_sync.inc +--source include/have_innodb.inc +--source include/have_log_bin.inc +# Need DBUG to crash the server intentionally +--source include/have_debug.inc +# Don't test this under valgrind, memory leaks will occur as we crash +--source include/not_valgrind.inc + +# XtraDB stores the binlog position corresponding to the last commit, and +# prints it during crash recovery. +# Test that we get the correct position when we group commit several +# transactions together. + +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb; +INSERT INTO t1 VALUES (0); + +connect(con1,localhost,root,,); +connect(con2,localhost,root,,); +connect(con3,localhost,root,,); + +# Queue up three commits for group commit. 
+ +connection con1; +SET DEBUG_SYNC= "commit_after_get_LOCK_group_commit SIGNAL con1_waiting WAIT_FOR con3_queued"; +SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont EXECUTE 3"; +send INSERT INTO t1 VALUES (1); + +connection con2; +SET DEBUG_SYNC= "now WAIT_FOR con1_waiting"; +SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con2_queued"; +send INSERT INTO t1 VALUES (2); + +connection con3; +SET DEBUG_SYNC= "now WAIT_FOR con2_queued"; +SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con3_queued"; +send INSERT INTO t1 VALUES (3); + +connection default; +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; +# At this point, no transactions are committed. +SET DEBUG_SYNC= "now SIGNAL con1_loop_cont"; +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; +# At this point, 1 transaction is committed. +SET DEBUG_SYNC= "now SIGNAL con1_loop_cont"; +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; + +# At this point, 2 transactions are committed. +SELECT * FROM t1 ORDER BY a; + +connection con2; +reap; + +# Now crash the server with 1+2 in-memory committed, 3 only prepared. +connection default; +system echo wait-group_commit_binlog_pos.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; +SET SESSION debug="+d,crash_dispatch_command_before"; +--error 2013 +SELECT 1; + +connection con1; +--error 2013 +reap; +connection con3; +--error 2013 +reap; + +system echo restart-group_commit_binlog_pos.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; + +connection default; +--enable_reconnect +--source include/wait_until_connected_again.inc + +# Crash recovery should recover all three transactions. +SELECT * FROM t1 ORDER BY a; + +# Check that the binlog position reported by InnoDB is the correct one +# for the end of the second transaction (as can be checked with +# mysqlbinlog). 
+let $MYSQLD_DATADIR= `SELECT @@datadir`; +--exec grep 'InnoDB: Last MySQL binlog file position' $MYSQLD_DATADIR/../../log/mysqld.1.err | tail -1 + +SET DEBUG_SYNC= 'RESET'; +DROP TABLE t1; diff --git a/sql/log.cc b/sql/log.cc index 4e489662079..d99f04d4425 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -155,7 +155,7 @@ class binlog_trx_data { public: binlog_trx_data() : at_least_one_stmt_committed(0), incident(FALSE), m_pending(0), - before_stmt_pos(MY_OFF_T_UNDEF), using_xa(0) + before_stmt_pos(MY_OFF_T_UNDEF), using_xa(0), commit_bin_log_file_pos(0) { trans_log.end_of_file= max_binlog_cache_size; (void) my_pthread_mutex_init(&LOCK_group_commit, MY_MUTEX_INIT_SLOW, @@ -218,6 +218,7 @@ public: incident= FALSE; trans_log.end_of_file= max_binlog_cache_size; using_xa= FALSE; + commit_bin_log_file_pos= 0; DBUG_ASSERT(empty()); } @@ -297,6 +298,11 @@ public: /* Mutex and condition for wakeup after group commit. */ pthread_mutex_t LOCK_group_commit; pthread_cond_t COND_group_commit; + /* + Binlog position after current commit, available to storage engines during + commit() and commit_ordered(). + */ + ulonglong commit_bin_log_file_pos; }; handlerton *binlog_hton; @@ -5170,6 +5176,8 @@ MYSQL_BIN_LOG::trx_group_commit_leader(TC_group_commit_entry *first) write_count++; } + current->commit_bin_log_file_pos= + log_file.pos_in_file + (log_file.write_pos - log_file.write_buffer); if (current->end_event->get_type_code() == XID_EVENT) xid_count++; } @@ -6005,6 +6013,7 @@ int TC_LOG_group_commit::log_and_order(THD *thd, my_xid xid, bool all, ++num_group_commits; do { + DEBUG_SYNC(thd, "commit_loop_entry_commit_ordered"); ++num_commits; if (!current->xid_error) run_commit_ordered(current->thd, current->all); @@ -6813,6 +6822,36 @@ ulonglong mysql_bin_log_file_pos(void) { return (ulonglong) mysql_bin_log.get_log_file()->pos_in_file; } +/* + Get the current position of the MySQL binlog for transaction currently being + committed. 
+ + This is valid to call from within storage engine commit_ordered() and + commit() methods only. + + Since it stores the position inside THD, it is safe to call without any + locking. + + Note that currently the binlog file name is not stored inside THD, but this + is still safe as it can only change when the log is rotated, and we never + rotate the binlog while commits are pending inside storage engines. +*/ +void +mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file) +{ + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton); + if (trx_data) + { + *out_pos= trx_data->commit_bin_log_file_pos; + *out_file= mysql_bin_log.get_log_fname(); + } + else + { + *out_pos= 0; + *out_file= NULL; + } +} #endif /* INNODB_COMPATIBILITY_HOOKS */ diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index caec049c244..8dee0155f2b 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -999,6 +999,10 @@ bool dispatch_command(enum enum_server_command command, THD *thd, DBUG_ENTER("dispatch_command"); DBUG_PRINT("info", ("command: %d", command)); + DBUG_EXECUTE_IF("crash_dispatch_command_before", + { DBUG_PRINT("crash_dispatch_command_before", ("now")); + DBUG_ABORT(); }); + thd->command=command; /* Commands which always take a long time are logged into diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index d9abb5ae032..b05670e6c6e 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -2716,8 +2716,10 @@ static void innobase_commit_ordered_2( /*============*/ - trx_t* trx) /*!< in: Innodb transaction */ + trx_t* trx, /*!< in: Innodb transaction */ + THD* thd) /*!< in: MySQL thread handle */ { + ulonglong tmp_pos; DBUG_ENTER("innobase_commit_ordered"); /* We need current binlog position for ibbackup to work. 
@@ -2741,17 +2743,8 @@ retry: } } - /* The following calls to read the MySQL binary log - file name and the position return consistent results: - 1) We use commit_ordered() to get same commit order - in InnoDB as in binary log. - 2) A MySQL log file rotation cannot happen because - MySQL protects against this by having a counter of - transactions in prepared state and it only allows - a rotation when the counter drops to zero. See - LOCK_prep_xids and COND_prep_xids in log.cc. */ - trx->mysql_log_file_name = mysql_bin_log_file_name(); - trx->mysql_log_offset = (ib_int64_t) mysql_bin_log_file_pos(); + mysql_bin_log_commit_pos(thd, &tmp_pos, &(trx->mysql_log_file_name)); + trx->mysql_log_offset = (ib_int64_t) tmp_pos; /* Don't do write + flush right now. For group commit to work we want to do the flush in the innobase_commit() @@ -2817,7 +2810,7 @@ innobase_commit_ordered( DBUG_ASSERT(all || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))); - innobase_commit_ordered_2(trx); + innobase_commit_ordered_2(trx, thd); trx->active_trans |= TRX_ACTIVE_COMMIT_ORDERED; @@ -2881,7 +2874,7 @@ innobase_commit( /* Run the fast part of commit if we did not already. */ if ((trx->active_trans & TRX_ACTIVE_COMMIT_ORDERED) == 0) { - innobase_commit_ordered_2(trx); + innobase_commit_ordered_2(trx, thd); } /* We were instructed to commit the whole transaction, or diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h index 04224277deb..20363b19abd 100644 --- a/storage/xtradb/handler/ha_innodb.h +++ b/storage/xtradb/handler/ha_innodb.h @@ -239,16 +239,6 @@ LEX_STRING *thd_query_string(MYSQL_THD thd); char **thd_query(MYSQL_THD thd); #endif -/** Get the file name of the MySQL binlog. - * @return the name of the binlog file - */ -const char* mysql_bin_log_file_name(void); - -/** Get the current position of the MySQL binlog. 
- * @return byte offset from the beginning of the binlog - */ -ulonglong mysql_bin_log_file_pos(void); - /** Check if a user thread is a replication slave thread @param thd user thread @@ -289,6 +279,11 @@ bool thd_binlog_filter_ok(const MYSQL_THD thd); #endif /* MYSQL_VERSION_ID > 50140 */ } +/** Get the file name and position of the MySQL binlog corresponding to the + * current commit. + */ +extern void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file); + typedef struct trx_struct trx_t; /********************************************************************//** @file handler/ha_innodb.h