diff --git a/mysql-test/suite/bugs/r/rpl_bug38205.result b/mysql-test/suite/bugs/r/rpl_bug38205.result new file mode 100644 index 00000000000..8f1dee344fa --- /dev/null +++ b/mysql-test/suite/bugs/r/rpl_bug38205.result @@ -0,0 +1,56 @@ +stop slave; +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; +reset master; +reset slave; +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; +start slave; +create table t1i(n int primary key) engine=innodb; +create table t2m(n int primary key) engine=myisam; +begin; +insert into t1i values (1); +insert into t1i values (2); +insert into t1i values (3); +commit; +begin; +insert into t1i values (5); +begin; +insert into t1i values (4); +insert into t2m values (1); +update t1i set n = 5 where n = 4; +commit; +zero +0 +*** kill sql thread *** +rollback; +*** sql thread is *not* running: No *** +*** the prove: the killed slave has not finished the current transaction *** +three +3 +one +1 +zero +0 +delete from t2m; +start slave sql_thread; +delete from t1i; +delete from t2m; +begin; +insert into t1i values (5); +begin; +insert into t1i values (4); +update t1i set n = 5 where n = 4; +commit; +zero +0 +stop slave sql_thread; +rollback; +*** sql thread is *not* running: No *** +*** the prove: the stopped slave has rolled back the current transaction *** +zero +0 +zero +0 +one +1 +start slave sql_thread; +drop table t1i, t2m; diff --git a/mysql-test/suite/bugs/t/rpl_bug38205.test b/mysql-test/suite/bugs/t/rpl_bug38205.test new file mode 100644 index 00000000000..52b36636e79 --- /dev/null +++ b/mysql-test/suite/bugs/t/rpl_bug38205.test @@ -0,0 +1,166 @@ +# +# Bug #38205 Row-based Replication (RBR) causes inconsistencies: HA_ERR_FOUND_DUPP_KEY +# Bug#319 if while a non-transactional slave is replicating a transaction possible problem +# +# Verifying the fact that STOP SLAVE in the middle of a group execution waits +# for the end of the group before the slave sql thread will stop. +# The patch refines STOP SLAVE to not interrupt a transaction or other type of +# the replication events group (the part I). +# Killing the sql thread continues to provide a "hard" stop (the part II). +# +# Non-deterministic tests +# + +source include/master-slave.inc; +source include/have_innodb.inc; + + +# +# Part II, killed sql slave leaves instantly +# + +# A. multi-statement transaction as the replication group + +connection master; + +create table t1i(n int primary key) engine=innodb; +create table t2m(n int primary key) engine=myisam; + +sync_slave_with_master; + +connection master; + +begin; +insert into t1i values (1); +insert into t1i values (2); +insert into t1i values (3); +commit; + +sync_slave_with_master; + +# +# todo: first challenge is to find out the SQL thread id +# the following is not fully reliable +# + +let $id=`SELECT id from information_schema.processlist where user like 'system user' and state like '%Has read all relay log%' or user like 'system user' and state like '%Reading event from the relay log%'`; +connection slave; +begin; +insert into t1i values (5); + +connection master; +let $pos0_master= query_get_value(SHOW MASTER STATUS, Position, 1); +begin; +insert into t1i values (4); +insert into t2m values (1); # non-ta update +update t1i set n = 5 where n = 4; # to block at. can't be played with killed +commit; +let $pos1_master= query_get_value(SHOW MASTER STATUS, Position, 1); + +connection slave; +# slave sql thread must be locked out by the conn `slave' explicit lock +let $pos0_slave= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1); +--disable_query_log +eval select $pos0_master - $pos0_slave as zero; +--enable_query_log + +connection slave1; + +let $count= 1; +let $table= t2m; +source include/wait_until_rows_count.inc; +# +# todo: may fail as said above +# +--echo *** kill sql thread *** +--disable_query_log +eval kill connection $id; +--enable_query_log + +connection slave; +rollback; # release the sql thread + +connection slave1; + +source include/wait_for_slave_sql_to_stop.inc; +let $sql_status= query_get_value(SHOW SLAVE STATUS, Slave_SQL_Running, 1); +--echo *** sql thread is *not* running: $sql_status *** +let $pos1_slave= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1); + +connection slave; +--echo *** the prove: the killed slave has not finished the current transaction *** + +--disable_query_log +select count(*) as three from t1i; +eval select $pos1_master > $pos1_slave as one; +eval select $pos1_slave - $pos0_slave as zero; +--enable_query_log + +delete from t2m; # remove the row to be able to replay +start slave sql_thread; + +# +# Part I: B The homogenous transaction remains interuptable in between +# + +connection master; +delete from t1i; +delete from t2m; + +sync_slave_with_master; +begin; +insert into t1i values (5); + +connection master; +let $pos0_master= query_get_value(SHOW MASTER STATUS, Position, 1); +begin; +insert into t1i values (4); +update t1i set n = 5 where n = 4; # to block at. not to be played +commit; +let $pos1_master= query_get_value(SHOW MASTER STATUS, Position, 1); + + +connection slave1; +# slave sql can't advance as must be locked by the conn `slave' trans +let $pos0_slave= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1); +--disable_query_log +eval select $pos0_master - $pos0_slave as zero; +--enable_query_log + +# +# the replicated trans is blocked by the slave's local. +# However, it's not easy to catch the exact moment when it happens. +# The test issues sleep which makes the test either non-deterministic or +# wasting too much time. +# +--sleep 3 + +send stop slave sql_thread; + +connection slave; +rollback; # release the sql thread + +connection slave1; +reap; +source include/wait_for_slave_sql_to_stop.inc; +let $sql_status= query_get_value(SHOW SLAVE STATUS, Slave_SQL_Running, 1); +--echo *** sql thread is *not* running: $sql_status *** + +let $pos1_slave= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1); + +--echo *** the prove: the stopped slave has rolled back the current transaction *** + +--disable_query_log +select count(*) as zero from t1i; +eval select $pos0_master - $pos0_slave as zero; +eval select $pos1_master > $pos0_slave as one; +--enable_query_log + +start slave sql_thread; + +# clean-up + +connection master; +drop table t1i, t2m; + +sync_slave_with_master; diff --git a/mysql-test/suite/rpl/r/rpl_start_stop_slave.result b/mysql-test/suite/rpl/r/rpl_start_stop_slave.result index 04ece812f35..e2b1935c268 100644 --- a/mysql-test/suite/rpl/r/rpl_start_stop_slave.result +++ b/mysql-test/suite/rpl/r/rpl_start_stop_slave.result @@ -10,3 +10,31 @@ start slave; stop slave io_thread; start slave io_thread; drop table t1; +create table t1i(n int primary key) engine=innodb; +create table t2m(n int primary key) engine=myisam; +begin; +insert into t1i values (1); +insert into t1i values (2); +insert into t1i values (3); +commit; +begin; +insert into t1i values (5); +begin; +insert into t1i values (4); +insert into t2m values (1); +insert into t1i values (5); +commit; +zero +0 +stop slave; +rollback; +*** sql thread is *not* running: No *** +*** the prove: the stopped slave has finished the current transaction *** +five +5 +zero +0 +one +1 +include/start_slave.inc +drop table t1i, t2m; diff --git a/mysql-test/suite/rpl/t/rpl_start_stop_slave-slave.opt b/mysql-test/suite/rpl/t/rpl_start_stop_slave-slave.opt new file mode 100644 index 00000000000..00ea161cd6e --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_start_stop_slave-slave.opt @@ -0,0 +1 @@ +--innodb_lock_wait_timeout=60 diff --git a/mysql-test/suite/rpl/t/rpl_start_stop_slave.test b/mysql-test/suite/rpl/t/rpl_start_stop_slave.test index dbf0775c978..d9b87427321 100644 --- a/mysql-test/suite/rpl/t/rpl_start_stop_slave.test +++ b/mysql-test/suite/rpl/t/rpl_start_stop_slave.test @@ -1,4 +1,5 @@ source include/master-slave.inc; +source include/have_innodb.inc; # # Bug#6148 () @@ -35,4 +36,88 @@ save_master_pos; connection slave; sync_with_master; -# End of 4.1 tests + +# +# Bug#38205 Row-based Replication (RBR) causes inconsistencies... +# Bug#319 if while a non-transactional slave is replicating a transaction... +# +# Verifying that STOP SLAVE does not interrupt excution of a group +# execution of events if the group can not roll back. +# Killing the sql thread continues to provide a "hard" stop (the +# part II, moved to the bugs suite as it's hard to make it +# deterministic with KILL). +# + +# +# Part I. The being stopped sql thread finishes first the current group of +# events if the group contains an event on a non-transaction table. + +connection master; +create table t1i(n int primary key) engine=innodb; +create table t2m(n int primary key) engine=myisam; +begin; +insert into t1i values (1); +insert into t1i values (2); +insert into t1i values (3); +commit; + +sync_slave_with_master; +connection slave; +begin; +insert into t1i values (5); + +connection master; +let $pos0_master= query_get_value(SHOW MASTER STATUS, Position, 1); +begin; +insert into t1i values (4); +insert into t2m values (1); # non-ta update to process +insert into t1i values (5); # to block at. to be played with stopped +commit; + +connection slave; +# slave sql thread must be locked out by the conn `slave' explicit lock +let $pos0_slave= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1); +--disable_query_log +eval select $pos0_master - $pos0_slave as zero; +--enable_query_log + +connection slave1; +let $count= 1; +let $table= t2m; +source include/wait_until_rows_count.inc; +send stop slave; + +connection slave; +rollback; # release the sql thread + +connection slave1; +reap; +source include/wait_for_slave_to_stop.inc; +let $sql_status= query_get_value(SHOW SLAVE STATUS, Slave_SQL_Running, 1); +--echo *** sql thread is *not* running: $sql_status *** + + +connection master; +let $pos1_master= query_get_value(SHOW MASTER STATUS, Position, 1); + +connection slave; + +let $pos1_slave= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1); + +--echo *** the prove: the stopped slave has finished the current transaction *** + +--disable_query_log +select count(*) as five from t1i; +eval select $pos1_master - $pos1_slave as zero; +eval select $pos1_slave > $pos0_slave as one; +--enable_query_log + +source include/start_slave.inc; + +# clean-up +connection master; +drop table t1i, t2m; + +sync_slave_with_master; + +# End of tests diff --git a/sql/log_event.cc b/sql/log_event.cc index 6e798d9169e..917e79b86f2 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -7194,7 +7194,12 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli) */ lex_start(thd); mysql_reset_thd_for_next_command(thd); - + /* + The current statement is just about to begin and + has not yet modified anything. Note, all.modified is reset + by mysql_reset_thd_for_next_command. + */ + thd->transaction.stmt.modified_non_trans_table= FALSE; /* Check if the slave is set to use SBR. If so, it should switch to using RBR until the end of the "statement", i.e., next @@ -7303,6 +7308,7 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli) if (table) { + bool transactional_table= table->file->has_transactions(); /* table == NULL means that this table should not be replicated (this was set up by Table_map_log_event::do_apply_event() @@ -7412,6 +7418,9 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli) m_curr_row= m_curr_row_end; + if (error == 0 && !transactional_table) + thd->transaction.all.modified_non_trans_table= + thd->transaction.stmt.modified_non_trans_table= TRUE; } // row processing loop DBUG_EXECUTE_IF("STOP_SLAVE_after_first_Rows_event", diff --git a/sql/slave.cc b/sql/slave.cc index a0db9837648..3287e3e5e89 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -698,6 +698,9 @@ static bool sql_slave_killed(THD* thd, Relay_log_info* rli) DBUG_ASSERT(rli->slave_running == 1);// tracking buffer overrun if (abort_loop || thd->killed || rli->abort_slave) { + if (rli->abort_slave && rli->is_in_group() && + thd->transaction.all.modified_non_trans_table) + DBUG_RETURN(0); /* If we are in an unsafe situation (stopping could corrupt replication), we give one minute to the slave SQL thread of grace before really