1
0
mirror of https://github.com/MariaDB/server.git synced 2025-12-24 11:21:21 +03:00

Bug#38205 Row-based Replication (RBR) causes inconsistencies: HA_ERR_FOUND_DUP

Bug#319  if while a non-transactional slave is replicating a transaction possible problem 

It is impossible to roll back a mixed engines transaction when one of the engine is
non-transaction. In replication that fact is crucial because the slave can not safely
re-apply a transction that was interrupted with STOP SLAVE.

Fixed with making STOP SLAVE not be effective immediately in the case the current
group of replication events has modified a non-transaction table. In order for slave to leave
either the group needs finishing or the user issues KILL QUERY|CONNECTION slave_thread_id.
This commit is contained in:
Andrei Elkin
2009-03-26 10:25:06 +02:00
parent 5d5f0fcd47
commit badc6a127d
7 changed files with 349 additions and 2 deletions

View File

@@ -0,0 +1,165 @@
#
# Bug #38205 Row-based Replication (RBR) causes inconsistencies: HA_ERR_FOUND_DUPP_KEY
#
# Verifying the fact that STOP SLAVE in the middle of a group execution waits
# for the end of the group before the slave sql thread will stop.
# The patch refines STOP SLAVE to not interrupt a transaction or other type of
# the replication events group (the part I).
# Killing the sql thread continues to provide a "hard" stop (the part II).
#
# Non-deterministic tests
#
source include/master-slave.inc;
source include/have_innodb.inc;
#
# Part II, killed sql slave leaves instantly
#
# A. multi-statement transaction as the replication group
connection master;
create table t1i(n int primary key) engine=innodb;
create table t2m(n int primary key) engine=myisam;
sync_slave_with_master;
connection master;
begin;
insert into t1i values (1);
insert into t1i values (2);
insert into t1i values (3);
commit;
sync_slave_with_master;
#
# todo: first challenge is to find out the SQL thread id
# the following is not fully reliable
#
let $id=`SELECT id from information_schema.processlist where user like 'system user' and state like '%Has read all relay log%' or user like 'system user' and state like '%Reading event from the relay log%'`;
connection slave;
begin;
insert into t1i values (5);
connection master;
let $pos0_master= query_get_value(SHOW MASTER STATUS, Position, 1);
begin;
insert into t1i values (4);
insert into t2m values (1); # non-ta update
update t1i set n = 5 where n = 4; # to block at. can't be played with killed
commit;
let $pos1_master= query_get_value(SHOW MASTER STATUS, Position, 1);
connection slave;
# slave sql thread must be locked out by the conn `slave' explicit lock
let $pos0_slave= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1);
--disable_query_log
eval select $pos0_master - $pos0_slave as zero;
--enable_query_log
connection slave1;
let $count= 1;
let $table= t2m;
source include/wait_until_rows_count.inc;
#
# todo: may fail as said above
#
--echo *** kill sql thread ***
--disable_query_log
eval kill connection $id;
--enable_query_log
connection slave;
rollback; # release the sql thread
connection slave1;
source include/wait_for_slave_sql_to_stop.inc;
let $sql_status= query_get_value(SHOW SLAVE STATUS, Slave_SQL_Running, 1);
--echo *** sql thread is *not* running: $sql_status ***
let $pos1_slave= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1);
connection slave;
--echo *** the prove: the killed slave has not finished the current transaction ***
--disable_query_log
select count(*) as three from t1i;
eval select $pos1_master > $pos1_slave as one;
eval select $pos1_slave - $pos0_slave as zero;
--enable_query_log
delete from t2m; # remove the row to be able to replay
start slave sql_thread;
#
# Part I: B The homogenous transaction remains interuptable in between
#
connection master;
delete from t1i;
delete from t2m;
sync_slave_with_master;
begin;
insert into t1i values (5);
connection master;
let $pos0_master= query_get_value(SHOW MASTER STATUS, Position, 1);
begin;
insert into t1i values (4);
update t1i set n = 5 where n = 4; # to block at. not to be played
commit;
let $pos1_master= query_get_value(SHOW MASTER STATUS, Position, 1);
connection slave1;
# slave sql can't advance as must be locked by the conn `slave' trans
let $pos0_slave= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1);
--disable_query_log
eval select $pos0_master - $pos0_slave as zero;
--enable_query_log
#
# the replicated trans is blocked by the slave's local.
# However, it's not easy to catch the exact moment when it happens.
# The test issues sleep which makes the test either non-deterministic or
# wasting too much time.
#
--sleep 3
send stop slave sql_thread;
connection slave;
rollback; # release the sql thread
connection slave1;
reap;
source include/wait_for_slave_sql_to_stop.inc;
let $sql_status= query_get_value(SHOW SLAVE STATUS, Slave_SQL_Running, 1);
--echo *** sql thread is running: $sql_status ***
let $pos1_slave= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1);
--echo *** the prove: the stopped slave has rolled back the current transaction ***
--disable_query_log
select count(*) as zero from t1i;
eval select $pos0_master - $pos0_slave as zero;
eval select $pos1_master > $pos0_slave as one;
--enable_query_log
start slave sql_thread;
# clean-up
connection master;
drop table t1i, t2m;
sync_slave_with_master;