mirror of
https://github.com/MariaDB/server.git
synced 2025-11-28 17:36:30 +03:00
Fix problems related to reconnect. When we need to reconnect (i.e. explicit stop/start of just the IO thread by the user, or automatic reconnect due to losing the network connection with the master), it is a bit complex to correctly resume at the right point without causing duplicate or missing events in the relay log. The previous code had multiple problems in this regard. With this patch, the problem is solved as follows. The IO thread keeps track (in memory) of which GTID was last queued to the relay log. If it needs to reconnect, it resumes at that GTID position. It also counts the number of events received within the last, possibly partial, event group, and skips the same number of events after a reconnect, so that events already enqueued before the reconnect are not duplicated. (There is no need to keep any persistent state; whenever we restart slave threads after both of them being stopped (such as after server restart), we erase the relay logs and start over from the last GTID applied by the SQL thread. But while the SQL thread is running, this patch is needed to get a correct relay log.)
202 lines
4.8 KiB
Plaintext
202 lines
4.8 KiB
Plaintext
# Test setup: replication from server_1 to server_2 ($rpl_topology=1->2).
# InnoDB is required for table t2 and a debug build for the debug_dbug
# injections used further down.
--let $rpl_topology=1->2
--source include/rpl_init.inc
--source include/have_innodb.inc
--source include/have_debug.inc

# Switch the slave connection to GTID mode (current_pos) before any test
# events are generated.
--connection server_2
--source include/stop_slave.inc
CHANGE MASTER TO master_use_gtid= current_pos;
--source include/start_slave.inc

# Create the test table on the master and wait for the slave to catch up.
# NOTE(review): FLUSH LOGS presumably moves subsequent events to a fresh
# binlog file; the original gives no reason for it.
--connection server_1
CREATE TABLE t1 (a INT);
FLUSH LOGS;
--save_master_pos

--connection server_2
--sync_with_master

# Prepare a string of events and have the slave replicate all of it.
--connection server_1
SET gtid_domain_id=10;
INSERT INTO t1 VALUES (1);
INSERT INTO t1 VALUES (2);
# Force the next event group, INSERT (3), to use sequence number 100 in
# domain 10. This appears to be the GTID that the
# gtid_force_reconnect_at_10_1_100 injection below is keyed on.
SET gtid_seq_no=100;
INSERT INTO t1 VALUES (3);
INSERT INTO t1 VALUES (4);
INSERT INTO t1 VALUES (5);
--save_master_pos

# Let the slave apply everything, then stop it so that the next scenario
# starts from a slave that has already replicated past seq_no 100.
--connection server_2
--sync_with_master
--source include/stop_slave.inc
SELECT * FROM t1 ORDER BY a;

# Now start the slave again, but force a reconnect. There was a bug that this
# reconnect would cause duplicate events.

--connection server_1
# Make sure to get rid of any old binlog dump thread so it does not
# interfere with our DBUG error injection.
--source include/kill_binlog_dump_threads.inc
INSERT INTO t1 VALUES (10);
# Save the current debug settings in @old_dbug on this connection; every
# later restore of debug_dbug on server_1 reads this variable.
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,gtid_force_reconnect_at_10_1_100";
--save_master_pos

# The slave must end up with rows 1..5 and 10, each exactly once.
--connection server_2
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t1 ORDER BY a;

# Reset slave-side state (table contents, binlog, GTID position) so the next
# scenario starts from scratch.
--source include/stop_slave.inc
TRUNCATE t1;
RESET MASTER;
SET GLOBAL gtid_slave_pos= "";

# Reset master-side state and restore the debug settings saved in @old_dbug.
--connection server_1
SET GLOBAL debug_dbug= @old_dbug;
TRUNCATE t1;
RESET MASTER;

# A1 B1 A2 B2 A3 B3, slave reached A1 and B3 and stopped. Slave starts,
# reconnects at A2. There was a bug that B2 would be duplicated.
#
# Here A is domain 10 (rows 1, 2, 3; A1 has seq_no 50, A2 has seq_no 100)
# and B is domain 11 (rows 11, 12, 13; B3 has seq_no 200).

SET gtid_domain_id=10;
SET gtid_seq_no=50;
INSERT INTO t1 VALUES (1);
SET gtid_domain_id=11;
INSERT INTO t1 VALUES (11);
SET gtid_domain_id=10;
SET gtid_seq_no=100;
INSERT INTO t1 VALUES (2);
SET gtid_domain_id=11;
INSERT INTO t1 VALUES (12);
SET gtid_domain_id=10;
INSERT INTO t1 VALUES (3);
SET gtid_domain_id=11;
SET gtid_seq_no=200;
INSERT INTO t1 VALUES (13);

# Run the slave up to A1 in domain 10 and B3 in domain 11, then let it stop.
--connection server_2
START SLAVE UNTIL master_gtid_pos="10-1-50,11-1-200";
--source include/wait_for_slave_to_stop.inc
SELECT * FROM t1 ORDER BY a;

# Arm the reconnect injection at 10-1-100 (A2) and add one more row for the
# slave to sync to.
--connection server_1
--source include/kill_binlog_dump_threads.inc
INSERT INTO t1 VALUES (20);
SET GLOBAL debug_dbug="+d,gtid_force_reconnect_at_10_1_100";
--save_master_pos

# After the forced reconnect every row must be present exactly once.
--connection server_2
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t1 ORDER BY a;

# Reset slave-side state for the next scenario.
--source include/stop_slave.inc
TRUNCATE t1;
RESET MASTER;
SET GLOBAL gtid_slave_pos= "";

# Reset master-side state. @old_dbug holds the debug settings saved on this
# connection before the first injection was armed.
--connection server_1
SET GLOBAL debug_dbug= @old_dbug;
TRUNCATE t1;
RESET MASTER;

# A1 B1 A2 B2 A3 B3. START SLAVE UNTIL A1,B3, gets reconnect at B2.
# There was a bug that the UNTIL would be ignored, and A2 would be lost.
#
# Here A is domain 9 (rows 1, 2, 3; A1 has seq_no 50) and B is domain 10
# (rows 11, 12, 13; B2 has seq_no 100, B3 has seq_no 200).

--source include/kill_binlog_dump_threads.inc
SET gtid_domain_id= 9;
SET gtid_seq_no= 50;
INSERT INTO t1 VALUES (1);
SET gtid_domain_id= 10;
INSERT INTO t1 VALUES (11);
SET gtid_domain_id= 9;
INSERT INTO t1 VALUES (2);
SET gtid_domain_id= 10;
SET gtid_seq_no= 100;
INSERT INTO t1 VALUES (12);
SET gtid_domain_id= 9;
INSERT INTO t1 VALUES (3);
SET gtid_domain_id= 10;
SET gtid_seq_no= 200;
INSERT INTO t1 VALUES (13);
SET gtid_domain_id= 10;

# Arm the reconnect injection at 10-1-100 (B2).
SET GLOBAL debug_dbug="+d,gtid_force_reconnect_at_10_1_100";

# The slave must honour the UNTIL position across the forced reconnect and
# stop at A1 / B3.
--connection server_2
START SLAVE UNTIL master_gtid_pos="9-1-50,10-1-200";
--source include/wait_for_slave_to_stop.inc
SELECT * FROM t1 ORDER BY a;

# Disarm the injection by restoring the debug settings saved in @old_dbug on
# this connection, then add one more row.
--connection server_1
SET GLOBAL debug_dbug= @old_dbug;
INSERT INTO t1 VALUES (20);
--save_master_pos

# Restart without UNTIL. Wait for the saved master position before checking
# the table; otherwise the SELECT races with the slave applying A2, A3 and
# row 20.
--connection server_2
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t1 ORDER BY a;

--echo *** Test when slave IO thread needs to reconnect in the middle of an event group. ***
--connection server_2
--source include/stop_slave.inc

# Reset slave-side state so replication restarts from an empty position.
TRUNCATE t1;
RESET MASTER;
SET GLOBAL gtid_slave_pos= "";

# Reset master-side state. @old_dbug holds the debug settings saved on this
# connection before the first injection was armed.
--connection server_1
SET GLOBAL debug_dbug= @old_dbug;
TRUNCATE t1;
RESET MASTER;

--source include/kill_binlog_dump_threads.inc
SET GLOBAL debug_dbug="+d,binlog_force_reconnect_after_22_events";

# 4 events for FD, fake rotate, gtid list, binlog checkpoint.
# 2 events for GTID, create table
CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB;
# 3 events for BEGIN/query/COMMIT
INSERT INTO t2 VALUES (1);
# 4 events for BEGIN/query/query/COMMIT
BEGIN;
INSERT INTO t2 VALUES (10);
INSERT INTO t2 VALUES (11);
COMMIT;
# So this event group starts after 4+2+4+3=13 events. Or 16 in row-based.
# With ten statements in the group, the reconnect after 22 events is
# expected to hit in the middle of it (going by the counts above).
BEGIN;
INSERT INTO t2 VALUES (20);
INSERT INTO t2 VALUES (21);
INSERT INTO t2 VALUES (22);
INSERT INTO t2 VALUES (23);
INSERT INTO t2 VALUES (24);
INSERT INTO t2 VALUES (25);
INSERT INTO t2 VALUES (26);
INSERT INTO t2 VALUES (27);
INSERT INTO t2 VALUES (28);
INSERT INTO t2 VALUES (29);
COMMIT;
--save_master_pos

# t2 has a primary key, so a duplicated event from the partial group would
# presumably surface as a slave error rather than pass silently.
--connection server_2
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t2 ORDER BY a;

# Disarm the injection by restoring the saved debug settings.
--connection server_1
SET GLOBAL debug_dbug= @old_dbug;

# Clean up.
# Drop both test tables on the master, then run the standard replication
# teardown include.
--connection server_1
DROP TABLE t1, t2;

--source include/rpl_end.inc