mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
MDEV-21117: refine the server binlog-based recovery for semisync
Problem: ======= When the semisync master crashes and is restarted as a slave, it could recover transactions that the former slaves may never have seen. The known method of clearing out all prepared transactions, --tc-heuristic-recover=rollback, does not adjust the binlog accordingly. Fix: === The binlog-based recovery is made aware of the semisync slave role of the post-crash restarted server. No change in behavior is made to the "normal" binlogging server or to the semisync master. When the restarted server is configured with --rpl-semi-sync-slave-enabled=1, the refined recovery attempts to roll back prepared transactions and truncate the binlog accordingly. A partially committed transaction (that is, one committed in at least one of the engine participants) gets committed. It is guaranteed that no committed (including partially committed) transactions exist beyond the truncate position. If there exists a non-transactional replication event (being in a way a committed transaction) past the computed truncate position, the recovery ends with an error. After a master crash and failover to a slave, the demoted-to-slave ex-master must be ready to face and accept its own events (those it generated itself) without the generally necessary --replicate-same-server-id. So the acceptance conditions are relaxed for the semisync slave to accept its own events without that option. While gtid_strict_mode ON ensures that no duplicate transaction can be (re-)executed, the master_use_gtid=none slave has to be configured with --replicate-same-server-id. *NOTE* for reviewers: this patch does not handle user XA, which is done in the next git commit.
This commit is contained in:
@@ -0,0 +1,58 @@
|
||||
SET @@global.max_binlog_size= 4096;
|
||||
call mtr.add_suppression("Table '.*tm' is marked as crashed and should be repaired");
|
||||
call mtr.add_suppression("Got an error from unknown thread");
|
||||
call mtr.add_suppression("Checking table: '.*tm'");
|
||||
call mtr.add_suppression("Recovering table: '.*tm'");
|
||||
call mtr.add_suppression("Cannot truncate the binary log to file");
|
||||
call mtr.add_suppression("Crash recovery failed");
|
||||
call mtr.add_suppression("Can.t init tc log");
|
||||
call mtr.add_suppression("Aborting");
|
||||
call mtr.add_suppression("Found 1 prepared transactions");
|
||||
call mtr.add_suppression("mysqld: Table.*tm.*is marked as crashed");
|
||||
call mtr.add_suppression("Checking table.*tm");
|
||||
RESET MASTER;
|
||||
FLUSH LOGS;
|
||||
CREATE TABLE ti (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
|
||||
CREATE TABLE tm (f INT) ENGINE=MYISAM;
|
||||
INSERT INTO tm VALUES(1);
|
||||
connect master1,localhost,root,,;
|
||||
connect master2,localhost,root,,;
|
||||
connect master3,localhost,root,,;
|
||||
connection master1;
|
||||
SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL master1_ready WAIT_FOR master1_go";
|
||||
INSERT INTO ti VALUES (5 - 1, REPEAT("x", 4100));
|
||||
connection master2;
|
||||
SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT_FOR master2_go";
|
||||
INSERT INTO ti VALUES (5, REPEAT("x", 1));
|
||||
connection master3;
|
||||
SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
|
||||
SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master3_ready";
|
||||
INSERT INTO tm VALUES (2);
|
||||
connection default;
|
||||
SET DEBUG_SYNC= "now WAIT_FOR master3_ready";
|
||||
# The gtid binlog state prior the crash must be restored at the end of the test;
|
||||
SELECT @@global.gtid_binlog_state;
|
||||
@@global.gtid_binlog_state
|
||||
0-1-9
|
||||
# Kill the server
|
||||
# Failed restart as the semisync slave
|
||||
# Normal restart
|
||||
# restart
|
||||
FOUND 1 /Cannot truncate the binary log to file/ in mysqld.1.err
|
||||
# Proof that the in-doubt transactions are recovered by the 2nd normal server restart
|
||||
SELECT COUNT(*) = 5 as 'True' FROM ti;
|
||||
True
|
||||
1
|
||||
SELECT COUNT(*) <= 1 FROM tm;
|
||||
COUNT(*) <= 1
|
||||
1
|
||||
# The gtid binlog state prior the crash is restored now
|
||||
SELECT @@GLOBAL.gtid_binlog_state;
|
||||
@@GLOBAL.gtid_binlog_state
|
||||
0-1-9
|
||||
SELECT @@GLOBAL.gtid_binlog_pos;
|
||||
@@GLOBAL.gtid_binlog_pos
|
||||
0-1-9
|
||||
# Cleanup
|
||||
DROP TABLE ti, tm;
|
||||
End of test
|
Reference in New Issue
Block a user