mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
MDEV-21117: refine the server binlog-based recovery for semisync
Problem: ======= When a semisync master crashes and is restarted as a slave, it could recover transactions that the former slaves may never have seen. The known method of clearing out all prepared transactions, --tc-heuristic-recover=rollback, does not adjust the binlog accordingly. Fix: === The binlog-based recovery is made aware of the semisync slave role of the post-crash restarted server. No change in behavior is made for the "normal" binlogging server or the semisync master. When the restarted server is configured with --rpl-semi-sync-slave-enabled=1, the refined recovery attempts to roll back prepared transactions and truncate the binlog accordingly. A partially committed transaction (that is, one committed in at least one of the participating engines) gets committed instead. It is guaranteed that no committed (or partially committed) transactions exist beyond the truncate position. If a non-transactional replication event (which is, in a way, a committed transaction) exists past the computed truncate position, the recovery ends with an error. After a master crash and failover to a slave, the demoted-to-slave ex-master must be ready to face and accept events that it generated itself, without the otherwise necessary --replicate-same-server-id. So the acceptance conditions are relaxed for the semisync slave to accept its own events without that option. While gtid_strict_mode=ON ensures that no duplicate transaction can be (re-)executed, a master_use_gtid=none slave still has to be configured with --replicate-same-server-id. *NOTE* for reviewers: this patch does not handle user XA transactions, which is done in the next git commit.
This commit is contained in:
129
mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result
Normal file
129
mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result
Normal file
@@ -0,0 +1,129 @@
|
||||
include/master-slave.inc
|
||||
[connection master]
|
||||
connection server_2;
|
||||
include/stop_slave.inc
|
||||
connection server_1;
|
||||
RESET MASTER;
|
||||
SET @@global.max_binlog_size= 4096;
|
||||
connection server_2;
|
||||
RESET MASTER;
|
||||
SET @@global.max_binlog_size= 4096;
|
||||
set @@global.rpl_semi_sync_slave_enabled = 1;
|
||||
set @@global.gtid_slave_pos = "";
|
||||
CHANGE MASTER TO master_use_gtid= slave_pos;
|
||||
include/start_slave.inc
|
||||
connection server_1;
|
||||
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
|
||||
set @@global.rpl_semi_sync_master_enabled = 1;
|
||||
set @@global.rpl_semi_sync_master_wait_point=AFTER_SYNC;
|
||||
call mtr.add_suppression("Can.t init tc log");
|
||||
call mtr.add_suppression("Aborting");
|
||||
call mtr.add_suppression("1 client is using or hasn.t closed the table properly");
|
||||
call mtr.add_suppression("Table './mtr/test_suppressions' is marked as crashed and should be repaired");
|
||||
CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
|
||||
INSERT INTO t1 VALUES (1, 'dummy1');
|
||||
connect conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_1,;
|
||||
SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL con1_ready WAIT_FOR con1_go";
|
||||
INSERT INTO t1 VALUES (2, REPEAT("x", 4100));
|
||||
connection server_1;
|
||||
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
|
||||
# Kill the server
|
||||
connection server_2;
|
||||
include/stop_slave.inc
|
||||
SELECT @@GLOBAL.gtid_current_pos;
|
||||
@@GLOBAL.gtid_current_pos
|
||||
0-1-8
|
||||
# restart: --rpl-semi-sync-slave-enabled=1
|
||||
connection server_1;
|
||||
FOUND 1 /truncated binlog file:.*master.*000001/ in mysqld.1.err
|
||||
disconnect conn_client;
|
||||
connection server_2;
|
||||
set global rpl_semi_sync_master_enabled = 1;
|
||||
set global rpl_semi_sync_master_wait_point=AFTER_SYNC;
|
||||
connection server_1;
|
||||
CHANGE MASTER TO master_host='127.0.0.1', master_port=$new_master_port, master_user='root', master_use_gtid=SLAVE_POS;
|
||||
set global rpl_semi_sync_slave_enabled = 1;
|
||||
set @@global.gtid_slave_pos=@@global.gtid_binlog_pos;
|
||||
include/start_slave.inc
|
||||
connection server_2;
|
||||
INSERT INTO t1 VALUES (3, 'dummy3');
|
||||
# The gtid state on current master must be equal to ...
|
||||
SHOW VARIABLES LIKE 'gtid_binlog_pos';
|
||||
Variable_name Value
|
||||
gtid_binlog_pos 0-2-9
|
||||
connection server_1;
|
||||
SELECT COUNT(*) = 3 as 'true' FROM t1;
|
||||
true
|
||||
1
|
||||
# ... the gtid states on the slave:
|
||||
SHOW VARIABLES LIKE 'gtid_slave_pos';
|
||||
Variable_name Value
|
||||
gtid_slave_pos 0-2-9
|
||||
SHOW VARIABLES LIKE 'gtid_binlog_pos';
|
||||
Variable_name Value
|
||||
gtid_binlog_pos 0-2-9
|
||||
connection server_2;
|
||||
connect conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_2,;
|
||||
SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL con1_ready WAIT_FOR con1_go";
|
||||
INSERT INTO t1 VALUES (4, REPEAT("x", 4100));
|
||||
connect conn_client_2,127.0.0.1,root,,test,$SERVER_MYPORT_2,;
|
||||
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
|
||||
SET DEBUG_SYNC= "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR con2_go";
|
||||
INSERT INTO t1 VALUES (5, REPEAT("x", 4100));
|
||||
connection server_2;
|
||||
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
|
||||
# Kill the server
|
||||
connection server_1;
|
||||
include/stop_slave.inc
|
||||
SELECT @@GLOBAL.gtid_current_pos;
|
||||
@@GLOBAL.gtid_current_pos
|
||||
0-2-11
|
||||
# restart: --rpl-semi-sync-slave-enabled=1
|
||||
connection server_2;
|
||||
NOT FOUND /truncated binlog file:.*slave.*000001/ in mysqld.2.err
|
||||
disconnect conn_client;
|
||||
connection server_1;
|
||||
set global rpl_semi_sync_master_enabled = 1;
|
||||
set global rpl_semi_sync_master_wait_point=AFTER_SYNC;
|
||||
connection server_2;
|
||||
CHANGE MASTER TO master_host='127.0.0.1', master_port=$new_master_port, master_user='root', master_use_gtid=SLAVE_POS;
|
||||
set global rpl_semi_sync_slave_enabled = 1;
|
||||
set @@global.gtid_slave_pos=@@global.gtid_binlog_pos;
|
||||
include/start_slave.inc
|
||||
connection server_1;
|
||||
INSERT INTO t1 VALUES (6, 'Done');
|
||||
# The gtid state on current master must be equal to ...
|
||||
SHOW VARIABLES LIKE 'gtid_binlog_pos';
|
||||
Variable_name Value
|
||||
gtid_binlog_pos 0-1-12
|
||||
connection server_2;
|
||||
SELECT COUNT(*) = 6 as 'true' FROM t1;
|
||||
true
|
||||
1
|
||||
# ... the gtid states on the slave:
|
||||
SHOW VARIABLES LIKE 'gtid_slave_pos';
|
||||
Variable_name Value
|
||||
gtid_slave_pos 0-1-12
|
||||
SHOW VARIABLES LIKE 'gtid_binlog_pos';
|
||||
Variable_name Value
|
||||
gtid_binlog_pos 0-1-12
|
||||
include/diff_tables.inc [server_1:t1, server_2:t1]
|
||||
# Cleanup
|
||||
connection server_1;
|
||||
DROP TABLE t1;
|
||||
connection server_2;
|
||||
include/stop_slave.inc
|
||||
connection server_1;
|
||||
set @@global.rpl_semi_sync_master_enabled = 0;
|
||||
set @@global.rpl_semi_sync_slave_enabled = 0;
|
||||
set @@global.rpl_semi_sync_master_wait_point=default;
|
||||
RESET SLAVE;
|
||||
RESET MASTER;
|
||||
connection server_2;
|
||||
set @@global.rpl_semi_sync_master_enabled = 0;
|
||||
set @@global.rpl_semi_sync_slave_enabled = 0;
|
||||
set @@global.rpl_semi_sync_master_wait_point=default;
|
||||
CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_1, master_user='root', master_use_gtid=no;
|
||||
include/start_slave.inc
|
||||
connection default;
|
||||
include/rpl_end.inc
|
77
mysql-test/suite/rpl/t/rpl_semi_sync_crash.inc
Normal file
77
mysql-test/suite/rpl/t/rpl_semi_sync_crash.inc
Normal file
@@ -0,0 +1,77 @@
|
||||
# Pick the roles for this failover round from $failover_to_slave, which the
# including test sets before sourcing this file: which server gets killed,
# which one is promoted, and the port later handed to CHANGE MASTER as
# $new_master_port. conn_client is opened against the server about to be killed.
# NOTE(review): $client_port is assigned in both branches but is not referenced
# anywhere in this file; the --connect lines spell the port variable out.
if ($failover_to_slave)
|
||||
{
|
||||
# Round "1 -> 2": server 1 is killed, server 2 becomes the new master.
--let $server_to_crash=1
|
||||
--let $server_to_promote=2
|
||||
--let $new_master_port=$SERVER_MYPORT_2
|
||||
--let $client_port=$SERVER_MYPORT_1
|
||||
|
||||
--connect (conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_1,)
|
||||
}
|
||||
if (!$failover_to_slave)
|
||||
{
|
||||
# Round "2 -> 1": server 2 is killed, server 1 becomes the new master.
--let $server_to_crash=2
|
||||
--let $server_to_promote=1
|
||||
--let $new_master_port=$SERVER_MYPORT_1
|
||||
--let $client_port=$SERVER_MYPORT_2
|
||||
|
||||
--connect (conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_2,)
|
||||
}
|
||||
|
||||
|
||||
# Hold insert after write to binlog and before "run_commit_ordered" in engine
|
||||
|
||||
# Runs on conn_client (the connection opened just above). The statement
# signals con1_ready at the sync point and then waits for con1_go; nothing in
# this file ever sends con1_go, the server is killed while the statement waits.
SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL con1_ready WAIT_FOR con1_go";
|
||||
# $query_to_crash is supplied by the including test; it is sent without
# waiting for its result.
--send_eval $query_to_crash
|
||||
|
||||
# complicate recovery with an extra binlog file
|
||||
# Only done in the "2 -> 1" round ($failover_to_slave is 0), where the server
# to be killed is server 2.
if (!$failover_to_slave)
|
||||
{
|
||||
--connect (conn_client_2,127.0.0.1,root,,test,$SERVER_MYPORT_2,)
|
||||
# use the same signal with $query_to_crash
|
||||
# First wait until the first query has reached its sync point ...
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
|
||||
# ... then arm a second sync point that signals con1_ready again and waits
# for con2_go, which is likewise never sent in this file.
SET DEBUG_SYNC= "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR con2_go";
|
||||
--send_eval $query2_to_crash
|
||||
}
|
||||
|
||||
# Wait until the held statement has signalled its sync point, then kill the
# server while that statement is still suspended.
--connection server_$server_to_crash
|
||||
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
|
||||
--source include/kill_mysqld.inc
|
||||
|
||||
# On the surviving server: stop replication and print its GTID position so it
# is recorded in the result file.
# NOTE(review): --error 2003 precedes a --source rather than a single
# statement; presumably it covers a connection error caused by the killed
# server -- confirm against include/stop_slave.inc.
--connection server_$server_to_promote
|
||||
--error 2003
|
||||
--source include/stop_slave.inc
|
||||
SELECT @@GLOBAL.gtid_current_pos;
|
||||
|
||||
# Restart the killed server with the semisync slave role enabled at startup,
# which is the configuration the refined binlog recovery is tested under.
# NOTE(review): $allow_rpl_inited is consumed by an include not shown here.
--let $restart_parameters=--rpl-semi-sync-slave-enabled=1
|
||||
--let $allow_rpl_inited=1
|
||||
--source include/start_mysqld.inc
|
||||
|
||||
# Re-establish the test connection to the restarted server.
--connection server_$server_to_crash
|
||||
--enable_reconnect
|
||||
--source include/wait_until_connected_again.inc
|
||||
|
||||
# Check error log for correct messages.
|
||||
# The log of the restarted server is searched for $log_search_pattern, which
# the including test sets; the include prints whether the pattern was found.
let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.$server_to_crash.err;
|
||||
--let SEARCH_FILE=$log_error_
|
||||
--let SEARCH_PATTERN=$log_search_pattern
|
||||
--source include/search_pattern_in_file.inc
|
||||
|
||||
# The client connection that issued $query_to_crash is no longer needed.
--disconnect conn_client
|
||||
|
||||
#
|
||||
# FAIL OVER now to new master
|
||||
#
|
||||
# The surviving server takes over the semisync master role.
--connection server_$server_to_promote
|
||||
set global rpl_semi_sync_master_enabled = 1;
|
||||
set global rpl_semi_sync_master_wait_point=AFTER_SYNC;
|
||||
|
||||
# The restarted server becomes a semisync slave of the promoted one.
--connection server_$server_to_crash
|
||||
# NOTE(review): $master_port is computed here but never referenced in this
# file; the CHANGE MASTER below uses $new_master_port instead. It looks like
# dead code -- confirm no other include reads $master_port before removing it.
--let $master_port=$SERVER_MYPORT_2
|
||||
if (`select $server_to_crash = 2`)
|
||||
{
|
||||
--let $master_port=$SERVER_MYPORT_1
|
||||
}
|
||||
# The recorded result shows this statement with $new_master_port unexpanded,
# so the statement text must stay as written.
evalp CHANGE MASTER TO master_host='127.0.0.1', master_port=$new_master_port, master_user='root', master_use_gtid=SLAVE_POS;
|
||||
set global rpl_semi_sync_slave_enabled = 1;
|
||||
# Start replicating from this server's own binlog GTID position as it stands
# after the restart.
set @@global.gtid_slave_pos=@@global.gtid_binlog_pos;
|
||||
--source include/start_slave.inc
|
11
mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.cnf
Normal file
11
mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.cnf
Normal file
@@ -0,0 +1,11 @@
|
||||
# Server configuration for rpl_semi_sync_fail_over.test: the stock one-slave
# replication setup plus identical extra options on both servers.
# NOTE(review): the options are presumably identical because the two servers
# swap master and slave roles during the test -- confirm.
!include suite/rpl/rpl_1slave_base.cnf
|
||||
!include include/default_client.cnf
|
||||
|
||||
|
||||
[mysqld.1]
|
||||
log-slave-updates
|
||||
# The commit message relies on GTID strict mode to ensure that no duplicate
# transaction can be (re-)executed after failover.
gtid-strict-mode=1
|
||||
|
||||
[mysqld.2]
|
||||
log-slave-updates
|
||||
gtid-strict-mode=1
|
144
mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test
Normal file
144
mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test
Normal file
@@ -0,0 +1,144 @@
|
||||
# ==== Purpose ====
|
||||
#
|
||||
# Test verifies replication failover scenario.
|
||||
#
|
||||
# ==== Implementation ====
|
||||
#
|
||||
# Steps:
|
||||
# 0 - Having two servers 1 and 2, enable semi-sync replication
|
||||
# with the master wait point 'after_sync'.
|
||||
# 1 - Insert a row. While inserting the second row simulate
|
||||
# a server crash right after the transaction is written to binlog, flushed
|
||||
# and synced, but while the binlog position is not yet updated.
|
||||
# 2 - After crash recovery on the old master, execute the CHANGE MASTER
|
||||
# TO command there to connect to server id 2.
|
||||
# 3 - Server 1 (the old master, now the new slave) must connect to the new
|
||||
# master, server 2.
|
||||
# 4 - Repeat the above to crash the new master and restore the old one in the master role.
|
||||
#
|
||||
# ==== References ====
|
||||
#
|
||||
# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server
|
||||
|
||||
|
||||
# Prerequisites: InnoDB, DEBUG_SYNC support and row binlog format, on top of
# the standard master-slave replication setup.
--source include/have_innodb.inc
|
||||
--source include/have_debug_sync.inc
|
||||
--source include/have_binlog_format_row.inc
|
||||
--source include/master-slave.inc
|
||||
|
||||
# Initial slave
|
||||
--connection server_2
|
||||
--source include/stop_slave.inc
|
||||
|
||||
# Initial master
|
||||
--connection server_1
|
||||
RESET MASTER;
|
||||
# NOTE(review): the 4096-byte limit is smaller than the 4100-byte values
# inserted later, presumably to force binlog rotation -- confirm.
SET @@global.max_binlog_size= 4096;
|
||||
|
||||
# Server 2 starts out as a semisync slave that replicates by GTID from an
# empty slave position.
--connection server_2
|
||||
RESET MASTER;
|
||||
SET @@global.max_binlog_size= 4096;
|
||||
set @@global.rpl_semi_sync_slave_enabled = 1;
|
||||
set @@global.gtid_slave_pos = "";
|
||||
CHANGE MASTER TO master_use_gtid= slave_pos;
|
||||
--source include/start_slave.inc
|
||||
|
||||
|
||||
# Server 1 starts out as the semisync master with the AFTER_SYNC wait point.
--connection server_1
|
||||
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
|
||||
set @@global.rpl_semi_sync_master_enabled = 1;
|
||||
set @@global.rpl_semi_sync_master_wait_point=AFTER_SYNC;
|
||||
|
||||
# Error-log messages registered as suppressed; presumably expected because the
# servers are killed on purpose -- confirm.
call mtr.add_suppression("Can.t init tc log");
|
||||
call mtr.add_suppression("Aborting");
|
||||
call mtr.add_suppression("1 client is using or hasn.t closed the table properly");
|
||||
call mtr.add_suppression("Table './mtr/test_suppressions' is marked as crashed and should be repaired");
|
||||
|
||||
# Test table plus one row that is fully committed before any crash.
CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
|
||||
INSERT INTO t1 VALUES (1, 'dummy1');
|
||||
|
||||
#
|
||||
# CRASH the original master, and FAILOVER to the new
|
||||
#
|
||||
|
||||
# value 1 for server id 1 -> 2 failover
|
||||
# Inputs read by rpl_semi_sync_crash.inc: the failover direction, the statement
# held at the sync point when the server is killed, and the error-log pattern
# searched for after the restart.
--let $failover_to_slave=1
|
||||
--let $query_to_crash= INSERT INTO t1 VALUES (2, REPEAT("x", 4100))
|
||||
--let $log_search_pattern=truncated binlog file:.*master.*000001
|
||||
--source rpl_semi_sync_crash.inc
|
||||
|
||||
# Server 2 is the master now. $rows_so_far is used both as the key of the row
# inserted here and as the total row count checked on the slave below.
--connection server_2
|
||||
--let $rows_so_far=3
|
||||
--eval INSERT INTO t1 VALUES ($rows_so_far, 'dummy3')
|
||||
--save_master_pos
|
||||
--echo # The gtid state on current master must be equal to ...
|
||||
SHOW VARIABLES LIKE 'gtid_binlog_pos';
|
||||
|
||||
# Server 1 (the restarted ex-master) must have caught up with server 2.
--connection server_1
|
||||
--sync_with_master
|
||||
--eval SELECT COUNT(*) = $rows_so_far as 'true' FROM t1
|
||||
--echo # ... the gtid states on the slave:
|
||||
SHOW VARIABLES LIKE 'gtid_slave_pos';
|
||||
SHOW VARIABLES LIKE 'gtid_binlog_pos';
|
||||
|
||||
--connection server_2
|
||||
#
|
||||
# CRASH the new master and FAILOVER back to the original
|
||||
#
|
||||
|
||||
# value 0 for the reverse server id 2 -> 1 failover
|
||||
# In this direction rpl_semi_sync_crash.inc also sends $query2_to_crash from a
# second client connection before the server is killed.
--let $failover_to_slave=0
|
||||
--let $query_to_crash = INSERT INTO t1 VALUES (4, REPEAT("x", 4100))
|
||||
--let $query2_to_crash= INSERT INTO t1 VALUES (5, REPEAT("x", 4100))
|
||||
# The recorded result expects NOT FOUND for this pattern in this round.
--let $log_search_pattern=truncated binlog file:.*slave.*000001
|
||||
--source rpl_semi_sync_crash.inc
|
||||
|
||||
# Server 1 is the master again. As in the first round, $rows_so_far is both
# the key of the new row and the total row count checked on the slave.
--connection server_1
|
||||
--let $rows_so_far=6
|
||||
--eval INSERT INTO t1 VALUES ($rows_so_far, 'Done')
|
||||
--save_master_pos
|
||||
--echo # The gtid state on current master must be equal to ...
|
||||
SHOW VARIABLES LIKE 'gtid_binlog_pos';
|
||||
|
||||
# Server 2 (restarted, now a slave) must have caught up with server 1.
--connection server_2
|
||||
--sync_with_master
|
||||
--eval SELECT COUNT(*) = $rows_so_far as 'true' FROM t1
|
||||
--echo # ... the gtid states on the slave:
|
||||
SHOW VARIABLES LIKE 'gtid_slave_pos';
|
||||
SHOW VARIABLES LIKE 'gtid_binlog_pos';
|
||||
|
||||
|
||||
# Final consistency check: t1 must be identical on both servers.
--let $diff_tables=server_1:t1, server_2:t1
|
||||
--source include/diff_tables.inc
|
||||
|
||||
#
|
||||
--echo # Cleanup
|
||||
#
|
||||
# Drop the test table on the current master (server 1) and let the drop reach
# server 2 before its slave threads are stopped.
--connection server_1
|
||||
DROP TABLE t1;
|
||||
--save_master_pos
|
||||
|
||||
--connection server_2
|
||||
--sync_with_master
|
||||
--source include/stop_slave.inc
|
||||
|
||||
# Switch semisync off on server 1 and clear its slave and binlog state.
--connection server_1
|
||||
set @@global.rpl_semi_sync_master_enabled = 0;
|
||||
set @@global.rpl_semi_sync_slave_enabled = 0;
|
||||
set @@global.rpl_semi_sync_master_wait_point=default;
|
||||
RESET SLAVE;
|
||||
RESET MASTER;
|
||||
|
||||
# Switch semisync off on server 2 as well, then point it back at server 1
# without GTID, as a plain slave.
--connection server_2
|
||||
set @@global.rpl_semi_sync_master_enabled = 0;
|
||||
set @@global.rpl_semi_sync_slave_enabled = 0;
|
||||
set @@global.rpl_semi_sync_master_wait_point=default;
|
||||
|
||||
evalp CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_1, master_user='root', master_use_gtid=no;
|
||||
--source include/start_slave.inc
|
||||
|
||||
# NOTE(review): the default connection presumably pointed at a server that was
# killed and restarted during the test, hence the reconnect -- confirm.
connection default;
|
||||
--enable_reconnect
|
||||
--source include/wait_until_connected_again.inc
|
||||
|
||||
--source include/rpl_end.inc
|
Reference in New Issue
Block a user