mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
MDEV-28461 semisync-slave server recovery fails to rollback prepared transaction
that is not in binlog. Post-crash recovery of --rpl-semi-sync-slave-enabled server failed to recognize a transaction in-doubt that needed rolled back. A prepared-but-not-in-binlog transaction gets committed instead to possibly create inconsistency with a master (e.g the way it was observed in the bug report). The semisync recovery is corrected now with initializing binlog coordinates of any transaction in-doubt to the maximum offset which is unreachable. In effect when a prepared transaction that is not found in binlog it will be decided to rollback because it's guaranteed to reside in a truncated tail area of binlog. Mtr tests are reinforced to cover the described scenario.
This commit is contained in:
@@ -4,11 +4,13 @@ RESET MASTER;
|
||||
SET @@global.sync_binlog=1;
|
||||
CREATE TABLE t (f INT) ENGINE=INNODB;
|
||||
CREATE TABLE t2 (f INT) ENGINE=INNODB;
|
||||
CREATE TABLE t4 (f INT) ENGINE=INNODB;
|
||||
CREATE TABLE tm (f INT) ENGINE=Aria;
|
||||
# Case A.
|
||||
connect master1,localhost,root,,;
|
||||
connect master2,localhost,root,,;
|
||||
connect master3,localhost,root,,;
|
||||
connect master4,localhost,root,,;
|
||||
connection default;
|
||||
INSERT INTO t VALUES (10);
|
||||
INSERT INTO tm VALUES (10);
|
||||
@@ -23,14 +25,23 @@ connection master3;
|
||||
SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
|
||||
SELECT @@global.gtid_binlog_pos as 'Before the crash';
|
||||
Before the crash
|
||||
0-1-7
|
||||
0-1-8
|
||||
connection master4;
|
||||
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL master4_ready WAIT_FOR signal_never_arrives";
|
||||
INSERT INTO t4 VALUES (13);
|
||||
connection master3;
|
||||
SET DEBUG_SYNC= "now WAIT_FOR master4_ready";
|
||||
SELECT @@global.gtid_binlog_pos as 'Before the crash and never logged trx';
|
||||
Before the crash and never logged trx
|
||||
0-1-8
|
||||
connection default;
|
||||
# Kill the server
|
||||
disconnect master1;
|
||||
disconnect master2;
|
||||
disconnect master3;
|
||||
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1
|
||||
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-6/ in mysqld.1.err
|
||||
disconnect master4;
|
||||
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3
|
||||
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-7/ in mysqld.1.err
|
||||
Pre-crash binlog file content:
|
||||
include/show_binlog_events.inc
|
||||
Log_name Pos Event_type Server_id End_log_pos Info
|
||||
@@ -39,6 +50,8 @@ master-bin.000001 # Query # # use `test`; CREATE TABLE t (f INT) ENGINE=INNODB
|
||||
master-bin.000001 # Gtid # # GTID #-#-#
|
||||
master-bin.000001 # Query # # use `test`; CREATE TABLE t2 (f INT) ENGINE=INNODB
|
||||
master-bin.000001 # Gtid # # GTID #-#-#
|
||||
master-bin.000001 # Query # # use `test`; CREATE TABLE t4 (f INT) ENGINE=INNODB
|
||||
master-bin.000001 # Gtid # # GTID #-#-#
|
||||
master-bin.000001 # Query # # use `test`; CREATE TABLE tm (f INT) ENGINE=Aria
|
||||
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
|
||||
master-bin.000001 # Query # # use `test`; INSERT INTO t VALUES (10)
|
||||
@@ -48,16 +61,20 @@ master-bin.000001 # Query # # use `test`; INSERT INTO tm VALUES (10)
|
||||
master-bin.000001 # Query # # COMMIT
|
||||
SELECT @@global.gtid_binlog_pos as 'After the crash';
|
||||
After the crash
|
||||
0-1-5
|
||||
0-1-6
|
||||
"One row should be present in table 't'"
|
||||
SELECT * FROM t;
|
||||
f
|
||||
10
|
||||
"No row should be present in table 't4'"
|
||||
SELECT * FROM t4;
|
||||
f
|
||||
DELETE FROM t;
|
||||
# Case B.
|
||||
connect master1,localhost,root,,;
|
||||
connect master2,localhost,root,,;
|
||||
connect master3,localhost,root,,;
|
||||
connect master4,localhost,root,,;
|
||||
connection default;
|
||||
INSERT INTO t VALUES (10);
|
||||
INSERT INTO tm VALUES (10);
|
||||
@@ -72,14 +89,23 @@ connection master3;
|
||||
SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
|
||||
SELECT @@global.gtid_binlog_pos as 'Before the crash';
|
||||
Before the crash
|
||||
0-1-10
|
||||
0-1-11
|
||||
connection master4;
|
||||
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL master4_ready WAIT_FOR signal_never_arrives";
|
||||
INSERT INTO t4 VALUES (13);
|
||||
connection master3;
|
||||
SET DEBUG_SYNC= "now WAIT_FOR master4_ready";
|
||||
SELECT @@global.gtid_binlog_pos as 'Before the crash and never logged trx';
|
||||
Before the crash and never logged trx
|
||||
0-1-11
|
||||
connection default;
|
||||
# Kill the server
|
||||
disconnect master1;
|
||||
disconnect master2;
|
||||
disconnect master3;
|
||||
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1
|
||||
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-10/ in mysqld.1.err
|
||||
disconnect master4;
|
||||
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3
|
||||
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-11/ in mysqld.1.err
|
||||
Pre-crash binlog file content:
|
||||
include/show_binlog_events.inc
|
||||
Log_name Pos Event_type Server_id End_log_pos Info
|
||||
@@ -97,11 +123,14 @@ master-bin.000002 # Query # # use `test`; DELETE FROM t2 WHERE f = 0
|
||||
master-bin.000002 # Query # # COMMIT
|
||||
SELECT @@global.gtid_binlog_pos as 'After the crash';
|
||||
After the crash
|
||||
0-1-9
|
||||
0-1-10
|
||||
"One row should be present in table 't'"
|
||||
SELECT * FROM t;
|
||||
f
|
||||
10
|
||||
"No row should be present in table 't4'"
|
||||
SELECT * FROM t4;
|
||||
f
|
||||
DELETE FROM t;
|
||||
# Case C.
|
||||
CREATE PROCEDURE sp_blank_xa()
|
||||
@@ -114,6 +143,7 @@ END|
|
||||
connect master1,localhost,root,,;
|
||||
connect master2,localhost,root,,;
|
||||
connect master3,localhost,root,,;
|
||||
connect master4,localhost,root,,;
|
||||
connection default;
|
||||
INSERT INTO t VALUES (10);
|
||||
INSERT INTO tm VALUES (10);
|
||||
@@ -128,14 +158,23 @@ connection master3;
|
||||
SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
|
||||
SELECT @@global.gtid_binlog_pos as 'Before the crash';
|
||||
Before the crash
|
||||
0-1-15
|
||||
0-1-16
|
||||
connection master4;
|
||||
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL master4_ready WAIT_FOR signal_never_arrives";
|
||||
INSERT INTO t4 VALUES (13);
|
||||
connection master3;
|
||||
SET DEBUG_SYNC= "now WAIT_FOR master4_ready";
|
||||
SELECT @@global.gtid_binlog_pos as 'Before the crash and never logged trx';
|
||||
Before the crash and never logged trx
|
||||
0-1-16
|
||||
connection default;
|
||||
# Kill the server
|
||||
disconnect master1;
|
||||
disconnect master2;
|
||||
disconnect master3;
|
||||
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1
|
||||
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-14/ in mysqld.1.err
|
||||
disconnect master4;
|
||||
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3
|
||||
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-15/ in mysqld.1.err
|
||||
Pre-crash binlog file content:
|
||||
include/show_binlog_events.inc
|
||||
Log_name Pos Event_type Server_id End_log_pos Info
|
||||
@@ -158,11 +197,14 @@ master-bin.000003 # Query # # use `test`; INSERT INTO tm VALUES (10)
|
||||
master-bin.000003 # Query # # COMMIT
|
||||
SELECT @@global.gtid_binlog_pos as 'After the crash';
|
||||
After the crash
|
||||
0-1-13
|
||||
0-1-14
|
||||
"One row should be present in table 't'"
|
||||
SELECT * FROM t;
|
||||
f
|
||||
10
|
||||
"No row should be present in table 't4'"
|
||||
SELECT * FROM t4;
|
||||
f
|
||||
DELETE FROM t;
|
||||
DROP PROCEDURE sp_blank_xa;
|
||||
# Case D.
|
||||
@@ -176,6 +218,7 @@ END|
|
||||
connect master1,localhost,root,,;
|
||||
connect master2,localhost,root,,;
|
||||
connect master3,localhost,root,,;
|
||||
connect master4,localhost,root,,;
|
||||
connection default;
|
||||
INSERT INTO t VALUES (10);
|
||||
INSERT INTO tm VALUES (10);
|
||||
@@ -190,14 +233,23 @@ connection master3;
|
||||
SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
|
||||
SELECT @@global.gtid_binlog_pos as 'Before the crash';
|
||||
Before the crash
|
||||
0-1-20
|
||||
0-1-21
|
||||
connection master4;
|
||||
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL master4_ready WAIT_FOR signal_never_arrives";
|
||||
INSERT INTO t4 VALUES (13);
|
||||
connection master3;
|
||||
SET DEBUG_SYNC= "now WAIT_FOR master4_ready";
|
||||
SELECT @@global.gtid_binlog_pos as 'Before the crash and never logged trx';
|
||||
Before the crash and never logged trx
|
||||
0-1-21
|
||||
connection default;
|
||||
# Kill the server
|
||||
disconnect master1;
|
||||
disconnect master2;
|
||||
disconnect master3;
|
||||
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1
|
||||
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-20/ in mysqld.1.err
|
||||
disconnect master4;
|
||||
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3
|
||||
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-21/ in mysqld.1.err
|
||||
Pre-crash binlog file content:
|
||||
include/show_binlog_events.inc
|
||||
Log_name Pos Event_type Server_id End_log_pos Info
|
||||
@@ -226,11 +278,14 @@ master-bin.000004 # Query # # XA END X'786964',X'',1
|
||||
master-bin.000004 # XA_prepare # # XA PREPARE X'786964',X'',1
|
||||
SELECT @@global.gtid_binlog_pos as 'After the crash';
|
||||
After the crash
|
||||
0-1-19
|
||||
0-1-20
|
||||
"One row should be present in table 't'"
|
||||
SELECT * FROM t;
|
||||
f
|
||||
10
|
||||
"No row should be present in table 't4'"
|
||||
SELECT * FROM t4;
|
||||
f
|
||||
DELETE FROM t;
|
||||
DROP PROCEDURE sp_xa;
|
||||
# Cleanup
|
||||
|
@@ -22,6 +22,11 @@ SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT
|
||||
INSERT INTO ti VALUES (3, "not gonna survive");
|
||||
connection default;
|
||||
SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
|
||||
connect master3,localhost,root,,;
|
||||
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL master3_ready WAIT_FOR master3_go_never_arrives";
|
||||
INSERT INTO ti VALUES (4, "not gonna be log therefore survive"),(5, "ditto");
|
||||
connection default;
|
||||
SET DEBUG_SYNC= "now WAIT_FOR master3_ready";
|
||||
"List of binary logs before crash"
|
||||
show binary logs;
|
||||
Log_name File_size
|
||||
@@ -36,7 +41,8 @@ connection default;
|
||||
# Kill the server
|
||||
disconnect master1;
|
||||
disconnect master2;
|
||||
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1
|
||||
disconnect master3;
|
||||
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3
|
||||
FOUND 1 /truncated binlog file:.*master.*000002/ in mysqld.1.err
|
||||
"One record should be present in table"
|
||||
SELECT * FROM ti;
|
||||
|
@@ -1,6 +1,7 @@
|
||||
connect(master1,localhost,root,,);
|
||||
connect(master2,localhost,root,,);
|
||||
connect(master3,localhost,root,,);
|
||||
connect(master4,localhost,root,,);
|
||||
|
||||
--connection default
|
||||
|
||||
@@ -22,16 +23,26 @@ SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready";
|
||||
SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
|
||||
SELECT @@global.gtid_binlog_pos as 'Before the crash';
|
||||
|
||||
--connection master4
|
||||
# Simulate prepared & not-logged trx; it will never recover.
|
||||
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL master4_ready WAIT_FOR signal_never_arrives";
|
||||
--send INSERT INTO t4 VALUES (13)
|
||||
|
||||
--connection master3
|
||||
SET DEBUG_SYNC= "now WAIT_FOR master4_ready";
|
||||
SELECT @@global.gtid_binlog_pos as 'Before the crash and never logged trx';
|
||||
|
||||
--connection default
|
||||
--source include/kill_mysqld.inc
|
||||
--disconnect master1
|
||||
--disconnect master2
|
||||
--disconnect master3
|
||||
--disconnect master4
|
||||
|
||||
#
|
||||
# Server restart
|
||||
#
|
||||
--let $restart_parameters= --rpl-semi-sync-slave-enabled=1 --sync-binlog=1
|
||||
--let $restart_parameters= --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3
|
||||
--source include/start_mysqld.inc
|
||||
|
||||
# Check error log for a successful truncate message.
|
||||
@@ -49,6 +60,8 @@ SELECT @@global.gtid_binlog_pos as 'Before the crash';
|
||||
SELECT @@global.gtid_binlog_pos as 'After the crash';
|
||||
--echo "One row should be present in table 't'"
|
||||
SELECT * FROM t;
|
||||
--echo "No row should be present in table 't4'"
|
||||
SELECT * FROM t4;
|
||||
|
||||
# prepare binlog file index for the next test
|
||||
--inc $binlog_file_index
|
||||
|
@@ -25,6 +25,7 @@ RESET MASTER;
|
||||
SET @@global.sync_binlog=1;
|
||||
CREATE TABLE t (f INT) ENGINE=INNODB;
|
||||
CREATE TABLE t2 (f INT) ENGINE=INNODB;
|
||||
CREATE TABLE t4 (f INT) ENGINE=INNODB;
|
||||
CREATE TABLE tm (f INT) ENGINE=Aria;
|
||||
|
||||
# Old (pre-crash) binlog file index initial value.
|
||||
@@ -42,14 +43,14 @@ CREATE TABLE tm (f INT) ENGINE=Aria;
|
||||
# 'query1' onwards will be removed from the binary log.
|
||||
# Show-binlog-events is to prove that.
|
||||
|
||||
--let $truncate_gtid_pos = 0-1-6
|
||||
--let $truncate_gtid_pos = 0-1-7
|
||||
--let $query1 = INSERT INTO t VALUES (20)
|
||||
--let $query2 = DELETE FROM t2 WHERE f = 0 /* no such record */
|
||||
--source binlog_truncate_active_log.inc
|
||||
|
||||
--echo # Case B.
|
||||
# The inverted sequence ends up to truncate starting from $query2
|
||||
--let $truncate_gtid_pos = 0-1-10
|
||||
--let $truncate_gtid_pos = 0-1-11
|
||||
--let $query1 = DELETE FROM t2 WHERE f = 0
|
||||
--let $query2 = INSERT INTO t VALUES (20)
|
||||
--source binlog_truncate_active_log.inc
|
||||
@@ -68,7 +69,7 @@ delimiter ;|
|
||||
|
||||
# The same as in A with $query2 being the zero-engine XA transaction.
|
||||
# Both $query1 and $query2 are going to be truncated.
|
||||
--let $truncate_gtid_pos = 0-1-14
|
||||
--let $truncate_gtid_pos = 0-1-15
|
||||
--let $query1 = INSERT INTO t VALUES (20)
|
||||
--let $query2 = CALL sp_blank_xa
|
||||
--source binlog_truncate_active_log.inc
|
||||
@@ -89,7 +90,7 @@ delimiter ;|
|
||||
|
||||
# The same as in B with $query1 being the prepared XA transaction.
|
||||
# Truncation must occurs at $query2.
|
||||
--let $truncate_gtid_pos = 0-1-20
|
||||
--let $truncate_gtid_pos = 0-1-21
|
||||
--let $query1 = CALL sp_xa
|
||||
--let $query2 = INSERT INTO t2 VALUES (20)
|
||||
--source binlog_truncate_active_log.inc
|
||||
|
@@ -40,6 +40,15 @@ SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT
|
||||
|
||||
--connection default
|
||||
SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
|
||||
|
||||
connect(master3,localhost,root,,);
|
||||
# The 3nd trx for recovery, it won't get into binlog nor therefore recover
|
||||
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL master3_ready WAIT_FOR master3_go_never_arrives";
|
||||
--send INSERT INTO ti VALUES (4, "not gonna be log therefore survive"),(5, "ditto")
|
||||
|
||||
--connection default
|
||||
SET DEBUG_SYNC= "now WAIT_FOR master3_ready";
|
||||
|
||||
--echo "List of binary logs before crash"
|
||||
--source include/show_binary_logs.inc
|
||||
--echo # The gtid binlog state prior the crash will be truncated at the end of the test
|
||||
@@ -49,11 +58,12 @@ SELECT @@global.gtid_binlog_state;
|
||||
--source include/kill_mysqld.inc
|
||||
--disconnect master1
|
||||
--disconnect master2
|
||||
--disconnect master3
|
||||
|
||||
#
|
||||
# Server restart
|
||||
#
|
||||
--let $restart_parameters= --rpl-semi-sync-slave-enabled=1 --sync-binlog=1
|
||||
--let $restart_parameters= --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3
|
||||
--source include/start_mysqld.inc
|
||||
|
||||
# Check error log for a successful truncate message.
|
||||
|
@@ -963,6 +963,7 @@ typedef struct xid_t XID;
|
||||
*/
|
||||
typedef uint Binlog_file_id;
|
||||
const Binlog_file_id MAX_binlog_id= UINT_MAX;
|
||||
const my_off_t MAX_off_t = (~(my_off_t) 0);
|
||||
/*
|
||||
Compound binlog-id and byte offset of transaction's first event
|
||||
in a sequence (e.g the recovery sequence) of binlog files.
|
||||
@@ -977,14 +978,22 @@ struct xid_recovery_member
|
||||
my_xid xid;
|
||||
uint in_engine_prepare; // number of engines that have xid prepared
|
||||
bool decided_to_commit;
|
||||
Binlog_offset binlog_coord; // semisync recovery binlog offset
|
||||
/*
|
||||
Semisync recovery binlog offset. It's initialized with the maximum
|
||||
unreachable offset. The max value will remain for any transaction
|
||||
not found in binlog to yield its rollback decision as it's guaranteed
|
||||
to be within a truncated tail part of the binlog.
|
||||
*/
|
||||
Binlog_offset binlog_coord;
|
||||
XID *full_xid; // needed by wsrep or past it recovery
|
||||
decltype(::server_id) server_id; // server id of orginal server
|
||||
|
||||
xid_recovery_member(my_xid xid_arg, uint prepare_arg, bool decided_arg,
|
||||
XID *full_xid_arg, decltype(::server_id) server_id_arg)
|
||||
: xid(xid_arg), in_engine_prepare(prepare_arg),
|
||||
decided_to_commit(decided_arg), full_xid(full_xid_arg) , server_id(server_id_arg) {};
|
||||
decided_to_commit(decided_arg),
|
||||
binlog_coord(Binlog_offset(MAX_binlog_id, MAX_off_t)),
|
||||
full_xid(full_xid_arg), server_id(server_id_arg) {};
|
||||
};
|
||||
|
||||
/* for recover() handlerton call */
|
||||
|
Reference in New Issue
Block a user