From 6c39eaeb126328e7813b146ecf652d51e4508981 Mon Sep 17 00:00:00 2001 From: Sujatha Date: Thu, 9 Apr 2020 20:45:45 +0530 Subject: [PATCH] MDEV-21117: refine the server binlog-based recovery for semisync Problem: ======= When the semisync master is crashed and restarted as slave it could recover transactions that former slaves may never have seen. A known method existed to clear out all prepared transactions with --tc-heuristic-recover=rollback does not care to adjust binlog accordingly. Fix: === The binlog-based recovery is made to concern of the slave semisync role of post-crash restarted server. No changes in behavior is done to the "normal" binloggging server and the semisync master. When the restarted server is configured with --rpl-semi-sync-slave-enabled=1 the refined recovery attempts to roll back prepared transactions and truncate binlog accordingly. In case of a partially committed (that is committed at least in one of the engine participants) such transaction gets committed. It's guaranteed no (partially as well) committed transactions exist beyond the truncate position. In case there exists a non-transactional replication event (being in a way a committed transaction) past the computed truncate position the recovery ends with an error. As after master crash and failover to slave, the demoted-to-slave ex-master must be ready to face and accept its own (generated by) events, without generally necessary --replicate-same-server-id. So the acceptance conditions are relaxed for the semisync slave to accept own events without that option. While gtid_strict_mode ON ensures no duplicate transaction can be (re-)executed the master_use_gtid=none slave has to be configured with --replicate-same-server-id. *NOTE* for reviewers. This patch does not handle the user XA which is done in next git commit. --- .../r/binlog_truncate_active_log.result | 237 +++++ .../r/binlog_truncate_multi_engine.result | 189 ++++ .../binlog/r/binlog_truncate_multi_log.result | 53 ++ .../r/binlog_truncate_multi_log_unsafe.result | 58 ++ .../binlog/t/binlog_truncate_active_log.inc | 57 ++ .../binlog/t/binlog_truncate_active_log.test | 102 ++ .../binlog/t/binlog_truncate_multi_engine.inc | 73 ++ .../binlog/t/binlog_truncate_multi_engine.opt | 1 + .../t/binlog_truncate_multi_engine.test | 60 ++ .../binlog/t/binlog_truncate_multi_log.test | 77 ++ .../t/binlog_truncate_multi_log_unsafe.test | 119 +++ .../mariabackup/include/have_rocksdb.inc | 4 - .../rpl/r/rpl_semi_sync_fail_over.result | 129 +++ .../suite/rpl/t/rpl_semi_sync_crash.inc | 77 ++ .../suite/rpl/t/rpl_semi_sync_fail_over.cnf | 11 + .../suite/rpl/t/rpl_semi_sync_fail_over.test | 144 +++ sql/handler.cc | 256 ++++- sql/handler.h | 34 +- sql/log.cc | 896 ++++++++++++++++-- sql/log.h | 14 +- sql/log_event.cc | 31 +- sql/log_event.h | 29 +- sql/log_event_server.cc | 68 +- sql/slave.cc | 26 +- storage/innobase/log/log0log.cc | 1 + 25 files changed, 2619 insertions(+), 127 deletions(-) create mode 100644 mysql-test/suite/binlog/r/binlog_truncate_active_log.result create mode 100644 mysql-test/suite/binlog/r/binlog_truncate_multi_engine.result create mode 100644 mysql-test/suite/binlog/r/binlog_truncate_multi_log.result create mode 100644 mysql-test/suite/binlog/r/binlog_truncate_multi_log_unsafe.result create mode 100644 mysql-test/suite/binlog/t/binlog_truncate_active_log.inc create mode 100644 mysql-test/suite/binlog/t/binlog_truncate_active_log.test create mode 100644 mysql-test/suite/binlog/t/binlog_truncate_multi_engine.inc create mode 100644 mysql-test/suite/binlog/t/binlog_truncate_multi_engine.opt create mode 100644 mysql-test/suite/binlog/t/binlog_truncate_multi_engine.test create mode 100644 mysql-test/suite/binlog/t/binlog_truncate_multi_log.test create mode 100644 mysql-test/suite/binlog/t/binlog_truncate_multi_log_unsafe.test delete mode 100644 mysql-test/suite/mariabackup/include/have_rocksdb.inc create mode 100644 mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result create mode 100644 mysql-test/suite/rpl/t/rpl_semi_sync_crash.inc create mode 100644 mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.cnf create mode 100644 mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test diff --git a/mysql-test/suite/binlog/r/binlog_truncate_active_log.result b/mysql-test/suite/binlog/r/binlog_truncate_active_log.result new file mode 100644 index 00000000000..3ad9f5c560c --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_truncate_active_log.result @@ -0,0 +1,237 @@ +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +RESET MASTER; +CREATE TABLE t (f INT) ENGINE=INNODB; +CREATE TABLE t2 (f INT) ENGINE=INNODB; +CREATE TABLE tm (f INT) ENGINE=Aria; +# Case A. +connect master1,localhost,root,,; +connect master2,localhost,root,,; +connect master3,localhost,root,,; +connection default; +INSERT INTO t VALUES (10); +INSERT INTO tm VALUES (10); +connection master1; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master1_ready WAIT_FOR signal_never_arrives"; +INSERT INTO t VALUES (20); +connection master2; +SET DEBUG_SYNC= "now WAIT_FOR master1_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready"; +DELETE FROM t2 WHERE f = 0 /* no such record */; +connection master3; +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +SELECT @@global.gtid_binlog_pos as 'Before the crash'; +Before the crash +0-1-7 +connection default; +# Kill the server +disconnect master1; +disconnect master2; +disconnect master3; +# restart: --rpl-semi-sync-slave-enabled=1 +FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-6/ in mysqld.1.err +Pre-crash binlog file content: +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 # Gtid # # GTID #-#-# +master-bin.000001 # Query # # use `test`; CREATE TABLE t (f INT) ENGINE=INNODB +master-bin.000001 # Gtid # # GTID #-#-# +master-bin.000001 # Query # # use `test`; CREATE TABLE t2 (f INT) ENGINE=INNODB +master-bin.000001 # Gtid # # GTID #-#-# +master-bin.000001 # Query # # use `test`; CREATE TABLE tm (f INT) ENGINE=Aria +master-bin.000001 # Gtid # # BEGIN GTID #-#-# +master-bin.000001 # Query # # use `test`; INSERT INTO t VALUES (10) +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Gtid # # BEGIN GTID #-#-# +master-bin.000001 # Query # # use `test`; INSERT INTO tm VALUES (10) +master-bin.000001 # Query # # COMMIT +SELECT @@global.gtid_binlog_pos as 'After the crash'; +After the crash +0-1-5 +"One row should be present in table 't'" +SELECT * FROM t; +f +10 +DELETE FROM t; +# Case B. +connect master1,localhost,root,,; +connect master2,localhost,root,,; +connect master3,localhost,root,,; +connection default; +INSERT INTO t VALUES (10); +INSERT INTO tm VALUES (10); +connection master1; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master1_ready WAIT_FOR signal_never_arrives"; +DELETE FROM t2 WHERE f = 0; +connection master2; +SET DEBUG_SYNC= "now WAIT_FOR master1_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready"; +INSERT INTO t VALUES (20); +connection master3; +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +SELECT @@global.gtid_binlog_pos as 'Before the crash'; +Before the crash +0-1-10 +connection default; +# Kill the server +disconnect master1; +disconnect master2; +disconnect master3; +# restart: --rpl-semi-sync-slave-enabled=1 +FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-10/ in mysqld.1.err +Pre-crash binlog file content: +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000002 # Gtid # # BEGIN GTID #-#-# +master-bin.000002 # Query # # use `test`; DELETE FROM t +master-bin.000002 # Xid # # COMMIT /* XID */ +master-bin.000002 # Gtid # # BEGIN GTID #-#-# +master-bin.000002 # Query # # use `test`; INSERT INTO t VALUES (10) +master-bin.000002 # Xid # # COMMIT /* XID */ +master-bin.000002 # Gtid # # BEGIN GTID #-#-# +master-bin.000002 # Query # # use `test`; INSERT INTO tm VALUES (10) +master-bin.000002 # Query # # COMMIT +master-bin.000002 # Gtid # # BEGIN GTID #-#-# +master-bin.000002 # Query # # use `test`; DELETE FROM t2 WHERE f = 0 +master-bin.000002 # Query # # COMMIT +SELECT @@global.gtid_binlog_pos as 'After the crash'; +After the crash +0-1-9 +"One row should be present in table 't'" +SELECT * FROM t; +f +10 +DELETE FROM t; +# Case C. +CREATE PROCEDURE sp_blank_xa() +BEGIN +XA START 'blank'; +DELETE FROM t2 WHERE f = 0 /* no such record */; +XA END 'blank'; +XA PREPARE 'blank'; +END| +connect master1,localhost,root,,; +connect master2,localhost,root,,; +connect master3,localhost,root,,; +connection default; +INSERT INTO t VALUES (10); +INSERT INTO tm VALUES (10); +connection master1; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master1_ready WAIT_FOR signal_never_arrives"; +INSERT INTO t VALUES (20); +connection master2; +SET DEBUG_SYNC= "now WAIT_FOR master1_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready"; +CALL sp_blank_xa; +connection master3; +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +SELECT @@global.gtid_binlog_pos as 'Before the crash'; +Before the crash +0-1-15 +connection default; +# Kill the server +disconnect master1; +disconnect master2; +disconnect master3; +# restart: --rpl-semi-sync-slave-enabled=1 +FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-14/ in mysqld.1.err +Pre-crash binlog file content: +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000003 # Gtid # # BEGIN GTID #-#-# +master-bin.000003 # Query # # use `test`; DELETE FROM t +master-bin.000003 # Xid # # COMMIT /* XID */ +master-bin.000003 # Gtid # # GTID #-#-# +master-bin.000003 # Query # # use `test`; CREATE DEFINER=`root`@`localhost` PROCEDURE `sp_blank_xa`() +BEGIN +XA START 'blank'; +DELETE FROM t2 WHERE f = 0 /* no such record */; +XA END 'blank'; +XA PREPARE 'blank'; +END +master-bin.000003 # Gtid # # BEGIN GTID #-#-# +master-bin.000003 # Query # # use `test`; INSERT INTO t VALUES (10) +master-bin.000003 # Xid # # COMMIT /* XID */ +master-bin.000003 # Gtid # # BEGIN GTID #-#-# +master-bin.000003 # Query # # use `test`; INSERT INTO tm VALUES (10) +master-bin.000003 # Query # # COMMIT +SELECT @@global.gtid_binlog_pos as 'After the crash'; +After the crash +0-1-13 +"One row should be present in table 't'" +SELECT * FROM t; +f +10 +DELETE FROM t; +DROP PROCEDURE sp_blank_xa; +# Case D. +CREATE PROCEDURE sp_xa() +BEGIN +XA START 'xid'; +DELETE FROM t WHERE f = 10; +XA END 'xid'; +XA PREPARE 'xid'; +END| +connect master1,localhost,root,,; +connect master2,localhost,root,,; +connect master3,localhost,root,,; +connection default; +INSERT INTO t VALUES (10); +INSERT INTO tm VALUES (10); +connection master1; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master1_ready WAIT_FOR signal_never_arrives"; +CALL sp_xa; +connection master2; +SET DEBUG_SYNC= "now WAIT_FOR master1_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready"; +INSERT INTO t2 VALUES (20); +connection master3; +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +SELECT @@global.gtid_binlog_pos as 'Before the crash'; +Before the crash +0-1-20 +connection default; +# Kill the server +disconnect master1; +disconnect master2; +disconnect master3; +# restart: --rpl-semi-sync-slave-enabled=1 +FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-20/ in mysqld.1.err +Pre-crash binlog file content: +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000004 # Gtid # # BEGIN GTID #-#-# +master-bin.000004 # Query # # use `test`; DELETE FROM t +master-bin.000004 # Xid # # COMMIT /* XID */ +master-bin.000004 # Gtid # # GTID #-#-# +master-bin.000004 # Query # # use `test`; DROP PROCEDURE sp_blank_xa +master-bin.000004 # Gtid # # GTID #-#-# +master-bin.000004 # Query # # use `test`; CREATE DEFINER=`root`@`localhost` PROCEDURE `sp_xa`() +BEGIN +XA START 'xid'; +DELETE FROM t WHERE f = 10; +XA END 'xid'; +XA PREPARE 'xid'; +END +master-bin.000004 # Gtid # # BEGIN GTID #-#-# +master-bin.000004 # Query # # use `test`; INSERT INTO t VALUES (10) +master-bin.000004 # Xid # # COMMIT /* XID */ +master-bin.000004 # Gtid # # BEGIN GTID #-#-# +master-bin.000004 # Query # # use `test`; INSERT INTO tm VALUES (10) +master-bin.000004 # Query # # COMMIT +master-bin.000004 # Gtid # # XA START X'786964',X'',1 GTID #-#-# +master-bin.000004 # Query # # use `test`; DELETE FROM t WHERE f = 10 +master-bin.000004 # Query # # XA END X'786964',X'',1 +master-bin.000004 # XA_prepare # # XA PREPARE X'786964',X'',1 +SELECT @@global.gtid_binlog_pos as 'After the crash'; +After the crash +0-1-19 +"One row should be present in table 't'" +SELECT * FROM t; +f +10 +DELETE FROM t; +DROP PROCEDURE sp_xa; +# Cleanup +DROP TABLE t,t2,tm; +# End of the tests diff --git a/mysql-test/suite/binlog/r/binlog_truncate_multi_engine.result b/mysql-test/suite/binlog/r/binlog_truncate_multi_engine.result new file mode 100644 index 00000000000..b8fd04497f2 --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_truncate_multi_engine.result @@ -0,0 +1,189 @@ +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=rocksdb; +# +# +# Case "A" : "neither engine committed => rollback & binlog truncate" +# +RESET MASTER; +FLUSH LOGS; +SET GLOBAL max_binlog_size= 4096; +connect con1,localhost,root,,; +List of binary logs before rotation +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +INSERT INTO t1 VALUES (1, REPEAT("x", 1)); +INSERT INTO t2 VALUES (1, REPEAT("x", 1)); +BEGIN; +INSERT INTO t2 VALUES (2, REPEAT("x", 4100)); +INSERT INTO t1 VALUES (2, REPEAT("x", 4100)); +SET DEBUG_SYNC= "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR signal_no_signal"; +COMMIT; +connection default; +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +List of binary logs after rotation +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +master-bin.000003 # +# restart the server with --rpl-semi-sync-slave-enabled=1 +# the server is restarted +# restart: --rpl-semi-sync-slave-enabled=1 +connection default; +# +# *** Summary: 1 row should be present in both tables; binlog is truncated; number of binlogs at reconnect - 3: +# +SELECT COUNT(*) FROM t1; +COUNT(*) +1 +SELECT COUNT(*) FROM t2; +COUNT(*) +1 +SELECT @@GLOBAL.gtid_binlog_state; +@@GLOBAL.gtid_binlog_state +0-1-2 +SELECT @@GLOBAL.gtid_binlog_pos; +@@GLOBAL.gtid_binlog_pos +0-1-2 +List of binary logs at the end of the tests +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +master-bin.000003 # +# *** +DELETE FROM t1; +DELETE FROM t2; +disconnect con1; +# +Proof of the truncated binlog file is readable (two transactions must be seen): +/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=1*/; +/*!40019 SET @@session.max_insert_delayed_threads=0*/; +/*!50003 SET @OLD_COMPLETION_TYPE=@@COMPLETION_TYPE,COMPLETION_TYPE=0*/; +DELIMITER /*!*/; +START TRANSACTION +/*!*/; +COMMIT/*!*/; +START TRANSACTION +/*!*/; +COMMIT/*!*/; +DELIMITER ; +# End of log file +ROLLBACK /* added by mysqlbinlog */; +/*!50003 SET COMPLETION_TYPE=@OLD_COMPLETION_TYPE*/; +/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=0*/; +# +# +# Case "B" : "one engine has committed its transaction branch" +# +RESET MASTER; +FLUSH LOGS; +SET GLOBAL max_binlog_size= 4096; +connect con1,localhost,root,,; +List of binary logs before rotation +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +INSERT INTO t1 VALUES (1, REPEAT("x", 1)); +INSERT INTO t2 VALUES (1, REPEAT("x", 1)); +SET GLOBAL debug_dbug="d,enable_log_write_upto_crash"; +BEGIN; +INSERT INTO t2 VALUES (2, REPEAT("x", 4100)); +INSERT INTO t1 VALUES (2, REPEAT("x", 4100)); +COMMIT; +connection default; +# restart: --rpl-semi-sync-slave-enabled=1 +connection default; +# +# *** Summary: 2 rows should be present in both tables; no binlog truncation; one extra binlog file compare with A; number of binlogs at reconnect - 4: +# +SELECT COUNT(*) FROM t1; +COUNT(*) +2 +SELECT COUNT(*) FROM t2; +COUNT(*) +2 +SELECT @@GLOBAL.gtid_binlog_state; +@@GLOBAL.gtid_binlog_state +0-1-3 +SELECT @@GLOBAL.gtid_binlog_pos; +@@GLOBAL.gtid_binlog_pos +0-1-3 +List of binary logs at the end of the tests +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +master-bin.000003 # +master-bin.000004 # +# *** +DELETE FROM t1; +DELETE FROM t2; +disconnect con1; +# +# +# +# Case "C" : "both engines have committed its transaction branch" +# +RESET MASTER; +FLUSH LOGS; +SET GLOBAL max_binlog_size= 4096; +connect con1,localhost,root,,; +List of binary logs before rotation +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +INSERT INTO t1 VALUES (1, REPEAT("x", 1)); +INSERT INTO t2 VALUES (1, REPEAT("x", 1)); +BEGIN; +INSERT INTO t2 VALUES (2, REPEAT("x", 4100)); +INSERT INTO t1 VALUES (2, REPEAT("x", 4100)); +SET DEBUG_SYNC= "commit_after_run_commit_ordered SIGNAL con1_ready WAIT_FOR signal_no_signal"; +COMMIT; +connection default; +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +List of binary logs after rotation +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +master-bin.000003 # +# restart the server with --rpl-semi-sync-slave-enabled=1 +# the server is restarted +# restart: --rpl-semi-sync-slave-enabled=1 +connection default; +# +# *** Summary: 2 rows should be present in both tables; no binlog truncation; the same # of binlog files as in B; number of binlogs at reconnect - 4: +# +SELECT COUNT(*) FROM t1; +COUNT(*) +2 +SELECT COUNT(*) FROM t2; +COUNT(*) +2 +SELECT @@GLOBAL.gtid_binlog_state; +@@GLOBAL.gtid_binlog_state +0-1-3 +SELECT @@GLOBAL.gtid_binlog_pos; +@@GLOBAL.gtid_binlog_pos +0-1-3 +List of binary logs at the end of the tests +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +master-bin.000003 # +master-bin.000004 # +# *** +DELETE FROM t1; +DELETE FROM t2; +disconnect con1; +# +DROP TABLE t1, t2; +# End of the tests diff --git a/mysql-test/suite/binlog/r/binlog_truncate_multi_log.result b/mysql-test/suite/binlog/r/binlog_truncate_multi_log.result new file mode 100644 index 00000000000..e11a40feb2f --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_truncate_multi_log.result @@ -0,0 +1,53 @@ +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +SET @@global.max_binlog_size= 4096; +RESET MASTER; +FLUSH LOGS; +CREATE TABLE ti (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +CREATE TABLE tm (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=MyISAM; +connect master1,localhost,root,,; +"List of binary logs before rotation" +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +INSERT INTO ti VALUES(1,"I am gonna survive"); +INSERT INTO tm VALUES(1,"me too!"); +SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL master1_ready WAIT_FOR con1_go"; +INSERT INTO ti VALUES (2, REPEAT("x", 4100)); +connect master2,localhost,root,,; +SET DEBUG_SYNC= "now WAIT_FOR master1_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT_FOR master2_go"; +INSERT INTO ti VALUES (3, "not gonna survive"); +connection default; +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +"List of binary logs before crash" +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +master-bin.000003 # +# The gtid binlog state prior the crash will be truncated at the end of the test +SELECT @@global.gtid_binlog_state; +@@global.gtid_binlog_state +0-1-6 +connection default; +# Kill the server +disconnect master1; +disconnect master2; +# restart: --rpl-semi-sync-slave-enabled=1 +FOUND 1 /truncated binlog file:.*master.*000002/ in mysqld.1.err +"One record should be present in table" +SELECT * FROM ti; +a b +1 I am gonna survive +# The truncated gtid binlog state +SELECT @@global.gtid_binlog_state; +@@global.gtid_binlog_state +0-1-4 +SELECT @@global.gtid_binlog_pos; +@@global.gtid_binlog_pos +0-1-4 +# Cleanup +DROP TABLE ti; +# End of the tests diff --git a/mysql-test/suite/binlog/r/binlog_truncate_multi_log_unsafe.result b/mysql-test/suite/binlog/r/binlog_truncate_multi_log_unsafe.result new file mode 100644 index 00000000000..0ee9a7c871d --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_truncate_multi_log_unsafe.result @@ -0,0 +1,58 @@ +SET @@global.max_binlog_size= 4096; +call mtr.add_suppression("Table '.*tm' is marked as crashed and should be repaired"); +call mtr.add_suppression("Got an error from unknown thread"); +call mtr.add_suppression("Checking table: '.*tm'"); +call mtr.add_suppression("Recovering table: '.*tm'"); +call mtr.add_suppression("Cannot truncate the binary log to file"); +call mtr.add_suppression("Crash recovery failed"); +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +call mtr.add_suppression("Found 1 prepared transactions"); +call mtr.add_suppression("mysqld: Table.*tm.*is marked as crashed"); +call mtr.add_suppression("Checking table.*tm"); +RESET MASTER; +FLUSH LOGS; +CREATE TABLE ti (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +CREATE TABLE tm (f INT) ENGINE=MYISAM; +INSERT INTO tm VALUES(1); +connect master1,localhost,root,,; +connect master2,localhost,root,,; +connect master3,localhost,root,,; +connection master1; +SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL master1_ready WAIT_FOR master1_go"; +INSERT INTO ti VALUES (5 - 1, REPEAT("x", 4100)); +connection master2; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT_FOR master2_go"; +INSERT INTO ti VALUES (5, REPEAT("x", 1)); +connection master3; +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master3_ready"; +INSERT INTO tm VALUES (2); +connection default; +SET DEBUG_SYNC= "now WAIT_FOR master3_ready"; +# The gtid binlog state prior the crash must be restored at the end of the test; +SELECT @@global.gtid_binlog_state; +@@global.gtid_binlog_state +0-1-9 +# Kill the server +# Failed restart as the semisync slave +# Normal restart +# restart +FOUND 1 /Cannot truncate the binary log to file/ in mysqld.1.err +# Proof that the in-doubt transactions are recovered by the 2nd normal server restart +SELECT COUNT(*) = 5 as 'True' FROM ti; +True +1 +SELECT COUNT(*) <= 1 FROM tm; +COUNT(*) <= 1 +1 +# The gtid binlog state prior the crash is restored now +SELECT @@GLOBAL.gtid_binlog_state; +@@GLOBAL.gtid_binlog_state +0-1-9 +SELECT @@GLOBAL.gtid_binlog_pos; +@@GLOBAL.gtid_binlog_pos +0-1-9 +# Cleanup +DROP TABLE ti, tm; +End of test diff --git a/mysql-test/suite/binlog/t/binlog_truncate_active_log.inc b/mysql-test/suite/binlog/t/binlog_truncate_active_log.inc new file mode 100644 index 00000000000..0bc83477d91 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_active_log.inc @@ -0,0 +1,57 @@ +connect(master1,localhost,root,,); +connect(master2,localhost,root,,); +connect(master3,localhost,root,,); + +--connection default + +# First to commit few transactions +INSERT INTO t VALUES (10); +INSERT INTO tm VALUES (10); + +--connection master1 +# Hold insert after write to binlog and before "run_commit_ordered" in engine +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master1_ready WAIT_FOR signal_never_arrives"; +--send_eval $query1 + +--connection master2 +SET DEBUG_SYNC= "now WAIT_FOR master1_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready"; +--send_eval $query2 + +--connection master3 +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +SELECT @@global.gtid_binlog_pos as 'Before the crash'; + +--connection default +--source include/kill_mysqld.inc +--disconnect master1 +--disconnect master2 +--disconnect master3 + +# +# Server restart +# +--let $restart_parameters= --rpl-semi-sync-slave-enabled=1 +--source include/start_mysqld.inc + +# Check error log for a successful truncate message. +--let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.1.err + +--let SEARCH_FILE=$log_error_ +--let SEARCH_PATTERN=Successfully truncated.*to remove transactions starting from GTID $truncate_gtid_pos + +--source include/search_pattern_in_file.inc + +--echo Pre-crash binlog file content: +--let $binlog_file= query_get_value(show binary logs, Log_name, $binlog_file_index) +--source include/show_binlog_events.inc + +SELECT @@global.gtid_binlog_pos as 'After the crash'; +--echo "One row should be present in table 't'" +SELECT * FROM t; + +# prepare binlog file index for the next test +--inc $binlog_file_index + +# Local cleanup +DELETE FROM t; diff --git a/mysql-test/suite/binlog/t/binlog_truncate_active_log.test b/mysql-test/suite/binlog/t/binlog_truncate_active_log.test new file mode 100644 index 00000000000..dbba8697b86 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_active_log.test @@ -0,0 +1,102 @@ +# ==== Purpose ==== +# +# Test verifies the truncation of single binary log file. +# +# ==== References ==== +# +# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server + +--source include/have_innodb.inc +--source include/have_aria.inc +# File: binlog_truncate_active_log.inc included in test makes use of +# 'debug_sync' facility. +--source include/have_debug_sync.inc +--source include/have_binlog_format_statement.inc + +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); + +# The following cases are tested: +# A. 2pc transaction is followed by a blank "zero-engines" one +# B. 2pc transaction follows the blank one +# C. Similarly to A, with the XA blank transaction + +RESET MASTER; +CREATE TABLE t (f INT) ENGINE=INNODB; +CREATE TABLE t2 (f INT) ENGINE=INNODB; +CREATE TABLE tm (f INT) ENGINE=Aria; + +# Old (pre-crash) binlog file index initial value. +# It keeps incremented at the end of each case. +--let $binlog_file_index=1 + +--echo # Case A. +# Using 'debug_sync' hold 'query1' execution after 'query1' is flushed and +# synced to binary log but not yet committed. In an another connection hold +# 'query2' execution after 'query2' is flushed and synced to binlog. +# Crash and restart server with --rpl-semi-sync-slave-enabled=1 +# +# During recovery of binary log 'query1' status is checked with InnoDB engine, +# it will be in prepared but not yet commited. All transactions starting from +# 'query1' onwards will be removed from the binary log. +# Show-binlog-events is to prove that. + +--let $truncate_gtid_pos = 0-1-6 +--let $query1 = INSERT INTO t VALUES (20) +--let $query2 = DELETE FROM t2 WHERE f = 0 /* no such record */ +--source binlog_truncate_active_log.inc + +--echo # Case B. +# The inverted sequence ends up to truncate starting from $query2 +--let $truncate_gtid_pos = 0-1-10 +--let $query1 = DELETE FROM t2 WHERE f = 0 +--let $query2 = INSERT INTO t VALUES (20) +--source binlog_truncate_active_log.inc + + +--echo # Case C. +delimiter |; +CREATE PROCEDURE sp_blank_xa() +BEGIN + XA START 'blank'; + DELETE FROM t2 WHERE f = 0 /* no such record */; + XA END 'blank'; + XA PREPARE 'blank'; +END| +delimiter ;| + +# The same as in A with $query2 being the zero-engine XA transaction. +# Both $query1 and $query2 are going to be truncated. +--let $truncate_gtid_pos = 0-1-14 +--let $query1 = INSERT INTO t VALUES (20) +--let $query2 = CALL sp_blank_xa +--source binlog_truncate_active_log.inc + +DROP PROCEDURE sp_blank_xa; + + +--echo # Case D. +delimiter |; +CREATE PROCEDURE sp_xa() +BEGIN + XA START 'xid'; + DELETE FROM t WHERE f = 10; + XA END 'xid'; + XA PREPARE 'xid'; +END| +delimiter ;| + +# The same as in B with $query1 being the prepared XA transaction. +# Truncation must occurs at $query2. +--let $truncate_gtid_pos = 0-1-20 +--let $query1 = CALL sp_xa +--let $query2 = INSERT INTO t2 VALUES (20) +--source binlog_truncate_active_log.inc + +DROP PROCEDURE sp_xa; + + +--echo # Cleanup +DROP TABLE t,t2,tm; + +--echo # End of the tests diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.inc b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.inc new file mode 100644 index 00000000000..52ce4741eaa --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.inc @@ -0,0 +1,73 @@ +# +# Invoked by binlog_truncate_multi_engine.test +# Parameters: +# $debug_sync_action describes debug-sync actions +# $kill_server 1 when to crash, 0 for regular restart +# $restart_parameters the caller may simulate partial commit at recovery +# $test_outcome summary of extected results +# $MYSQLD_DATADIR + +--echo # +--echo # +--echo # Case $case : $description +--echo # +RESET MASTER; +FLUSH LOGS; +SET GLOBAL max_binlog_size= 4096; + +connect(con1,localhost,root,,); +--echo List of binary logs before rotation +--source include/show_binary_logs.inc +INSERT INTO t1 VALUES (1, REPEAT("x", 1)); +INSERT INTO t2 VALUES (1, REPEAT("x", 1)); +if (`SELECT $case = "B"`) +{ + --write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +wait-binlog_truncate_multi_engine.test +EOF + + SET GLOBAL debug_dbug="d,enable_log_write_upto_crash"; +} +BEGIN; + INSERT INTO t2 VALUES (2, REPEAT("x", 4100)); + INSERT INTO t1 VALUES (2, REPEAT("x", 4100)); + +if (`SELECT $debug_sync_action != ""`) +{ + --eval SET DEBUG_SYNC= $debug_sync_action +} +send COMMIT; + +--connection default +if (`SELECT $case = "B"`) +{ + --source include/wait_until_disconnected.inc + --source include/start_mysqld.inc +} +if (`SELECT $case != "B"`) +{ + SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; + --echo List of binary logs after rotation + --source include/show_binary_logs.inc + + --echo # restart the server with $restart_parameters + --echo # the server is restarted + --source include/restart_mysqld.inc +} + +--connection default +--echo # +--echo # *** Summary: $test_outcome: +--echo # +SELECT COUNT(*) FROM t1; +SELECT COUNT(*) FROM t2; +SELECT @@GLOBAL.gtid_binlog_state; +SELECT @@GLOBAL.gtid_binlog_pos; +--echo List of binary logs at the end of the tests +--source include/show_binary_logs.inc +--echo # *** +# cleanup +DELETE FROM t1; +DELETE FROM t2; +--disconnect con1 +--echo # diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.opt b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.opt new file mode 100644 index 00000000000..03e7d74f6db --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.opt @@ -0,0 +1 @@ +--plugin-load=$HA_ROCKSDB_SO diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.test b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.test new file mode 100644 index 00000000000..f8e32f16f0f --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.test @@ -0,0 +1,60 @@ +# ==== Purpose ==== +# +# Test verifies truncation of multiple binary logs with multiple transactional +# storage engines +# +# ==== References ==== +# +# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server + +--source include/have_rocksdb.inc +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/have_binlog_format_row.inc + +--let $old_max_binlog_size= `select @@global.max_binlog_size` +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +--let $MYSQLD_DATADIR= `SELECT @@datadir` + +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=rocksdb; + +--let $case = "A" +--let $description = "neither engine committed => rollback & binlog truncate" +# Hold off engine commits after write to binlog and its rotation. +# The transaction is killed along with the server after that. +--let $shutdown_timeout=0 +--let $debug_sync_action = "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR signal_no_signal" +--let $restart_parameters = --rpl-semi-sync-slave-enabled=1 +--let $test_outcome= 1 row should be present in both tables; binlog is truncated; number of binlogs at reconnect - 3 +--source binlog_truncate_multi_engine.inc +--echo Proof of the truncated binlog file is readable (two transactions must be seen): +--exec $MYSQL_BINLOG --short-form --skip-annotate-row-events $MYSQLD_DATADIR/master-bin.000002 + +--let $case = "B" +--let $description = "one engine has committed its transaction branch" +# Hold off after one engine has committed. +--let $shutdown_timeout=0 +--let $debug_sync_action = "" +# Both debug_sync and debug-dbug are required to make sure Engines remember the commit state +# debug_sync alone will not help. +--let $restart_parameters = --rpl-semi-sync-slave-enabled=1 +--let $test_outcome= 2 rows should be present in both tables; no binlog truncation; one extra binlog file compare with A; number of binlogs at reconnect - 4 +--source binlog_truncate_multi_engine.inc + +--let $case = "C" +--let $description= "both engines have committed its transaction branch" +--let $debug_sync_action = "commit_after_run_commit_ordered SIGNAL con1_ready WAIT_FOR signal_no_signal" +# Hold off after both engines have committed. The server is shut down. +--let $shutdown_timeout= +--let $restart_parameters = --rpl-semi-sync-slave-enabled=1 +--let $test_outcome= 2 rows should be present in both tables; no binlog truncation; the same # of binlog files as in B; number of binlogs at reconnect - 4 +--source binlog_truncate_multi_engine.inc + + + +DROP TABLE t1, t2; + +--echo # End of the tests diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_log.test b/mysql-test/suite/binlog/t/binlog_truncate_multi_log.test new file mode 100644 index 00000000000..4ea7f9a559e --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_log.test @@ -0,0 +1,77 @@ +# ==== Purpose ==== +# +# Test verifies truncation of multiple binary logs. +# +# ==== References ==== +# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server + +--source include/have_innodb.inc +--source include/have_debug_sync.inc +--source include/have_binlog_format_row.inc + +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); + +SET @@global.max_binlog_size= 4096; + +RESET MASTER; +FLUSH LOGS; +CREATE TABLE ti (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +CREATE TABLE tm (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=MyISAM; + +connect(master1,localhost,root,,); +--echo "List of binary logs before rotation" +--source include/show_binary_logs.inc + +# Some load to either non- and transactional egines +# that should not affect the following recovery: +INSERT INTO ti VALUES(1,"I am gonna survive"); +INSERT INTO tm VALUES(1,"me too!"); + +# hold on near engine commit +SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL master1_ready WAIT_FOR con1_go"; +--send INSERT INTO ti VALUES (2, REPEAT("x", 4100)) + +connect(master2,localhost,root,,); +# The 2nd trx for recovery, it does not rotate binlog +SET DEBUG_SYNC= "now WAIT_FOR master1_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT_FOR master2_go"; +--send INSERT INTO ti VALUES (3, "not gonna survive") + +--connection default +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +--echo "List of binary logs before crash" +--source include/show_binary_logs.inc +--echo # The gtid binlog state prior the crash will be truncated at the end of the test +SELECT @@global.gtid_binlog_state; + +--connection default +--source include/kill_mysqld.inc +--disconnect master1 +--disconnect master2 + +# +# Server restart +# +--let $restart_parameters= --rpl-semi-sync-slave-enabled=1 +--source include/start_mysqld.inc + +# Check error log for a successful truncate message. +let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.1.err; + +--let SEARCH_FILE=$log_error_ +--let SEARCH_PATTERN=truncated binlog file:.*master.*000002 +--source include/search_pattern_in_file.inc + + +--echo "One record should be present in table" +SELECT * FROM ti; + +--echo # The truncated gtid binlog state +SELECT @@global.gtid_binlog_state; +SELECT @@global.gtid_binlog_pos; + +--echo # Cleanup +DROP TABLE ti; + +--echo # End of the tests diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_log_unsafe.test b/mysql-test/suite/binlog/t/binlog_truncate_multi_log_unsafe.test new file mode 100644 index 00000000000..04d8619e24e --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_log_unsafe.test @@ -0,0 +1,119 @@ +# ==== Purpose ==== +# The test verifies attempt to recover by the semisync slave server whose +# binlog is unsafe for truncation. +# +# ==== Implementation ==== +# 2 binlog files are created with the 1st one destined to be the binlog +# checkpoint file for recovery. +# The final group of events is replication unsafe (myisam INSERT). +# Therefore the semisync slave recovery may not. +# +# Steps: +# 0 - Set max_binlog_size= 4096, to help an insert into a +# transaction table 'ti' get binlog rotated while the +# transaction won't be committed, being stopped at +# a prior to commit debug_sync point +# 1 - insert into a non-transactional 'tm' table completes with +# binary logging as well +# 2 - kill and attempt to restart the server as semisync slave that +# must produce an expected unsafe-to-recover error +# 3 - complete the test with a normal restart that successfully finds and +# commits the transaction in doubt. +# +# ==== References ==== +# +# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server +# + +--source include/have_innodb.inc +--source include/have_debug_sync.inc +--source include/have_binlog_format_row.inc + +SET @@global.max_binlog_size= 4096; + +call mtr.add_suppression("Table '.*tm' is marked as crashed and should be repaired"); +call mtr.add_suppression("Got an error from unknown thread"); +call mtr.add_suppression("Checking table: '.*tm'"); +call mtr.add_suppression("Recovering table: '.*tm'"); +call mtr.add_suppression("Cannot truncate the binary log to file"); +call mtr.add_suppression("Crash recovery failed"); +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +call mtr.add_suppression("Found 1 prepared transactions"); +call mtr.add_suppression("mysqld: Table.*tm.*is marked as crashed"); +call mtr.add_suppression("Checking table.*tm"); + +RESET MASTER; +FLUSH LOGS; +CREATE TABLE ti (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +CREATE TABLE tm (f INT) ENGINE=MYISAM; + +--let $row_count = 5 +--let $i = `select $row_count-2` +--disable_query_log +while ($i) +{ + --eval INSERT INTO ti VALUES ($i, REPEAT("x", 1)) + --dec $i +} +--enable_query_log +INSERT INTO tm VALUES(1); + +connect(master1,localhost,root,,); +connect(master2,localhost,root,,); +connect(master3,localhost,root,,); + +--connection master1 + +# The 1st trx binlogs, rotate binlog and hold on before committing at engine +SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL master1_ready WAIT_FOR master1_go"; +--send_eval INSERT INTO ti VALUES ($row_count - 1, REPEAT("x", 4100)) + +--connection master2 + +# The 2nd trx for recovery, it does not rotate binlog +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT_FOR master2_go"; +--send_eval INSERT INTO ti VALUES ($row_count, REPEAT("x", 1)) + +--connection master3 +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master3_ready"; +--send INSERT INTO tm VALUES (2) + +--connection default +SET DEBUG_SYNC= "now WAIT_FOR master3_ready"; +--echo # The gtid binlog state prior the crash must be restored at the end of the test; +SELECT @@global.gtid_binlog_state; +--source include/kill_mysqld.inc + +# +# Server restarts +# +--echo # Failed restart as the semisync slave +--error 1 +--exec $MYSQLD_LAST_CMD --rpl-semi-sync-slave-enabled=1 >> $MYSQLTEST_VARDIR/log/mysqld.1.err 2>&1 + +--echo # Normal restart +--source include/start_mysqld.inc + +# Check error log for correct messages. +let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.1.err; + +--let SEARCH_FILE=$log_error_ +--let SEARCH_PATTERN=Cannot truncate the binary log to file +--source include/search_pattern_in_file.inc + +--echo # Proof that the in-doubt transactions are recovered by the 2nd normal server restart +--eval SELECT COUNT(*) = $row_count as 'True' FROM ti +# myisam table may require repair (which is not tested here) +--disable_warnings +SELECT COUNT(*) <= 1 FROM tm; +--enable_warnings + +--echo # The gtid binlog state prior the crash is restored now +SELECT @@GLOBAL.gtid_binlog_state; +SELECT @@GLOBAL.gtid_binlog_pos; + +--echo # Cleanup +DROP TABLE ti, tm; +--echo End of test diff --git a/mysql-test/suite/mariabackup/include/have_rocksdb.inc b/mysql-test/suite/mariabackup/include/have_rocksdb.inc deleted file mode 100644 index d59f76f6cf3..00000000000 --- a/mysql-test/suite/mariabackup/include/have_rocksdb.inc +++ /dev/null @@ -1,4 +0,0 @@ -if (`SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.PLUGINS WHERE PLUGIN_NAME = 'rocksdb'`) -{ - --skip Requires rocksdb -} \ No newline at end of file diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result b/mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result new file mode 100644 index 00000000000..233f4acbcc0 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result @@ -0,0 +1,129 @@ +include/master-slave.inc +[connection master] +connection server_2; +include/stop_slave.inc +connection server_1; +RESET MASTER; +SET @@global.max_binlog_size= 4096; +connection server_2; +RESET MASTER; +SET @@global.max_binlog_size= 4096; +set @@global.rpl_semi_sync_slave_enabled = 1; +set @@global.gtid_slave_pos = ""; +CHANGE MASTER TO master_use_gtid= slave_pos; +include/start_slave.inc +connection server_1; +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +set @@global.rpl_semi_sync_master_enabled = 1; +set @@global.rpl_semi_sync_master_wait_point=AFTER_SYNC; +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +call mtr.add_suppression("1 client is using or hasn.t closed the table properly"); +call mtr.add_suppression("Table './mtr/test_suppressions' is marked as crashed and should be repaired"); +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +INSERT INTO t1 VALUES (1, 'dummy1'); +connect conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_1,; +SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL con1_ready WAIT_FOR con1_go"; +INSERT INTO t1 VALUES (2, REPEAT("x", 4100)); +connection server_1; +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +# Kill the server +connection server_2; +include/stop_slave.inc +SELECT @@GLOBAL.gtid_current_pos; +@@GLOBAL.gtid_current_pos +0-1-8 +# restart: --rpl-semi-sync-slave-enabled=1 +connection server_1; +FOUND 1 /truncated binlog file:.*master.*000001/ in mysqld.1.err +disconnect conn_client; +connection server_2; +set global rpl_semi_sync_master_enabled = 1; +set global rpl_semi_sync_master_wait_point=AFTER_SYNC; +connection server_1; +CHANGE MASTER TO master_host='127.0.0.1', master_port=$new_master_port, master_user='root', master_use_gtid=SLAVE_POS; +set global rpl_semi_sync_slave_enabled = 1; +set @@global.gtid_slave_pos=@@global.gtid_binlog_pos; +include/start_slave.inc +connection server_2; +INSERT INTO t1 VALUES (3, 'dummy3'); +# The gtid state on current master must be equal to ... +SHOW VARIABLES LIKE 'gtid_binlog_pos'; +Variable_name Value +gtid_binlog_pos 0-2-9 +connection server_1; +SELECT COUNT(*) = 3 as 'true' FROM t1; +true +1 +# ... the gtid states on the slave: +SHOW VARIABLES LIKE 'gtid_slave_pos'; +Variable_name Value +gtid_slave_pos 0-2-9 +SHOW VARIABLES LIKE 'gtid_binlog_pos'; +Variable_name Value +gtid_binlog_pos 0-2-9 +connection server_2; +connect conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_2,; +SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL con1_ready WAIT_FOR con1_go"; +INSERT INTO t1 VALUES (4, REPEAT("x", 4100)); +connect conn_client_2,127.0.0.1,root,,test,$SERVER_MYPORT_2,; +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +SET DEBUG_SYNC= "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR con2_go"; +INSERT INTO t1 VALUES (5, REPEAT("x", 4100)); +connection server_2; +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +# Kill the server +connection server_1; +include/stop_slave.inc +SELECT @@GLOBAL.gtid_current_pos; +@@GLOBAL.gtid_current_pos +0-2-11 +# restart: --rpl-semi-sync-slave-enabled=1 +connection server_2; +NOT FOUND /truncated binlog file:.*slave.*000001/ in mysqld.2.err +disconnect conn_client; +connection server_1; +set global rpl_semi_sync_master_enabled = 1; +set global rpl_semi_sync_master_wait_point=AFTER_SYNC; +connection server_2; +CHANGE MASTER TO master_host='127.0.0.1', master_port=$new_master_port, master_user='root', master_use_gtid=SLAVE_POS; +set global rpl_semi_sync_slave_enabled = 1; +set @@global.gtid_slave_pos=@@global.gtid_binlog_pos; +include/start_slave.inc +connection server_1; +INSERT INTO t1 VALUES (6, 'Done'); +# The gtid state on current master must be equal to ... +SHOW VARIABLES LIKE 'gtid_binlog_pos'; +Variable_name Value +gtid_binlog_pos 0-1-12 +connection server_2; +SELECT COUNT(*) = 6 as 'true' FROM t1; +true +1 +# ... the gtid states on the slave: +SHOW VARIABLES LIKE 'gtid_slave_pos'; +Variable_name Value +gtid_slave_pos 0-1-12 +SHOW VARIABLES LIKE 'gtid_binlog_pos'; +Variable_name Value +gtid_binlog_pos 0-1-12 +include/diff_tables.inc [server_1:t1, server_2:t1] +# Cleanup +connection server_1; +DROP TABLE t1; +connection server_2; +include/stop_slave.inc +connection server_1; +set @@global.rpl_semi_sync_master_enabled = 0; +set @@global.rpl_semi_sync_slave_enabled = 0; +set @@global.rpl_semi_sync_master_wait_point=default; +RESET SLAVE; +RESET MASTER; +connection server_2; +set @@global.rpl_semi_sync_master_enabled = 0; +set @@global.rpl_semi_sync_slave_enabled = 0; +set @@global.rpl_semi_sync_master_wait_point=default; +CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_1, master_user='root', master_use_gtid=no; +include/start_slave.inc +connection default; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_crash.inc b/mysql-test/suite/rpl/t/rpl_semi_sync_crash.inc new file mode 100644 index 00000000000..1f24c42f680 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_crash.inc @@ -0,0 +1,77 @@ +if ($failover_to_slave) +{ + --let $server_to_crash=1 + --let $server_to_promote=2 + --let $new_master_port=$SERVER_MYPORT_2 + --let $client_port=$SERVER_MYPORT_1 + + --connect (conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_1,) +} +if (!$failover_to_slave) +{ + --let $server_to_crash=2 + --let $server_to_promote=1 + --let $new_master_port=$SERVER_MYPORT_1 + --let $client_port=$SERVER_MYPORT_2 + + --connect (conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_2,) +} + + +# Hold insert after write to binlog and before "run_commit_ordered" in engine + +SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL con1_ready WAIT_FOR con1_go"; +--send_eval $query_to_crash + +# complicate recovery with an extra binlog file +if (!$failover_to_slave) +{ + --connect (conn_client_2,127.0.0.1,root,,test,$SERVER_MYPORT_2,) + # use the same signal with $query_to_crash + SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; + SET DEBUG_SYNC= "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR con2_go"; + --send_eval $query2_to_crash +} + +--connection server_$server_to_crash +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +--source include/kill_mysqld.inc + +--connection server_$server_to_promote +--error 2003 +--source include/stop_slave.inc +SELECT @@GLOBAL.gtid_current_pos; + +--let $restart_parameters=--rpl-semi-sync-slave-enabled=1 +--let $allow_rpl_inited=1 +--source include/start_mysqld.inc + +--connection server_$server_to_crash +--enable_reconnect +--source include/wait_until_connected_again.inc + +# Check error log for correct messages. +let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.$server_to_crash.err; +--let SEARCH_FILE=$log_error_ +--let SEARCH_PATTERN=$log_search_pattern +--source include/search_pattern_in_file.inc + +--disconnect conn_client + +# +# FAIL OVER now to new master +# +--connection server_$server_to_promote +set global rpl_semi_sync_master_enabled = 1; +set global rpl_semi_sync_master_wait_point=AFTER_SYNC; + +--connection server_$server_to_crash +--let $master_port=$SERVER_MYPORT_2 +if (`select $server_to_crash = 2`) +{ + --let $master_port=$SERVER_MYPORT_1 +} +evalp CHANGE MASTER TO master_host='127.0.0.1', master_port=$new_master_port, master_user='root', master_use_gtid=SLAVE_POS; +set global rpl_semi_sync_slave_enabled = 1; +set @@global.gtid_slave_pos=@@global.gtid_binlog_pos; +--source include/start_slave.inc diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.cnf b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.cnf new file mode 100644 index 00000000000..f8312bdc5b8 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.cnf @@ -0,0 +1,11 @@ +!include suite/rpl/rpl_1slave_base.cnf +!include include/default_client.cnf + + +[mysqld.1] +log-slave-updates +gtid-strict-mode=1 + +[mysqld.2] +log-slave-updates +gtid-strict-mode=1 diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test new file mode 100644 index 00000000000..2c38cf4da54 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test @@ -0,0 +1,144 @@ +# ==== Purpose ==== +# +# Test verifies replication failover scenario. +# +# ==== Implementation ==== +# +# Steps: +# 0 - Having two servers 1 and 2 enable semi-sync replication with +# with the master wait 'after_sync'. +# 1 - Insert a row. While inserting second row simulate +# a server crash at once the transaction is written to binlog, flushed +# and synced but the binlog position is not updated. +# 2 - Post crash-recovery on the old master execute there CHANGE MASTER +# TO command to connect to server id 2. +# 3 - The old master new slave server 1 must connect to the new +# master server 2. +# 4 - repeat the above to crash the new master and restore in role the old one +# +# ==== References ==== +# +# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server + + +--source include/have_innodb.inc +--source include/have_debug_sync.inc +--source include/have_binlog_format_row.inc +--source include/master-slave.inc + +# Initial slave +--connection server_2 +--source include/stop_slave.inc + +# Initial master +--connection server_1 +RESET MASTER; +SET @@global.max_binlog_size= 4096; + +--connection server_2 +RESET MASTER; +SET @@global.max_binlog_size= 4096; +set @@global.rpl_semi_sync_slave_enabled = 1; +set @@global.gtid_slave_pos = ""; +CHANGE MASTER TO master_use_gtid= slave_pos; +--source include/start_slave.inc + + +--connection server_1 +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +set @@global.rpl_semi_sync_master_enabled = 1; +set @@global.rpl_semi_sync_master_wait_point=AFTER_SYNC; + +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +call mtr.add_suppression("1 client is using or hasn.t closed the table properly"); +call mtr.add_suppression("Table './mtr/test_suppressions' is marked as crashed and should be repaired"); + +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +INSERT INTO t1 VALUES (1, 'dummy1'); + +# +# CRASH the original master, and FAILOVER to the new +# + +# value 1 for server id 1 -> 2 failover +--let $failover_to_slave=1 +--let $query_to_crash= INSERT INTO t1 VALUES (2, REPEAT("x", 4100)) +--let $log_search_pattern=truncated binlog file:.*master.*000001 +--source rpl_semi_sync_crash.inc + +--connection server_2 +--let $rows_so_far=3 +--eval INSERT INTO t1 VALUES ($rows_so_far, 'dummy3') +--save_master_pos +--echo # The gtid state on current master must be equal to ... +SHOW VARIABLES LIKE 'gtid_binlog_pos'; + +--connection server_1 +--sync_with_master +--eval SELECT COUNT(*) = $rows_so_far as 'true' FROM t1 +--echo # ... the gtid states on the slave: +SHOW VARIABLES LIKE 'gtid_slave_pos'; +SHOW VARIABLES LIKE 'gtid_binlog_pos'; + +--connection server_2 +# +# CRASH the new master and FAILOVER back to the original +# + +# value 0 for the reverse server id 2 -> 1 failover +--let $failover_to_slave=0 +--let $query_to_crash = INSERT INTO t1 VALUES (4, REPEAT("x", 4100)) +--let $query2_to_crash= INSERT INTO t1 VALUES (5, REPEAT("x", 4100)) +--let $log_search_pattern=truncated binlog file:.*slave.*000001 +--source rpl_semi_sync_crash.inc + +--connection server_1 +--let $rows_so_far=6 +--eval INSERT INTO t1 VALUES ($rows_so_far, 'Done') +--save_master_pos +--echo # The gtid state on current master must be equal to ... +SHOW VARIABLES LIKE 'gtid_binlog_pos'; + +--connection server_2 +--sync_with_master +--eval SELECT COUNT(*) = $rows_so_far as 'true' FROM t1 +--echo # ... the gtid states on the slave: +SHOW VARIABLES LIKE 'gtid_slave_pos'; +SHOW VARIABLES LIKE 'gtid_binlog_pos'; + + +--let $diff_tables=server_1:t1, server_2:t1 +--source include/diff_tables.inc + +# +--echo # Cleanup +# +--connection server_1 +DROP TABLE t1; +--save_master_pos + +--connection server_2 +--sync_with_master +--source include/stop_slave.inc + +--connection server_1 +set @@global.rpl_semi_sync_master_enabled = 0; +set @@global.rpl_semi_sync_slave_enabled = 0; +set @@global.rpl_semi_sync_master_wait_point=default; +RESET SLAVE; +RESET MASTER; + +--connection server_2 +set @@global.rpl_semi_sync_master_enabled = 0; +set @@global.rpl_semi_sync_slave_enabled = 0; +set @@global.rpl_semi_sync_master_wait_point=default; + +evalp CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_1, master_user='root', master_use_gtid=no; +--source include/start_slave.inc + +connection default; +--enable_reconnect +--source include/wait_until_connected_again.inc + +--source include/rpl_end.inc diff --git a/sql/handler.cc b/sql/handler.cc index fc2c46395c3..78ee18a4542 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -1508,6 +1508,24 @@ uint ha_count_rw_all(THD *thd, Ha_trx_info **ptr_ha_info) return rw_ha_count; } +/* + Returns counted number of + read-write recoverable transaction participants. +*/ +uint ha_count_rw_2pc(THD *thd, bool all) +{ + unsigned rw_ha_count= 0; + THD_TRANS *trans=all ? &thd->transaction->all : &thd->transaction->stmt; + + for (Ha_trx_info * ha_info= trans->ha_list; ha_info; + ha_info= ha_info->next()) + { + if (ha_info->is_trx_read_write() && ha_info->ht()->recover) + ++rw_ha_count; + } + return rw_ha_count; +} + /** Check if we can skip the two-phase commit. @@ -1527,7 +1545,6 @@ uint ha_count_rw_all(THD *thd, Ha_trx_info **ptr_ha_info) engines with read-write changes. */ -static uint ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list, bool all) @@ -1978,6 +1995,24 @@ int ha_commit_one_phase(THD *thd, bool all) DBUG_RETURN(res); } +static bool is_ro_1pc_trans(THD *thd, Ha_trx_info *ha_info, bool all, + bool is_real_trans) +{ + uint rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all); + bool rw_trans= is_real_trans && + (rw_ha_count > (thd->is_current_stmt_binlog_disabled()?0U:1U)); + + return !rw_trans; +} + +static bool has_binlog_hton(Ha_trx_info *ha_info) +{ + bool rc; + for (rc= false; ha_info && !rc; ha_info= ha_info->next()) + rc= ha_info->ht() == binlog_hton; + + return rc; +} static int commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans) @@ -1991,9 +2026,17 @@ commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans) if (ha_info) { + int err; + + if (has_binlog_hton(ha_info) && + (err= binlog_commit(thd, all, + is_ro_1pc_trans(thd, ha_info, all, is_real_trans)))) + { + my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); + error= 1; + } for (; ha_info; ha_info= ha_info_next) { - int err; handlerton *ht= ha_info->ht(); if ((err= ht->commit(ht, thd, all))) { @@ -2219,6 +2262,15 @@ int ha_commit_or_rollback_by_xid(XID *xid, bool commit) xaop.xid= xid; xaop.result= 1; + /* + When the binlogging service is enabled complete the transaction + by it first. + */ + if (commit) + binlog_commit_by_xid(binlog_hton, xid); + else + binlog_rollback_by_xid(binlog_hton, xid); + plugin_foreach(NULL, commit ? xacommit_handlerton : xarollback_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &xaop); @@ -2314,7 +2366,7 @@ static my_xid wsrep_order_and_check_continuity(XID *list, int len) recover() step of xa. @note - there are three modes of operation: + there are four modes of operation: - automatic recover after a crash in this case commit_list != 0, tc_heuristic_recover==0 all xids from commit_list are committed, others are rolled back @@ -2325,6 +2377,9 @@ static my_xid wsrep_order_and_check_continuity(XID *list, int len) - no recovery (MySQL did not detect a crash) in this case commit_list==0, tc_heuristic_recover == 0 there should be no prepared transactions in this case. + - automatic recovery for the semisync slave server: uncommitted + transactions are rolled back and when they are in binlog it gets + truncated to the first uncommitted transaction start offset. */ struct xarecover_st { @@ -2332,8 +2387,181 @@ struct xarecover_st XID *list; HASH *commit_list; bool dry_run; + MEM_ROOT *mem_root; + bool error; }; +/** + Inserts a new hash member. + + returns a successfully created and inserted @c xid_recovery_member + into hash @c hash_arg, + or NULL. +*/ +static xid_recovery_member* +xid_member_insert(HASH *hash_arg, my_xid xid_arg, MEM_ROOT *ptr_mem_root) +{ + xid_recovery_member *member= (xid_recovery_member*) + alloc_root(ptr_mem_root, sizeof(xid_recovery_member)); + if (!member) + return NULL; + + member->xid= xid_arg; + member->in_engine_prepare= 1; + member->decided_to_commit= false; + + return my_hash_insert(hash_arg, (uchar*) member) ? NULL : member; +} + +/* + Inserts a new or updates an existing hash member to increment + the member's prepare counter. + + returns false on success, + true otherwise. +*/ +static bool xid_member_replace(HASH *hash_arg, my_xid xid_arg, + MEM_ROOT *ptr_mem_root) +{ + xid_recovery_member* member; + if ((member= (xid_recovery_member *) + my_hash_search(hash_arg, (uchar *)& xid_arg, sizeof(xid_arg)))) + member->in_engine_prepare++; + else + member= xid_member_insert(hash_arg, xid_arg, ptr_mem_root); + + return member == NULL; +} + +/* + A "transport" type for recovery completion with ha_recover_complete() +*/ +struct xarecover_complete_arg +{ + xid_recovery_member* member; + Binlog_offset *binlog_coord; + uint count; +}; + +/* + Flagged to commit member confirms to get committed. + Otherwise when + A. ptr_commit_max is NULL (implies the normal recovery), or + B. it's not NULL (can only be so in the semisync slave case) + and the value referenced is not greater than the member's coordinate + the decision is to rollback. + When both A,B do not hold - which is the semisync slave recovery + case - the decision is to commit. + + Returns true as commmit decision + false as rollback one +*/ +static bool xarecover_decide_to_commit(xid_recovery_member* member, + Binlog_offset *ptr_commit_max) +{ + return + member->decided_to_commit ? true : + !ptr_commit_max ? false : + (member->binlog_coord < *ptr_commit_max ? // semisync slave recovery + true : false); +} + +/* + Helper function for xarecover_do_commit_or_rollback_handlerton. + For a given hton decides what to do with a xid passed in the 2nd arg + and carries out the decision. +*/ +static void xarecover_do_commit_or_rollback(handlerton *hton, + xarecover_complete_arg *arg) +{ + xid_t x; + my_bool rc; + xid_recovery_member *member= arg->member; + Binlog_offset *ptr_commit_max= arg->binlog_coord; + + x.set(member->xid); + + rc= xarecover_decide_to_commit(member, ptr_commit_max) ? + hton->commit_by_xid(hton, &x) : hton->rollback_by_xid(hton, &x); + + /* + It's fine to have non-zero rc which would be from transaction + non-participant hton:s. + */ + DBUG_ASSERT(rc || member->in_engine_prepare > 0); + + if (!rc) + { + /* + This block relies on Engine to report XAER_NOTA at + "complete"_by_xid for unknown xid. + */ + member->in_engine_prepare--; + if (global_system_variables.log_warnings > 2) + sql_print_information("%s transaction with xid %llu", + member->decided_to_commit ? "Committed" : + "Rolled back", (ulonglong) member->xid); + } +} + +/* + Per hton recovery decider function. +*/ +static my_bool xarecover_do_commit_or_rollback_handlerton(THD *unused, + plugin_ref plugin, + void *arg) +{ + handlerton *hton= plugin_hton(plugin); + + if (hton->recover) + { + xarecover_do_commit_or_rollback(hton, (xarecover_complete_arg *) arg); + } + + return FALSE; +} + +/* + Completes binlog recovery for an input xid in the passed + member_arg to invoke decider functions for each handlerton. + + Returns always FALSE. +*/ +static my_bool xarecover_complete_and_count(void *member_arg, + void *param_arg) +{ + xid_recovery_member *member= (xid_recovery_member*) member_arg; + xarecover_complete_arg *complete_params= + (xarecover_complete_arg*) param_arg; + complete_params->member= member; + + (void) plugin_foreach(NULL, xarecover_do_commit_or_rollback_handlerton, + MYSQL_STORAGE_ENGINE_PLUGIN, complete_params); + + if (member->in_engine_prepare) + { + complete_params->count++; + if (global_system_variables.log_warnings > 2) + sql_print_warning("Found prepared transaction with xid %llu", + (ulonglong) member->xid); + } + + return false; +} + +/* + Completes binlog recovery to invoke decider functions for + each xid. + Returns the number of transactions remained doubtful. +*/ +uint ha_recover_complete(HASH *commit_list, Binlog_offset *coord) +{ + xarecover_complete_arg complete= { NULL, coord, 0 }; + (void) my_hash_iterate(commit_list, xarecover_complete_and_count, &complete); + + return complete.count; +} + static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, void *arg) { @@ -2395,7 +2623,20 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, info->found_my_xids++; continue; } - // recovery mode + + /* + Regular and semisync slave server recovery only collects + xids to make decisions on them later by the caller. + */ + if (info->mem_root) + { + if (xid_member_replace(info->commit_list, x, info->mem_root)) + { + info->error= true; + sql_print_error("Error in memory allocation at xarecover_handlerton"); + break; + } + } else if (IF_WSREP((wsrep_emulate_bin_log && wsrep_is_wsrep_xid(info->list + i) && x <= wsrep_limit), false) || @@ -2431,7 +2672,7 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, return FALSE; } -int ha_recover(HASH *commit_list) +int ha_recover(HASH *commit_list, MEM_ROOT *arg_mem_root) { struct xarecover_st info; DBUG_ENTER("ha_recover"); @@ -2439,6 +2680,8 @@ int ha_recover(HASH *commit_list) info.commit_list= commit_list; info.dry_run= (info.commit_list==0 && tc_heuristic_recover==0); info.list= NULL; + info.mem_root= arg_mem_root; + info.error= false; /* commit_list and tc_heuristic_recover cannot be set both */ DBUG_ASSERT(info.commit_list==0 || tc_heuristic_recover==0); @@ -2483,6 +2726,9 @@ int ha_recover(HASH *commit_list) info.found_my_xids, opt_tc_log_file); DBUG_RETURN(1); } + if (info.error) + DBUG_RETURN(1); + if (info.commit_list) sql_print_information("Crash table recovery finished."); DBUG_RETURN(0); diff --git a/sql/handler.h b/sql/handler.h index a7c455ae7c9..d2bb514f5cf 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -44,6 +44,7 @@ #include #include "sql_sequence.h" #include "mem_root_array.h" +#include // pair class Alter_info; class Virtual_column_info; @@ -931,6 +932,32 @@ struct xid_t { }; typedef struct xid_t XID; +/* + Enumerates a sequence in the order of + their creation that is in the top-down order of the index file. + Ranges from zero through MAX_binlog_id. + Not confuse the value with the binlog file numerical suffix, + neither with the binlog file line in the binlog index file. +*/ +typedef uint Binlog_file_id; +const Binlog_file_id MAX_binlog_id= UINT_MAX; +/* + Compound binlog-id and byte offset of transaction's first event + in a sequence (e.g the recovery sequence) of binlog files. + Binlog_offset(0,0) is the minimum value to mean + the first byte of the first binlog file. +*/ +typedef std::pair Binlog_offset; + +/* binlog-based recovery transaction descriptor */ +struct xid_recovery_member +{ + my_xid xid; + uint in_engine_prepare; // number of engines that have xid prepared + bool decided_to_commit; + Binlog_offset binlog_coord; // semisync recovery binlog offset +}; + /* for recover() handlerton call */ #define MIN_XID_LIST_SIZE 128 #define MAX_XID_LIST_SIZE (1024*128) @@ -5320,7 +5347,8 @@ int ha_commit_one_phase(THD *thd, bool all); int ha_commit_trans(THD *thd, bool all); int ha_rollback_trans(THD *thd, bool all); int ha_prepare(THD *thd); -int ha_recover(HASH *commit_list); +int ha_recover(HASH *commit_list, MEM_ROOT *mem_root= NULL); +uint ha_recover_complete(HASH *commit_list, Binlog_offset *coord= NULL); /* transactions: these functions never call handlerton functions directly */ int ha_enable_transaction(THD *thd, bool on); @@ -5448,4 +5476,8 @@ int del_global_index_stat(THD *thd, TABLE* table, KEY* key_info); int del_global_table_stat(THD *thd, const LEX_CSTRING *db, const LEX_CSTRING *table); uint ha_count_rw_all(THD *thd, Ha_trx_info **ptr_ha_info); bool non_existing_table_error(int error); +uint ha_count_rw_2pc(THD *thd, bool all); +uint ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list, + bool all); + #endif /* HANDLER_INCLUDED */ diff --git a/sql/log.cc b/sql/log.cc index f1fef44e05f..047d644e5da 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -63,6 +63,12 @@ #include "wsrep_trans_observer.h" #endif /* WITH_WSREP */ +#ifdef HAVE_REPLICATION +#include "semisync_master.h" +#include "semisync_slave.h" +#include // pair +#endif + /* max size of the log message */ #define MAX_LOG_BUFFER_SIZE 1024 #define MAX_TIME_SIZE 32 @@ -88,16 +94,12 @@ static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv); static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv); static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton, THD *thd); -static int binlog_commit(handlerton *hton, THD *thd, bool all); static int binlog_rollback(handlerton *hton, THD *thd, bool all); static int binlog_prepare(handlerton *hton, THD *thd, bool all); -static int binlog_xa_recover_dummy(handlerton *hton, XID *xid_list, uint len); -static int binlog_commit_by_xid(handlerton *hton, XID *xid); -static int binlog_rollback_by_xid(handlerton *hton, XID *xid); static int binlog_start_consistent_snapshot(handlerton *hton, THD *thd); static int binlog_flush_cache(THD *thd, binlog_cache_mngr *cache_mngr, Log_event *end_ev, bool all, bool using_stmt, - bool using_trx); + bool using_trx, bool is_ro_1pc); static const LEX_CSTRING write_error_msg= { STRING_WITH_LEN("error writing to the binary log") }; @@ -1693,17 +1695,13 @@ int binlog_init(void *p) binlog_hton->savepoint_rollback= binlog_savepoint_rollback; binlog_hton->savepoint_rollback_can_release_mdl= binlog_savepoint_rollback_can_release_mdl; - binlog_hton->commit= binlog_commit; + binlog_hton->commit= [](handlerton *, THD *thd, bool all) { return 0; }; binlog_hton->rollback= binlog_rollback; binlog_hton->drop_table= [](handlerton *, const char*) { return -1; }; if (WSREP_ON || opt_bin_log) { binlog_hton->prepare= binlog_prepare; binlog_hton->start_consistent_snapshot= binlog_start_consistent_snapshot; - binlog_hton->commit_by_xid= binlog_commit_by_xid; - binlog_hton->rollback_by_xid= binlog_rollback_by_xid; - // recover needs to be set to make xa{commit,rollback}_handlerton effective - binlog_hton->recover= binlog_xa_recover_dummy; } binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN | HTON_NO_ROLLBACK; return 0; @@ -1770,7 +1768,7 @@ static int binlog_close_connection(handlerton *hton, THD *thd) static int binlog_flush_cache(THD *thd, binlog_cache_mngr *cache_mngr, Log_event *end_ev, bool all, bool using_stmt, - bool using_trx) + bool using_trx, bool is_ro_1pc= false) { int error= 0; DBUG_ENTER("binlog_flush_cache"); @@ -1797,7 +1795,8 @@ binlog_flush_cache(THD *thd, binlog_cache_mngr *cache_mngr, */ error= mysql_bin_log.write_transaction_to_binlog(thd, cache_mngr, end_ev, all, - using_stmt, using_trx); + using_stmt, using_trx, + is_ro_1pc); } else { @@ -1871,7 +1870,8 @@ inline size_t serialize_with_xid(XID *xid, char *buf, nonzero if an error pops up when flushing the cache. */ static inline int -binlog_commit_flush_trx_cache(THD *thd, bool all, binlog_cache_mngr *cache_mngr) +binlog_commit_flush_trx_cache(THD *thd, bool all, binlog_cache_mngr *cache_mngr, + bool ro_1pc) { DBUG_ENTER("binlog_commit_flush_trx_cache"); @@ -1892,7 +1892,7 @@ binlog_commit_flush_trx_cache(THD *thd, bool all, binlog_cache_mngr *cache_mngr) } Query_log_event end_evt(thd, buf, buflen, TRUE, TRUE, TRUE, 0); - DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, FALSE, TRUE)); + DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, FALSE, TRUE, ro_1pc)); } @@ -2011,35 +2011,30 @@ inline bool is_preparing_xa(THD *thd) static int binlog_prepare(handlerton *hton, THD *thd, bool all) { /* Do nothing unless the transaction is a user XA. */ - return is_preparing_xa(thd) ? binlog_commit(NULL, thd, all) : 0; + return is_preparing_xa(thd) ? binlog_commit(thd, all, FALSE) : 0; } -static int binlog_xa_recover_dummy(handlerton *hton __attribute__((unused)), - XID *xid_list __attribute__((unused)), - uint len __attribute__((unused))) -{ - /* Does nothing. */ - return 0; -} - - -static int binlog_commit_by_xid(handlerton *hton, XID *xid) +int binlog_commit_by_xid(handlerton *hton, XID *xid) { THD *thd= current_thd; + if (thd->is_current_stmt_binlog_disabled()) + return 0; (void) thd->binlog_setup_trx_data(); DBUG_ASSERT(thd->lex->sql_command == SQLCOM_XA_COMMIT); - return binlog_commit(hton, thd, TRUE); + return binlog_commit(thd, TRUE, FALSE); } -static int binlog_rollback_by_xid(handlerton *hton, XID *xid) +int binlog_rollback_by_xid(handlerton *hton, XID *xid) { THD *thd= current_thd; + if (thd->is_current_stmt_binlog_disabled()) + return 0; (void) thd->binlog_setup_trx_data(); DBUG_ASSERT(thd->lex->sql_command == SQLCOM_XA_ROLLBACK || @@ -2123,20 +2118,17 @@ static int binlog_commit_flush_xa_prepare(THD *thd, bool all, return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, TRUE)); } - /** This function is called once after each statement. It has the responsibility to flush the caches to the binary log on commits. - @param hton The binlog handlerton. @param thd The client thread that executes the transaction. @param all This is @c true if this is a real transaction commit, and @false otherwise. - - @see handlerton::commit + @param ro_1pc read-only one-phase commit transaction */ -static int binlog_commit(handlerton *hton, THD *thd, bool all) +int binlog_commit(THD *thd, bool all, bool ro_1pc) { int error= 0; PSI_stage_info org_stage; @@ -2168,7 +2160,6 @@ static int binlog_commit(handlerton *hton, THD *thd, bool all) YESNO(thd->transaction->all.modified_non_trans_table), YESNO(thd->transaction->stmt.modified_non_trans_table))); - thd->backup_stage(&org_stage); THD_STAGE_INFO(thd, stage_binlog_write); if (!cache_mngr->stmt_cache.empty()) @@ -2197,8 +2188,9 @@ static int binlog_commit(handlerton *hton, THD *thd, bool all) { error= is_preparing_xa(thd) ? binlog_commit_flush_xa_prepare(thd, all, cache_mngr) : - binlog_commit_flush_trx_cache (thd, all, cache_mngr); + binlog_commit_flush_trx_cache (thd, all, cache_mngr, ro_1pc); } + /* This is part of the stmt rollback. */ @@ -6228,7 +6220,8 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd, bool MYSQL_BIN_LOG::write_gtid_event(THD *thd, bool standalone, - bool is_transactional, uint64 commit_id) + bool is_transactional, uint64 commit_id, + bool has_xid, bool is_ro_1pc) { rpl_gtid gtid; uint32 domain_id; @@ -6281,7 +6274,7 @@ MYSQL_BIN_LOG::write_gtid_event(THD *thd, bool standalone, Gtid_log_event gtid_event(thd, seq_no, domain_id, standalone, LOG_EVENT_SUPPRESS_USE_F, is_transactional, - commit_id); + commit_id, has_xid, is_ro_1pc); /* Write the event to the binary log. */ DBUG_ASSERT(this == &mysql_bin_log); @@ -7610,7 +7603,8 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, binlog_cache_mngr *cache_mngr, Log_event *end_ev, bool all, bool using_stmt_cache, - bool using_trx_cache) + bool using_trx_cache, + bool is_ro_1pc) { group_commit_entry entry; Ha_trx_info *ha_info; @@ -7639,6 +7633,7 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, entry.using_trx_cache= using_trx_cache; entry.need_unlog= is_preparing_xa(thd); ha_info= all ? thd->transaction->all.ha_list : thd->transaction->stmt.ha_list; + entry.ro_1pc= is_ro_1pc; for (; !entry.need_unlog && ha_info; ha_info= ha_info->next()) { @@ -8513,10 +8508,13 @@ MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry, uint64 commit_id) { binlog_cache_mngr *mngr= entry->cache_mngr; + bool has_xid= entry->end_event->get_type_code() == XID_EVENT; + DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_or_stmt"); if (write_gtid_event(entry->thd, is_prepared_xa(entry->thd), - entry->using_trx_cache, commit_id)) + entry->using_trx_cache, commit_id, + has_xid, entry->ro_1pc)) DBUG_RETURN(ER_ERROR_ON_WRITE); if (entry->using_stmt_cache && !mngr->stmt_cache.empty() && @@ -9247,6 +9245,11 @@ TC_LOG::run_commit_ordered(THD *thd, bool all) if (!ht->commit_ordered) continue; ht->commit_ordered(ht, thd, all); + DBUG_EXECUTE_IF("enable_log_write_upto_crash", + { + DBUG_SET_INITIAL("+d,crash_after_log_write_upto"); + sleep(1000); + }); DEBUG_SYNC(thd, "commit_after_run_commit_ordered"); } } @@ -9979,6 +9982,151 @@ int TC_LOG::using_heuristic_recover() /****** transaction coordinator log for 2pc - binlog() based solution ******/ #define TC_LOG_BINLOG MYSQL_BIN_LOG +/** + Truncates the current binlog to specified position. Removes the rest of binlogs + which are present after this binlog file. + + @param truncate_file Holds the binlog name to be truncated + @param truncate_pos Position within binlog from where it needs to + truncated. + + @retval true ok + @retval false error + +*/ +bool MYSQL_BIN_LOG::truncate_and_remove_binlogs(const char *file_name, + my_off_t pos, + rpl_gtid *ptr_gtid) +{ + int error= 0; +#ifdef HAVE_REPLICATION + LOG_INFO log_info; + THD *thd= current_thd; + my_off_t index_file_offset= 0; + File file= -1; + MY_STAT s; + my_off_t old_size; + + if ((error= find_log_pos(&log_info, file_name, 1))) + { + sql_print_error("Failed to locate binary log file:%s." + "Error:%d", file_name, error); + goto end; + } + + while (!(error= find_next_log(&log_info, 1))) + { + if (!index_file_offset) + { + index_file_offset= log_info.index_file_start_offset; + if ((error= open_purge_index_file(TRUE))) + { + sql_print_error("Failed to open purge index " + "file:%s. Error:%d", purge_index_file_name, error); + goto end; + } + } + if ((error= register_purge_index_entry(log_info.log_file_name))) + { + sql_print_error("Failed to copy %s to purge index" + " file. Error:%d", log_info.log_file_name, error); + goto end; + } + } + + if (error != LOG_INFO_EOF) + { + sql_print_error("Failed to find the next binlog to " + "add to purge index register. Error:%d", error); + goto end; + } + + if (is_inited_purge_index_file()) + { + if (!index_file_offset) + index_file_offset= log_info.index_file_start_offset; + + if ((error= sync_purge_index_file())) + { + sql_print_error("Failed to flush purge index " + "file. Error:%d", error); + goto end; + } + + // Trim index file + error= mysql_file_chsize(index_file.file, index_file_offset, '\n', + MYF(MY_WME)); + if (!error) + error= mysql_file_sync(index_file.file, MYF(MY_WME|MY_SYNC_FILESIZE)); + if (error) + { + sql_print_error("Failed to truncate binlog index " + "file:%s to offset:%llu. Error:%d", index_file_name, + index_file_offset, error); + goto end; + } + + /* Reset data in old index cache */ + if ((error= reinit_io_cache(&index_file, READ_CACHE, (my_off_t) 0, 0, 1))) + { + sql_print_error("Failed to reinit binlog index " + "file. Error:%d", error); + goto end; + } + + /* Read each entry from purge_index_file and delete the file. */ + if ((error= purge_index_entry(thd, NULL, TRUE))) + { + sql_print_error("Failed to process registered " + "files that would be purged."); + goto end; + } + } + + DBUG_ASSERT(pos); + + if ((file= mysql_file_open(key_file_binlog, file_name, + O_RDWR | O_BINARY, MYF(MY_WME))) < 0) + { + error= 1; + sql_print_error("Failed to open binlog file:%s for " + "truncation.", file_name); + goto end; + } + my_stat(file_name, &s, MYF(0)); + old_size= s.st_size; + clear_inuse_flag_when_closing(file); + /* Change binlog file size to truncate_pos */ + error= mysql_file_chsize(file, pos, 0, MYF(MY_WME)); + if (!error) + error= mysql_file_sync(file, MYF(MY_WME|MY_SYNC_FILESIZE)); + if (error) + { + sql_print_error("Failed to truncate the " + "binlog file:%s to size:%llu. Error:%d", + file_name, pos, error); + goto end; + } + else + { + char buf[21]; + longlong10_to_str(ptr_gtid->seq_no, buf, 10); + sql_print_information("Successfully truncated binlog file:%s " + "from previous file size %llu " + "to pos:%llu to remove transactions starting from " + "GTID %u-%u-%s", + file_name, old_size, pos, + ptr_gtid->domain_id, ptr_gtid->server_id, buf); + } + +end: + if (file >= 0) + mysql_file_close(file, MYF(MY_WME)); + + error= error || close_purge_index_file(); +#endif + return error > 0; +} int TC_LOG_BINLOG::open(const char *opt_name) { int error= 1; @@ -10454,7 +10602,520 @@ start_binlog_background_thread() return 0; } +#ifdef HAVE_REPLICATION +class Recovery_context +{ +public: + my_off_t prev_event_pos; + rpl_gtid last_gtid; + bool last_gtid_standalone; + bool last_gtid_valid; + bool last_gtid_no2pc; // true when the group does not end with Xid event + uint last_gtid_engines; + Binlog_offset last_gtid_coord; // + /* + When true, it's semisync slave recovery mode + rolls back transactions in doubt and wipes them off from binlog. + The rest of declarations deal with this type of recovery. + */ + bool do_truncate; + /* + transaction-in-doubt's gtid:s. `truncate_gtid` is the ultimate value, + if it's non-zero truncation is taking place to start from it. + Its value gets refined throughout binlog scanning conducted with at most + 2 rounds. + When an estimate is done in the 1st round of 2-round recovery its value + gets memorized for possible adoption as the ultimate `truncate_gtid`. + */ + rpl_gtid truncate_gtid, truncate_gtid_1st_round; + /* + the last non-transactional group that is located in binlog + behind truncate_gtid. + */ + rpl_gtid binlog_unsafe_gtid; + char binlog_truncate_file_name[FN_REFLEN] ; + char binlog_unsafe_file_name[FN_REFLEN] ; + /* + When do_truncate is true, the truncate position may not be + found in one round when recovered transactions are multi-engine + or just on different engines. + In the single recoverable engine case `truncate_reset_done` and + therefore `truncate_validated` remains `false` when the last + binlog is the binlog-checkpoint one. + The meaning of `truncate_reset_done` is according to the following example: + Let round = 1, Binlog contains the sequence of replication event groups: + [g1, G2, g3] + where `G` (in capital) stands for committed, `g` for prepared. + g1 is first set as truncation candidate, then G2 reset it to indicate + the actual truncation is behind (to the right of) it. + `truncate_validated` is set to true when `binlog_truncate_pos` (as of `g3`) + won't change. + Observe last_gtid_valid is affected, so in the above example `g1` that + was initially ignored for the gtid binlog state now seeing `G2` + would have to be added to it. See gtid_maybe_to_truncate. + */ + bool truncate_validated; // trued when the truncate position settled + bool truncate_reset_done; // trued when the position is to reevaluate + /* Flags the fact of truncate position estimation is done the 1st round */ + bool truncate_set_in_1st; + /* + Monotonically indexes binlog files in the recovery list. + When the list is "likely" singleton the value is UINT_MAX. + Otherwise enumeration starts with zero for the first file, increments + by one for any next file except for the last file in the list, which + is also the initial binlog file for recovery, + that is enumberated with UINT_MAX. + */ + Binlog_file_id id_binlog; + enum_binlog_checksum_alg checksum_alg; + Binlog_offset binlog_truncate_coord, + binlog_truncate_coord_1st_round; // pair is similar to truncate_gtid + Binlog_offset binlog_unsafe_coord; + /* + Populated at decide_or_assess() with gtid-in-doubt whose + binlog offset greater of equal by that of the current gtid truncate + candidate. + Gets empited by reset_truncate_coord into gtid binlog state. + */ + Dynamic_array *gtid_maybe_to_truncate; + Recovery_context(); + ~Recovery_context() { delete gtid_maybe_to_truncate; } + /* + Completes the recovery procedure. + In the normal case prepared xids gets committed when they also found + in binlog, otherwise they are rolled back. + In the semisync slave case the xids that are located in binlog in + a truncated tail get rolled back, otherwise they are committed. + Both decisions are contingent on safety to truncate. + */ + bool complete(MYSQL_BIN_LOG *log, HASH &xids); + /* + decides on commit of xid passed through member argument. + In the semisync slave case it assigns binlog coordinate to + any xid that remains in-doubt. Decision on them will be + done after binlog scan rounds. + */ + bool decide_or_assess(xid_recovery_member *member, int round, + Format_description_log_event *fdle, + LOG_INFO *linfo, my_off_t pos); + + /* + Assigns last_gtid and assesses the maximum (in the binlog offset term) + unsafe gtid (group of events). + */ + void process_gtid(int round, Gtid_log_event *gev, LOG_INFO *linfo); + + /* + Compute next action at the end of processing of the current binlog file. + It may increment the round. + When the round turns in the semisync-slave recovery + binlog_id, truncate_validated, truncate_reset_done + gets reset/set for the next round. + Within the 2nd round id_binlog keeps incrementing. + + Passed arguments: + round the current round that *may* be increment here + last_log_name the recovery starting binlog file + binlog_checkpoint_name + binlog checkpoint file + linfo binlog file list struct for next file + log pointer to mysql_bin_log instance + + Returns: 0 when rounds continue, maybe the current one remains + 1 when all rounds are done + */ + int next_binlog_or_round(int& round, + const char *last_log_name, + const char *binlog_checkpoint_name, + LOG_INFO *linfo, MYSQL_BIN_LOG *log); + /* + Relates to the semisync recovery. + Returns true when truncated tail does not contain non-transactional + group of events. + Otherwise returns false. + */ + bool is_safe_to_truncate() + { + return !do_truncate ? true : + (truncate_gtid.seq_no == 0 || // no truncate + binlog_unsafe_coord < binlog_truncate_coord); // or unsafe is earlier + } + + /* + Relates to the semisync recovery. + Is invoked when a standalone or non-2pc group is detected. + Both are unsafe to truncate in the semisync-slave recovery so + the maximum unsafe coordinate may be updated. + In the non-2pc group case though, *exeptionally*, + the no-engine group is considered safe, to be invalidated + to not contribute to binlog state. + */ + void update_binlog_unsafe_coord_if_needed(LOG_INFO *linfo); + + /* + Relates to the semisync recovery. + Is called when a committed or decided to-commit transaction is detected. + Actions: + truncate_gtid then is set to "nil" as indicated by rpl_gtid::seq_no := 0. + truncate_reset_done takes a note of that fact. + binlog_truncate_coord gets reset to the current gtid offset merely to + "suggest" any potential future truncate gtid must have a greater offset. + gtid_maybe_to_truncate gets emptied into gtid binlog state. + + Returns: + false on success, otherwise + true when OOM at rpl_global_gtid_binlog_state insert + */ + bool reset_truncate_coord(my_off_t pos); + + /* + Sets binlog_truncate_pos to the value of the current transaction's gtid. + In multi-engine case that might be just an assessment to be refined + in the current round and confirmed in a next one. + gtid_maybe_to_truncate receives the current gtid as a new element. + Returns + false on success, otherwise + true when OOM at gtid_maybe_to_truncate append + + */ + bool set_truncate_coord(LOG_INFO *linfo, int round, + enum_binlog_checksum_alg fd_checksum_alg); +}; + +bool Recovery_context::complete(MYSQL_BIN_LOG *log, HASH &xids) +{ + if (!do_truncate || is_safe_to_truncate()) + { + uint count_in_prepare= + ha_recover_complete(&xids, + !do_truncate ? NULL : + (truncate_gtid.seq_no > 0 ? + &binlog_truncate_coord : &last_gtid_coord)); + + if (count_in_prepare > 0 && global_system_variables.log_warnings > 2) + { + sql_print_warning("Could not complete %u number of transactions.", + count_in_prepare); + return false; // there's later dry run ha_recover() to error out + } + } + + /* Truncation is not done when there's no transaction to roll back */ + if (do_truncate && truncate_gtid.seq_no > 0) + { + if (is_safe_to_truncate()) + { + if (log->truncate_and_remove_binlogs(binlog_truncate_file_name, + binlog_truncate_coord.second, + &truncate_gtid)) + { + sql_print_error("Failed to truncate the binary log to " + "file:%s pos:%llu.", binlog_truncate_file_name, + binlog_truncate_coord.second); + return true; + } + } + else + { + sql_print_error("Cannot truncate the binary log to file:%s " + "pos:%llu as unsafe statement " + "is found at file:%s pos:%llu which is " + "beyond the truncation position;" + "all transactions in doubt are left intact. ", + binlog_truncate_file_name, binlog_truncate_coord.second, + binlog_unsafe_file_name, binlog_unsafe_coord.second); + return true; + } + } + + return false; +} + +Recovery_context::Recovery_context() : + prev_event_pos(0), + last_gtid_standalone(false), last_gtid_valid(false), last_gtid_no2pc(false), + last_gtid_engines(0), + do_truncate(rpl_semi_sync_slave_enabled), + truncate_validated(false), truncate_reset_done(false), + truncate_set_in_1st(false), id_binlog(MAX_binlog_id), + checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF), gtid_maybe_to_truncate(NULL) +{ + last_gtid_coord= Binlog_offset(0,0); + binlog_truncate_coord= binlog_truncate_coord_1st_round= Binlog_offset(0,0); + binlog_unsafe_coord= Binlog_offset(0,0); + binlog_truncate_file_name[0]= 0; + binlog_unsafe_file_name [0]= 0; + binlog_unsafe_gtid= truncate_gtid= truncate_gtid_1st_round= rpl_gtid(); + if (do_truncate) + gtid_maybe_to_truncate= new Dynamic_array(16, 16); +} + +bool Recovery_context::reset_truncate_coord(my_off_t pos) +{ + DBUG_ASSERT(binlog_truncate_coord.second == 0 || + last_gtid_coord >= binlog_truncate_coord || + truncate_set_in_1st); + // save as backup to restore at next_binlog_or_round when necessary + if (truncate_set_in_1st && truncate_gtid_1st_round.seq_no == 0) + { + truncate_gtid_1st_round= truncate_gtid; + binlog_truncate_coord_1st_round= binlog_truncate_coord; + } + binlog_truncate_coord= Binlog_offset(id_binlog, pos); + truncate_gtid= rpl_gtid(); + truncate_reset_done= true; + for (uint i= 0; i < gtid_maybe_to_truncate->elements(); i++) + { + rpl_gtid gtid= gtid_maybe_to_truncate->at(i); + if (rpl_global_gtid_binlog_state.update_nolock(>id, false)) + return true; + } + gtid_maybe_to_truncate->clear(); + + return false; +} + +bool Recovery_context::set_truncate_coord(LOG_INFO *linfo, int round, + enum_binlog_checksum_alg fd_checksum) +{ + binlog_truncate_coord= last_gtid_coord; + strmake_buf(binlog_truncate_file_name, linfo->log_file_name); + + truncate_gtid= last_gtid; + checksum_alg= fd_checksum; + truncate_set_in_1st= (round == 1); + + return gtid_maybe_to_truncate->append(last_gtid); +} + +bool Recovery_context::decide_or_assess(xid_recovery_member *member, int round, + Format_description_log_event *fdle, + LOG_INFO *linfo, my_off_t pos) +{ + if (member) + { + /* + xid in doubt are resolved as follows: + in_engine_prepare is compared agaist binlogged info to + yield the commit-or-rollback decision in the normal case. + In the semisync-slave recovery the decision is done later + after the binlog scanning has determined the truncation offset. + */ + if (member->in_engine_prepare > last_gtid_engines) + { + char buf[21]; + longlong10_to_str(last_gtid.seq_no, buf, 10); + sql_print_error("Error to recovery multi-engine transaction: " + "the number of engines prepared %u exceeds the " + "respective number %u in its GTID %u-%u-%s " + "located at file:%s pos:%llu", + member->in_engine_prepare, last_gtid_engines, + last_gtid.domain_id, last_gtid.server_id, buf, + linfo->log_file_name, last_gtid_coord.second); + return true; + } + else if (member->in_engine_prepare < last_gtid_engines) + { + DBUG_ASSERT(member->in_engine_prepare > 0); + /* + This is an "unlikely" branch of two or more engines in transaction + that is partially committed, so to complete. + */ + member->decided_to_commit= true; + if (do_truncate) + { + /* Validated truncate at this point can be only in the 2nd round. */ + DBUG_ASSERT(!truncate_validated || + (round == 2 && truncate_set_in_1st && + last_gtid_coord < binlog_truncate_coord)); + /* + Estimated truncate must not be greater than the current one's + offset, unless the turn of the rounds. + */ + DBUG_ASSERT(truncate_validated || + (last_gtid_coord >= binlog_truncate_coord || + (round == 2 && truncate_set_in_1st))); + + if (!truncate_validated && reset_truncate_coord(pos)) + return true; + } + } + else // member->in_engine_prepare == last_gtid_engines + { + if (!do_truncate) // "normal" recovery + { + member->decided_to_commit= true; + } + else + { + member->binlog_coord= last_gtid_coord; + last_gtid_valid= false; + /* + First time truncate position estimate before its validation. + An estimate may change to involve reset_truncate_coord call. + */ + if (!truncate_validated) + { + if (truncate_gtid.seq_no == 0 /* was reset or never set */ || + (truncate_set_in_1st && round == 2 /* reevaluted at round turn */)) + { + if (set_truncate_coord(linfo, round, fdle->checksum_alg)) + return true; + } + else + { + /* Truncate estimate was done ago, this gtid can't improve it. */ + DBUG_ASSERT(last_gtid_coord >= binlog_truncate_coord); + + gtid_maybe_to_truncate->append(last_gtid); + } + + DBUG_ASSERT(member->decided_to_commit == false); // may redecided + } + else + { + /* + binlog truncate was determined, possibly to none, otherwise + its offset greater than that of the current gtid. + */ + DBUG_ASSERT(truncate_gtid.seq_no == 0 || + last_gtid_coord < binlog_truncate_coord); + member->decided_to_commit= true; + } + } + } + } + else if (do_truncate) // "0" < last_gtid_engines + { + /* + Similar to the partial commit branch above. + */ + DBUG_ASSERT(!truncate_validated || last_gtid_coord < binlog_truncate_coord); + DBUG_ASSERT(truncate_validated || + (last_gtid_coord >= binlog_truncate_coord || + (round == 2 && truncate_set_in_1st))); + + if (!truncate_validated && reset_truncate_coord(pos)) + return true; + } + + return false; +} + +void Recovery_context::update_binlog_unsafe_coord_if_needed(LOG_INFO *linfo) +{ + if (!do_truncate) + return; + + if (truncate_gtid.seq_no > 0 && // g1,U2, *not* G1,U2 + last_gtid_coord > binlog_truncate_coord) + { + DBUG_ASSERT(binlog_truncate_coord.second > 0); + /* + Potentially unsafe when the truncate coordinate is not determined, + just detected as unsafe when behind the latter. + */ + if (last_gtid_engines == 0) + { + last_gtid_valid= false; + } + else + { + binlog_unsafe_gtid= last_gtid; + binlog_unsafe_coord= last_gtid_coord; + strmake_buf(binlog_unsafe_file_name, linfo->log_file_name); + } + } +} + +void Recovery_context::process_gtid(int round, Gtid_log_event *gev, + LOG_INFO *linfo) +{ + last_gtid.domain_id= gev->domain_id; + last_gtid.server_id= gev->server_id; + last_gtid.seq_no= gev->seq_no; + last_gtid_engines= gev->extra_engines != UCHAR_MAX ? + gev->extra_engines + 1 : 0; + last_gtid_coord= Binlog_offset(id_binlog, prev_event_pos); + + DBUG_ASSERT(!last_gtid_valid); + DBUG_ASSERT(!last_gtid.seq_no == 0); + + if (round == 1 || (do_truncate && !truncate_validated)) + { + DBUG_ASSERT(!last_gtid_valid); + + last_gtid_no2pc= false; + last_gtid_standalone= + (gev->flags2 & Gtid_log_event::FL_STANDALONE) ? true : false; + if (do_truncate && last_gtid_standalone) + update_binlog_unsafe_coord_if_needed(linfo); + /* Update the binlog state with any 'valid' GTID logged after Gtid_list. */ + last_gtid_valid= true; // may flip at Xid when falls to truncate + } +} + +int Recovery_context::next_binlog_or_round(int& round, + const char *last_log_name, + const char *binlog_checkpoint_name, + LOG_INFO *linfo, + MYSQL_BIN_LOG *log) +{ + if (!strcmp(linfo->log_file_name, last_log_name)) + { + /* Exit the loop now at the end of the current round. */ + DBUG_ASSERT(round <= 2); + + if (do_truncate) + { + truncate_validated= truncate_reset_done; + truncate_reset_done= false; + /* + Restore the 1st round saved estimate if it was not refined in the 2nd. + That can only occur in multiple log files context when the inital file + has a truncation candidate (a `g`) and does not have any commited `G`, + *and* other files (binlog-checkpoint one and so on) do not have any + transaction-in-doubt. + */ + if (truncate_gtid.seq_no == 0 && truncate_set_in_1st) + { + DBUG_ASSERT(truncate_gtid_1st_round.seq_no > 0); + + truncate_gtid= truncate_gtid_1st_round; + binlog_truncate_coord= binlog_truncate_coord_1st_round; + } + } + return 1; + } + else if (round == 1) + { + if (do_truncate) + { + truncate_validated= truncate_reset_done; + if (!truncate_validated) + { + rpl_global_gtid_binlog_state.reset_nolock(); + gtid_maybe_to_truncate->clear(); + } + truncate_reset_done= false; + id_binlog= 0; + } + round++; + } + else if (do_truncate) // binlog looping within round 2 + { + id_binlog++; + + DBUG_ASSERT(id_binlog <= MAX_binlog_id); // the assert is "practical" + } + + DBUG_ASSERT(!do_truncate || id_binlog != MAX_binlog_id || + !strcmp(linfo->log_file_name, binlog_checkpoint_name)); + + return 0; +} +#endif /* Execute recovery of the binary log @@ -10476,16 +11137,25 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, MEM_ROOT mem_root; char binlog_checkpoint_name[FN_REFLEN]; bool binlog_checkpoint_found; - bool first_round; IO_CACHE log; File file= -1; const char *errmsg; #ifdef HAVE_REPLICATION - rpl_gtid last_gtid; - bool last_gtid_standalone= false; - bool last_gtid_valid= false; + Recovery_context ctx; #endif DBUG_ENTER("TC_LOG_BINLOG::recover"); + /* + The for-loop variable is updated by the following rule set: + Initially set to 1. + After the initial binlog file is processed to identify + the Binlog-checkpoint file it is incremented when the latter file + is different from the initial one. Otherwise the only log has been + fully parsed so the for loop exits. + The 2nd round parses all earlier in binlog index order files + starting from the Binlog-checkpoint file. It ends when the initial + binlog file is reached. + */ + int round; if (! fdle->is_valid() || (my_hash_init(key_memory_binlog_recover_exec, &xids, @@ -10501,6 +11171,10 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error + /* finds xids when root is not NULL */ + if (do_xa && ha_recover(&xids, &mem_root)) + goto err1; + /* Scan the binlog for XIDs that need to be committed if still in the prepared stage. @@ -10510,10 +11184,9 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, */ binlog_checkpoint_found= false; - first_round= true; - for (;;) + for (round= 1;;) { - while ((ev= Log_event::read_log_event(first_round ? first_log : &log, + while ((ev= Log_event::read_log_event(round == 1 ? first_log : &log, fdle, opt_master_verify_checksum)) && ev->is_valid()) { @@ -10521,17 +11194,23 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, switch (typ) { case XID_EVENT: + if (do_xa) { - if (do_xa) + xid_recovery_member *member= + (xid_recovery_member*) + my_hash_search(&xids, (uchar*) &static_cast(ev)->xid, + sizeof(my_xid)); +#ifndef HAVE_REPLICATION { - Xid_log_event *xev=(Xid_log_event *)ev; - uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid, - sizeof(xev->xid)); - if (!x || my_hash_insert(&xids, x)) - goto err2; + if (member) + member->decided_to_commit= true; } - break; +#else + if (ctx.decide_or_assess(member, round, fdle, linfo, ev->log_pos)) + goto err2; +#endif } + break; case QUERY_EVENT: { Query_log_event *query_ev= (Query_log_event*) ev; @@ -10545,10 +11224,18 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, if (!x || my_hash_insert(&ddl_log_ids, x)) goto err2; } +#ifdef HAVE_REPLICATION + if (((Query_log_event *)ev)->is_commit() || + ((Query_log_event *)ev)->is_rollback()) + { + ctx.last_gtid_no2pc= true; + ctx.update_binlog_unsafe_coord_if_needed(linfo); + } +#endif break; } case BINLOG_CHECKPOINT_EVENT: - if (first_round && do_xa) + if (round == 1 && do_xa) { size_t dir_len; Binlog_checkpoint_log_event *cev= (Binlog_checkpoint_log_event *)ev; @@ -10568,8 +11255,9 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, } } break; +#ifdef HAVE_REPLICATION case GTID_LIST_EVENT: - if (first_round) + if (round == 1 || (ctx.do_truncate && ctx.id_binlog == 0)) { Gtid_list_log_event *glev= (Gtid_list_log_event *)ev; @@ -10579,20 +11267,13 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, } break; -#ifdef HAVE_REPLICATION case GTID_EVENT: - if (first_round) - { - Gtid_log_event *gev= (Gtid_log_event *)ev; + ctx.process_gtid(round, (Gtid_log_event *)ev, linfo); + break; - /* Update the binlog state with any GTID logged after Gtid_list. */ - last_gtid.domain_id= gev->domain_id; - last_gtid.server_id= gev->server_id; - last_gtid.seq_no= gev->seq_no; - last_gtid_standalone= - ((gev->flags2 & Gtid_log_event::FL_STANDALONE) ? true : false); - last_gtid_valid= true; - } + case XA_PREPARE_LOG_EVENT: + ctx.last_gtid_no2pc= true; // TODO: complete MDEV-21469 that removes this block + ctx.update_binlog_unsafe_coord_if_needed(linfo); break; #endif @@ -10606,27 +11287,31 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, default: /* Nothing. */ break; - } + } // end of switch #ifdef HAVE_REPLICATION - if (last_gtid_valid && - ((last_gtid_standalone && !ev->is_part_of_group(typ)) || - (!last_gtid_standalone && - (typ == XID_EVENT || - typ == XA_PREPARE_LOG_EVENT || - (LOG_EVENT_IS_QUERY(typ) && - (((Query_log_event *)ev)->is_commit() || - ((Query_log_event *)ev)->is_rollback())))))) + if (ctx.last_gtid_valid && + ((ctx.last_gtid_standalone && !ev->is_part_of_group(typ)) || + (!ctx.last_gtid_standalone && + (typ == XID_EVENT || ctx.last_gtid_no2pc)))) { - if (rpl_global_gtid_binlog_state.update_nolock(&last_gtid, false)) - goto err2; - last_gtid_valid= false; - } -#endif + DBUG_ASSERT(round == 1 || (ctx.do_truncate && !ctx.truncate_validated)); + DBUG_ASSERT(!ctx.last_gtid_no2pc || + (ctx.last_gtid_standalone || + typ == XA_PREPARE_LOG_EVENT || + (LOG_EVENT_IS_QUERY(typ) && + (((Query_log_event *)ev)->is_commit() || + ((Query_log_event *)ev)->is_rollback())))); + if (rpl_global_gtid_binlog_state.update_nolock(&ctx.last_gtid, false)) + goto err2; + ctx.last_gtid_valid= false; + } + ctx.prev_event_pos= ev->log_pos; +#endif delete ev; ev= NULL; - } + } // end of while /* If the last binlog checkpoint event points to an older log, we have to @@ -10636,11 +11321,10 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, written by an older version of MariaDB (or MySQL) - these always have an (implicit) binlog checkpoint event at the start of the last binlog file. */ - if (first_round) + if (round == 1) { if (!binlog_checkpoint_found) break; - first_round= false; DBUG_EXECUTE_IF("xa_recover_expect_master_bin_000004", if (0 != strcmp("./master-bin.000004", binlog_checkpoint_name) && 0 != strcmp(".\\master-bin.000004", binlog_checkpoint_name)) @@ -10658,33 +11342,50 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, end_io_cache(&log); mysql_file_close(file, MYF(MY_WME)); file= -1; + /* + NOTE: reading other binlog's FD is necessary for finding out + the checksum status of the respective binlog file. + */ + if (find_next_log(linfo, 1)) + { + sql_print_error("Error reading binlog files during recovery. " + "Aborting."); + goto err2; + } } +#ifdef HAVE_REPLICATION + int rc= ctx.next_binlog_or_round(round, last_log_name, + binlog_checkpoint_name, linfo, this); + if (rc == -1) + goto err2; + else if (rc == 1) + break; // all rounds done +#else if (!strcmp(linfo->log_file_name, last_log_name)) break; // No more files to do + round++; +#endif + if ((file= open_binlog(&log, linfo->log_file_name, &errmsg)) < 0) { sql_print_error("%s", errmsg); goto err2; } - /* - We do not need to read the Format_description_log_event of other binlog - files. It is not possible for a binlog checkpoint to span multiple - binlog files written by different versions of the server. So we can use - the first one read for reading from all binlog files. - */ - if (find_next_log(linfo, 1)) - { - sql_print_error("Error reading binlog files during recovery. Aborting."); - goto err2; - } fdle->reset_crypto(); - } + } // end of for if (do_xa) { - if (ha_recover(&xids)) - goto err2; + if (binlog_checkpoint_found) + { +#ifndef HAVE_REPLICATION + if (ha_recover_complete(&xids)) +#else + if (ctx.complete(this, xids)) +#endif + goto err2; + } } if (ddl_log_close_binlogged_events(&ddl_log_ids)) goto err2; @@ -10713,6 +11414,7 @@ err1: } + int MYSQL_BIN_LOG::do_binlog_recovery(const char *opt_name, bool do_xa_recovery) { diff --git a/sql/log.h b/sql/log.h index eaf7cde1c07..d61e4660330 100644 --- a/sql/log.h +++ b/sql/log.h @@ -477,6 +477,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG /* Flag used to optimise around wait_for_prior_commit. */ bool queued_by_other; ulong binlog_id; + bool ro_1pc; // passes the binlog_cache_mngr::ro_1pc value to Gtid ctor }; /* @@ -810,7 +811,8 @@ public: my_bool *with_annotate= 0); // binary log write bool write_transaction_to_binlog(THD *thd, binlog_cache_mngr *cache_mngr, Log_event *end_ev, bool all, - bool using_stmt_cache, bool using_trx_cache); + bool using_stmt_cache, bool using_trx_cache, + bool is_ro_1pc); bool write_incident_already_locked(THD *thd); bool write_incident(THD *thd); @@ -860,6 +862,9 @@ public: int purge_first_log(Relay_log_info* rli, bool included); int set_purge_index_file_name(const char *base_file_name); int open_purge_index_file(bool destroy); + bool truncate_and_remove_binlogs(const char *truncate_file, + my_off_t truncate_pos, + rpl_gtid *gtid); bool is_inited_purge_index_file(); int close_purge_index_file(); int clean_purge_index_file(); @@ -896,7 +901,8 @@ public: void set_status_variables(THD *thd); bool is_xidlist_idle(); bool write_gtid_event(THD *thd, bool standalone, bool is_transactional, - uint64 commit_id); + uint64 commit_id, + bool has_xid= false, bool ro_1pc= false); int read_state_from_file(); int write_state_to_file(); int get_most_recent_gtid_list(rpl_gtid **list, uint32 *size); @@ -1237,4 +1243,8 @@ class Gtid_list_log_event; const char * get_gtid_list_event(IO_CACHE *cache, Gtid_list_log_event **out_gtid_list); +int binlog_commit(THD *thd, bool all, bool is_ro_1pc); +int binlog_commit_by_xid(handlerton *hton, XID *xid); +int binlog_rollback_by_xid(handlerton *hton, XID *xid); + #endif /* LOG_H */ diff --git a/sql/log_event.cc b/sql/log_event.cc index 9c7c56b1c34..c77059ee8f5 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -53,6 +53,7 @@ #include "sql_digest.h" #include "zlib.h" #include "myisampack.h" +#include #define my_b_write_string(A, B) my_b_write((A), (uchar*)(B), (uint) (sizeof(B) - 1)) @@ -2560,10 +2561,12 @@ Binlog_checkpoint_log_event::Binlog_checkpoint_log_event( Gtid_log_event::Gtid_log_event(const uchar *buf, uint event_len, const Format_description_log_event *description_event) - : Log_event(buf, description_event), seq_no(0), commit_id(0) + : Log_event(buf, description_event), seq_no(0), commit_id(0), + flags_extra(0), extra_engines(0) { uint8 header_size= description_event->common_header_len; uint8 post_header_len= description_event->post_header_len[GTID_EVENT-1]; + const uchar *buf_0= buf; if (event_len < (uint) header_size + (uint) post_header_len || post_header_len < GTID_HEADER_LEN) return; @@ -2597,6 +2600,32 @@ Gtid_log_event::Gtid_log_event(const uchar *buf, uint event_len, memcpy(xid.data, buf, data_length); buf+= data_length; } + + /* the extra flags check and actions */ + if (static_cast(buf - buf_0) < event_len) + { + flags_extra= *buf++; + /* + extra engines flags presence is identifed by non-zero byte value + at this point + */ + if (flags_extra & FL_EXTRA_MULTI_ENGINE) + { + DBUG_ASSERT(static_cast(buf - buf_0) < event_len); + + extra_engines= *buf++; + + DBUG_ASSERT(extra_engines > 0); + } + } + /* + the strict '<' part of the assert corresponds to extra zero-padded + trailing bytes, + */ + DBUG_ASSERT(static_cast(buf - buf_0) <= event_len); + /* and the last of them is tested. */ + DBUG_ASSERT(static_cast(buf - buf_0) == event_len || + buf_0[event_len - 1] == 0); } diff --git a/sql/log_event.h b/sql/log_event.h index 990d95e1dc0..3adc7a26d93 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -489,6 +489,16 @@ class String; */ #define LOG_EVENT_IGNORABLE_F 0x80 +/** + @def LOG_EVENT_ACCEPT_OWN_F + + Flag sets by the semisync slave for accepting + the same server_id ("own") events which the slave must not have + in its state. Typically such events were never committed by + their originator (this server) and discared at its semisync-slave recovery. +*/ +#define LOG_EVENT_ACCEPT_OWN_F 0x4000 + /** @def LOG_EVENT_SKIP_REPLICATION_F @@ -3602,6 +3612,13 @@ public: event_mysql_xid_t xid; #endif uchar flags2; + uint flags_extra; // more flags area placed after the regular flags2's one + /* + Number of engine participants in transaction minus 1. + When zero the event does not contain that information. + */ + uint8 extra_engines; + /* Flags2. */ /* FL_STANDALONE is set when there is no terminating COMMIT event. */ @@ -3633,9 +3650,19 @@ public: /* FL_"COMMITTED or ROLLED-BACK"_XA is set for XA transaction. */ static const uchar FL_COMPLETED_XA= 128; + /* Flags_extra. */ + + /* + FL_EXTRA_MULTI_ENGINE is set for event group comprising a transaction + involving multiple storage engines. No flag and extra data are added + to the event when the transaction involves only one engine. + */ + static const uchar FL_EXTRA_MULTI_ENGINE= 1; + #ifdef MYSQL_SERVER Gtid_log_event(THD *thd_arg, uint64 seq_no, uint32 domain_id, bool standalone, - uint16 flags, bool is_transactional, uint64 commit_id); + uint16 flags, bool is_transactional, uint64 commit_id, + bool has_xid= false, bool is_ro_1pc= false); #ifdef HAVE_REPLICATION void pack_info(Protocol *protocol); virtual int do_apply_event(rpl_group_info *rgi); diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc index e216724ca7a..d97e87fc4e9 100644 --- a/sql/log_event_server.cc +++ b/sql/log_event_server.cc @@ -644,7 +644,7 @@ Log_event::do_shall_skip(rpl_group_info *rgi) rli->replicate_same_server_id, rli->slave_skip_counter)); if ((server_id == global_system_variables.server_id && - !rli->replicate_same_server_id) || + !(rli->replicate_same_server_id || (flags & LOG_EVENT_ACCEPT_OWN_F))) || (rli->slave_skip_counter == 1 && rli->is_in_group()) || (flags & LOG_EVENT_SKIP_REPLICATION_F && opt_replicate_events_marked_for_skip != RPL_SKIP_REPLICATE)) @@ -3261,10 +3261,13 @@ bool Binlog_checkpoint_log_event::write() Gtid_log_event::Gtid_log_event(THD *thd_arg, uint64 seq_no_arg, uint32 domain_id_arg, bool standalone, uint16 flags_arg, bool is_transactional, - uint64 commit_id_arg) + uint64 commit_id_arg, bool has_xid, + bool ro_1pc) : Log_event(thd_arg, flags_arg, is_transactional), seq_no(seq_no_arg), commit_id(commit_id_arg), domain_id(domain_id_arg), - flags2((standalone ? FL_STANDALONE : 0) | (commit_id_arg ? FL_GROUP_COMMIT_ID : 0)) + flags2((standalone ? FL_STANDALONE : 0) | + (commit_id_arg ? FL_GROUP_COMMIT_ID : 0)), + flags_extra(0), extra_engines(0) { cache_type= Log_event::EVENT_NO_CACHE; bool is_tmp_table= thd_arg->lex->stmt_accessed_temp_table(); @@ -3287,15 +3290,40 @@ Gtid_log_event::Gtid_log_event(THD *thd_arg, uint64 seq_no_arg, flags2|= (thd_arg->rgi_slave->gtid_ev_flags2 & (FL_DDL|FL_WAITED)); XID_STATE &xid_state= thd->transaction->xid_state; - if (is_transactional && xid_state.is_explicit_XA() && - (thd->lex->sql_command == SQLCOM_XA_PREPARE || - xid_state.get_state_code() == XA_PREPARED)) + if (is_transactional) { - DBUG_ASSERT(thd->lex->xa_opt != XA_ONE_PHASE); + if (xid_state.is_explicit_XA() && + (thd->lex->sql_command == SQLCOM_XA_PREPARE || + xid_state.get_state_code() == XA_PREPARED)) + { + DBUG_ASSERT(thd->lex->xa_opt != XA_ONE_PHASE); - flags2|= thd->lex->sql_command == SQLCOM_XA_PREPARE ? - FL_PREPARED_XA : FL_COMPLETED_XA; - xid.set(xid_state.get_xid()); + flags2|= thd->lex->sql_command == SQLCOM_XA_PREPARE ? + FL_PREPARED_XA : FL_COMPLETED_XA; + xid.set(xid_state.get_xid()); + } + /* count non-zero extra recoverable engines; total = extra + 1 */ + if (has_xid) + { + DBUG_ASSERT(ha_count_rw_2pc(thd_arg, + thd_arg->in_multi_stmt_transaction_mode())); + + extra_engines= + ha_count_rw_2pc(thd_arg, thd_arg->in_multi_stmt_transaction_mode()) - 1; + } + else if (ro_1pc) + { + extra_engines= UCHAR_MAX; + } + else if (thd->lex->sql_command == SQLCOM_XA_PREPARE) + { + DBUG_ASSERT(thd_arg->in_multi_stmt_transaction_mode()); + + uint8 count= ha_count_rw_2pc(thd_arg, true); + extra_engines= count > 1 ? 0 : UCHAR_MAX; + } + if (extra_engines > 0) + flags_extra|= FL_EXTRA_MULTI_ENGINE; } } @@ -3339,19 +3367,19 @@ Gtid_log_event::peek(const uchar *event_start, size_t event_len, bool Gtid_log_event::write() { - uchar buf[GTID_HEADER_LEN+2+sizeof(XID)]; - size_t write_len; + uchar buf[GTID_HEADER_LEN+2+sizeof(XID) + /* flags_extra: */ 1+4]; + size_t write_len= 13; int8store(buf, seq_no); int4store(buf+8, domain_id); buf[12]= flags2; if (flags2 & FL_GROUP_COMMIT_ID) { - int8store(buf+13, commit_id); + DBUG_ASSERT(write_len + 8 == GTID_HEADER_LEN + 2); + + int8store(buf+write_len, commit_id); write_len= GTID_HEADER_LEN + 2; } - else - write_len= 13; if (flags2 & (FL_PREPARED_XA | FL_COMPLETED_XA)) { @@ -3363,6 +3391,16 @@ Gtid_log_event::write() memcpy(buf+write_len, xid.data, data_length); write_len+= data_length; } + if (flags_extra > 0) + { + buf[write_len]= flags_extra; + write_len++; + } + if (flags_extra & FL_EXTRA_MULTI_ENGINE) + { + buf[write_len]= extra_engines; + write_len++; + } if (write_len < GTID_HEADER_LEN) { diff --git a/sql/slave.cc b/sql/slave.cc index 17c2d59c9d7..e7aa0d4a510 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -6173,6 +6173,15 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len) uchar new_buf_arr[4096]; bool is_malloc = false; bool is_rows_event= false; + /* + The flag has replicate_same_server_id semantics and is raised to accept + a same-server-id event on the semisync slave, for both the gtid and legacy + connection modes. + Such events can appear as result of this server recovery so the event + was created there and replicated elsewhere right before the crash. At recovery + it could be evicted from the server's binlog. + */ + bool do_accept_own_server_id= false; /* FD_q must have been prepared for the first R_a event inside get_master_version_and_clock() @@ -6234,6 +6243,7 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len) unlock_data_lock= FALSE; goto err; } + DBUG_ASSERT(((uchar) buf[FLAGS_OFFSET] & LOG_EVENT_ACCEPT_OWN_F) == 0); if (mi->rli.relay_log.description_event_for_queue->binlog_version<4 && buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT /* a way to escape */) @@ -6933,7 +6943,8 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len) } else if ((s_id == global_system_variables.server_id && - !mi->rli.replicate_same_server_id) || + !(mi->rli.replicate_same_server_id || + (do_accept_own_server_id= rpl_semi_sync_slave_enabled))) || event_that_should_be_ignored(buf) || /* the following conjunction deals with IGNORE_SERVER_IDS, if set @@ -6993,6 +7004,19 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len) } else { + if (do_accept_own_server_id) + { + int2store(const_cast(buf + FLAGS_OFFSET), + uint2korr(buf + FLAGS_OFFSET) | LOG_EVENT_ACCEPT_OWN_F); + if (checksum_alg != BINLOG_CHECKSUM_ALG_OFF) + { + ha_checksum crc= 0; + + crc= my_checksum(crc, (const uchar *) buf, + event_len - BINLOG_CHECKSUM_LEN); + int4store(&buf[event_len - BINLOG_CHECKSUM_LEN], crc); + } + } if (likely(!rli->relay_log.write_event_buffer((uchar*)buf, event_len))) { mi->master_log_pos+= inc_pos; diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 125abe334fc..5e64007ef84 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -831,6 +831,7 @@ void log_write_up_to(lsn_t lsn, bool flush_to_disk, bool rotate_key, flush_lock.release(flush_lsn); log_flush_notify(flush_lsn); + DBUG_EXECUTE_IF("crash_after_log_write_upto", DBUG_SUICIDE();); } /** write to the log file up to the last log entry.