From 7ee1a41ce1c7409790bd27ce34dc61e540acb472 Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Wed, 8 Apr 2015 13:15:04 +0200 Subject: [PATCH] MDEV-7888, MDEV-7929: Parallel replication hangs sometimes on ANALYZE TABLE or DDL Follow-up patch with 10.1-specific changes. Add test cases that more closely resembles the original bug report (which uses the 10.1-specific --slave-parallel-mode=optimistic). Also fix the code so that ANALYZE statements are now marked as DDL, and will not be attempted to speculatively run in parallel with other transactions. --- .../rpl/r/rpl_parallel_optimistic.result | 168 ++++++++++++++++++ .../suite/rpl/t/rpl_parallel_optimistic.test | 124 +++++++++++++ sql/log_event.cc | 3 +- 3 files changed, 294 insertions(+), 1 deletion(-) diff --git a/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result b/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result index 2c34a54af1e..7593b490894 100644 --- a/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result +++ b/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result @@ -287,6 +287,174 @@ include/stop_slave.inc SET GLOBAL binlog_format= @old_format; SET GLOBAL tx_isolation= @old_isolation; include/start_slave.inc +*** MDEV-7888: ANALYZE TABLE does wakeup_subsequent_commits(), causing wrong binlog order and parallel replication hang *** +DROP TABLE t1, t2, t3; +CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +CREATE TABLE t3 (a INT PRIMARY KEY, b INT) ENGINE=MyISAM; +INSERT INTO t2 VALUES (1,1), (2,1), (3,1), (4,1), (5,1); +include/save_master_gtid.inc +include/sync_with_master_gtid.inc +include/stop_slave.inc +SET @old_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug= '+d,inject_analyze_table_sleep'; +ALTER TABLE t2 COMMENT "123abc"; +ANALYZE TABLE t2; +Table Op Msg_type Msg_text +test.t2 analyze status OK +INSERT INTO t1 VALUES (1,2); +INSERT INTO t1 VALUES (2,2); +INSERT INTO t1 VALUES (3,2); +INSERT INTO t1 VALUES (4,2); +INSERT INTO t3 VALUES (1,3); +ALTER TABLE t2 COMMENT "hello, world"; +BEGIN; +INSERT INTO t1 VALUES (5,4); +INSERT INTO t1 VALUES (6,4); +INSERT INTO t1 VALUES (7,4); +INSERT INTO t1 VALUES (8,4); +INSERT INTO t1 VALUES (9,4); +INSERT INTO t1 VALUES (10,4); +INSERT INTO t1 VALUES (11,4); +INSERT INTO t1 VALUES (12,4); +INSERT INTO t1 VALUES (13,4); +INSERT INTO t1 VALUES (14,4); +INSERT INTO t1 VALUES (15,4); +INSERT INTO t1 VALUES (16,4); +INSERT INTO t1 VALUES (17,4); +INSERT INTO t1 VALUES (18,4); +INSERT INTO t1 VALUES (19,4); +INSERT INTO t1 VALUES (20,4); +COMMIT; +INSERT INTO t1 VALUES (21,5); +INSERT INTO t1 VALUES (22,5); +SELECT * FROM t1 ORDER BY a; +a b +1 2 +2 2 +3 2 +4 2 +5 4 +6 4 +7 4 +8 4 +9 4 +10 4 +11 4 +12 4 +13 4 +14 4 +15 4 +16 4 +17 4 +18 4 +19 4 +20 4 +21 5 +22 5 +SELECT * FROM t2 ORDER BY a; +a b +1 1 +2 1 +3 1 +4 1 +5 1 +SELECT * FROM t3 ORDER BY a; +a b +1 3 +include/save_master_gtid.inc +include/start_slave.inc +include/sync_with_master_gtid.inc +SELECT * FROM t1 ORDER BY a; +a b +1 2 +2 2 +3 2 +4 2 +5 4 +6 4 +7 4 +8 4 +9 4 +10 4 +11 4 +12 4 +13 4 +14 4 +15 4 +16 4 +17 4 +18 4 +19 4 +20 4 +21 5 +22 5 +SELECT * FROM t2 ORDER BY a; +a b +1 1 +2 1 +3 1 +4 1 +5 1 +SELECT * FROM t3 ORDER BY a; +a b +1 3 +include/stop_slave.inc +SET GLOBAL debug_dbug= @old_debug; +include/start_slave.inc +*** MDEV-7929: record_gtid() for non-transactional event group calls wakeup_subsequent_commits() too early, causing slave hang. *** +include/stop_slave.inc +SET @old_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug= '+d,inject_record_gtid_serverid_100_sleep'; +ALTER TABLE t3 COMMENT "DDL statement 1"; +INSERT INTO t1 VALUES (30,0); +INSERT INTO t1 VALUES (31,0); +INSERT INTO t1 VALUES (32,0); +INSERT INTO t1 VALUES (33,0); +INSERT INTO t1 VALUES (34,0); +INSERT INTO t1 VALUES (35,0); +INSERT INTO t1 VALUES (36,0); +SET @old_server_id= @@SESSION.server_id; +SET SESSION server_id= 100; +ANALYZE TABLE t2; +Table Op Msg_type Msg_text +test.t2 analyze status OK +SET SESSION server_id= @old_server_id; +INSERT INTO t1 VALUES (37,0); +ALTER TABLE t3 COMMENT "DDL statement 2"; +INSERT INTO t1 VALUES (38,0); +INSERT INTO t1 VALUES (39,0); +ALTER TABLE t3 COMMENT "DDL statement 3"; +SELECT * FROM t1 WHERE a >= 30 ORDER BY a; +a b +30 0 +31 0 +32 0 +33 0 +34 0 +35 0 +36 0 +37 0 +38 0 +39 0 +include/save_master_gtid.inc +include/start_slave.inc +include/sync_with_master_gtid.inc +SELECT * FROM t1 WHERE a >= 30 ORDER BY a; +a b +30 0 +31 0 +32 0 +33 0 +34 0 +35 0 +36 0 +37 0 +38 0 +39 0 +include/stop_slave.inc +SET GLOBAL debug_dbug= @old_debug; +include/start_slave.inc include/stop_slave.inc SET GLOBAL slave_parallel_mode=@old_parallel_mode; SET GLOBAL slave_parallel_threads=@old_parallel_threads; diff --git a/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test b/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test index 7afeb9e2779..e7d4f18f8a5 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test +++ b/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/have_debug.inc --source include/have_debug_sync.inc --let $rpl_topology=1->2 --source include/rpl_init.inc @@ -307,6 +308,129 @@ SET GLOBAL tx_isolation= @old_isolation; --source include/start_slave.inc +--echo *** MDEV-7888: ANALYZE TABLE does wakeup_subsequent_commits(), causing wrong binlog order and parallel replication hang *** + +--connection server_1 +DROP TABLE t1, t2, t3; +CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +CREATE TABLE t3 (a INT PRIMARY KEY, b INT) ENGINE=MyISAM; +INSERT INTO t2 VALUES (1,1), (2,1), (3,1), (4,1), (5,1); +--source include/save_master_gtid.inc + +--connection server_2 +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc +SET @old_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug= '+d,inject_analyze_table_sleep'; + +--connection server_1 +# The bug was that ANALYZE TABLE would call +# wakeup_subsequent_commits() too early, allowing the following +# transaction in the same group to run ahead and binlog and free the +# GCO. Then we get wrong binlog order and later access freed GCO, +# which causes lost wakeup of following GCO and thus replication hang. +# We injected a small sleep in ANALYZE to make the race easier to hit (this +# can only cause false negatives in versions with the bug, not false positives, +# so sleep is ok here. And it's in general not possible to trigger reliably +# the race with debug_sync, since the bugfix makes the race impossible). + +ALTER TABLE t2 COMMENT "123abc"; +ANALYZE TABLE t2; +INSERT INTO t1 VALUES (1,2); +INSERT INTO t1 VALUES (2,2); +INSERT INTO t1 VALUES (3,2); +INSERT INTO t1 VALUES (4,2); +INSERT INTO t3 VALUES (1,3); +ALTER TABLE t2 COMMENT "hello, world"; +BEGIN; +INSERT INTO t1 VALUES (5,4); +INSERT INTO t1 VALUES (6,4); +INSERT INTO t1 VALUES (7,4); +INSERT INTO t1 VALUES (8,4); +INSERT INTO t1 VALUES (9,4); +INSERT INTO t1 VALUES (10,4); +INSERT INTO t1 VALUES (11,4); +INSERT INTO t1 VALUES (12,4); +INSERT INTO t1 VALUES (13,4); +INSERT INTO t1 VALUES (14,4); +INSERT INTO t1 VALUES (15,4); +INSERT INTO t1 VALUES (16,4); +INSERT INTO t1 VALUES (17,4); +INSERT INTO t1 VALUES (18,4); +INSERT INTO t1 VALUES (19,4); +INSERT INTO t1 VALUES (20,4); +COMMIT; +INSERT INTO t1 VALUES (21,5); +INSERT INTO t1 VALUES (22,5); + +SELECT * FROM t1 ORDER BY a; +SELECT * FROM t2 ORDER BY a; +SELECT * FROM t3 ORDER BY a; +--source include/save_master_gtid.inc + +--connection server_2 +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc + +SELECT * FROM t1 ORDER BY a; +SELECT * FROM t2 ORDER BY a; +SELECT * FROM t3 ORDER BY a; + +--source include/stop_slave.inc +SET GLOBAL debug_dbug= @old_debug; +--source include/start_slave.inc + +--echo *** MDEV-7929: record_gtid() for non-transactional event group calls wakeup_subsequent_commits() too early, causing slave hang. *** + +--connection server_2 +--source include/stop_slave.inc +SET @old_dbug= @@GLOBAL.debug_dbug; +# The bug was that record_gtid(), when there is no existing transaction from +# a DML event being replicated, would commit its own transaction. This wrongly +# caused wakeup_subsequent_commits(), with similar consequences as MDEV-7888 +# above. We simulate this condition with a small sleep in record_gtid() for +# a specific ANALYZE that we binlog with server id 100. +SET GLOBAL debug_dbug= '+d,inject_record_gtid_serverid_100_sleep'; + +--connection server_1 + +ALTER TABLE t3 COMMENT "DDL statement 1"; +INSERT INTO t1 VALUES (30,0); +INSERT INTO t1 VALUES (31,0); +INSERT INTO t1 VALUES (32,0); +INSERT INTO t1 VALUES (33,0); +INSERT INTO t1 VALUES (34,0); +INSERT INTO t1 VALUES (35,0); +INSERT INTO t1 VALUES (36,0); +SET @old_server_id= @@SESSION.server_id; +SET SESSION server_id= 100; +ANALYZE TABLE t2; +SET SESSION server_id= @old_server_id; +INSERT INTO t1 VALUES (37,0); +ALTER TABLE t3 COMMENT "DDL statement 2"; +INSERT INTO t1 VALUES (38,0); +INSERT INTO t1 VALUES (39,0); +ALTER TABLE t3 COMMENT "DDL statement 3"; + +SELECT * FROM t1 WHERE a >= 30 ORDER BY a; + +--source include/save_master_gtid.inc + + +--connection server_2 +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc +SELECT * FROM t1 WHERE a >= 30 ORDER BY a; + + +--source include/stop_slave.inc +SET GLOBAL debug_dbug= @old_debug; +--source include/start_slave.inc + + +# Clean up. + --connection server_2 --source include/stop_slave.inc SET GLOBAL slave_parallel_mode=@old_parallel_mode; diff --git a/sql/log_event.cc b/sql/log_event.cc index 650610fab84..aa349960ea7 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -6405,7 +6405,8 @@ Gtid_log_event::Gtid_log_event(THD *thd_arg, uint64 seq_no_arg, if (thd_arg->transaction.stmt.trans_did_wait() || thd_arg->transaction.all.trans_did_wait()) flags2|= FL_WAITED; - if (sql_command_flags[thd->lex->sql_command] & CF_DISALLOW_IN_RO_TRANS) + if (sql_command_flags[thd->lex->sql_command] & + (CF_DISALLOW_IN_RO_TRANS | CF_AUTO_COMMIT_TRANS)) flags2|= FL_DDL; else if (is_transactional) flags2|= FL_TRANSACTIONAL;