mirror of
https://github.com/MariaDB/server.git
synced 2025-07-27 18:02:13 +03:00
MDEV-7888, MDEV-7929: Parallel replication hangs sometimes on ANALYZE TABLE or DDL
Follow-up patch with 10.1-specific changes. Add test cases that more closely resemble the original bug report (which uses the 10.1-specific --slave-parallel-mode=optimistic). Also fix the code so that ANALYZE statements are now marked as DDL and are not speculatively run in parallel with other transactions.
This commit is contained in:
@ -1,4 +1,5 @@
|
||||
# Prerequisites: the test cases below create InnoDB tables and enable
# debug_dbug injection points, so the corresponding include files are sourced
# first (presumably they skip the test when a feature is unavailable).
--source include/have_innodb.inc
|
||||
--source include/have_debug.inc
|
||||
--source include/have_debug_sync.inc
|
||||
# Replication topology consumed by rpl_init.inc; presumably "server 1 is the
# master of server 2" (the test uses connections server_1 and server_2).
--let $rpl_topology=1->2
|
||||
--source include/rpl_init.inc
|
||||
@ -307,6 +308,129 @@ SET GLOBAL tx_isolation= @old_isolation;
|
||||
--source include/start_slave.inc
|
||||
|
||||
|
||||
--echo *** MDEV-7888: ANALYZE TABLE does wakeup_subsequent_commits(), causing wrong binlog order and parallel replication hang ***
|
||||
|
||||
# On the master: recreate t1 and t2 as InnoDB and t3 as MyISAM, then seed t2
# with five rows.
--connection server_1
|
||||
DROP TABLE t1, t2, t3;
|
||||
CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
|
||||
CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
|
||||
CREATE TABLE t3 (a INT PRIMARY KEY, b INT) ENGINE=MyISAM;
|
||||
INSERT INTO t2 VALUES (1,1), (2,1), (3,1), (4,1), (5,1);
|
||||
# Presumably records the master's current GTID position for the matching
# sync_with_master_gtid.inc on the slave.
--source include/save_master_gtid.inc
|
||||
|
||||
# On the slave: catch up with the setup above, then stop replication so the
# workload that follows accumulates on the master before the slave applies it.
--connection server_2
|
||||
--source include/sync_with_master_gtid.inc
|
||||
--source include/stop_slave.inc
|
||||
# Keep the current debug_dbug setting in @old_dbug before enabling the
# ANALYZE TABLE sleep injection point.
SET @old_dbug= @@GLOBAL.debug_dbug;
|
||||
SET GLOBAL debug_dbug= '+d,inject_analyze_table_sleep';
|
||||
|
||||
# Master-side workload, executed while the slave is stopped.
--connection server_1
|
||||
# The bug was that ANALYZE TABLE would call
|
||||
# wakeup_subsequent_commits() too early, allowing the following
|
||||
# transaction in the same group to run ahead and binlog and free the
|
||||
# GCO. Then we get wrong binlog order and later access freed GCO,
|
||||
# which causes lost wakeup of following GCO and thus replication hang.
|
||||
# We injected a small sleep in ANALYZE to make the race easier to hit (this
|
||||
# can only cause false negatives in versions with the bug, not false positives,
|
||||
# so sleep is ok here. And it's in general not possible to trigger reliably
|
||||
# the race with debug_sync, since the bugfix makes the race impossible).
|
||||
|
||||
# A DDL statement, then the ANALYZE under test, then single-row inserts into
# the InnoDB table t1 and the MyISAM table t3, followed by another DDL.
ALTER TABLE t2 COMMENT "123abc";
|
||||
ANALYZE TABLE t2;
|
||||
INSERT INTO t1 VALUES (1,2);
|
||||
INSERT INTO t1 VALUES (2,2);
|
||||
INSERT INTO t1 VALUES (3,2);
|
||||
INSERT INTO t1 VALUES (4,2);
|
||||
INSERT INTO t3 VALUES (1,3);
|
||||
ALTER TABLE t2 COMMENT "hello, world";
|
||||
# One explicit transaction containing 16 single-row inserts into t1
# (a = 5..20).
BEGIN;
|
||||
INSERT INTO t1 VALUES (5,4);
|
||||
INSERT INTO t1 VALUES (6,4);
|
||||
INSERT INTO t1 VALUES (7,4);
|
||||
INSERT INTO t1 VALUES (8,4);
|
||||
INSERT INTO t1 VALUES (9,4);
|
||||
INSERT INTO t1 VALUES (10,4);
|
||||
INSERT INTO t1 VALUES (11,4);
|
||||
INSERT INTO t1 VALUES (12,4);
|
||||
INSERT INTO t1 VALUES (13,4);
|
||||
INSERT INTO t1 VALUES (14,4);
|
||||
INSERT INTO t1 VALUES (15,4);
|
||||
INSERT INTO t1 VALUES (16,4);
|
||||
INSERT INTO t1 VALUES (17,4);
|
||||
INSERT INTO t1 VALUES (18,4);
|
||||
INSERT INTO t1 VALUES (19,4);
|
||||
INSERT INTO t1 VALUES (20,4);
|
||||
COMMIT;
|
||||
# Two more inserts issued after the transaction has committed.
INSERT INTO t1 VALUES (21,5);
|
||||
INSERT INTO t1 VALUES (22,5);
|
||||
|
||||
# Print the master-side contents of all three tables; the same queries are
# run on the slave afterwards. ORDER BY a keeps the output deterministic.
SELECT * FROM t1 ORDER BY a;
|
||||
SELECT * FROM t2 ORDER BY a;
|
||||
SELECT * FROM t3 ORDER BY a;
|
||||
--source include/save_master_gtid.inc
|
||||
|
||||
# Restart the slave with the ANALYZE sleep injection active and wait for it to
# apply the master workload; a replication hang would show up as this sync
# never completing.
--connection server_2
|
||||
--source include/start_slave.inc
|
||||
--source include/sync_with_master_gtid.inc
|
||||
|
||||
# Slave-side table contents, to be compared with the master-side output of the
# same queries.
SELECT * FROM t1 ORDER BY a;
|
||||
SELECT * FROM t2 ORDER BY a;
|
||||
SELECT * FROM t3 ORDER BY a;
|
||||
|
||||
# Restore debug_dbug from @old_dbug, the variable the original setting was
# saved in before the injection point was enabled.
--source include/stop_slave.inc
|
||||
SET GLOBAL debug_dbug= @old_dbug;
|
||||
--source include/start_slave.inc
|
||||
|
||||
--echo *** MDEV-7929: record_gtid() for non-transactional event group calls wakeup_subsequent_commits() too early, causing slave hang. ***
|
||||
|
||||
# On the slave: stop replication and keep the current debug_dbug setting in
# @old_dbug before enabling the injection point for this test case.
--connection server_2
|
||||
--source include/stop_slave.inc
|
||||
SET @old_dbug= @@GLOBAL.debug_dbug;
|
||||
# The bug was that record_gtid(), when there is no existing transaction from
|
||||
# a DML event being replicated, would commit its own transaction. This wrongly
|
||||
# caused wakeup_subsequent_commits(), with similar consequences as MDEV-7888
|
||||
# above. We simulate this condition with a small sleep in record_gtid() for
|
||||
# a specific ANALYZE that we binlog with server id 100.
|
||||
SET GLOBAL debug_dbug= '+d,inject_record_gtid_serverid_100_sleep';
|
||||
|
||||
# Master-side workload, executed while the slave is stopped.
--connection server_1
|
||||
|
||||
# A DDL statement followed by a run of single-row inserts into t1.
ALTER TABLE t3 COMMENT "DDL statement 1";
|
||||
INSERT INTO t1 VALUES (30,0);
|
||||
INSERT INTO t1 VALUES (31,0);
|
||||
INSERT INTO t1 VALUES (32,0);
|
||||
INSERT INTO t1 VALUES (33,0);
|
||||
INSERT INTO t1 VALUES (34,0);
|
||||
INSERT INTO t1 VALUES (35,0);
|
||||
INSERT INTO t1 VALUES (36,0);
|
||||
# Run one ANALYZE with the session server_id temporarily set to 100, so that
# it is the statement the server-id-100 injection point on the slave targets;
# the previous server_id is restored right after.
SET @old_server_id= @@SESSION.server_id;
|
||||
SET SESSION server_id= 100;
|
||||
ANALYZE TABLE t2;
|
||||
SET SESSION server_id= @old_server_id;
|
||||
# More DML and DDL queued behind the ANALYZE.
INSERT INTO t1 VALUES (37,0);
|
||||
ALTER TABLE t3 COMMENT "DDL statement 2";
|
||||
INSERT INTO t1 VALUES (38,0);
|
||||
INSERT INTO t1 VALUES (39,0);
|
||||
ALTER TABLE t3 COMMENT "DDL statement 3";
|
||||
|
||||
# Master-side view of the rows added by this test case (a >= 30); the same
# query is run on the slave afterwards.
SELECT * FROM t1 WHERE a >= 30 ORDER BY a;
|
||||
|
||||
--source include/save_master_gtid.inc
|
||||
|
||||
|
||||
# Restart the slave with the record_gtid() sleep injection active and wait for
# it to apply the master workload; a replication hang would show up as this
# sync never completing.
--connection server_2
|
||||
--source include/start_slave.inc
|
||||
--source include/sync_with_master_gtid.inc
|
||||
# Slave-side view of the rows added by this test case, to be compared with the
# master-side output of the same query.
SELECT * FROM t1 WHERE a >= 30 ORDER BY a;
|
||||
|
||||
|
||||
# Restore debug_dbug from @old_dbug, the variable the original setting was
# saved in before the injection point was enabled.
--source include/stop_slave.inc
|
||||
SET GLOBAL debug_dbug= @old_dbug;
|
||||
--source include/start_slave.inc
|
||||
|
||||
|
||||
# Clean up.
|
||||
|
||||
--connection server_2
|
||||
--source include/stop_slave.inc
|
||||
SET GLOBAL slave_parallel_mode=@old_parallel_mode;
|
||||
|
Reference in New Issue
Block a user