1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-27 18:02:13 +03:00

MDEV-7888, MDEV-7929: Parallel replication hangs sometimes on ANALYZE TABLE or DDL

Follow-up patch with 10.1-specific changes.

Add test cases that more closely resembles the original bug report (which uses
the 10.1-specific --slave-parallel-mode=optimistic).

Also fix the code so that ANALYZE statements are now marked as DDL, and will
not be attempted to speculatively run in parallel with other transactions.
This commit is contained in:
Kristian Nielsen
2015-04-08 13:15:04 +02:00
parent 48c10fb5f7
commit 7ee1a41ce1
3 changed files with 294 additions and 1 deletions

View File

@ -1,4 +1,5 @@
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
--let $rpl_topology=1->2
--source include/rpl_init.inc
@ -307,6 +308,129 @@ SET GLOBAL tx_isolation= @old_isolation;
--source include/start_slave.inc
--echo *** MDEV-7888: ANALYZE TABLE does wakeup_subsequent_commits(), causing wrong binlog order and parallel replication hang ***
--connection server_1
DROP TABLE t1, t2, t3;
CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
CREATE TABLE t3 (a INT PRIMARY KEY, b INT) ENGINE=MyISAM;
INSERT INTO t2 VALUES (1,1), (2,1), (3,1), (4,1), (5,1);
--source include/save_master_gtid.inc
--connection server_2
--source include/sync_with_master_gtid.inc
--source include/stop_slave.inc
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug= '+d,inject_analyze_table_sleep';
--connection server_1
# The bug was that ANALYZE TABLE would call
# wakeup_subsequent_commits() too early, allowing the following
# transaction in the same group to run ahead and binlog and free the
# GCO. Then we get wrong binlog order and later access freed GCO,
# which causes lost wakeup of following GCO and thus replication hang.
# We injected a small sleep in ANALYZE to make the race easier to hit (this
# can only cause false negatives in versions with the bug, not false positives,
# so sleep is ok here. And it's in general not possible to trigger reliably
# the race with debug_sync, since the bugfix makes the race impossible).
ALTER TABLE t2 COMMENT "123abc";
ANALYZE TABLE t2;
INSERT INTO t1 VALUES (1,2);
INSERT INTO t1 VALUES (2,2);
INSERT INTO t1 VALUES (3,2);
INSERT INTO t1 VALUES (4,2);
INSERT INTO t3 VALUES (1,3);
ALTER TABLE t2 COMMENT "hello, world";
BEGIN;
INSERT INTO t1 VALUES (5,4);
INSERT INTO t1 VALUES (6,4);
INSERT INTO t1 VALUES (7,4);
INSERT INTO t1 VALUES (8,4);
INSERT INTO t1 VALUES (9,4);
INSERT INTO t1 VALUES (10,4);
INSERT INTO t1 VALUES (11,4);
INSERT INTO t1 VALUES (12,4);
INSERT INTO t1 VALUES (13,4);
INSERT INTO t1 VALUES (14,4);
INSERT INTO t1 VALUES (15,4);
INSERT INTO t1 VALUES (16,4);
INSERT INTO t1 VALUES (17,4);
INSERT INTO t1 VALUES (18,4);
INSERT INTO t1 VALUES (19,4);
INSERT INTO t1 VALUES (20,4);
COMMIT;
INSERT INTO t1 VALUES (21,5);
INSERT INTO t1 VALUES (22,5);
SELECT * FROM t1 ORDER BY a;
SELECT * FROM t2 ORDER BY a;
SELECT * FROM t3 ORDER BY a;
--source include/save_master_gtid.inc
--connection server_2
--source include/start_slave.inc
--source include/sync_with_master_gtid.inc
SELECT * FROM t1 ORDER BY a;
SELECT * FROM t2 ORDER BY a;
SELECT * FROM t3 ORDER BY a;
--source include/stop_slave.inc
SET GLOBAL debug_dbug= @old_debug;
--source include/start_slave.inc
--echo *** MDEV-7929: record_gtid() for non-transactional event group calls wakeup_subsequent_commits() too early, causing slave hang. ***
--connection server_2
--source include/stop_slave.inc
SET @old_dbug= @@GLOBAL.debug_dbug;
# The bug was that record_gtid(), when there is no existing transaction from
# a DML event being replicated, would commit its own transaction. This wrongly
# caused wakeup_subsequent_commits(), with similar consequences as MDEV-7888
# above. We simulate this condition with a small sleep in record_gtid() for
# a specific ANALYZE that we binlog with server id 100.
SET GLOBAL debug_dbug= '+d,inject_record_gtid_serverid_100_sleep';
--connection server_1
ALTER TABLE t3 COMMENT "DDL statement 1";
INSERT INTO t1 VALUES (30,0);
INSERT INTO t1 VALUES (31,0);
INSERT INTO t1 VALUES (32,0);
INSERT INTO t1 VALUES (33,0);
INSERT INTO t1 VALUES (34,0);
INSERT INTO t1 VALUES (35,0);
INSERT INTO t1 VALUES (36,0);
SET @old_server_id= @@SESSION.server_id;
SET SESSION server_id= 100;
ANALYZE TABLE t2;
SET SESSION server_id= @old_server_id;
INSERT INTO t1 VALUES (37,0);
ALTER TABLE t3 COMMENT "DDL statement 2";
INSERT INTO t1 VALUES (38,0);
INSERT INTO t1 VALUES (39,0);
ALTER TABLE t3 COMMENT "DDL statement 3";
SELECT * FROM t1 WHERE a >= 30 ORDER BY a;
--source include/save_master_gtid.inc
--connection server_2
--source include/start_slave.inc
--source include/sync_with_master_gtid.inc
SELECT * FROM t1 WHERE a >= 30 ORDER BY a;
--source include/stop_slave.inc
SET GLOBAL debug_dbug= @old_debug;
--source include/start_slave.inc
# Clean up.
--connection server_2
--source include/stop_slave.inc
SET GLOBAL slave_parallel_mode=@old_parallel_mode;