mirror of
https://github.com/MariaDB/server.git
synced 2025-09-02 09:41:40 +03:00
MDEV-7888, MDEV-7929: Parallel replication hangs sometimes on ANALYZE TABLE or DDL
The hangs occur when the group_commit_orderer object is freed before the last mark_start_commit() call on it - this loses the wakeup to other waiting worker threads, causing them to hang until killed manually. The object was freed because wakeup_subsequent_commits() was called too early in two places: for MDEV-7888, during ANALYZE TABLE, and for MDEV-7929, during record_gtid() after processing a DDL event. The group_commit_orderer object can be freed when its last transaction has called wait_for_prior_commit(). Fix by implementing a suspend/resume mechanism for wakeup_subsequent_commits() that can be used in places where a transaction is committed without this being the commit of the actual replication event group. Also add a protection mechanism (that asserts in debug builds) which can prevent the too-early free and hang if other similar bugs should remain in other parts of the code.
This commit is contained in:
@@ -1421,6 +1421,64 @@ a b
|
||||
99 99
|
||||
include/stop_slave.inc
|
||||
SET GLOBAL slave_transaction_retries= @old_retries;
|
||||
SET GLOBAL slave_parallel_threads=10;
|
||||
include/start_slave.inc
|
||||
*** MDEV-7888: ANALYZE TABLE does wakeup_subsequent_commits(), causing wrong binlog order and parallel replication hang ***
|
||||
include/stop_slave.inc
|
||||
SET @old_dbug= @@GLOBAL.debug_dbug;
|
||||
SET GLOBAL debug_dbug= '+d,inject_analyze_table_sleep';
|
||||
SET @old_dbug= @@SESSION.debug_dbug;
|
||||
SET SESSION debug_dbug="+d,binlog_force_commit_id";
|
||||
SET @commit_id= 10000;
|
||||
ANALYZE TABLE t2;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t2 analyze status OK
|
||||
INSERT INTO t3 VALUES (120, 0);
|
||||
SET @commit_id= 10001;
|
||||
INSERT INTO t3 VALUES (121, 0);
|
||||
SET SESSION debug_dbug=@old_dbug;
|
||||
SELECT * FROM t3 WHERE a >= 120 ORDER BY a;
|
||||
a b
|
||||
120 0
|
||||
121 0
|
||||
include/save_master_gtid.inc
|
||||
include/start_slave.inc
|
||||
include/sync_with_master_gtid.inc
|
||||
SELECT * FROM t3 WHERE a >= 120 ORDER BY a;
|
||||
a b
|
||||
120 0
|
||||
121 0
|
||||
include/stop_slave.inc
|
||||
SET GLOBAL debug_dbug= @old_debug;
|
||||
include/start_slave.inc
|
||||
*** MDEV-7929: record_gtid() for non-transactional event group calls wakeup_subsequent_commits() too early, causing slave hang. ***
|
||||
include/stop_slave.inc
|
||||
SET @old_dbug= @@GLOBAL.debug_dbug;
|
||||
SET GLOBAL debug_dbug= '+d,inject_record_gtid_serverid_100_sleep';
|
||||
SET @old_dbug= @@SESSION.debug_dbug;
|
||||
SET SESSION debug_dbug="+d,binlog_force_commit_id";
|
||||
SET @old_server_id= @@SESSION.server_id;
|
||||
SET SESSION server_id= 100;
|
||||
SET @commit_id= 10010;
|
||||
ALTER TABLE t1 COMMENT "Hulubulu!";
|
||||
SET SESSION server_id= @old_server_id;
|
||||
INSERT INTO t3 VALUES (130, 0);
|
||||
SET @commit_id= 10011;
|
||||
INSERT INTO t3 VALUES (131, 0);
|
||||
SET SESSION debug_dbug=@old_dbug;
|
||||
SELECT * FROM t3 WHERE a >= 130 ORDER BY a;
|
||||
a b
|
||||
130 0
|
||||
131 0
|
||||
include/save_master_gtid.inc
|
||||
include/start_slave.inc
|
||||
include/sync_with_master_gtid.inc
|
||||
SELECT * FROM t3 WHERE a >= 130 ORDER BY a;
|
||||
a b
|
||||
130 0
|
||||
131 0
|
||||
include/stop_slave.inc
|
||||
SET GLOBAL debug_dbug= @old_debug;
|
||||
include/start_slave.inc
|
||||
include/stop_slave.inc
|
||||
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
|
||||
|
Reference in New Issue
Block a user