mirror of
https://github.com/MariaDB/server.git
synced 2025-08-31 22:22:30 +03:00
MDEV-7326: Server deadlock in connection with parallel replication
The bug occurs when a transaction is retried after all transactions in a batch of group commit from the master have already called mark_start_commit(). In this case, the retrying transaction can call unmark_start_commit() after the following batch has already started running and de-allocated the GCO (group commit orderer). Then, after the retry, the transaction will call mark_start_commit() again on a de-allocated GCO, and the wakeup of later GCOs can also be lost. This was seen "in the wild" by a user, even though it is not known exactly what circumstances can lead to the retry of one transaction after all transactions in a group have reached the commit phase. The lifetime handling of the GCO was somewhat clunky anyway. With this patch, a GCO lives until rpl_parallel_entry::last_committed_sub_id has reached the last transaction in the GCO. This guarantees that the GCO will still be alive when a transaction calls mark_start_commit(). Also, we now loop over the list of active GCOs for wakeup, to ensure that we do not lose a wakeup even in the problematic case.
This commit is contained in:
@@ -1023,6 +1023,119 @@ SET GLOBAL slave_parallel_threads=0;
|
||||
SET GLOBAL slave_parallel_threads=10;
|
||||
CHANGE MASTER TO master_use_gtid=slave_pos;
|
||||
include/start_slave.inc
|
||||
*** MDEV-7326 Server deadlock in connection with parallel replication ***
|
||||
include/stop_slave.inc
|
||||
SET GLOBAL slave_parallel_threads=0;
|
||||
SET GLOBAL slave_parallel_threads=3;
|
||||
SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_xid";
|
||||
include/start_slave.inc
|
||||
SET @old_format= @@SESSION.binlog_format;
|
||||
SET binlog_format= STATEMENT;
|
||||
INSERT INTO t1 VALUES (foo(50,
|
||||
"rpl_parallel_start_waiting_for_prior SIGNAL t3_ready",
|
||||
"rpl_parallel_end_of_group SIGNAL prep_ready WAIT_FOR prep_cont"));
|
||||
SET DEBUG_SYNC= "now WAIT_FOR prep_ready";
|
||||
INSERT INTO t2 VALUES (foo(50,
|
||||
"rpl_parallel_simulate_temp_err_xid SIGNAL t1_ready1 WAIT_FOR t1_cont1",
|
||||
"rpl_parallel_retry_after_unmark SIGNAL t1_ready2 WAIT_FOR t1_cont2"));
|
||||
SET DEBUG_SYNC= "now WAIT_FOR t1_ready1";
|
||||
INSERT INTO t1 VALUES (foo(51,
|
||||
"rpl_parallel_before_mark_start_commit SIGNAL t2_ready1 WAIT_FOR t2_cont1",
|
||||
"rpl_parallel_after_mark_start_commit SIGNAL t2_ready2"));
|
||||
SET DEBUG_SYNC= "now WAIT_FOR t2_ready1";
|
||||
SET DEBUG_SYNC= "now SIGNAL t1_cont1";
|
||||
SET DEBUG_SYNC= "now WAIT_FOR t1_ready2";
|
||||
INSERT INTO t1 VALUES (52);
|
||||
SET BINLOG_FORMAT= @old_format;
|
||||
SELECT * FROM t2 WHERE a>=50 ORDER BY a;
|
||||
a
|
||||
50
|
||||
SELECT * FROM t1 WHERE a>=50 ORDER BY a;
|
||||
a
|
||||
50
|
||||
51
|
||||
52
|
||||
SET DEBUG_SYNC= "now SIGNAL prep_cont";
|
||||
SET DEBUG_SYNC= "now WAIT_FOR t3_ready";
|
||||
SET DEBUG_SYNC= "now SIGNAL t2_cont1";
|
||||
SET DEBUG_SYNC= "now WAIT_FOR t2_ready2";
|
||||
SET DEBUG_SYNC= "now SIGNAL t1_cont2";
|
||||
SELECT * FROM t2 WHERE a>=50 ORDER BY a;
|
||||
a
|
||||
50
|
||||
SELECT * FROM t1 WHERE a>=50 ORDER BY a;
|
||||
a
|
||||
50
|
||||
51
|
||||
52
|
||||
SET DEBUG_SYNC="reset";
|
||||
include/stop_slave.inc
|
||||
SET GLOBAL debug_dbug=@old_dbug;
|
||||
SET GLOBAL slave_parallel_threads=0;
|
||||
SET GLOBAL slave_parallel_threads=10;
|
||||
include/start_slave.inc
|
||||
*** MDEV-7326 Server deadlock in connection with parallel replication ***
|
||||
include/stop_slave.inc
|
||||
SET GLOBAL slave_parallel_threads=0;
|
||||
SET GLOBAL slave_parallel_threads=3;
|
||||
SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_xid";
|
||||
include/start_slave.inc
|
||||
SET @old_format= @@SESSION.binlog_format;
|
||||
SET binlog_format= STATEMENT;
|
||||
INSERT INTO t1 VALUES (foo(60,
|
||||
"rpl_parallel_start_waiting_for_prior SIGNAL t3_ready",
|
||||
"rpl_parallel_end_of_group SIGNAL prep_ready WAIT_FOR prep_cont"));
|
||||
SET DEBUG_SYNC= "now WAIT_FOR prep_ready";
|
||||
INSERT INTO t2 VALUES (foo(60,
|
||||
"rpl_parallel_simulate_temp_err_xid SIGNAL t1_ready1 WAIT_FOR t1_cont1",
|
||||
"rpl_parallel_retry_after_unmark SIGNAL t1_ready2 WAIT_FOR t1_cont2"));
|
||||
SET DEBUG_SYNC= "now WAIT_FOR t1_ready1";
|
||||
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1';
|
||||
SET binlog_format=statement;
|
||||
INSERT INTO t1 VALUES (foo(61,
|
||||
"rpl_parallel_before_mark_start_commit SIGNAL t2_ready1 WAIT_FOR t2_cont1",
|
||||
"rpl_parallel_after_mark_start_commit SIGNAL t2_ready2"));
|
||||
SET debug_sync='now WAIT_FOR master_queued1';
|
||||
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2';
|
||||
INSERT INTO t6 VALUES (62);
|
||||
SET debug_sync='now WAIT_FOR master_queued2';
|
||||
SET debug_sync='now SIGNAL master_cont1';
|
||||
SET debug_sync='RESET';
|
||||
SET BINLOG_FORMAT= @old_format;
|
||||
SELECT * FROM t2 WHERE a>=60 ORDER BY a;
|
||||
a
|
||||
60
|
||||
SELECT * FROM t1 WHERE a>=60 ORDER BY a;
|
||||
a
|
||||
60
|
||||
61
|
||||
SELECT * FROM t6 WHERE a>=60 ORDER BY a;
|
||||
a
|
||||
62
|
||||
SET DEBUG_SYNC= "now WAIT_FOR t2_ready1";
|
||||
SET DEBUG_SYNC= "now SIGNAL t1_cont1";
|
||||
SET DEBUG_SYNC= "now WAIT_FOR t1_ready2";
|
||||
SET DEBUG_SYNC= "now SIGNAL prep_cont";
|
||||
SET DEBUG_SYNC= "now WAIT_FOR t3_ready";
|
||||
SET DEBUG_SYNC= "now SIGNAL t2_cont1";
|
||||
SET DEBUG_SYNC= "now WAIT_FOR t2_ready2";
|
||||
SET DEBUG_SYNC= "now SIGNAL t1_cont2";
|
||||
SELECT * FROM t2 WHERE a>=60 ORDER BY a;
|
||||
a
|
||||
60
|
||||
SELECT * FROM t1 WHERE a>=60 ORDER BY a;
|
||||
a
|
||||
60
|
||||
61
|
||||
SELECT * FROM t6 WHERE a>=60 ORDER BY a;
|
||||
a
|
||||
62
|
||||
SET DEBUG_SYNC="reset";
|
||||
include/stop_slave.inc
|
||||
SET GLOBAL debug_dbug=@old_dbug;
|
||||
SET GLOBAL slave_parallel_threads=0;
|
||||
SET GLOBAL slave_parallel_threads=10;
|
||||
include/start_slave.inc
|
||||
include/stop_slave.inc
|
||||
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
|
||||
include/start_slave.inc
|
||||
|
Reference in New Issue
Block a user