mirror of
https://github.com/MariaDB/server.git
synced 2025-09-11 05:52:26 +03:00
AKA rpl.rpl_parallel, binlog_encryption.rpl_parallel fails in buildbot with timeout in include

A replication parallel worker thread can deadlock with another connection running SHOW SLAVE STATUS. Specifically, if the replication worker thread is in do_gco_wait() and is killed, it already holds LOCK_parallel_entry and, during error reporting, tries to grab the err_lock. SHOW SLAVE STATUS, however, grabs these locks in the reverse order: it first grabs the err_lock and then tries to grab LOCK_parallel_entry. This leads to a deadlock when each thread has grabbed its first lock but not yet its second.

This patch implements the fix proposed in MDEV-31894: optimize the workers_idle() check to compare the last in-use relay log's queued_count==dequeued_count for idleness. This removes the need for workers_idle() to grab LOCK_parallel_entry, as these values are atomically updated.

Huge thanks to Kristian Nielsen for diagnosing the problem!

Reviewed By:
============
Kristian Nielsen <knielsen@knielsen-hq.org>
Andrei Elkin <andrei.elkin@mariadb.com>
67 lines
2.3 KiB
Plaintext
67 lines
2.3 KiB
Plaintext
include/master-slave.inc
|
|
[connection master]
|
|
#
|
|
# Initialize test data
|
|
connection master;
|
|
create table t1 (a int) engine=innodb;
|
|
insert into t1 values (1);
|
|
include/save_master_gtid.inc
|
|
connection slave;
|
|
include/sync_with_master_gtid.inc
|
|
include/stop_slave.inc
|
|
call mtr.add_suppression("Connection was killed");
|
|
call mtr.add_suppression("Commit failed due to failure of an earlier commit on which this one depends");
|
|
set @save_parallel_threads= @@global.slave_parallel_threads;
|
|
set @save_parallel_mode= @@global.slave_parallel_mode;
|
|
set @save_transaction_retries= @@global.slave_transaction_retries;
|
|
set @save_innodb_lock_wait_timeout= @@global.innodb_lock_wait_timeout;
|
|
set @@global.slave_parallel_threads= 2;
|
|
set @@global.slave_parallel_mode= CONSERVATIVE;
|
|
set @@global.slave_transaction_retries= 0;
|
|
set @@global.innodb_lock_wait_timeout= 10;
|
|
# Grabbing lock on innodb row to force future replication transaction to wait (and eventually timeout)
|
|
BEGIN;
|
|
select * from t1 where a=1 for update;
|
|
a
|
|
1
|
|
connection master;
|
|
set @old_dbug= @@session.debug_dbug;
|
|
set @@session.debug_dbug="+d,binlog_force_commit_id";
|
|
SET @commit_id= 10000;
|
|
update t1 set a=2 where a=1;
|
|
SET @commit_id= 10001;
|
|
insert into t1 values (3);
|
|
set @@session.debug_dbug= @old_dbug;
|
|
connection slave;
|
|
start slave;
|
|
# Waiting for first transaction to start (and be held at innodb row lock)..
|
|
# Waiting for next transaction to start and hold at do_gco_wait()..
|
|
connection slave1;
|
|
set @@session.debug_dbug="+d,hold_sss_with_err_lock";
|
|
show slave status;
|
|
connection slave;
|
|
set debug_sync="now wait_for sss_got_err_lock";
|
|
kill <TID of worker in do_gco_wait>;
|
|
set debug_sync="now signal sss_continue";
|
|
connection slave1;
|
|
# Waiting for SHOW SLAVE STATUS to complete..
|
|
# ..done
|
|
connection slave;
|
|
ROLLBACK;
|
|
include/wait_for_slave_sql_error.inc [errno=1927]
|
|
#
|
|
# Cleanup
|
|
connection master;
|
|
drop table t1;
|
|
include/save_master_gtid.inc
|
|
connection slave;
|
|
set debug_sync= "RESET";
|
|
set @@global.slave_parallel_threads= @save_parallel_threads;
|
|
set @@global.slave_parallel_mode= @save_parallel_mode;
|
|
set @@global.slave_transaction_retries= @save_transaction_retries;
|
|
set @@global.innodb_lock_wait_timeout= @save_innodb_lock_wait_timeout;
|
|
start slave sql_thread;
|
|
include/sync_with_master_gtid.inc
|
|
include/rpl_end.inc
|
|
# End of rpl_deadlock_show_slave_status.test
|