mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
Bug#11763573 - 56299: MUTEX DEADLOCK WITH COM_BINLOG_DUMP, BINLOG PURGE, AND PROCESSLIST/KILL
This bug is similar to the earlier fixed Bug#49536. The deadlock involving LOCK_log is possible because the thread running the purge holds LOCK_log, although there is no need to do so, a fact that the earlier bug fixes already relied on. Fixed with a small reengineering of rotate_and_purge(): two new methods are added, and a policy is set up to execute them instead of the former rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED). The policy for using rotate() and purge() is that if the caller acquires LOCK_log itself, it should call rotate(), release the mutex, and then run purge(). A side effect of this patch is that the error message of bug@11747416 is refined to print the whole path. mysql-test/suite/rpl/r/rpl_cant_read_event_incident.result: the file name is now printed as a relative path instead of just the file name. mysql-test/suite/rpl/r/rpl_log_pos.result: the file name is now printed as a relative path instead of just the file name. mysql-test/suite/rpl/r/rpl_manual_change_index_file.result: the file name is now printed as a relative path instead of just the file name. mysql-test/suite/rpl/r/rpl_packet.result: the file name is now printed as a relative path instead of just the file name. mysql-test/suite/rpl/r/rpl_rotate_purge_deadlock.result: a new result file is added. mysql-test/suite/rpl/t/rpl_cant_read_event_incident.test: the test for that bug cannot accommodate both the Windows and Unix interpretations of backslashes, so the test is skipped on Windows. mysql-test/suite/rpl/t/rpl_rotate_purge_deadlock-master.opt: a new opt file is added. mysql-test/suite/rpl/t/rpl_rotate_purge_deadlock.test: a regression test is added, which also attempts to verify a possible side effect of the fixes. sql/log.cc: LOCK_log is never held during execution of the log purging routine. The former MYSQL_BIN_LOG::rotate_and_purge() now always acquires and releases LOCK_log itself.
If the caller takes the mutex itself, it has to use the combination of the new rotate() and purge() methods and must never let purge() run while LOCK_log is held. Rotation and purging are split apart to allow the caller to choose either of them. Simulation of concurrently rotating/purging threads is added. sql/log.h: new rotate() and purge() methods are added to be used instead of the former rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED). The rotate_and_purge() signature is changed. A caller that has acquired LOCK_log itself should not call rotate_and_purge() but rather {rotate(), purge()}. sql/rpl_injector.cc: changes to reflect the new rotate_and_purge() signature. sql/sql_class.h: unnecessary constants are removed. sql/sql_parse.cc: changes to reflect the new rotate_and_purge() signature. sql/sql_reload.cc: changes to reflect the new rotate_and_purge() signature. sql/sql_repl.cc: follow-up for bug@11747416: the file name is now printed as a relative path instead of just the file name.
This commit is contained in:
@ -11,7 +11,7 @@ reset slave;
|
||||
start slave;
|
||||
include/wait_for_slave_param.inc [Last_IO_Errno]
|
||||
Last_IO_Errno = '1236'
|
||||
Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'binlog truncated in the middle of event; consider out of disk space on master; the last event was read from 'master-bin.000001' at 316, the last byte read was read from 'master-bin.000001' at 335.''
|
||||
Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'binlog truncated in the middle of event; consider out of disk space on master; the last event was read from './master-bin.000001' at 316, the last byte read was read from './master-bin.000001' at 335.''
|
||||
reset master;
|
||||
stop slave;
|
||||
reset slave;
|
||||
|
@ -9,7 +9,7 @@ change master to master_log_pos=MASTER_LOG_POS;
|
||||
Read_Master_Log_Pos = '75'
|
||||
start slave;
|
||||
include/wait_for_slave_io_error.inc [errno=1236]
|
||||
Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'log event entry exceeded max_allowed_packet; Increase max_allowed_packet on master; the last event was read from 'master-bin.000001' at 75, the last byte read was read from 'master-bin.000001' at 94.''
|
||||
Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'log event entry exceeded max_allowed_packet; Increase max_allowed_packet on master; the last event was read from './master-bin.000001' at 75, the last byte read was read from './master-bin.000001' at 94.''
|
||||
include/stop_slave_sql.inc
|
||||
show master status;
|
||||
File Position Binlog_Do_DB Binlog_Ignore_DB
|
||||
|
@ -5,7 +5,7 @@ CREATE TABLE t1(c1 INT);
|
||||
FLUSH LOGS;
|
||||
call mtr.add_suppression('Got fatal error 1236 from master when reading data from binary log: .*could not find next log');
|
||||
include/wait_for_slave_io_error.inc [errno=1236]
|
||||
Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'could not find next log; the last event was read from 'master-bin.000002' at 237, the last byte read was read from 'master-bin.000002' at 237.''
|
||||
Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'could not find next log; the last event was read from './master-bin.000002' at 237, the last byte read was read from './master-bin.000002' at 237.''
|
||||
CREATE TABLE t2(c1 INT);
|
||||
FLUSH LOGS;
|
||||
CREATE TABLE t3(c1 INT);
|
||||
|
@ -37,7 +37,7 @@ DROP TABLE t1;
|
||||
CREATE TABLE t1 (f1 int PRIMARY KEY, f2 LONGTEXT, f3 LONGTEXT) ENGINE=MyISAM;
|
||||
INSERT INTO t1(f1, f2, f3) VALUES(1, REPEAT('a', @@global.max_allowed_packet), REPEAT('b', @@global.max_allowed_packet));
|
||||
include/wait_for_slave_io_error.inc [errno=1236]
|
||||
Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'log event entry exceeded max_allowed_packet; Increase max_allowed_packet on master; the last event was read from 'master-bin.000001' at 463, the last byte read was read from 'master-bin.000001' at 482.''
|
||||
Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'log event entry exceeded max_allowed_packet; Increase max_allowed_packet on master; the last event was read from './master-bin.000001' at 463, the last byte read was read from './master-bin.000001' at 482.''
|
||||
STOP SLAVE;
|
||||
RESET SLAVE;
|
||||
RESET MASTER;
|
||||
|
30
mysql-test/suite/rpl/r/rpl_rotate_purge_deadlock.result
Normal file
30
mysql-test/suite/rpl/r/rpl_rotate_purge_deadlock.result
Normal file
@ -0,0 +1,30 @@
|
||||
include/master-slave.inc
|
||||
[connection master]
|
||||
show binary logs;
|
||||
Log_name File_size
|
||||
master-bin.000001 #
|
||||
create table t1 (f text) engine=innodb;
|
||||
SET DEBUG_SYNC = 'at_purge_logs_before_date WAIT_FOR rotated';
|
||||
insert into t1 set f=repeat('a', 4096);
|
||||
*** there must be two logs in the list ***
|
||||
show binary logs;
|
||||
Log_name File_size
|
||||
master-bin.000001 #
|
||||
master-bin.000002 #
|
||||
insert into t1 set f=repeat('b', 4096);
|
||||
*** there must be three logs in the list ***
|
||||
show binary logs;
|
||||
Log_name File_size
|
||||
master-bin.000001 #
|
||||
master-bin.000002 #
|
||||
master-bin.000003 #
|
||||
SET DEBUG_SYNC = 'now SIGNAL rotated';
|
||||
SET DEBUG_SYNC = 'RESET';
|
||||
SET DEBUG_SYNC = 'RESET';
|
||||
SET DEBUG_SYNC = 'at_purge_logs_before_date WAIT_FOR rotated';
|
||||
insert into t1 set f=repeat('b', 4096);
|
||||
SET DEBUG_SYNC = 'now SIGNAL rotated';
|
||||
SET DEBUG_SYNC = 'RESET';
|
||||
SET DEBUG_SYNC = 'RESET';
|
||||
drop table t1;
|
||||
include/rpl_end.inc
|
@ -14,6 +14,11 @@
|
||||
|
||||
--source include/master-slave.inc
|
||||
--source include/have_binlog_format_mixed.inc
|
||||
#
|
||||
# Bug#13050593 swallows `\' from Last_IO_Error
|
||||
# todo: uncomment the filter once the bug is fixed.
|
||||
#
|
||||
--source include/not_windows.inc
|
||||
|
||||
call mtr.add_suppression("Error in Log_event::read_log_event()");
|
||||
|
||||
|
@ -0,0 +1 @@
|
||||
--max-binlog-size=4k --expire-logs-days=1
|
92
mysql-test/suite/rpl/t/rpl_rotate_purge_deadlock.test
Normal file
92
mysql-test/suite/rpl/t/rpl_rotate_purge_deadlock.test
Normal file
@ -0,0 +1,92 @@
|
||||
#
|
||||
# Bug#11763573 - 56299: MUTEX DEADLOCK WITH COM_BINLOG_DUMP, BINLOG PURGE, AND PROCESSLIST/KILL
|
||||
#
|
||||
source include/master-slave.inc;
|
||||
source include/have_debug_sync.inc;
|
||||
source include/have_binlog_format_row.inc;
|
||||
source include/have_innodb.inc;
|
||||
|
||||
#
|
||||
# Testing that execution of two concurrent INSERTing connections both
|
||||
# triggering the binlog rotation is correct even though their execution
|
||||
# is interleaved.
|
||||
# The test lets the first connection complete the rotation part
|
||||
# and then yields control to the second connection, which rotates as well and
|
||||
# gets to purging first. The interleaving does not create
|
||||
# any issue.
|
||||
#
|
||||
|
||||
connection master;
|
||||
source include/show_binary_logs.inc;
|
||||
create table t1 (f text) engine=innodb;
|
||||
SET DEBUG_SYNC = 'at_purge_logs_before_date WAIT_FOR rotated';
|
||||
send insert into t1 set f=repeat('a', 4096);
|
||||
|
||||
connection master1;
|
||||
|
||||
let $wait_condition=
|
||||
SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST
|
||||
WHERE STATE like "debug sync point: at_purge_logs_before_date%";
|
||||
--source include/wait_condition.inc
|
||||
|
||||
--echo *** there must be two logs in the list ***
|
||||
source include/show_binary_logs.inc;
|
||||
|
||||
insert into t1 set f=repeat('b', 4096);
|
||||
|
||||
--echo *** there must be three logs in the list ***
|
||||
source include/show_binary_logs.inc;
|
||||
|
||||
SET DEBUG_SYNC = 'now SIGNAL rotated';
|
||||
SET DEBUG_SYNC = 'RESET';
|
||||
|
||||
# the first connection finally completes its INSERT
|
||||
connection master;
|
||||
reap;
|
||||
SET DEBUG_SYNC = 'RESET';
|
||||
|
||||
sync_slave_with_master;
|
||||
|
||||
|
||||
#
|
||||
# Testing the reported deadlock involving DUMP, KILL and INSERT threads
|
||||
#
|
||||
|
||||
connection master;
|
||||
SET DEBUG_SYNC = 'at_purge_logs_before_date WAIT_FOR rotated';
|
||||
send insert into t1 set f=repeat('b', 4096);
|
||||
|
||||
connection master1;
|
||||
|
||||
# make sure INSERT reaches waiting point
|
||||
let $wait_condition=
|
||||
SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST
|
||||
WHERE STATE like "debug sync point: at_purge_logs_before_date%";
|
||||
--source include/wait_condition.inc
|
||||
|
||||
# find and kill DUMP thread
|
||||
let $_tid= `select id from information_schema.processlist where command = 'Binlog Dump' limit 1`;
|
||||
--disable_query_log
|
||||
eval kill query $_tid;
|
||||
--enable_query_log
|
||||
|
||||
#
|
||||
# Now the proof is that the new DUMP thread has executed
|
||||
# a critical section of the deadlock without any regression and is UP
|
||||
#
|
||||
let $wait_condition=
|
||||
SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST
|
||||
WHERE command = 'Binlog Dump' and STATE like "Master has sent all binlog to slave%";
|
||||
--source include/wait_condition.inc
|
||||
|
||||
SET DEBUG_SYNC = 'now SIGNAL rotated';
|
||||
SET DEBUG_SYNC = 'RESET';
|
||||
|
||||
connection master;
|
||||
reap;
|
||||
SET DEBUG_SYNC = 'RESET';
|
||||
drop table t1;
|
||||
|
||||
sync_slave_with_master;
|
||||
|
||||
--source include/rpl_end.inc
|
Reference in New Issue
Block a user