mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
Bug#58546 test rpl_packet timeout failure sporadically on PB
rpl_packet got a timeout failure sporadically on PB when stopping slave. The real reason of this bug is that STOP SLAVE stopped IO thread first and then stopped SQL thread. It was possible that IO thread stopped after replicating part of a transaction which SQL thread was executing. SQL thread would be hung if the transaction could not be rolled back safely. After this patch, STOP SLAVE will stop SQL thread first and then stop IO thread, which guarantees that IO thread will fetch the reset of the events of the transaction that SQL thread is executing, so that SQL thread can finish the transaction if it cannot be rolled back safely. Added below auxiliary files to make the test code neater. restart_slave_sql.inc rpl_connection_master.inc rpl_connection_slave.inc rpl_connection_slave1.inc
This commit is contained in:
43
mysql-test/include/restart_slave_sql.inc
Normal file
43
mysql-test/include/restart_slave_sql.inc
Normal file
@ -0,0 +1,43 @@
|
||||
# ==== Purpose ====
|
||||
#
|
||||
# Provide a earier way to restart SQL thread when you want to stop sql thread
|
||||
# and then start it immediately.
|
||||
#
|
||||
# Sources stop_slave_sql.inc to stop SQL thread on the current connection.
|
||||
# Then issues START SLAVE SQL_THREAD and then waits until
|
||||
# the SQL threads have started, or until a timeout is reached.
|
||||
#
|
||||
# Please use this instead of 'STOP|START SLAVE SQL_THREAD', to reduce the risk of
|
||||
# test case bugs.
|
||||
#
|
||||
#
|
||||
# ==== Usage ====
|
||||
#
|
||||
# [--let $slave_timeout= NUMBER]
|
||||
# [--let $rpl_debug= 1]
|
||||
# --source include/restart_slave_sql.inc
|
||||
#
|
||||
# Parameters:
|
||||
# $slave_timeout
|
||||
# See include/wait_for_slave_param.inc
|
||||
#
|
||||
# $rpl_debug
|
||||
# See include/rpl_init.inc
|
||||
|
||||
|
||||
--let $include_filename= restart_slave.inc
|
||||
--source include/begin_include_file.inc
|
||||
|
||||
|
||||
if (!$rpl_debug)
|
||||
{
|
||||
--disable_query_log
|
||||
}
|
||||
|
||||
source include/stop_slave_sql.inc;
|
||||
START SLAVE SQL_THREAD;
|
||||
source include/wait_for_slave_sql_to_start.inc;
|
||||
|
||||
|
||||
--let $include_filename= restart_slave.inc
|
||||
--source include/end_include_file.inc
|
2
mysql-test/include/rpl_connection_master.inc
Normal file
2
mysql-test/include/rpl_connection_master.inc
Normal file
@ -0,0 +1,2 @@
|
||||
let $rpl_connection_name= master;
|
||||
source include/rpl_connection.inc;
|
2
mysql-test/include/rpl_connection_slave.inc
Normal file
2
mysql-test/include/rpl_connection_slave.inc
Normal file
@ -0,0 +1,2 @@
|
||||
let $rpl_connection_name= slave;
|
||||
source include/rpl_connection.inc;
|
2
mysql-test/include/rpl_connection_slave1.inc
Normal file
2
mysql-test/include/rpl_connection_slave1.inc
Normal file
@ -0,0 +1,2 @@
|
||||
let $rpl_connection_name= slave1;
|
||||
source include/rpl_connection.inc;
|
@ -128,5 +128,47 @@ START SLAVE SQL_THREAD;
|
||||
include/wait_for_slave_sql_to_start.inc
|
||||
# Test end
|
||||
SET GLOBAL debug= '$debug_save';
|
||||
include/restart_slave.inc
|
||||
[connection master]
|
||||
DROP TABLE t1, t2;
|
||||
|
||||
# Bug#58546 test rpl_packet timeout failure sporadically on PB
|
||||
# ----------------------------------------------------------------------
|
||||
# STOP SLAVE stopped IO thread first and then stopped SQL thread. It was
|
||||
# possible that IO thread stopped after replicating part of a transaction
|
||||
# which SQL thread was executing. SQL thread would be hung if the
|
||||
# transaction could not be rolled back safely.
|
||||
# It caused some sporadic failures on PB2.
|
||||
#
|
||||
# This test verifies that when 'STOP SLAVE' is issued by a user, IO
|
||||
# thread will continue to fetch the rest events of the transaction which
|
||||
# is being executed by SQL thread and is not able to be rolled back safely.
|
||||
CREATE TABLE t1 (c1 INT KEY, c2 INT) ENGINE=InnoDB;
|
||||
CREATE TABLE t2 (c1 INT) ENGINE=MyISAM;
|
||||
INSERT INTO t1 VALUES(1, 1);
|
||||
SET GLOBAL debug= 'd,dump_thread_wait_before_send_xid';
|
||||
[connection slave]
|
||||
include/restart_slave.inc
|
||||
BEGIN;
|
||||
UPDATE t1 SET c2 = 2 WHERE c1 = 1;
|
||||
[connection master]
|
||||
BEGIN;
|
||||
INSERT INTO t1 VALUES(2, 2);
|
||||
INSERT INTO t2 VALUES(1);
|
||||
UPDATE t1 SET c2 = 3 WHERE c1 = 1;
|
||||
COMMIT;
|
||||
[connection slave1]
|
||||
STOP SLAVE;
|
||||
[connection slave]
|
||||
ROLLBACK;
|
||||
[connection master]
|
||||
SET DEBUG_SYNC= 'now SIGNAL signal.continue';
|
||||
SET DEBUG_SYNC= 'RESET';
|
||||
[connection slave]
|
||||
include/wait_for_slave_to_stop.inc
|
||||
[connection slave1]
|
||||
include/start_slave.inc
|
||||
[connection master]
|
||||
DROP TABLE t1, t2;
|
||||
SET GLOBAL debug= $debug_save;
|
||||
include/rpl_end.inc
|
||||
|
@ -26,8 +26,8 @@ let $old_net_buffer_length= `SELECT @@global.net_buffer_length`;
|
||||
SET @@global.max_allowed_packet=1024;
|
||||
SET @@global.net_buffer_length=1024;
|
||||
|
||||
sync_slave_with_master;
|
||||
# Restart slave for setting to take effect
|
||||
connection slave;
|
||||
source include/stop_slave.inc;
|
||||
source include/start_slave.inc;
|
||||
|
||||
|
@ -54,7 +54,69 @@ source extra/rpl_tests/rpl_stop_slave.test;
|
||||
|
||||
--echo # Test end
|
||||
SET GLOBAL debug= '$debug_save';
|
||||
source include/restart_slave_sql.inc;
|
||||
|
||||
connection master;
|
||||
--source include/rpl_connection_master.inc
|
||||
DROP TABLE t1, t2;
|
||||
|
||||
--echo
|
||||
--echo # Bug#58546 test rpl_packet timeout failure sporadically on PB
|
||||
--echo # ----------------------------------------------------------------------
|
||||
--echo # STOP SLAVE stopped IO thread first and then stopped SQL thread. It was
|
||||
--echo # possible that IO thread stopped after replicating part of a transaction
|
||||
--echo # which SQL thread was executing. SQL thread would be hung if the
|
||||
--echo # transaction could not be rolled back safely.
|
||||
--echo # It caused some sporadic failures on PB2.
|
||||
--echo #
|
||||
--echo # This test verifies that when 'STOP SLAVE' is issued by a user, IO
|
||||
--echo # thread will continue to fetch the rest events of the transaction which
|
||||
--echo # is being executed by SQL thread and is not able to be rolled back safely.
|
||||
|
||||
CREATE TABLE t1 (c1 INT KEY, c2 INT) ENGINE=InnoDB;
|
||||
CREATE TABLE t2 (c1 INT) ENGINE=MyISAM;
|
||||
INSERT INTO t1 VALUES(1, 1);
|
||||
|
||||
let $debug_save= `SELECT @@GLOBAL.debug`;
|
||||
SET GLOBAL debug= 'd,dump_thread_wait_before_send_xid';
|
||||
|
||||
sync_slave_with_master;
|
||||
|
||||
--source include/rpl_connection_slave.inc
|
||||
source include/restart_slave_sql.inc;
|
||||
|
||||
BEGIN;
|
||||
UPDATE t1 SET c2 = 2 WHERE c1 = 1;
|
||||
|
||||
--source include/rpl_connection_master.inc
|
||||
BEGIN;
|
||||
INSERT INTO t1 VALUES(2, 2);
|
||||
INSERT INTO t2 VALUES(1);
|
||||
UPDATE t1 SET c2 = 3 WHERE c1 = 1;
|
||||
COMMIT;
|
||||
|
||||
--source include/rpl_connection_slave1.inc
|
||||
let $show_statement= SHOW PROCESSLIST;
|
||||
let $field= Info;
|
||||
let $condition= = 'UPDATE t1 SET c2 = 3 WHERE c1 = 1';
|
||||
source include/wait_show_condition.inc;
|
||||
|
||||
send STOP SLAVE;
|
||||
|
||||
--source include/rpl_connection_slave.inc
|
||||
ROLLBACK;
|
||||
|
||||
--source include/rpl_connection_master.inc
|
||||
SET DEBUG_SYNC= 'now SIGNAL signal.continue';
|
||||
SET DEBUG_SYNC= 'RESET';
|
||||
|
||||
--source include/rpl_connection_slave.inc
|
||||
source include/wait_for_slave_to_stop.inc;
|
||||
|
||||
--source include/rpl_connection_slave1.inc
|
||||
reap;
|
||||
source include/start_slave.inc;
|
||||
|
||||
--source include/rpl_connection_master.inc
|
||||
DROP TABLE t1, t2;
|
||||
SET GLOBAL debug= $debug_save;
|
||||
--source include/rpl_end.inc
|
||||
|
22
sql/slave.cc
22
sql/slave.cc
@ -408,17 +408,6 @@ int terminate_slave_threads(Master_info* mi,int thread_mask,bool skip_lock)
|
||||
int error,force_all = (thread_mask & SLAVE_FORCE_ALL);
|
||||
pthread_mutex_t *sql_lock = &mi->rli.run_lock, *io_lock = &mi->run_lock;
|
||||
|
||||
if (thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL))
|
||||
{
|
||||
DBUG_PRINT("info",("Terminating IO thread"));
|
||||
mi->abort_slave=1;
|
||||
if ((error=terminate_slave_thread(mi->io_thd, io_lock,
|
||||
&mi->stop_cond,
|
||||
&mi->slave_running,
|
||||
skip_lock)) &&
|
||||
!force_all)
|
||||
DBUG_RETURN(error);
|
||||
}
|
||||
if (thread_mask & (SLAVE_SQL|SLAVE_FORCE_ALL))
|
||||
{
|
||||
DBUG_PRINT("info",("Terminating SQL thread"));
|
||||
@ -430,6 +419,17 @@ int terminate_slave_threads(Master_info* mi,int thread_mask,bool skip_lock)
|
||||
!force_all)
|
||||
DBUG_RETURN(error);
|
||||
}
|
||||
if (thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL))
|
||||
{
|
||||
DBUG_PRINT("info",("Terminating IO thread"));
|
||||
mi->abort_slave=1;
|
||||
if ((error=terminate_slave_thread(mi->io_thd, io_lock,
|
||||
&mi->stop_cond,
|
||||
&mi->slave_running,
|
||||
skip_lock)) &&
|
||||
!force_all)
|
||||
DBUG_RETURN(error);
|
||||
}
|
||||
DBUG_RETURN(0);
|
||||
}
|
||||
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "log_event.h"
|
||||
#include "rpl_filter.h"
|
||||
#include <my_dir.h>
|
||||
#include "debug_sync.h"
|
||||
|
||||
int max_binlog_dump_events = 0; // unlimited
|
||||
my_bool opt_sporadic_binlog_dump_fail = 0;
|
||||
@ -556,6 +557,20 @@ impossible position";
|
||||
}
|
||||
#endif
|
||||
|
||||
DBUG_EXECUTE_IF("dump_thread_wait_before_send_xid",
|
||||
{
|
||||
if ((*packet)[EVENT_TYPE_OFFSET+1] == XID_EVENT)
|
||||
{
|
||||
net_flush(net);
|
||||
const char act[]=
|
||||
"now "
|
||||
"wait_for signal.continue";
|
||||
DBUG_ASSERT(opt_debug_sync_timeout > 0);
|
||||
DBUG_ASSERT(!debug_sync_set_action(current_thd,
|
||||
STRING_WITH_LEN(act)));
|
||||
}
|
||||
});
|
||||
|
||||
if ((*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT)
|
||||
{
|
||||
binlog_can_be_corrupted= test((*packet)[FLAGS_OFFSET+1] &
|
||||
@ -572,6 +587,14 @@ impossible position";
|
||||
goto err;
|
||||
}
|
||||
|
||||
DBUG_EXECUTE_IF("dump_thread_wait_before_send_xid",
|
||||
{
|
||||
if ((*packet)[EVENT_TYPE_OFFSET+1] == XID_EVENT)
|
||||
{
|
||||
net_flush(net);
|
||||
}
|
||||
});
|
||||
|
||||
DBUG_PRINT("info", ("log event code %d",
|
||||
(*packet)[LOG_EVENT_OFFSET+1] ));
|
||||
if ((*packet)[LOG_EVENT_OFFSET+1] == LOAD_EVENT)
|
||||
|
Reference in New Issue
Block a user