1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-29 05:21:33 +03:00

MDEV-32168: slave_error_param condition is never checked from the wait_for_slave_param.inc

Fix some random test failures following MDEV-32168 push.

Don't blindly set $rpl_only_running_threads in many places. Instead, explicitly
stop only the IO or SQL thread, as appropriate. Setting it interfered with
rpl_end.inc in some cases. Rather than clearing it afterwards, it is better to
not set it at all when it is not needed, removing ambiguity in the test
about the state of the replication threads.

Don't fail the test if include/stop_slave_io.inc finds an error in the IO
thread after stop. Such errors can occur simply because the slave stop happened
in the middle of the IO thread's initial communication with the master.

Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
This commit is contained in:
Kristian Nielsen
2023-11-20 21:59:55 +01:00
parent 1ffa8c5072
commit ea4bcb9d98
37 changed files with 66 additions and 72 deletions

View File

@ -28,6 +28,9 @@
# there is an error in the IO thread.
# (If an error is _always_ expected, a better alternative might be to
# use wait_for_slave_io_error.inc instead of this file).
# Note: This is currently always enabled, since a simple STOP SLAVE
# IO_THREAD can cause an error if it interrupts the slave's initial
# communication with the master (MDEV-32892).
#
# $rpl_debug
# See include/rpl_init.inc
@ -39,12 +42,15 @@
--let $slave_param= Slave_IO_Running
--let $slave_param_value= No
if (!$rpl_allow_error)
{
--let $slave_error_param= Last_IO_Errno
}
--let $_io_stop_save_allow_error= $slave_error_param
# Disabled, as IO errors are left behind when a normal STOP SLAVE interrupts
# the initial communication between the IO thread and the master (MDEV-32892).
#if (!$rpl_allow_error)
#{
# --let $slave_error_param= Last_IO_Errno
#}
--source include/wait_for_slave_param.inc
--let $slave_error_param=
--let $slave_error_param= $_io_stop_save_allow_error
--let $include_filename= wait_for_slave_io_to_stop.inc

View File

@ -63,7 +63,7 @@ include/wait_for_slave_io_error.inc [errno=1236]
SHOW TABLES;
Tables_in_test
table1_no_encryption
include/stop_slave.inc
include/stop_slave_sql.inc
reset slave;
##########
# Cleanup

View File

@ -137,8 +137,7 @@ SHOW TABLES;
--disable_connect_log
# IO thread is stopped, stop SQL thread only
--let $rpl_only_running_threads= 1
--source include/stop_slave.inc
--source include/stop_slave_sql.inc
--enable_connect_log
reset slave;

View File

@ -62,7 +62,7 @@ include/wait_for_slave_io_error.inc [errno=1236]
# ..success
SHOW TABLES;
Tables_in_test
include/stop_slave.inc
include/stop_slave_sql.inc
reset slave;
##########
# Cleanup

View File

@ -133,8 +133,7 @@ SHOW TABLES;
--disable_connect_log
# IO thread is stopped, wait for SQL thread to be stopped
--let $rpl_only_running_threads= 1
--source include/stop_slave.inc
--source include/stop_slave_sql.inc
--enable_connect_log
reset slave;

View File

@ -192,7 +192,7 @@ SET GLOBAL max_binlog_cache_size= ORIGINAL_VALUE;
SET GLOBAL binlog_cache_size= ORIGINAL_VALUE;
SET GLOBAL max_binlog_stmt_cache_size= ORIGINAL_VALUE;
SET GLOBAL binlog_stmt_cache_size= ORIGINAL_VALUE;
include/stop_slave.inc
include/stop_slave_io.inc
include/start_slave.inc
connection master;
connection slave;

View File

@ -36,7 +36,7 @@ connection con_temp2;
COMMIT;
connection server_2;
include/wait_for_slave_sql_error.inc [errno=1062]
include/stop_slave.inc
include/stop_slave_io.inc
include/assert.inc [table t1 should have zero rows where a>32]
SELECT * FROM t1 WHERE a>32;
a

View File

@ -99,7 +99,7 @@ include/wait_for_slave_to_stop.inc
set default_master_connection = 'slave2';
include/wait_for_slave_sql_error.inc [errno=1942]
STOP SLAVE;
include/stop_slave.inc
include/wait_for_slave_io_to_stop.inc
set default_master_connection = 'slave1';
START SLAVE;
include/wait_for_slave_to_start.inc

View File

@ -121,8 +121,7 @@ set default_master_connection = 'slave2';
--let $slave_sql_errno= 1942
--source include/wait_for_slave_sql_error.inc
STOP SLAVE;
--let $rpl_only_running_threads= 1
--source include/stop_slave.inc
--source include/wait_for_slave_io_to_stop.inc
set default_master_connection = 'slave1';
START SLAVE;
--source include/wait_for_slave_to_start.inc

View File

@ -36,7 +36,8 @@ insert into t1 values (3);
--connection slave
--source include/start_slave.inc
--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Update_rows_log_event::find_row(-1)' and command LIKE 'Slave_worker';
# Wildcard for `state` as it depends on whether WSREP is compiled in or not.
--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Update_rows_log_event::find_row(%)' and command LIKE 'Slave_worker';
--source include/wait_condition.inc
--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for prior transaction to commit%' and command LIKE 'Slave_worker';
--source include/wait_condition.inc

View File

@ -55,7 +55,8 @@ drop table t2;
--source include/start_slave.inc
--echo # wait for T1
--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Update_rows_log_event::find_row(-1)' and command LIKE 'Slave_worker';
# Wildcard for `state` as it depends on whether WSREP is compiled in or not.
--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Update_rows_log_event::find_row(%)' and command LIKE 'Slave_worker';
--source include/wait_condition.inc
--echo # wait for T2

View File

@ -433,8 +433,7 @@ source include/show_binlog_events.inc;
--eval SET GLOBAL binlog_stmt_cache_size= $old_binlog_stmt_cache_size
# SQL thread is stopped, stop only IO thread
--let $rpl_only_running_threads= 1
source include/stop_slave.inc;
source include/stop_slave_io.inc;
source include/start_slave.inc;
connection master;

View File

@ -207,8 +207,7 @@ START SLAVE;
--let $status_items= Last_IO_Errno, Last_IO_Error
--source include/show_slave_status.inc
--let $rpl_only_running_threads= 1
--source include/stop_slave.inc
--source include/stop_slave_sql.inc
RESET SLAVE;
--connection master

View File

@ -42,7 +42,8 @@ include/save_master_gtid.inc
connection slave;
#
# Cleanup
include/stop_slave.inc
include/wait_for_slave_sql_to_stop.inc
include/stop_slave_io.inc
set @@global.slave_parallel_threads= 0;
set @@global.slave_parallel_mode= conservative;
set @@global.innodb_lock_wait_timeout= 50;

View File

@ -6,7 +6,7 @@ include/stop_slave.inc
CHANGE MASTER TO MASTER_USER= '', MASTER_PASSWORD= '';
START SLAVE;
include/wait_for_slave_io_error.inc [errno=1045, 1593]
include/stop_slave.inc
include/stop_slave_sql.inc
CHANGE MASTER TO MASTER_USER= 'root', MASTER_PASSWORD= '';
START SLAVE;
include/rpl_end.inc

View File

@ -135,7 +135,7 @@ i
2
3
SET @@global.debug_dbug=@saved_dbug;
include/stop_slave.inc
include/stop_slave_sql.inc
DO_DOMAIN_IDS (BEFORE) :
IGNORE_DOMAIN_IDS (BEFORE) :
CHANGE MASTER TO IGNORE_DOMAIN_IDS=(1), MASTER_USE_GTID=slave_pos;
@ -204,7 +204,7 @@ i
10
11
SET @@global.debug_dbug=@saved_dbug;
include/stop_slave.inc
include/stop_slave_sql.inc
DO_DOMAIN_IDS (BEFORE) :
IGNORE_DOMAIN_IDS (BEFORE) : 1
CHANGE MASTER TO IGNORE_DOMAIN_IDS=(), MASTER_USE_GTID=slave_pos;
@ -287,7 +287,7 @@ i
16
17
SET @@global.debug_dbug=@saved_dbug;
include/stop_slave.inc
include/stop_slave_sql.inc
DO_DOMAIN_IDS (BEFORE) :
IGNORE_DOMAIN_IDS (BEFORE) : 1
CHANGE MASTER TO IGNORE_DOMAIN_IDS=(), MASTER_USE_GTID=slave_pos;
@ -384,7 +384,7 @@ i
22
23
SET @@global.debug_dbug=@saved_dbug;
include/stop_slave.inc
include/stop_slave_sql.inc
DO_DOMAIN_IDS (BEFORE) :
IGNORE_DOMAIN_IDS (BEFORE) :
CHANGE MASTER TO IGNORE_DOMAIN_IDS=(1), MASTER_USE_GTID=slave_pos;

View File

@ -23,7 +23,8 @@ INSERT INTO t1 VALUES (2);
SET sql_log_bin=1;
START SLAVE;
include/wait_for_slave_sql_error.inc [errno=1062]
include/stop_slave.inc
include/wait_for_slave_io_to_start.inc
include/stop_slave_io.inc
SET GLOBAL gtid_slave_pos= "0-1-100";
include/start_slave.inc
SELECT * FROM t1 ORDER BY a;
@ -39,7 +40,7 @@ REPLACE INTO t1 VALUES (5);
SET debug_dbug= @dbug_save;
connection slave;
include/wait_for_slave_sql_error.inc [errno=1590]
include/stop_slave.inc
include/stop_slave_io.inc
SET sql_slave_skip_counter=1;
include/start_slave.inc
SELECT * FROM t1 ORDER BY a;

View File

@ -36,7 +36,7 @@ CHANGE MASTER TO master_host = '127.0.0.1', master_port = MASTER_PORT,
MASTER_USE_GTID=CURRENT_POS;
START SLAVE;
include/wait_for_slave_io_error.inc [errno=1236]
include/stop_slave.inc
include/stop_slave_sql.inc
CHANGE MASTER TO master_host = '127.0.0.1', master_port = MASTER_PORT,
MASTER_LOG_FILE="master-bin.000003", MASTER_LOG_POS=4;
include/start_slave.inc

View File

@ -226,7 +226,7 @@ connection slave;
call mtr.add_suppression("Slave SQL.*Duplicate entry .1. for key .PRIMARY.. on query.* error.* 1062");
call mtr.add_suppression("Slave SQL.*Request to stop slave SQL Thread received while applying a group that has non-transactional changes; waiting for completion of the group");
Heartbeat events are received while sql thread stopped (1 means 'yes'): 1
include/stop_slave.inc
include/stop_slave_io.inc
DROP TABLE t1;
*** Master send to slave ***

View File

@ -29,8 +29,7 @@ SELECT * FROM t1;
a b c
1 1 1
2 2 3
stop slave;
include/stop_slave.inc
include/stop_slave_io.inc
reset slave;
connection master;
reset master;
@ -189,8 +188,7 @@ SELECT * FROM t1;
a b c
1 1 1
2 2 3
stop slave;
include/stop_slave.inc
include/stop_slave_io.inc
reset slave;
connection master;
reset master;

View File

@ -192,7 +192,7 @@ SET GLOBAL max_binlog_cache_size= ORIGINAL_VALUE;
SET GLOBAL binlog_cache_size= ORIGINAL_VALUE;
SET GLOBAL max_binlog_stmt_cache_size= ORIGINAL_VALUE;
SET GLOBAL binlog_stmt_cache_size= ORIGINAL_VALUE;
include/stop_slave.inc
include/stop_slave_io.inc
include/start_slave.inc
connection master;
connection slave;

View File

@ -36,7 +36,7 @@ connection con_temp2;
COMMIT;
connection server_2;
include/wait_for_slave_sql_error.inc [errno=1062]
include/stop_slave.inc
include/stop_slave_io.inc
include/assert.inc [table t1 should have zero rows where a>32]
SELECT * FROM t1 WHERE a>32;
a

View File

@ -191,7 +191,7 @@ SET GLOBAL max_binlog_cache_size= ORIGINAL_VALUE;
SET GLOBAL binlog_cache_size= ORIGINAL_VALUE;
SET GLOBAL max_binlog_stmt_cache_size= ORIGINAL_VALUE;
SET GLOBAL binlog_stmt_cache_size= ORIGINAL_VALUE;
include/stop_slave.inc
include/stop_slave_io.inc
include/start_slave.inc
connection master;
connection slave;

View File

@ -14,7 +14,7 @@ connection master;
UPDATE t1_11753004, t2_11753004 SET t1_11753004.c1=3, t2_11753004.c1=4 WHERE t1_11753004.c1=1 OR t2_11753004.c1=2;
connection slave;
include/wait_for_slave_sql_error.inc [errno=1593 ]
include/stop_slave.inc
include/stop_slave_io.inc
SET @@global.debug_dbug=@saved_debug;
include/start_slave.inc
connection master;

View File

@ -192,7 +192,7 @@ SET GLOBAL max_binlog_cache_size= ORIGINAL_VALUE;
SET GLOBAL binlog_cache_size= ORIGINAL_VALUE;
SET GLOBAL max_binlog_stmt_cache_size= ORIGINAL_VALUE;
SET GLOBAL binlog_stmt_cache_size= ORIGINAL_VALUE;
include/stop_slave.inc
include/stop_slave_io.inc
include/start_slave.inc
connection master;
connection slave;

View File

@ -109,7 +109,7 @@ START SLAVE;
include/wait_for_slave_param.inc [Last_IO_Errno]
Last_IO_Errno = '1236'
Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'Client requested master to start replication from impossible position; the first event 'master-bin.000001' at XXX, the last event read from 'master-bin.000001' at XXX, the last byte read from 'master-bin.000001' at XXX.''
include/stop_slave.inc
include/stop_slave_sql.inc
RESET SLAVE;
connection master;
RESET MASTER;

View File

@ -13,7 +13,7 @@ connection master;
insert into t1 values (1);
reset master;
connection slave;
include/stop_slave.inc
include/stop_slave_sql.inc
reset slave;
include/start_slave.inc
set global rpl_semi_sync_slave_enabled = OFF;

View File

@ -54,7 +54,8 @@ drop table t2;
--source include/start_slave.inc
--echo # wait for T1
--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Update_rows_log_event::find_row(-1)' and command LIKE 'Slave_worker';
# Wildcard for `state` as it depends on whether WSREP is compiled in or not.
--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Update_rows_log_event::find_row(%)' and command LIKE 'Slave_worker';
--source include/wait_condition.inc
--echo # wait for T2
@ -81,8 +82,9 @@ DROP TABLE t1;
--connection slave
--echo #
--echo # Cleanup
--let $rpl_only_running_threads= 1
--source include/stop_slave.inc
--let $rpl_allow_error= 1
--source include/wait_for_slave_sql_to_stop.inc
--source include/stop_slave_io.inc
eval set @@global.slave_parallel_threads= $save_slave_parallel_threads;
eval set @@global.slave_parallel_mode= $save_slave_parallel_mode;
eval set @@global.innodb_lock_wait_timeout= $save_innodb_lock_wait_timeout;

View File

@ -16,8 +16,7 @@ CHANGE MASTER TO MASTER_USER= '', MASTER_PASSWORD= '';
START SLAVE;
--let $slave_io_errno= 1045, 1593
--source include/wait_for_slave_io_error.inc
--let $rpl_only_running_threads= 1
--source include/stop_slave.inc
--source include/stop_slave_sql.inc
CHANGE MASTER TO MASTER_USER= 'root', MASTER_PASSWORD= '';
START SLAVE;

View File

@ -149,8 +149,7 @@ connection slave;
SELECT * FROM t1;
SET @@global.debug_dbug=@saved_dbug;
--let $rpl_only_running_threads= 1
--source include/stop_slave.inc
--source include/stop_slave_sql.inc
let $do_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1);
let $ignore_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1);
--echo DO_DOMAIN_IDS (BEFORE) : $do_domain_ids_before
@ -218,7 +217,7 @@ SELECT * FROM t1;
SET @@global.debug_dbug=@saved_dbug;
--source include/stop_slave.inc
--source include/stop_slave_sql.inc
let $do_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1);
let $ignore_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1);
--echo DO_DOMAIN_IDS (BEFORE) : $do_domain_ids_before
@ -286,7 +285,7 @@ SELECT * FROM t1;
SET @@global.debug_dbug=@saved_dbug;
--source include/stop_slave.inc
--source include/stop_slave_sql.inc
let $do_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1);
let $ignore_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1);
--echo DO_DOMAIN_IDS (BEFORE) : $do_domain_ids_before
@ -354,7 +353,7 @@ SELECT * FROM t1;
SET @@global.debug_dbug=@saved_dbug;
--source include/stop_slave.inc
--source include/stop_slave_sql.inc
let $do_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1);
let $ignore_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1);
--echo DO_DOMAIN_IDS (BEFORE) : $do_domain_ids_before

View File

@ -31,8 +31,8 @@ SET sql_log_bin=1;
START SLAVE;
--let $slave_sql_errno=1062
--source include/wait_for_slave_sql_error.inc
--let $rpl_only_running_threads= 1
--source include/stop_slave.inc
--source include/wait_for_slave_io_to_start.inc
--source include/stop_slave_io.inc
# Skip the problem event from the master.
SET GLOBAL gtid_slave_pos= "0-1-100";
--source include/start_slave.inc
@ -51,7 +51,7 @@ SET debug_dbug= @dbug_save;
--connection slave
--let $slave_sql_errno=1590
--source include/wait_for_slave_sql_error.inc
--source include/stop_slave.inc
--source include/stop_slave_io.inc
SET sql_slave_skip_counter=1;
--source include/start_slave.inc
--sync_with_master

View File

@ -50,8 +50,7 @@ eval CHANGE MASTER TO master_host = '127.0.0.1', master_port = $MASTER_MYPORT,
START SLAVE;
--let $slave_io_errno= 1236
--source include/wait_for_slave_io_error.inc
--let $rpl_only_running_threads= 1
--source include/stop_slave.inc
--source include/stop_slave_sql.inc
--replace_result $MASTER_MYPORT MASTER_PORT
eval CHANGE MASTER TO master_host = '127.0.0.1', master_port = $MASTER_MYPORT,

View File

@ -327,8 +327,7 @@ sleep 4;
let $rcvd_heartbeats_after= query_get_value(SHOW STATUS LIKE 'slave_received_heartbeats', Value, 1);
let $result= query_get_value(SELECT ($rcvd_heartbeats_after - $rcvd_heartbeats_before) > 0 AS Result, Result, 1);
--echo Heartbeat events are received while sql thread stopped (1 means 'yes'): $result
--let $rpl_only_running_threads= 1
--source include/stop_slave.inc
--source include/stop_slave_io.inc
DROP TABLE t1;
--echo

View File

@ -40,9 +40,7 @@ SELECT * FROM t1;
SELECT * FROM t1;
# restart replication for the next testcase
stop slave;
--let $rpl_only_running_threads= 1
--source include/stop_slave.inc
--source include/stop_slave_io.inc
reset slave;
connection master;
reset master;
@ -160,9 +158,7 @@ SELECT * FROM t1;
SELECT * FROM t1;
# restart replication for the next testcase
stop slave;
--let $rpl_only_running_threads= 1
--source include/stop_slave.inc
--source include/stop_slave_io.inc
reset slave;
connection master;
reset master;

View File

@ -95,8 +95,7 @@ COMMIT;
--connection server_2
--let $slave_sql_errno= 1062
--source include/wait_for_slave_sql_error.inc
--let $rpl_only_running_threads= 1
--source include/stop_slave.inc
--source include/stop_slave_io.inc
--let $assert_cond= COUNT(*) = 0 FROM t1 WHERE a>32
--let $assert_text= table t1 should have zero rows where a>32
--source include/assert.inc

View File

@ -39,8 +39,7 @@ SET @@global.debug_dbug="d,inject_tblmap_same_id_maps_diff_table";
# wait for error 1593 (ER_SLAVE_FATAL_ERROR)
--let $slave_sql_errno=1593
--source include/wait_for_slave_sql_error.inc
--let $rpl_only_running_threads= 1
--source include/stop_slave.inc
--source include/stop_slave_io.inc
# clean up
SET @@global.debug_dbug=@saved_debug;

View File

@ -19,8 +19,7 @@ insert into t1 values (1);
reset master;
--connection slave
--let $rpl_only_running_threads= 1
--source include/stop_slave.inc
--source include/stop_slave_sql.inc
reset slave;
--source include/start_slave.inc