From 632dd304c7d1915d46a1b7e1fcca7821cae6e9d6 Mon Sep 17 00:00:00 2001 From: Brandon Nesterenko Date: Thu, 11 Jul 2024 06:55:45 -0600 Subject: [PATCH] MDEV-34554: rpl_change_master_demote sporadically fails on buildbot MDEV-34274 did not fix the test failure. The test has a START SLAVE UNTIL condition, where we can't use sync_with_master_gtid.inc, wait_for_slave_to_start.inc, or wait_for_slave_to_stop.inc because our MTR connection thread races with the start/stop of the SQL/IO threads. So instead, for slave start, we prove the threads started by waiting for the connection count to increase by 2; and for slave stop, we wait for the processlist count to return to its pre start slave number. --- .../rpl/r/rpl_change_master_demote.result | 3 +-- .../suite/rpl/t/rpl_change_master_demote.test | 21 ++++++++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/mysql-test/suite/rpl/r/rpl_change_master_demote.result b/mysql-test/suite/rpl/r/rpl_change_master_demote.result index dc6e5c2ffee..5924b833709 100644 --- a/mysql-test/suite/rpl/r/rpl_change_master_demote.result +++ b/mysql-test/suite/rpl/r/rpl_change_master_demote.result @@ -501,8 +501,7 @@ Warnings: Note 1278 It is recommended to use --skip-slave-start when doing step-by-step replication with START SLAVE UNTIL; otherwise, you will get problems if you get an unexpected slave's mariadbd restart # Slave needs time to start and stop automatically # Waiting for both SQL and IO threads to have started.. -# Waiting for SQL thread to be killed.. -# Waiting for IO thread to be killed.. +# Waiting for Slave SQL and IO threads to be killed.. # Validating neither SQL nor IO threads are running.. # ..success # Clean slave state of master diff --git a/mysql-test/suite/rpl/t/rpl_change_master_demote.test b/mysql-test/suite/rpl/t/rpl_change_master_demote.test index 9754b03f1cc..255e9e34b9c 100644 --- a/mysql-test/suite/rpl/t/rpl_change_master_demote.test +++ b/mysql-test/suite/rpl/t/rpl_change_master_demote.test @@ -277,36 +277,43 @@ SELECT VARIABLE_NAME, GLOBAL_VALUE FROM INFORMATION_SCHEMA.SYSTEM_VARIABLES WHER --echo # position pointing to a previous event (because --echo # master_demote_to_slave=1 merges gtid_binlog_pos into gtid_slave_pos). ---let $pre_start_slave_thread_count= query_get_value(SHOW STATUS LIKE 'Connections', Value, 1) +# Note that we can't use sync_with_master_gtid.inc, +# wait_for_slave_to_start.inc, or wait_for_slave_to_stop.inc because our MTR +# connection thread races with the start/stop of the SQL/IO threads. So +# instead, for slave start, we prove the threads started by waiting for the +# connection count to increase by 2; and for slave stop, we wait for the +# processlist count to return to its pre start slave number. + +--let $pre_start_slave_conn_count= query_get_value(SHOW STATUS LIKE 'Connections', Value, 1) +--let $pre_start_slave_process_count= `SELECT count(*) from information_schema.PROCESSLIST` --replace_result $ssu_middle_binlog_pos ssu_middle_binlog_pos eval START SLAVE UNTIL master_gtid_pos="$ssu_middle_binlog_pos"; --echo # Slave needs time to start and stop automatically --echo # Waiting for both SQL and IO threads to have started.. ---let $expected_cons_after_start_slave= `SELECT ($pre_start_slave_thread_count + 2)` +--let $expected_cons_after_start_slave= `SELECT ($pre_start_slave_conn_count + 2)` --let $status_var= Connections --let $status_var_value= $expected_cons_after_start_slave --let $status_var_comparsion= >= --source include/wait_for_status_var.inc --let $status_var_comparsion= ---echo # Waiting for SQL thread to be killed.. ---let $wait_condition= SELECT count(*)=0 from information_schema.PROCESSLIST where COMMAND="Slave_SQL" ---source include/wait_condition.inc ---echo # Waiting for IO thread to be killed.. ---let $wait_condition= SELECT count(*)=0 from information_schema.PROCESSLIST where COMMAND="Slave_IO" +--echo # Waiting for Slave SQL and IO threads to be killed.. +--let $wait_condition= SELECT count(*)=$pre_start_slave_process_count from information_schema.PROCESSLIST --source include/wait_condition.inc --echo # Validating neither SQL nor IO threads are running.. --let $io_state= query_get_value("SHOW SLAVE STATUS", Slave_IO_State, 1) if (`SELECT strcmp("$io_state","") != 0`) { + --echo # Slave_IO_State is "$io_state" but should be empty die "IO thread should not be running after START SLAVE UNTIL master_gtid_pos using a pre-existing GTID"; } --let $sql_state= query_get_value("SHOW SLAVE STATUS", Slave_SQL_Running_State, 1) if (`SELECT strcmp("$sql_state","") != 0`) { + --echo # Slave_SQL_Running_State is "$sql_state" but should be empty die "SQL thread should not be running after START SLAVE UNTIL master_gtid_pos using a pre-existing GTID"; } --echo # ..success