From 01ca57ec1615015e163123fa1882bcc278c81b9a Mon Sep 17 00:00:00 2001 From: Brandon Nesterenko Date: Wed, 17 Jan 2024 07:14:11 -0700 Subject: [PATCH] MDEV-32168: Postpush fix for rpl_domain_id_filter_master_crash While a replica may be reading events from the primary, the primary is killed. Left to its own devices, the IO thread may or may not stop in error, depending on what it is doing when its connection to the primary is killed (e.g. a failed read results in an error, whereas if the IO thread is idly waiting for events when the connection dies, it will enter into a reconnect loop and reconnect). MDEV-32168 changed the test to always wait for the reconnect, thus breaking the error case, as the IO thread would be stopped at a time of expecting it to be running. The fix is to manually stop/start the IO thread to ensure it is in a consistent state. Note that rpl_domain_id_filter_master_crash.test will need additional changes after fixing MDEV-33268 Reviewed By: ============ Kristian Nielsen --- .../rpl_domain_id_filter_master_crash.result | 5 +++-- .../t/rpl_domain_id_filter_master_crash.test | 20 +++++++++++++++++-- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/mysql-test/suite/rpl/r/rpl_domain_id_filter_master_crash.result b/mysql-test/suite/rpl/r/rpl_domain_id_filter_master_crash.result index 4169a2ddc26..a54ff99b591 100644 --- a/mysql-test/suite/rpl/r/rpl_domain_id_filter_master_crash.result +++ b/mysql-test/suite/rpl/r/rpl_domain_id_filter_master_crash.result @@ -38,8 +38,9 @@ connection master; include/rpl_start_server.inc [server_number=1] # Master has restarted successfully connection slave; -include/wait_for_slave_io_to_start.inc -include/wait_for_slave_sql_to_start.inc +include/stop_slave_sql.inc +include/stop_slave_io.inc +include/start_slave.inc select * from ti; a 1 diff --git a/mysql-test/suite/rpl/t/rpl_domain_id_filter_master_crash.test b/mysql-test/suite/rpl/t/rpl_domain_id_filter_master_crash.test index b1fa9af33a4..cdfdc098f5a 100644 --- a/mysql-test/suite/rpl/t/rpl_domain_id_filter_master_crash.test +++ b/mysql-test/suite/rpl/t/rpl_domain_id_filter_master_crash.test @@ -67,10 +67,26 @@ connection master; save_master_pos; --connection slave + +# Left to its own devices, the IO thread may or may not stop in error, +# depending on what it is doing when its connection to the primary is killed +# (e.g. a failed read results in an error, whereas if the IO thread is idly +# waiting for events when the connection dies, it will enter into a reconnect +# loop and reconnect). So we manually stop/start the IO thread to ensure it is +# in a consistent state +# +# FIXME: We shouldn't need to stop/start the SQL thread here, but due to +# MDEV-33268, we have to. So after fixing 33268, this should only stop/start +# the IO thread. Note the SQL thread must be stopped first due to an invalid +# DBUG_ASSERT in the IO thread's stop logic that depends on the state of the +# SQL thread (also reported and to be fixed in the same ticket). +# +--source include/stop_slave_sql.inc --let rpl_allow_error=1 ---source include/wait_for_slave_io_to_start.inc +--source include/stop_slave_io.inc --let rpl_allow_error= ---source include/wait_for_slave_sql_to_start.inc +--source include/start_slave.inc + sync_with_master; select * from ti; select * from tm;