diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_after_sync_coord_consistency.result b/mysql-test/suite/rpl/r/rpl_semi_sync_after_sync_coord_consistency.result new file mode 100644 index 00000000000..fdddd438d82 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_semi_sync_after_sync_coord_consistency.result @@ -0,0 +1,88 @@ +include/master-slave.inc +[connection master] +connection slave; +include/stop_slave.inc +set @old_enabled= @@global.rpl_semi_sync_slave_enabled; +set @old_dbug= @@global.debug_dbug; +set global rpl_semi_sync_slave_enabled= 1; +connection master; +set @old_enabled= @@global.rpl_semi_sync_master_enabled; +set @old_timeout= @@global.rpl_semi_sync_master_timeout; +set @old_wait_point= @@global.rpl_semi_sync_master_wait_point; +set global rpl_semi_sync_master_enabled= 1; +set global rpl_semi_sync_master_timeout= 2000; +set global rpl_semi_sync_master_wait_point= AFTER_SYNC; +connection slave; +include/start_slave.inc +# Ensure slave connection is semi-simulate_delay_semisync_slave_reply +connection master; +connection slave; +# +# Initialize test data +connection master; +create table t1 (a int) engine=innodb; +include/save_master_gtid.inc +connection slave; +include/sync_with_master_gtid.inc +include/stop_slave.inc +set @@global.debug_dbug="+d,simulate_delay_semisync_slave_reply"; +include/start_slave.inc +# Ensure connection is semi-sync +connection master; +connection slave; +connection master; +set @old_bgc_count= @@global.binlog_commit_wait_count; +set @old_bgc_usec= @@global.binlog_commit_wait_usec; +set @@global.binlog_commit_wait_count=1; +set @@global.binlog_commit_wait_usec=100000; +connection server_1; +set debug_sync= "commit_after_release_LOCK_log SIGNAL ddl_binlogged WAIT_FOR ddl_cont"; +# T1 (DDL) +create table t2 (a int); +connect server_1_sync, localhost, root,,; +connection server_1_sync; +set debug_sync= "now WAIT_FOR ddl_binlogged"; +include/save_master_gtid.inc +connection server_2; +set debug_sync= "now WAIT_FOR io_thd_at_slave_reply"; +connection master; +set debug_sync= "commit_before_get_LOCK_after_binlog_sync SIGNAL mdl_binlogged WAIT_FOR mdl_cont"; +# T2 (DML) +insert into t1 values (100);; +connection server_1_sync; +set debug_sync= "now WAIT_FOR mdl_binlogged"; +# Both transactions binlogged and released LOCK_log, and are just before +# wait_after_sync() +set debug_sync= "now SIGNAL ddl_cont"; +set debug_sync= "now SIGNAL mdl_cont"; +connection server_2; +# slave_reply for DDL +set debug_sync= "now SIGNAL io_thd_do_reply"; +# slave_reply for MDL +set debug_sync= "now WAIT_FOR io_thd_at_slave_reply"; +set debug_sync= "now SIGNAL io_thd_do_reply"; +# Reaping MDL.. +connection master; +# ..done +# Reaping DDL.. +connection server_1; +# ..done +# +# Cleanup +connection slave; +include/stop_slave.inc +set @@global.rpl_semi_sync_slave_enabled= @old_enabled; +set @@global.debug_dbug= @old_dbug; +include/start_slave.inc +connection master; +set @@global.binlog_commit_wait_count= @old_bgc_count; +set @@global.binlog_commit_wait_usec= @old_bgc_usec; +set @@global.rpl_semi_sync_master_enabled= @old_enabled; +set @@global.rpl_semi_sync_master_timeout= @old_timeout; +set @@global.rpl_semi_sync_master_wait_point= @old_wait_point; +drop table t1, t2; +include/save_master_gtid.inc +connection slave; +include/sync_with_master_gtid.inc +include/rpl_end.inc +# End of rpl_semi_sync_after_sync_coord_consistency.test diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_after_sync_coord_consistency.test b/mysql-test/suite/rpl/t/rpl_semi_sync_after_sync_coord_consistency.test new file mode 100644 index 00000000000..774f267c252 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_after_sync_coord_consistency.test @@ -0,0 +1,183 @@ +# +# This test ensures that a race condition (reported in MDEV-35109) which +# could cause semi-sync transactions binlogging through MYSQL_BIN_LOG::write() +# to hang until the semi-sync timeout has been hit, despite the ACK actually +# having been received. The problem was that, for a primary configured with +# wait_point=AFTER_SYNC, if two threads T1 (binlogging through +# MYSQL_BIN_LOG::write()) and T2 were binlogging at the same time, T1 could +# accidentally wait for the binlog coordinates of T2. Prior to MDEV-33551, +# this only resulted in delayed transactions, because all transactions shared +# the same condition variable for ACK signaling. However, with the MDEV-33551 +# changes, each thread has its own condition variable to signal. So T1 could +# wait indefinitely when either: +# 1) T1's ACK is received but not T2's when T1 goes into wait_after_sync(), +# because the ACK receiver thread has already notified about the T1 ACK, +# but T1 was _actually_ waiting on T2's ACK, and therefore tries to wait +# (in vain). +# +# 2) T1 goes to wait_after_sync() before any ACKs have arrived. When T1's ACK +# comes in, T1 is woken up; however, sees it needs to wait more (because +# it was actually waiting on T2's ACK), and goes to wait again (this time, +# in vain). +# +# Note that the actual cause of T1 waiting on T2's binlog coordinates is when +# MYSQL_BIN_LOG::write() would call Repl_semisync_master::wait_after_sync(), +# the binlog offset parameter was read as the end of MYSQL_BIN_LOG::log_file, +# which is shared among transactions. So if T2 had updated the binary log +# _after_ T1 had released LOCK_log, but not yet invoked wait_after_sync(), it +# would use the end of the binary log file as the binlog offset, which was that +# of T2 (or any future transaction). The patch itself fixes this issue, and +# ensures consistency between the binary log coordinates a transaction uses +# between report_binlog_update() and wait_after_sync(). +# +# This test reproduces the above race condition, to ensure that T1 can no +# longer use T2's binlog coordinates when wait_point=AFTER_SYNC (AFTER_COMMIT +# was never affected). That is, because T1 in the above scenario must binlog +# through MYSQL_BIN_LOG::write(), it is a DDL. T2 is an MDL that commits +# through binlog group commit. We use debug_sync to stop both T1 and T2 +# after binlogging, reporting their coordinates to the semi-sync cache (i.e. +# after report_binlog_update()), and after releasing LOCK_log. Debug_sync is +# used on the replica to ensure no ACKs are sent until after both T1 and T2 +# are paused. Then, T1 and T2 are signaled for wakeup, and both ACKs are sent. +# Prior to MDEV-35109 fixes, this results in T1 timing out after the configured +# 2 second timeout. +# +# References: +# MDEV-33551: Semi-sync Wait Point AFTER_COMMIT Slow on Workloads with Heavy +# Concurrency +# MDEV-35109: Semi-sync Replication stalling Primary using +# wait_point=AFTER_SYNC +# + +--source include/have_innodb.inc +# Test is format independent +--source include/have_binlog_format_row.inc +--source include/master-slave.inc + +--connection slave +--source include/stop_slave.inc +set @old_enabled= @@global.rpl_semi_sync_slave_enabled; +set @old_dbug= @@global.debug_dbug; +set global rpl_semi_sync_slave_enabled= 1; + +--connection master +set @old_enabled= @@global.rpl_semi_sync_master_enabled; +set @old_timeout= @@global.rpl_semi_sync_master_timeout; +set @old_wait_point= @@global.rpl_semi_sync_master_wait_point; +set global rpl_semi_sync_master_enabled= 1; +set global rpl_semi_sync_master_timeout= 2000; # 2s +set global rpl_semi_sync_master_wait_point= AFTER_SYNC; + +--connection slave +--source include/start_slave.inc + +--echo # Ensure slave connection is semi-simulate_delay_semisync_slave_reply +--connection master +--let $status_var_value= ON +--let $status_var= Rpl_semi_sync_master_status +--source include/wait_for_status_var.inc +--connection slave +--let $status_var_value= ON +--let $status_var= Rpl_semi_sync_slave_status +--source include/wait_for_status_var.inc + +--echo # +--echo # Initialize test data +--connection master + +# It is simpler to create t1 before using debug_sync on the slave_reply +create table t1 (a int) engine=innodb; +--source include/save_master_gtid.inc +--connection slave +--source include/sync_with_master_gtid.inc + +--source include/stop_slave.inc +set @@global.debug_dbug="+d,simulate_delay_semisync_slave_reply"; +--source include/start_slave.inc + +--echo # Ensure connection is semi-sync +--connection master +--let $status_var_value= ON +--let $status_var= Rpl_semi_sync_master_status +--source include/wait_for_status_var.inc +--connection slave +--let $status_var_value= ON +--let $status_var= Rpl_semi_sync_slave_status +--source include/wait_for_status_var.inc + +--connection master +set @old_bgc_count= @@global.binlog_commit_wait_count; +set @old_bgc_usec= @@global.binlog_commit_wait_usec; +set @@global.binlog_commit_wait_count=1; +set @@global.binlog_commit_wait_usec=100000; + +--connection server_1 +set debug_sync= "commit_after_release_LOCK_log SIGNAL ddl_binlogged WAIT_FOR ddl_cont"; +--echo # T1 (DDL) +--send create table t2 (a int) + +--connect(server_1_sync, localhost, root,,) +--connection server_1_sync +set debug_sync= "now WAIT_FOR ddl_binlogged"; +--source include/save_master_gtid.inc + +--connection server_2 +set debug_sync= "now WAIT_FOR io_thd_at_slave_reply"; + +--connection master +set debug_sync= "commit_before_get_LOCK_after_binlog_sync SIGNAL mdl_binlogged WAIT_FOR mdl_cont"; +--echo # T2 (DML) +--send insert into t1 values (100); + +--connection server_1_sync +set debug_sync= "now WAIT_FOR mdl_binlogged"; + +--echo # Both transactions binlogged and released LOCK_log, and are just before +--echo # wait_after_sync() +set debug_sync= "now SIGNAL ddl_cont"; +# Ensure ddl_cont is seen before next SIGNAL +--let $wait_condition= SELECT count(*)=0 FROM information_schema.processlist WHERE state LIKE "%commit_after_release_LOCK_log%" +--source include/wait_condition.inc + +set debug_sync= "now SIGNAL mdl_cont"; + +--connection server_2 +--echo # slave_reply for DDL +set debug_sync= "now SIGNAL io_thd_do_reply"; +--echo # slave_reply for MDL +set debug_sync= "now WAIT_FOR io_thd_at_slave_reply"; +set debug_sync= "now SIGNAL io_thd_do_reply"; + +--echo # Reaping MDL.. +--connection master +--reap +--echo # ..done + +--echo # Reaping DDL.. +--connection server_1 +--reap +--echo # ..done + + +--echo # +--echo # Cleanup +--connection slave +--source include/stop_slave.inc +set @@global.rpl_semi_sync_slave_enabled= @old_enabled; +set @@global.debug_dbug= @old_dbug; +--source include/start_slave.inc + +--connection master +set @@global.binlog_commit_wait_count= @old_bgc_count; +set @@global.binlog_commit_wait_usec= @old_bgc_usec; +set @@global.rpl_semi_sync_master_enabled= @old_enabled; +set @@global.rpl_semi_sync_master_timeout= @old_timeout; +set @@global.rpl_semi_sync_master_wait_point= @old_wait_point; +drop table t1, t2; +--source include/save_master_gtid.inc + +--connection slave +--source include/sync_with_master_gtid.inc + +--source include/rpl_end.inc +--echo # End of rpl_semi_sync_after_sync_coord_consistency.test diff --git a/sql/log.cc b/sql/log.cc index a58dff0b545..cfccde176ee 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -6866,8 +6866,8 @@ err: mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync); mysql_mutex_assert_not_owner(&LOCK_commit_ordered); #ifdef HAVE_REPLICATION - if (repl_semisync_master.report_binlog_update( - thd, thd, log_file_name, file->pos_in_file)) + if (repl_semisync_master.report_binlog_update(thd, thd, + log_file_name, offset)) { sql_print_error("Failed to run 'after_flush' hooks"); error= 1; @@ -6894,13 +6894,14 @@ err: mysql_mutex_lock(&LOCK_after_binlog_sync); mysql_mutex_unlock(&LOCK_log); + DEBUG_SYNC(thd, "commit_after_release_LOCK_log"); + mysql_mutex_assert_not_owner(&LOCK_prepare_ordered); mysql_mutex_assert_not_owner(&LOCK_log); mysql_mutex_assert_owner(&LOCK_after_binlog_sync); mysql_mutex_assert_not_owner(&LOCK_commit_ordered); #ifdef HAVE_REPLICATION - if (repl_semisync_master.wait_after_sync(log_file_name, - file->pos_in_file)) + if (repl_semisync_master.wait_after_sync(log_file_name, offset)) { error=1; /* error is already printed inside hook */