diff --git a/include/mysql_com.h b/include/mysql_com.h index b837704c19d..2cdda8457ea 100644 --- a/include/mysql_com.h +++ b/include/mysql_com.h @@ -477,7 +477,7 @@ typedef struct st_net { char net_skip_rest_factor; my_bool thread_specific_malloc; unsigned char compress; - my_bool unused3; /* Please remove with the next incompatible ABI change. */ + my_bool pkt_nr_can_be_reset; /* Pointer to query object in query cache, do not equal NULL (0) for queries in cache that have not stored its results yet diff --git a/mysql-test/include/search_pattern_in_file.inc b/mysql-test/include/search_pattern_in_file.inc index a899a9294cc..3105f7f9077 100644 --- a/mysql-test/include/search_pattern_in_file.inc +++ b/mysql-test/include/search_pattern_in_file.inc @@ -51,12 +51,15 @@ # Created: 2011-11-11 mleich # +--error 0,1 perl; use strict; die "SEARCH_FILE not set" unless $ENV{SEARCH_FILE}; my @search_files= glob($ENV{SEARCH_FILE}); my $search_pattern= $ENV{SEARCH_PATTERN} or die "SEARCH_PATTERN not set"; my $search_range= $ENV{SEARCH_RANGE}; + my $silent= $ENV{SEARCH_SILENT}; + my $search_result= 0; my $content; foreach my $search_file (@search_files) { open(FILE, '<', $search_file) || die("Can't open file $search_file: $!"); @@ -89,16 +92,39 @@ perl; { @matches=($content =~ /$search_pattern/gm); } - my $res=@matches ? "FOUND " . scalar(@matches) : "NOT FOUND"; + my $res; + if (@matches) + { + $res="FOUND " . scalar(@matches); + $search_result= 1; + } + else + { + $res= "NOT FOUND"; + } $ENV{SEARCH_FILE} =~ s{^.*?([^/\\]+)$}{$1}; - if ($ENV{SEARCH_OUTPUT} eq "matches") { - foreach (@matches) { - print $_ . "\n"; - } - } else { - print "$res /$search_pattern/ in $ENV{SEARCH_FILE}\n"; + if (!$silent || $search_result) + { + if ($ENV{SEARCH_OUTPUT} eq "matches") + { + foreach (@matches) + { + print $_ . "\n"; + } + } + else + { + print "$res /$search_pattern/ in $ENV{SEARCH_FILE}\n"; + } } die "$ENV{SEARCH_ABORT}\n" if $ENV{SEARCH_ABORT} && $res =~ /^$ENV{SEARCH_ABORT}/; + exit($search_result != 1); EOF + +let $SEARCH_RESULT= 1; # Found pattern +if ($errno) +{ + let $SEARCH_RESULT= 0; # Did not find pattern +} diff --git a/mysql-test/include/wait_for_pattern_in_file.inc b/mysql-test/include/wait_for_pattern_in_file.inc new file mode 100644 index 00000000000..52226acd2da --- /dev/null +++ b/mysql-test/include/wait_for_pattern_in_file.inc @@ -0,0 +1,56 @@ +# ==== Purpose ==== +# +# Waits until pattern comes into log file or until a timeout is reached. +# This is a timeout wrapper for search_pattern_in_file.inc +# +# +# ==== Usage ==== +# +# [--let $timeout= NUMBER in seconds] +# For other parameters, check search_pattern_in_file.inc + +--let $wait_save_keep_include_silent=$keep_include_silent +--let $include_filename= wait_for_pattern_in_file.inc +--source include/begin_include_file.inc +--let $keep_include_silent= 1 + +let $_timeout= $timeout; +if (!$_timeout) +{ + let $_timeout= 10; + if ($VALGRIND_TEST) + { + let $_timeout= 30; + } +} + +let $_timeout_counter=`SELECT $_timeout * 10`; +let SEARCH_SILENT=1; + +let $_continue= 1; +while ($_continue) +{ + source include/search_pattern_in_file.inc; + if ($SEARCH_RESULT) + { + # Found match + let $_continue= 0; + } + if (!$SEARCH_RESULT) + { + dec $_timeout_counter; + if ($_timeout_counter == 1) + { + let $SEARCH_SILENT= 0; + } + if (!$_timeout_counter) + { + let $_continue= 0; + } + } +} + +let SEARCH_SILENT=0; + +--source include/end_include_file.inc +--let $keep_include_silent=$wait_save_keep_include_silent diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result index 62119751ac2..219bad22528 100644 --- a/mysql-test/main/mysqld--help.result +++ b/mysql-test/main/mysqld--help.result @@ -1138,7 +1138,7 @@ The following specify which files/extra groups are read (specified before remain The tracing level for semi-sync replication. --rpl-semi-sync-master-wait-no-slave Wait until timeout when no semi-synchronous replication - slave available (enabled by default). + slave is available. (Defaults to on; use --skip-rpl-semi-sync-master-wait-no-slave to disable.) --rpl-semi-sync-master-wait-point=name Should transaction wait for semi-sync ack after having diff --git a/mysql-test/suite/binlog_encryption/rpl_semi_sync.result b/mysql-test/suite/binlog_encryption/rpl_semi_sync.result index d18bd1efda7..cff1a73996d 100644 --- a/mysql-test/suite/binlog_encryption/rpl_semi_sync.result +++ b/mysql-test/suite/binlog_encryption/rpl_semi_sync.result @@ -6,7 +6,6 @@ call mtr.add_suppression("Read semi-sync reply"); call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT."); call mtr.add_suppression("mysqld: Got an error reading communication packets"); connection slave; -call mtr.add_suppression("Master server does not support semi-sync"); call mtr.add_suppression("Semi-sync slave .* reply"); call mtr.add_suppression("Slave SQL.*Request to stop slave SQL Thread received while applying a group that has non-transactional changes; waiting for completion of the group"); connection master; @@ -26,7 +25,7 @@ set global rpl_semi_sync_slave_enabled= 0; # Main test of semi-sync replication start here # connection master; -set global rpl_semi_sync_master_timeout= 60000; +set global rpl_semi_sync_master_timeout= 2000; [ default state of semi-sync on master should be OFF ] show variables like 'rpl_semi_sync_master_enabled'; Variable_name Value @@ -161,11 +160,15 @@ connection slave; # Test semi-sync master will switch OFF after one transaction # timeout waiting for slave reply. # +connection master; +show status like "Rpl_semi_sync_master_status"; +Variable_name Value +Rpl_semi_sync_master_status ON connection slave; include/stop_slave.inc connection master; include/kill_binlog_dump_threads.inc -set global rpl_semi_sync_master_timeout= 5000; +set global rpl_semi_sync_master_timeout= 2000; [ master status should be ON ] show status like 'Rpl_semi_sync_master_no_tx'; Variable_name Value @@ -315,6 +318,8 @@ include/kill_binlog_dump_threads.inc connection slave; include/start_slave.inc connection master; +connection slave; +connection master; create table t1 (a int) engine = ENGINE_TYPE; insert into t1 values (1); insert into t1 values (2), (3); @@ -357,6 +362,8 @@ show status like 'Rpl_semi_sync_slave_status'; Variable_name Value Rpl_semi_sync_slave_status ON connection master; +connection slave; +connection master; [ master semi-sync should be ON ] show status like 'Rpl_semi_sync_master_clients'; Variable_name Value diff --git a/mysql-test/suite/rpl/r/rpl_binlog_dump_slave_gtid_state_info.result b/mysql-test/suite/rpl/r/rpl_binlog_dump_slave_gtid_state_info.result index 98688df7273..38a9afd4d02 100644 --- a/mysql-test/suite/rpl/r/rpl_binlog_dump_slave_gtid_state_info.result +++ b/mysql-test/suite/rpl/r/rpl_binlog_dump_slave_gtid_state_info.result @@ -8,6 +8,7 @@ CHANGE MASTER TO MASTER_USE_GTID=current_pos; include/start_slave.inc connection master; "Test Case 1: Start binlog_dump to slave_server(#), pos(master-bin.000001, ###), using_gtid(1), gtid('')" +include/wait_for_pattern_in_file.inc FOUND 1 /using_gtid\(1\), gtid\(\'\'\).*/ in mysqld.1.err connection slave; include/stop_slave.inc @@ -15,6 +16,7 @@ CHANGE MASTER TO MASTER_USE_GTID=no; include/start_slave.inc connection master; "Test Case 2: Start binlog_dump to slave_server(#), pos(master-bin.000001, ###), using_gtid(0), gtid('')" +include/wait_for_pattern_in_file.inc FOUND 1 /using_gtid\(0\), gtid\(\'\'\).*/ in mysqld.1.err CREATE TABLE t (f INT) ENGINE=INNODB; INSERT INTO t VALUES(10); @@ -25,6 +27,7 @@ CHANGE MASTER TO MASTER_USE_GTID=slave_pos; include/start_slave.inc connection master; "Test Case 3: Start binlog_dump to slave_server(#), pos(master-bin.000001, ###), using_gtid(1), gtid('0-1-2')" +include/wait_for_pattern_in_file.inc FOUND 1 /using_gtid\(1\), gtid\(\'0-1-2\'\).*/ in mysqld.1.err SET @@SESSION.gtid_domain_id=10; INSERT INTO t VALUES(20); @@ -35,6 +38,7 @@ CHANGE MASTER TO MASTER_USE_GTID=slave_pos; include/start_slave.inc connection master; "Test Case 4: Start binlog_dump to slave_server(#), pos(master-bin.000001, ###), using_gtid(1), gtid('0-1-2,10-1-1')" +include/wait_for_pattern_in_file.inc FOUND 1 /using_gtid\(1\), gtid\(\'0-1-2,10-1-1\'\).*/ in mysqld.1.err "===== Clean up =====" connection slave; diff --git a/mysql-test/suite/rpl/r/rpl_circular_semi_sync.result b/mysql-test/suite/rpl/r/rpl_circular_semi_sync.result index 5664b7913d2..2596d346479 100644 --- a/mysql-test/suite/rpl/r/rpl_circular_semi_sync.result +++ b/mysql-test/suite/rpl/r/rpl_circular_semi_sync.result @@ -1,5 +1,7 @@ include/master-slave.inc [connection master] +connection server_2; +call mtr.add_suppression("Timeout waiting for reply of binlog"); # Master server_1 and Slave server_2 initialization ... connection server_2; include/stop_slave.inc @@ -40,6 +42,8 @@ set @@global.rpl_semi_sync_master_enabled = 1; INSERT INTO t1(a) VALUES (2); include/save_master_gtid.inc connection server_1; +include/stop_slave.inc +include/start_slave.inc # # the successful sync is a required proof # diff --git a/mysql-test/suite/rpl/r/rpl_delayed_slave.result b/mysql-test/suite/rpl/r/rpl_delayed_slave.result index e7daa3328ce..a48e98b0006 100644 --- a/mysql-test/suite/rpl/r/rpl_delayed_slave.result +++ b/mysql-test/suite/rpl/r/rpl_delayed_slave.result @@ -67,6 +67,9 @@ include/stop_slave.inc # CHANGE MASTER TO MASTER_DELAY = 2*T include/start_slave.inc connection master; +INSERT INTO t1 VALUES ('Syncing slave', 5); +connection slave; +connection master; INSERT INTO t1 VALUES (delay_on_slave(1), 6); Warnings: Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a system variable that may have a different value on the slave diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync.result b/mysql-test/suite/rpl/r/rpl_semi_sync.result index d18bd1efda7..cff1a73996d 100644 --- a/mysql-test/suite/rpl/r/rpl_semi_sync.result +++ b/mysql-test/suite/rpl/r/rpl_semi_sync.result @@ -6,7 +6,6 @@ call mtr.add_suppression("Read semi-sync reply"); call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT."); call mtr.add_suppression("mysqld: Got an error reading communication packets"); connection slave; -call mtr.add_suppression("Master server does not support semi-sync"); call mtr.add_suppression("Semi-sync slave .* reply"); call mtr.add_suppression("Slave SQL.*Request to stop slave SQL Thread received while applying a group that has non-transactional changes; waiting for completion of the group"); connection master; @@ -26,7 +25,7 @@ set global rpl_semi_sync_slave_enabled= 0; # Main test of semi-sync replication start here # connection master; -set global rpl_semi_sync_master_timeout= 60000; +set global rpl_semi_sync_master_timeout= 2000; [ default state of semi-sync on master should be OFF ] show variables like 'rpl_semi_sync_master_enabled'; Variable_name Value @@ -161,11 +160,15 @@ connection slave; # Test semi-sync master will switch OFF after one transaction # timeout waiting for slave reply. # +connection master; +show status like "Rpl_semi_sync_master_status"; +Variable_name Value +Rpl_semi_sync_master_status ON connection slave; include/stop_slave.inc connection master; include/kill_binlog_dump_threads.inc -set global rpl_semi_sync_master_timeout= 5000; +set global rpl_semi_sync_master_timeout= 2000; [ master status should be ON ] show status like 'Rpl_semi_sync_master_no_tx'; Variable_name Value @@ -315,6 +318,8 @@ include/kill_binlog_dump_threads.inc connection slave; include/start_slave.inc connection master; +connection slave; +connection master; create table t1 (a int) engine = ENGINE_TYPE; insert into t1 values (1); insert into t1 values (2), (3); @@ -357,6 +362,8 @@ show status like 'Rpl_semi_sync_slave_status'; Variable_name Value Rpl_semi_sync_slave_status ON connection master; +connection slave; +connection master; [ master semi-sync should be ON ] show status like 'Rpl_semi_sync_master_clients'; Variable_name Value diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_after_sync.result b/mysql-test/suite/rpl/r/rpl_semi_sync_after_sync.result index f2240817489..301e4c819e4 100644 --- a/mysql-test/suite/rpl/r/rpl_semi_sync_after_sync.result +++ b/mysql-test/suite/rpl/r/rpl_semi_sync_after_sync.result @@ -7,7 +7,6 @@ call mtr.add_suppression("Read semi-sync reply"); call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT."); call mtr.add_suppression("mysqld: Got an error reading communication packets"); connection slave; -call mtr.add_suppression("Master server does not support semi-sync"); call mtr.add_suppression("Semi-sync slave .* reply"); call mtr.add_suppression("Slave SQL.*Request to stop slave SQL Thread received while applying a group that has non-transactional changes; waiting for completion of the group"); connection master; @@ -27,7 +26,7 @@ set global rpl_semi_sync_slave_enabled= 0; # Main test of semi-sync replication start here # connection master; -set global rpl_semi_sync_master_timeout= 60000; +set global rpl_semi_sync_master_timeout= 2000; [ default state of semi-sync on master should be OFF ] show variables like 'rpl_semi_sync_master_enabled'; Variable_name Value @@ -162,11 +161,15 @@ connection slave; # Test semi-sync master will switch OFF after one transaction # timeout waiting for slave reply. # +connection master; +show status like "Rpl_semi_sync_master_status"; +Variable_name Value +Rpl_semi_sync_master_status ON connection slave; include/stop_slave.inc connection master; include/kill_binlog_dump_threads.inc -set global rpl_semi_sync_master_timeout= 5000; +set global rpl_semi_sync_master_timeout= 2000; [ master status should be ON ] show status like 'Rpl_semi_sync_master_no_tx'; Variable_name Value @@ -316,6 +319,8 @@ include/kill_binlog_dump_threads.inc connection slave; include/start_slave.inc connection master; +connection slave; +connection master; create table t1 (a int) engine = ENGINE_TYPE; insert into t1 values (1); insert into t1 values (2), (3); @@ -358,6 +363,8 @@ show status like 'Rpl_semi_sync_slave_status'; Variable_name Value Rpl_semi_sync_slave_status ON connection master; +connection slave; +connection master; [ master semi-sync should be ON ] show status like 'Rpl_semi_sync_master_clients'; Variable_name Value diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_after_sync_row.result b/mysql-test/suite/rpl/r/rpl_semi_sync_after_sync_row.result index fcced801d65..6c7db8ca342 100644 --- a/mysql-test/suite/rpl/r/rpl_semi_sync_after_sync_row.result +++ b/mysql-test/suite/rpl/r/rpl_semi_sync_after_sync_row.result @@ -7,7 +7,6 @@ call mtr.add_suppression("Read semi-sync reply"); call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT."); call mtr.add_suppression("mysqld: Got an error reading communication packets"); connection slave; -call mtr.add_suppression("Master server does not support semi-sync"); call mtr.add_suppression("Semi-sync slave .* reply"); call mtr.add_suppression("Slave SQL.*Request to stop slave SQL Thread received while applying a group that has non-transactional changes; waiting for completion of the group"); connection master; @@ -27,7 +26,7 @@ set global rpl_semi_sync_slave_enabled= 0; # Main test of semi-sync replication start here # connection master; -set global rpl_semi_sync_master_timeout= 60000; +set global rpl_semi_sync_master_timeout= 2000; [ default state of semi-sync on master should be OFF ] show variables like 'rpl_semi_sync_master_enabled'; Variable_name Value @@ -162,11 +161,15 @@ connection slave; # Test semi-sync master will switch OFF after one transaction # timeout waiting for slave reply. # +connection master; +show status like "Rpl_semi_sync_master_status"; +Variable_name Value +Rpl_semi_sync_master_status ON connection slave; include/stop_slave.inc connection master; include/kill_binlog_dump_threads.inc -set global rpl_semi_sync_master_timeout= 5000; +set global rpl_semi_sync_master_timeout= 2000; [ master status should be ON ] show status like 'Rpl_semi_sync_master_no_tx'; Variable_name Value @@ -316,6 +319,8 @@ include/kill_binlog_dump_threads.inc connection slave; include/start_slave.inc connection master; +connection slave; +connection master; create table t1 (a int) engine = ENGINE_TYPE; insert into t1 values (1); insert into t1 values (2), (3); @@ -358,6 +363,8 @@ show status like 'Rpl_semi_sync_slave_status'; Variable_name Value Rpl_semi_sync_slave_status ON connection master; +connection slave; +connection master; [ master semi-sync should be ON ] show status like 'Rpl_semi_sync_master_clients'; Variable_name Value diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_event.result b/mysql-test/suite/rpl/r/rpl_semi_sync_event.result index 917e7c2b02b..b1eb623cc99 100644 --- a/mysql-test/suite/rpl/r/rpl_semi_sync_event.result +++ b/mysql-test/suite/rpl/r/rpl_semi_sync_event.result @@ -7,7 +7,6 @@ call mtr.add_suppression("Read semi-sync reply"); call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT."); call mtr.add_suppression("mysqld: Got an error reading communication packets"); connection slave; -call mtr.add_suppression("Master server does not support semi-sync"); call mtr.add_suppression("Semi-sync slave .* reply"); call mtr.add_suppression("Slave SQL.*Request to stop slave SQL Thread received while applying a group that has non-transactional changes; waiting for completion of the group"); connection master; diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_event_after_sync.result b/mysql-test/suite/rpl/r/rpl_semi_sync_event_after_sync.result index 24daf0d72b5..34af8d31315 100644 --- a/mysql-test/suite/rpl/r/rpl_semi_sync_event_after_sync.result +++ b/mysql-test/suite/rpl/r/rpl_semi_sync_event_after_sync.result @@ -8,7 +8,6 @@ call mtr.add_suppression("Read semi-sync reply"); call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT."); call mtr.add_suppression("mysqld: Got an error reading communication packets"); connection slave; -call mtr.add_suppression("Master server does not support semi-sync"); call mtr.add_suppression("Semi-sync slave .* reply"); call mtr.add_suppression("Slave SQL.*Request to stop slave SQL Thread received while applying a group that has non-transactional changes; waiting for completion of the group"); connection master; diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result b/mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result index 8956eee2d2f..1c94c239fc6 100644 --- a/mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result +++ b/mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result @@ -5,6 +5,7 @@ include/stop_slave.inc connection server_1; RESET MASTER; SET @@global.max_binlog_size= 4096; +set @@global.rpl_semi_sync_master_enabled = 1; connection server_2; RESET MASTER; SET @@global.max_binlog_size= 4096; @@ -14,7 +15,6 @@ CHANGE MASTER TO master_use_gtid= slave_pos; include/start_slave.inc connection server_1; ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; -set @@global.rpl_semi_sync_master_enabled = 1; set @@global.rpl_semi_sync_master_wait_point=AFTER_SYNC; CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; INSERT INTO t1 VALUES (1, 'dummy1'); diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_no_missed_ack_after_add_slave.result b/mysql-test/suite/rpl/r/rpl_semi_sync_no_missed_ack_after_add_slave.result new file mode 100644 index 00000000000..19fed30ffb7 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_semi_sync_no_missed_ack_after_add_slave.result @@ -0,0 +1,48 @@ +include/rpl_init.inc [topology=1->2,1->3] +connection server_1; +set @old_enabled= @@global.rpl_semi_sync_master_enabled; +set @old_timeout= @@global.rpl_semi_sync_master_timeout; +set global rpl_semi_sync_master_enabled= 1; +set global rpl_semi_sync_master_timeout= 500; +connection server_2; +include/stop_slave.inc +set @old_enabled= @@global.rpl_semi_sync_slave_enabled; +set @old_dbug= @@global.debug_dbug; +set global rpl_semi_sync_slave_enabled= 1; +set global debug_dbug="+d,simulate_delay_semisync_slave_reply"; +include/start_slave.inc +connection server_3; +include/stop_slave.inc +set @old_enabled= @@global.rpl_semi_sync_slave_enabled; +set global rpl_semi_sync_slave_enabled= 1; +include/start_slave.inc +# Ensure primary recognizes both replicas are semi-sync +connection server_1; +connection server_1; +create table t1 (a int); +connection server_2; +# Verifying server_2 did not send ACK +connection server_3; +# Verifying server_3 did send ACK +connection server_1; +# Verifying master's semi-sync status is still ON (This failed pre-MDEV-32960 fixes) +# Verifying rpl_semi_sync_master_yes_tx incremented +# +# Cleanup +connection server_2; +set global rpl_semi_sync_slave_enabled= @old_enabled; +set global debug_dbug= @old_dbug; +include/stop_slave.inc +connection server_3; +set global rpl_semi_sync_slave_enabled= @old_enabled; +include/stop_slave.inc +connection server_1; +set global rpl_semi_sync_master_enabled= @old_enabled; +set global rpl_semi_sync_master_timeout= @old_timeout; +drop table t1; +connection server_2; +include/start_slave.inc +connection server_3; +include/start_slave.inc +include/rpl_end.inc +# End of rpl_semi_sync_no_missed_ack_after_add_slave.test diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_slave_enabled_consistent.result b/mysql-test/suite/rpl/r/rpl_semi_sync_slave_enabled_consistent.result new file mode 100644 index 00000000000..99c3124957f --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_semi_sync_slave_enabled_consistent.result @@ -0,0 +1,35 @@ +include/master-slave.inc +[connection master] +call mtr.add_suppression("Replication event checksum verification failed"); +call mtr.add_suppression("could not queue event from master"); +# +# Set up a semisync connection +connection master; +set @@global.rpl_semi_sync_master_enabled= ON; +connection slave; +stop slave io_thread; +set @@global.rpl_semi_sync_slave_enabled= ON; +set @old_dbug= @@global.debug_dbug; +set @@global.debug_dbug= "+d,corrupt_queue_event"; +set @@global.debug_dbug= "+d,pause_before_io_read_event"; +set @@global.debug_dbug= "+d,placeholder"; +start slave io_thread; +# Disable semi-sync on the slave while the IO thread is active +set debug_sync='now wait_for io_thread_at_read_event'; +set @@global.rpl_semi_sync_slave_enabled= OFF; +set debug_sync='now signal io_thread_continue_read_event'; +# Waiting for the slave to stop with the error from corrupt_queue_event +connection slave; +include/wait_for_slave_io_error.inc [errno=1595,1743] +# Sleep 1 to give time for Ack_receiver to receive COM_QUIT +include/assert_grep.inc [Check that there is no 'Read semi-sync reply magic number error' in error log.] +# +# Cleanup +connection slave; +include/stop_slave.inc +set @@global.debug_dbug= @old_dbug; +include/start_slave.inc +connection master; +set @@global.rpl_semi_sync_master_enabled= default; +include/rpl_end.inc +# End of rpl_semi_sync_slave_enabled_consistent.test diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_slave_reply_fail.result b/mysql-test/suite/rpl/r/rpl_semi_sync_slave_reply_fail.result index e482da7d0fc..3c9cf71aeca 100644 --- a/mysql-test/suite/rpl/r/rpl_semi_sync_slave_reply_fail.result +++ b/mysql-test/suite/rpl/r/rpl_semi_sync_slave_reply_fail.result @@ -4,6 +4,7 @@ connection slave; include/stop_slave.inc connection master; call mtr.add_suppression("Timeout waiting for reply of binlog*"); +call mtr.add_suppression("Master server does not read semi-sync messages*"); set global rpl_semi_sync_master_enabled = ON; SET @@GLOBAL.rpl_semi_sync_master_timeout=100; create table t1 (i int); @@ -15,8 +16,8 @@ SET GLOBAL debug_dbug="+d,semislave_failed_net_flush"; include/start_slave.inc connection master; connection slave; -"Assert that the net_fulsh() reply failed is present in slave error log. -FOUND 1 /Semi-sync slave net_flush\(\) reply failed/ in mysqld.2.err +"Assert that Master server does not read semi-sync messages" is present in slave error log. +FOUND 1 /Master server does not read semi-sync messages/ in mysqld.2.err "Assert that Slave IO thread is up and running." SHOW STATUS LIKE 'Slave_running'; Variable_name Value diff --git a/mysql-test/suite/rpl/r/rpl_semisync_ali_issues.result b/mysql-test/suite/rpl/r/rpl_semisync_ali_issues.result index 530bbac633b..d7f87ae5fd3 100644 --- a/mysql-test/suite/rpl/r/rpl_semisync_ali_issues.result +++ b/mysql-test/suite/rpl/r/rpl_semisync_ali_issues.result @@ -14,7 +14,6 @@ CALL mtr.add_suppression("Failed on request_dump()*"); CALL mtr.add_suppression("Semi-sync master failed on*"); CALL mtr.add_suppression("Master command COM_BINLOG_DUMP failed*"); CALL mtr.add_suppression("on master failed*"); -CALL mtr.add_suppression("Master server does not support semi-sync*"); CALL mtr.add_suppression("Semi-sync slave net_flush*"); CALL mtr.add_suppression("Failed to flush master info*"); CALL mtr.add_suppression("Request to stop slave SQL Thread received while apply*"); @@ -196,7 +195,7 @@ Variable_name Value Rpl_semi_sync_master_clients 0 show status like 'Rpl_semi_sync_master_status'; Variable_name Value -Rpl_semi_sync_master_status OFF +Rpl_semi_sync_master_status ON connection slave; START SLAVE IO_THREAD; include/wait_for_slave_io_to_start.inc diff --git a/mysql-test/suite/rpl/r/rpl_session_var.result b/mysql-test/suite/rpl/r/rpl_session_var.result index 67863583f8d..f9794df3be7 100644 --- a/mysql-test/suite/rpl/r/rpl_session_var.result +++ b/mysql-test/suite/rpl/r/rpl_session_var.result @@ -1,5 +1,16 @@ include/master-slave.inc [connection master] +select @@rpl_semi_sync_master_enabled; +@@rpl_semi_sync_master_enabled +0 +connection slave; +select @@rpl_semi_sync_slave_enabled; +@@rpl_semi_sync_slave_enabled +0 +show status like "rpl_semi_sync_slave_status"; +Variable_name Value +Rpl_semi_sync_slave_status OFF +connection master; drop table if exists t1; Warnings: Note 1051 Unknown table 'test.t1' diff --git a/mysql-test/suite/rpl/r/rpl_session_var2.result b/mysql-test/suite/rpl/r/rpl_session_var2.result new file mode 100644 index 00000000000..645eca02492 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_session_var2.result @@ -0,0 +1,69 @@ +include/master-slave.inc +[connection master] +select @@rpl_semi_sync_master_enabled; +@@rpl_semi_sync_master_enabled +1 +connection slave; +select @@rpl_semi_sync_slave_enabled; +@@rpl_semi_sync_slave_enabled +1 +show status like "rpl_semi_sync_slave_status"; +Variable_name Value +Rpl_semi_sync_slave_status ON +connection master; +drop table if exists t1; +Warnings: +Note 1051 Unknown table 'test.t1' +create table t1(a varchar(100),b int); +set @@session.sql_mode=pipes_as_concat; +insert into t1 values('My'||'SQL', 1); +set @@session.sql_mode=default; +insert into t1 values('1'||'2', 2); +select * from t1 where b<3 order by a; +a b +1 2 +MySQL 1 +connection slave; +select * from t1 where b<3 order by a; +a b +1 2 +MySQL 1 +connection master; +set @@session.sql_mode=ignore_space; +insert into t1 values(password ('MySQL'), 3); +set @@session.sql_mode=ansi_quotes; +create table "t2" ("a" int); +drop table t1, t2; +set @@session.sql_mode=default; +create table t1(a int auto_increment primary key); +create table t2(b int, a int); +set @@session.sql_auto_is_null=1; +insert into t1 values(null); +insert into t2 select 1,a from t1 where a is null; +set @@session.sql_auto_is_null=0; +insert into t1 values(null); +insert into t2 select 2,a from t1 where a is null; +select * from t2 order by b; +b a +1 1 +connection slave; +select * from t2 order by b; +b a +1 1 +connection master; +drop table t1,t2; +connection slave; +connection master; +CREATE TABLE t1 ( +`id` int(11) NOT NULL auto_increment, +`data` varchar(100), +PRIMARY KEY (`id`) +) ENGINE=MyISAM; +INSERT INTO t1(data) VALUES(SESSION_USER()); +connection slave; +SELECT length(data) < 100 FROM t1; +length(data) < 100 +1 +connection master; +drop table t1; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_binlog_dump_slave_gtid_state_info.test b/mysql-test/suite/rpl/t/rpl_binlog_dump_slave_gtid_state_info.test index f26e9565671..942a23576c4 100644 --- a/mysql-test/suite/rpl/t/rpl_binlog_dump_slave_gtid_state_info.test +++ b/mysql-test/suite/rpl/t/rpl_binlog_dump_slave_gtid_state_info.test @@ -59,7 +59,7 @@ if(!$log_error_) --let SEARCH_FILE=$log_error_ --let SEARCH_RANGE=-50000 --let SEARCH_PATTERN=using_gtid\(1\), gtid\(\'\'\).* ---source include/search_pattern_in_file.inc +--source include/wait_for_pattern_in_file.inc --connection slave --source include/stop_slave.inc @@ -71,7 +71,7 @@ CHANGE MASTER TO MASTER_USE_GTID=no; --let SEARCH_FILE=$log_error_ --let SEARCH_RANGE=-50000 --let SEARCH_PATTERN=using_gtid\(0\), gtid\(\'\'\).* ---source include/search_pattern_in_file.inc +--source include/wait_for_pattern_in_file.inc CREATE TABLE t (f INT) ENGINE=INNODB; INSERT INTO t VALUES(10); save_master_pos; @@ -89,7 +89,7 @@ CHANGE MASTER TO MASTER_USE_GTID=slave_pos; --let SEARCH_FILE=$log_error_ --let SEARCH_RANGE=-50000 --let SEARCH_PATTERN=using_gtid\(1\), gtid\(\'0-1-2\'\).* ---source include/search_pattern_in_file.inc +--source include/wait_for_pattern_in_file.inc SET @@SESSION.gtid_domain_id=10; INSERT INTO t VALUES(20); save_master_pos; @@ -107,7 +107,7 @@ CHANGE MASTER TO MASTER_USE_GTID=slave_pos; --let SEARCH_FILE=$log_error_ --let SEARCH_RANGE=-50000 --let SEARCH_PATTERN=using_gtid\(1\), gtid\(\'0-1-2,10-1-1\'\).* ---source include/search_pattern_in_file.inc +--source include/wait_for_pattern_in_file.inc --echo "===== Clean up =====" --connection slave diff --git a/mysql-test/suite/rpl/t/rpl_circular_semi_sync.test b/mysql-test/suite/rpl/t/rpl_circular_semi_sync.test index 267fa621945..e533c54bdc1 100644 --- a/mysql-test/suite/rpl/t/rpl_circular_semi_sync.test +++ b/mysql-test/suite/rpl/t/rpl_circular_semi_sync.test @@ -7,6 +7,9 @@ --source include/have_binlog_format_mixed.inc --source include/master-slave.inc +connection server_2; +call mtr.add_suppression("Timeout waiting for reply of binlog"); + # The following tests prove # A. # no out-of-order gtid error is done to the stict gtid mode semisync @@ -66,10 +69,18 @@ evalp CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_2, ma --connection server_2 set @@global.gtid_strict_mode = true; set @@global.rpl_semi_sync_master_enabled = 1; + +# The following command is likely to cause the slave master is not yet setup +# for semi-sync + INSERT INTO t1(a) VALUES (2); --source include/save_master_gtid.inc --connection server_1 +# Update slave to notice that server_2 now has rpl_semi_sync_master_enabled +--source include/stop_slave.inc +--source include/start_slave.inc + --echo # --echo # the successful sync is a required proof --echo # diff --git a/mysql-test/suite/rpl/t/rpl_delayed_slave.test b/mysql-test/suite/rpl/t/rpl_delayed_slave.test index 7dd7b9cf6d9..b1a5a2eaee2 100644 --- a/mysql-test/suite/rpl/t/rpl_delayed_slave.test +++ b/mysql-test/suite/rpl/t/rpl_delayed_slave.test @@ -189,6 +189,12 @@ eval CHANGE MASTER TO MASTER_DELAY = $time2; --enable_query_log --source include/start_slave.inc +# Ensure that slave has started properly +--connection master +INSERT INTO t1 VALUES ('Syncing slave', 5); +--save_master_pos +--sync_slave_with_master + --connection master INSERT INTO t1 VALUES (delay_on_slave(1), 6); --save_master_pos diff --git a/mysql-test/suite/rpl/t/rpl_mariadb_slave_capability.test b/mysql-test/suite/rpl/t/rpl_mariadb_slave_capability.test index 18228e75e2e..de6af2f7b2e 100644 --- a/mysql-test/suite/rpl/t/rpl_mariadb_slave_capability.test +++ b/mysql-test/suite/rpl/t/rpl_mariadb_slave_capability.test @@ -11,7 +11,7 @@ set @old_master_binlog_checksum= @@global.binlog_checksum; # empty Gtid_list event # # Test this by binlog rotation before we log any GTIDs. -connection slave; +sync_slave_with_master; --source include/stop_slave.inc --echo # Test slave with no capability gets dummy event, which is ignored. set @old_dbug= @@global.debug_dbug; diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync.test b/mysql-test/suite/rpl/t/rpl_semi_sync.test index c3cd918b5fc..12f2d473c2b 100644 --- a/mysql-test/suite/rpl/t/rpl_semi_sync.test +++ b/mysql-test/suite/rpl/t/rpl_semi_sync.test @@ -17,7 +17,6 @@ call mtr.add_suppression("Read semi-sync reply"); call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT."); call mtr.add_suppression("mysqld: Got an error reading communication packets"); connection slave; -call mtr.add_suppression("Master server does not support semi-sync"); call mtr.add_suppression("Semi-sync slave .* reply"); call mtr.add_suppression("Slave SQL.*Request to stop slave SQL Thread received while applying a group that has non-transactional changes; waiting for completion of the group"); connection master; @@ -51,7 +50,7 @@ set global rpl_semi_sync_slave_enabled= 0; connection master; -set global rpl_semi_sync_master_timeout= 60000; # 60s +set global rpl_semi_sync_master_timeout= 2000; # 2s echo [ default state of semi-sync on master should be OFF ]; show variables like 'rpl_semi_sync_master_enabled'; @@ -195,12 +194,16 @@ sync_slave_with_master; --echo # Test semi-sync master will switch OFF after one transaction --echo # timeout waiting for slave reply. --echo # + +connection master; +show status like "Rpl_semi_sync_master_status"; + connection slave; source include/stop_slave.inc; connection master; --source include/kill_binlog_dump_threads.inc -set global rpl_semi_sync_master_timeout= 5000; +set global rpl_semi_sync_master_timeout= 2000; # The first semi-sync check should be on because after slave stop, # there are no transactions on the master. @@ -232,8 +235,8 @@ show status like 'Rpl_semi_sync_master_status'; show status like 'Rpl_semi_sync_master_no_tx'; show status like 'Rpl_semi_sync_master_yes_tx'; -# Semi-sync status on master is now OFF, so all these transactions -# will be replicated asynchronously. +# Semi-sync status on master is now ON, but there are no slaves attached, +# so all these transactions will be replicated asynchronously. delete from t1 where a=10; delete from t1 where a=9; delete from t1 where a=8; @@ -367,6 +370,9 @@ let $status_var= Rpl_semi_sync_master_clients; let $status_var_value= 1; source include/wait_for_status_var.inc; +sync_slave_with_master; +connection master; + replace_result $engine_type ENGINE_TYPE; eval create table t1 (a int) engine = $engine_type; insert into t1 values (1); @@ -413,6 +419,10 @@ connection master; let $status_var= Rpl_semi_sync_master_clients; let $status_var_value= 1; source include/wait_for_status_var.inc; + +sync_slave_with_master; +connection master; + echo [ master semi-sync should be ON ]; show status like 'Rpl_semi_sync_master_clients'; show status like 'Rpl_semi_sync_master_status'; diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_event.test b/mysql-test/suite/rpl/t/rpl_semi_sync_event.test index d4df9b4041b..86e1522e6c4 100644 --- a/mysql-test/suite/rpl/t/rpl_semi_sync_event.test +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_event.test @@ -14,7 +14,6 @@ call mtr.add_suppression("Unsafe statement written to the binary log using state call mtr.add_suppression("mysqld: Got an error reading communication packets"); connection slave; -call mtr.add_suppression("Master server does not support semi-sync"); call mtr.add_suppression("Semi-sync slave .* reply"); call mtr.add_suppression("Slave SQL.*Request to stop slave SQL Thread received while applying a group that has non-transactional changes; waiting for completion of the group"); diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test index 6a691ae04f6..17d7b50d614 100644 --- a/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test @@ -18,6 +18,7 @@ --connection server_1 RESET MASTER; SET @@global.max_binlog_size= 4096; +set @@global.rpl_semi_sync_master_enabled = 1; --connection server_2 RESET MASTER; @@ -29,7 +30,6 @@ CHANGE MASTER TO master_use_gtid= slave_pos; --connection server_1 ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; -set @@global.rpl_semi_sync_master_enabled = 1; set @@global.rpl_semi_sync_master_wait_point=AFTER_SYNC; CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_no_missed_ack_after_add_slave.cnf b/mysql-test/suite/rpl/t/rpl_semi_sync_no_missed_ack_after_add_slave.cnf new file mode 100644 index 00000000000..cb7062d5602 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_no_missed_ack_after_add_slave.cnf @@ -0,0 +1,12 @@ +!include include/default_mysqld.cnf + +[mysqld.1] + +[mysqld.2] + +[mysqld.3] + +[ENV] +SERVER_MYPORT_1= @mysqld.1.port +SERVER_MYPORT_2= @mysqld.2.port +SERVER_MYPORT_3= @mysqld.3.port diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_no_missed_ack_after_add_slave.test b/mysql-test/suite/rpl/t/rpl_semi_sync_no_missed_ack_after_add_slave.test new file mode 100644 index 00000000000..c8870e47e00 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_no_missed_ack_after_add_slave.test @@ -0,0 +1,122 @@ +# +# This test ensures that a primary will listen for ACKs by newly added +# semi-sync connections connections, after a pre-existing connection is already +# established. MDEV-32960 reported that the newly added slave's ACK can be +# ignored if listen_on_sockets() does not timeout before +# rpl_semi_sync_master_timeout, and if the existing semi-sync connections fail +# to send ACKs, semi-sync is switched off. +# +# This test ensures this in a two-replica setup with a semi-sync timeout of +# 500ms, and delaying the ACK reply of the first-established replica by 800ms +# to force a timeout, and allowing the second replica to immediately ACK. +# +# References: +# MDEV-32960: Semi-sync ACKed Transaction can Timeout and Switch Off +# Semi-sync with Multiple Replicas +# +--source include/have_debug.inc +# binlog_format independent +--source include/have_binlog_format_statement.inc + +--let $rpl_topology= 1->2,1->3 +--source include/rpl_init.inc + + +--connection server_1 +set @old_enabled= @@global.rpl_semi_sync_master_enabled; +set @old_timeout= @@global.rpl_semi_sync_master_timeout; +set global rpl_semi_sync_master_enabled= 1; +set global rpl_semi_sync_master_timeout= 500; + +--connection server_2 +--source include/stop_slave.inc +set @old_enabled= @@global.rpl_semi_sync_slave_enabled; +set @old_dbug= @@global.debug_dbug; +set global rpl_semi_sync_slave_enabled= 1; +set global debug_dbug="+d,simulate_delay_semisync_slave_reply"; +--source include/start_slave.inc + +--connection server_3 +--source include/stop_slave.inc +set @old_enabled= @@global.rpl_semi_sync_slave_enabled; +set global rpl_semi_sync_slave_enabled= 1; +--source include/start_slave.inc + +--echo # Ensure primary recognizes both replicas are semi-sync +--connection server_1 +--let $status_var_value= 2 +--let $status_var= rpl_semi_sync_master_clients +--source include/wait_for_status_var.inc + +--let $master_ss_status= query_get_value(SHOW STATUS LIKE 'rpl_semi_sync_master_status', Value, 1) +if (`SELECT strcmp("$master_ss_status", "ON") != 0`) +{ + SHOW STATUS LIKE 'rpl_semi_sync_master_status'; + --die rpl_semi_sync_master_status should be ON to start +} + +--connection server_1 +--let $init_master_yes_tx= query_get_value(SHOW STATUS LIKE 'rpl_semi_sync_master_yes_tx', Value, 1) +create table t1 (a int); + +--connection server_2 +--echo # Verifying server_2 did not send ACK +--let $slave1_sent_ack= query_get_value(SHOW STATUS LIKE 'rpl_semi_sync_slave_send_ack', Value, 1) +if (`SELECT $slave1_sent_ack`) +{ + SHOW STATUS LIKE 'rpl_semi_sync_slave_send_ack'; + --die server_2 should not have sent semi-sync ACK to primary +} + +--connection server_3 +--echo # Verifying server_3 did send ACK +--let $slave2_sent_ack= query_get_value(SHOW STATUS LIKE 'rpl_semi_sync_slave_send_ack', Value, 1) +if (`SELECT NOT $slave2_sent_ack`) +{ + SHOW STATUS LIKE 'rpl_semi_sync_slave_send_ack'; + --die server_3 should have sent semi-sync ACK to primary +} + +--connection server_1 +--echo # Verifying master's semi-sync status is still ON (This failed pre-MDEV-32960 fixes) +let $master_ss_status= query_get_value(SHOW STATUS LIKE 'rpl_semi_sync_master_status', Value, 1); +if (`SELECT strcmp("$master_ss_status", "ON") != 0`) +{ + SHOW STATUS LIKE 'rpl_semi_sync_master_status'; + --die rpl_semi_sync_master_status should not have switched off after server_3 ACKed transaction +} + +--echo # Verifying rpl_semi_sync_master_yes_tx incremented +--let $cur_master_yes_tx= query_get_value(SHOW STATUS LIKE 'rpl_semi_sync_master_yes_tx', Value, 1) +if (`SELECT $cur_master_yes_tx != ($init_master_yes_tx + 1)`) +{ + --echo # Initial yes_tx: $init_master_yes_tx + --echo # Current yes_tx: $cur_master_yes_tx + --die rpl_semi_sync_master_yes_tx should have been incremented by primary +} + + +--echo # +--echo # Cleanup + +--connection server_2 +set global rpl_semi_sync_slave_enabled= @old_enabled; +set global debug_dbug= @old_dbug; +--source include/stop_slave.inc + +--connection server_3 +set global rpl_semi_sync_slave_enabled= @old_enabled; +--source include/stop_slave.inc + +--connection server_1 +set global rpl_semi_sync_master_enabled= @old_enabled; +set global rpl_semi_sync_master_timeout= @old_timeout; +drop table t1; + +--connection server_2 +--source include/start_slave.inc +--connection server_3 +--source include/start_slave.inc + +--source include/rpl_end.inc +--echo # End of rpl_semi_sync_no_missed_ack_after_add_slave.test diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_slave_enabled_consistent.test b/mysql-test/suite/rpl/t/rpl_semi_sync_slave_enabled_consistent.test new file mode 100644 index 00000000000..9e388ab4419 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_slave_enabled_consistent.test @@ -0,0 +1,73 @@ +# +# MDEV-32551: "Read semi-sync reply magic number error" warnings on master +# +# Test that changing rpl_semi_sync_master_enabled after startup does not +# cause problems with semi-sync cleanup. +# + +--source include/have_debug.inc +--source include/have_debug_sync.inc + +# Test is binlog format independent, so save resources +--source include/have_binlog_format_row.inc +--source include/master-slave.inc + +call mtr.add_suppression("Replication event checksum verification failed"); +call mtr.add_suppression("could not queue event from master"); + +--echo # +--echo # Set up a semisync connection +--connection master +set @@global.rpl_semi_sync_master_enabled= ON; + +--connection slave +stop slave io_thread; +set @@global.rpl_semi_sync_slave_enabled= ON; +set @old_dbug= @@global.debug_dbug; + +# Force an error to abort out of the main IO thread loop +set @@global.debug_dbug= "+d,corrupt_queue_event"; + +# Pause the IO thread as soon as the main loop starts. Note we can't use +# processlist where "Waiting for master to send event" because the +# "corrupt_queue_event" will trigger before we can turn semisync OFF +set @@global.debug_dbug= "+d,pause_before_io_read_event"; + +# Because the other debug_dbug points are automatically negated when they are +# run, and there is a bug that if "-d" takes us to an empty debug string state, +# _all_ debug_print statements are output +set @@global.debug_dbug= "+d,placeholder"; + +start slave io_thread; + +--echo # Disable semi-sync on the slave while the IO thread is active +set debug_sync='now wait_for io_thread_at_read_event'; +set @@global.rpl_semi_sync_slave_enabled= OFF; +set debug_sync='now signal io_thread_continue_read_event'; + +--echo # Waiting for the slave to stop with the error from corrupt_queue_event +--connection slave +--let $slave_io_errno= 1595,1743 +--source include/wait_for_slave_io_error.inc + +--echo # Sleep 1 to give time for Ack_receiver to receive COM_QUIT +--sleep 1 + +--let $assert_text= Check that there is no 'Read semi-sync reply magic number error' in error log. +--let $assert_select=magic number error +--let $assert_file= $MYSQLTEST_VARDIR/log/mysqld.1.err +--let $assert_count= 0 +--let $assert_only_after=CURRENT_TEST +--source include/assert_grep.inc + +--echo # +--echo # Cleanup +--connection slave +--source include/stop_slave.inc +set @@global.debug_dbug= @old_dbug; +--source include/start_slave.inc +--connection master +set @@global.rpl_semi_sync_master_enabled= default; + +--source include/rpl_end.inc +--echo # End of rpl_semi_sync_slave_enabled_consistent.test diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_slave_reply_fail.test b/mysql-test/suite/rpl/t/rpl_semi_sync_slave_reply_fail.test index 2ebc092e33d..84462ed6426 100644 --- a/mysql-test/suite/rpl/t/rpl_semi_sync_slave_reply_fail.test +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_slave_reply_fail.test @@ -31,6 +31,7 @@ --connection master call mtr.add_suppression("Timeout waiting for reply of binlog*"); +call mtr.add_suppression("Master server does not read semi-sync messages*"); --let $sav_timeout_master=`SELECT @@GLOBAL.rpl_semi_sync_master_timeout` set global rpl_semi_sync_master_enabled = ON; SET @@GLOBAL.rpl_semi_sync_master_timeout=100; @@ -54,9 +55,9 @@ if(!$log_error_) # does not know the location of its .err log, use default location let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.2.err; } ---echo "Assert that the net_fulsh() reply failed is present in slave error log. +--echo "Assert that Master server does not read semi-sync messages" is present in slave error log. --let SEARCH_FILE=$log_error_ ---let SEARCH_PATTERN=Semi-sync slave net_flush\(\) reply failed +--let SEARCH_PATTERN=Master server does not read semi-sync messages --source include/search_pattern_in_file.inc --echo "Assert that Slave IO thread is up and running." diff --git a/mysql-test/suite/rpl/t/rpl_semisync_ali_issues.test b/mysql-test/suite/rpl/t/rpl_semisync_ali_issues.test index 5e6f350b191..c5c1daa4672 100644 --- a/mysql-test/suite/rpl/t/rpl_semisync_ali_issues.test +++ b/mysql-test/suite/rpl/t/rpl_semisync_ali_issues.test @@ -16,7 +16,6 @@ CALL mtr.add_suppression("Failed on request_dump()*"); CALL mtr.add_suppression("Semi-sync master failed on*"); CALL mtr.add_suppression("Master command COM_BINLOG_DUMP failed*"); CALL mtr.add_suppression("on master failed*"); -CALL mtr.add_suppression("Master server does not support semi-sync*"); CALL mtr.add_suppression("Semi-sync slave net_flush*"); CALL mtr.add_suppression("Failed to flush master info*"); CALL mtr.add_suppression("Request to stop slave SQL Thread received while apply*"); diff --git a/mysql-test/suite/rpl/t/rpl_session_var.test b/mysql-test/suite/rpl/t/rpl_session_var.test index cf3faa6578c..3ea6d5dabae 100644 --- a/mysql-test/suite/rpl/t/rpl_session_var.test +++ b/mysql-test/suite/rpl/t/rpl_session_var.test @@ -7,6 +7,12 @@ disable_query_log; call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT"); enable_query_log; +select @@rpl_semi_sync_master_enabled; +connection slave; +select @@rpl_semi_sync_slave_enabled; +show status like "rpl_semi_sync_slave_status"; +connection master; + drop table if exists t1; create table t1(a varchar(100),b int); set @@session.sql_mode=pipes_as_concat; diff --git a/mysql-test/suite/rpl/t/rpl_session_var2-master.opt b/mysql-test/suite/rpl/t/rpl_session_var2-master.opt new file mode 100644 index 00000000000..edb0c915bd6 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_session_var2-master.opt @@ -0,0 +1 @@ +--rpl_semi_sync_master_enabled=1 --rpl_semi_sync_slave_enabled=1 diff --git a/mysql-test/suite/rpl/t/rpl_session_var2-slave.opt b/mysql-test/suite/rpl/t/rpl_session_var2-slave.opt new file mode 100644 index 00000000000..c9f3082ed07 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_session_var2-slave.opt @@ -0,0 +1 @@ +--rpl_semi_sync_slave_enabled=1 diff --git a/mysql-test/suite/rpl/t/rpl_session_var2.test b/mysql-test/suite/rpl/t/rpl_session_var2.test new file mode 100644 index 00000000000..cbf8a5cf316 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_session_var2.test @@ -0,0 +1,3 @@ +# Replication of session variables when semi-sync is on + +--source rpl_session_var.test diff --git a/mysql-test/suite/rpl/t/rpl_shutdown_wait_semisync_slaves.test b/mysql-test/suite/rpl/t/rpl_shutdown_wait_semisync_slaves.test index 2c63df30fde..0547a97f681 100644 --- a/mysql-test/suite/rpl/t/rpl_shutdown_wait_semisync_slaves.test +++ b/mysql-test/suite/rpl/t/rpl_shutdown_wait_semisync_slaves.test @@ -28,6 +28,9 @@ while (`SELECT $i <= $slaves`) --inc $i } +# The following script will restart master and slaves. This will also set +# rpl_semi_sync_master_enabled=0 + --source include/rpl_shutdown_wait_slaves.inc --let i= 2 while (`SELECT $i <= $slaves`) diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result index 319b86583fc..00c66ddf072 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result @@ -3465,7 +3465,7 @@ COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME RPL_SEMI_SYNC_MASTER_WAIT_NO_SLAVE VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BOOLEAN -VARIABLE_COMMENT Wait until timeout when no semi-synchronous replication slave available (enabled by default). +VARIABLE_COMMENT Wait until timeout when no semi-synchronous replication slave is available. NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL diff --git a/sql/log.cc b/sql/log.cc index 518f4df3f94..3ca387d5ab9 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -11011,7 +11011,7 @@ Recovery_context::Recovery_context() : prev_event_pos(0), last_gtid_standalone(false), last_gtid_valid(false), last_gtid_no2pc(false), last_gtid_engines(0), - do_truncate(rpl_semi_sync_slave_enabled), + do_truncate(repl_semisync_slave.get_slave_enabled()), truncate_validated(false), truncate_reset_done(false), truncate_set_in_1st(false), id_binlog(MAX_binlog_id), checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF), gtid_maybe_to_truncate(NULL) diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 8b7888dfb79..2766bc00e3a 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -7404,7 +7404,7 @@ SHOW_VAR status_vars[]= { SHOW_FUNC_ENTRY("Rpl_semi_sync_master_net_avg_wait_time", &SHOW_FNAME(avg_net_wait_time)), {"Rpl_semi_sync_master_request_ack", (char*) &rpl_semi_sync_master_request_ack, SHOW_LONGLONG}, {"Rpl_semi_sync_master_get_ack", (char*)&rpl_semi_sync_master_get_ack, SHOW_LONGLONG}, - {"Rpl_semi_sync_slave_status", (char*) &rpl_semi_sync_slave_status, SHOW_BOOL}, + SHOW_FUNC_ENTRY("Rpl_semi_sync_slave_status", &rpl_semi_sync_enabled), {"Rpl_semi_sync_slave_send_ack", (char*) &rpl_semi_sync_slave_send_ack, SHOW_LONGLONG}, #endif /* HAVE_REPLICATION */ #ifdef HAVE_QUERY_CACHE diff --git a/sql/net_serv.cc b/sql/net_serv.cc index 70e71d9a21b..3dff8442c6a 100644 --- a/sql/net_serv.cc +++ b/sql/net_serv.cc @@ -156,6 +156,7 @@ my_bool my_net_init(NET *net, Vio *vio, void *thd, uint my_flags) net->where_b = net->remain_in_buf=0; net->net_skip_rest_factor= 0; net->last_errno=0; + net->pkt_nr_can_be_reset= 0; net->thread_specific_malloc= MY_TEST(my_flags & MY_THREAD_SPECIFIC); net->thd= 0; #ifdef MYSQL_SERVER @@ -1057,8 +1058,10 @@ retry: { /* Probably in MIT threads */ if (retry_count++ < net->retry_count) continue; - EXTRA_DEBUG_fprintf(stderr, "%s: read looped with error %d, aborting thread\n", - my_progname,vio_errno(net->vio)); + EXTRA_DEBUG_fprintf(stderr, "%s: read looped with error %d on " + "file %lld, aborting thread\n", + my_progname, vio_errno(net->vio), + (longlong) vio_fd(net->vio)); } #ifndef MYSQL_SERVER if (length != 0 && vio_errno(net->vio) == SOCKET_EINTR) @@ -1094,19 +1097,31 @@ retry: #endif if (net->buff[net->where_b + 3] != (uchar) net->pkt_nr) { -#ifndef MYSQL_SERVER - if (net->buff[net->where_b + 3] == (uchar) (net->pkt_nr -1)) + if (net->pkt_nr_can_be_reset) { /* - If the server was killed then the server may have missed the - last sent client packet and the packet numbering may be one off. + We are using a protocol like semi-sync where master and slave + sends packets in parallel. + Copy current one as it can be useful for debugging. */ - DBUG_PRINT("warning", ("Found possible out of order packets")); - expect_error_packet= 1; + net->pkt_nr= net->buff[net->where_b + 3]; } else + { +#ifndef MYSQL_SERVER + if (net->buff[net->where_b + 3] == (uchar) (net->pkt_nr -1)) + { + /* + If the server was killed then the server may have missed the + last sent client packet and the packet numbering may be one off. + */ + DBUG_PRINT("warning", ("Found possible out of order packets")); + expect_error_packet= 1; + } + else #endif - goto packets_out_of_order; + goto packets_out_of_order; + } } net->compress_pkt_nr= ++net->pkt_nr; #ifdef HAVE_COMPRESS diff --git a/sql/rpl_mi.cc b/sql/rpl_mi.cc index 8322bcd3042..73c61af92c9 100644 --- a/sql/rpl_mi.cc +++ b/sql/rpl_mi.cc @@ -44,7 +44,7 @@ Master_info::Master_info(LEX_CSTRING *connection_name_arg, in_start_all_slaves(0), in_stop_all_slaves(0), in_flush_all_relay_logs(0), users(0), killed(0), total_ddl_groups(0), total_non_trans_groups(0), total_trans_groups(0), - do_accept_own_server_id(false) + do_accept_own_server_id(false), semi_sync_reply_enabled(0) { char *tmp; host[0] = 0; user[0] = 0; password[0] = 0; diff --git a/sql/rpl_mi.h b/sql/rpl_mi.h index 92938ac2f94..5857e8e043d 100644 --- a/sql/rpl_mi.h +++ b/sql/rpl_mi.h @@ -376,6 +376,12 @@ class Master_info : public Slave_reporting_capability it must be ignored similarly to the replicate-same-server-id rule. */ bool do_accept_own_server_id; + /* + Set to 1 when semi_sync is enabled. Set to 0 if there is any transmit + problems to the slave, in which case any furter semi-sync reply is + ignored + */ + bool semi_sync_reply_enabled; }; int init_master_info(Master_info* mi, const char* master_info_fname, diff --git a/sql/semisync_master.cc b/sql/semisync_master.cc index 670a6d8d9ed..9f30a8206a4 100644 --- a/sql/semisync_master.cc +++ b/sql/semisync_master.cc @@ -91,7 +91,9 @@ Active_tranx::Active_tranx(mysql_mutex_t *lock, for (int idx = 0; idx < m_num_entries; ++idx) m_trx_htb[idx] = NULL; +#ifdef EXTRA_DEBUG sql_print_information("Semi-sync replication initialized for transactions."); +#endif } Active_tranx::~Active_tranx() @@ -352,8 +354,7 @@ Repl_semi_sync_master::Repl_semi_sync_master() m_state(0), m_wait_point(0) { - strcpy(m_reply_file_name, ""); - strcpy(m_wait_file_name, ""); + m_reply_file_name[0]= m_wait_file_name[0]= 0; } int Repl_semi_sync_master::init_object() @@ -379,20 +380,10 @@ int Repl_semi_sync_master::init_object() { result = enable_master(); if (!result) - { result= ack_receiver.start(); /* Start the ACK thread. */ - /* - If rpl_semi_sync_master_wait_no_slave is disabled, let's temporarily - switch off semisync to avoid hang if there's none active slave. - */ - if (!rpl_semi_sync_master_wait_no_slave) - switch_off(); - } } else - { disable_master(); - } return result; } @@ -441,7 +432,7 @@ void Repl_semi_sync_master::disable_master() */ switch_off(); - assert(m_active_tranxs != NULL); + DBUG_ASSERT(m_active_tranxs != NULL); delete m_active_tranxs; m_active_tranxs = NULL; @@ -450,7 +441,6 @@ void Repl_semi_sync_master::disable_master() m_commit_file_name_inited = false; set_master_enabled(false); - sql_print_information("Semi-sync replication disabled on the master."); } unlock(); @@ -537,31 +527,34 @@ void Repl_semi_sync_master::add_slave() void Repl_semi_sync_master::remove_slave() { lock(); - rpl_semi_sync_master_clients--; - - /* Only switch off if semi-sync is enabled and is on */ - if (get_master_enabled() && is_on()) + if (!(--rpl_semi_sync_master_clients) && !rpl_semi_sync_master_wait_no_slave) { - /* If user has chosen not to wait if no semi-sync slave available - and the last semi-sync slave exits, turn off semi-sync on master - immediately. - */ - if (!rpl_semi_sync_master_wait_no_slave && - rpl_semi_sync_master_clients == 0) - switch_off(); + /* + Signal transactions waiting in commit_trx() that they do not have to + wait anymore. + */ + cond_broadcast(); } unlock(); } + +/* + Check report package + + @retval 0 ok + @retval 1 Error + @retval -1 Slave is going down (ok) +*/ + int Repl_semi_sync_master::report_reply_packet(uint32 server_id, const uchar *packet, ulong packet_len) { - int result= -1; + int result= 1; // Assume error char log_file_name[FN_REFLEN+1]; my_off_t log_file_pos; ulong log_file_len = 0; - DBUG_ENTER("Repl_semi_sync_master::report_reply_packet"); DBUG_EXECUTE_IF("semisync_corrupt_magic", @@ -569,7 +562,14 @@ int Repl_semi_sync_master::report_reply_packet(uint32 server_id, if (unlikely(packet[REPLY_MAGIC_NUM_OFFSET] != Repl_semi_sync_master::k_packet_magic_num)) { - sql_print_error("Read semi-sync reply magic number error"); + if (packet[0] == COM_QUIT && packet_len == 1) + { + /* Slave sent COM_QUIT as part of IO thread going down */ + sql_print_information("slave IO thread has stopped"); + DBUG_RETURN(-1); + } + else + sql_print_error("Read semi-sync reply magic number error"); goto l_end; } @@ -597,14 +597,13 @@ int Repl_semi_sync_master::report_reply_packet(uint32 server_id, rpl_semi_sync_master_get_ack++; report_reply_binlog(server_id, log_file_name, log_file_pos); - result= 0; + DBUG_RETURN(0); l_end: - if (result == -1) { char buf[256]; - octet2hex(buf, (const char*) packet, std::min(static_cast(sizeof(buf)-1), - packet_len)); + octet2hex(buf, (const char*) packet, + MY_MIN(sizeof(buf)-1, (size_t) packet_len)); sql_print_information("First bytes of the packet from semisync slave " "server-id %d: %s", server_id, buf); @@ -668,7 +667,7 @@ int Repl_semi_sync_master::report_reply_binlog(uint32 server_id, m_reply_file_name_inited = true; /* Remove all active transaction nodes before this point. */ - assert(m_active_tranxs != NULL); + DBUG_ASSERT(m_active_tranxs != NULL); m_active_tranxs->clear_active_tranx_nodes(log_file_name, log_file_pos); DBUG_PRINT("semisync", ("%s: Got reply at (%s, %lu)", @@ -809,6 +808,8 @@ int Repl_semi_sync_master::dump_start(THD* thd, (long) thd->variables.server_id, log_file, (ulong) log_pos); + /* Mark that semi-sync net->pkt_nr is not reliable */ + thd->net.pkt_nr_can_be_reset= 1; return 0; } @@ -827,8 +828,15 @@ void Repl_semi_sync_master::dump_end(THD* thd) int Repl_semi_sync_master::commit_trx(const char* trx_wait_binlog_name, my_off_t trx_wait_binlog_pos) { + bool success= 0; DBUG_ENTER("Repl_semi_sync_master::commit_trx"); + if (!rpl_semi_sync_master_clients && !rpl_semi_sync_master_wait_no_slave) + { + rpl_semi_sync_master_no_transactions++; + DBUG_RETURN(0); + } + if (get_master_enabled() && trx_wait_binlog_name) { struct timespec start_ts; @@ -836,7 +844,7 @@ int Repl_semi_sync_master::commit_trx(const char* trx_wait_binlog_name, int wait_result; PSI_stage_info old_stage; THD *thd= current_thd; - + bool aborted= 0; set_timespec(start_ts, 0); DEBUG_SYNC(thd, "rpl_semisync_master_commit_trx_before_lock"); @@ -859,6 +867,13 @@ int Repl_semi_sync_master::commit_trx(const char* trx_wait_binlog_name, while (is_on() && !thd_killed(thd)) { + /* We have to check these again as things may have changed */ + if (!rpl_semi_sync_master_clients && !rpl_semi_sync_master_wait_no_slave) + { + aborted= 1; + break; + } + if (m_reply_file_name_inited) { int cmp = Active_tranx::compare(m_reply_file_name, m_reply_file_pos, @@ -873,6 +888,7 @@ int Repl_semi_sync_master::commit_trx(const char* trx_wait_binlog_name, "Repl_semi_sync_master::commit_trx", m_reply_file_name, (ulong)m_reply_file_pos)); + success= 1; break; } } @@ -973,13 +989,13 @@ int Repl_semi_sync_master::commit_trx(const char* trx_wait_binlog_name, m_active_tranxs may be NULL if someone disabled semi sync during cond_timewait() */ - assert(thd_killed(thd) || !m_active_tranxs || - !m_active_tranxs->is_tranx_end_pos(trx_wait_binlog_name, - trx_wait_binlog_pos)); + DBUG_ASSERT(thd_killed(thd) || !m_active_tranxs || aborted || + !m_active_tranxs->is_tranx_end_pos(trx_wait_binlog_name, + trx_wait_binlog_pos)); l_end: /* Update the status counter. */ - if (is_on()) + if (success) rpl_semi_sync_master_yes_transactions++; else rpl_semi_sync_master_no_transactions++; @@ -1014,18 +1030,20 @@ void Repl_semi_sync_master::switch_off() { DBUG_ENTER("Repl_semi_sync_master::switch_off"); - m_state = false; + if (m_state) + { + m_state = false; - /* Clear the active transaction list. */ - assert(m_active_tranxs != NULL); - m_active_tranxs->clear_active_tranx_nodes(NULL, 0); - - rpl_semi_sync_master_off_times++; - m_wait_file_name_inited = false; - m_reply_file_name_inited = false; - sql_print_information("Semi-sync replication switched OFF."); - cond_broadcast(); /* wake up all waiting threads */ + /* Clear the active transaction list. */ + DBUG_ASSERT(m_active_tranxs != NULL); + m_active_tranxs->clear_active_tranx_nodes(NULL, 0); + rpl_semi_sync_master_off_times++; + m_wait_file_name_inited = false; + m_reply_file_name_inited = false; + sql_print_information("Semi-sync replication switched OFF."); + } + cond_broadcast(); /* wake up all waiting threads */ DBUG_VOID_RETURN; } @@ -1072,9 +1090,10 @@ int Repl_semi_sync_master::reserve_sync_header(String* packet) { DBUG_ENTER("Repl_semi_sync_master::reserve_sync_header"); - /* Set the magic number and the sync status. By default, no sync - * is required. - */ + /* + Set the magic number and the sync status. By default, no sync + is required. + */ packet->append(reinterpret_cast(k_sync_header), sizeof(k_sync_header)); DBUG_RETURN(0); @@ -1087,7 +1106,6 @@ int Repl_semi_sync_master::update_sync_header(THD* thd, unsigned char *packet, { int cmp = 0; bool sync = false; - DBUG_ENTER("Repl_semi_sync_master::update_sync_header"); /* If the semi-sync master is not enabled, or the slave is not a semi-sync @@ -1103,16 +1121,11 @@ int Repl_semi_sync_master::update_sync_header(THD* thd, unsigned char *packet, /* This is the real check inside the mutex. */ if (!get_master_enabled()) - { - assert(sync == false); goto l_end; - } if (is_on()) { /* semi-sync is ON */ - sync = false; /* No sync unless a transaction is involved. */ - if (m_reply_file_name_inited) { cmp = Active_tranx::compare(log_file_name, log_file_pos, @@ -1126,15 +1139,10 @@ int Repl_semi_sync_master::update_sync_header(THD* thd, unsigned char *packet, } } + cmp = 1; if (m_wait_file_name_inited) - { cmp = Active_tranx::compare(log_file_name, log_file_pos, m_wait_file_name, m_wait_file_pos); - } - else - { - cmp = 1; - } /* If we are already waiting for some transaction replies which * are later in binlog, do not wait for this one event. @@ -1144,7 +1152,7 @@ int Repl_semi_sync_master::update_sync_header(THD* thd, unsigned char *packet, /* * We only wait if the event is a transaction's ending event. */ - assert(m_active_tranxs != NULL); + DBUG_ASSERT(m_active_tranxs != NULL); sync = m_active_tranxs->is_tranx_end_pos(log_file_name, log_file_pos); } @@ -1172,13 +1180,12 @@ int Repl_semi_sync_master::update_sync_header(THD* thd, unsigned char *packet, l_end: unlock(); - /* We do not need to clear sync flag because we set it to 0 when we - * reserve the packet header. - */ + /* + We do not need to clear sync flag in packet because we set it to 0 when we + reserve the packet header. + */ if (sync) - { - (packet)[2] = k_packet_flag_sync; - } + packet[2]= k_packet_flag_sync; DBUG_RETURN(0); } @@ -1225,7 +1232,7 @@ int Repl_semi_sync_master::write_tranx_in_binlog(const char* log_file_name, if (is_on()) { - assert(m_active_tranxs != NULL); + DBUG_ASSERT(m_active_tranxs != NULL); if(m_active_tranxs->insert_tranx_node(log_file_name, log_file_pos)) { /* @@ -1256,7 +1263,7 @@ int Repl_semi_sync_master::flush_net(THD *thd, DBUG_ENTER("Repl_semi_sync_master::flush_net"); - assert((unsigned char)event_buf[1] == k_packet_magic_num); + DBUG_ASSERT((unsigned char)event_buf[1] == k_packet_magic_num); if ((unsigned char)event_buf[2] != k_packet_flag_sync) { /* current event does not require reply */ @@ -1274,6 +1281,11 @@ int Repl_semi_sync_master::flush_net(THD *thd, goto l_end; } + /* + We have to do a net_clear() as with semi-sync the slave_reply's are + interleaved with data from the master and then the net->pkt_nr + cannot be kept in sync. Better to start pkt_nr from 0 again. + */ net_clear(net, 0); net->pkt_nr++; net->compress_pkt_nr++; @@ -1300,11 +1312,7 @@ int Repl_semi_sync_master::after_reset_master() lock(); - if (rpl_semi_sync_master_clients == 0 && - !rpl_semi_sync_master_wait_no_slave) - m_state = 0; - else - m_state = get_master_enabled()? 1 : 0; + m_state = get_master_enabled() ? 1 : 0; m_wait_file_name_inited = false; m_reply_file_name_inited = false; @@ -1338,18 +1346,6 @@ int Repl_semi_sync_master::before_reset_master() DBUG_RETURN(result); } -void Repl_semi_sync_master::check_and_switch() -{ - lock(); - if (get_master_enabled() && is_on()) - { - if (!rpl_semi_sync_master_wait_no_slave - && rpl_semi_sync_master_clients == 0) - switch_off(); - } - unlock(); -} - void Repl_semi_sync_master::set_export_stats() { lock(); @@ -1363,7 +1359,6 @@ void Repl_semi_sync_master::set_export_stats() ((rpl_semi_sync_master_net_wait_num) ? (ulong)((double)rpl_semi_sync_master_net_wait_time / ((double)rpl_semi_sync_master_net_wait_num)) : 0); - unlock(); } diff --git a/sql/semisync_master.h b/sql/semisync_master.h index 5451ad512c6..99f46869354 100644 --- a/sql/semisync_master.h +++ b/sql/semisync_master.h @@ -633,8 +633,6 @@ class Repl_semi_sync_master /*called before reset master*/ int before_reset_master(); - void check_and_switch(); - /* Determines if the given thread is currently awaiting a semisync_ack. Note that the thread's value is protected by this class's LOCK_binlog, so this diff --git a/sql/semisync_master_ack_receiver.cc b/sql/semisync_master_ack_receiver.cc index a76e3447ac8..6453b7ef32a 100644 --- a/sql/semisync_master_ack_receiver.cc +++ b/sql/semisync_master_ack_receiver.cc @@ -24,7 +24,8 @@ extern PSI_cond_key key_COND_ack_receiver; #ifdef HAVE_PSI_THREAD_INTERFACE extern PSI_thread_key key_thread_ack_receiver; #endif -extern Repl_semi_sync_master repl_semisync; + +int global_ack_signal_fd= -1; /* Callback function of ack receive thread */ pthread_handler_t ack_receive_handler(void *arg) @@ -45,6 +46,7 @@ Ack_receiver::Ack_receiver() m_status= ST_DOWN; mysql_mutex_init(key_LOCK_ack_receiver, &m_mutex, NULL); mysql_cond_init(key_COND_ack_receiver, &m_cond, NULL); + mysql_cond_init(key_COND_ack_receiver, &m_cond_reply, NULL); m_pid= 0; DBUG_VOID_RETURN; @@ -57,6 +59,7 @@ void Ack_receiver::cleanup() stop(); mysql_mutex_destroy(&m_mutex); mysql_cond_destroy(&m_cond); + mysql_cond_destroy(&m_cond_reply); DBUG_VOID_RETURN; } @@ -104,6 +107,7 @@ void Ack_receiver::stop() if (m_status == ST_UP) { m_status= ST_STOPPING; + signal_listener(); // Signal listener thread to stop mysql_cond_broadcast(&m_cond); while (m_status == ST_STOPPING) @@ -118,6 +122,21 @@ void Ack_receiver::stop() DBUG_VOID_RETURN; } +#ifndef DBUG_OFF +void static dbug_verify_no_duplicate_slaves(Slave_ilist *m_slaves, THD *thd) +{ + I_List_iterator it(*m_slaves); + Slave *slave; + while ((slave= it++)) + { + DBUG_ASSERT(slave->thd->variables.server_id != thd->variables.server_id); + } +} +#else +#define dbug_verify_no_duplicate_slaves(A,B) do {} while(0) +#endif + + bool Ack_receiver::add_slave(THD *thd) { Slave *slave; @@ -126,17 +145,23 @@ bool Ack_receiver::add_slave(THD *thd) if (!(slave= new Slave)) DBUG_RETURN(true); + slave->active= 0; slave->thd= thd; slave->vio= *thd->net.vio; slave->vio.mysql_socket.m_psi= NULL; slave->vio.read_timeout= 1; mysql_mutex_lock(&m_mutex); + + dbug_verify_no_duplicate_slaves(&m_slaves, thd); + m_slaves.push_back(slave); m_slaves_changed= true; mysql_cond_broadcast(&m_cond); mysql_mutex_unlock(&m_mutex); + signal_listener(); // Inform listener that there are new slaves + DBUG_RETURN(false); } @@ -144,6 +169,7 @@ void Ack_receiver::remove_slave(THD *thd) { I_List_iterator it(m_slaves); Slave *slave; + bool slaves_changed= 0; DBUG_ENTER("Ack_receiver::remove_slave"); mysql_mutex_lock(&m_mutex); @@ -153,10 +179,23 @@ void Ack_receiver::remove_slave(THD *thd) if (slave->thd == thd) { delete slave; - m_slaves_changed= true; + slaves_changed= true; break; } } + if (slaves_changed) + { + m_slaves_changed= true; + mysql_cond_broadcast(&m_cond); + /* + Wait until Ack_receiver::run() acknowledges remove of slave + As this is only sent under the mutex and after listners has + been collected, we know that listener has ignored the found + slave. + */ + if (m_status != ST_DOWN) + mysql_cond_wait(&m_cond_reply, &m_mutex); + } mysql_mutex_unlock(&m_mutex); DBUG_VOID_RETURN; @@ -167,10 +206,15 @@ inline void Ack_receiver::set_stage_info(const PSI_stage_info &stage) (void)MYSQL_SET_STAGE(stage.m_key, __FILE__, __LINE__); } -inline void Ack_receiver::wait_for_slave_connection() +void Ack_receiver::wait_for_slave_connection(THD *thd) { - set_stage_info(stage_waiting_for_semi_sync_slave); - mysql_cond_wait(&m_cond, &m_mutex); + thd->enter_cond(&m_cond, &m_mutex, &stage_waiting_for_semi_sync_slave, + 0, __func__, __FILE__, __LINE__); + + while (m_status == ST_UP && m_slaves.is_empty()) + mysql_cond_wait(&m_cond, &m_mutex); + + thd->exit_cond(0, __func__, __FILE__, __LINE__); } /* Auxilary function to initialize a NET object with given net buffer. */ @@ -188,11 +232,10 @@ void Ack_receiver::run() THD *thd= new THD(next_thread_id()); NET net; unsigned char net_buff[REPLY_MESSAGE_MAX_LENGTH]; + DBUG_ENTER("Ack_receiver::run"); my_thread_init(); - DBUG_ENTER("Ack_receiver::run"); - #ifdef HAVE_POLL Poll_socket_listener listener(m_slaves); #else @@ -207,64 +250,76 @@ void Ack_receiver::run() thd->set_command(COM_DAEMON); init_net(&net, net_buff, REPLY_MESSAGE_MAX_LENGTH); - mysql_mutex_lock(&m_mutex); + /* + Mark that we have to setup the listener. Note that only this functions can + set m_slaves_changed to false + */ m_slaves_changed= true; - mysql_mutex_unlock(&m_mutex); while (1) { - int ret; - uint slave_count __attribute__((unused))= 0; + int ret, slave_count; Slave *slave; mysql_mutex_lock(&m_mutex); - if (unlikely(m_status == ST_STOPPING)) + if (unlikely(m_status != ST_UP)) goto end; - set_stage_info(stage_waiting_for_semi_sync_ack_from_slave); if (unlikely(m_slaves_changed)) { if (unlikely(m_slaves.is_empty())) { - wait_for_slave_connection(); - mysql_mutex_unlock(&m_mutex); + m_slaves_changed= false; + mysql_cond_broadcast(&m_cond_reply); // Signal remove_slave + wait_for_slave_connection(thd); + /* Wait for slave unlocks m_mutex */ continue; } + set_stage_info(stage_waiting_for_semi_sync_ack_from_slave); if ((slave_count= listener.init_slave_sockets()) == 0) + { + mysql_mutex_unlock(&m_mutex); + m_slaves_changed= true; + continue; // Retry + } + if (slave_count < 0) goto end; m_slaves_changed= false; + mysql_cond_broadcast(&m_cond_reply); // Signal remove_slave + } + #ifdef HAVE_POLL DBUG_PRINT("info", ("fd count %u", slave_count)); #else DBUG_PRINT("info", ("fd count %u, max_fd %d", slave_count, (int) listener.get_max_fd())); #endif - } + mysql_mutex_unlock(&m_mutex); ret= listener.listen_on_sockets(); + if (ret <= 0) { - mysql_mutex_unlock(&m_mutex); - ret= DBUG_EVALUATE_IF("rpl_semisync_simulate_select_error", -1, ret); if (ret == -1 && errno != EINTR) sql_print_information("Failed to wait on semi-sync sockets, " "error: errno=%d", socket_errno); - /* Sleep 1us, so other threads can catch the m_mutex easily. */ - my_sleep(1); continue; } + listener.clear_signal(); + mysql_mutex_lock(&m_mutex); set_stage_info(stage_reading_semi_sync_ack); Slave_ilist_iterator it(m_slaves); while ((slave= it++)) { - if (listener.is_socket_active(slave)) + if (slave->active) // Set in init_slave_sockets() { ulong len; + /* Semi-sync packets will always be sent with pkt_nr == 1 */ net_clear(&net, 0); net.vio= &slave->vio; /* @@ -275,29 +330,40 @@ void Ack_receiver::run() len= my_net_read(&net); if (likely(len != packet_error)) - repl_semisync_master.report_reply_packet(slave->server_id(), - net.read_pos, len); - else { - if (net.last_errno == ER_NET_READ_ERROR) + int res; + res= repl_semisync_master.report_reply_packet(slave->server_id(), + net.read_pos, len); + if (unlikely(res < 0)) { - listener.clear_socket_info(slave); + /* + Slave has sent COM_QUIT or other failure. + Delete it from listener + */ + it.remove(); } + } + else if (net.last_errno == ER_NET_READ_ERROR) + { if (net.last_errno > 0 && global_system_variables.log_warnings > 2) sql_print_warning("Semisync ack receiver got error %d \"%s\" " "from slave server-id %d", net.last_errno, ER_DEFAULT(net.last_errno), slave->server_id()); + it.remove(); } } } mysql_mutex_unlock(&m_mutex); } + end: sql_print_information("Stopping ack receiver thread"); m_status= ST_DOWN; - delete thd; mysql_cond_broadcast(&m_cond); + mysql_cond_broadcast(&m_cond_reply); mysql_mutex_unlock(&m_mutex); + + delete thd; DBUG_VOID_RETURN; } diff --git a/sql/semisync_master_ack_receiver.h b/sql/semisync_master_ack_receiver.h index d869bd2e6d4..d3d9de27b89 100644 --- a/sql/semisync_master_ack_receiver.h +++ b/sql/semisync_master_ack_receiver.h @@ -29,6 +29,7 @@ struct Slave :public ilink #ifdef HAVE_POLL uint m_fds_index; #endif + bool active; my_socket sock_fd() const { return vio.mysql_socket.fd; } uint server_id() const { return thd->variables.server_id; } }; @@ -46,6 +47,7 @@ typedef I_List_iterator Slave_ilist_iterator; add_slave: maintain a new semisync slave's information remove_slave: remove a semisync slave's information */ + class Ack_receiver : public Repl_semi_sync_base { public: @@ -96,15 +98,20 @@ public: { m_trace_level= trace_level; } + bool running() + { + return m_status != ST_DOWN; + } + private: enum status {ST_UP, ST_DOWN, ST_STOPPING}; - uint8 m_status; + enum status m_status; /* Protect m_status, m_slaves_changed and m_slaves. ack thread and other session may access the variables at the same time. */ mysql_mutex_t m_mutex; - mysql_cond_t m_cond; + mysql_cond_t m_cond, m_cond_reply; /* If slave list is updated(add or remove). */ bool m_slaves_changed; @@ -116,25 +123,73 @@ private: Ack_receiver& operator=(const Ack_receiver &ack_receiver); void set_stage_info(const PSI_stage_info &stage); - void wait_for_slave_connection(); + void wait_for_slave_connection(THD *thd); }; -#ifdef HAVE_POLL -#include -#include +extern int global_ack_signal_fd; -class Poll_socket_listener +class Ack_listener { public: - Poll_socket_listener(const Slave_ilist &slaves) + int local_read_signal; + const Slave_ilist &m_slaves; + + Ack_listener(const Slave_ilist &slaves) :m_slaves(slaves) { + int pipes[2]; + pipe(pipes); + global_ack_signal_fd= pipes[1]; + local_read_signal= pipes[0]; + fcntl(pipes[0], F_SETFL, O_NONBLOCK); + fcntl(pipes[1], F_SETFL, O_NONBLOCK); } + virtual ~Ack_listener() + { + close(global_ack_signal_fd); + close(local_read_signal); + global_ack_signal_fd= -1; + } + + virtual bool has_signal_data()= 0; + + /* Clear data sent by signal_listener() to abort read */ + void clear_signal() + { + if (has_signal_data()) + { + char buff[100]; + /* Clear the signal message */ + read(local_read_signal, buff, sizeof(buff)); + } + } +}; + +static inline void signal_listener() +{ + my_write(global_ack_signal_fd, (uchar*) "a", 1, MYF(0)); +} + +#ifdef HAVE_POLL +#include + +class Poll_socket_listener final : public Ack_listener +{ +private: + std::vector m_fds; + +public: + Poll_socket_listener(const Slave_ilist &slaves) + :Ack_listener(slaves) + {} + + virtual ~Poll_socket_listener() = default; + bool listen_on_sockets() { - return poll(m_fds.data(), m_fds.size(), 1000 /*1 Second timeout*/); + return poll(m_fds.data(), m_fds.size(), -1); } bool is_socket_active(const Slave *slave) @@ -148,15 +203,29 @@ public: m_fds[slave->m_fds_index].events= 0; } - uint init_slave_sockets() + bool has_signal_data() override + { + /* The signal fd is always first */ + return (m_fds[0].revents & POLLIN); + } + + int init_slave_sockets() { Slave_ilist_iterator it(const_cast(m_slaves)); Slave *slave; uint fds_index= 0; + pollfd poll_fd; m_fds.clear(); + /* First put in the signal socket */ + poll_fd.fd= local_read_signal; + poll_fd.events= POLLIN; + m_fds.push_back(poll_fd); + fds_index++; + while ((slave= it++)) { + slave->active= 1; pollfd poll_fd; poll_fd.fd= slave->sock_fd(); poll_fd.events= POLLIN; @@ -165,29 +234,30 @@ public: } return fds_index; } - -private: - const Slave_ilist &m_slaves; - std::vector m_fds; }; #else //NO POLL -class Select_socket_listener +class Select_socket_listener final : public Ack_listener { +private: + my_socket m_max_fd; + fd_set m_init_fds; + fd_set m_fds; + public: Select_socket_listener(const Slave_ilist &slaves) - :m_slaves(slaves), m_max_fd(INVALID_SOCKET) - { - } + :Ack_listener(slaves), m_max_fd(INVALID_SOCKET) + {} + + virtual ~Select_socket_listener() = default; bool listen_on_sockets() { /* Reinitialize the fds with active fds before calling select */ m_fds= m_init_fds; - struct timeval tv= {1,0}; /* select requires max fd + 1 for the first argument */ - return select((int) m_max_fd+1, &m_fds, NULL, NULL, &tv); + return select((int) m_max_fd+1, &m_fds, NULL, NULL, NULL); } bool is_socket_active(const Slave *slave) @@ -195,43 +265,61 @@ public: return FD_ISSET(slave->sock_fd(), &m_fds); } + bool has_signal_data() override + { + return FD_ISSET(local_read_signal, &m_fds); + } + void clear_socket_info(const Slave *slave) { FD_CLR(slave->sock_fd(), &m_init_fds); } - uint init_slave_sockets() + int init_slave_sockets() { Slave_ilist_iterator it(const_cast(m_slaves)); Slave *slave; uint fds_index= 0; FD_ZERO(&m_init_fds); + m_max_fd= -1; + + /* First put in the signal socket */ + FD_SET(local_read_signal, &m_init_fds); + fds_index++; + set_if_bigger(m_max_fd, local_read_signal); +#ifndef _WIN32 + if (local_read_signal > FD_SETSIZE) + { + int socket_id= local_read_signal; + sql_print_error("Semisync slave socket fd is %u. " + "select() cannot handle if the socket fd is " + "greater than %u (FD_SETSIZE).", socket_id, FD_SETSIZE); + return -1; + } +#endif + while ((slave= it++)) { my_socket socket_id= slave->sock_fd(); - m_max_fd= (socket_id > m_max_fd ? socket_id : m_max_fd); + set_if_bigger(m_max_fd, socket_id); #ifndef _WIN32 if (socket_id > FD_SETSIZE) { sql_print_error("Semisync slave socket fd is %u. " "select() cannot handle if the socket fd is " "greater than %u (FD_SETSIZE).", socket_id, FD_SETSIZE); - return 0; + it.remove(); + continue; } #endif //_WIN32 FD_SET(socket_id, &m_init_fds); fds_index++; + slave->active= 1; } return fds_index; } my_socket get_max_fd() { return m_max_fd; } - -private: - const Slave_ilist &m_slaves; - my_socket m_max_fd; - fd_set m_init_fds; - fd_set m_fds; }; #endif //HAVE_POLL diff --git a/sql/semisync_slave.cc b/sql/semisync_slave.cc index 3e7578b6a53..a56e22eab3e 100644 --- a/sql/semisync_slave.cc +++ b/sql/semisync_slave.cc @@ -20,20 +20,9 @@ Repl_semi_sync_slave repl_semisync_slave; -my_bool rpl_semi_sync_slave_enabled= 0; - +my_bool global_rpl_semi_sync_slave_enabled= 0; char rpl_semi_sync_slave_delay_master; -my_bool rpl_semi_sync_slave_status= 0; ulong rpl_semi_sync_slave_trace_level; - -/* - indicate whether or not the slave should send a reply to the master. - - This is set to true in repl_semi_slave_read_event if the current - event read is the last event of a transaction. And the value is - checked in repl_semi_slave_queue_event. -*/ -bool semi_sync_need_reply= false; unsigned int rpl_semi_sync_slave_kill_conn_timeout; unsigned long long rpl_semi_sync_slave_send_ack = 0; @@ -44,14 +33,26 @@ int Repl_semi_sync_slave::init_object() m_init_done = true; /* References to the parameter works after set_options(). */ - set_slave_enabled(rpl_semi_sync_slave_enabled); + set_slave_enabled(global_rpl_semi_sync_slave_enabled); set_trace_level(rpl_semi_sync_slave_trace_level); set_delay_master(rpl_semi_sync_slave_delay_master); set_kill_conn_timeout(rpl_semi_sync_slave_kill_conn_timeout); - return result; } +static bool local_semi_sync_enabled; + +int rpl_semi_sync_enabled(THD *thd, SHOW_VAR *var, void *buff, + system_status_var *status_var, + enum_var_type scope) +{ + local_semi_sync_enabled= repl_semisync_slave.get_slave_enabled(); + var->type= SHOW_BOOL; + var->value= (char*) &local_semi_sync_enabled; + return 0; +} + + int Repl_semi_sync_slave::slave_read_sync_header(const uchar *header, unsigned long total_len, int *semi_flags, @@ -61,12 +62,12 @@ int Repl_semi_sync_slave::slave_read_sync_header(const uchar *header, int read_res = 0; DBUG_ENTER("Repl_semi_sync_slave::slave_read_sync_header"); - if (rpl_semi_sync_slave_status) + if (get_slave_enabled()) { if (DBUG_EVALUATE_IF("semislave_corrupt_log", 0, 1) && header[0] == k_packet_magic_num) { - semi_sync_need_reply = (header[1] & k_packet_flag_sync); + bool semi_sync_need_reply = (header[1] & k_packet_flag_sync); *payload_len = total_len - 2; *payload = header + 2; @@ -85,7 +86,9 @@ int Repl_semi_sync_slave::slave_read_sync_header(const uchar *header, "len: %lu", total_len); read_res = -1; } - } else { + } + else + { *payload= header; *payload_len= total_len; } @@ -93,9 +96,23 @@ int Repl_semi_sync_slave::slave_read_sync_header(const uchar *header, DBUG_RETURN(read_res); } -int Repl_semi_sync_slave::slave_start(Master_info *mi) +/* + Set default semisync variables and print some replication info to the log + + Note that the main setup is done in request_transmit() +*/ + +void Repl_semi_sync_slave::slave_start(Master_info *mi) { - bool semi_sync= get_slave_enabled(); + + /* + Set semi_sync_enabled at slave start. This is not changed until next + slave start or reconnect. + */ + bool semi_sync= global_rpl_semi_sync_slave_enabled; + + set_slave_enabled(semi_sync); + mi->semi_sync_reply_enabled= 0; sql_print_information("Slave I/O thread: Start %s replication to\ master '%s@%s:%d' in log '%s' at position %lu", @@ -104,30 +121,29 @@ int Repl_semi_sync_slave::slave_start(Master_info *mi) const_cast(mi->master_log_name), (unsigned long)(mi->master_log_pos)); - if (semi_sync && !rpl_semi_sync_slave_status) - rpl_semi_sync_slave_status= 1; - /*clear the counter*/ rpl_semi_sync_slave_send_ack= 0; - return 0; } -int Repl_semi_sync_slave::slave_stop(Master_info *mi) +void Repl_semi_sync_slave::slave_stop(Master_info *mi) { if (get_slave_enabled()) kill_connection(mi->mysql); - if (rpl_semi_sync_slave_status) - rpl_semi_sync_slave_status= 0; - - return 0; + set_slave_enabled(0); } -int Repl_semi_sync_slave::reset_slave(Master_info *mi) +void Repl_semi_sync_slave::slave_reconnect(Master_info *mi) { - return 0; + /* + Start semi-sync either if it globally enabled or if was enabled + before the reconnect. + */ + if (global_rpl_semi_sync_slave_enabled || get_slave_enabled()) + slave_start(mi); } + void Repl_semi_sync_slave::kill_connection(MYSQL *mysql) { if (!mysql) @@ -194,34 +210,44 @@ int Repl_semi_sync_slave::request_transmit(Master_info *mi) !(res= mysql_store_result(mysql))) { sql_print_error("Execution failed on master: %s, error :%s", query, mysql_error(mysql)); + set_slave_enabled(0); return 1; } row= mysql_fetch_row(res); if (DBUG_EVALUATE_IF("master_not_support_semisync", 1, 0) - || !row) + || (!row || ! row[1])) { /* Master does not support semi-sync */ - sql_print_warning("Master server does not support semi-sync, " - "fallback to asynchronous replication"); - rpl_semi_sync_slave_status= 0; + if (!row) + sql_print_warning("Master server does not support semi-sync, " + "fallback to asynchronous replication"); + set_slave_enabled(0); mysql_free_result(res); return 0; } + if (strcmp(row[1], "ON")) + sql_print_information("Slave has semi-sync enabled but master server does " + "not. Semi-sync will be activated when master " + "enables it"); mysql_free_result(res); /* Tell master dump thread that we want to do semi-sync - replication + replication. This is done by setting a thread local variable in + the master connection. */ query= "SET @rpl_semi_sync_slave= 1"; if (mysql_real_query(mysql, query, (ulong)strlen(query))) { - sql_print_error("Set 'rpl_semi_sync_slave=1' on master failed"); + sql_print_error("%s on master failed", query); + set_slave_enabled(0); return 1; } + mi->semi_sync_reply_enabled= 1; + /* Inform net_server that pkt_nr can come out of order */ + mi->mysql->net.pkt_nr_can_be_reset= 1; mysql_free_result(mysql_store_result(mysql)); - rpl_semi_sync_slave_status= 1; return 0; } @@ -231,46 +257,41 @@ int Repl_semi_sync_slave::slave_reply(Master_info *mi) MYSQL* mysql= mi->mysql; const char *binlog_filename= const_cast(mi->master_log_name); my_off_t binlog_filepos= mi->master_log_pos; - NET *net= &mysql->net; uchar reply_buffer[REPLY_MAGIC_NUM_LEN + REPLY_BINLOG_POS_LEN + REPLY_BINLOG_NAME_LEN]; int reply_res = 0; size_t name_len = strlen(binlog_filename); - DBUG_ENTER("Repl_semi_sync_slave::slave_reply"); + DBUG_ASSERT(get_slave_enabled() && mi->semi_sync_reply_enabled); - if (rpl_semi_sync_slave_status && semi_sync_need_reply) + /* Prepare the buffer of the reply. */ + reply_buffer[REPLY_MAGIC_NUM_OFFSET] = k_packet_magic_num; + int8store(reply_buffer + REPLY_BINLOG_POS_OFFSET, binlog_filepos); + memcpy(reply_buffer + REPLY_BINLOG_NAME_OFFSET, + binlog_filename, + name_len + 1 /* including trailing '\0' */); + + DBUG_PRINT("semisync", ("%s: reply (%s, %lu)", + "Repl_semi_sync_slave::slave_reply", + binlog_filename, (ulong)binlog_filepos)); + + /* + We have to do a net_clear() as with semi-sync the slave_reply's are + interleaved with data from the master and then the net->pkt_nr + cannot be kept in sync. Better to start pkt_nr from 0 again. + */ + net_clear(net, 0); + /* Send the reply. */ + reply_res = my_net_write(net, reply_buffer, + name_len + REPLY_BINLOG_NAME_OFFSET); + if (!reply_res) { - /* Prepare the buffer of the reply. */ - reply_buffer[REPLY_MAGIC_NUM_OFFSET] = k_packet_magic_num; - int8store(reply_buffer + REPLY_BINLOG_POS_OFFSET, binlog_filepos); - memcpy(reply_buffer + REPLY_BINLOG_NAME_OFFSET, - binlog_filename, - name_len + 1 /* including trailing '\0' */); - - DBUG_PRINT("semisync", ("%s: reply (%s, %lu)", - "Repl_semi_sync_slave::slave_reply", - binlog_filename, (ulong)binlog_filepos)); - - net_clear(net, 0); - /* Send the reply. */ - reply_res = my_net_write(net, reply_buffer, - name_len + REPLY_BINLOG_NAME_OFFSET); + reply_res= DBUG_EVALUATE_IF("semislave_failed_net_flush", 1, + net_flush(net)); if (!reply_res) - { - reply_res = DBUG_EVALUATE_IF("semislave_failed_net_flush", 1, net_flush(net)); - if (reply_res) - sql_print_error("Semi-sync slave net_flush() reply failed"); rpl_semi_sync_slave_send_ack++; - } - else - { - sql_print_error("Semi-sync slave send reply failed: %s (%d)", - net->last_error, net->last_errno); - } } - DBUG_RETURN(reply_res); } diff --git a/sql/semisync_slave.h b/sql/semisync_slave.h index a8229245ab1..6811584c9c8 100644 --- a/sql/semisync_slave.h +++ b/sql/semisync_slave.h @@ -33,7 +33,7 @@ class Master_info; class Repl_semi_sync_slave :public Repl_semi_sync_base { public: - Repl_semi_sync_slave() :m_slave_enabled(false) {} + Repl_semi_sync_slave() :m_slave_enabled(false) {} ~Repl_semi_sync_slave() = default; void set_trace_level(unsigned long trace_level) { @@ -45,7 +45,7 @@ public: */ int init_object(); - bool get_slave_enabled() { + inline bool get_slave_enabled() { return m_slave_enabled; } @@ -53,7 +53,7 @@ public: m_slave_enabled = enabled; } - bool is_delay_master(){ + inline bool is_delay_master(){ return m_delay_master; } @@ -88,24 +88,23 @@ public: * binlog position. */ int slave_reply(Master_info* mi); - int slave_start(Master_info *mi); - int slave_stop(Master_info *mi); - int request_transmit(Master_info*); + void slave_start(Master_info *mi); + void slave_stop(Master_info *mi); + void slave_reconnect(Master_info *mi); + int request_transmit(Master_info *mi); void kill_connection(MYSQL *mysql); - int reset_slave(Master_info *mi); private: /* True when init_object has been called */ bool m_init_done; - bool m_slave_enabled; /* semi-sycn is enabled on the slave */ + bool m_slave_enabled; /* semi-sync is enabled on the slave */ bool m_delay_master; unsigned int m_kill_conn_timeout; }; /* System and status variables for the slave component */ -extern my_bool rpl_semi_sync_slave_enabled; -extern my_bool rpl_semi_sync_slave_status; +extern my_bool global_rpl_semi_sync_slave_enabled; extern ulong rpl_semi_sync_slave_trace_level; extern Repl_semi_sync_slave repl_semisync_slave; @@ -113,4 +112,7 @@ extern char rpl_semi_sync_slave_delay_master; extern unsigned int rpl_semi_sync_slave_kill_conn_timeout; extern unsigned long long rpl_semi_sync_slave_send_ack; +extern int rpl_semi_sync_enabled(THD *thd, SHOW_VAR *var, void *buff, + system_status_var *status_var, + enum_var_type scope); #endif /* SEMISYNC_SLAVE_H */ diff --git a/sql/slave.cc b/sql/slave.cc index e745749d64d..3318c92b317 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -4659,6 +4659,7 @@ static int try_to_reconnect(THD *thd, MYSQL *mysql, Master_info *mi, sql_print_information("%s", messages[SLAVE_RECON_MSG_KILLED_AFTER]); return 1; } + repl_semisync_slave.slave_reconnect(mi); return 0; } @@ -4747,14 +4748,7 @@ pthread_handler_t handle_slave_io(void *arg) } thd->variables.wsrep_on= 0; - if (DBUG_EVALUATE_IF("failed_slave_start", 1, 0) - || repl_semisync_slave.slave_start(mi)) - { - mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL, - ER_THD(thd, ER_SLAVE_FATAL_ERROR), - "Failed to run 'thread_start' hook"); - goto err; - } + repl_semisync_slave.slave_start(mi); if (!(mi->mysql = mysql = mysql_init(NULL))) { @@ -4848,6 +4842,7 @@ connected: if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings, reconnect_messages[SLAVE_RECON_ACT_REG])) goto err; + goto connected; } @@ -4911,6 +4906,15 @@ connected: we're in fact receiving nothing. */ THD_STAGE_INFO(thd, stage_waiting_for_master_to_send_event); + +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("pause_before_io_read_event", + { + DBUG_ASSERT(!debug_sync_set_action( thd, STRING_WITH_LEN( + "now signal io_thread_at_read_event wait_for io_thread_continue_read_event"))); + DBUG_SET("-d,pause_before_io_read_event"); + };); +#endif event_len= read_event(mysql, mi, &suppress_warnings, &network_read_len); if (check_io_slave_killed(mi, NullS)) goto err; @@ -5010,17 +5014,36 @@ Stopping slave I/O thread due to out-of-memory error from master"); goto err; } - if (rpl_semi_sync_slave_status && (mi->semi_ack & SEMI_SYNC_NEED_ACK)) + if (repl_semisync_slave.get_slave_enabled() && + mi->semi_sync_reply_enabled && + (mi->semi_ack & SEMI_SYNC_NEED_ACK)) { - /* - We deliberately ignore the error in slave_reply, such error should - not cause the slave IO thread to stop, and the error messages are - already reported. - */ - DBUG_EXECUTE_IF("simulate_delay_semisync_slave_reply", my_sleep(800000);); - (void)repl_semisync_slave.slave_reply(mi); - } + DBUG_EXECUTE_IF("simulate_delay_semisync_slave_reply", + my_sleep(800000);); + if (repl_semisync_slave.slave_reply(mi)) + { + /* + Master is not responding (gone away?) or it has turned semi sync + off. Turning off semi-sync responses as there is no point in sending + data to the master if the master not receiving the messages. + This also stops the logs from getting filled with + "Semi-sync slave net_flush() reply failed" messages. + On reconnect semi sync will be turned on again, if the + master has semi-sync enabled. + We check mi->abort_slave to see if the io thread was + killed and in this case we do not need an error message as + we know what is going on. + */ + if (!mi->abort_slave) + sql_print_error("Master server does not read semi-sync messages " + "last_error: %s (%d). " + "Fallback to asynchronous replication", + mi->mysql->net.last_error, + mi->mysql->net.last_errno); + mi->semi_sync_reply_enabled= 0; + } + } if (mi->using_gtid == Master_info::USE_GTID_NO && /* If rpl_semi_sync_slave_delay_master is enabled, we will flush @@ -6879,7 +6902,7 @@ dbug_gtid_accept: */ mi->do_accept_own_server_id= (s_id == global_system_variables.server_id && - rpl_semi_sync_slave_enabled && opt_gtid_strict_mode && + repl_semisync_slave.get_slave_enabled() && opt_gtid_strict_mode && mi->using_gtid != Master_info::USE_GTID_NO && !mysql_bin_log.check_strict_gtid_sequence(event_gtid.domain_id, event_gtid.server_id, diff --git a/sql/sql_class.cc b/sql/sql_class.cc index a03635bc868..fdf84ef8a99 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1876,14 +1876,6 @@ void add_diff_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var, */ } -#define SECONDS_TO_WAIT_FOR_KILL 2 -#if !defined(_WIN32) && defined(HAVE_SELECT) -/* my_sleep() can wait for sub second times */ -#define WAIT_FOR_KILL_TRY_TIMES 20 -#else -#define WAIT_FOR_KILL_TRY_TIMES 2 -#endif - /** Awake a thread. diff --git a/sql/sql_class.h b/sql/sql_class.h index 6bbac5647be..d61ee1545b6 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -652,6 +652,15 @@ enum killed_type KILL_TYPE_QUERY }; +#define SECONDS_TO_WAIT_FOR_KILL 2 +#define SECONDS_TO_WAIT_FOR_DUMP_THREAD_KILL 10 +#if !defined(_WIN32) && defined(HAVE_SELECT) +/* my_sleep() can wait for sub second times */ +#define WAIT_FOR_KILL_TRY_TIMES 20 +#else +#define WAIT_FOR_KILL_TRY_TIMES 2 +#endif + #include "sql_lex.h" /* Must be here */ class Delayed_insert; diff --git a/sql/sql_list.h b/sql/sql_list.h index a9ab5415d5a..909e6209c44 100644 --- a/sql/sql_list.h +++ b/sql/sql_list.h @@ -799,7 +799,9 @@ public: class base_ilist_iterator { base_ilist *list; - struct ilink **el,*current; + struct ilink **el; +protected: + struct ilink *current; public: base_ilist_iterator(base_ilist &list_par) :list(&list_par), el(&list_par.first),current(0) {} @@ -811,6 +813,13 @@ public: el= ¤t->next; return current; } + /* Unlink element returned by last next() call */ + inline void unlink(void) + { + struct ilink **tmp= current->prev; + current->unlink(); + el= tmp; + } }; @@ -840,6 +849,13 @@ template class I_List_iterator :public base_ilist_iterator public: I_List_iterator(I_List &a) : base_ilist_iterator(a) {} inline T* operator++(int) { return (T*) base_ilist_iterator::next(); } + /* Remove element returned by last next() call */ + inline void remove(void) + { + unlink(); + delete (T*) current; + current= 0; // Safety + } }; /** diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index c09a59b3adc..c30c825ffb8 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -2126,7 +2126,6 @@ dispatch_command_return dispatch_command(enum enum_server_command command, THD * { ulong pos; ushort flags; - uint32 slave_server_id; status_var_increment(thd->status_var.com_other); @@ -2137,10 +2136,26 @@ dispatch_command_return dispatch_command(enum enum_server_command command, THD * /* TODO: The following has to be changed to an 8 byte integer */ pos = uint4korr(packet); flags = uint2korr(packet + 4); - thd->variables.server_id=0; /* avoid suicide */ - if ((slave_server_id= uint4korr(packet+6))) // mysqlbinlog.server_id==0 - kill_zombie_dump_threads(slave_server_id); - thd->variables.server_id = slave_server_id; + if ((thd->variables.server_id= uint4korr(packet+6))) + { + bool got_error; + + got_error= kill_zombie_dump_threads(thd, + thd->variables.server_id); + if (got_error || thd->killed) + { + if (!thd->killed) + my_printf_error(ER_MASTER_FATAL_ERROR_READING_BINLOG, + "Could not start dump thread for slave: %u as " + "it has already a running dump thread", + MYF(0), (uint) thd->variables.server_id); + else if (! thd->get_stmt_da()->is_set()) + thd->send_kill_message(); + error= TRUE; + thd->unregister_slave(); // todo: can be extraneous + break; + } + } const char *name= packet + 10; size_t nlen= strlen(name); @@ -2148,6 +2163,8 @@ dispatch_command_return dispatch_command(enum enum_server_command command, THD * general_log_print(thd, command, "Log: '%s' Pos: %lu", name, pos); if (nlen < FN_REFLEN) mysql_binlog_send(thd, thd->strmake(name, nlen), (my_off_t)pos, flags); + if (thd->killed && ! thd->get_stmt_da()->is_set()) + thd->send_kill_message(); thd->unregister_slave(); // todo: can be extraneous /* fake COM_QUIT -- if we get here, the thread needs to terminate */ error = TRUE; diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index 70b4b9aedf7..1ac6db00a15 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -2060,7 +2060,7 @@ send_event_to_slave(binlog_send_info *info, Log_event_type event_type, } if (need_sync && repl_semisync_master.flush_net(info->thd, - packet->c_ptr_safe())) + packet->c_ptr())) { info->error= ER_UNKNOWN_ERROR; return "Failed to run hook 'after_send_event'"; @@ -3011,8 +3011,13 @@ err: if (info->thd->killed == KILL_SLAVE_SAME_ID) { - info->errmsg= "A slave with the same server_uuid/server_id as this slave " - "has connected to the master"; + /* + Note that the text is limited to 64 characters in errmsg-utf8 in + ER_ABORTING_CONNECTION. + */ + info->errmsg= + "A slave with the same server_uuid/server_id is already " + "connected"; info->error= ER_SLAVE_SAME_ID; } @@ -3385,6 +3390,7 @@ int stop_slave(THD* thd, Master_info* mi, bool net_report ) @retval 0 success @retval 1 error */ + int reset_slave(THD *thd, Master_info* mi) { MY_STAT stat_area; @@ -3472,8 +3478,6 @@ int reset_slave(THD *thd, Master_info* mi) else if (global_system_variables.log_warnings > 1) sql_print_information("Deleted Master_info file '%s'.", fname); - if (rpl_semi_sync_slave_enabled) - repl_semisync_slave.reset_slave(mi); err: mi->unlock_slave_threads(); if (unlikely(error)) @@ -3501,43 +3505,89 @@ err: struct kill_callback_arg { - kill_callback_arg(uint32 id): slave_server_id(id), thd(0) {} - uint32 slave_server_id; + kill_callback_arg(THD *thd_arg, uint32 id): + thd(thd_arg), slave_server_id(id), counter(0) {} THD *thd; + uint32 slave_server_id; + uint counter; }; -static my_bool kill_callback(THD *thd, kill_callback_arg *arg) + +/* + Collect all active dump threads +*/ + +static my_bool kill_callback_collect(THD *thd, kill_callback_arg *arg) { if (thd->get_command() == COM_BINLOG_DUMP && - thd->variables.server_id == arg->slave_server_id) + thd->variables.server_id == arg->slave_server_id && + thd != arg->thd) { - arg->thd= thd; + arg->counter++; mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete mysql_mutex_lock(&thd->LOCK_thd_data); - return 1; + thd->awake_no_mutex(KILL_SLAVE_SAME_ID); // Mark killed + /* + Remover the thread from ack_receiver to ensure it is not + sending acks to the master anymore. + */ + ack_receiver.remove_slave(thd); + + mysql_mutex_unlock(&thd->LOCK_thd_data); + mysql_mutex_unlock(&thd->LOCK_thd_kill); } return 0; } -void kill_zombie_dump_threads(uint32 slave_server_id) -{ - kill_callback_arg arg(slave_server_id); - server_threads.iterate(kill_callback, &arg); +/* + Check if there are any active dump threads +*/ - if (arg.thd) - { - /* - Here we do not call kill_one_thread() as - it will be slow because it will iterate through the list - again. We just to do kill the thread ourselves. - */ - arg.thd->awake_no_mutex(KILL_SLAVE_SAME_ID); - mysql_mutex_unlock(&arg.thd->LOCK_thd_kill); - mysql_mutex_unlock(&arg.thd->LOCK_thd_data); - } +static my_bool kill_callback_check(THD *thd, kill_callback_arg *arg) +{ + return (thd->get_command() == COM_BINLOG_DUMP && + thd->variables.server_id == arg->slave_server_id && + thd != arg->thd); } + +/** + Try to kill running dump threads on the master + + @result 0 ok + @result 1 old slave thread exists and does not want to die + + There should not be more than one dump thread with the same server id + this code has however in the past has several issues. To ensure that + things works in all cases (now and in the future), this code is collecting + all matching server id's and killing all of them. +*/ + +bool kill_zombie_dump_threads(THD *thd, uint32 slave_server_id) +{ + kill_callback_arg arg(thd, slave_server_id); + server_threads.iterate(kill_callback_collect, &arg); + + if (!arg.counter) + return 0; + + /* + Wait up to SECONDS_TO_WAIT_FOR_DUMP_THREAD_KILL for kill + of all dump thread, trying every 1/10 of second. + */ + for (uint i= 10 * SECONDS_TO_WAIT_FOR_DUMP_THREAD_KILL ; + --i > 0 && !thd->killed; + i++) + { + if (!server_threads.iterate(kill_callback_check, &arg)) + return 0; // All dump thread are killed + my_sleep(1000000L / 10); // Wait 1/10 of a second + } + return 1; +} + + /** Get value for a string parameter with error checking diff --git a/sql/sql_repl.h b/sql/sql_repl.h index 95916e31abf..3be1e18ca0d 100644 --- a/sql/sql_repl.h +++ b/sql/sql_repl.h @@ -43,7 +43,7 @@ void adjust_linfo_offsets(my_off_t purge_offset); void show_binlogs_get_fields(THD *thd, List *field_list); bool show_binlogs(THD* thd); extern int init_master_info(Master_info* mi); -void kill_zombie_dump_threads(uint32 slave_server_id); +bool kill_zombie_dump_threads(THD *thd, uint32 slave_server_id); int check_binlog_magic(IO_CACHE* log, const char** errmsg); int compare_log_name(const char *log_1, const char *log_2); diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index be8a66ff7f2..dea04bf3bc5 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -3478,13 +3478,6 @@ static bool fix_rpl_semi_sync_master_wait_point(sys_var *self, THD *thd, return false; } -static bool fix_rpl_semi_sync_master_wait_no_slave(sys_var *self, THD *thd, - enum_var_type type) -{ - repl_semisync_master.check_and_switch(); - return false; -} - static Sys_var_on_access_global Sys_semisync_master_enabled( @@ -3511,12 +3504,11 @@ static Sys_var_on_access_global Sys_semisync_master_wait_no_slave( "rpl_semi_sync_master_wait_no_slave", - "Wait until timeout when no semi-synchronous replication slave " - "available (enabled by default).", + "Wait until timeout when no semi-synchronous replication slave is " + "available.", GLOBAL_VAR(rpl_semi_sync_master_wait_no_slave), CMD_LINE(OPT_ARG), DEFAULT(TRUE), - NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), - ON_UPDATE(fix_rpl_semi_sync_master_wait_no_slave)); + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0)); static Sys_var_on_access_global @@ -3543,13 +3535,6 @@ Sys_semisync_master_wait_point( NO_MUTEX_GUARD, NOT_IN_BINLOG,ON_CHECK(0), ON_UPDATE(fix_rpl_semi_sync_master_wait_point)); -static bool fix_rpl_semi_sync_slave_enabled(sys_var *self, THD *thd, - enum_var_type type) -{ - repl_semisync_slave.set_slave_enabled(rpl_semi_sync_slave_enabled != 0); - return false; -} - static bool fix_rpl_semi_sync_slave_trace_level(sys_var *self, THD *thd, enum_var_type type) { @@ -3577,10 +3562,9 @@ static Sys_var_on_access_global