1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-07 00:04:31 +03:00

MDEV-33582 Add more warnings to be able to better diagnose network issues

Warnings are added to net_serv.cc when
global_system_variables.log_warnings >= 4.

When the above condition holds then:
- All communication errors from net_serv.cc are also written to the
  error log.
- When a packet cannot be read or written, a more
  detailed error is given.

Other things:
- Added detection of slaves that have hung up to Ack_receiver::run()
- vio_close() now marks the socket as closed before closing it.
  The reason for this is to ensure that the connection that gets a read
  error can check if the reason was that the socket was closed.
- Add a new state to vio to be able to detect if vio is active, shut down or
  closed. This is used to detect if the socket was closed by another thread.
- Testing of the new warnings is done in rpl_get_lock.test
- Suppress some of the new warnings in mtr to allow one to run some of
  the tests with --mysqld=--log-warnings=4. All tests in the 'rpl' suite
  can now be run with this option.
- Ensure that global.log_warnings is restored at test end in a way
  that allows one to use mtr --mysqld=--log-warnings=4.

Reviewed-by: <serg@mariadb.org>,<brandon.nesterenko@mariadb.com>
This commit is contained in:
Monty
2024-03-01 18:16:33 +02:00
parent 48f42ab2e5
commit 567c097359
40 changed files with 211 additions and 53 deletions

View File

@@ -59,6 +59,10 @@
#VARCHAR(M)
#
--disable_query_log
call mtr.add_suppression("Could not read packet:.* errno: 11");
--enable_query_log
--let $_saved_conn= $CURRENT_CONNECTION
let $binformat = `SHOW VARIABLES LIKE '%binlog_format%'`;

View File

@@ -1,6 +1,7 @@
include/master-slave.inc
[connection master]
connection master;
SET @org_log_warnings=@@GLOBAL.LOG_WARNINGS;
SET GLOBAL LOG_WARNINGS=2;
connection slave;
include/stop_slave.inc
@@ -41,11 +42,11 @@ connection master;
include/wait_for_pattern_in_file.inc
FOUND 1 /using_gtid\(1\), gtid\(\'0-1-2,10-1-1\'\).*/ in mysqld.1.err
"===== Clean up ====="
SET GLOBAL LOG_WARNINGS=@org_log_warnings;
connection slave;
include/stop_slave.inc
CHANGE MASTER TO MASTER_USE_GTID=no;
include/start_slave.inc
connection master;
DROP TABLE t;
SET GLOBAL LOG_WARNINGS=default;
include/rpl_end.inc

View File

@@ -1,9 +1,6 @@
include/master-slave.inc
[connection master]
connection master;
call mtr.add_suppression("mysqld: Table '.*gtid_slave_pos' is marked as crashed and should be repaired");
call mtr.add_suppression("Checking table: './mysql/gtid_slave_pos'");
call mtr.add_suppression("mysql.gtid_slave_pos: 1 client is using or hasn't closed the table properly");
SET @@session.gtid_domain_id= 0;
create table ti (a int auto_increment primary key) engine=innodb;
create table tm (a int auto_increment primary key) engine=myisam;

View File

@@ -1,6 +1,6 @@
include/master-slave.inc
[connection master]
CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
SET GLOBAL LOG_WARNINGS=4;
create table t1(n int);
insert into t1 values(get_lock("lock",2));
disconnect master;
@@ -35,4 +35,5 @@ NULL
connection master1;
drop table t1;
connection slave;
connection default;
include/rpl_end.inc

View File

@@ -4,6 +4,7 @@ connection server_1;
call mtr.add_suppression("Checking table:");
call mtr.add_suppression("client is using or hasn't closed the table properly");
call mtr.add_suppression("Table .* is marked as crashed and should be repaired");
call mtr.add_suppression("Could not read packet:.* errno: 11");
flush tables;
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;

View File

@@ -50,5 +50,4 @@ CHANGE MASTER TO MASTER_USE_GTID=no;
include/start_slave.inc
connection master;
DROP TABLE t;
SET GLOBAL LOG_WARNINGS=default;
include/rpl_end.inc

View File

@@ -9,8 +9,6 @@ Variable_name Slave_heartbeat_period
Value 60.000
SET @saved_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,simulate_slave_heartbeat_network_error";
CALL mtr.add_suppression('SET @master_heartbeat_period to master failed with error');
CALL mtr.add_suppression('Master command COM_REGISTER_SLAVE failed: failed registering on master, reconnecting to try again');
include/start_slave.inc
connection master;
drop table if exists t1;

View File

@@ -2,6 +2,8 @@ include/master-slave.inc
[connection master]
call mtr.add_suppression("Slave I/O: Got a packet bigger than 'slave_max_allowed_packet' bytes, .*error.* 1153");
call mtr.add_suppression("Log entry on master is longer than slave_max_allowed_packet");
call mtr.add_suppression("Could not write packet:");
call mtr.add_suppression("Got a packet bigger than 'max_allowed_packet' bytes");
drop database if exists DB_NAME_OF_MAX_LENGTH_AKA_NAME_LEN_64_BYTES_____________________;
create database DB_NAME_OF_MAX_LENGTH_AKA_NAME_LEN_64_BYTES_____________________;
connection master;

View File

@@ -626,6 +626,7 @@ include/save_master_gtid.inc
connection server_2;
include/sync_with_master_gtid.inc
connection server_2;
SET @org_log_warnings=@@GLOBAL.LOG_WARNINGS;
set global log_warnings=2;
BEGIN;
INSERT INTO t1 SET a=1;
@@ -651,7 +652,7 @@ connection server_2;
include/sync_with_master_gtid.inc
connection server_2;
include/stop_slave.inc
set global log_warnings=default;
set global log_warnings=@org_log_warnings;
SET GLOBAL slave_parallel_mode=@old_parallel_mode;
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
include/start_slave.inc

View File

@@ -32,7 +32,6 @@ include/diff_tables.inc [master:t0, slave:t0]
include/diff_tables.inc [master:t1, slave:t1]
connection slave;
include/stop_slave.inc
set global log_warnings=default;
SET GLOBAL slave_parallel_mode=@old_parallel_mode;
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
include/start_slave.inc

View File

@@ -32,7 +32,6 @@ include/diff_tables.inc [master:t0, slave:t0]
include/diff_tables.inc [master:t1, slave:t1]
connection slave;
include/stop_slave.inc
set global log_warnings=default;
SET GLOBAL slave_parallel_mode=@old_parallel_mode;
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
include/start_slave.inc

View File

@@ -20,6 +20,5 @@ FOUND 1 /The slave is applying a ROW event on behalf of an UPDATE statement on t
FOUND 1 /The slave is applying a ROW event on behalf of a DELETE statement on table t1 and is currently taking a considerable amount/ in mysqld.2.err
include/stop_slave.inc
SET @@GLOBAL.debug_dbug = @saved_dbug;
SET GLOBAL log_warnings = 2;
include/start_slave.inc
include/rpl_end.inc

View File

@@ -2,6 +2,8 @@ include/master-slave.inc
[connection master]
call mtr.add_suppression("Replication event checksum verification failed");
call mtr.add_suppression("could not queue event from master");
call mtr.add_suppression("Semisync ack receiver.*error reading communication packets");
call mtr.add_suppression("Semisync ack receiver got hangup");
#
# Set up a semisync connection
connection master;

View File

@@ -1,6 +1,5 @@
include/master-slave.inc
[connection master]
call mtr.add_suppression("Master is configured to log replication events");
connection slave;
connection slave;
include/wait_for_slave_to_stop.inc

View File

@@ -39,6 +39,7 @@
--source include/master-slave.inc
--connection master
SET @org_log_warnings=@@GLOBAL.LOG_WARNINGS;
SET GLOBAL LOG_WARNINGS=2;
--connection slave
@@ -110,6 +111,7 @@ CHANGE MASTER TO MASTER_USE_GTID=slave_pos;
--source include/wait_for_pattern_in_file.inc
--echo "===== Clean up ====="
SET GLOBAL LOG_WARNINGS=@org_log_warnings;
--connection slave
--source include/stop_slave.inc
CHANGE MASTER TO MASTER_USE_GTID=no;
@@ -117,5 +119,4 @@ CHANGE MASTER TO MASTER_USE_GTID=no;
--connection master
DROP TABLE t;
SET GLOBAL LOG_WARNINGS=default;
--source include/rpl_end.inc

View File

@@ -6,9 +6,12 @@
connection master;
--disable_query_log
call mtr.add_suppression("mysqld: Table '.*gtid_slave_pos' is marked as crashed and should be repaired");
call mtr.add_suppression("Checking table: './mysql/gtid_slave_pos'");
call mtr.add_suppression("mysql.gtid_slave_pos: 1 client is using or hasn't closed the table properly");
call mtr.add_suppression("Could not read packet:.* errno: 11");
--enable_query_log
SET @@session.gtid_domain_id= 0;
create table ti (a int auto_increment primary key) engine=innodb;

View File

@@ -30,6 +30,10 @@
--let $rpl_skip_start_slave=1
--source include/master-slave.inc
--disable_query_log
call mtr.add_suppression("Could not read packet:.* errno: 11");
--enable_query_log
# Do an insert on master
CREATE TABLE t1(a int);
INSERT INTO t1 VALUES(1);

View File

@@ -1,6 +1,16 @@
source include/master-slave.inc;
--disable_query_log
CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
call mtr.add_suppression("Could not read packet:.* errno: 11 ");
# The following one comes from calling dirty_close on client side
call mtr.add_suppression("Could not read packet:.* errno: 2 ");
--enable_query_log
let $org_log_warnings=`select @@global.log_warnings`;
# Test extended warnings
SET GLOBAL LOG_WARNINGS=4;
create table t1(n int);
# Use of get_lock gives a warning for unsafeness if binlog_format=statement
@@ -41,6 +51,10 @@ connection master1;
drop table t1;
sync_slave_with_master;
connection default;
--disable_query_log
--eval SET GLOBAL LOG_WARNINGS=$org_log_warnings;
--enable_query_log
--source include/rpl_end.inc

View File

@@ -12,6 +12,7 @@
call mtr.add_suppression("Checking table:");
call mtr.add_suppression("client is using or hasn't closed the table properly");
call mtr.add_suppression("Table .* is marked as crashed and should be repaired");
call mtr.add_suppression("Could not read packet:.* errno: 11");
flush tables;
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;

View File

@@ -5,6 +5,10 @@
--let $rpl_topology=1->2
--source include/rpl_init.inc
--disable_query_log
call mtr.add_suppression("Could not read packet:.* errno: 11");
--enable_query_log
--echo *** Test crashing master with InnoDB disabled, the binlog gtid state should still be correctly recovered. ***
--connection server_1

View File

@@ -93,5 +93,4 @@ CHANGE MASTER TO MASTER_USE_GTID=no;
--connection master
DROP TABLE t;
SET GLOBAL LOG_WARNINGS=default;
--source include/rpl_end.inc

View File

@@ -3,6 +3,12 @@
--source include/have_debug.inc
--source include/master-slave.inc
--disable_query_log
CALL mtr.add_suppression('SET @master_heartbeat_period to master failed with error');
CALL mtr.add_suppression('Master command COM_REGISTER_SLAVE failed: failed registering on master, reconnecting to try again');
call mtr.add_suppression("Could not read packet:.* errno: 11");
--enable_query_log
connection slave;
--source include/stop_slave.inc
set @restore_slave_net_timeout= @@global.slave_net_timeout;
@@ -14,14 +20,13 @@ set @@global.slave_net_timeout= 10;
### Checking the range
###
#
# default period slave_net_timeout/2
#
--query_vertical show status like 'Slave_heartbeat_period';
SET @saved_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,simulate_slave_heartbeat_network_error";
CALL mtr.add_suppression('SET @master_heartbeat_period to master failed with error');
CALL mtr.add_suppression('Master command COM_REGISTER_SLAVE failed: failed registering on master, reconnecting to try again');
--source include/start_slave.inc

View File

@@ -20,6 +20,9 @@ source include/master-slave.inc;
call mtr.add_suppression("Slave I/O: Got a packet bigger than 'slave_max_allowed_packet' bytes, .*error.* 1153");
call mtr.add_suppression("Log entry on master is longer than slave_max_allowed_packet");
call mtr.add_suppression("Could not write packet:");
call mtr.add_suppression("Got a packet bigger than 'max_allowed_packet' bytes");
let $db= DB_NAME_OF_MAX_LENGTH_AKA_NAME_LEN_64_BYTES_____________________;
disable_warnings;
eval drop database if exists $db;

View File

@@ -508,6 +508,7 @@ DELETE FROM t2;
# The 1st of the following two trx:s a blocker on slave
--connection server_2
SET @org_log_warnings=@@GLOBAL.LOG_WARNINGS;
set global log_warnings=2;
BEGIN;
INSERT INTO t1 SET a=1;
@@ -555,7 +556,7 @@ DELETE FROM t2;
#
--connection server_2
--source include/stop_slave.inc
set global log_warnings=default;
set global log_warnings=@org_log_warnings;
SET GLOBAL slave_parallel_mode=@old_parallel_mode;
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
--source include/start_slave.inc

View File

@@ -206,7 +206,6 @@ while($i > 0)
#
--connection slave
--source include/stop_slave.inc
set global log_warnings=default;
SET GLOBAL slave_parallel_mode=@old_parallel_mode;
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
--source include/start_slave.inc

View File

@@ -51,7 +51,9 @@ DROP TABLE t1;
# cleanup
--source include/stop_slave.inc
SET @@GLOBAL.debug_dbug = @saved_dbug;
--disable_query_log
--eval SET GLOBAL log_warnings = $log_warnings_save
--enable_query_log
--source include/start_slave.inc
--source include/rpl_end.inc

View File

@@ -10,6 +10,11 @@
--source include/have_binlog_format_row.inc
--source include/master-slave.inc
--disable_query_log
call mtr.add_suppression("Could not read packet:.* errno: 11");
flush tables;
--enable_query_log
# Initial slave
--connection server_2
--source include/stop_slave.inc

View File

@@ -1,13 +1,13 @@
!include ../my.cnf
[mysqld.1]
log_warnings=9
log_warnings=3
[mysqld.2]
log_warnings=9
log_warnings=3
[mysqld.3]
log_warnings=9
log_warnings=3
[ENV]
SERVER_MYPORT_3= @mysqld.3.port

View File

@@ -14,7 +14,8 @@
call mtr.add_suppression("Replication event checksum verification failed");
call mtr.add_suppression("could not queue event from master");
call mtr.add_suppression("Semisync ack receiver.*error reading communication packets");
call mtr.add_suppression("Semisync ack receiver got hangup");
--echo #
--echo # Set up a semisync connection
--connection master

View File

@@ -3,7 +3,10 @@
#
--source include/master-slave.inc
--disable_query_log
call mtr.add_suppression("Master is configured to log replication events");
call mtr.add_suppression("Could not read packet:.* errno: 11");
--enable_query_log
--connection slave