mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
MDEV-16091: Seconds_Behind_Master spikes to millions of seconds
Problem: ======== A slave’s relay log format description event is used when calculating Seconds_Behind_Master (SBM). This forces the SBM value to spike when processing these events, as their creation date is set to the timestamp that the IO thread begins. Solution: ======== When the slave generates a format description event, mark the event as a relay log event so it does not update the rli->last_master_timestamp variable. Reviewed By: ============ Andrei Elkin <andrei.elkin@mariadb.com>
This commit is contained in:
@@ -0,0 +1,41 @@
|
|||||||
|
include/master-slave.inc
|
||||||
|
[connection master]
|
||||||
|
connection slave;
|
||||||
|
include/stop_slave.inc
|
||||||
|
SET @save_dbug= @@GLOBAL.debug_dbug;
|
||||||
|
SET @@global.debug_dbug="+d,pause_sql_thread_on_fde";
|
||||||
|
include/start_slave.inc
|
||||||
|
# Future events must be logged at least 2 seconds after
|
||||||
|
# the slave starts
|
||||||
|
connection master;
|
||||||
|
# Write events to ensure slave will be consistent with master
|
||||||
|
create table t1 (a int);
|
||||||
|
insert into t1 values (1);
|
||||||
|
# Flush logs on master forces slave to generate a Format description
|
||||||
|
# event in its relay log
|
||||||
|
flush logs;
|
||||||
|
connection slave;
|
||||||
|
# Ignore FDEs that happen before the CREATE/INSERT commands
|
||||||
|
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
|
||||||
|
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
|
||||||
|
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
|
||||||
|
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
|
||||||
|
# On the next FDE, the slave should have the master CREATE/INSERT events
|
||||||
|
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
|
||||||
|
select count(*)=1 from t1;
|
||||||
|
count(*)=1
|
||||||
|
1
|
||||||
|
# The relay log FDE has been processed - here we check to ensure it was
|
||||||
|
# not considered in Seconds_Behind_Master calculation
|
||||||
|
connection slave1;
|
||||||
|
# Safely resume slave SQL thread
|
||||||
|
SET @@global.debug_dbug='';
|
||||||
|
SET DEBUG_SYNC='pause_sql_thread_on_fde CLEAR';
|
||||||
|
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
|
||||||
|
SET DEBUG_SYNC='RESET';
|
||||||
|
connection master;
|
||||||
|
DROP TABLE t1;
|
||||||
|
connection slave;
|
||||||
|
connection slave;
|
||||||
|
SET @@global.debug_dbug=$save_dbug;
|
||||||
|
include/rpl_end.inc
|
86
mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test
Normal file
86
mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
#
|
||||||
|
# Purpose:
|
||||||
|
# This test validates that a slave's relay log format description event is
|
||||||
|
# not used to calculate the Seconds_Behind_Master time displayed by
|
||||||
|
# SHOW SLAVE STATUS.
|
||||||
|
#
|
||||||
|
# Methodology:
|
||||||
|
# Ensure that a slave's reported Seconds_Behind_Master does not point before
|
||||||
|
# a time in which we can prove that it has progressed beyond. The slave's
|
||||||
|
# relay log events are created using the timestamp at which the IO thread was
|
||||||
|
# created. Therefore, after starting the slave's IO thread, we sleep so any
|
||||||
|
# proceeding events are forced to have later timestamps. After sleeping, we run
|
||||||
|
# MDL statements on the master and save the time at which they are binlogged.
|
||||||
|
# Once the slave executes these MDL commands, we have proven that the slave has
|
||||||
|
# caught up to this saved timestamp. At this point, if the value of
|
||||||
|
# Seconds_Behind_Master points before the time in which the MDL events were
|
||||||
|
# logged, it is invalid.
|
||||||
|
#
|
||||||
|
# References:
|
||||||
|
# MDEV-16091: Seconds_Behind_Master spikes to millions of seconds
|
||||||
|
#
|
||||||
|
--source include/have_debug.inc
|
||||||
|
--source include/have_innodb.inc
|
||||||
|
--source include/master-slave.inc
|
||||||
|
|
||||||
|
--connection slave
|
||||||
|
--source include/stop_slave.inc
|
||||||
|
SET @save_dbug= @@GLOBAL.debug_dbug;
|
||||||
|
SET @@global.debug_dbug="+d,pause_sql_thread_on_fde";
|
||||||
|
--source include/start_slave.inc
|
||||||
|
|
||||||
|
--let $sleep_time=2
|
||||||
|
--echo # Future events must be logged at least $sleep_time seconds after
|
||||||
|
--echo # the slave starts
|
||||||
|
--sleep $sleep_time
|
||||||
|
|
||||||
|
--connection master
|
||||||
|
--echo # Write events to ensure slave will be consistent with master
|
||||||
|
create table t1 (a int);
|
||||||
|
insert into t1 values (1);
|
||||||
|
--let $t_master_events_logged= `SELECT UNIX_TIMESTAMP()`
|
||||||
|
|
||||||
|
--echo # Flush logs on master forces slave to generate a Format description
|
||||||
|
--echo # event in its relay log
|
||||||
|
flush logs;
|
||||||
|
|
||||||
|
--connection slave
|
||||||
|
--echo # Ignore FDEs that happen before the CREATE/INSERT commands
|
||||||
|
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
|
||||||
|
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
|
||||||
|
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
|
||||||
|
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
|
||||||
|
|
||||||
|
--echo # On the next FDE, the slave should have the master CREATE/INSERT events
|
||||||
|
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
|
||||||
|
select count(*)=1 from t1;
|
||||||
|
|
||||||
|
--echo # The relay log FDE has been processed - here we check to ensure it was
|
||||||
|
--echo # not considered in Seconds_Behind_Master calculation
|
||||||
|
--connection slave1
|
||||||
|
let $sbm= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1);
|
||||||
|
--let $t_now= `SELECT UNIX_TIMESTAMP()`
|
||||||
|
|
||||||
|
if(`select $sbm > $t_now - $t_master_events_logged`)
|
||||||
|
{
|
||||||
|
die "A relay log event was incorrectly used to set Seconds_Behind_Master";
|
||||||
|
}
|
||||||
|
|
||||||
|
--echo # Safely resume slave SQL thread
|
||||||
|
SET @@global.debug_dbug='';
|
||||||
|
SET DEBUG_SYNC='pause_sql_thread_on_fde CLEAR';
|
||||||
|
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
|
||||||
|
|
||||||
|
# Reset last sql_thread_continue signal
|
||||||
|
SET DEBUG_SYNC='RESET';
|
||||||
|
|
||||||
|
# Cleanup
|
||||||
|
--connection master
|
||||||
|
DROP TABLE t1;
|
||||||
|
--save_master_pos
|
||||||
|
--sync_slave_with_master
|
||||||
|
|
||||||
|
--connection slave
|
||||||
|
SET @@global.debug_dbug=$save_dbug;
|
||||||
|
|
||||||
|
--source include/rpl_end.inc
|
@@ -3463,6 +3463,7 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
|
|||||||
opt_slave_sql_verify_checksum ? (enum_binlog_checksum_alg) binlog_checksum_options
|
opt_slave_sql_verify_checksum ? (enum_binlog_checksum_alg) binlog_checksum_options
|
||||||
: BINLOG_CHECKSUM_ALG_OFF;
|
: BINLOG_CHECKSUM_ALG_OFF;
|
||||||
s.checksum_alg= relay_log_checksum_alg;
|
s.checksum_alg= relay_log_checksum_alg;
|
||||||
|
s.set_relay_log_event();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
s.checksum_alg= (enum_binlog_checksum_alg)binlog_checksum_options;
|
s.checksum_alg= (enum_binlog_checksum_alg)binlog_checksum_options;
|
||||||
|
@@ -4033,6 +4033,15 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
|
|||||||
#endif /* WITH_WSREP */
|
#endif /* WITH_WSREP */
|
||||||
|
|
||||||
thread_safe_increment64(&rli->executed_entries);
|
thread_safe_increment64(&rli->executed_entries);
|
||||||
|
DBUG_EXECUTE_IF(
|
||||||
|
"pause_sql_thread_on_fde",
|
||||||
|
if (ev && typ == FORMAT_DESCRIPTION_EVENT) {
|
||||||
|
DBUG_ASSERT(!debug_sync_set_action(
|
||||||
|
thd,
|
||||||
|
STRING_WITH_LEN(
|
||||||
|
"now SIGNAL paused_on_fde WAIT_FOR sql_thread_continue")));
|
||||||
|
});
|
||||||
|
|
||||||
DBUG_RETURN(exec_res);
|
DBUG_RETURN(exec_res);
|
||||||
}
|
}
|
||||||
mysql_mutex_unlock(&rli->data_lock);
|
mysql_mutex_unlock(&rli->data_lock);
|
||||||
|
Reference in New Issue
Block a user