1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-08 11:22:35 +03:00
Files
mariadb/mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test
ParadoxV5 8a45128106 Add MDEV-25999 & MDEV-36840 to MDEV-16091’s test
1. Stricten MDEV-16091’s `rpl.rpl_seconds_behind_master_spike`
   to prove that its fix covered MDEV-25999 as well
2. Add a Parallel Replication variant via a new `.combinations` file;
   its current failure confirm MDEV-36840.

Because Parallel Replication queues Format Description Events as
position update metaevents, this variant uses a separate set of API-
compatible breakpoints to pause both the main and worker threads.
To support them, this commit replaces the MDEV-33327 patch c75905cacb
with a live-activation solution. In contrast, previous iterations queued
the activation pre-test and relied on other factors to enable the break.

Reviewed-by: Brandon Nesterenko <brandon.nesterenko@mariadb.com>
2025-07-07 14:03:49 -06:00

189 lines
6.1 KiB
Plaintext

#
# Purpose:
# This test validates that a slave's relay log format description event is
# not used to calculate the Seconds_Behind_Master time displayed by
# SHOW SLAVE STATUS.
#
# Methodology:
# Ensure that a slave's reported Seconds_Behind_Master does not point before
# a time in which we can prove that it has progressed beyond. The slave's
# relay log events are created using the timestamp at which the IO thread was
# created. Therefore, after starting the slave's IO thread, we sleep so any
# proceeding events are forced to have later timestamps. After sleeping, we run
# MDL statements on the master and save the time at which they are binlogged.
# Once the slave executes these MDL commands, we have proven that the slave has
# caught up to this saved timestamp. At this point, if the value of
# Seconds_Behind_Master points before the time in which the MDL events were
# logged, it is invalid.
#
# References:
# MDEV-16091: Seconds_Behind_Master spikes to millions of seconds
# MDEV-25999: Unrealistic "Seconds behind master" value
# MDEV-36840: Seconds_Behind_Master Spike at Log Rotation on Parallel Replica
#
--source include/have_debug.inc
--source include/have_debug_sync.inc
--source include/have_innodb.inc
--source include/master-slave.inc
--connection slave
# Using_Gtid needs to start as NO before updating debug_dbug
--source include/stop_slave.inc
CHANGE MASTER TO MASTER_USE_GTID=NO;
--source include/start_slave.inc
--let $sleep_time=2
--echo # Future events must be logged at least $sleep_time seconds after
--echo # the slave starts
--sleep $sleep_time
--connection master
--echo # Write events to ensure slave will be consistent with master
create table t1 (a int);
insert into t1 values (1);
--sync_slave_with_master
let $wait_condition= SELECT STATE LIKE '%waiting for more updates'
FROM information_schema.PROCESSLIST WHERE COMMAND = 'Slave_SQL';
--source include/wait_condition.inc
--echo # Enable breakpoints
SET @save_dbug= @@GLOBAL.debug_dbug;
SET @@GLOBAL.debug_dbug=
'+d,pause_sql_thread_on_fde,negate_clock_diff_with_master';
--connection master
--echo # Flush logs on master forces slave to generate a Format description
--echo # event in its relay log
flush logs;
--connection slave
--echo # On the next FDE, the slave should have the master CREATE/INSERT events
--sleep $sleep_time
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
select count(*)=1 from t1;
--echo # The relay log FDE has been processed - here we check to ensure it was
--echo # not considered in Seconds_Behind_Master calculation
# `Seconds_Behind_Master` must remains 0.
# * MDEV-16091: `Seconds_Behind_Master` should not point at the FDE's timestamp,
# which is the time of `start_slave.inc`.
# * MDEV-25999 & MDEV-36840: For a new event stream, `Seconds_Behind_Master`
# should not point at anything at all, because FDEs are not user content.
--let $status_items= Seconds_Behind_Master
--source include/show_slave_status.inc
--echo # Safely resume slave SQL (worker) thread
--let $dbug_wait_state="debug sync point: now"
--echo # Prove SQL thread is in state $dbug_wait_state
--let $wait_condition= SELECT STATE=$dbug_wait_state from information_schema.PROCESSLIST where COMMAND="Slave_SQL"
--source include/wait_condition.inc
SET @@global.debug_dbug="-d,pause_sql_thread_on_fde";
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
# We have to wait for the SQL thread to acknowledge the sql_thread_continue
# signal. Otherwise the below RESET command can overwrite the signal before
# the SQL thread is notified to proceed, causing it to "permanently" become
# stuck awaiting the signal (until timeout is reached).
--echo # Wait for SQL thread to continue into normal execution
--let $wait_condition= SELECT STATE!= $dbug_wait_state from information_schema.PROCESSLIST where COMMAND="Slave_SQL"
--source include/wait_condition.inc
# Reset last sql_thread_continue signal
SET DEBUG_SYNC='RESET';
--echo #
--echo # MDEV-29639
--echo # When receiving an event after the SQL Thread idles,
--echo # Seconds_Behind_Master should not update before it updates
--echo # last_master_timestamp
--connection slave
--source include/stop_slave.inc
set @@global.debug_dbug="+d,pause_sql_thread_on_next_event";
--source include/start_slave.inc
--connection master
insert into t1 values(2);
--source include/save_master_gtid.inc
# Each event after starting will trigger a pause, so continually send signal
# sql_thread_continue until caught up
--connection slave
--let $caught_up=0
--let $tries= 0
set debug_sync='now wait_for paused_on_event';
--disable_query_log
while (!$caught_up)
{
set debug_sync='now signal sql_thread_continue';
--let $slave_gtid= `SELECT @@global.gtid_slave_pos`
if (`SELECT strcmp("$master_pos","$slave_gtid") = 0`)
{
--inc $caught_up
}
--inc $tries
# Wait 30s
if (`SELECT $tries > 300`)
{
--die Replica failed to sync with primary
}
sleep 0.1;
}
set debug_sync="RESET";
--enable_query_log
--connection master
--echo # Sleeping 1s to create a visible SBM gap between events
sleep 1;
insert into t1 values(3);
--source include/save_master_gtid.inc
--connection slave
set debug_sync='now wait_for paused_on_event';
--let $sbm= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1)
if ($sbm)
{
--echo # Expected Seconds_Behind_Master to be 0 but was $sbm
--die Seconds_Behind_Master should not show updates before last_master_timestamp is updated
}
# Continually send signal sql_thread_continue until caught up
--let $caught_up=0
--let $tries= 0
--disable_query_log
while (!$caught_up)
{
set debug_sync='now signal sql_thread_continue';
--let $slave_gtid= `SELECT @@global.gtid_slave_pos`
if (`SELECT strcmp("$master_pos","$slave_gtid") = 0`)
{
--inc $caught_up
}
--inc $tries
# Wait 30s
if (`SELECT $tries > 300`)
{
--die Replica failed to sync with primary
}
sleep 0.1;
}
--enable_query_log
# Cleanup
--source include/stop_slave.inc
set debug_sync='RESET';
SET @@global.debug_dbug=$save_dbug;
--source include/start_slave.inc
--source include/sync_with_master_gtid.inc
--connection master
DROP TABLE t1;
--source include/rpl_end.inc