1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-07 00:04:31 +03:00

MDEV-32265: seconds_behind_master is inaccurate for Delayed replication

If a replica is actively delaying a transaction when it is restarted
(STOP SLAVE/START SLAVE), then once the SQL thread is back up,
Seconds_Behind_Master will present as 0 until the configured
MASTER_DELAY has passed. That is, before the restart,
last_master_timestamp is updated to the timestamp of the delayed
event. Then after the restart, the negation of sql_thread_caught_up
is skipped because the timestamp of the event has already been used
for last_master_timestamp, and the two updates are grouped together
in the same conditional block.

This patch fixes this by separating the negation of
sql_thread_caught_up out of the timestamp-dependent block, so it is
performed any time an idle parallel slave queues an event to a worker.

Note that sql_thread_caught_up is still left in the check for internal
events, as SBM should remain idle in such a case so that it does not
"magically" begin incrementing.

Reviewed By:
============
Andrei Elkin <andrei.elkin@mariadb.com>
This commit is contained in:
Brandon Nesterenko
2023-09-27 14:39:03 -06:00
parent 9517755165
commit c5f776e9fa
3 changed files with 34 additions and 10 deletions

View File

@@ -36,10 +36,6 @@ create table t2 (a int);
--echo #
--echo # Pt 1) Ensure SBM is updated immediately upon arrival of the next event
--echo # Lock t1 on slave so the first received transaction does not complete/commit
--connection slave
LOCK TABLES t1 WRITE;
--connection master
--echo # Sleep 2 to allow a buffer between events for SBM check
sleep 2;
@@ -65,8 +61,31 @@ if (`SELECT $sbm_trx1_arrive > ($seconds_since_idling + 1)`)
}
--echo # ..done
--echo # MDEV-32265. At time of STOP SLAVE, if the SQL Thread is currently
--echo # delaying a transaction; then when the reciprocal START SLAVE occurs,
--echo # if the event is still to be delayed, SBM should resume accordingly
--source include/stop_slave.inc
--source include/start_slave.inc
--connection slave
UNLOCK TABLES;
# MDEV-32265 check: once the replica is delaying the transaction again,
# Seconds_Behind_Master must keep growing instead of reading as caught up.
#
# Step 1: wait until some thread in the processlist reports the
# MASTER_DELAY wait state.
# NOTE(review): wait_condition.inc is not visible here; presumably it polls
# $wait_condition until the query returns a non-zero value -- confirm.
--echo # Waiting for replica to resume the delay for the transaction
--let $wait_condition= SELECT count(*) FROM information_schema.processlist WHERE state LIKE 'Waiting until MASTER_DELAY seconds after master executed event';
--source include/wait_condition.inc
# Step 2: let wall-clock time pass so that a correctly tracked
# Seconds_Behind_Master has a chance to advance.
--echo # Sleeping 1s to increment SBM
sleep 1;
# Step 3: read Seconds_Behind_Master from the first row of SHOW SLAVE STATUS
# and compare it with $sbm_trx1_arrive, which is set outside this excerpt.
--echo # Ensuring Seconds_Behind_Master increases after sleeping..
--let $sbm_trx1_after_1s_sleep= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1)
# The comparison is `<=`, so a value merely equal to $sbm_trx1_arrive also
# aborts the test: a strict increase is required.
if (`SELECT $sbm_trx1_after_1s_sleep <= $sbm_trx1_arrive`)
{
--echo # ..failed
--die Seconds_Behind_Master did not increase after sleeping, but should have
}
--echo # ..done
--source include/sync_with_master_gtid.inc
--echo #