mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
MDEV-29639: Seconds_Behind_Master is incorrect for Delayed, Parallel Replicas
Problem
========
On a parallel, delayed replica, Seconds_Behind_Master will not be calculated until after MASTER_DELAY seconds have passed and the event has finished executing, resulting in potentially very large values of Seconds_Behind_Master (which could be much larger than the MASTER_DELAY parameter) for the entire duration the event is delayed. This contradicts the documented MASTER_DELAY behavior, which specifies how many seconds to withhold replicated events from execution.

Solution
========
After a parallel replica idles, the first event after idling should immediately update last_master_timestamp with the time that it began execution on the primary.

Reviewed By
===========
Andrei Elkin <andrei.elkin@mariadb.com>
This commit is contained in:
60
mysql-test/suite/rpl/r/rpl_delayed_parallel_slave_sbm.result
Normal file
60
mysql-test/suite/rpl/r/rpl_delayed_parallel_slave_sbm.result
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
include/master-slave.inc
|
||||||
|
[connection master]
|
||||||
|
connection slave;
|
||||||
|
include/stop_slave.inc
|
||||||
|
change master to master_delay=3, master_use_gtid=Slave_Pos;
|
||||||
|
set @@GLOBAL.slave_parallel_threads=2;
|
||||||
|
include/start_slave.inc
|
||||||
|
connection master;
|
||||||
|
create table t1 (a int);
|
||||||
|
include/sync_slave_sql_with_master.inc
|
||||||
|
#
|
||||||
|
# Pt 1) Ensure SBM is updated immediately upon arrival of the next event
|
||||||
|
# Lock t1 on slave so the first received transaction does not complete/commit
|
||||||
|
connection slave;
|
||||||
|
LOCK TABLES t1 WRITE;
|
||||||
|
connection master;
|
||||||
|
# Sleep 2 to allow a buffer between events for SBM check
|
||||||
|
insert into t1 values (0);
|
||||||
|
include/save_master_gtid.inc
|
||||||
|
connection slave;
|
||||||
|
# Waiting for transaction to arrive on slave and begin SQL Delay..
|
||||||
|
# Validating SBM is updated on event arrival..
|
||||||
|
# ..done
|
||||||
|
connection slave;
|
||||||
|
UNLOCK TABLES;
|
||||||
|
include/sync_with_master_gtid.inc
|
||||||
|
#
|
||||||
|
# Pt 2) If the SQL thread has not entered an idle state, ensure
|
||||||
|
# following events do not update SBM
|
||||||
|
# Stop slave IO thread so it receives both events together on restart
|
||||||
|
connection slave;
|
||||||
|
include/stop_slave_io.inc
|
||||||
|
connection master;
|
||||||
|
# Sleep 2 to allow a buffer between events for SBM check
|
||||||
|
insert into t1 values (1);
|
||||||
|
# Sleep 3 to create gap between events
|
||||||
|
insert into t1 values (2);
|
||||||
|
connection slave;
|
||||||
|
LOCK TABLES t1 WRITE;
|
||||||
|
START SLAVE IO_THREAD;
|
||||||
|
# Wait for first transaction to complete SQL delay and begin execution..
|
||||||
|
# Validate SBM calculation doesn't use the second transaction because SQL thread shouldn't have gone idle..
|
||||||
|
# ..and that SBM wasn't calculated using prior committed transactions
|
||||||
|
# ..done
|
||||||
|
connection slave;
|
||||||
|
UNLOCK TABLES;
|
||||||
|
#
|
||||||
|
# Cleanup
|
||||||
|
# Reset master_delay
|
||||||
|
include/stop_slave.inc
|
||||||
|
CHANGE MASTER TO master_delay=0;
|
||||||
|
set @@GLOBAL.slave_parallel_threads=4;
|
||||||
|
include/start_slave.inc
|
||||||
|
connection master;
|
||||||
|
DROP TABLE t1;
|
||||||
|
include/save_master_gtid.inc
|
||||||
|
connection slave;
|
||||||
|
include/sync_with_master_gtid.inc
|
||||||
|
include/rpl_end.inc
|
||||||
|
# End of rpl_delayed_parallel_slave_sbm.test
|
@ -0,0 +1 @@
|
|||||||
|
--slave-parallel-threads=4
|
133
mysql-test/suite/rpl/t/rpl_delayed_parallel_slave_sbm.test
Normal file
133
mysql-test/suite/rpl/t/rpl_delayed_parallel_slave_sbm.test
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
#
# This test ensures that after a delayed parallel slave has idled, i.e.
# executed everything in its relay log, the next event group that the SQL
# thread reads from the relay log will immediately be used in the
# Seconds_Behind_Master. In particular, it ensures that the calculation for
# Seconds_Behind_Master is based on the timestamp of the new transaction,
# rather than the last committed transaction.
#
# References:
#   MDEV-29639: Seconds_Behind_Master is incorrect for Delayed, Parallel
#               Replicas
#

--source include/master-slave.inc

# Configure the slave as a delayed, parallel replica. The previous value of
# slave_parallel_threads is saved so the cleanup section can restore it.
--connection slave
--source include/stop_slave.inc
--let $master_delay= 3
--eval change master to master_delay=$master_delay, master_use_gtid=Slave_Pos
--let $old_slave_threads= `SELECT @@GLOBAL.slave_parallel_threads`
set @@GLOBAL.slave_parallel_threads=2;
--source include/start_slave.inc

--connection master
create table t1 (a int);
--source include/sync_slave_sql_with_master.inc

--echo #
--echo # Pt 1) Ensure SBM is updated immediately upon arrival of the next event

--echo # Lock t1 on slave so the first received transaction does not complete/commit
--connection slave
LOCK TABLES t1 WRITE;

--connection master
--echo # Sleep 2 to allow a buffer between events for SBM check
# real_sleep (rather than sleep) so that the gap the SBM check relies on
# cannot be overridden by the mysqltest --sleep option.
real_sleep 2;

# Timestamp taken just before the insert; the SBM check below measures the
# elapsed time from this point.
--let $ts_trx_before_ins= `SELECT UNIX_TIMESTAMP()`
# Counter supplying the value of each inserted row; incremented after each
# insert.
--let $insert_ctr= 0
--eval insert into t1 values ($insert_ctr)
--inc $insert_ctr
--source include/save_master_gtid.inc

--connection slave

--echo # Waiting for transaction to arrive on slave and begin SQL Delay..
--let $wait_condition= SELECT count(*) FROM information_schema.processlist WHERE state LIKE 'Waiting until MASTER_DELAY seconds after master executed event';
--source include/wait_condition.inc

--echo # Validating SBM is updated on event arrival..
--let $sbm_trx1_arrive= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1)
--let $seconds_since_idling= `SELECT UNIX_TIMESTAMP() - $ts_trx_before_ins`
# SBM must not exceed the time elapsed since just before the insert; one
# extra second is tolerated for a possible negative clock_diff_with_master.
if (`SELECT $sbm_trx1_arrive > ($seconds_since_idling + 1)`)
{
  --echo # SBM was $sbm_trx1_arrive yet shouldn't have been larger than $seconds_since_idling + 1 (for possible negative clock_diff_with_master)
  --die Seconds_Behind_Master should reset after idling
}
--echo # ..done

# Release the lock so the delayed transaction can commit, then wait for the
# slave to reach the GTID position saved on the master.
--connection slave
UNLOCK TABLES;
--source include/sync_with_master_gtid.inc

--echo #
--echo # Pt 2) If the SQL thread has not entered an idle state, ensure
--echo # following events do not update SBM

--echo # Stop slave IO thread so it receives both events together on restart
--connection slave
--source include/stop_slave_io.inc

--connection master

--echo # Sleep 2 to allow a buffer between events for SBM check
# real_sleep: see the note in Pt 1.
real_sleep 2;
# Timestamp taken just before the first of the two inserts.
--let $ts_trxpt2_before_ins= `SELECT UNIX_TIMESTAMP()`
--eval insert into t1 values ($insert_ctr)
--inc $insert_ctr
--echo # Sleep 3 to create gap between events
# real_sleep: the checks below compare against timestamps taken on either
# side of this gap, so it must really elapse.
real_sleep 3;
--eval insert into t1 values ($insert_ctr)
--inc $insert_ctr
# Timestamp taken just after the second insert.
--let $ts_trx_after_ins= `SELECT UNIX_TIMESTAMP()`

# Lock t1 before restarting the IO thread so that the worker applying the
# first transaction blocks on the metadata lock (waited for below).
--connection slave
LOCK TABLES t1 WRITE;

START SLAVE IO_THREAD;

--echo # Wait for first transaction to complete SQL delay and begin execution..
--let $wait_condition= SELECT count(*) FROM information_schema.processlist WHERE state LIKE 'Waiting for table metadata lock%' AND command LIKE 'Slave_Worker';
--source include/wait_condition.inc

--echo # Validate SBM calculation doesn't use the second transaction because SQL thread shouldn't have gone idle..
--let $sbm_after_trx_no_idle= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1)
--let $timestamp_trxpt2_arrive= `SELECT UNIX_TIMESTAMP()`
# Fail if SBM is smaller than the time elapsed since the second insert.
if (`SELECT $sbm_after_trx_no_idle < $timestamp_trxpt2_arrive - $ts_trx_after_ins`)
{
  --let $cmpv= `SELECT $timestamp_trxpt2_arrive - $ts_trx_after_ins`
  --echo # SBM $sbm_after_trx_no_idle was more recent than time since last transaction ($cmpv seconds)
  --die Seconds_Behind_Master should not have used second transaction timestamp
}
--let $seconds_since_idling= `SELECT ($timestamp_trxpt2_arrive - $ts_trxpt2_before_ins)`
--echo # ..and that SBM wasn't calculated using prior committed transactions
# Fail if SBM exceeds the time elapsed since just before the first insert of
# this part; one extra second is tolerated for a possible negative
# clock_diff_with_master.
if (`SELECT $sbm_after_trx_no_idle > ($seconds_since_idling + 1)`)
{
  --echo # SBM was $sbm_after_trx_no_idle yet shouldn't have been larger than $seconds_since_idling + 1 (for possible negative clock_diff_with_master)
  --die Seconds_Behind_Master calculation should not have used prior committed transaction
}
--echo # ..done

--connection slave
UNLOCK TABLES;

--echo #
--echo # Cleanup

--echo # Reset master_delay
--source include/stop_slave.inc
--eval CHANGE MASTER TO master_delay=0
# Restore the slave_parallel_threads value saved at the start of the test.
--eval set @@GLOBAL.slave_parallel_threads=$old_slave_threads
--source include/start_slave.inc

--connection master
DROP TABLE t1;
--source include/save_master_gtid.inc

--connection slave
--source include/sync_with_master_gtid.inc

--source include/rpl_end.inc
--echo # End of rpl_delayed_parallel_slave_sbm.test
|
@ -45,8 +45,7 @@ rpt_handle_event(rpl_parallel_thread::queued_event *qev,
|
|||||||
rgi->event_relay_log_pos= qev->event_relay_log_pos;
|
rgi->event_relay_log_pos= qev->event_relay_log_pos;
|
||||||
rgi->future_event_relay_log_pos= qev->future_event_relay_log_pos;
|
rgi->future_event_relay_log_pos= qev->future_event_relay_log_pos;
|
||||||
strcpy(rgi->future_event_master_log_name, qev->future_event_master_log_name);
|
strcpy(rgi->future_event_master_log_name, qev->future_event_master_log_name);
|
||||||
if (!(ev->is_artificial_event() || ev->is_relay_log_event() ||
|
if (event_can_update_last_master_timestamp(ev))
|
||||||
(ev->when == 0)))
|
|
||||||
rgi->last_master_timestamp= ev->when + (time_t)ev->exec_time;
|
rgi->last_master_timestamp= ev->when + (time_t)ev->exec_time;
|
||||||
err= apply_event_and_update_pos_for_parallel(ev, thd, rgi);
|
err= apply_event_and_update_pos_for_parallel(ev, thd, rgi);
|
||||||
|
|
||||||
|
31
sql/slave.cc
31
sql/slave.cc
@ -4125,10 +4125,10 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
|
|||||||
the user might be surprised to see a claim that the slave is up to date
|
the user might be surprised to see a claim that the slave is up to date
|
||||||
long before those queued events are actually executed.
|
long before those queued events are actually executed.
|
||||||
*/
|
*/
|
||||||
if (!rli->mi->using_parallel() &&
|
if ((!rli->mi->using_parallel()) && event_can_update_last_master_timestamp(ev))
|
||||||
!(ev->is_artificial_event() || ev->is_relay_log_event() || (ev->when == 0)))
|
|
||||||
{
|
{
|
||||||
rli->last_master_timestamp= ev->when + (time_t) ev->exec_time;
|
rli->last_master_timestamp= ev->when + (time_t) ev->exec_time;
|
||||||
|
rli->sql_thread_caught_up= false;
|
||||||
DBUG_ASSERT(rli->last_master_timestamp >= 0);
|
DBUG_ASSERT(rli->last_master_timestamp >= 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4177,6 +4177,17 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
|
|||||||
|
|
||||||
if (rli->mi->using_parallel())
|
if (rli->mi->using_parallel())
|
||||||
{
|
{
|
||||||
|
if (unlikely((rli->last_master_timestamp == 0 ||
|
||||||
|
rli->sql_thread_caught_up) &&
|
||||||
|
event_can_update_last_master_timestamp(ev)))
|
||||||
|
{
|
||||||
|
if (rli->last_master_timestamp < ev->when)
|
||||||
|
{
|
||||||
|
rli->last_master_timestamp= ev->when;
|
||||||
|
rli->sql_thread_caught_up= false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int res= rli->parallel.do_event(serial_rgi, ev, event_size);
|
int res= rli->parallel.do_event(serial_rgi, ev, event_size);
|
||||||
/*
|
/*
|
||||||
In parallel replication, we need to update the relay log position
|
In parallel replication, we need to update the relay log position
|
||||||
@ -4192,7 +4203,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
|
|||||||
This is the case for pre-10.0 events without GTID, and for handling
|
This is the case for pre-10.0 events without GTID, and for handling
|
||||||
slave_skip_counter.
|
slave_skip_counter.
|
||||||
*/
|
*/
|
||||||
if (!(ev->is_artificial_event() || ev->is_relay_log_event() || (ev->when == 0)))
|
if (event_can_update_last_master_timestamp(ev))
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
Ignore FD's timestamp as it does not reflect the slave execution
|
Ignore FD's timestamp as it does not reflect the slave execution
|
||||||
@ -4200,7 +4211,8 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
|
|||||||
data modification event execution last long all this time
|
data modification event execution last long all this time
|
||||||
Seconds_Behind_Master is zero.
|
Seconds_Behind_Master is zero.
|
||||||
*/
|
*/
|
||||||
if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
|
if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT &&
|
||||||
|
rli->last_master_timestamp < ev->when)
|
||||||
rli->last_master_timestamp= ev->when + (time_t) ev->exec_time;
|
rli->last_master_timestamp= ev->when + (time_t) ev->exec_time;
|
||||||
|
|
||||||
DBUG_ASSERT(rli->last_master_timestamp >= 0);
|
DBUG_ASSERT(rli->last_master_timestamp >= 0);
|
||||||
@ -7544,7 +7556,6 @@ static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size)
|
|||||||
|
|
||||||
if (hot_log)
|
if (hot_log)
|
||||||
mysql_mutex_unlock(log_lock);
|
mysql_mutex_unlock(log_lock);
|
||||||
rli->sql_thread_caught_up= false;
|
|
||||||
DBUG_RETURN(ev);
|
DBUG_RETURN(ev);
|
||||||
}
|
}
|
||||||
if (opt_reckless_slave) // For mysql-test
|
if (opt_reckless_slave) // For mysql-test
|
||||||
@ -7711,7 +7722,6 @@ static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size)
|
|||||||
rli->relay_log.wait_for_update_relay_log(rli->sql_driver_thd);
|
rli->relay_log.wait_for_update_relay_log(rli->sql_driver_thd);
|
||||||
// re-acquire data lock since we released it earlier
|
// re-acquire data lock since we released it earlier
|
||||||
mysql_mutex_lock(&rli->data_lock);
|
mysql_mutex_lock(&rli->data_lock);
|
||||||
rli->sql_thread_caught_up= false;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
@ -7902,12 +7912,19 @@ event(errno: %d cur_log->error: %d)",
|
|||||||
{
|
{
|
||||||
sql_print_information("Error reading relay log event: %s",
|
sql_print_information("Error reading relay log event: %s",
|
||||||
"slave SQL thread was killed");
|
"slave SQL thread was killed");
|
||||||
DBUG_RETURN(0);
|
goto end;
|
||||||
}
|
}
|
||||||
|
|
||||||
err:
|
err:
|
||||||
if (errmsg)
|
if (errmsg)
|
||||||
sql_print_error("Error reading relay log event: %s", errmsg);
|
sql_print_error("Error reading relay log event: %s", errmsg);
|
||||||
|
|
||||||
|
end:
|
||||||
|
/*
|
||||||
|
Set that we are not caught up so if there is a hang/problem on restart,
|
||||||
|
Seconds_Behind_Master will still grow.
|
||||||
|
*/
|
||||||
|
rli->sql_thread_caught_up= false;
|
||||||
DBUG_RETURN(0);
|
DBUG_RETURN(0);
|
||||||
}
|
}
|
||||||
#ifdef WITH_WSREP
|
#ifdef WITH_WSREP
|
||||||
|
12
sql/slave.h
12
sql/slave.h
@ -49,6 +49,7 @@
|
|||||||
#include "rpl_filter.h"
|
#include "rpl_filter.h"
|
||||||
#include "rpl_tblmap.h"
|
#include "rpl_tblmap.h"
|
||||||
#include "rpl_gtid.h"
|
#include "rpl_gtid.h"
|
||||||
|
#include "log_event.h"
|
||||||
|
|
||||||
#define SLAVE_NET_TIMEOUT 60
|
#define SLAVE_NET_TIMEOUT 60
|
||||||
|
|
||||||
@ -293,6 +294,17 @@ extern char *report_host, *report_password;
|
|||||||
|
|
||||||
extern I_List<THD> threads;
|
extern I_List<THD> threads;
|
||||||
|
|
||||||
|
/*
|
||||||
|
Check that a binlog event (read from the relay log) is valid to update
|
||||||
|
last_master_timestamp. That is, a valid event is one with a consistent
|
||||||
|
timestamp which originated from a primary server.
|
||||||
|
*/
|
||||||
|
static inline bool event_can_update_last_master_timestamp(Log_event *ev)
|
||||||
|
{
|
||||||
|
return ev && !(ev->is_artificial_event() || ev->is_relay_log_event() ||
|
||||||
|
(ev->when == 0));
|
||||||
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#define close_active_mi() /* no-op */
|
#define close_active_mi() /* no-op */
|
||||||
#endif /* HAVE_REPLICATION */
|
#endif /* HAVE_REPLICATION */
|
||||||
|
Reference in New Issue
Block a user