mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
MDEV-5509: Seconds_behind_master incorrect in parallel replication
The problem was a race between the SQL driver thread and the worker threads. The SQL driver thread would set rli->last_master_timestamp to zero to mark that it has caught up with the master, while the worker threads would set it to the timestamp of the executed event. This can happen out-of-order in parallel replication, causing the "caught up" status to be overwritten and Seconds_Behind_Master to wrongly grow when the slave is idle. To fix, introduce a separate flag rli->sql_thread_caught_up to mark that the SQL driver thread is caught up. This avoids issues with worker threads overwriting the SQL driver thread status. In parallel replication, we then make SHOW SLAVE STATUS check in addition that all worker threads are idle before showing Seconds_Behind_Master as 0 due to slave idle.
This commit is contained in:
18
mysql-test/suite/rpl/r/rpl_parallel2.result
Normal file
18
mysql-test/suite/rpl/r/rpl_parallel2.result
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
include/rpl_init.inc [topology=1->2]
|
||||||
|
*** MDEV-5509: Incorrect value for Seconds_Behind_Master if parallel replication ***
|
||||||
|
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
|
||||||
|
include/stop_slave.inc
|
||||||
|
SET GLOBAL slave_parallel_threads=5;
|
||||||
|
include/start_slave.inc
|
||||||
|
CREATE TABLE t1 (a INT PRIMARY KEY, b INT);
|
||||||
|
CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a system function that may return a different value on the slave");
|
||||||
|
INSERT INTO t1 VALUES (1,sleep(2));
|
||||||
|
Warnings:
|
||||||
|
Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a system function that may return a different value on the slave.
|
||||||
|
Seconds_Behind_Master should be zero here because the slave is fully caught up and idle.
|
||||||
|
Seconds_Behind_Master = '0'
|
||||||
|
include/stop_slave.inc
|
||||||
|
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
|
||||||
|
include/start_slave.inc
|
||||||
|
DROP TABLE t1;
|
||||||
|
include/rpl_end.inc
|
41
mysql-test/suite/rpl/t/rpl_parallel2.test
Normal file
41
mysql-test/suite/rpl/t/rpl_parallel2.test
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
--source include/have_binlog_format_statement.inc
|
||||||
|
--let $rpl_topology=1->2
|
||||||
|
--source include/rpl_init.inc
|
||||||
|
|
||||||
|
--echo *** MDEV-5509: Incorrect value for Seconds_Behind_Master if parallel replication ***
|
||||||
|
|
||||||
|
--connection server_2
|
||||||
|
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
|
||||||
|
--source include/stop_slave.inc
|
||||||
|
SET GLOBAL slave_parallel_threads=5;
|
||||||
|
--source include/start_slave.inc
|
||||||
|
|
||||||
|
--connection server_1
|
||||||
|
CREATE TABLE t1 (a INT PRIMARY KEY, b INT);
|
||||||
|
CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a system function that may return a different value on the slave");
|
||||||
|
--save_master_pos
|
||||||
|
|
||||||
|
--connection server_2
|
||||||
|
--sync_with_master
|
||||||
|
|
||||||
|
--connection server_1
|
||||||
|
INSERT INTO t1 VALUES (1,sleep(2));
|
||||||
|
--save_master_pos
|
||||||
|
|
||||||
|
--connection server_2
|
||||||
|
--sync_with_master
|
||||||
|
|
||||||
|
--echo Seconds_Behind_Master should be zero here because the slave is fully caught up and idle.
|
||||||
|
--let $status_items= Seconds_Behind_Master
|
||||||
|
--source include/show_slave_status.inc
|
||||||
|
|
||||||
|
|
||||||
|
--connection server_2
|
||||||
|
--source include/stop_slave.inc
|
||||||
|
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
|
||||||
|
--source include/start_slave.inc
|
||||||
|
|
||||||
|
--connection server_1
|
||||||
|
DROP TABLE t1;
|
||||||
|
|
||||||
|
--source include/rpl_end.inc
|
@ -766,6 +766,27 @@ rpl_parallel::wait_for_done()
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool
|
||||||
|
rpl_parallel::workers_idle()
|
||||||
|
{
|
||||||
|
struct rpl_parallel_entry *e;
|
||||||
|
uint32 i, max_i;
|
||||||
|
|
||||||
|
max_i= domain_hash.records;
|
||||||
|
for (i= 0; i < max_i; ++i)
|
||||||
|
{
|
||||||
|
bool active;
|
||||||
|
e= (struct rpl_parallel_entry *)my_hash_element(&domain_hash, i);
|
||||||
|
mysql_mutex_lock(&e->LOCK_parallel_entry);
|
||||||
|
active= e->current_sub_id > e->last_committed_sub_id;
|
||||||
|
mysql_mutex_unlock(&e->LOCK_parallel_entry);
|
||||||
|
if (active)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return (i == max_i);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
do_event() is executed by the sql_driver_thd thread.
|
do_event() is executed by the sql_driver_thd thread.
|
||||||
It's main purpose is to find a thread that can execute the query.
|
It's main purpose is to find a thread that can execute the query.
|
||||||
|
@ -117,6 +117,7 @@ struct rpl_parallel {
|
|||||||
void reset();
|
void reset();
|
||||||
rpl_parallel_entry *find(uint32 domain_id);
|
rpl_parallel_entry *find(uint32 domain_id);
|
||||||
void wait_for_done();
|
void wait_for_done();
|
||||||
|
bool workers_idle();
|
||||||
bool do_event(rpl_group_info *serial_rgi, Log_event *ev,
|
bool do_event(rpl_group_info *serial_rgi, Log_event *ev,
|
||||||
ulonglong event_size);
|
ulonglong event_size);
|
||||||
};
|
};
|
||||||
|
@ -56,7 +56,7 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery)
|
|||||||
is_fake(FALSE),
|
is_fake(FALSE),
|
||||||
#endif
|
#endif
|
||||||
group_master_log_pos(0), log_space_total(0), ignore_log_space_limit(0),
|
group_master_log_pos(0), log_space_total(0), ignore_log_space_limit(0),
|
||||||
last_master_timestamp(0), slave_skip_counter(0),
|
last_master_timestamp(0), sql_thread_caught_up(true), slave_skip_counter(0),
|
||||||
abort_pos_wait(0), slave_run_id(0), sql_driver_thd(),
|
abort_pos_wait(0), slave_run_id(0), sql_driver_thd(),
|
||||||
inited(0), abort_slave(0), slave_running(0), until_condition(UNTIL_NONE),
|
inited(0), abort_slave(0), slave_running(0), until_condition(UNTIL_NONE),
|
||||||
until_log_pos(0), retried_trans(0), executed_entries(0),
|
until_log_pos(0), retried_trans(0), executed_entries(0),
|
||||||
@ -1287,9 +1287,14 @@ void Relay_log_info::stmt_done(my_off_t event_master_log_pos,
|
|||||||
(probably ok - except in some very rare cases, only consequence
|
(probably ok - except in some very rare cases, only consequence
|
||||||
is that value may take some time to display in
|
is that value may take some time to display in
|
||||||
Seconds_Behind_Master - not critical).
|
Seconds_Behind_Master - not critical).
|
||||||
|
|
||||||
|
In parallel replication, we take care to not set last_master_timestamp
|
||||||
|
backwards, in case of out-of-order calls here.
|
||||||
*/
|
*/
|
||||||
if (!(event_creation_time == 0 &&
|
if (!(event_creation_time == 0 &&
|
||||||
IF_DBUG(debug_not_change_ts_if_art_event > 0, 1)))
|
IF_DBUG(debug_not_change_ts_if_art_event > 0, 1)) &&
|
||||||
|
!(rgi->is_parallel_exec && event_creation_time <= last_master_timestamp)
|
||||||
|
)
|
||||||
last_master_timestamp= event_creation_time;
|
last_master_timestamp= event_creation_time;
|
||||||
}
|
}
|
||||||
DBUG_VOID_RETURN;
|
DBUG_VOID_RETURN;
|
||||||
|
@ -221,6 +221,12 @@ public:
|
|||||||
bool sql_force_rotate_relay;
|
bool sql_force_rotate_relay;
|
||||||
|
|
||||||
time_t last_master_timestamp;
|
time_t last_master_timestamp;
|
||||||
|
/*
|
||||||
|
The SQL driver thread sets this true while it is waiting at the end of the
|
||||||
|
relay log for more events to arrive. SHOW SLAVE STATUS uses this to report
|
||||||
|
Seconds_Behind_Master as zero while the SQL thread is so waiting.
|
||||||
|
*/
|
||||||
|
bool sql_thread_caught_up;
|
||||||
|
|
||||||
void clear_until_condition();
|
void clear_until_condition();
|
||||||
|
|
||||||
|
39
sql/slave.cc
39
sql/slave.cc
@ -2619,8 +2619,24 @@ static bool send_show_master_info_data(THD *thd, Master_info *mi, bool full,
|
|||||||
if ((mi->slave_running == MYSQL_SLAVE_RUN_CONNECT) &&
|
if ((mi->slave_running == MYSQL_SLAVE_RUN_CONNECT) &&
|
||||||
mi->rli.slave_running)
|
mi->rli.slave_running)
|
||||||
{
|
{
|
||||||
long time_diff= ((long)(time(0) - mi->rli.last_master_timestamp)
|
long time_diff;
|
||||||
- mi->clock_diff_with_master);
|
bool idle;
|
||||||
|
time_t stamp= mi->rli.last_master_timestamp;
|
||||||
|
|
||||||
|
if (!stamp)
|
||||||
|
idle= true;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
idle= mi->rli.sql_thread_caught_up;
|
||||||
|
if (opt_slave_parallel_threads > 0 && idle &&
|
||||||
|
!mi->rli.parallel.workers_idle())
|
||||||
|
idle= false;
|
||||||
|
}
|
||||||
|
if (idle)
|
||||||
|
time_diff= 0;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
time_diff= ((long)(time(0) - stamp) - mi->clock_diff_with_master);
|
||||||
/*
|
/*
|
||||||
Apparently on some systems time_diff can be <0. Here are possible
|
Apparently on some systems time_diff can be <0. Here are possible
|
||||||
reasons related to MySQL:
|
reasons related to MySQL:
|
||||||
@ -2636,13 +2652,15 @@ static bool send_show_master_info_data(THD *thd, Master_info *mi, bool full,
|
|||||||
slave is 2. At SHOW SLAVE STATUS time, assume that the difference
|
slave is 2. At SHOW SLAVE STATUS time, assume that the difference
|
||||||
between timestamp of slave and rli->last_master_timestamp is 0
|
between timestamp of slave and rli->last_master_timestamp is 0
|
||||||
(i.e. they are in the same second), then we get 0-(2-1)=-1 as a result.
|
(i.e. they are in the same second), then we get 0-(2-1)=-1 as a result.
|
||||||
This confuses users, so we don't go below 0: hence the max().
|
This confuses users, so we don't go below 0.
|
||||||
|
|
||||||
last_master_timestamp == 0 (an "impossible" timestamp 1970) is a
|
last_master_timestamp == 0 (an "impossible" timestamp 1970) is a
|
||||||
special marker to say "consider we have caught up".
|
special marker to say "consider we have caught up".
|
||||||
*/
|
*/
|
||||||
protocol->store((longlong)(mi->rli.last_master_timestamp ?
|
if (time_diff < 0)
|
||||||
max(0, time_diff) : 0));
|
time_diff= 0;
|
||||||
|
}
|
||||||
|
protocol->store((longlong)time_diff);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -6100,6 +6118,7 @@ static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size)
|
|||||||
|
|
||||||
if (hot_log)
|
if (hot_log)
|
||||||
mysql_mutex_unlock(log_lock);
|
mysql_mutex_unlock(log_lock);
|
||||||
|
rli->sql_thread_caught_up= false;
|
||||||
DBUG_RETURN(ev);
|
DBUG_RETURN(ev);
|
||||||
}
|
}
|
||||||
if (opt_reckless_slave) // For mysql-test
|
if (opt_reckless_slave) // For mysql-test
|
||||||
@ -6137,12 +6156,10 @@ static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size)
|
|||||||
Seconds_Behind_Master would be zero only when master has no
|
Seconds_Behind_Master would be zero only when master has no
|
||||||
more updates in binlog for slave. The heartbeat can be sent
|
more updates in binlog for slave. The heartbeat can be sent
|
||||||
in a (small) fraction of slave_net_timeout. Until it's done
|
in a (small) fraction of slave_net_timeout. Until it's done
|
||||||
rli->last_master_timestamp is temporarely (for time of
|
rli->sql_thread_caught_up is temporarely (for time of waiting for
|
||||||
waiting for the following event) reset whenever EOF is
|
the following event) set whenever EOF is reached.
|
||||||
reached.
|
|
||||||
*/
|
*/
|
||||||
time_t save_timestamp= rli->last_master_timestamp;
|
rli->sql_thread_caught_up= true;
|
||||||
rli->last_master_timestamp= 0;
|
|
||||||
|
|
||||||
DBUG_ASSERT(rli->relay_log.get_open_count() ==
|
DBUG_ASSERT(rli->relay_log.get_open_count() ==
|
||||||
rli->cur_log_old_open_count);
|
rli->cur_log_old_open_count);
|
||||||
@ -6268,7 +6285,7 @@ static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size)
|
|||||||
rli->relay_log.wait_for_update_relay_log(rli->sql_driver_thd);
|
rli->relay_log.wait_for_update_relay_log(rli->sql_driver_thd);
|
||||||
// re-acquire data lock since we released it earlier
|
// re-acquire data lock since we released it earlier
|
||||||
mysql_mutex_lock(&rli->data_lock);
|
mysql_mutex_lock(&rli->data_lock);
|
||||||
rli->last_master_timestamp= save_timestamp;
|
rli->sql_thread_caught_up= false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
|
Reference in New Issue
Block a user