1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-08 11:22:35 +03:00

MDEV-27161: Add option for SQL thread to limit maximum execution time per query replicated

New Feature:
============
This patch adds a new system variable, @@slave_max_statement_time,
which limits the execution time of s slave’s events that implements
an equivalent to @@max_statement_time for slave applier.

Reviewed By:
============
Andrei Elkin <andrei.elkin@mariadb.com>
This commit is contained in:
Brandon Nesterenko
2022-06-27 12:29:10 -06:00
committed by Andrei
parent 7864d955f3
commit 360d99429c
13 changed files with 434 additions and 7 deletions

View File

@@ -1223,6 +1223,11 @@ The following specify which files/extra groups are read (specified before remain
--slave-max-allowed-packet=#
The maximum packet length to sent successfully from the
master to slave.
--slave-max-statement-time=#
A query that has taken more than slave_max_statement_time
seconds to run on the slave will be aborted. The argument
will be treated as a decimal value with microsecond
precision. A value of 0 (default) means no timeout
--slave-net-timeout=#
Number of seconds to wait for more data from any
master/slave connection before aborting the read
@@ -1800,6 +1805,7 @@ slave-ddl-exec-mode IDEMPOTENT
slave-domain-parallel-threads 0
slave-exec-mode STRICT
slave-max-allowed-packet 1073741824
slave-max-statement-time 0
slave-net-timeout 60
slave-parallel-max-queued 131072
slave-parallel-mode conservative

View File

@@ -0,0 +1,79 @@
#
# Helper test file to ensure that an event running on a slave which executes
# for longer than @@slave_max_statement_time will time out. By default, it will
# use the sleep function to imitate a long running function. This can be
# changed to use locks using the parameter with_lock.
#
# Parameters:
# with_lock (boolean, in) : Changes the long running command from using SLEEP
# to using locks. In particular, the MTR test will take the table level
# write lock on the slave side, while the slave concurrently tries to
# execute an insert statement.
#
# use_load_data (boolean, in) : If in row logging format, uses LOAD DATA
# INFILLE command to create Load_log_events to create the events which
# should time out
#
--connection master
create table t1(a int not null auto_increment, b int, primary key(a)) engine=InnoDB;
--source include/save_master_gtid.inc
--connection slave
--source include/sync_with_master_gtid.inc
--source include/stop_slave.inc
SET @old_slave_max_statement_time=@@GLOBAL.slave_max_statement_time;
SET GLOBAL slave_max_statement_time=0.75;
--connection master
--echo # Long running command due to a lock conflict
if (!$use_load_data)
{
INSERT INTO t1(b) VALUES (1);
}
if ($use_load_data)
{
load data infile '../../std_data/rpl_loaddata.dat' into table t1;
}
--source include/save_master_gtid.inc
--connection slave1
BEGIN; INSERT INTO t1(b) VALUES (1);
--connection slave
--echo # Starting slave to receive event which will take longer to execute
--echo # than slave_max_statement_time
START SLAVE;
# ER_SLAVE_STATEMENT_TIMEOUT
--let $slave_sql_errno= 4192
--source include/wait_for_slave_sql_error.inc
--echo # Ensuring event was not processed..
--let $t1_count= `select count(*) from t1`
if ($t1_count != 0)
{
--die Event should have timed out on the slave and not been executed
}
--echo # ..success
--echo # Remove slave timeout and catch up to master
SET GLOBAL slave_max_statement_time=0;
--connection slave1
ROLLBACK;
--source include/start_slave.inc
--source include/sync_with_master_gtid.inc
--echo # Test case cleanup
--connection master
DROP TABLE t1;
--source include/save_master_gtid.inc
--connection slave
--source include/sync_with_master_gtid.inc
--source include/stop_slave.inc
SET GLOBAL slave_max_statement_time=@old_slave_max_statement_time;
--source include/start_slave.inc

View File

@@ -0,0 +1,170 @@
include/master-slave.inc
[connection master]
#
# Set up
#
connection master;
SET STATEMENT sql_log_bin=0 FOR CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format");
SET STATEMENT sql_log_bin=0 FOR CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format");
connection slave;
SET STATEMENT sql_log_bin=0 FOR CALL mtr.add_suppression("Slave log event execution was interrupted");
SET STATEMENT sql_log_bin=0 FOR CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format");
SET @save_slave_max_statement_time=@@GLOBAL.slave_max_statement_time;
#
# Test Case 1) Using a serial slave, the SQL thread should time out when
# its underlying event executes for longer than @@slave_max_statement_time.
#
connection master;
create table t1(a int not null auto_increment, b int, primary key(a)) engine=InnoDB;
include/save_master_gtid.inc
connection slave;
include/sync_with_master_gtid.inc
include/stop_slave.inc
SET @old_slave_max_statement_time=@@GLOBAL.slave_max_statement_time;
SET GLOBAL slave_max_statement_time=0.75;
connection master;
# Long running command due to a lock conflict
INSERT INTO t1(b) VALUES (1);
include/save_master_gtid.inc
connection slave1;
BEGIN;
INSERT INTO t1(b) VALUES (1);
connection slave;
# Starting slave to receive event which will take longer to execute
# than slave_max_statement_time
START SLAVE;
include/wait_for_slave_sql_error.inc [errno=4192]
# Ensuring event was not processed..
# ..success
# Remove slave timeout and catch up to master
SET GLOBAL slave_max_statement_time=0;
connection slave1;
ROLLBACK;
include/start_slave.inc
include/sync_with_master_gtid.inc
# Test case cleanup
connection master;
DROP TABLE t1;
include/save_master_gtid.inc
connection slave;
include/sync_with_master_gtid.inc
include/stop_slave.inc
SET GLOBAL slave_max_statement_time=@old_slave_max_statement_time;
include/start_slave.inc
#
# Test Case 2) Using a parallel slave, a worker thread should time out
# when its underlying event executes for longer than
# @@slave_max_statement_time
#
include/stop_slave.inc
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
SET GLOBAL slave_parallel_threads=2;
SET GLOBAL slave_parallel_mode='optimistic';
include/start_slave.inc
connection master;
create table t1(a int not null auto_increment, b int, primary key(a)) engine=InnoDB;
include/save_master_gtid.inc
connection slave;
include/sync_with_master_gtid.inc
include/stop_slave.inc
SET @old_slave_max_statement_time=@@GLOBAL.slave_max_statement_time;
SET GLOBAL slave_max_statement_time=0.75;
connection master;
# Long running command due to a lock conflict
INSERT INTO t1(b) VALUES (1);
include/save_master_gtid.inc
connection slave1;
BEGIN;
INSERT INTO t1(b) VALUES (1);
connection slave;
# Starting slave to receive event which will take longer to execute
# than slave_max_statement_time
START SLAVE;
include/wait_for_slave_sql_error.inc [errno=4192]
# Ensuring event was not processed..
# ..success
# Remove slave timeout and catch up to master
SET GLOBAL slave_max_statement_time=0;
connection slave1;
ROLLBACK;
include/start_slave.inc
include/sync_with_master_gtid.inc
# Test case cleanup
connection master;
DROP TABLE t1;
include/save_master_gtid.inc
connection slave;
include/sync_with_master_gtid.inc
include/stop_slave.inc
SET GLOBAL slave_max_statement_time=@old_slave_max_statement_time;
include/start_slave.inc
include/stop_slave.inc
SET GLOBAL slave_parallel_mode=@old_parallel_mode;
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
include/start_slave.inc
#
# Test Case 3) Load-based log events (from LOAD DATA INFILE) should time
# out if their execution time exceeds @@slave_max_statement_time
#
connection master;
create table t1(a int not null auto_increment, b int, primary key(a)) engine=InnoDB;
include/save_master_gtid.inc
connection slave;
include/sync_with_master_gtid.inc
include/stop_slave.inc
SET @old_slave_max_statement_time=@@GLOBAL.slave_max_statement_time;
SET GLOBAL slave_max_statement_time=0.75;
connection master;
# Long running command due to a lock conflict
load data infile '../../std_data/rpl_loaddata.dat' into table t1;
include/save_master_gtid.inc
connection slave1;
BEGIN;
INSERT INTO t1(b) VALUES (1);
connection slave;
# Starting slave to receive event which will take longer to execute
# than slave_max_statement_time
START SLAVE;
include/wait_for_slave_sql_error.inc [errno=4192]
# Ensuring event was not processed..
# ..success
# Remove slave timeout and catch up to master
SET GLOBAL slave_max_statement_time=0;
connection slave1;
ROLLBACK;
include/start_slave.inc
include/sync_with_master_gtid.inc
# Test case cleanup
connection master;
DROP TABLE t1;
include/save_master_gtid.inc
connection slave;
include/sync_with_master_gtid.inc
include/stop_slave.inc
SET GLOBAL slave_max_statement_time=@old_slave_max_statement_time;
include/start_slave.inc
#
# Test Case 4) Locally executed long running statements should not time
# out due to @@slave_max_statement_time
#
connection slave;
include/stop_slave.inc
SET @old_slave_max_statement_time=@@GLOBAL.slave_max_statement_time;
SET @old_gtid_domain_id=@@GLOBAL.gtid_domain_id;
SET @@GLOBAL.slave_max_statement_time=0.75;
SET @@GLOBAL.gtid_domain_id=1;
include/start_slave.inc
CREATE TABLE t2 (a int);
SET STATEMENT sql_log_bin=0 FOR INSERT INTO t2 SELECT SLEEP(1);
DROP TABLE t2;
include/stop_slave.inc
SET GLOBAL gtid_domain_id=@old_gtid_domain_id;
SET GLOBAL slave_max_statement_time=@old_slave_max_statement_time;
include/start_slave.inc
# Cleanup
include/stop_slave.inc
SET GLOBAL slave_max_statement_time=@save_slave_max_statement_time;
include/start_slave.inc
include/rpl_end.inc
# End of rpl_slave_max_statement_time.test

View File

@@ -0,0 +1,110 @@
#
# Purpose:
# This test ensures that the slave can limit the execution time of its
# events via the global system variable @@slave_max_statement_time.
#
# Methodology:
# This test uses the following test cases to ensure that a slave will
# correctly limit the execution time of its events:
# 1) Using a serial slave, the SQL thread should time out when its underlying
# event executes for longer than @@slave_max_statement_time.
# 2) Using a parallel slave, a worker thread should time out when its
# underlying event executes for longer than @@slave_max_statement_time.
# 3) Load-based log events (from LOAD DATA INFILE) should time out if their
# execution time exceeds @@slave_max_statement_time
# 4) Locally executed long running statements should not time out due to
# @@slave_max_statement_time.
#
# References:
# MDEV-27161: Add option for SQL thread to limit maximum execution time per
# query replicated
#
--source include/have_innodb.inc
--source include/master-slave.inc
--echo #
--echo # Set up
--echo #
--connection master
SET STATEMENT sql_log_bin=0 FOR CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format");
SET STATEMENT sql_log_bin=0 FOR CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format");
--connection slave
SET STATEMENT sql_log_bin=0 FOR CALL mtr.add_suppression("Slave log event execution was interrupted");
SET STATEMENT sql_log_bin=0 FOR CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format");
SET @save_slave_max_statement_time=@@GLOBAL.slave_max_statement_time;
--let $with_lock= 1
--echo #
--echo # Test Case 1) Using a serial slave, the SQL thread should time out when
--echo # its underlying event executes for longer than @@slave_max_statement_time.
--echo #
--source include/rpl_slave_max_statement_time.inc
--echo #
--echo # Test Case 2) Using a parallel slave, a worker thread should time out
--echo # when its underlying event executes for longer than
--echo # @@slave_max_statement_time
--echo #
--source include/stop_slave.inc
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
SET GLOBAL slave_parallel_threads=2;
SET GLOBAL slave_parallel_mode='optimistic';
--source include/start_slave.inc
--source include/rpl_slave_max_statement_time.inc
--source include/stop_slave.inc
SET GLOBAL slave_parallel_mode=@old_parallel_mode;
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
--source include/start_slave.inc
--echo #
--echo # Test Case 3) Load-based log events (from LOAD DATA INFILE) should time
--echo # out if their execution time exceeds @@slave_max_statement_time
--echo #
--let $use_load_data= 1
--source include/rpl_slave_max_statement_time.inc
--let $use_load_data=
--echo #
--echo # Test Case 4) Locally executed long running statements should not time
--echo # out due to @@slave_max_statement_time
--echo #
--connection slave
--source include/stop_slave.inc
SET @old_slave_max_statement_time=@@GLOBAL.slave_max_statement_time;
SET @old_gtid_domain_id=@@GLOBAL.gtid_domain_id;
SET @@GLOBAL.slave_max_statement_time=0.75;
SET @@GLOBAL.gtid_domain_id=1;
--source include/start_slave.inc
CREATE TABLE t2 (a int);
SET STATEMENT sql_log_bin=0 FOR INSERT INTO t2 SELECT SLEEP(1);
--let $t2_count= `SELECT COUNT(*) FROM t2`
if ($t2_count != 1)
{
--die Local long running insert statement should have completed
}
DROP TABLE t2;
--source include/stop_slave.inc
SET GLOBAL gtid_domain_id=@old_gtid_domain_id;
SET GLOBAL slave_max_statement_time=@old_slave_max_statement_time;
--source include/start_slave.inc
--echo # Cleanup
--source include/stop_slave.inc
SET GLOBAL slave_max_statement_time=@save_slave_max_statement_time;
--source include/start_slave.inc
--source include/rpl_end.inc
--echo # End of rpl_slave_max_statement_time.test

View File

@@ -3722,6 +3722,16 @@ NUMERIC_BLOCK_SIZE 1024
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME SLAVE_MAX_STATEMENT_TIME
VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE DOUBLE
VARIABLE_COMMENT A query that has taken more than slave_max_statement_time seconds to run on the slave will be aborted. The argument will be treated as a decimal value with microsecond precision. A value of 0 (default) means no timeout
NUMERIC_MIN_VALUE 0
NUMERIC_MAX_VALUE 31536000
NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME SLAVE_NET_TIMEOUT
VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE INT UNSIGNED

View File

@@ -200,7 +200,7 @@ static void inline slave_rows_error_report(enum loglevel level, int ha_error,
err->get_sql_errno());
}
if (ha_error != 0)
if (ha_error != 0 && !thd->killed)
rli->report(level, errcode, rgi->gtid_info(),
"Could not execute %s event on table %s.%s;"
"%s handler error %s; "
@@ -5728,6 +5728,13 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi)
*/
DBUG_ASSERT(rgi->thd == thd);
/*
Where a Query_log_event can rely on the normal command execution logic to
set/reset the slave thread's timer; a Rows_log_event update needs to set
the timer itself
*/
thd->set_query_timer();
/*
If there is no locks taken, this is the first binrow event seen
after the table map events. We should then lock all the tables
@@ -6120,6 +6127,12 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi)
if (likely(error == 0) && !transactional_table)
thd->transaction->all.modified_non_trans_table=
thd->transaction->stmt.modified_non_trans_table= TRUE;
if (likely(error == 0))
{
error= thd->killed_errno();
if (error && !thd->is_error())
my_error(error, MYF(0));
}
} // row processing loop
while (error == 0 && (m_curr_row != m_rows_end));
@@ -6189,11 +6202,13 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi)
free_root(thd->mem_root, MYF(MY_KEEP_PREALLOC));
}
thd->reset_query_timer();
DBUG_RETURN(error);
err:
restore_empty_query_table_list(thd->lex);
rgi->slave_close_thread_tables(thd);
thd->reset_query_timer();
DBUG_RETURN(error);
}

View File

@@ -461,6 +461,8 @@ ulonglong binlog_cache_size=0;
ulonglong binlog_file_cache_size=0;
ulonglong max_binlog_cache_size=0;
ulong slave_max_allowed_packet= 0;
double slave_max_statement_time_double;
ulonglong slave_max_statement_time;
ulonglong binlog_stmt_cache_size=0;
ulonglong max_binlog_stmt_cache_size=0;
ulonglong test_flags;
@@ -8718,6 +8720,8 @@ static int get_options(int *argc_ptr, char ***argv_ptr)
max_relay_log_size_var->option.def_value=
max_binlog_size_var->option.def_value;
}
slave_max_statement_time=
double2ulonglong(slave_max_statement_time_double * 1e6);
}
#endif

View File

@@ -244,6 +244,8 @@ extern ulonglong binlog_cache_size, binlog_stmt_cache_size, binlog_file_cache_si
extern ulonglong max_binlog_cache_size, max_binlog_stmt_cache_size;
extern ulong max_binlog_size;
extern ulong slave_max_allowed_packet;
extern ulonglong slave_max_statement_time;
extern double slave_max_statement_time_double;
extern ulong opt_binlog_rows_event_max_size;
extern ulong binlog_row_metadata;
extern ulong thread_cache_size;

View File

@@ -570,6 +570,8 @@ constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SLAVE_EXEC_MODE=
REPL_SLAVE_ADMIN_ACL | SUPER_ACL;
constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SLAVE_MAX_ALLOWED_PACKET=
REPL_SLAVE_ADMIN_ACL | SUPER_ACL;
constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SLAVE_MAX_STATEMENT_TIME=
REPL_SLAVE_ADMIN_ACL;
constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SLAVE_NET_TIMEOUT=
REPL_SLAVE_ADMIN_ACL | SUPER_ACL;
constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_SLAVE_PARALLEL_MAX_QUEUED=

View File

@@ -10078,3 +10078,5 @@ WARN_OPTION_CHANGING
eng "%s is implicitly changing the value of '%s' from '%s' to '%s'"
ER_CM_OPTION_MISSING_REQUIREMENT
eng "CHANGE MASTER TO option '%s=%s' is missing requirement %s"
ER_SLAVE_STATEMENT_TIMEOUT 70100
eng "Slave log event execution was interrupted (slave_max_statement_time exceeded)"

View File

@@ -2095,7 +2095,8 @@ int THD::killed_errno()
DBUG_RETURN(ER_QUERY_INTERRUPTED);
case KILL_TIMEOUT:
case KILL_TIMEOUT_HARD:
DBUG_RETURN(ER_STATEMENT_TIMEOUT);
DBUG_RETURN(slave_thread ?
ER_SLAVE_STATEMENT_TIMEOUT : ER_STATEMENT_TIMEOUT);
case KILL_SERVER:
case KILL_SERVER_HARD:
DBUG_RETURN(ER_SERVER_SHUTDOWN);

View File

@@ -5431,24 +5431,29 @@ public:
void set_query_timer()
{
#ifndef EMBEDDED_LIBRARY
/*
Slave vs user threads have timeouts configured via different variables,
so pick the appropriate one to use.
*/
ulonglong timeout_val=
slave_thread ? slave_max_statement_time : variables.max_statement_time;
/*
Don't start a query timer if
- If timeouts are not set
- if we are in a stored procedure or sub statement
- If this is a slave thread
- If we already have set a timeout (happens when running prepared
statements that calls mysql_execute_command())
*/
if (!variables.max_statement_time || spcont || in_sub_stmt ||
slave_thread || query_timer.expired == 0)
if (!timeout_val || spcont || in_sub_stmt || query_timer.expired == 0)
return;
thr_timer_settime(&query_timer, variables.max_statement_time);
thr_timer_settime(&query_timer, timeout_val);
#endif
}
void reset_query_timer()
{
#ifndef EMBEDDED_LIBRARY
if (spcont || in_sub_stmt || slave_thread)
if (spcont || in_sub_stmt)
return;
if (!query_timer.expired)
thr_timer_end(&query_timer);

View File

@@ -2455,6 +2455,27 @@ Sys_gtid_ignore_duplicates(
DEFAULT(FALSE), NO_MUTEX_GUARD,
NOT_IN_BINLOG, ON_CHECK(check_gtid_ignore_duplicates),
ON_UPDATE(fix_gtid_ignore_duplicates));
static bool
update_slave_max_statement_time(sys_var *self, THD *thd, enum_var_type type)
{
slave_max_statement_time=
double2ulonglong(slave_max_statement_time_double * 1e6);
return false;
}
static Sys_var_on_access_global<
Sys_var_double, PRIV_SET_SYSTEM_GLOBAL_VAR_SLAVE_MAX_STATEMENT_TIME>
Sys_slave_max_statement_time(
"slave_max_statement_time",
"A query that has taken more than slave_max_statement_time seconds to "
"run on the slave will be aborted. The argument will be treated as a "
"decimal value with microsecond precision. A value of 0 (default) "
"means no timeout",
GLOBAL_VAR(slave_max_statement_time_double), CMD_LINE(REQUIRED_ARG),
VALID_RANGE(0, LONG_TIMEOUT), DEFAULT(0), NO_MUTEX_GUARD,
NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(update_slave_max_statement_time));
#endif