mirror of
https://github.com/MariaDB/server.git
synced 2025-09-02 09:41:40 +03:00
out of order at retry The test failures were of two sorts. One is that the number of retries what the slave thought as a temporary error exceeded the default value of the slave retry option. The 2nd issue was an out of order commit by transactions that were supposed to error out instead. Both issues are caused by the same reason that the post-temporary-error retry did not check possibly already existing error status. This is mended with refining conditions to retry. Specifically, a retrying worker checks `rpl_parallel_entry::stop_on_error_sub_id` that a potential failing predecessor could set to its own sub id. Now should the member be set the retrying follower errors out with ER_PRIOR_COMMIT_FAILED.
484 lines
15 KiB
Plaintext
484 lines
15 KiB
Plaintext
--source include/have_innodb.inc
|
|
--source include/have_debug.inc
|
|
--source include/have_debug_sync.inc
|
|
--let $rpl_topology=1->2
|
|
--source include/rpl_init.inc
|
|
|
|
--echo *** Test retry of transactions that fail to replicate due to deadlock or similar temporary error. ***
|
|
|
|
--connection server_1
|
|
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
|
|
CREATE TABLE t1 (a int PRIMARY KEY, b INT) ENGINE=InnoDB;
|
|
INSERT INTO t1 VALUES (1,1);
|
|
--save_master_pos
|
|
|
|
# Use a stored function to inject a debug_sync into the appropriate THD.
|
|
# The function does nothing on the master, and on the slave it injects the
|
|
# desired debug_sync action(s).
|
|
SET sql_log_bin=0;
|
|
--delimiter ||
|
|
CREATE FUNCTION foo(x INT, d1 VARCHAR(500), d2 VARCHAR(500))
|
|
RETURNS INT DETERMINISTIC
|
|
BEGIN
|
|
RETURN x;
|
|
END
|
|
||
|
|
--delimiter ;
|
|
SET sql_log_bin=1;
|
|
|
|
--connection server_2
|
|
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
|
|
--source include/stop_slave.inc
|
|
SET GLOBAL slave_parallel_threads=5;
|
|
--source include/start_slave.inc
|
|
--sync_with_master
|
|
SET sql_log_bin=0;
|
|
--delimiter ||
|
|
CREATE FUNCTION foo(x INT, d1 VARCHAR(500), d2 VARCHAR(500))
|
|
RETURNS INT DETERMINISTIC
|
|
BEGIN
|
|
IF d1 != '' THEN
|
|
SET debug_sync = d1;
|
|
END IF;
|
|
IF d2 != '' THEN
|
|
SET debug_sync = d2;
|
|
END IF;
|
|
RETURN x;
|
|
END
|
|
||
|
|
--delimiter ;
|
|
SET sql_log_bin=1;
|
|
--source include/stop_slave.inc
|
|
|
|
--connection server_1
|
|
SET gtid_seq_no = 100;
|
|
BEGIN;
|
|
INSERT INTO t1 VALUES (2,1);
|
|
UPDATE t1 SET b=b+1 WHERE a=1;
|
|
INSERT INTO t1 VALUES (3,1);
|
|
COMMIT;
|
|
SELECT * FROM t1 ORDER BY a;
|
|
--save_master_pos
|
|
|
|
--connection server_2
|
|
SET @old_dbug= @@GLOBAL.debug_dbug;
|
|
SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100";
|
|
let $old_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
|
|
--source include/start_slave.inc
|
|
--sync_with_master
|
|
SET GLOBAL debug_dbug=@old_dbug;
|
|
let $new_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
|
|
--disable_query_log
|
|
eval SELECT $new_retry - $old_retry AS retries;
|
|
--enable_query_log
|
|
|
|
SELECT * FROM t1 ORDER BY a;
|
|
|
|
|
|
--echo *** Test that double retry works when the first retry also fails with temp error ***
|
|
--source include/stop_slave.inc
|
|
|
|
--connection server_1
|
|
SET gtid_seq_no = 100;
|
|
SET @old_server_id = @@server_id;
|
|
SET server_id = 10;
|
|
BEGIN;
|
|
INSERT INTO t1 VALUES (4,1);
|
|
UPDATE t1 SET b=b+1 WHERE a=1;
|
|
INSERT INTO t1 VALUES (5,1);
|
|
INSERT INTO t1 VALUES (6,1);
|
|
COMMIT;
|
|
SET server_id = @old_server_id;
|
|
SELECT * FROM t1 ORDER BY a;
|
|
--save_master_pos
|
|
|
|
--connection server_2
|
|
SET @old_dbug= @@GLOBAL.debug_dbug;
|
|
SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100,rpl_parallel_simulate_double_temp_err_gtid_0_x_100";
|
|
let $old_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
|
|
--source include/start_slave.inc
|
|
--sync_with_master
|
|
SET GLOBAL debug_dbug=@old_dbug;
|
|
let $new_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
|
|
--disable_query_log
|
|
eval SELECT $new_retry - $old_retry AS retries;
|
|
--enable_query_log
|
|
|
|
SELECT * FROM t1 ORDER BY a;
|
|
|
|
|
|
--echo *** Test too many retries, eventually causing failure. ***
|
|
--source include/stop_slave.inc
|
|
|
|
--connection server_1
|
|
SET gtid_seq_no = 100;
|
|
SET @old_server_id = @@server_id;
|
|
SET server_id = 11;
|
|
BEGIN;
|
|
INSERT INTO t1 VALUES (7,1);
|
|
UPDATE t1 SET b=b+1 WHERE a=1;
|
|
INSERT INTO t1 VALUES (8,1);
|
|
INSERT INTO t1 VALUES (9,1);
|
|
COMMIT;
|
|
SET server_id = @old_server_id;
|
|
SELECT * FROM t1 ORDER BY a;
|
|
--save_master_pos
|
|
|
|
--connection server_2
|
|
SET sql_log_bin=0;
|
|
CALL mtr.add_suppression("Slave worker thread retried transaction 10 time\\(s\\) in vain, giving up");
|
|
CALL mtr.add_suppression("Slave: Deadlock found when trying to get lock; try restarting transaction");
|
|
CALL mtr.add_suppression("Slave worker thread retried transaction .* in vain, giving up");
|
|
SET sql_log_bin=1;
|
|
|
|
SET @old_dbug= @@GLOBAL.debug_dbug;
|
|
SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100,rpl_parallel_simulate_infinite_temp_err_gtid_0_x_100";
|
|
let $old_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
|
|
START SLAVE;
|
|
--let $slave_sql_errno= 1213
|
|
--source include/wait_for_slave_sql_error.inc
|
|
SET GLOBAL debug_dbug=@old_dbug;
|
|
let $new_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
|
|
--disable_query_log
|
|
eval SELECT $new_retry - $old_retry AS retries;
|
|
--enable_query_log
|
|
|
|
SELECT * FROM t1 ORDER BY a;
|
|
STOP SLAVE IO_THREAD;
|
|
--source include/start_slave.inc
|
|
--sync_with_master
|
|
SELECT * FROM t1 ORDER BY a;
|
|
|
|
--echo *** Test retry of event group that spans multiple relay log files. ***
|
|
|
|
--connection server_1
|
|
CREATE TABLE t2 (a int PRIMARY KEY, b BLOB) ENGINE=InnoDB;
|
|
INSERT INTO t2 VALUES (1,"Hulubullu");
|
|
--save_master_pos
|
|
|
|
--connection server_2
|
|
--sync_with_master
|
|
--source include/stop_slave.inc
|
|
SET @old_max= @@GLOBAL.max_relay_log_size;
|
|
SET GLOBAL max_relay_log_size=4096;
|
|
|
|
--connection server_1
|
|
--let $big= `SELECT REPEAT("*", 5000)`
|
|
SET gtid_seq_no = 100;
|
|
SET @old_server_id = @@server_id;
|
|
SET server_id = 12;
|
|
BEGIN;
|
|
--disable_query_log
|
|
eval INSERT INTO t2 VALUES (2, CONCAT("Hello ", "$big"));
|
|
eval INSERT INTO t2 VALUES (3, CONCAT("Long data: ", "$big"));
|
|
--enable_query_log
|
|
INSERT INTO t1 VALUES (10, 4);
|
|
COMMIT;
|
|
SET server_id = @old_server_id;
|
|
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
|
|
SELECT a, LENGTH(b) FROM t2 ORDER BY a;
|
|
--save_master_pos
|
|
|
|
--connection server_2
|
|
SET @old_dbug= @@GLOBAL.debug_dbug;
|
|
SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100";
|
|
let $old_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
|
|
--source include/start_slave.inc
|
|
--sync_with_master
|
|
SET GLOBAL debug_dbug=@old_dbug;
|
|
let $new_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
|
|
--disable_query_log
|
|
eval SELECT $new_retry - $old_retry AS retries;
|
|
--enable_query_log
|
|
|
|
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
|
|
SELECT a, LENGTH(b) FROM t2 ORDER BY a;
|
|
|
|
--connection server_1
|
|
INSERT INTO t1 VALUES (11,11);
|
|
--disable_query_log
|
|
eval INSERT INTO t2 VALUES (4, "$big");
|
|
--enable_query_log
|
|
--save_master_pos
|
|
|
|
--connection server_2
|
|
--sync_with_master
|
|
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
|
|
SELECT a, LENGTH(b) FROM t2 ORDER BY a;
|
|
SET GLOBAL max_relay_log_size=@old_max;
|
|
|
|
|
|
--echo *** MDEV-7065: Incorrect relay log position in parallel replication after retry of transaction ***
|
|
|
|
--connection server_2
|
|
--source include/stop_slave.inc
|
|
|
|
--connection server_1
|
|
BEGIN;
|
|
INSERT INTO t1 VALUES (100, 0);
|
|
INSERT INTO t1 VALUES (101, 0);
|
|
INSERT INTO t1 VALUES (102, 0);
|
|
INSERT INTO t1 VALUES (103, 0);
|
|
COMMIT;
|
|
SELECT * FROM t1 WHERE a >= 100 ORDER BY a;
|
|
--save_master_pos
|
|
|
|
--connection server_2
|
|
# Inject a DBUG error insert to cause the XID event of the single transaction
|
|
# from the master to fail with a deadlock error and be retried.
|
|
# The bug was that the retry of the XID would leave the relay log position
|
|
# incorrect (off by the size of XID event).
|
|
SET @old_dbug= @@GLOBAL.debug_dbug;
|
|
SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_xid";
|
|
let $old_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
|
|
--source include/start_slave.inc
|
|
--sync_with_master
|
|
SET GLOBAL debug_dbug=@old_dbug;
|
|
let $new_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
|
|
--disable_query_log
|
|
eval SELECT $new_retry - $old_retry AS retries;
|
|
--enable_query_log
|
|
|
|
SELECT * FROM t1 WHERE a >= 100 ORDER BY a;
|
|
# Stop the SQL thread. When the bug was there to give the incorrect relay log
|
|
# position, the restart of the SQL thread would read garbage data from the
|
|
# middle of an event and fail with relay log IO error.
|
|
--source include/stop_slave_sql.inc
|
|
|
|
--connection server_1
|
|
INSERT INTO t1 VALUES (104, 1);
|
|
INSERT INTO t1 VALUES (105, 1);
|
|
INSERT INTO t1 VALUES (106, 1);
|
|
INSERT INTO t1 VALUES (107, 1);
|
|
INSERT INTO t1 VALUES (108, 1);
|
|
INSERT INTO t1 VALUES (109, 1);
|
|
--save_master_pos
|
|
|
|
--connection server_2
|
|
--source include/start_slave.inc
|
|
--sync_with_master
|
|
SELECT * FROM t1 WHERE a >= 100 ORDER BY a;
|
|
|
|
|
|
--echo *** MDEV-6917: Parallel replication: "Commit failed due to failure of an earlier commit on which this one depends", but no prior failure seen **
|
|
|
|
--connection server_1
|
|
CREATE TABLE t3 (a INT PRIMARY KEY, b INT, KEY b_idx(b)) ENGINE=InnoDB;
|
|
INSERT INTO t3 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6);
|
|
CREATE TABLE t4 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
|
|
|
|
# We need statement binlog format to be able to inject debug_sync statements
|
|
# on the slave with calls to foo().
|
|
SET @old_format= @@SESSION.binlog_format;
|
|
SET binlog_format='statement';
|
|
--save_master_pos
|
|
|
|
--connection server_2
|
|
--sync_with_master
|
|
--source include/stop_slave.inc
|
|
CHANGE MASTER TO master_use_gtid=no;
|
|
|
|
--connection server_1
|
|
|
|
# Create a group commit with three transactions T1, T2, T3.
|
|
# T2 will block T1 on the slave where we will make it run first, so it will be
|
|
# deadlock killed.
|
|
# The bug was that in this case, T3 was signalled to fail due to T2 failing,
|
|
# even though the retry of T2 was later successful.
|
|
|
|
--connect (con1,127.0.0.1,root,,test,$SERVER_MYPORT_1,)
|
|
SET @old_format= @@SESSION.binlog_format;
|
|
SET binlog_format='statement';
|
|
BEGIN;
|
|
INSERT INTO t4 VALUES (10, foo(1, 'before_execute_sql_command WAIT_FOR t1_start', ''));
|
|
UPDATE t3 SET b=NULL WHERE a=6;
|
|
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1';
|
|
send COMMIT;
|
|
--connection server_1
|
|
SET debug_sync='now WAIT_FOR master_queued1';
|
|
|
|
--connect (con2,127.0.0.1,root,,test,$SERVER_MYPORT_1,)
|
|
SET @old_format= @@SESSION.binlog_format;
|
|
SET binlog_format='statement';
|
|
BEGIN;
|
|
INSERT INTO t4 VALUES (20, foo(2, 'group_commit_waiting_for_prior SIGNAL t2_waiting', ''));
|
|
DELETE FROM t3 WHERE b <= 3;
|
|
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2';
|
|
send COMMIT;
|
|
|
|
--connection server_1
|
|
SET debug_sync='now WAIT_FOR master_queued2';
|
|
|
|
--connect (con3,127.0.0.1,root,,test,$SERVER_MYPORT_1,)
|
|
SET @old_format= @@SESSION.binlog_format;
|
|
SET binlog_format='statement';
|
|
BEGIN;
|
|
INSERT INTO t4 VALUES (30, foo(3, 'before_execute_sql_command WAIT_FOR t3_start', 'group_commit_waiting_for_prior SIGNAL t3_waiting'));
|
|
INSERT INTO t3 VALUES (7,7);
|
|
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued3';
|
|
send COMMIT;
|
|
|
|
--connection server_1
|
|
SET debug_sync='now WAIT_FOR master_queued3';
|
|
SET debug_sync='now SIGNAL master_cont1';
|
|
|
|
--connection con1
|
|
REAP;
|
|
SET binlog_format=@old_format;
|
|
--connection con2
|
|
REAP;
|
|
SET binlog_format=@old_format;
|
|
--connection con3
|
|
REAP;
|
|
SET debug_sync='RESET';
|
|
SET binlog_format=@old_format;
|
|
|
|
--connection server_1
|
|
--save_master_pos
|
|
SELECT * FROM t3 ORDER BY a;
|
|
|
|
|
|
--connection server_2
|
|
let $old_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
|
|
SET @old_dbug=@@GLOBAL.debug_dbug;
|
|
SET GLOBAL debug_dbug="+d,thd_need_ordering_with_force";
|
|
--source include/start_slave.inc
|
|
# First, wait for T2 to complete up to where it is waiting for T1 to group
|
|
# commit for both of them. This will set locks that will block T1, causing
|
|
# a deadlock kill and retry of T2. T1 and T3 are still blocked at the start
|
|
# of each their SQL statements.
|
|
SET debug_sync='now WAIT_FOR t2_waiting';
|
|
# Now let T3 move on until the point where it is itself ready to commit.
|
|
SET debug_sync='now SIGNAL t3_start';
|
|
SET debug_sync='now WAIT_FOR t3_waiting';
|
|
# Now T2 and T3 are set up, so we can let T1 proceed.
|
|
SET debug_sync='now SIGNAL t1_start';
|
|
# Now we can wait for the slave to catch up.
|
|
# We should see T2 being deadlock killed and retried.
|
|
# The bug was that T2 deadlock kill would cause T3 to fail due to failure
|
|
# of an earlier commit. This is wrong as T2 did not fail, it was only
|
|
# retried.
|
|
--sync_with_master
|
|
SET GLOBAL debug_dbug=@old_dbug;
|
|
SET debug_sync='RESET';
|
|
let $new_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
|
|
--disable_query_log
|
|
eval SELECT $new_retry - $old_retry >= 1 AS retries;
|
|
--enable_query_log
|
|
SELECT * FROM t3 ORDER BY a;
|
|
|
|
|
|
--connection server_1
|
|
SET binlog_format=@old_format;
|
|
|
|
|
|
# Clean up of the above part.
|
|
--connection server_2
|
|
--source include/stop_slave.inc
|
|
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
|
|
--source include/start_slave.inc
|
|
|
|
--connection server_1
|
|
DROP TABLE t1, t2, t3, t4;
|
|
DROP function foo;
|
|
|
|
--sync_slave_with_master server_2
|
|
|
|
#
|
|
# MDEV-12746 rpl.rpl_parallel_optimistic_nobinlog fails committing out of order at retry
|
|
#
|
|
|
|
--connection server_1
|
|
CREATE TABLE t1 (a int PRIMARY KEY, b INT) ENGINE=InnoDB;
|
|
|
|
|
|
# Replicate create-t1 and prepare to re-start slave in optimistic mode
|
|
--sync_slave_with_master server_2
|
|
--source include/stop_slave.inc
|
|
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
|
|
SET @@GLOBAL.slave_parallel_threads=5;
|
|
SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
|
|
SET @@GLOBAL.slave_parallel_mode='aggressive';
|
|
SET @old_lock_wait_timeout=@@GLOBAL.innodb_lock_wait_timeout;
|
|
SET @@GLOBAL.innodb_lock_wait_timeout=2;
|
|
SET @old_slave_transaction_retries=@@GLOBAL.slave_transaction_retries;
|
|
SET @@GLOBAL.slave_transaction_retries=1;
|
|
|
|
--echo # Spoilers on the slave side causing temporary errors
|
|
--connect (spoiler_21,127.0.0.1,root,,test,$SLAVE_MYPORT)
|
|
BEGIN;
|
|
INSERT INTO t1 SET a=1,b=2;
|
|
|
|
--connect (spoiler_22,127.0.0.1,root,,test,$SLAVE_MYPORT)
|
|
BEGIN;
|
|
INSERT INTO t1 SET a=2,b=2;
|
|
|
|
--echo # Master payload
|
|
--connection server_1
|
|
SET @@SESSION.GTID_SEQ_NO=1000;
|
|
INSERT INTO t1 SET a=1,b=1;
|
|
SET @@SESSION.GTID_SEQ_NO=1001;
|
|
INSERT INTO t1 SET a=2,b=1;
|
|
|
|
--echo # Start slave whose both appliers is destined to being blocked
|
|
--connection server_2
|
|
SET @old_dbug= @@GLOBAL.debug_dbug;
|
|
SET @@GLOBAL.debug_dbug="+d,rpl_parallel_simulate_wait_at_retry";
|
|
--source include/start_slave.inc
|
|
|
|
--echo # Make sure the 2nd seqno_1001 worker has gotten to waiting
|
|
--let $wait_condition= SELECT count(*) FROM information_schema.processlist WHERE state LIKE '%debug sync point: now%';
|
|
--source include/wait_condition.inc
|
|
|
|
|
|
--echo # Signal to the 1st to proceed after it has reached termination state
|
|
SET @@DEBUG_SYNC='now SIGNAL proceed_by_1000';
|
|
--connection spoiler_21
|
|
ROLLBACK;
|
|
|
|
--echo # Release the 2nd worker to proceed
|
|
--connection spoiler_22
|
|
ROLLBACK;
|
|
--connection server_2
|
|
SET @@DEBUG_SYNC='now SIGNAL proceed_by_1001';
|
|
|
|
--echo # observe how it all ends
|
|
if (`SELECT count(*) = 1 FROM t1 WHERE a = 1`)
|
|
{
|
|
--echo "*** Unexpected commit by the first Worker ***"
|
|
SELECT * from t1;
|
|
--die
|
|
}
|
|
|
|
--echo # Wait for the workers to go home and check the result of applying
|
|
--let $wait_condition=SELECT count(*) = 0 FROM information_schema.processlist WHERE command = 'Slave_worker'
|
|
--source include/wait_condition.inc
|
|
if (`SELECT count(*) = 1 FROM t1 WHERE a = 2`)
|
|
{
|
|
--echo
|
|
--echo "*** Error: congrats, you hit MDEV-12746 issue. ***"
|
|
--echo
|
|
--die
|
|
}
|
|
--echo # which is OK
|
|
|
|
#
|
|
# Clean up
|
|
#
|
|
--connection server_2
|
|
--source include/stop_slave.inc
|
|
SET @@GLOBAL.slave_parallel_threads=@old_parallel_threads;
|
|
SET @@GLOBAL.slave_parallel_mode=@old_parallel_mode;
|
|
SET @@GLOBAL.innodb_lock_wait_timeout=@old_lock_wait_timeout;
|
|
SET @@GLOBAL.slave_transaction_retries=@old_slave_transaction_retries;
|
|
SET @@GLOBAL.debug_dbug=@old_dbug;
|
|
SET debug_sync='RESET';
|
|
--source include/start_slave.inc
|
|
|
|
--connection server_1
|
|
DROP TABLE t1;
|
|
|
|
--sync_slave_with_master server_2
|
|
|
|
--source include/rpl_end.inc
|