mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
MDEV-12746 rpl.rpl_parallel_optimistic_nobinlog fails committing out of order at retry
The test failures were of two sorts. The first was that the number of retries of what the slave considered a temporary error exceeded the default value of the slave retry option. The second was an out-of-order commit by transactions that were supposed to error out instead. Both issues have the same cause: the retry after a temporary error did not check for a possibly already existing error status. This is fixed by refining the conditions for retrying. Specifically, a retrying worker now checks `rpl_parallel_entry::stop_on_error_sub_id`, which a failing predecessor may have set to its own sub id. If the member is set, the retrying follower errors out with ER_PRIOR_COMMIT_FAILED.
This commit is contained in:
@@ -128,6 +128,7 @@ SELECT * FROM t1 ORDER BY a;
|
||||
# Register the slave error-log messages this test expects. Binary logging is
# switched off for the session around the CALLs so that they are not binlogged.
SET sql_log_bin=0;
|
||||
CALL mtr.add_suppression("Slave worker thread retried transaction 10 time\\(s\\) in vain, giving up");
|
||||
CALL mtr.add_suppression("Slave: Deadlock found when trying to get lock; try restarting transaction");
|
||||
# Same "giving up" message, but with any retry count. The MDEV-12746 part of
# this test lowers slave_transaction_retries to 1, so presumably the
# "10 time(s)" pattern above would not match the message produced there.
CALL mtr.add_suppression("Slave worker thread retried transaction .* in vain, giving up");
|
||||
SET sql_log_bin=1;
|
||||
|
||||
SET @old_dbug= @@GLOBAL.debug_dbug;
|
||||
@@ -371,7 +372,7 @@ SELECT * FROM t3 ORDER BY a;
|
||||
SET binlog_format=@old_format;
|
||||
|
||||
|
||||
# Clean up.
|
||||
# Clean up of the above part.
|
||||
--connection server_2
|
||||
--source include/stop_slave.inc
|
||||
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
|
||||
@@ -381,4 +382,102 @@ SET GLOBAL slave_parallel_threads=@old_parallel_threads;
|
||||
DROP TABLE t1, t2, t3, t4;
|
||||
DROP function foo;
|
||||
|
||||
--sync_slave_with_master server_2
|
||||
|
||||
#
|
||||
# MDEV-12746 rpl.rpl_parallel_optimistic_nobinlog fails committing out of order at retry
# Scenario: two master transactions (GTID seq_no 1000 and 1001) are applied by
# parallel slave workers while local "spoiler" transactions on the slave hold
# the same primary key values, so both appliers get blocked and have to retry.
# The test then verifies that neither transaction ends up committed.
|
||||
#
|
||||
|
||||
--connection server_1
|
||||
CREATE TABLE t1 (a int PRIMARY KEY, b INT) ENGINE=InnoDB;
|
||||
|
||||
|
||||
# Replicate create-t1 and prepare to re-start slave in optimistic mode
# NOTE(review): the mode actually set below is 'aggressive', not 'optimistic'.
|
||||
--sync_slave_with_master server_2
|
||||
--source include/stop_slave.inc
|
||||
# Each global changed here is saved in a user variable first; the saved values
# are restored in the clean-up section at the end of this test part.
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
|
||||
SET @@GLOBAL.slave_parallel_threads=5;
|
||||
SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
|
||||
SET @@GLOBAL.slave_parallel_mode='aggressive';
|
||||
SET @old_lock_wait_timeout=@@GLOBAL.innodb_lock_wait_timeout;
|
||||
# Short InnoDB lock wait: presumably so that an applier blocked by a spoiler
# times out quickly and reports a temporary error -- TODO confirm.
SET @@GLOBAL.innodb_lock_wait_timeout=2;
|
||||
SET @old_slave_transaction_retries=@@GLOBAL.slave_transaction_retries;
|
||||
# Only one retry is allowed, so a worker that keeps failing gives up early.
SET @@GLOBAL.slave_transaction_retries=1;
|
||||
|
||||
# Two extra connections are opened directly to the slave ($SLAVE_MYPORT).
# Each one leaves a transaction open after inserting a row whose primary key
# (a=1, a=2) equals the one inserted by the master payload further down;
# the b value differs (2 here, 1 on the master).
--echo # Spoilers on the slave side causing temporary errors
|
||||
--connect (spoiler_21,127.0.0.1,root,,test,$SLAVE_MYPORT)
|
||||
BEGIN;
|
||||
INSERT INTO t1 SET a=1,b=2;
|
||||
|
||||
--connect (spoiler_22,127.0.0.1,root,,test,$SLAVE_MYPORT)
|
||||
BEGIN;
|
||||
INSERT INTO t1 SET a=2,b=2;
|
||||
|
||||
# Two single-row inserts on the master. GTID_SEQ_NO is set explicitly before
# each one, giving the transactions the sequence numbers 1000 and 1001 that
# the debug-sync signal names used later (proceed_by_1000 / proceed_by_1001)
# refer to.
--echo # Master payload
|
||||
--connection server_1
|
||||
SET @@SESSION.GTID_SEQ_NO=1000;
|
||||
INSERT INTO t1 SET a=1,b=1;
|
||||
SET @@SESSION.GTID_SEQ_NO=1001;
|
||||
INSERT INTO t1 SET a=2,b=1;
|
||||
|
||||
--echo # Start slave whose both appliers is destined to being blocked
|
||||
--connection server_2
|
||||
# Remember the current debug_dbug value; it is restored during clean up.
SET @old_dbug= @@GLOBAL.debug_dbug;
|
||||
# NOTE(review): judging by its name and by the signals sent below, this debug
# keyword presumably makes a retrying worker wait on a debug sync point --
# confirm against the server source.
SET @@GLOBAL.debug_dbug="+d,rpl_parallel_simulate_wait_at_retry";
|
||||
--source include/start_slave.inc
|
||||
|
||||
--echo # Make sure the 2nd seqno_1001 worker has gotten to waiting
|
||||
# The condition is a bare count(*) with no comparison, so presumably it is
# considered met as soon as at least one thread reports a state containing
# 'debug sync point: now'.
--let $wait_condition= SELECT count(*) FROM information_schema.processlist WHERE state LIKE '%debug sync point: now%';
|
||||
--source include/wait_condition.inc
|
||||
|
||||
|
||||
# The current connection is still server_2 (the slave), so the signal is
# emitted on the slave server.
--echo # Signal to the 1st to proceed after it has reached termination state
|
||||
SET @@DEBUG_SYNC='now SIGNAL proceed_by_1000';
|
||||
# Roll back the spoiler transaction that inserted a=1 on the slave.
--connection spoiler_21
|
||||
ROLLBACK;
|
||||
|
||||
--echo # Release the 2nd worker to proceed
|
||||
# Roll back the spoiler transaction that inserted a=2, then signal the
# second worker from the slave connection.
--connection spoiler_22
|
||||
ROLLBACK;
|
||||
--connection server_2
|
||||
SET @@DEBUG_SYNC='now SIGNAL proceed_by_1001';
|
||||
|
||||
--echo # observe how it all ends
|
||||
# A row with a=1 on the slave means the first transaction (seq_no 1000) got
# committed; the test treats that as a failure, dumps t1 and aborts.
if (`SELECT count(*) = 1 FROM t1 WHERE a = 1`)
|
||||
{
|
||||
--echo "*** Unexpected commit by the first Worker ***"
|
||||
SELECT * from t1;
|
||||
--die
|
||||
}
|
||||
|
||||
--echo # Wait for the workers to go home and check the result of applying
|
||||
# Wait until no thread with command 'Slave_worker' is listed any more.
--let $wait_condition=SELECT count(*) = 0 FROM information_schema.processlist WHERE command = 'Slave_worker'
|
||||
--source include/wait_condition.inc
|
||||
# A row with a=2 means the second transaction (seq_no 1001) committed although
# the first one did not, i.e. the out-of-order commit reported in MDEV-12746.
if (`SELECT count(*) = 1 FROM t1 WHERE a = 2`)
|
||||
{
|
||||
--echo
|
||||
--echo "*** Error: congrats, you hit MDEV-12746 issue. ***"
|
||||
--echo
|
||||
--die
|
||||
}
|
||||
--echo # which is OK
|
||||
|
||||
#
|
||||
# Clean up
# Stops the slave, restores every global saved earlier in this test part,
# resets debug sync, restarts the slave and drops the test table.
# NOTE(review): the spoiler_21 / spoiler_22 connections are never explicitly
# disconnected in the visible script -- consider whether --disconnect is needed.
|
||||
#
|
||||
--connection server_2
|
||||
--source include/stop_slave.inc
|
||||
SET @@GLOBAL.slave_parallel_threads=@old_parallel_threads;
|
||||
SET @@GLOBAL.slave_parallel_mode=@old_parallel_mode;
|
||||
SET @@GLOBAL.innodb_lock_wait_timeout=@old_lock_wait_timeout;
|
||||
SET @@GLOBAL.slave_transaction_retries=@old_slave_transaction_retries;
|
||||
SET @@GLOBAL.debug_dbug=@old_dbug;
|
||||
# Clear the debug-sync state left over from the signals sent above.
SET debug_sync='RESET';
|
||||
--source include/start_slave.inc
|
||||
|
||||
# Drop the table on the master and let the slave catch up before ending.
--connection server_1
|
||||
DROP TABLE t1;
|
||||
|
||||
--sync_slave_with_master server_2
|
||||
|
||||
--source include/rpl_end.inc
|
||||
|
Reference in New Issue
Block a user