mirror of
https://github.com/MariaDB/server.git
synced 2025-08-07 00:04:31 +03:00
MDEV-31655: Parallel replication deadlock victim preference code erroneously removed
Restore code to make InnoDB choose the second transaction as a deadlock victim if two transactions deadlock that need to commit in-order for parallel replication. This code was erroneously removed when VATS was implemented in InnoDB. Also add a test case for InnoDB choosing the right deadlock victim. Also fixes this bug, with testcase that reliably reproduces: MDEV-28776: rpl.rpl_mark_optimize_tbl_ddl fails with timeout on sync_with_master Reviewed-by: Marko Mäkelä <marko.makela@mariadb.com> Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
This commit is contained in:
51
mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim.result
Normal file
51
mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim.result
Normal file
@@ -0,0 +1,51 @@
|
||||
include/master-slave.inc
|
||||
[connection master]
|
||||
MDEV-31655: Parallel replication deadlock victim preference code erroneously removed
|
||||
connection server_1;
|
||||
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
|
||||
CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
|
||||
BEGIN;
|
||||
COMMIT;
|
||||
include/save_master_gtid.inc
|
||||
connection server_2;
|
||||
include/sync_with_master_gtid.inc
|
||||
include/stop_slave.inc
|
||||
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
|
||||
SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
|
||||
set @@global.slave_parallel_threads= 5;
|
||||
set @@global.slave_parallel_mode= conservative;
|
||||
SET @old_dbug= @@GLOBAL.debug_dbug;
|
||||
SET GLOBAL debug_dbug= "+d,rpl_mdev31655_zero_retries";
|
||||
connection server_1;
|
||||
SET @old_dbug= @@SESSION.debug_dbug;
|
||||
SET SESSION debug_dbug="+d,binlog_force_commit_id";
|
||||
SET @commit_id= 1+1000;
|
||||
SET @commit_id= 2+1000;
|
||||
SET @commit_id= 3+1000;
|
||||
SET @commit_id= 4+1000;
|
||||
SET @commit_id= 5+1000;
|
||||
SET @commit_id= 6+1000;
|
||||
SET @commit_id= 7+1000;
|
||||
SET @commit_id= 8+1000;
|
||||
SET @commit_id= 9+1000;
|
||||
SET @commit_id= 10+1000;
|
||||
SET SESSION debug_dbug= @old_dbug;
|
||||
SELECT COUNT(*), SUM(a*100*b) FROM t1;
|
||||
COUNT(*) SUM(a*100*b)
|
||||
10 225000
|
||||
include/save_master_gtid.inc
|
||||
connection server_2;
|
||||
include/start_slave.inc
|
||||
include/sync_with_master_gtid.inc
|
||||
SET GLOBAL debug_dbug= @old_dbug;
|
||||
SELECT COUNT(*), SUM(a*100*b) FROM t1;
|
||||
COUNT(*) SUM(a*100*b)
|
||||
10 225000
|
||||
connection server_2;
|
||||
include/stop_slave.inc
|
||||
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
|
||||
SET GLOBAL slave_parallel_mode=@old_parallel_mode;
|
||||
include/start_slave.inc
|
||||
connection server_1;
|
||||
DROP TABLE t1;
|
||||
include/rpl_end.inc
|
50
mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim2.result
Normal file
50
mysql-test/suite/rpl/r/rpl_parallel_deadlock_victim2.result
Normal file
@@ -0,0 +1,50 @@
|
||||
include/master-slave.inc
|
||||
[connection master]
|
||||
connection master;
|
||||
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
|
||||
CREATE TABLE t1(a INT) ENGINE=INNODB;
|
||||
INSERT INTO t1 VALUES(1);
|
||||
include/save_master_gtid.inc
|
||||
connection slave;
|
||||
include/sync_with_master_gtid.inc
|
||||
include/stop_slave.inc
|
||||
set @@global.slave_parallel_threads= 2;
|
||||
set @@global.slave_parallel_mode= OPTIMISTIC;
|
||||
set @@global.slave_transaction_retries= 2;
|
||||
*** MDEV-28776: rpl.rpl_mark_optimize_tbl_ddl fails with timeout on sync_with_master
|
||||
connection master;
|
||||
SET @@gtid_seq_no= 100;
|
||||
INSERT INTO t1 SELECT 1+a FROM t1;
|
||||
SET @@gtid_seq_no= 200;
|
||||
INSERT INTO t1 SELECT 2+a FROM t1;
|
||||
SELECT * FROM t1 ORDER BY a;
|
||||
a
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
include/save_master_gtid.inc
|
||||
connection slave;
|
||||
SET @save_dbug= @@GLOBAL.debug_dbug;
|
||||
SET GLOBAL debug_dbug="+d,rpl_parallel_delay_gtid_0_x_100_start";
|
||||
SET GLOBAL debug_dbug="+d,rpl_write_record_small_sleep_gtid_100_200";
|
||||
SET GLOBAL debug_dbug="+d,small_sleep_after_lock_wait";
|
||||
SET GLOBAL debug_dbug="+d,rpl_delay_deadlock_kill";
|
||||
include/start_slave.inc
|
||||
include/sync_with_master_gtid.inc
|
||||
SET GLOBAL debug_dbug= @save_dbug;
|
||||
SELECT * FROM t1 ORDER BY a;
|
||||
a
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
connection slave;
|
||||
include/stop_slave.inc
|
||||
SET @@global.slave_parallel_threads= 0;
|
||||
SET @@global.slave_parallel_mode= optimistic;
|
||||
SET @@global.slave_transaction_retries= 10;
|
||||
include/start_slave.inc
|
||||
connection master;
|
||||
DROP TABLE t1;
|
||||
include/rpl_end.inc
|
86
mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim.test
Normal file
86
mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim.test
Normal file
@@ -0,0 +1,86 @@
|
||||
--source include/have_innodb.inc
|
||||
--source include/have_debug.inc
|
||||
--source include/master-slave.inc
|
||||
|
||||
|
||||
--echo MDEV-31655: Parallel replication deadlock victim preference code erroneously removed
|
||||
# The problem was that InnoDB would choose the wrong deadlock victim.
|
||||
# Create a lot of transactions that can cause deadlocks, and use error
|
||||
# injection to check that the first transaction in each group is never
|
||||
# selected as deadlock victim.
|
||||
--let $rows= 10
|
||||
--let $transactions= 5
|
||||
--let $gcos= 10
|
||||
|
||||
--connection server_1
|
||||
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
|
||||
CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
|
||||
BEGIN;
|
||||
--disable_query_log
|
||||
# Populate t1 with $rows rows keyed 0..$rows-1, matching the key range that
|
||||
# the UPDATE loop below addresses via "MOD $rows".
|
||||
--let $i= 0
|
||||
while ($i < $rows) {
|
||||
eval INSERT INTO t1 VALUES ($i, 0);
|
||||
inc $i;
|
||||
}
|
||||
--enable_query_log
|
||||
COMMIT;
|
||||
--source include/save_master_gtid.inc
|
||||
|
||||
--connection server_2
|
||||
--source include/sync_with_master_gtid.inc
|
||||
--source include/stop_slave.inc
|
||||
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
|
||||
SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
|
||||
eval set @@global.slave_parallel_threads= $transactions;
|
||||
set @@global.slave_parallel_mode= conservative;
|
||||
SET @old_dbug= @@GLOBAL.debug_dbug;
|
||||
# This error injection will allow no retries for GTIDs divisible by 1000.
|
||||
SET GLOBAL debug_dbug= "+d,rpl_mdev31655_zero_retries";
|
||||
|
||||
--connection server_1
|
||||
SET @old_dbug= @@SESSION.debug_dbug;
|
||||
SET SESSION debug_dbug="+d,binlog_force_commit_id";
|
||||
|
||||
--let $j= 1
|
||||
while ($j <= $gcos) {
|
||||
eval SET @commit_id= $j+1000;
|
||||
--let $i= 0
|
||||
while ($i < $transactions) {
|
||||
--disable_query_log
|
||||
eval SET SESSION gtid_seq_no= 1000 + 1000*$j + $i;
|
||||
BEGIN;
|
||||
--let $k= 0
|
||||
while ($k < $rows) {
|
||||
eval UPDATE t1 SET b=b+1 WHERE a=(($i+$k) MOD $rows);
|
||||
inc $k;
|
||||
}
|
||||
COMMIT;
|
||||
--enable_query_log
|
||||
inc $i;
|
||||
}
|
||||
inc $j;
|
||||
}
|
||||
|
||||
SET SESSION debug_dbug= @old_dbug;
|
||||
SELECT COUNT(*), SUM(a*100*b) FROM t1;
|
||||
|
||||
--source include/save_master_gtid.inc
|
||||
|
||||
--connection server_2
|
||||
--source include/start_slave.inc
|
||||
--source include/sync_with_master_gtid.inc
|
||||
SET GLOBAL debug_dbug= @old_dbug;
|
||||
SELECT COUNT(*), SUM(a*100*b) FROM t1;
|
||||
|
||||
|
||||
# Clean up.
|
||||
--connection server_2
|
||||
--source include/stop_slave.inc
|
||||
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
|
||||
SET GLOBAL slave_parallel_mode=@old_parallel_mode;
|
||||
--source include/start_slave.inc
|
||||
|
||||
--connection server_1
|
||||
DROP TABLE t1;
|
||||
|
||||
--source include/rpl_end.inc
|
83
mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim2.test
Normal file
83
mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim2.test
Normal file
@@ -0,0 +1,83 @@
|
||||
--source include/master-slave.inc
|
||||
--source include/have_innodb.inc
|
||||
--source include/have_debug.inc
|
||||
--source include/have_binlog_format_statement.inc
|
||||
|
||||
--connection master
|
||||
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
|
||||
CREATE TABLE t1(a INT) ENGINE=INNODB;
|
||||
INSERT INTO t1 VALUES(1);
|
||||
--source include/save_master_gtid.inc
|
||||
|
||||
--connection slave
|
||||
--source include/sync_with_master_gtid.inc
|
||||
--source include/stop_slave.inc
|
||||
--let $save_transaction_retries= `SELECT @@global.slave_transaction_retries`
|
||||
--let $save_slave_parallel_threads= `SELECT @@global.slave_parallel_threads`
|
||||
--let $save_slave_parallel_mode= `SELECT @@global.slave_parallel_mode`
|
||||
set @@global.slave_parallel_threads= 2;
|
||||
set @@global.slave_parallel_mode= OPTIMISTIC;
|
||||
set @@global.slave_transaction_retries= 2;
|
||||
|
||||
--echo *** MDEV-28776: rpl.rpl_mark_optimize_tbl_ddl fails with timeout on sync_with_master
|
||||
# This was a failure where a transaction T1 could deadlock multiple times
|
||||
# with T2, eventually exceeding the default --slave-transaction-retries=10.
|
||||
# Root cause was MDEV-31655, causing InnoDB to wrongly choose T1 as deadlock
|
||||
# victim over T2. If thread scheduling is right, it was possible for T1 to
|
||||
# repeatedly deadlock, roll back, and have time to grab an S lock again before
|
||||
# T2 woke up and got its waiting X lock, thus repeating the same deadlock over
|
||||
# and over.
|
||||
# Once the bug is fixed, it is not possible to re-create the same execution
|
||||
# and thread scheduling. Instead we inject small sleeps in a way that
|
||||
# triggered the problem when the bug was there, to demonstrate that the
|
||||
# problem no longer occurs.
|
||||
|
||||
--connection master
|
||||
# T1
|
||||
SET @@gtid_seq_no= 100;
|
||||
INSERT INTO t1 SELECT 1+a FROM t1;
|
||||
# T2
|
||||
SET @@gtid_seq_no= 200;
|
||||
INSERT INTO t1 SELECT 2+a FROM t1;
|
||||
|
||||
SELECT * FROM t1 ORDER BY a;
|
||||
--source include/save_master_gtid.inc
|
||||
|
||||
--connection slave
|
||||
SET @save_dbug= @@GLOBAL.debug_dbug;
|
||||
|
||||
# Inject various delays to hint thread scheduling to happen in the way that
|
||||
# triggered MDEV-28776.
|
||||
|
||||
# Small delay starting T1 so it will be the youngest trx and be chosen over
|
||||
# T2 as the deadlock victim by default in InnoDB.
|
||||
SET GLOBAL debug_dbug="+d,rpl_parallel_delay_gtid_0_x_100_start";
|
||||
|
||||
# Small delay before taking insert X lock to give time for both T1 and T2 to
|
||||
# get the S lock first and cause a deadlock.
|
||||
SET GLOBAL debug_dbug="+d,rpl_write_record_small_sleep_gtid_100_200";
|
||||
|
||||
# Small delay after T2's wait on the X lock, to give time for T1 retry to
|
||||
# re-acquire the T1 S lock first.
|
||||
SET GLOBAL debug_dbug="+d,small_sleep_after_lock_wait";
|
||||
|
||||
# Delay deadlock kill of T2.
|
||||
SET GLOBAL debug_dbug="+d,rpl_delay_deadlock_kill";
|
||||
|
||||
--source include/start_slave.inc
|
||||
--source include/sync_with_master_gtid.inc
|
||||
SET GLOBAL debug_dbug= @save_dbug;
|
||||
SELECT * FROM t1 ORDER BY a;
|
||||
|
||||
# Cleanup.
|
||||
--connection slave
|
||||
--source include/stop_slave.inc
|
||||
eval SET @@global.slave_parallel_threads= $save_slave_parallel_threads;
|
||||
eval SET @@global.slave_parallel_mode= $save_slave_parallel_mode;
|
||||
eval SET @@global.slave_transaction_retries= $save_transaction_retries;
|
||||
--source include/start_slave.inc
|
||||
|
||||
--connection master
|
||||
DROP TABLE t1;
|
||||
|
||||
--source include/rpl_end.inc
|
Reference in New Issue
Block a user