mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
MDEV-34042: Deadlock kill of XA PREPARE can break replication / rpl.rpl_parallel_multi_domain_xa sporadic failure
Refinement of the original patch. Move the code that resets the kill up into the parent class Xid_apply_log_event, to also fix the similar issue for XA COMMIT. Increase the number of slave retries in the test case rpl.rpl_parallel_multi_domain_xa to fix some sporadic failures. The test generates a massive number of conflicting transactions in multiple independent domains, which can cause multiple rollback+retry cycles for a transaction as it conflicts with transactions in other domains one-by-one. Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
This commit is contained in:
@@ -6,6 +6,8 @@ connection master;
|
||||
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
|
||||
connection slave;
|
||||
include/stop_slave.inc
|
||||
SET @old_transaction_retries = @@GLOBAL.slave_transaction_retries;
|
||||
SET @@global.slave_transaction_retries = 1000;
|
||||
SET @old_parallel_threads = @@GLOBAL.slave_parallel_threads;
|
||||
SET @old_slave_domain_parallel_threads = @@GLOBAL.slave_domain_parallel_threads;
|
||||
SET @@global.slave_parallel_threads = 5;
|
||||
@@ -45,6 +47,7 @@ include/stop_slave.inc
|
||||
SET @@global.slave_parallel_mode = @old_parallel_mode;
|
||||
SET @@global.slave_parallel_threads = @old_parallel_threads;
|
||||
SET @@global.slave_domain_parallel_threads = @old_slave_domain_parallel_threads;
|
||||
SET @@global.slave_transaction_retries = @old_transaction_retries;
|
||||
include/start_slave.inc
|
||||
connection master;
|
||||
DROP TABLE t1;
|
||||
|
@@ -21,6 +21,12 @@ ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
|
||||
--connection slave
|
||||
--sync_with_master
|
||||
--source include/stop_slave.inc
|
||||
# This test runs a huge number of transactions independently in parallel that
|
||||
# all conflict on a single row. This requires a large number of retries, as a
|
||||
# transaction can repeatedly conflict/deadlock with a large number of other
|
||||
# transactions (in a different domain) one by one.
|
||||
SET @old_transaction_retries = @@GLOBAL.slave_transaction_retries;
|
||||
SET @@global.slave_transaction_retries = 1000;
|
||||
SET @old_parallel_threads = @@GLOBAL.slave_parallel_threads;
|
||||
SET @old_slave_domain_parallel_threads = @@GLOBAL.slave_domain_parallel_threads;
|
||||
SET @@global.slave_parallel_threads = 5;
|
||||
@@ -160,6 +166,7 @@ SET @@global.slave_parallel_mode = 'optimistic';
|
||||
SET @@global.slave_parallel_mode = @old_parallel_mode;
|
||||
SET @@global.slave_parallel_threads = @old_parallel_threads;
|
||||
SET @@global.slave_domain_parallel_threads = @old_slave_domain_parallel_threads;
|
||||
SET @@global.slave_transaction_retries = @old_transaction_retries;
|
||||
--source include/start_slave.inc
|
||||
|
||||
--connection master
|
||||
|
@@ -4066,6 +4066,9 @@ int Xid_apply_log_event::do_apply_event(rpl_group_info *rgi)
|
||||
thd->wsrep_affected_rows= 0;
|
||||
#endif
|
||||
|
||||
#ifndef DBUG_OFF
|
||||
bool record_gtid_delayed_for_xa= false;
|
||||
#endif
|
||||
if (rgi->gtid_pending)
|
||||
{
|
||||
sub_id= rgi->gtid_sub_id;
|
||||
@@ -4084,6 +4087,10 @@ int Xid_apply_log_event::do_apply_event(rpl_group_info *rgi)
|
||||
return 1;
|
||||
});
|
||||
}
|
||||
#ifndef DBUG_OFF
|
||||
else
|
||||
record_gtid_delayed_for_xa= true;
|
||||
#endif
|
||||
}
|
||||
|
||||
general_log_print(thd, COM_QUERY, get_query());
|
||||
@@ -4093,6 +4100,22 @@ int Xid_apply_log_event::do_apply_event(rpl_group_info *rgi)
|
||||
{
|
||||
DBUG_ASSERT(!thd->transaction->xid_state.is_explicit_XA());
|
||||
|
||||
DBUG_ASSERT(record_gtid_delayed_for_xa);
|
||||
if (thd->rgi_slave->is_parallel_exec)
|
||||
{
|
||||
/*
|
||||
With XA, since the transaction is prepared/committed without updating
|
||||
the GTID pos (MDEV-32020...), we need here to clear any pending
|
||||
deadlock kill.
|
||||
|
||||
Otherwise if the kill happened after the prepare/commit completed, it
|
||||
might end up killing the subsequent GTID position update, causing the
|
||||
slave to fail with error.
|
||||
*/
|
||||
wait_for_pending_deadlock_kill(thd, thd->rgi_slave);
|
||||
thd->reset_killed();
|
||||
}
|
||||
|
||||
if ((err= do_record_gtid(thd, rgi, false, &hton, true)))
|
||||
return err;
|
||||
}
|
||||
@@ -4209,19 +4232,6 @@ int XA_prepare_log_event::do_commit()
|
||||
else
|
||||
res= trans_xa_commit(thd);
|
||||
|
||||
if (thd->rgi_slave->is_parallel_exec)
|
||||
{
|
||||
/*
|
||||
Since the transaction is prepared/committed without updating the GTID pos
|
||||
(MDEV-32020...), we need here to clear any pending deadlock kill.
|
||||
Otherwise if the kill happened after the prepare/commit completed, it
|
||||
might end up killing the subsequent GTID position update, causing the
|
||||
slave to fail with error.
|
||||
*/
|
||||
wait_for_pending_deadlock_kill(thd, thd->rgi_slave);
|
||||
thd->reset_killed();
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
#endif // HAVE_REPLICATION
|
||||
|
Reference in New Issue
Block a user