mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
MDEV-35110 Deadlock on Replica during BACKUP STAGE BLOCK_COMMIT on XA transactions
This is an extension of MDEV-30423 "Deadlock on Replica during BACKUP STAGE BLOCK_COMMIT on XA transactions" The original commit in MDEV-30423 was not complete as some usage in XA of MDL_BACKUP_COMMIT locks did not set thd->backup_commit_lock. This is required to be set when using parallel replication. Fixed by ensuring that all usage of BACKUP_COMMIT lock i XA is uniform and all sets thd->backup_commit_lock. I also changed all locks to be MDL_EXPLICIT to keep also that part uniform. A regression test is added.
This commit is contained in:
42
mysql-test/suite/rpl/r/parallel_backup_xa_debug.result
Normal file
42
mysql-test/suite/rpl/r/parallel_backup_xa_debug.result
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
include/master-slave.inc
|
||||||
|
[connection master]
|
||||||
|
connection master;
|
||||||
|
CREATE TABLE t (a INT) ENGINE = innodb;
|
||||||
|
connection slave;
|
||||||
|
include/stop_slave.inc
|
||||||
|
SET @old_parallel_threads= @@GLOBAL.slave_parallel_threads;
|
||||||
|
SET @old_parallel_mode = @@GLOBAL.slave_parallel_mode;
|
||||||
|
SET @@global.slave_parallel_threads= 2;
|
||||||
|
SET @@global.slave_parallel_mode = 'optimistic';
|
||||||
|
connection master;
|
||||||
|
# MDEV-35110
|
||||||
|
SET @@gtid_seq_no=100;
|
||||||
|
insert into t set a=1;
|
||||||
|
xa start 'x';
|
||||||
|
insert into t set a=2;
|
||||||
|
xa end 'x';
|
||||||
|
xa prepare 'x';
|
||||||
|
connection slave;
|
||||||
|
SET @@global.debug_dbug="+d,hold_worker_on_schedule";
|
||||||
|
start slave;
|
||||||
|
connection slave1;
|
||||||
|
backup stage start;
|
||||||
|
backup stage block_commit;
|
||||||
|
connection slave;
|
||||||
|
SET debug_sync = 'now SIGNAL continue_worker';
|
||||||
|
SET debug_sync = RESET;
|
||||||
|
connection slave1;
|
||||||
|
backup stage end;
|
||||||
|
connection master;
|
||||||
|
xa rollback 'x';
|
||||||
|
connection slave;
|
||||||
|
# Clean up.
|
||||||
|
connection slave;
|
||||||
|
include/stop_slave.inc
|
||||||
|
SET @@global.debug_dbug="";
|
||||||
|
SET @@global.slave_parallel_threads= @old_parallel_threads;
|
||||||
|
SET @@global.slave_parallel_mode = @old_parallel_mode;
|
||||||
|
include/start_slave.inc
|
||||||
|
connection server_1;
|
||||||
|
DROP TABLE t;
|
||||||
|
include/rpl_end.inc
|
64
mysql-test/suite/rpl/t/parallel_backup_xa_debug.test
Normal file
64
mysql-test/suite/rpl/t/parallel_backup_xa_debug.test
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
# Verify deadlock between XA-PREPARE and BACKUP on the optimistic slave
|
||||||
|
--source include/have_debug.inc
|
||||||
|
--source include/have_debug_sync.inc
|
||||||
|
--source include/have_innodb.inc
|
||||||
|
# The test is not format specific, MIXED is required to optimize testing time
|
||||||
|
--source include/have_binlog_format_mixed.inc
|
||||||
|
--source include/master-slave.inc
|
||||||
|
|
||||||
|
--connection master
|
||||||
|
CREATE TABLE t (a INT) ENGINE = innodb;
|
||||||
|
|
||||||
|
--sync_slave_with_master
|
||||||
|
--source include/stop_slave.inc
|
||||||
|
SET @old_parallel_threads= @@GLOBAL.slave_parallel_threads;
|
||||||
|
SET @old_parallel_mode = @@GLOBAL.slave_parallel_mode;
|
||||||
|
SET @@global.slave_parallel_threads= 2;
|
||||||
|
SET @@global.slave_parallel_mode = 'optimistic';
|
||||||
|
|
||||||
|
--connection master
|
||||||
|
--echo # MDEV-35110
|
||||||
|
SET @@gtid_seq_no=100;
|
||||||
|
insert into t set a=1;
|
||||||
|
xa start 'x';
|
||||||
|
insert into t set a=2;
|
||||||
|
xa end 'x';
|
||||||
|
xa prepare 'x';
|
||||||
|
|
||||||
|
--connection slave
|
||||||
|
SET @@global.debug_dbug="+d,hold_worker_on_schedule";
|
||||||
|
start slave;
|
||||||
|
--let $wait_condition= SELECT count(*) = 1 FROM information_schema.processlist WHERE state LIKE "Waiting for prior transaction to commit"
|
||||||
|
--source include/wait_condition.inc
|
||||||
|
|
||||||
|
--connection slave1
|
||||||
|
backup stage start;
|
||||||
|
--send backup stage block_commit
|
||||||
|
|
||||||
|
--connection slave
|
||||||
|
--let $wait_condition= SELECT count(*) = 1 FROM information_schema.processlist WHERE state LIKE "Waiting for backup lock"
|
||||||
|
SET debug_sync = 'now SIGNAL continue_worker';
|
||||||
|
SET debug_sync = RESET;
|
||||||
|
|
||||||
|
--connection slave1
|
||||||
|
reap;
|
||||||
|
backup stage end;
|
||||||
|
|
||||||
|
--connection master
|
||||||
|
xa rollback 'x';
|
||||||
|
|
||||||
|
--sync_slave_with_master
|
||||||
|
|
||||||
|
--echo # Clean up.
|
||||||
|
--connection slave
|
||||||
|
--source include/stop_slave.inc
|
||||||
|
SET @@global.debug_dbug="";
|
||||||
|
SET @@global.slave_parallel_threads= @old_parallel_threads;
|
||||||
|
SET @@global.slave_parallel_mode = @old_parallel_mode;
|
||||||
|
|
||||||
|
--source include/start_slave.inc
|
||||||
|
|
||||||
|
--connection server_1
|
||||||
|
DROP TABLE t;
|
||||||
|
|
||||||
|
--source include/rpl_end.inc
|
@@ -1781,7 +1781,13 @@ int ha_commit_trans(THD *thd, bool all)
|
|||||||
DBUG_PRINT("info", ("is_real_trans: %d rw_trans: %d rw_ha_count: %d",
|
DBUG_PRINT("info", ("is_real_trans: %d rw_trans: %d rw_ha_count: %d",
|
||||||
is_real_trans, rw_trans, rw_ha_count));
|
is_real_trans, rw_trans, rw_ha_count));
|
||||||
|
|
||||||
if (rw_trans)
|
/*
|
||||||
|
backup_commit_lock may have already been set.
|
||||||
|
This can happen in case of spider that does xa_commit() by
|
||||||
|
calling ha_commit_trans() from spader_commit().
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (rw_trans && !thd->backup_commit_lock)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
Acquire a metadata lock which will ensure that COMMIT is blocked
|
Acquire a metadata lock which will ensure that COMMIT is blocked
|
||||||
@@ -2052,8 +2058,8 @@ end:
|
|||||||
not needed.
|
not needed.
|
||||||
*/
|
*/
|
||||||
thd->mdl_context.release_lock(mdl_backup.ticket);
|
thd->mdl_context.release_lock(mdl_backup.ticket);
|
||||||
|
thd->backup_commit_lock= 0;
|
||||||
}
|
}
|
||||||
thd->backup_commit_lock= 0;
|
|
||||||
#ifdef WITH_WSREP
|
#ifdef WITH_WSREP
|
||||||
if (wsrep_is_active(thd) && is_real_trans && !error &&
|
if (wsrep_is_active(thd) && is_real_trans && !error &&
|
||||||
(rw_ha_count == 0 || all) &&
|
(rw_ha_count == 0 || all) &&
|
||||||
|
120
sql/xa.cc
120
sql/xa.cc
@@ -507,6 +507,40 @@ bool trans_xa_end(THD *thd)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
Get the BACKUP_COMMIT lock for the duration of the XA.
|
||||||
|
|
||||||
|
The metadata lock which will ensure that COMMIT is blocked
|
||||||
|
by active FLUSH TABLES WITH READ LOCK (and vice versa COMMIT in
|
||||||
|
progress blocks FTWRL) and also by MDL_BACKUP_WAIT_COMMIT.
|
||||||
|
We allow FLUSHer to COMMIT; we assume FLUSHer knows what it does.
|
||||||
|
|
||||||
|
Note that the function sets thd->backup_lock on sucess. The caller needs
|
||||||
|
to reset thd->backup_commit_lock before returning!
|
||||||
|
*/
|
||||||
|
|
||||||
|
static bool trans_xa_get_backup_lock(THD *thd, MDL_request *mdl_request)
|
||||||
|
{
|
||||||
|
DBUG_ASSERT(thd->backup_commit_lock == 0);
|
||||||
|
MDL_REQUEST_INIT(mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT,
|
||||||
|
MDL_EXPLICIT);
|
||||||
|
if (thd->mdl_context.acquire_lock(mdl_request,
|
||||||
|
thd->variables.lock_wait_timeout))
|
||||||
|
return 1;
|
||||||
|
thd->backup_commit_lock= mdl_request;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void trans_xa_release_backup_lock(THD *thd)
|
||||||
|
{
|
||||||
|
if (thd->backup_commit_lock)
|
||||||
|
{
|
||||||
|
thd->mdl_context.release_lock(thd->backup_commit_lock->ticket);
|
||||||
|
thd->backup_commit_lock= 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
Put a XA transaction in the PREPARED state.
|
Put a XA transaction in the PREPARED state.
|
||||||
|
|
||||||
@@ -529,22 +563,15 @@ bool trans_xa_prepare(THD *thd)
|
|||||||
my_error(ER_XAER_NOTA, MYF(0));
|
my_error(ER_XAER_NOTA, MYF(0));
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/*
|
|
||||||
Acquire metadata lock which will ensure that COMMIT is blocked
|
|
||||||
by active FLUSH TABLES WITH READ LOCK (and vice versa COMMIT in
|
|
||||||
progress blocks FTWRL).
|
|
||||||
|
|
||||||
We allow FLUSHer to COMMIT; we assume FLUSHer knows what it does.
|
|
||||||
*/
|
|
||||||
MDL_request mdl_request;
|
MDL_request mdl_request;
|
||||||
MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT,
|
if (trans_xa_get_backup_lock(thd, &mdl_request) ||
|
||||||
MDL_STATEMENT);
|
|
||||||
if (thd->mdl_context.acquire_lock(&mdl_request,
|
|
||||||
thd->variables.lock_wait_timeout) ||
|
|
||||||
ha_prepare(thd))
|
ha_prepare(thd))
|
||||||
{
|
{
|
||||||
if (!mdl_request.ticket)
|
if (!mdl_request.ticket)
|
||||||
|
{
|
||||||
|
/* Failed to get the backup lock */
|
||||||
ha_rollback_trans(thd, TRUE);
|
ha_rollback_trans(thd, TRUE);
|
||||||
|
}
|
||||||
thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_KEEP_LOG);
|
thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_KEEP_LOG);
|
||||||
thd->transaction->all.reset();
|
thd->transaction->all.reset();
|
||||||
thd->server_status&=
|
thd->server_status&=
|
||||||
@@ -572,6 +599,7 @@ bool trans_xa_prepare(THD *thd)
|
|||||||
res= thd->variables.pseudo_slave_mode || thd->slave_thread ?
|
res= thd->variables.pseudo_slave_mode || thd->slave_thread ?
|
||||||
slave_applier_reset_xa_trans(thd) : 0;
|
slave_applier_reset_xa_trans(thd) : 0;
|
||||||
}
|
}
|
||||||
|
trans_xa_release_backup_lock(thd);
|
||||||
}
|
}
|
||||||
|
|
||||||
DBUG_RETURN(res);
|
DBUG_RETURN(res);
|
||||||
@@ -635,19 +663,8 @@ bool trans_xa_commit(THD *thd)
|
|||||||
res= 1;
|
res= 1;
|
||||||
goto _end_external_xid;
|
goto _end_external_xid;
|
||||||
}
|
}
|
||||||
|
|
||||||
res= xa_trans_rolled_back(xs);
|
res= xa_trans_rolled_back(xs);
|
||||||
/*
|
if (trans_xa_get_backup_lock(thd, &mdl_request))
|
||||||
Acquire metadata lock which will ensure that COMMIT is blocked
|
|
||||||
by active FLUSH TABLES WITH READ LOCK (and vice versa COMMIT in
|
|
||||||
progress blocks FTWRL).
|
|
||||||
|
|
||||||
We allow FLUSHer to COMMIT; we assume FLUSHer knows what it does.
|
|
||||||
*/
|
|
||||||
MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT,
|
|
||||||
MDL_EXPLICIT);
|
|
||||||
if (thd->mdl_context.acquire_lock(&mdl_request,
|
|
||||||
thd->variables.lock_wait_timeout))
|
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
We can't rollback an XA transaction on lock failure due to
|
We can't rollback an XA transaction on lock failure due to
|
||||||
@@ -659,14 +676,11 @@ bool trans_xa_commit(THD *thd)
|
|||||||
res= true;
|
res= true;
|
||||||
goto _end_external_xid;
|
goto _end_external_xid;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
thd->backup_commit_lock= &mdl_request;
|
|
||||||
}
|
|
||||||
DBUG_ASSERT(!xid_state.xid_cache_element);
|
DBUG_ASSERT(!xid_state.xid_cache_element);
|
||||||
|
|
||||||
xid_state.xid_cache_element= xs;
|
xid_state.xid_cache_element= xs;
|
||||||
ha_commit_or_rollback_by_xid(thd->lex->xid, !res);
|
ha_commit_or_rollback_by_xid(thd->lex->xid, !res);
|
||||||
|
|
||||||
if (!res && thd->is_error())
|
if (!res && thd->is_error())
|
||||||
{
|
{
|
||||||
// hton completion error retains xs/xid in the cache,
|
// hton completion error retains xs/xid in the cache,
|
||||||
@@ -682,11 +696,7 @@ bool trans_xa_commit(THD *thd)
|
|||||||
res= res || thd->is_error();
|
res= res || thd->is_error();
|
||||||
if (!xid_deleted)
|
if (!xid_deleted)
|
||||||
xs->acquired_to_recovered();
|
xs->acquired_to_recovered();
|
||||||
if (mdl_request.ticket)
|
trans_xa_release_backup_lock(thd);
|
||||||
{
|
|
||||||
thd->mdl_context.release_lock(mdl_request.ticket);
|
|
||||||
thd->backup_commit_lock= 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
my_error(ER_XAER_NOTA, MYF(0));
|
my_error(ER_XAER_NOTA, MYF(0));
|
||||||
@@ -709,7 +719,8 @@ bool trans_xa_commit(THD *thd)
|
|||||||
if ((res= MY_TEST(r)))
|
if ((res= MY_TEST(r)))
|
||||||
my_error(r == 1 ? ER_XA_RBROLLBACK : ER_XAER_RMERR, MYF(0));
|
my_error(r == 1 ? ER_XA_RBROLLBACK : ER_XAER_RMERR, MYF(0));
|
||||||
}
|
}
|
||||||
else if (thd->transaction->xid_state.xid_cache_element->xa_state == XA_PREPARED)
|
else if (thd->transaction->xid_state.xid_cache_element->xa_state ==
|
||||||
|
XA_PREPARED)
|
||||||
{
|
{
|
||||||
MDL_request mdl_request;
|
MDL_request mdl_request;
|
||||||
if (thd->lex->xa_opt != XA_NONE)
|
if (thd->lex->xa_opt != XA_NONE)
|
||||||
@@ -718,18 +729,7 @@ bool trans_xa_commit(THD *thd)
|
|||||||
DBUG_RETURN(TRUE);
|
DBUG_RETURN(TRUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
if (trans_xa_get_backup_lock(thd, &mdl_request))
|
||||||
Acquire metadata lock which will ensure that COMMIT is blocked
|
|
||||||
by active FLUSH TABLES WITH READ LOCK (and vice versa COMMIT in
|
|
||||||
progress blocks FTWRL).
|
|
||||||
|
|
||||||
We allow FLUSHer to COMMIT; we assume FLUSHer knows what it does.
|
|
||||||
*/
|
|
||||||
MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT,
|
|
||||||
MDL_TRANSACTION);
|
|
||||||
|
|
||||||
if (thd->mdl_context.acquire_lock(&mdl_request,
|
|
||||||
thd->variables.lock_wait_timeout))
|
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
We can't rollback an XA transaction on lock failure due to
|
We can't rollback an XA transaction on lock failure due to
|
||||||
@@ -756,6 +756,7 @@ bool trans_xa_commit(THD *thd)
|
|||||||
}
|
}
|
||||||
|
|
||||||
thd->m_transaction_psi= NULL;
|
thd->m_transaction_psi= NULL;
|
||||||
|
trans_xa_release_backup_lock(thd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -793,7 +794,8 @@ bool trans_xa_commit(THD *thd)
|
|||||||
bool trans_xa_rollback(THD *thd)
|
bool trans_xa_rollback(THD *thd)
|
||||||
{
|
{
|
||||||
XID_STATE &xid_state= thd->transaction->xid_state;
|
XID_STATE &xid_state= thd->transaction->xid_state;
|
||||||
|
MDL_request mdl_request;
|
||||||
|
bool error;
|
||||||
DBUG_ENTER("trans_xa_rollback");
|
DBUG_ENTER("trans_xa_rollback");
|
||||||
|
|
||||||
if (!xid_state.is_explicit_XA() ||
|
if (!xid_state.is_explicit_XA() ||
|
||||||
@@ -814,7 +816,6 @@ bool trans_xa_rollback(THD *thd)
|
|||||||
{
|
{
|
||||||
bool res;
|
bool res;
|
||||||
bool xid_deleted= false;
|
bool xid_deleted= false;
|
||||||
MDL_request mdl_request;
|
|
||||||
bool rw_trans= (xs->rm_error != ER_XA_RBROLLBACK);
|
bool rw_trans= (xs->rm_error != ER_XA_RBROLLBACK);
|
||||||
|
|
||||||
if (rw_trans && thd->is_read_only_ctx())
|
if (rw_trans && thd->is_read_only_ctx())
|
||||||
@@ -824,10 +825,7 @@ bool trans_xa_rollback(THD *thd)
|
|||||||
goto _end_external_xid;
|
goto _end_external_xid;
|
||||||
}
|
}
|
||||||
|
|
||||||
MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT,
|
if (trans_xa_get_backup_lock(thd, &mdl_request))
|
||||||
MDL_EXPLICIT);
|
|
||||||
if (thd->mdl_context.acquire_lock(&mdl_request,
|
|
||||||
thd->variables.lock_wait_timeout))
|
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
We can't rollback an XA transaction on lock failure due to
|
We can't rollback an XA transaction on lock failure due to
|
||||||
@@ -838,10 +836,6 @@ bool trans_xa_rollback(THD *thd)
|
|||||||
|
|
||||||
goto _end_external_xid;
|
goto _end_external_xid;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
thd->backup_commit_lock= &mdl_request;
|
|
||||||
}
|
|
||||||
res= xa_trans_rolled_back(xs);
|
res= xa_trans_rolled_back(xs);
|
||||||
DBUG_ASSERT(!xid_state.xid_cache_element);
|
DBUG_ASSERT(!xid_state.xid_cache_element);
|
||||||
|
|
||||||
@@ -858,11 +852,7 @@ bool trans_xa_rollback(THD *thd)
|
|||||||
xid_state.xid_cache_element= 0;
|
xid_state.xid_cache_element= 0;
|
||||||
if (!xid_deleted)
|
if (!xid_deleted)
|
||||||
xs->acquired_to_recovered();
|
xs->acquired_to_recovered();
|
||||||
if (mdl_request.ticket)
|
trans_xa_release_backup_lock(thd);
|
||||||
{
|
|
||||||
thd->mdl_context.release_lock(mdl_request.ticket);
|
|
||||||
thd->backup_commit_lock= 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
my_error(ER_XAER_NOTA, MYF(0));
|
my_error(ER_XAER_NOTA, MYF(0));
|
||||||
@@ -879,11 +869,7 @@ bool trans_xa_rollback(THD *thd)
|
|||||||
DBUG_RETURN(TRUE);
|
DBUG_RETURN(TRUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
MDL_request mdl_request;
|
if (trans_xa_get_backup_lock(thd, &mdl_request))
|
||||||
MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT,
|
|
||||||
MDL_STATEMENT);
|
|
||||||
if (thd->mdl_context.acquire_lock(&mdl_request,
|
|
||||||
thd->variables.lock_wait_timeout))
|
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
We can't rollback an XA transaction on lock failure due to
|
We can't rollback an XA transaction on lock failure due to
|
||||||
@@ -894,7 +880,9 @@ bool trans_xa_rollback(THD *thd)
|
|||||||
DBUG_RETURN(true);
|
DBUG_RETURN(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
DBUG_RETURN(xa_trans_force_rollback(thd));
|
error= xa_trans_force_rollback(thd);
|
||||||
|
trans_xa_release_backup_lock(thd);
|
||||||
|
DBUG_RETURN(error);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user