diff --git a/mysql-test/suite/rpl/r/parallel_backup_xa_debug.result b/mysql-test/suite/rpl/r/parallel_backup_xa_debug.result new file mode 100644 index 00000000000..aa5ff772552 --- /dev/null +++ b/mysql-test/suite/rpl/r/parallel_backup_xa_debug.result @@ -0,0 +1,42 @@ +include/master-slave.inc +[connection master] +connection master; +CREATE TABLE t (a INT) ENGINE = innodb; +connection slave; +include/stop_slave.inc +SET @old_parallel_threads= @@GLOBAL.slave_parallel_threads; +SET @old_parallel_mode = @@GLOBAL.slave_parallel_mode; +SET @@global.slave_parallel_threads= 2; +SET @@global.slave_parallel_mode = 'optimistic'; +connection master; +# MDEV-35110 +SET @@gtid_seq_no=100; +insert into t set a=1; +xa start 'x'; +insert into t set a=2; +xa end 'x'; +xa prepare 'x'; +connection slave; +SET @@global.debug_dbug="+d,hold_worker_on_schedule"; +start slave; +connection slave1; +backup stage start; +backup stage block_commit; +connection slave; +SET debug_sync = 'now SIGNAL continue_worker'; +SET debug_sync = RESET; +connection slave1; +backup stage end; +connection master; +xa rollback 'x'; +connection slave; +# Clean up. +connection slave; +include/stop_slave.inc +SET @@global.debug_dbug=""; +SET @@global.slave_parallel_threads= @old_parallel_threads; +SET @@global.slave_parallel_mode = @old_parallel_mode; +include/start_slave.inc +connection server_1; +DROP TABLE t; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/parallel_backup_xa_debug.test b/mysql-test/suite/rpl/t/parallel_backup_xa_debug.test new file mode 100644 index 00000000000..03fc66d89e7 --- /dev/null +++ b/mysql-test/suite/rpl/t/parallel_backup_xa_debug.test @@ -0,0 +1,64 @@ +# Verify deadlock between XA-PREPARE and BACKUP on the optimistic slave +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/have_innodb.inc +# The test is not format specific, MIXED is required to optimize testing time +--source include/have_binlog_format_mixed.inc +--source include/master-slave.inc + +--connection master +CREATE TABLE t (a INT) ENGINE = innodb; + +--sync_slave_with_master +--source include/stop_slave.inc +SET @old_parallel_threads= @@GLOBAL.slave_parallel_threads; +SET @old_parallel_mode = @@GLOBAL.slave_parallel_mode; +SET @@global.slave_parallel_threads= 2; +SET @@global.slave_parallel_mode = 'optimistic'; + +--connection master +--echo # MDEV-35110 +SET @@gtid_seq_no=100; +insert into t set a=1; +xa start 'x'; + insert into t set a=2; +xa end 'x'; +xa prepare 'x'; + +--connection slave +SET @@global.debug_dbug="+d,hold_worker_on_schedule"; +start slave; +--let $wait_condition= SELECT count(*) = 1 FROM information_schema.processlist WHERE state LIKE "Waiting for prior transaction to commit" +--source include/wait_condition.inc + +--connection slave1 +backup stage start; +--send backup stage block_commit + +--connection slave +--let $wait_condition= SELECT count(*) = 1 FROM information_schema.processlist WHERE state LIKE "Waiting for backup lock" +SET debug_sync = 'now SIGNAL continue_worker'; +SET debug_sync = RESET; + +--connection slave1 +reap; +backup stage end; + +--connection master +xa rollback 'x'; + +--sync_slave_with_master + +--echo # Clean up. +--connection slave +--source include/stop_slave.inc +SET @@global.debug_dbug=""; +SET @@global.slave_parallel_threads= @old_parallel_threads; +SET @@global.slave_parallel_mode = @old_parallel_mode; + +--source include/start_slave.inc + +--connection server_1 +DROP TABLE t; + +--source include/rpl_end.inc diff --git a/sql/handler.cc b/sql/handler.cc index 0384b41617e..470a80f4fa0 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -1781,7 +1781,13 @@ int ha_commit_trans(THD *thd, bool all) DBUG_PRINT("info", ("is_real_trans: %d rw_trans: %d rw_ha_count: %d", is_real_trans, rw_trans, rw_ha_count)); - if (rw_trans) + /* + backup_commit_lock may have already been set. + This can happen in case of spider that does xa_commit() by + calling ha_commit_trans() from spader_commit(). + */ + + if (rw_trans && !thd->backup_commit_lock) { /* Acquire a metadata lock which will ensure that COMMIT is blocked @@ -2052,8 +2058,8 @@ end: not needed. */ thd->mdl_context.release_lock(mdl_backup.ticket); + thd->backup_commit_lock= 0; } - thd->backup_commit_lock= 0; #ifdef WITH_WSREP if (wsrep_is_active(thd) && is_real_trans && !error && (rw_ha_count == 0 || all) && diff --git a/sql/xa.cc b/sql/xa.cc index 622e624bb98..6beb0944baf 100644 --- a/sql/xa.cc +++ b/sql/xa.cc @@ -507,6 +507,40 @@ bool trans_xa_end(THD *thd) } +/* + Get the BACKUP_COMMIT lock for the duration of the XA. + + The metadata lock which will ensure that COMMIT is blocked + by active FLUSH TABLES WITH READ LOCK (and vice versa COMMIT in + progress blocks FTWRL) and also by MDL_BACKUP_WAIT_COMMIT. + We allow FLUSHer to COMMIT; we assume FLUSHer knows what it does. + + Note that the function sets thd->backup_lock on sucess. The caller needs + to reset thd->backup_commit_lock before returning! +*/ + +static bool trans_xa_get_backup_lock(THD *thd, MDL_request *mdl_request) +{ + DBUG_ASSERT(thd->backup_commit_lock == 0); + MDL_REQUEST_INIT(mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, + MDL_EXPLICIT); + if (thd->mdl_context.acquire_lock(mdl_request, + thd->variables.lock_wait_timeout)) + return 1; + thd->backup_commit_lock= mdl_request; + return 0; +} + +static inline void trans_xa_release_backup_lock(THD *thd) +{ + if (thd->backup_commit_lock) + { + thd->mdl_context.release_lock(thd->backup_commit_lock->ticket); + thd->backup_commit_lock= 0; + } +} + + /** Put a XA transaction in the PREPARED state. @@ -529,22 +563,15 @@ bool trans_xa_prepare(THD *thd) my_error(ER_XAER_NOTA, MYF(0)); else { - /* - Acquire metadata lock which will ensure that COMMIT is blocked - by active FLUSH TABLES WITH READ LOCK (and vice versa COMMIT in - progress blocks FTWRL). - - We allow FLUSHer to COMMIT; we assume FLUSHer knows what it does. - */ MDL_request mdl_request; - MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, - MDL_STATEMENT); - if (thd->mdl_context.acquire_lock(&mdl_request, - thd->variables.lock_wait_timeout) || + if (trans_xa_get_backup_lock(thd, &mdl_request) || ha_prepare(thd)) { if (!mdl_request.ticket) + { + /* Failed to get the backup lock */ ha_rollback_trans(thd, TRUE); + } thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_KEEP_LOG); thd->transaction->all.reset(); thd->server_status&= @@ -572,6 +599,7 @@ bool trans_xa_prepare(THD *thd) res= thd->variables.pseudo_slave_mode || thd->slave_thread ? slave_applier_reset_xa_trans(thd) : 0; } + trans_xa_release_backup_lock(thd); } DBUG_RETURN(res); @@ -635,19 +663,8 @@ bool trans_xa_commit(THD *thd) res= 1; goto _end_external_xid; } - res= xa_trans_rolled_back(xs); - /* - Acquire metadata lock which will ensure that COMMIT is blocked - by active FLUSH TABLES WITH READ LOCK (and vice versa COMMIT in - progress blocks FTWRL). - - We allow FLUSHer to COMMIT; we assume FLUSHer knows what it does. - */ - MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, - MDL_EXPLICIT); - if (thd->mdl_context.acquire_lock(&mdl_request, - thd->variables.lock_wait_timeout)) + if (trans_xa_get_backup_lock(thd, &mdl_request)) { /* We can't rollback an XA transaction on lock failure due to @@ -659,14 +676,11 @@ bool trans_xa_commit(THD *thd) res= true; goto _end_external_xid; } - else - { - thd->backup_commit_lock= &mdl_request; - } DBUG_ASSERT(!xid_state.xid_cache_element); xid_state.xid_cache_element= xs; ha_commit_or_rollback_by_xid(thd->lex->xid, !res); + if (!res && thd->is_error()) { // hton completion error retains xs/xid in the cache, @@ -682,11 +696,7 @@ bool trans_xa_commit(THD *thd) res= res || thd->is_error(); if (!xid_deleted) xs->acquired_to_recovered(); - if (mdl_request.ticket) - { - thd->mdl_context.release_lock(mdl_request.ticket); - thd->backup_commit_lock= 0; - } + trans_xa_release_backup_lock(thd); } else my_error(ER_XAER_NOTA, MYF(0)); @@ -709,7 +719,8 @@ bool trans_xa_commit(THD *thd) if ((res= MY_TEST(r))) my_error(r == 1 ? ER_XA_RBROLLBACK : ER_XAER_RMERR, MYF(0)); } - else if (thd->transaction->xid_state.xid_cache_element->xa_state == XA_PREPARED) + else if (thd->transaction->xid_state.xid_cache_element->xa_state == + XA_PREPARED) { MDL_request mdl_request; if (thd->lex->xa_opt != XA_NONE) @@ -718,18 +729,7 @@ bool trans_xa_commit(THD *thd) DBUG_RETURN(TRUE); } - /* - Acquire metadata lock which will ensure that COMMIT is blocked - by active FLUSH TABLES WITH READ LOCK (and vice versa COMMIT in - progress blocks FTWRL). - - We allow FLUSHer to COMMIT; we assume FLUSHer knows what it does. - */ - MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, - MDL_TRANSACTION); - - if (thd->mdl_context.acquire_lock(&mdl_request, - thd->variables.lock_wait_timeout)) + if (trans_xa_get_backup_lock(thd, &mdl_request)) { /* We can't rollback an XA transaction on lock failure due to @@ -756,6 +756,7 @@ bool trans_xa_commit(THD *thd) } thd->m_transaction_psi= NULL; + trans_xa_release_backup_lock(thd); } } else @@ -793,7 +794,8 @@ bool trans_xa_commit(THD *thd) bool trans_xa_rollback(THD *thd) { XID_STATE &xid_state= thd->transaction->xid_state; - + MDL_request mdl_request; + bool error; DBUG_ENTER("trans_xa_rollback"); if (!xid_state.is_explicit_XA() || @@ -814,7 +816,6 @@ bool trans_xa_rollback(THD *thd) { bool res; bool xid_deleted= false; - MDL_request mdl_request; bool rw_trans= (xs->rm_error != ER_XA_RBROLLBACK); if (rw_trans && thd->is_read_only_ctx()) @@ -824,10 +825,7 @@ bool trans_xa_rollback(THD *thd) goto _end_external_xid; } - MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, - MDL_EXPLICIT); - if (thd->mdl_context.acquire_lock(&mdl_request, - thd->variables.lock_wait_timeout)) + if (trans_xa_get_backup_lock(thd, &mdl_request)) { /* We can't rollback an XA transaction on lock failure due to @@ -838,10 +836,6 @@ bool trans_xa_rollback(THD *thd) goto _end_external_xid; } - else - { - thd->backup_commit_lock= &mdl_request; - } res= xa_trans_rolled_back(xs); DBUG_ASSERT(!xid_state.xid_cache_element); @@ -858,11 +852,7 @@ bool trans_xa_rollback(THD *thd) xid_state.xid_cache_element= 0; if (!xid_deleted) xs->acquired_to_recovered(); - if (mdl_request.ticket) - { - thd->mdl_context.release_lock(mdl_request.ticket); - thd->backup_commit_lock= 0; - } + trans_xa_release_backup_lock(thd); } else my_error(ER_XAER_NOTA, MYF(0)); @@ -879,11 +869,7 @@ bool trans_xa_rollback(THD *thd) DBUG_RETURN(TRUE); } - MDL_request mdl_request; - MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, - MDL_STATEMENT); - if (thd->mdl_context.acquire_lock(&mdl_request, - thd->variables.lock_wait_timeout)) + if (trans_xa_get_backup_lock(thd, &mdl_request)) { /* We can't rollback an XA transaction on lock failure due to @@ -894,7 +880,9 @@ bool trans_xa_rollback(THD *thd) DBUG_RETURN(true); } - DBUG_RETURN(xa_trans_force_rollback(thd)); + error= xa_trans_force_rollback(thd); + trans_xa_release_backup_lock(thd); + DBUG_RETURN(error); }