mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
MDEV-21953 deadlock between BACKUP STAGE BLOCK_COMMIT and parallel repl.
The issue was: T1, a parallel slave worker thread, is waiting for another worker thread to commit. While waiting, it has the MDL_BACKUP_COMMIT lock. T2, working for mariabackup, is doing BACKUP STAGE BLOCK_COMMIT and blocks all commits. This causes a deadlock as the thread T1 is waiting for can't commit. Fixed by moving locking of MDL_BACKUP_COMMIT from ha_commit_trans() to commit_one_phase_2() Other things: - Added a new argument to ha_comit_one_phase() to signal if the transaction was a write transaction. - Ensured that ha_maria::implicit_commit() is always called under MDL_BACKUP_COMMIT. This code is not needed in 10.5 - Ensure that MDL_Request values 'type' and 'ticket' are always initialized. This makes it easier to check the state of the MDL_Request. - Moved thd->store_globals() earlier in handle_rpl_parallel_thread() as thd->init_for_queries() could use a MDL that could crash if store_globals where not called. - Don't call ha_enable_transactions() in THD::init_for_queries() as this is both slow (uses MDL locks) and not needed.
This commit is contained in:
117
sql/handler.cc
117
sql/handler.cc
@ -114,7 +114,7 @@ static TYPELIB known_extensions= {0,"known_exts", NULL, NULL};
|
||||
uint known_extensions_id= 0;
|
||||
|
||||
static int commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans,
|
||||
bool is_real_trans);
|
||||
bool is_real_trans, bool rw_trans);
|
||||
|
||||
|
||||
static plugin_ref ha_default_plugin(THD *thd)
|
||||
@ -133,6 +133,23 @@ static plugin_ref ha_default_tmp_plugin(THD *thd)
|
||||
return ha_default_plugin(thd);
|
||||
}
|
||||
|
||||
#if defined(WITH_ARIA_STORAGE_ENGINE) && MYSQL_VERSION_ID < 100500
|
||||
void ha_maria_implicit_commit(THD *thd, bool new_trn)
|
||||
{
|
||||
if (ha_maria::has_active_transaction(thd))
|
||||
{
|
||||
int error;
|
||||
MDL_request mdl_request;
|
||||
mdl_request.init(MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, MDL_EXPLICIT);
|
||||
error= thd->mdl_context.acquire_lock(&mdl_request,
|
||||
thd->variables.lock_wait_timeout);
|
||||
ha_maria::implicit_commit(thd, new_trn);
|
||||
if (!error)
|
||||
thd->mdl_context.release_lock(mdl_request.ticket);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/** @brief
|
||||
Return the default storage engine handlerton for thread
|
||||
@ -1445,10 +1462,6 @@ int ha_commit_trans(THD *thd, bool all)
|
||||
DBUG_RETURN(2);
|
||||
}
|
||||
|
||||
#ifdef WITH_ARIA_STORAGE_ENGINE
|
||||
ha_maria::implicit_commit(thd, TRUE);
|
||||
#endif
|
||||
|
||||
if (!ha_info)
|
||||
{
|
||||
/*
|
||||
@ -1462,7 +1475,9 @@ int ha_commit_trans(THD *thd, bool all)
|
||||
wsrep_commit_empty(thd, all);
|
||||
}
|
||||
#endif /* WITH_WSREP */
|
||||
DBUG_RETURN(0);
|
||||
|
||||
ha_maria_implicit_commit(thd, TRUE);
|
||||
DBUG_RETURN(error);
|
||||
}
|
||||
|
||||
DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););
|
||||
@ -1475,33 +1490,9 @@ int ha_commit_trans(THD *thd, bool all)
|
||||
/* rw_trans is TRUE when we in a transaction changing data */
|
||||
bool rw_trans= is_real_trans &&
|
||||
(rw_ha_count > (thd->is_current_stmt_binlog_disabled()?0U:1U));
|
||||
MDL_request mdl_request;
|
||||
DBUG_PRINT("info", ("is_real_trans: %d rw_trans: %d rw_ha_count: %d",
|
||||
is_real_trans, rw_trans, rw_ha_count));
|
||||
|
||||
if (rw_trans)
|
||||
{
|
||||
/*
|
||||
Acquire a metadata lock which will ensure that COMMIT is blocked
|
||||
by an active FLUSH TABLES WITH READ LOCK (and vice versa:
|
||||
COMMIT in progress blocks FTWRL).
|
||||
|
||||
We allow the owner of FTWRL to COMMIT; we assume that it knows
|
||||
what it does.
|
||||
*/
|
||||
mdl_request.init(MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, MDL_EXPLICIT);
|
||||
|
||||
if (!WSREP(thd) &&
|
||||
thd->mdl_context.acquire_lock(&mdl_request,
|
||||
thd->variables.lock_wait_timeout))
|
||||
{
|
||||
ha_rollback_trans(thd, all);
|
||||
DBUG_RETURN(1);
|
||||
}
|
||||
|
||||
DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock");
|
||||
}
|
||||
|
||||
if (rw_trans &&
|
||||
opt_readonly &&
|
||||
!(thd->security_ctx->master_access & SUPER_ACL) &&
|
||||
@ -1541,7 +1532,7 @@ int ha_commit_trans(THD *thd, bool all)
|
||||
// Here, the call will not commit inside InnoDB. It is only working
|
||||
// around closing thd->transaction.stmt open by TR_table::open().
|
||||
if (all)
|
||||
commit_one_phase_2(thd, false, &thd->transaction.stmt, false);
|
||||
commit_one_phase_2(thd, false, &thd->transaction.stmt, false, false);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -1561,7 +1552,7 @@ int ha_commit_trans(THD *thd, bool all)
|
||||
goto wsrep_err;
|
||||
}
|
||||
#endif /* WITH_WSREP */
|
||||
error= ha_commit_one_phase(thd, all);
|
||||
error= ha_commit_one_phase(thd, all, rw_trans);
|
||||
#ifdef WITH_WSREP
|
||||
if (run_wsrep_hooks)
|
||||
error= error || wsrep_after_commit(thd, all);
|
||||
@ -1613,7 +1604,7 @@ int ha_commit_trans(THD *thd, bool all)
|
||||
|
||||
if (!is_real_trans)
|
||||
{
|
||||
error= commit_one_phase_2(thd, all, trans, is_real_trans);
|
||||
error= commit_one_phase_2(thd, all, trans, is_real_trans, rw_trans);
|
||||
goto done;
|
||||
}
|
||||
#ifdef WITH_WSREP
|
||||
@ -1631,7 +1622,7 @@ int ha_commit_trans(THD *thd, bool all)
|
||||
DEBUG_SYNC(thd, "ha_commit_trans_after_log_and_order");
|
||||
DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
|
||||
|
||||
error= commit_one_phase_2(thd, all, trans, is_real_trans) ? 2 : 0;
|
||||
error= commit_one_phase_2(thd, all, trans, is_real_trans, rw_trans) ? 2 : 0;
|
||||
#ifdef WITH_WSREP
|
||||
if (run_wsrep_hooks && (error || (error = wsrep_after_commit(thd, all))))
|
||||
{
|
||||
@ -1694,16 +1685,6 @@ err:
|
||||
thd->rgi_slave->is_parallel_exec);
|
||||
}
|
||||
end:
|
||||
if (rw_trans && mdl_request.ticket)
|
||||
{
|
||||
/*
|
||||
We do not always immediately release transactional locks
|
||||
after ha_commit_trans() (see uses of ha_enable_transaction()),
|
||||
thus we release the commit blocker lock as soon as it's
|
||||
not needed.
|
||||
*/
|
||||
thd->mdl_context.release_lock(mdl_request.ticket);
|
||||
}
|
||||
#ifdef WITH_WSREP
|
||||
if (wsrep_is_active(thd) && is_real_trans && !error &&
|
||||
(rw_ha_count == 0 || all) &&
|
||||
@ -1719,6 +1700,7 @@ end:
|
||||
/**
|
||||
@note
|
||||
This function does not care about global read lock. A caller should.
|
||||
However backup locks are handled in commit_one_phase_2.
|
||||
|
||||
@param[in] all Is set in case of explicit commit
|
||||
(COMMIT statement), or implicit commit
|
||||
@ -1727,7 +1709,7 @@ end:
|
||||
autocommit=1.
|
||||
*/
|
||||
|
||||
int ha_commit_one_phase(THD *thd, bool all)
|
||||
int ha_commit_one_phase(THD *thd, bool all, bool rw_trans)
|
||||
{
|
||||
THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
|
||||
/*
|
||||
@ -1753,20 +1735,50 @@ int ha_commit_one_phase(THD *thd, bool all)
|
||||
if ((res= thd->wait_for_prior_commit()))
|
||||
DBUG_RETURN(res);
|
||||
}
|
||||
res= commit_one_phase_2(thd, all, trans, is_real_trans);
|
||||
res= commit_one_phase_2(thd, all, trans, is_real_trans, rw_trans);
|
||||
DBUG_RETURN(res);
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans)
|
||||
commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans,
|
||||
bool rw_trans)
|
||||
{
|
||||
int error= 0;
|
||||
uint count= 0;
|
||||
Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
|
||||
MDL_request mdl_request;
|
||||
DBUG_ENTER("commit_one_phase_2");
|
||||
if (is_real_trans)
|
||||
DEBUG_SYNC(thd, "commit_one_phase_2");
|
||||
|
||||
if (rw_trans)
|
||||
{
|
||||
/*
|
||||
Acquire a metadata lock which will ensure that COMMIT is blocked
|
||||
by an active FLUSH TABLES WITH READ LOCK (and vice versa:
|
||||
COMMIT in progress blocks FTWRL).
|
||||
|
||||
We allow the owner of FTWRL to COMMIT; we assume that it knows
|
||||
what it does.
|
||||
*/
|
||||
mdl_request.init(MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, MDL_EXPLICIT);
|
||||
|
||||
if (!WSREP(thd) &&
|
||||
thd->mdl_context.acquire_lock(&mdl_request,
|
||||
thd->variables.lock_wait_timeout))
|
||||
{
|
||||
my_error(ER_ERROR_DURING_COMMIT, MYF(0), 1);
|
||||
ha_rollback_trans(thd, all);
|
||||
DBUG_RETURN(1);
|
||||
}
|
||||
DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock");
|
||||
}
|
||||
|
||||
#if defined(WITH_ARIA_STORAGE_ENGINE) && MYSQL_VERSION_ID < 100500
|
||||
ha_maria::implicit_commit(thd, TRUE);
|
||||
#endif
|
||||
|
||||
if (ha_info)
|
||||
{
|
||||
for (; ha_info; ha_info= ha_info_next)
|
||||
@ -1795,6 +1807,17 @@ commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans)
|
||||
#endif
|
||||
}
|
||||
}
|
||||
if (mdl_request.ticket)
|
||||
{
|
||||
/*
|
||||
We do not always immediately release transactional locks
|
||||
after ha_commit_trans() (see uses of ha_enable_transaction()),
|
||||
thus we release the commit blocker lock as soon as it's
|
||||
not needed.
|
||||
*/
|
||||
thd->mdl_context.release_lock(mdl_request.ticket);
|
||||
}
|
||||
|
||||
/* Free resources and perform other cleanup even for 'empty' transactions. */
|
||||
if (is_real_trans)
|
||||
{
|
||||
|
Reference in New Issue
Block a user