1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-30 16:24:05 +03:00

MDEV-23586 Mariabackup: GTID saved for replication in 10.4.14 is wrong

MDEV-21953 deadlock between BACKUP STAGE BLOCK_COMMIT and parallel
replication

Fixed by partly reverting MDEV-21953 to put back MDL_BACKUP_COMMIT locking
before log_and_order.

The original problem for MDEV-21953 was that while a thread was waiting
for other threads to commit in 'log_and_order', it held the
MDL_BACKUP_COMMIT lock. The backup thread was waiting to get the
MDL_BACKUP_WAIT_COMMIT lock, which blocks all new MDL_BACKUP_COMMIT locks.
This causes a deadlock as the waited-for thread can never get past the
MDL_BACKUP_COMMIT lock in ha_commit_trans.

The main part of the bug fix is to release the MDL_BACKUP_COMMIT lock while
a thread is waiting for other 'previous' threads to commit. This ensures
that no transactional thread keeps MDL_BACKUP_COMMIT while waiting, which
ensures that there are no deadlocks anymore.
This commit is contained in:
Monty
2020-09-07 10:38:12 +03:00
parent 3cdbaa04bd
commit 16ea692ed4
6 changed files with 134 additions and 59 deletions

View File

@ -114,7 +114,7 @@ static TYPELIB known_extensions= {0,"known_exts", NULL, NULL};
uint known_extensions_id= 0;
static int commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans,
bool is_real_trans, bool rw_trans);
bool is_real_trans);
static plugin_ref ha_default_plugin(THD *thd)
@ -1490,9 +1490,40 @@ int ha_commit_trans(THD *thd, bool all)
/* rw_trans is TRUE when we in a transaction changing data */
bool rw_trans= is_real_trans &&
(rw_ha_count > (thd->is_current_stmt_binlog_disabled()?0U:1U));
MDL_request mdl_backup;
DBUG_PRINT("info", ("is_real_trans: %d rw_trans: %d rw_ha_count: %d",
is_real_trans, rw_trans, rw_ha_count));
if (rw_trans)
{
/*
Acquire a metadata lock which will ensure that COMMIT is blocked
by an active FLUSH TABLES WITH READ LOCK (and vice versa:
COMMIT in progress blocks FTWRL).
We allow the owner of FTWRL to COMMIT; we assume that it knows
what it does.
*/
mdl_backup.init(MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, MDL_EXPLICIT);
if (!WSREP(thd))
{
if (thd->mdl_context.acquire_lock(&mdl_backup,
thd->variables.lock_wait_timeout))
{
ha_rollback_trans(thd, all);
DBUG_RETURN(1);
}
thd->backup_commit_lock= &mdl_backup;
}
DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock");
/* Use shortcut as we already have the MDL_BACKUP_COMMIT lock */
ha_maria::implicit_commit(thd, TRUE);
}
else
ha_maria_implicit_commit(thd, TRUE);
if (rw_trans &&
opt_readonly &&
!(thd->security_ctx->master_access & SUPER_ACL) &&
@ -1532,7 +1563,7 @@ int ha_commit_trans(THD *thd, bool all)
// Here, the call will not commit inside InnoDB. It is only working
// around closing thd->transaction.stmt open by TR_table::open().
if (all)
commit_one_phase_2(thd, false, &thd->transaction.stmt, false, false);
commit_one_phase_2(thd, false, &thd->transaction.stmt, false);
}
}
#endif
@ -1552,7 +1583,7 @@ int ha_commit_trans(THD *thd, bool all)
goto wsrep_err;
}
#endif /* WITH_WSREP */
error= ha_commit_one_phase(thd, all, rw_trans);
error= ha_commit_one_phase(thd, all);
#ifdef WITH_WSREP
if (run_wsrep_hooks)
error= error || wsrep_after_commit(thd, all);
@ -1604,7 +1635,7 @@ int ha_commit_trans(THD *thd, bool all)
if (!is_real_trans)
{
error= commit_one_phase_2(thd, all, trans, is_real_trans, rw_trans);
error= commit_one_phase_2(thd, all, trans, is_real_trans);
goto done;
}
#ifdef WITH_WSREP
@ -1622,7 +1653,7 @@ int ha_commit_trans(THD *thd, bool all)
DEBUG_SYNC(thd, "ha_commit_trans_after_log_and_order");
DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
error= commit_one_phase_2(thd, all, trans, is_real_trans, rw_trans) ? 2 : 0;
error= commit_one_phase_2(thd, all, trans, is_real_trans) ? 2 : 0;
#ifdef WITH_WSREP
if (run_wsrep_hooks && (error || (error = wsrep_after_commit(thd, all))))
{
@ -1685,6 +1716,17 @@ err:
thd->rgi_slave->is_parallel_exec);
}
end:
if (mdl_backup.ticket)
{
/*
We do not always immediately release transactional locks
after ha_commit_trans() (see uses of ha_enable_transaction()),
thus we release the commit blocker lock as soon as it's
not needed.
*/
thd->mdl_context.release_lock(mdl_backup.ticket);
}
thd->backup_commit_lock= 0;
#ifdef WITH_WSREP
if (wsrep_is_active(thd) && is_real_trans && !error &&
(rw_ha_count == 0 || all) &&
@ -1699,8 +1741,8 @@ end:
/**
@note
This function does not care about global read lock. A caller should.
However backup locks are handled in commit_one_phase_2.
This function does not care about global read lock or backup locks,
the caller should.
@param[in] all Is set in case of explicit commit
(COMMIT statement), or implicit commit
@ -1709,7 +1751,7 @@ end:
autocommit=1.
*/
int ha_commit_one_phase(THD *thd, bool all, bool rw_trans)
int ha_commit_one_phase(THD *thd, bool all)
{
THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
/*
@ -1735,50 +1777,21 @@ int ha_commit_one_phase(THD *thd, bool all, bool rw_trans)
if ((res= thd->wait_for_prior_commit()))
DBUG_RETURN(res);
}
res= commit_one_phase_2(thd, all, trans, is_real_trans, rw_trans);
res= commit_one_phase_2(thd, all, trans, is_real_trans);
DBUG_RETURN(res);
}
static int
commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans,
bool rw_trans)
commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans)
{
int error= 0;
uint count= 0;
Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
MDL_request mdl_request;
DBUG_ENTER("commit_one_phase_2");
if (is_real_trans)
DEBUG_SYNC(thd, "commit_one_phase_2");
if (rw_trans)
{
/*
Acquire a metadata lock which will ensure that COMMIT is blocked
by an active FLUSH TABLES WITH READ LOCK (and vice versa:
COMMIT in progress blocks FTWRL).
We allow the owner of FTWRL to COMMIT; we assume that it knows
what it does.
*/
mdl_request.init(MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, MDL_EXPLICIT);
if (!WSREP(thd) &&
thd->mdl_context.acquire_lock(&mdl_request,
thd->variables.lock_wait_timeout))
{
my_error(ER_ERROR_DURING_COMMIT, MYF(0), 1);
ha_rollback_trans(thd, all);
DBUG_RETURN(1);
}
DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock");
}
#if defined(WITH_ARIA_STORAGE_ENGINE) && MYSQL_VERSION_ID < 100500
ha_maria::implicit_commit(thd, TRUE);
#endif
if (ha_info)
{
for (; ha_info; ha_info= ha_info_next)
@ -1807,16 +1820,6 @@ commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans,
#endif
}
}
if (mdl_request.ticket)
{
/*
We do not always immediately release transactional locks
after ha_commit_trans() (see uses of ha_enable_transaction()),
thus we release the commit blocker lock as soon as it's
not needed.
*/
thd->mdl_context.release_lock(mdl_request.ticket);
}
/* Free resources and perform other cleanup even for 'empty' transactions. */
if (is_real_trans)