1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-29 05:21:33 +03:00

MDEV-16005 sporadic failures with galera tests MW-328B and MW-328C

These test can sporadically show mutex deadlock warnings between LOCK_wsrep_thd
and LOCK_thd_data mutexes. This means that these mutexes can be locked in opposite
order by different threads, and thus result in deadlock situation.
To fix such issue, the locking policy of these mutexes should be revised and
enforced to be uniform. However, a quick code review shows that the number of
lock/unlock operations for these mutexes combined is between 100-200, and all these
mutex invocations should be checked/fixed.

On the other hand, it turns out that LOCK_wsrep_thd is used for protecting access to
wsrep variables of THD (wsrep_conflict_state, wsrep_query_state), whereas LOCK_thd_data
protects query, db and mysys_var variables in THD. Extending LOCK_thd_data to protect
also wsrep variables looks like a viable solution, as there should not be a use case
where separate threads need simultaneous access to wsrep variables and THD data variables.

In this commit LOCK_wsrep_thd mutex is refactored to be replaced by LOCK_thd_data.
By bluntly replacing LOCK_wsrep_thd by LOCK_thd_data, will result in double locking
of LOCK_thd_data, and some adjustements have been performed to fix such situations.
This commit is contained in:
sjaakola
2018-04-24 10:26:34 +03:00
parent 82d4f08186
commit 2f0b8f3e02
14 changed files with 94 additions and 106 deletions

View File

@ -939,13 +939,13 @@ bool do_command(THD *thd)
#ifdef WITH_WSREP
if (WSREP(thd))
{
mysql_mutex_lock(&thd->LOCK_wsrep_thd);
mysql_mutex_lock(&thd->LOCK_thd_data);
thd->wsrep_query_state= QUERY_IDLE;
if (thd->wsrep_conflict_state==MUST_ABORT)
{
wsrep_client_rollback(thd);
}
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
}
#endif /* WITH_WSREP */
@ -991,15 +991,15 @@ bool do_command(THD *thd)
packet_length= my_net_read_packet(net, 1);
#ifdef WITH_WSREP
if (WSREP(thd)) {
mysql_mutex_lock(&thd->LOCK_wsrep_thd);
mysql_mutex_lock(&thd->LOCK_thd_data);
/* these THD's are aborted or are aborting during being idle */
if (thd->wsrep_conflict_state == ABORTING)
{
while (thd->wsrep_conflict_state == ABORTING) {
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
my_sleep(1000);
mysql_mutex_lock(&thd->LOCK_wsrep_thd);
mysql_mutex_lock(&thd->LOCK_thd_data);
}
thd->store_globals();
}
@ -1009,7 +1009,7 @@ bool do_command(THD *thd)
}
thd->wsrep_query_state= QUERY_EXEC;
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
}
#endif /* WITH_WSREP */
@ -1022,13 +1022,13 @@ bool do_command(THD *thd)
#ifdef WITH_WSREP
if (WSREP(thd))
{
mysql_mutex_lock(&thd->LOCK_wsrep_thd);
mysql_mutex_lock(&thd->LOCK_thd_data);
if (thd->wsrep_conflict_state == MUST_ABORT)
{
DBUG_PRINT("wsrep",("aborted for wsrep rollback: %lu", thd->real_id));
wsrep_client_rollback(thd);
}
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
}
#endif /* WITH_WSREP */
@ -1256,7 +1256,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
thd->wsrep_PA_safe= true;
}
mysql_mutex_lock(&thd->LOCK_wsrep_thd);
mysql_mutex_lock(&thd->LOCK_thd_data);
thd->wsrep_query_state= QUERY_EXEC;
if (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT)
{
@ -1272,14 +1272,14 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
{
my_error(ER_LOCK_DEADLOCK, MYF(0), "wsrep aborted transaction");
WSREP_DEBUG("Deadlock error for: %s", thd->query());
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
thd->reset_killed();
thd->mysys_var->abort = 0;
thd->wsrep_conflict_state = NO_CONFLICT;
thd->wsrep_retry_counter = 0;
goto dispatch_end;
}
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
}
#endif /* WITH_WSREP */
#if defined(ENABLED_PROFILING)
@ -1934,10 +1934,10 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
DBUG_ASSERT((command != COM_QUIT && command != COM_STMT_CLOSE)
|| thd->get_stmt_da()->is_disabled());
/* wsrep BF abort in query exec phase */
mysql_mutex_lock(&thd->LOCK_wsrep_thd);
mysql_mutex_lock(&thd->LOCK_thd_data);
do_end_of_statement= thd->wsrep_conflict_state != REPLAYING &&
thd->wsrep_conflict_state != RETRY_AUTOCOMMIT;
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
}
else
do_end_of_statement= true;
@ -7192,7 +7192,7 @@ static void wsrep_mysql_parse(THD *thd, char *rawbuf, uint length,
if (WSREP(thd)) {
/* wsrep BF abort in query exec phase */
mysql_mutex_lock(&thd->LOCK_wsrep_thd);
mysql_mutex_lock(&thd->LOCK_thd_data);
if (thd->wsrep_conflict_state == MUST_ABORT) {
wsrep_client_rollback(thd);
@ -7201,8 +7201,11 @@ static void wsrep_mysql_parse(THD *thd, char *rawbuf, uint length,
if (thd->wsrep_conflict_state == MUST_REPLAY)
{
mysql_mutex_unlock(&thd->LOCK_thd_data);
if (thd->lex->explain)
delete_explain_query(thd->lex);
mysql_mutex_lock(&thd->LOCK_thd_data);
wsrep_replay_transaction(thd);
}
@ -7242,13 +7245,13 @@ static void wsrep_mysql_parse(THD *thd, char *rawbuf, uint length,
if (thd->wsrep_conflict_state != REPLAYING)
thd->wsrep_retry_counter= 0; // reset
}
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
thd->reset_killed();
}
else
{
set_if_smaller(thd->wsrep_retry_counter, 0); // reset; eventually ok
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
}
}
@ -8207,7 +8210,7 @@ kill_one_thread(THD *thd, longlong id, killed_state kill_signal, killed_type typ
if (((thd->security_ctx->master_access & SUPER_ACL) ||
thd->security_ctx->user_matches(tmp->security_ctx)) &&
!wsrep_thd_is_BF(tmp, true))
!wsrep_thd_is_BF(tmp, false))
{
tmp->awake(kill_signal);
error=0;