1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-27 18:02:13 +03:00

MDEV-23536 : Race condition between KILL and transaction commit

A race condition may occur between the execution of transaction commit,
and an execution of a KILL statement that would attempt to abort that
transaction.

MDEV-17092 worked around this race condition by modifying InnoDB code.
After that issue was closed, Sergey Vojtovich pointed out that this
race condition would better be fixed above the storage engine layer:

If you look carefully into the above, you can conclude that
thd->free_connection() can be called concurrently with
KILL/thd->awake(). Which is the bug. And it is partially fixed in
THD::~THD(), that is destructor waits for KILL completion:

Fix: Add necessary mutex operations to THD::free_connection()
and move WSREP specific code also there. This ensures that no
one is using THD while we do free_connection(). These mutexes
will also ensures that there can't be concurrent KILL/THD::awake().

innobase_kill_query
  We can now remove usage of trx_sys_mutex introduced on MDEV-17092.

trx_t::free()
  Poison trx->state and trx->mysql_thd

This patch is validated with an RQG run similar to the one that
reproduced MDEV-17092.
This commit is contained in:
Jan Lindström
2020-12-17 14:20:23 +02:00
parent 18254c18d9
commit 775fccea0c
5 changed files with 41 additions and 39 deletions

View File

@ -1357,12 +1357,15 @@ void THD::change_user(void)
/* Do operations that may take a long time */
void THD::cleanup(void)
void THD::cleanup(bool have_mutex)
{
DBUG_ENTER("THD::cleanup");
DBUG_ASSERT(cleanup_done == 0);
set_killed(KILL_CONNECTION);
if (have_mutex)
set_killed_no_mutex(KILL_CONNECTION,0,0);
else
set_killed(KILL_CONNECTION);
#ifdef ENABLE_WHEN_BINLOG_WILL_BE_ABLE_TO_PREPARE
if (transaction.xid_state.xa_state == XA_PREPARED)
{
@ -1437,6 +1440,28 @@ void THD::cleanup(void)
void THD::free_connection()
{
DBUG_ASSERT(free_connection_done == 0);
/* Check that we have already called thd->unlink() */
DBUG_ASSERT(prev == 0 && next == 0);
/*
Other threads may have a lock on THD::LOCK_thd_data or
THD::LOCK_thd_kill to ensure that this THD is not deleted
while they access it. The following mutex_lock ensures
that no one else is using this THD and it's now safe to
continue.
For example consider KILL-statement execution on
sql_parse.cc kill_one_thread() that will use
THD::LOCK_thd_data to protect victim thread during
THD::awake().
*/
mysql_mutex_lock(&LOCK_thd_data);
mysql_mutex_lock(&LOCK_thd_kill);
#ifdef WITH_WSREP
delete wsrep_rgi;
wsrep_rgi= 0;
#endif /* WITH_WSREP */
my_free(db);
db= NULL;
#ifndef EMBEDDED_LIBRARY
@ -1445,8 +1470,8 @@ void THD::free_connection()
net.vio= 0;
net_end(&net);
#endif
if (!cleanup_done)
cleanup();
if (!cleanup_done)
cleanup(true); // We have locked THD::LOCK_thd_kill
ha_close_connection(this);
plugin_thdvar_cleanup(this);
mysql_audit_free_thd(this);
@ -1457,6 +1482,8 @@ void THD::free_connection()
#if defined(ENABLED_PROFILING)
profiling.restart(); // Reset profiling
#endif
mysql_mutex_unlock(&LOCK_thd_kill);
mysql_mutex_unlock(&LOCK_thd_data);
}
/*
@ -1512,9 +1539,6 @@ THD::~THD()
mysql_mutex_lock(&LOCK_thd_data);
mysql_mutex_unlock(&LOCK_thd_data);
#ifdef WITH_WSREP
delete wsrep_rgi;
#endif
if (!free_connection_done)
free_connection();