From 53455866809feadc88b81f547b96376d15f7a038 Mon Sep 17 00:00:00 2001 From: Thayumanavar Date: Fri, 9 Nov 2012 14:54:35 +0530 Subject: [PATCH] BUG#14458232 - CRASH IN THD_IS_TRANSACTION_ACTIVE DURING THREAD POOLING STRESS TEST PROBLEM: Connection stress tests which consists of concurrent kill connections interleaved with mysql ping queries cause the mysqld server which uses thread pool scheduler to crash. FIX: Killing a connection involves shutdown and close of client socket and this can cause EPOLLHUP(or EPOLLERR) events to be to be queued and handled after disarming and cleanup of of the connection object (THD) is being done.We disarm the the connection by modifying the epoll mask to zero which ensure no events come and release the ownership of waiting thread that collect events and then do the cleanup of THD. object.As per the linux kernel epoll source code ( http://lxr.linux.no/linux+*/fs/eventpoll.c#L1771), EPOLLHUP (or EPOLLERR) can't be masked even if we set EPOLL mask to zero. So we disarm the connection and thus prevent execution of any query processing handler/queueing to client ctx. queue by removing the client fd from the epoll set via EPOLL_CTL_DEL. Also there is a race condition which involve the following threads: 1) Thread X executing KILL CONNECTION Y and is in THD::awake and using mysys_var (holding LOCK_thd_data). 2) Thread Y in tp_process_event executing and is being killed. 3) Thread Z receives KILL flag internally and possible call the tp_thd_cleanup function which set thread session variable and changing mysys_var. The fix for the above race is to set thread session variable under LOCK_thd_data. We also do not call THD::awake if we found the thread in the thread list that is to be killed but it's KILL_CONNECTION flag set thus avoiding any possible concurrent cleanup. This patch is approved by Mikael Ronstrom via email review. --- include/mysql/thread_pool_priv.h | 1 + sql/sql_class.cc | 12 ++++++++++++ sql/sql_parse.cc | 10 ++++++++-- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/include/mysql/thread_pool_priv.h b/include/mysql/thread_pool_priv.h index babe0ab6c08..1368c8c52be 100644 --- a/include/mysql/thread_pool_priv.h +++ b/include/mysql/thread_pool_priv.h @@ -61,6 +61,7 @@ uint thd_get_net_read_write(THD *thd); void thd_set_mysys_var(THD *thd, st_my_thread_var *mysys_var); ulong thd_get_net_wait_timeout(THD *thd); my_socket thd_get_fd(THD *thd); +int thd_store_globals(THD* thd); THD *first_global_thread(); THD *next_global_thread(THD *thd); diff --git a/sql/sql_class.cc b/sql/sql_class.cc index e32844d06ea..9b0a76bf749 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -468,6 +468,18 @@ my_socket thd_get_fd(THD *thd) return thd->net.vio->sd; } +/** + Set thread specific environment required for thd cleanup in thread pool. + + @param thd THD object + + @retval 1 if thread-specific enviroment could be set else 0 +*/ +int thd_store_globals(THD* thd) +{ + return thd->store_globals(); +} + /** Get thread attributes for connection threads diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index ea07bfce0cb..7bbcff4bc2b 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -6471,8 +6471,14 @@ uint kill_one_thread(THD *thd, ulong id, bool only_kill_query) if ((thd->security_ctx->master_access & SUPER_ACL) || thd->security_ctx->user_matches(tmp->security_ctx)) { - tmp->awake(only_kill_query ? THD::KILL_QUERY : THD::KILL_CONNECTION); - error=0; + /* process the kill only if thread is not already undergoing any kill + connection. + */ + if (tmp->killed != THD::KILL_CONNECTION) + { + tmp->awake(only_kill_query ? THD::KILL_QUERY : THD::KILL_CONNECTION); + } + error= 0; } else error=ER_KILL_DENIED_ERROR;