From 230506b99635c626f3846c6a2f424426e13f8faa Mon Sep 17 00:00:00 2001 From: Serguey Zefirov Date: Fri, 28 Feb 2025 14:05:16 +0300 Subject: [PATCH] fix(MCOL-5396): Fix possible infinite loop in plugin--PrimProc communication If you manage to shut down PrimProc just before the plugin tries to send the Calpont Select Execution Plan to PrimProc, you now get a nice error message about PrimProc being down instead of endless logs of failed reconnection attempts. --- dbcon/mysql/ha_mcs_impl.cpp | 54 ++++++++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/dbcon/mysql/ha_mcs_impl.cpp b/dbcon/mysql/ha_mcs_impl.cpp index 45db727b9..21adbb281 100644 --- a/dbcon/mysql/ha_mcs_impl.cpp +++ b/dbcon/mysql/ha_mcs_impl.cpp @@ -19,6 +19,8 @@ #define PREFER_MY_CONFIG_H #include #include +#include +#include #include #include #include @@ -130,6 +132,7 @@ using namespace funcexp; #include "ha_mcs_logging.h" #include "ha_subquery.h" + namespace cal_impl_if { extern bool nonConstFunc(Item_func* ifp); @@ -2350,8 +2353,18 @@ int ha_mcs::impl_rnd_init(TABLE* table, const std::vector& condStack) ByteStream msg; ByteStream emsgBs; + int ntries = 10; + + // XXX: MCOL-5396: unable to reach this code. while (true) { + string emsg; + if (ntries < 0) + { + emsg = "Lost connection to ExeMgr. 
Please contact your administrator"; + setError(thd, ER_INTERNAL_ERROR, emsg); + return ER_INTERNAL_ERROR; + } try { ByteStream::quadbyte qb = 4; @@ -2369,7 +2382,6 @@ int ha_mcs::impl_rnd_init(TABLE* table, const std::vector& condStack) emsgBs.restart(); msg = hndl->exeMgr->read(); emsgBs = hndl->exeMgr->read(); - string emsg; if (msg.length() == 0 || emsgBs.length() == 0) { @@ -2435,6 +2447,10 @@ int ha_mcs::impl_rnd_init(TABLE* table, const std::vector& condStack) ti.conn_hndl = hndl; + using namespace std::chrono_literals; + std::this_thread::sleep_for(100ms); + ntries --; + try { hndl->connect(); @@ -4231,8 +4247,18 @@ int ha_mcs_impl_group_by_init(mcs_handler_info* handler_info, TABLE* table) ByteStream msg; ByteStream emsgBs; + int ntries = 10; + + // XXX: MCOL-5396: unable to reach this code. while (true) { + string emsg; + if (ntries < 0) + { + emsg = "Lost connection to ExeMgr. Please contact your administrator"; + setError(thd, ER_INTERNAL_ERROR, emsg); + return ER_INTERNAL_ERROR; + } try { ByteStream::quadbyte qb = 4; @@ -4250,7 +4276,6 @@ int ha_mcs_impl_group_by_init(mcs_handler_info* handler_info, TABLE* table) emsgBs.restart(); msg = hndl->exeMgr->read(); emsgBs = hndl->exeMgr->read(); - string emsg; if (msg.length() == 0 || emsgBs.length() == 0) { @@ -4317,6 +4342,11 @@ int ha_mcs_impl_group_by_init(mcs_handler_info* handler_info, TABLE* table) ci->cal_conn_hndl = hndl; ci->cal_conn_hndl_st.pop(); ci->cal_conn_hndl_st.push(ci->cal_conn_hndl); + + using namespace std::chrono_literals; + std::this_thread::sleep_for(100ms); + ntries --; + try { hndl->connect(); @@ -4929,8 +4959,23 @@ int ha_mcs_impl_pushdown_init(mcs_handler_info* handler_info, TABLE* table, bool ByteStream msg; ByteStream emsgBs; + int ntries = 10; + + // The issue is MCOL-5396. + // The commented-out delay below can be enabled to trigger the old infinite-loop condition and + // prove that the mitigation works. Although it looks like dead code, it is + // worth keeping so that the issue can be reproduced again. 
+ // using namespace std::chrono_literals; + // std::this_thread::sleep_for(10000ms); while (true) { + string emsg; + if (ntries < 0) + { + emsg = "Lost connection to ExeMgr. Please contact your administrator"; + setError(thd, ER_INTERNAL_ERROR, emsg); + return ER_INTERNAL_ERROR; + } try { ByteStream::quadbyte qb = 4; @@ -4948,7 +4993,6 @@ int ha_mcs_impl_pushdown_init(mcs_handler_info* handler_info, TABLE* table, bool emsgBs.restart(); msg = hndl->exeMgr->read(); emsgBs = hndl->exeMgr->read(); - string emsg; if (msg.length() == 0 || emsgBs.length() == 0) { @@ -5016,6 +5060,10 @@ int ha_mcs_impl_pushdown_init(mcs_handler_info* handler_info, TABLE* table, bool ci->cal_conn_hndl = hndl; + using namespace std::chrono_literals; + std::this_thread::sleep_for(100ms); + ntries --; + try { hndl->connect();