1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-01 06:46:55 +03:00

MCOL-1523 - enhance to failover module when ddl/dmlproc crashes

This commit is contained in:
David Hill
2018-07-26 14:30:14 -05:00
parent f9f6dc43dd
commit 926314bf17
5 changed files with 65 additions and 22 deletions

View File

@ -49,6 +49,15 @@ using namespace logging;
#include "clientrotator.h" #include "clientrotator.h"
//#include "idb_mysql.h"
/** Debug macro */
#ifdef INFINIDB_DEBUG
#define IDEBUG(x) {x;}
#else
#define IDEBUG(x) {}
#endif
#define LOG_TO_CERR #define LOG_TO_CERR
namespace execplan namespace execplan
@ -60,13 +69,36 @@ const uint64_t LOCAL_EXEMGR_PORT = 8601;
/**
 * @brief Read the module name from the file <installDir>/local/module.
 *
 * The first line of the file is returned. If the file cannot be opened, a
 * debug-level message containing the open error text and the file name is
 * logged and an empty string is returned.
 *
 * @return first line of the module file, or an empty string on open failure
 */
string ClientRotator::getModule()
{
    string installDir = startup::StartUp::installDir();

    //Log to debug.log
    LoggingID logid( 24, 0, 0);

    string fileName = installDir + "/local/module";
    string module;
    ifstream moduleFile (fileName.c_str());

    if (moduleFile.is_open())
    {
        getline (moduleFile, module);
    }
    else
    {
        // Save errno right away: constructing the logging objects below may
        // perform calls that overwrite it before the message is built.
        const int openErrno = errno;

        logging::Message::Args args1;
        logging::Message msg(1);
        std::ostringstream oss;
        oss << "ClientRotator::getModule open status2 =" << strerror(openErrno);
        args1.add(oss.str());
        args1.add(fileName);
        msg.format( args1 );
        Logger logger(logid.fSubsysID);
        logger.logMessage(LOG_TYPE_DEBUG, msg, logid);
    }

    // No explicit close(): the ifstream destructor releases the file.
    return module;
}

View File

@ -827,7 +827,7 @@ void processMSG(messageqcpp::IOSocket* cfIos)
} }
if (opState == oam::MAN_OFFLINE || opState == oam::MAN_DISABLED if (opState == oam::MAN_OFFLINE || opState == oam::MAN_DISABLED
|| opState == oam::AUTO_DISABLED ) { || opState == oam::AUTO_DISABLED || opState == oam::AUTO_OFFLINE) {
oam.dbrmctl("halt"); oam.dbrmctl("halt");
log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG); log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG);
@ -848,7 +848,7 @@ void processMSG(messageqcpp::IOSocket* cfIos)
} }
else else
{ {
log.writeLog(__LINE__, "ERROR: module not stopped", LOG_TYPE_ERROR); log.writeLog(__LINE__, "ERROR: module not stopped, state = " + oam.itoa(opState), LOG_TYPE_ERROR);
status = API_FAILURE; status = API_FAILURE;
break; break;
} }

View File

@ -1395,7 +1395,7 @@ static void chldHandleThread(MonitorConfig config)
{} {}
// check if process failover is needed due to process outage // check if process failover is needed due to process outage
aMonitor.checkProcessFailover((*listPtr).ProcessName); aMonitor.checkModuleFailover((*listPtr).ProcessName);
//check the db health //check the db health
if (DBFunctionalMonitorFlag == "y" ) { if (DBFunctionalMonitorFlag == "y" ) {
@ -1470,7 +1470,7 @@ static void chldHandleThread(MonitorConfig config)
(*listPtr).processID = 0; (*listPtr).processID = 0;
// check if process failover is needed due to process outage // check if process failover is needed due to process outage
aMonitor.checkProcessFailover((*listPtr).ProcessName); aMonitor.checkModuleFailover((*listPtr).ProcessName);
break; break;
} }
else else

View File

@ -1174,7 +1174,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO
// error in launching a process // error in launching a process
if ( requestStatus == oam::API_FAILURE && if ( requestStatus == oam::API_FAILURE &&
(*listPtr).RunType == SIMPLEX) (*listPtr).RunType == SIMPLEX)
checkProcessFailover((*listPtr).ProcessName); checkModuleFailover((*listPtr).ProcessName);
else else
break; break;
} }
@ -4625,19 +4625,19 @@ std::string ProcessMonitor::sendMsgProcMon1( std::string module, ByteStream msg,
} }
/****************************************************************************************** /******************************************************************************************
* @brief checkProcessFailover * @brief checkModuleFailover
* *
* purpose: check if process failover is needed due to a process outage * purpose: check if module failover is needed due to a process outage
* *
******************************************************************************************/ ******************************************************************************************/
void ProcessMonitor::checkProcessFailover( std::string processName) void ProcessMonitor::checkModuleFailover( std::string processName)
{ {
Oam oam; Oam oam;
//force failover on certain processes //force failover on certain processes
if ( processName == "DDLProc" || if ( processName == "DDLProc" ||
processName == "DMLProc" ) { processName == "DMLProc" ) {
log.writeLog(__LINE__, "checkProcessFailover: process failover, process outage of " + processName, LOG_TYPE_CRITICAL); log.writeLog(__LINE__, "checkModuleFailover: process failover, process outage of " + processName, LOG_TYPE_CRITICAL);
try try
{ {
@ -4656,26 +4656,37 @@ void ProcessMonitor::checkProcessFailover( std::string processName)
systemprocessstatus.processstatus[i].ProcessOpState == oam::AUTO_OFFLINE || systemprocessstatus.processstatus[i].ProcessOpState == oam::AUTO_OFFLINE ||
systemprocessstatus.processstatus[i].ProcessOpState == oam::FAILED ) { systemprocessstatus.processstatus[i].ProcessOpState == oam::FAILED ) {
// found a AVAILABLE mate, start it // found a AVAILABLE mate, start it
log.writeLog(__LINE__, "start process on module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG); log.writeLog(__LINE__, "Change UM Master to module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG);
log.writeLog(__LINE__, "Disable local UM module " + config.moduleName(), LOG_TYPE_DEBUG);
log.writeLog(__LINE__, "Stop local UM module " + config.moduleName(), LOG_TYPE_DEBUG);
log.writeLog(__LINE__, "Disable Local will Enable UM module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG);
try { oam::DeviceNetworkConfig devicenetworkconfig;
oam.setSystemConfig("PrimaryUMModuleName", systemprocessstatus.processstatus[i].Module); oam::DeviceNetworkList devicenetworklist;
//distribute config file devicenetworkconfig.DeviceName = config.moduleName();
oam.distributeConfigFile("system"); devicenetworklist.push_back(devicenetworkconfig);
sleep(1);
}
catch(...) {}
try try
{ {
oam.startProcess(systemprocessstatus.processstatus[i].Module, processName, FORCEFUL, ACK_YES); oam.stopModule(devicenetworklist, oam::FORCEFUL, oam::ACK_YES);
log.writeLog(__LINE__, "success start process on module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG); log.writeLog(__LINE__, "success stopModule on module " + config.moduleName(), LOG_TYPE_DEBUG);
try
{
oam.disableModule(devicenetworklist);
log.writeLog(__LINE__, "success disableModule on module " + config.moduleName(), LOG_TYPE_DEBUG);
} }
catch (exception& e) catch (exception& e)
{ {
log.writeLog(__LINE__, "failed start process on module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_ERROR); log.writeLog(__LINE__, "failed disableModule on module " + config.moduleName(), LOG_TYPE_ERROR);
} }
}
catch (exception& e)
{
log.writeLog(__LINE__, "failed stopModule on module " + config.moduleName(), LOG_TYPE_ERROR);
}
break; break;
} }
} }

View File

@ -487,7 +487,7 @@ public:
/** /**
*@brief check if module failover is needed due to a process outage *@brief check if module failover is needed due to a process outage
*/ */
void checkProcessFailover( std::string processName); void checkModuleFailover(std::string processName);
/** /**
*@brief run upgrade script *@brief run upgrade script