1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-26 05:02:32 +03:00

MCOL-1523 - enhance to failover module when ddl/dmlproc crashes

This commit is contained in:
David Hill
2018-07-26 14:30:14 -05:00
parent f9f6dc43dd
commit 926314bf17
5 changed files with 65 additions and 22 deletions

View File

@@ -1174,7 +1174,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO
// error in launching a process
if ( requestStatus == oam::API_FAILURE &&
(*listPtr).RunType == SIMPLEX)
checkProcessFailover((*listPtr).ProcessName);
checkModuleFailover((*listPtr).ProcessName);
else
break;
}
@@ -4625,19 +4625,19 @@ std::string ProcessMonitor::sendMsgProcMon1( std::string module, ByteStream msg,
}
/******************************************************************************************
* @brief checkProcessFailover
* @brief checkModuleFailover
*
* purpose: check if process failover is needed due to a process outage
* purpose: check if module failover is needed due to a process outage
*
******************************************************************************************/
void ProcessMonitor::checkProcessFailover( std::string processName)
void ProcessMonitor::checkModuleFailover( std::string processName)
{
Oam oam;
//force failover on certain processes
if ( processName == "DDLProc" ||
processName == "DMLProc" ) {
log.writeLog(__LINE__, "checkProcessFailover: process failover, process outage of " + processName, LOG_TYPE_CRITICAL);
log.writeLog(__LINE__, "checkModuleFailover: process failover, process outage of " + processName, LOG_TYPE_CRITICAL);
try
{
@@ -4656,26 +4656,37 @@ void ProcessMonitor::checkProcessFailover( std::string processName)
systemprocessstatus.processstatus[i].ProcessOpState == oam::AUTO_OFFLINE ||
systemprocessstatus.processstatus[i].ProcessOpState == oam::FAILED ) {
// found a AVAILABLE mate, start it
log.writeLog(__LINE__, "start process on module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG);
log.writeLog(__LINE__, "Change UM Master to module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG);
log.writeLog(__LINE__, "Disable local UM module " + config.moduleName(), LOG_TYPE_DEBUG);
log.writeLog(__LINE__, "Stop local UM module " + config.moduleName(), LOG_TYPE_DEBUG);
log.writeLog(__LINE__, "Disable Local will Enable UM module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG);
try {
oam.setSystemConfig("PrimaryUMModuleName", systemprocessstatus.processstatus[i].Module);
//distribute config file
oam.distributeConfigFile("system");
sleep(1);
}
catch(...) {}
oam::DeviceNetworkConfig devicenetworkconfig;
oam::DeviceNetworkList devicenetworklist;
devicenetworkconfig.DeviceName = config.moduleName();
devicenetworklist.push_back(devicenetworkconfig);
try
{
oam.startProcess(systemprocessstatus.processstatus[i].Module, processName, FORCEFUL, ACK_YES);
log.writeLog(__LINE__, "success start process on module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG);
oam.stopModule(devicenetworklist, oam::FORCEFUL, oam::ACK_YES);
log.writeLog(__LINE__, "success stopModule on module " + config.moduleName(), LOG_TYPE_DEBUG);
try
{
oam.disableModule(devicenetworklist);
log.writeLog(__LINE__, "success disableModule on module " + config.moduleName(), LOG_TYPE_DEBUG);
}
catch (exception& e)
{
log.writeLog(__LINE__, "failed disableModule on module " + config.moduleName(), LOG_TYPE_ERROR);
}
}
catch (exception& e)
{
log.writeLog(__LINE__, "failed start process on module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_ERROR);
log.writeLog(__LINE__, "failed stopModule on module " + config.moduleName(), LOG_TYPE_ERROR);
}
break;
}
}