1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-1523 - enhance failover: fail over the module when DDLProc/DMLProc crashes

This commit is contained in:
David Hill
2018-07-26 14:30:14 -05:00
parent f9f6dc43dd
commit 926314bf17
5 changed files with 65 additions and 22 deletions

View File

@ -49,6 +49,15 @@ using namespace logging;
#include "clientrotator.h"
//#include "idb_mysql.h"
/** Debug macro
 *
 * IDEBUG(x) expands to the braced statement `{x;}` when INFINIDB_DEBUG is
 * defined and to an empty block `{}` otherwise, so the wrapped statement is
 * compiled in only for debug builds.
 *
 * NOTE(review): both expansions are bare brace blocks, so `IDEBUG(...);`
 * directly before an `else` would leave a stray empty statement -- check
 * call sites before using it in an unbraced if/else.
 */
#ifdef INFINIDB_DEBUG
#define IDEBUG(x) {x;}
#else
#define IDEBUG(x) {}
#endif
// Defined unconditionally. Nothing in this excerpt tests it; presumably it
// enables logging to stderr elsewhere in the file -- TODO confirm.
#define LOG_TO_CERR
namespace execplan
@ -60,13 +69,36 @@ const uint64_t LOCAL_EXEMGR_PORT = 8601;
/**
 * @brief Read this node's module name from "<installDir>/local/module".
 *
 * The first line of the file is returned as the module name. If the file
 * cannot be opened, a debug-level message carrying the open error text and
 * the file name is logged and an empty string is returned.
 *
 * @return the first line of the module file, or an empty string when the
 *         file could not be opened (or its first line is empty).
 */
string ClientRotator::getModule()
{
    const string fileName = startup::StartUp::installDir() + "/local/module";
    string module;

    ifstream moduleFile(fileName.c_str());

    if (moduleFile.is_open())
    {
        getline(moduleFile, module);
        // moduleFile is closed by its destructor on return.
        return module;
    }

    // Snapshot errno right away: constructing the message and stream objects
    // below may perform calls of their own that overwrite it, which would
    // make the logged reason for the open failure misleading.
    const int openErrno = errno;

    //Log to debug.log
    LoggingID logid(24, 0, 0);
    logging::Message::Args args1;
    logging::Message msg(1);
    std::ostringstream oss;
    oss << "ClientRotator::getModule open status2 =" << strerror(openErrno);
    args1.add(oss.str());
    args1.add(fileName);
    msg.format(args1);
    Logger logger(logid.fSubsysID);
    logger.logMessage(LOG_TYPE_DEBUG, msg, logid);

    return module;
}

View File

@ -827,7 +827,7 @@ void processMSG(messageqcpp::IOSocket* cfIos)
}
if (opState == oam::MAN_OFFLINE || opState == oam::MAN_DISABLED
|| opState == oam::AUTO_DISABLED ) {
|| opState == oam::AUTO_DISABLED || opState == oam::AUTO_OFFLINE) {
oam.dbrmctl("halt");
log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG);
@ -848,7 +848,7 @@ void processMSG(messageqcpp::IOSocket* cfIos)
}
else
{
log.writeLog(__LINE__, "ERROR: module not stopped", LOG_TYPE_ERROR);
log.writeLog(__LINE__, "ERROR: module not stopped, state = " + oam.itoa(opState), LOG_TYPE_ERROR);
status = API_FAILURE;
break;
}

View File

@ -1395,7 +1395,7 @@ static void chldHandleThread(MonitorConfig config)
{}
// check if process failover is needed due to process outage
aMonitor.checkProcessFailover((*listPtr).ProcessName);
aMonitor.checkModuleFailover((*listPtr).ProcessName);
//check the db health
if (DBFunctionalMonitorFlag == "y" ) {
@ -1470,7 +1470,7 @@ static void chldHandleThread(MonitorConfig config)
(*listPtr).processID = 0;
// check if process failover is needed due to process outage
aMonitor.checkProcessFailover((*listPtr).ProcessName);
aMonitor.checkModuleFailover((*listPtr).ProcessName);
break;
}
else

View File

@ -1174,7 +1174,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO
// error in launching a process
if ( requestStatus == oam::API_FAILURE &&
(*listPtr).RunType == SIMPLEX)
checkProcessFailover((*listPtr).ProcessName);
checkModuleFailover((*listPtr).ProcessName);
else
break;
}
@ -4625,19 +4625,19 @@ std::string ProcessMonitor::sendMsgProcMon1( std::string module, ByteStream msg,
}
/******************************************************************************************
* @brief checkProcessFailover
* @brief checkModuleFailover
*
* purpose: check if process failover is needed due to a process outage
* purpose: check if module failover is needed due to a process outage
*
******************************************************************************************/
void ProcessMonitor::checkProcessFailover( std::string processName)
void ProcessMonitor::checkModuleFailover( std::string processName)
{
Oam oam;
//force failover on certain processes
if ( processName == "DDLProc" ||
processName == "DMLProc" ) {
log.writeLog(__LINE__, "checkProcessFailover: process failover, process outage of " + processName, LOG_TYPE_CRITICAL);
log.writeLog(__LINE__, "checkModuleFailover: process failover, process outage of " + processName, LOG_TYPE_CRITICAL);
try
{
@ -4656,26 +4656,37 @@ void ProcessMonitor::checkProcessFailover( std::string processName)
systemprocessstatus.processstatus[i].ProcessOpState == oam::AUTO_OFFLINE ||
systemprocessstatus.processstatus[i].ProcessOpState == oam::FAILED ) {
// found an AVAILABLE mate, start it
log.writeLog(__LINE__, "start process on module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG);
log.writeLog(__LINE__, "Change UM Master to module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG);
log.writeLog(__LINE__, "Disable local UM module " + config.moduleName(), LOG_TYPE_DEBUG);
log.writeLog(__LINE__, "Stop local UM module " + config.moduleName(), LOG_TYPE_DEBUG);
log.writeLog(__LINE__, "Disable Local will Enable UM module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG);
try {
oam.setSystemConfig("PrimaryUMModuleName", systemprocessstatus.processstatus[i].Module);
//distribute config file
oam.distributeConfigFile("system");
sleep(1);
}
catch(...) {}
oam::DeviceNetworkConfig devicenetworkconfig;
oam::DeviceNetworkList devicenetworklist;
devicenetworkconfig.DeviceName = config.moduleName();
devicenetworklist.push_back(devicenetworkconfig);
try
{
oam.startProcess(systemprocessstatus.processstatus[i].Module, processName, FORCEFUL, ACK_YES);
log.writeLog(__LINE__, "success start process on module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG);
oam.stopModule(devicenetworklist, oam::FORCEFUL, oam::ACK_YES);
log.writeLog(__LINE__, "success stopModule on module " + config.moduleName(), LOG_TYPE_DEBUG);
try
{
oam.disableModule(devicenetworklist);
log.writeLog(__LINE__, "success disableModule on module " + config.moduleName(), LOG_TYPE_DEBUG);
}
catch (exception& e)
{
log.writeLog(__LINE__, "failed disableModule on module " + config.moduleName(), LOG_TYPE_ERROR);
}
}
catch (exception& e)
{
log.writeLog(__LINE__, "failed start process on module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_ERROR);
log.writeLog(__LINE__, "failed stopModule on module " + config.moduleName(), LOG_TYPE_ERROR);
}
break;
}
}

View File

@ -487,7 +487,7 @@ public:
/**
*@brief check if module failover is needed due to a process outage
*/
void checkProcessFailover( std::string processName);
void checkModuleFailover(std::string processName);
/**
*@brief run upgrade script