You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
MCOL-1523 - enhance to failover module when ddl/dmlproc crashes
This commit is contained in:
@ -49,6 +49,15 @@ using namespace logging;
|
||||
|
||||
#include "clientrotator.h"
|
||||
|
||||
//#include "idb_mysql.h"
|
||||
|
||||
/** Debug macro */
|
||||
#ifdef INFINIDB_DEBUG
|
||||
#define IDEBUG(x) {x;}
|
||||
#else
|
||||
#define IDEBUG(x) {}
|
||||
#endif
|
||||
|
||||
#define LOG_TO_CERR
|
||||
|
||||
namespace execplan
|
||||
@ -60,13 +69,36 @@ const uint64_t LOCAL_EXEMGR_PORT = 8601;
|
||||
/**
 * Read the first line of the file "<installDir>/local/module" and return it.
 *
 * The install directory comes from startup::StartUp::installDir(). If the
 * file cannot be opened, a debug-level message containing strerror(errno)
 * and the file name is logged, and the returned string is left empty.
 *
 * @return the first line of the module file, or an empty string when the
 *         file could not be opened.
 */
string ClientRotator::getModule()
|
||||
{
|
||||
string installDir = startup::StartUp::installDir();
|
||||
|
||||
//Log to debug.log
|
||||
// NOTE(review): the meaning of subsystem id 24 is not visible here - confirm.
LoggingID logid( 24, 0, 0);
|
||||
|
||||
string fileName = installDir + "/local/module";
|
||||
|
||||
// Stays empty unless getline() below fills it in.
string module;
|
||||
ifstream moduleFile (fileName.c_str());
|
||||
|
||||
if (moduleFile.is_open())
|
||||
{
|
||||
// Only the first line of the file is read.
getline (moduleFile, module);
|
||||
}
|
||||
else
|
||||
{
|
||||
{
|
||||
// Open failed: log the errno text and the file name at debug level.
logging::Message::Args args1;
|
||||
logging::Message msg(1);
|
||||
std::ostringstream oss;
|
||||
oss << "ClientRotator::getModule open status2 =" << strerror(errno);
|
||||
args1.add(oss.str());
|
||||
args1.add(fileName);
|
||||
msg.format( args1 );
|
||||
Logger logger(logid.fSubsysID);
|
||||
logger.logMessage(LOG_TYPE_DEBUG, msg, logid);
|
||||
}
|
||||
}
|
||||
|
||||
// close() is called on both paths, including when the open failed.
moduleFile.close();
|
||||
|
||||
return module;
|
||||
}
|
||||
|
||||
|
@ -827,7 +827,7 @@ void processMSG(messageqcpp::IOSocket* cfIos)
|
||||
}
|
||||
|
||||
if (opState == oam::MAN_OFFLINE || opState == oam::MAN_DISABLED
|
||||
|| opState == oam::AUTO_DISABLED ) {
|
||||
|| opState == oam::AUTO_DISABLED || opState == oam::AUTO_OFFLINE) {
|
||||
|
||||
oam.dbrmctl("halt");
|
||||
log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG);
|
||||
@ -848,7 +848,7 @@ void processMSG(messageqcpp::IOSocket* cfIos)
|
||||
}
|
||||
else
|
||||
{
|
||||
log.writeLog(__LINE__, "ERROR: module not stopped", LOG_TYPE_ERROR);
|
||||
log.writeLog(__LINE__, "ERROR: module not stopped, state = " + oam.itoa(opState), LOG_TYPE_ERROR);
|
||||
status = API_FAILURE;
|
||||
break;
|
||||
}
|
||||
|
@ -1395,7 +1395,7 @@ static void chldHandleThread(MonitorConfig config)
|
||||
{}
|
||||
|
||||
// check if process failover is needed due to process outage
|
||||
aMonitor.checkProcessFailover((*listPtr).ProcessName);
|
||||
aMonitor.checkModuleFailover((*listPtr).ProcessName);
|
||||
|
||||
//check the db health
|
||||
if (DBFunctionalMonitorFlag == "y" ) {
|
||||
@ -1470,7 +1470,7 @@ static void chldHandleThread(MonitorConfig config)
|
||||
(*listPtr).processID = 0;
|
||||
|
||||
// check if process failover is needed due to process outage
|
||||
aMonitor.checkProcessFailover((*listPtr).ProcessName);
|
||||
aMonitor.checkModuleFailover((*listPtr).ProcessName);
|
||||
break;
|
||||
}
|
||||
else
|
||||
|
@ -1174,7 +1174,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO
|
||||
// error in launching a process
|
||||
if ( requestStatus == oam::API_FAILURE &&
|
||||
(*listPtr).RunType == SIMPLEX)
|
||||
checkProcessFailover((*listPtr).ProcessName);
|
||||
checkModuleFailover((*listPtr).ProcessName);
|
||||
else
|
||||
break;
|
||||
}
|
||||
@ -4625,19 +4625,19 @@ std::string ProcessMonitor::sendMsgProcMon1( std::string module, ByteStream msg,
|
||||
}
|
||||
|
||||
/******************************************************************************************
|
||||
* @brief checkProcessFailover
|
||||
* @brief checkModuleFailover
|
||||
*
|
||||
* purpose: check if process failover is needed due to a process outage
|
||||
* purpose: check if module failover is needed due to a process outage
|
||||
*
|
||||
******************************************************************************************/
|
||||
void ProcessMonitor::checkProcessFailover( std::string processName)
|
||||
void ProcessMonitor::checkModuleFailover( std::string processName)
|
||||
{
|
||||
Oam oam;
|
||||
|
||||
//force failover on certain processes
|
||||
if ( processName == "DDLProc" ||
|
||||
processName == "DMLProc" ) {
|
||||
log.writeLog(__LINE__, "checkProcessFailover: process failover, process outage of " + processName, LOG_TYPE_CRITICAL);
|
||||
log.writeLog(__LINE__, "checkModuleFailover: process failover, process outage of " + processName, LOG_TYPE_CRITICAL);
|
||||
|
||||
try
|
||||
{
|
||||
@ -4656,26 +4656,37 @@ void ProcessMonitor::checkProcessFailover( std::string processName)
|
||||
systemprocessstatus.processstatus[i].ProcessOpState == oam::AUTO_OFFLINE ||
|
||||
systemprocessstatus.processstatus[i].ProcessOpState == oam::FAILED ) {
|
||||
// found a AVAILABLE mate, start it
|
||||
log.writeLog(__LINE__, "start process on module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG);
|
||||
log.writeLog(__LINE__, "Change UM Master to module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG);
|
||||
log.writeLog(__LINE__, "Disable local UM module " + config.moduleName(), LOG_TYPE_DEBUG);
|
||||
log.writeLog(__LINE__, "Stop local UM module " + config.moduleName(), LOG_TYPE_DEBUG);
|
||||
log.writeLog(__LINE__, "Disable Local will Enable UM module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG);
|
||||
|
||||
try {
|
||||
oam.setSystemConfig("PrimaryUMModuleName", systemprocessstatus.processstatus[i].Module);
|
||||
oam::DeviceNetworkConfig devicenetworkconfig;
|
||||
oam::DeviceNetworkList devicenetworklist;
|
||||
|
||||
//distribute config file
|
||||
oam.distributeConfigFile("system");
|
||||
sleep(1);
|
||||
}
|
||||
catch(...) {}
|
||||
devicenetworkconfig.DeviceName = config.moduleName();
|
||||
devicenetworklist.push_back(devicenetworkconfig);
|
||||
|
||||
try
|
||||
{
|
||||
oam.startProcess(systemprocessstatus.processstatus[i].Module, processName, FORCEFUL, ACK_YES);
|
||||
log.writeLog(__LINE__, "success start process on module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG);
|
||||
oam.stopModule(devicenetworklist, oam::FORCEFUL, oam::ACK_YES);
|
||||
log.writeLog(__LINE__, "success stopModule on module " + config.moduleName(), LOG_TYPE_DEBUG);
|
||||
|
||||
try
|
||||
{
|
||||
oam.disableModule(devicenetworklist);
|
||||
log.writeLog(__LINE__, "success disableModule on module " + config.moduleName(), LOG_TYPE_DEBUG);
|
||||
}
|
||||
catch (exception& e)
|
||||
{
|
||||
log.writeLog(__LINE__, "failed disableModule on module " + config.moduleName(), LOG_TYPE_ERROR);
|
||||
}
|
||||
}
|
||||
catch (exception& e)
|
||||
{
|
||||
log.writeLog(__LINE__, "failed start process on module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_ERROR);
|
||||
log.writeLog(__LINE__, "failed stopModule on module " + config.moduleName(), LOG_TYPE_ERROR);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -487,7 +487,7 @@ public:
|
||||
/**
|
||||
*@brief check if module failover is needed due to a process outage
|
||||
*/
|
||||
void checkProcessFailover( std::string processName);
|
||||
void checkModuleFailover(std::string processName);
|
||||
|
||||
/**
|
||||
*@brief run upgrade script
|
||||
|
Reference in New Issue
Block a user