You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-10-21 11:33:43 +03:00
Merge branch 'develop-1.1' into 1.1-merge-up-20180817
This commit is contained in:
@@ -340,8 +340,9 @@ int main(int argc, char** argv)
|
||||
{
|
||||
log.writeLog(__LINE__, "Standby PM not responding, infinidb shutting down", LOG_TYPE_CRITICAL);
|
||||
//Set the alarm
|
||||
aMonitor.sendAlarm(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET);
|
||||
sleep (1);
|
||||
// aMonitor.sendAlarm(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET);
|
||||
// sleep (1);
|
||||
|
||||
string cmd = startup::StartUp::installDir() + "/bin/infinidb stop > /dev/null 2>&1";
|
||||
system(cmd.c_str());
|
||||
}
|
||||
@@ -369,8 +370,7 @@ int main(int argc, char** argv)
|
||||
sysConfig->setConfig("ProcMgr_Alarm", "IPAddr", IPaddr);
|
||||
|
||||
log.writeLog(__LINE__, "set ProcMgr IPaddr to Old Standby Module: " + IPaddr, LOG_TYPE_DEBUG);
|
||||
|
||||
//update Calpont Config table
|
||||
//update MariaDB ColumnStore Config table
|
||||
try
|
||||
{
|
||||
sysConfig->write();
|
||||
@@ -554,8 +554,8 @@ int main(int argc, char** argv)
|
||||
{
|
||||
log.writeLog(__LINE__, "Check DB mounts failed, shutting down", LOG_TYPE_CRITICAL);
|
||||
//Set the alarm
|
||||
aMonitor.sendAlarm(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET);
|
||||
sleep (1);
|
||||
// aMonitor.sendAlarm(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET);
|
||||
// sleep (1);
|
||||
string cmd = startup::StartUp::installDir() + "/bin/columnstore stop > /dev/null 2>&1";
|
||||
system(cmd.c_str());
|
||||
}
|
||||
@@ -1463,7 +1463,7 @@ static void chldHandleThread(MonitorConfig config)
|
||||
(*listPtr).processID != 0 ) ||
|
||||
( (*listPtr).state == oam::ACTIVE && (*listPtr).processID == 0 ) )
|
||||
{
|
||||
log.writeLog(__LINE__, "*****Calpont Process Restarting: " + (*listPtr).ProcessName + ", old PID = " + oam.itoa((*listPtr).processID), LOG_TYPE_CRITICAL);
|
||||
log.writeLog(__LINE__, "*****MariaDB ColumnStore Process Restarting: " + (*listPtr).ProcessName + ", old PID = " + oam.itoa((*listPtr).processID), LOG_TYPE_CRITICAL);
|
||||
|
||||
if ( (*listPtr).dieCounter >= processRestartCount ||
|
||||
processRestartCount == 0)
|
||||
@@ -1523,7 +1523,7 @@ static void chldHandleThread(MonitorConfig config)
|
||||
{}
|
||||
|
||||
// check if process failover is needed due to process outage
|
||||
aMonitor.checkProcessFailover((*listPtr).ProcessName);
|
||||
aMonitor.checkModuleFailover((*listPtr).ProcessName);
|
||||
|
||||
//check the db health
|
||||
if (DBFunctionalMonitorFlag == "y" )
|
||||
@@ -1605,7 +1605,7 @@ static void chldHandleThread(MonitorConfig config)
|
||||
(*listPtr).processID = 0;
|
||||
|
||||
// check if process failover is needed due to process outage
|
||||
aMonitor.checkProcessFailover((*listPtr).ProcessName);
|
||||
aMonitor.checkModuleFailover((*listPtr).ProcessName);
|
||||
break;
|
||||
}
|
||||
else
|
||||
@@ -1681,7 +1681,7 @@ static void chldHandleThread(MonitorConfig config)
|
||||
}
|
||||
|
||||
//Log this event
|
||||
log.writeLog(__LINE__, "Calpont Process " + (*listPtr).ProcessName + restartStatus, LOG_TYPE_INFO);
|
||||
log.writeLog(__LINE__, "MariaDB ColumnStore Process " + (*listPtr).ProcessName + restartStatus, LOG_TYPE_INFO);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2707,6 +2707,9 @@ void processStatusMSG(messageqcpp::IOSocket* cfIos)
|
||||
memcpy(fShmSystemStatus[0].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE);
|
||||
log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set System State = " + oamState[state], LOG_TYPE_DEBUG);
|
||||
}
|
||||
|
||||
BRM::DBRM dbrm;
|
||||
dbrm.setSystemQueryReady(true);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@@ -1261,7 +1261,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO
|
||||
// error in launching a process
|
||||
if ( requestStatus == oam::API_FAILURE &&
|
||||
(*listPtr).RunType == SIMPLEX)
|
||||
checkProcessFailover((*listPtr).ProcessName);
|
||||
checkModuleFailover((*listPtr).ProcessName);
|
||||
else
|
||||
break;
|
||||
}
|
||||
@@ -4963,20 +4963,19 @@ std::string ProcessMonitor::sendMsgProcMon1( std::string module, ByteStream msg,
|
||||
}
|
||||
|
||||
/******************************************************************************************
|
||||
* @brief checkProcessFailover
|
||||
* @brief checkModuleFailover
|
||||
*
|
||||
* purpose: check if process failover is needed due to a process outage
|
||||
* purpose: check if module failover is needed due to a process outage
|
||||
*
|
||||
******************************************************************************************/
|
||||
void ProcessMonitor::checkProcessFailover( std::string processName)
|
||||
void ProcessMonitor::checkModuleFailover( std::string processName)
|
||||
{
|
||||
Oam oam;
|
||||
|
||||
//force failover on certain processes
|
||||
if ( processName == "DDLProc" ||
|
||||
processName == "DMLProc" )
|
||||
{
|
||||
log.writeLog(__LINE__, "checkProcessFailover: process failover, process outage of " + processName, LOG_TYPE_CRITICAL);
|
||||
processName == "DMLProc" ) {
|
||||
log.writeLog(__LINE__, "checkModuleFailover: process failover, process outage of " + processName, LOG_TYPE_CRITICAL);
|
||||
|
||||
try
|
||||
{
|
||||
@@ -4999,27 +4998,36 @@ void ProcessMonitor::checkProcessFailover( std::string processName)
|
||||
systemprocessstatus.processstatus[i].ProcessOpState == oam::FAILED )
|
||||
{
|
||||
// found a AVAILABLE mate, start it
|
||||
log.writeLog(__LINE__, "start process on module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG);
|
||||
log.writeLog(__LINE__, "Change UM Master to module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG);
|
||||
log.writeLog(__LINE__, "Disable local UM module " + config.moduleName(), LOG_TYPE_DEBUG);
|
||||
log.writeLog(__LINE__, "Stop local UM module " + config.moduleName(), LOG_TYPE_DEBUG);
|
||||
log.writeLog(__LINE__, "Disable Local will Enable UM module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG);
|
||||
|
||||
oam::DeviceNetworkConfig devicenetworkconfig;
|
||||
oam::DeviceNetworkList devicenetworklist;
|
||||
|
||||
devicenetworkconfig.DeviceName = config.moduleName();
|
||||
devicenetworklist.push_back(devicenetworkconfig);
|
||||
|
||||
try
|
||||
{
|
||||
oam.setSystemConfig("PrimaryUMModuleName", systemprocessstatus.processstatus[i].Module);
|
||||
oam.stopModule(devicenetworklist, oam::FORCEFUL, oam::ACK_YES);
|
||||
log.writeLog(__LINE__, "success stopModule on module " + config.moduleName(), LOG_TYPE_DEBUG);
|
||||
|
||||
//distribute config file
|
||||
oam.distributeConfigFile("system");
|
||||
sleep(1);
|
||||
}
|
||||
catch (...) {}
|
||||
|
||||
try
|
||||
{
|
||||
oam.startProcess(systemprocessstatus.processstatus[i].Module, processName, FORCEFUL, ACK_YES);
|
||||
log.writeLog(__LINE__, "success start process on module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG);
|
||||
}
|
||||
catch (exception& e)
|
||||
{
|
||||
log.writeLog(__LINE__, "failed start process on module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_ERROR);
|
||||
try
|
||||
{
|
||||
oam.disableModule(devicenetworklist);
|
||||
log.writeLog(__LINE__, "success disableModule on module " + config.moduleName(), LOG_TYPE_DEBUG);
|
||||
}
|
||||
catch (exception& e)
|
||||
{
|
||||
log.writeLog(__LINE__, "failed disableModule on module " + config.moduleName(), LOG_TYPE_ERROR);
|
||||
}
|
||||
}
|
||||
catch (exception& e)
|
||||
{
|
||||
log.writeLog(__LINE__, "failed stopModule on module " + config.moduleName(), LOG_TYPE_ERROR);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
@@ -5036,9 +5044,6 @@ void ProcessMonitor::checkProcessFailover( std::string processName)
|
||||
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
/******************************************************************************************
|
||||
@@ -6583,6 +6588,8 @@ int ProcessMonitor::glusterAssign(std::string dbrootID)
|
||||
|
||||
if ( WEXITSTATUS(ret) != 0 )
|
||||
{
|
||||
log.writeLog(__LINE__, "glusterAssign mount failure: dbroot: " + dbrootID + " error: " + oam.itoa(WEXITSTATUS(ret)), LOG_TYPE_ERROR);
|
||||
|
||||
ifstream in("/tmp/glusterAssign.txt");
|
||||
in.seekg(0, std::ios::end);
|
||||
int size = in.tellg();
|
||||
@@ -6630,6 +6637,8 @@ int ProcessMonitor::glusterUnassign(std::string dbrootID)
|
||||
|
||||
if ( WEXITSTATUS(ret) != 0 )
|
||||
{
|
||||
log.writeLog(__LINE__, "glusterUnassign mount failure: dbroot: " + dbrootID + " error: " + oam.itoa(WEXITSTATUS(ret)), LOG_TYPE_ERROR);
|
||||
|
||||
ifstream in("/tmp/glusterUnassign.txt");
|
||||
in.seekg(0, std::ios::end);
|
||||
int size = in.tellg();
|
||||
|
@@ -488,7 +488,7 @@ public:
|
||||
/**
|
||||
*@brief check if module failover is needed due to a process outage
|
||||
*/
|
||||
void checkProcessFailover( std::string processName);
|
||||
void checkModuleFailover(std::string processName);
|
||||
|
||||
/**
|
||||
*@brief run upgrade script
|
||||
|
Reference in New Issue
Block a user