1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-10-21 11:33:43 +03:00

Merge branch 'develop-1.1' into 1.1-merge-up-20180817

This commit is contained in:
Andrew Hutchings
2018-08-17 16:17:13 +01:00
47 changed files with 1526 additions and 756 deletions

View File

@@ -340,8 +340,9 @@ int main(int argc, char** argv)
{
log.writeLog(__LINE__, "Standby PM not responding, infinidb shutting down", LOG_TYPE_CRITICAL);
//Set the alarm
aMonitor.sendAlarm(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET);
sleep (1);
// aMonitor.sendAlarm(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET);
// sleep (1);
string cmd = startup::StartUp::installDir() + "/bin/infinidb stop > /dev/null 2>&1";
system(cmd.c_str());
}
@@ -369,8 +370,7 @@ int main(int argc, char** argv)
sysConfig->setConfig("ProcMgr_Alarm", "IPAddr", IPaddr);
log.writeLog(__LINE__, "set ProcMgr IPaddr to Old Standby Module: " + IPaddr, LOG_TYPE_DEBUG);
//update Calpont Config table
//update MariaDB ColumnStore Config table
try
{
sysConfig->write();
@@ -554,8 +554,8 @@ int main(int argc, char** argv)
{
log.writeLog(__LINE__, "Check DB mounts failed, shutting down", LOG_TYPE_CRITICAL);
//Set the alarm
aMonitor.sendAlarm(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET);
sleep (1);
// aMonitor.sendAlarm(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET);
// sleep (1);
string cmd = startup::StartUp::installDir() + "/bin/columnstore stop > /dev/null 2>&1";
system(cmd.c_str());
}
@@ -1463,7 +1463,7 @@ static void chldHandleThread(MonitorConfig config)
(*listPtr).processID != 0 ) ||
( (*listPtr).state == oam::ACTIVE && (*listPtr).processID == 0 ) )
{
log.writeLog(__LINE__, "*****Calpont Process Restarting: " + (*listPtr).ProcessName + ", old PID = " + oam.itoa((*listPtr).processID), LOG_TYPE_CRITICAL);
log.writeLog(__LINE__, "*****MariaDB ColumnStore Process Restarting: " + (*listPtr).ProcessName + ", old PID = " + oam.itoa((*listPtr).processID), LOG_TYPE_CRITICAL);
if ( (*listPtr).dieCounter >= processRestartCount ||
processRestartCount == 0)
@@ -1523,7 +1523,7 @@ static void chldHandleThread(MonitorConfig config)
{}
// check if process failover is needed due to process outage
aMonitor.checkProcessFailover((*listPtr).ProcessName);
aMonitor.checkModuleFailover((*listPtr).ProcessName);
//check the db health
if (DBFunctionalMonitorFlag == "y" )
@@ -1605,7 +1605,7 @@ static void chldHandleThread(MonitorConfig config)
(*listPtr).processID = 0;
// check if process failover is needed due to process outage
aMonitor.checkProcessFailover((*listPtr).ProcessName);
aMonitor.checkModuleFailover((*listPtr).ProcessName);
break;
}
else
@@ -1681,7 +1681,7 @@ static void chldHandleThread(MonitorConfig config)
}
//Log this event
log.writeLog(__LINE__, "Calpont Process " + (*listPtr).ProcessName + restartStatus, LOG_TYPE_INFO);
log.writeLog(__LINE__, "MariaDB ColumnStore Process " + (*listPtr).ProcessName + restartStatus, LOG_TYPE_INFO);
}
}
}
@@ -2707,6 +2707,9 @@ void processStatusMSG(messageqcpp::IOSocket* cfIos)
memcpy(fShmSystemStatus[0].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE);
log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set System State = " + oamState[state], LOG_TYPE_DEBUG);
}
BRM::DBRM dbrm;
dbrm.setSystemQueryReady(true);
}
}
break;

View File

@@ -1261,7 +1261,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO
// error in launching a process
if ( requestStatus == oam::API_FAILURE &&
(*listPtr).RunType == SIMPLEX)
checkProcessFailover((*listPtr).ProcessName);
checkModuleFailover((*listPtr).ProcessName);
else
break;
}
@@ -4963,20 +4963,19 @@ std::string ProcessMonitor::sendMsgProcMon1( std::string module, ByteStream msg,
}
/******************************************************************************************
* @brief checkProcessFailover
* @brief checkModuleFailover
*
* purpose: check if process failover is needed due to a process outage
* purpose: check if module failover is needed due to a process outage
*
******************************************************************************************/
void ProcessMonitor::checkProcessFailover( std::string processName)
void ProcessMonitor::checkModuleFailover( std::string processName)
{
Oam oam;
//force failover on certain processes
if ( processName == "DDLProc" ||
processName == "DMLProc" )
{
log.writeLog(__LINE__, "checkProcessFailover: process failover, process outage of " + processName, LOG_TYPE_CRITICAL);
processName == "DMLProc" ) {
log.writeLog(__LINE__, "checkModuleFailover: process failover, process outage of " + processName, LOG_TYPE_CRITICAL);
try
{
@@ -4999,27 +4998,36 @@ void ProcessMonitor::checkProcessFailover( std::string processName)
systemprocessstatus.processstatus[i].ProcessOpState == oam::FAILED )
{
// found an AVAILABLE mate, start it
log.writeLog(__LINE__, "start process on module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG);
log.writeLog(__LINE__, "Change UM Master to module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG);
log.writeLog(__LINE__, "Disable local UM module " + config.moduleName(), LOG_TYPE_DEBUG);
log.writeLog(__LINE__, "Stop local UM module " + config.moduleName(), LOG_TYPE_DEBUG);
log.writeLog(__LINE__, "Disable Local will Enable UM module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG);
oam::DeviceNetworkConfig devicenetworkconfig;
oam::DeviceNetworkList devicenetworklist;
devicenetworkconfig.DeviceName = config.moduleName();
devicenetworklist.push_back(devicenetworkconfig);
try
{
oam.setSystemConfig("PrimaryUMModuleName", systemprocessstatus.processstatus[i].Module);
oam.stopModule(devicenetworklist, oam::FORCEFUL, oam::ACK_YES);
log.writeLog(__LINE__, "success stopModule on module " + config.moduleName(), LOG_TYPE_DEBUG);
//distribute config file
oam.distributeConfigFile("system");
sleep(1);
}
catch (...) {}
try
{
oam.startProcess(systemprocessstatus.processstatus[i].Module, processName, FORCEFUL, ACK_YES);
log.writeLog(__LINE__, "success start process on module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG);
}
catch (exception& e)
{
log.writeLog(__LINE__, "failed start process on module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_ERROR);
try
{
oam.disableModule(devicenetworklist);
log.writeLog(__LINE__, "success disableModule on module " + config.moduleName(), LOG_TYPE_DEBUG);
}
catch (exception& e)
{
log.writeLog(__LINE__, "failed disableModule on module " + config.moduleName(), LOG_TYPE_ERROR);
}
}
catch (exception& e)
{
log.writeLog(__LINE__, "failed stopModule on module " + config.moduleName(), LOG_TYPE_ERROR);
}
break;
}
@@ -5036,9 +5044,6 @@ void ProcessMonitor::checkProcessFailover( std::string processName)
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR);
}
}
return;
}
/******************************************************************************************
@@ -6583,6 +6588,8 @@ int ProcessMonitor::glusterAssign(std::string dbrootID)
if ( WEXITSTATUS(ret) != 0 )
{
log.writeLog(__LINE__, "glusterAssign mount failure: dbroot: " + dbrootID + " error: " + oam.itoa(WEXITSTATUS(ret)), LOG_TYPE_ERROR);
ifstream in("/tmp/glusterAssign.txt");
in.seekg(0, std::ios::end);
int size = in.tellg();
@@ -6630,6 +6637,8 @@ int ProcessMonitor::glusterUnassign(std::string dbrootID)
if ( WEXITSTATUS(ret) != 0 )
{
log.writeLog(__LINE__, "glusterUnassign mount failure: dbroot: " + dbrootID + " error: " + oam.itoa(WEXITSTATUS(ret)), LOG_TYPE_ERROR);
ifstream in("/tmp/glusterUnassign.txt");
in.seekg(0, std::ios::end);
int size = in.tellg();

View File

@@ -488,7 +488,7 @@ public:
/**
*@brief check if module failover is needed due to a process outage
*/
void checkProcessFailover( std::string processName);
void checkModuleFailover(std::string processName);
/**
*@brief run upgrade script