1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-10-31 18:30:33 +03:00

Merge pull request #312 from mariadb-corporation/MCOL-976

MCOL-976: Change how processes are restarted after losing or regaining…
This commit is contained in:
david hill
2017-11-13 17:12:48 -06:00
committed by GitHub
2 changed files with 108 additions and 59 deletions

View File

@@ -1635,9 +1635,16 @@ void pingDeviceThread()
processManager.distributeConfigFile("system");
sleep(1);
// if a PM module was started successfully, restart ACTIVE ExeMgr(s) / mysqld
// if a PM module was started successfully, restart ACTIVE DBRM(s), ExeMgr(s) / mysqld
if( moduleName.find("pm") == 0 ) {
processManager.restartProcessType("ExeMgr", moduleName);
processManager.restartProcessType("DBRMControllerNode", moduleName);
processManager.restartProcessType("DBRMWorkerNode");
processManager.stopProcessType("DDLProc");
processManager.stopProcessType("DMLProc");
processManager.stopProcessType("ExeMgr");
processManager.restartProcessType("PrimProc");
sleep(1);
processManager.restartProcessType("ExeMgr");
}
string moduleType = moduleName.substr(0,MAX_MODULE_TYPE_SIZE);
@@ -1667,9 +1674,11 @@ void pingDeviceThread()
// if a PM module was started successfully, DMLProc/DDLProc
if( moduleName.find("pm") == 0 ) {
processManager.restartProcessType("DDLProc", moduleName);
processManager.restartProcessType("WriteEngineServer");
sleep(1);
processManager.restartProcessType("DMLProc", moduleName);
processManager.restartProcessType("DDLProc");
sleep(1);
processManager.restartProcessType("DMLProc");
}
//enable query stats
@@ -1680,6 +1689,78 @@ void pingDeviceThread()
processManager.setSystemState(oam::ACTIVE);
//reset standby module
string newStandbyModule = processManager.getStandbyModule();
//send message to start new Standby Process-Manager, if needed
if ( !newStandbyModule.empty() && newStandbyModule != "NONE") {
processManager.setStandbyModule(newStandbyModule);
}
else
{
Config* sysConfig = Config::makeConfig();
// clear Standby OAM Module
sysConfig->setConfig("SystemConfig", "StandbyOAMModuleName", oam::UnassignedName);
sysConfig->setConfig("ProcStatusControlStandby", "IPAddr", oam::UnassignedIpAddr);
//update Calpont Config table
try {
sysConfig->write();
}
catch(...)
{
log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR);
}
}
if ( moduletypeconfig.RunType == SIMPLEX ) {
//start SIMPLEX runtype processes on a SIMPLEX runtype module
string moduletype = moduleName.substr(0,MAX_MODULE_TYPE_SIZE);
DeviceNetworkList::iterator pt = moduletypeconfig.ModuleNetworkList.begin();
for( ; pt != moduletypeconfig.ModuleNetworkList.end() ; pt++)
{
string launchModuleName = (*pt).DeviceName;
string launchModuletype = launchModuleName.substr(0,MAX_MODULE_TYPE_SIZE);
if ( moduletype != launchModuletype )
continue;
//skip if active pm module (local module)
if ( launchModuleName == config.moduleName() )
continue;
//check if module is active before starting any SIMPLEX STANDBY apps
try{
int launchopState = oam::ACTIVE;
bool degraded;
oam.getModuleStatus(launchModuleName, launchopState, degraded);
if (launchopState != oam::ACTIVE && launchopState != oam::STANDBY ) {
continue;
}
}
catch (exception& ex)
{
// string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on : " + error, LOG_TYPE_ERROR);
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR);
}
int status;
log.writeLog(__LINE__, "Starting up STANDBY process on module " + launchModuleName, LOG_TYPE_DEBUG);
for ( int j = 0 ; j < 20 ; j ++ )
{
status = processManager.startModule(launchModuleName, oam::FORCEFUL, oam::AUTO_OFFLINE);
if ( status == API_SUCCESS)
break;
}
log.writeLog(__LINE__, "pingDeviceThread: ACK received from '" + launchModuleName + "' Process-Monitor, return status = " + oam.itoa(status), LOG_TYPE_DEBUG);
}
}
//clear count
moduleInfoList[moduleName] = 0;
}

View File

@@ -3439,15 +3439,23 @@ void ProcessManager::recycleProcess(string module)
//restart ExeMgrs/mysql if module is a pm
if ( moduleType == "pm" ) {
// restartProcessType("DBRMWorkerNode");
// restartProcessType("PrimProc");
// restartProcessType("WriteEngineServer");
restartProcessType("DBRMControllerNode", module);
restartProcessType("DBRMWorkerNode");
stopProcessType("DDLProc");
stopProcessType("DMLProc");
stopProcessType("ExeMgr");
restartProcessType("PrimProc");
sleep(1);
restartProcessType("ExeMgr");
sleep(1);
restartProcessType("mysql");
}
else
{
restartProcessType("DBRMControllerNode", module);
restartProcessType("DBRMWorkerNode");
restartProcessType("ExeMgr");
}
if ( PrimaryUMModuleName == module )
{
restartProcessType("DDLProc", module);
@@ -3457,54 +3465,13 @@ void ProcessManager::recycleProcess(string module)
if( moduleType == "pm" && PrimaryUMModuleName != module)
{
// restartProcessType("DBRMControllerNode", module);
// sleep(1);
reinitProcessType("DDLProc");
restartProcessType("WriteEngineServer");
sleep(1);
restartProcessType("DDLProc");
sleep(1);
restartProcessType("DMLProc", module);
}
//wait for DMLProc to go ACTIVE
/* uint16_t rtn = 0;
bool bfirst = true;
while (rtn == 0)
{
ProcessStatus DMLprocessstatus;
try {
oam.getProcessStatus("DMLProc", PrimaryUMModuleName, DMLprocessstatus);
}
catch (exception& ex)
{
// string error = ex.what();
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR);
}
catch(...)
{
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR);
}
if (DMLprocessstatus.ProcessOpState == oam::BUSY_INIT) {
if (bfirst)
{
log.writeLog(__LINE__, "Waiting for DMLProc to finish rollback" , LOG_TYPE_INFO);
bfirst = false;
}
}
if (DMLprocessstatus.ProcessOpState == oam::ACTIVE) {
rtn = oam::ACTIVE;
break;
}
if (DMLprocessstatus.ProcessOpState == oam::FAILED) {
rtn = oam::FAILED;
break;
}
// wait some more
sleep(2);
}
*/
return;
}
@@ -4291,6 +4258,7 @@ int ProcessManager::restartProcessType( std::string processName, std::string ski
SystemProcessStatus systemprocessstatus;
ProcessStatus processstatus;
int retStatus = API_SUCCESS;
bool setPMProcIPs = true;
log.writeLog(__LINE__, "restartProcessType: Restart all " + processName, LOG_TYPE_DEBUG);
@@ -4338,8 +4306,8 @@ int ProcessManager::restartProcessType( std::string processName, std::string ski
( systemprocessstatus.processstatus[i].ProcessOpState == oam::COLD_STANDBY && !manualFlag ) )
continue;
if( processName.find("DDLProc") == 0 ||
processName.find("DMLProc") == 0 ) {
if ( (processName.find("DDLProc") == 0 || processName.find("DMLProc") == 0) && setPMProcIPs )
{
string procModuleType = systemprocessstatus.processstatus[i].Module.substr(0,MAX_MODULE_TYPE_SIZE);
if ( procModuleType == "pm" && PMwithUM == "y" )
continue;
@@ -4366,11 +4334,11 @@ int ProcessManager::restartProcessType( std::string processName, std::string ski
// if DDL or DMLProc, change IP Address
if ( retStatus == oam::API_SUCCESS )
{
if( processName.find("DDLProc") == 0 ||
processName.find("DMLProc") == 0 ) {
if ( (processName.find("DDLProc") == 0 || processName.find("DMLProc") == 0) && setPMProcIPs )
{
processManager.setPMProcIPs(systemprocessstatus.processstatus[i].Module, processName);
return retStatus;
setPMProcIPs = false;
continue;
}
}
}
@@ -8220,7 +8188,7 @@ int ProcessManager::setPMProcIPs( std::string moduleName, std::string processNam
pthread_mutex_unlock(&THREAD_LOCK);
log.writeLog(__LINE__, "setPMProcIPs failed", LOG_TYPE_DEBUG);
//log.writeLog(__LINE__, "setPMProcIPs failed", LOG_TYPE_DEBUG);
return API_SUCCESS;