You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-10-31 18:30:33 +03:00
Merge pull request #312 from mariadb-corporation/MCOL-976
MCOL-976: Change how processes are restarted after losing or regainin…
This commit is contained in:
@@ -1635,9 +1635,16 @@ void pingDeviceThread()
|
||||
processManager.distributeConfigFile("system");
|
||||
sleep(1);
|
||||
|
||||
// if a PM module was started successfully, restart ACTIVE ExeMgr(s) / mysqld
|
||||
// if a PM module was started successfully, restart ACTIVE DBRM(s), ExeMgr(s) / mysqld
|
||||
if( moduleName.find("pm") == 0 ) {
|
||||
processManager.restartProcessType("ExeMgr", moduleName);
|
||||
processManager.restartProcessType("DBRMControllerNode", moduleName);
|
||||
processManager.restartProcessType("DBRMWorkerNode");
|
||||
processManager.stopProcessType("DDLProc");
|
||||
processManager.stopProcessType("DMLProc");
|
||||
processManager.stopProcessType("ExeMgr");
|
||||
processManager.restartProcessType("PrimProc");
|
||||
sleep(1);
|
||||
processManager.restartProcessType("ExeMgr");
|
||||
}
|
||||
|
||||
string moduleType = moduleName.substr(0,MAX_MODULE_TYPE_SIZE);
|
||||
@@ -1667,9 +1674,11 @@ void pingDeviceThread()
|
||||
|
||||
// if a PM module was started successfully, DMLProc/DDLProc
|
||||
if( moduleName.find("pm") == 0 ) {
|
||||
processManager.restartProcessType("DDLProc", moduleName);
|
||||
processManager.restartProcessType("WriteEngineServer");
|
||||
sleep(1);
|
||||
processManager.restartProcessType("DMLProc", moduleName);
|
||||
processManager.restartProcessType("DDLProc");
|
||||
sleep(1);
|
||||
processManager.restartProcessType("DMLProc");
|
||||
}
|
||||
|
||||
//enable query stats
|
||||
@@ -1680,6 +1689,78 @@ void pingDeviceThread()
|
||||
|
||||
processManager.setSystemState(oam::ACTIVE);
|
||||
|
||||
//reset standby module
|
||||
string newStandbyModule = processManager.getStandbyModule();
|
||||
|
||||
//send message to start new Standby Process-Manager, if needed
|
||||
if ( !newStandbyModule.empty() && newStandbyModule != "NONE") {
|
||||
processManager.setStandbyModule(newStandbyModule);
|
||||
}
|
||||
else
|
||||
{
|
||||
Config* sysConfig = Config::makeConfig();
|
||||
|
||||
// clear Standby OAM Module
|
||||
sysConfig->setConfig("SystemConfig", "StandbyOAMModuleName", oam::UnassignedName);
|
||||
sysConfig->setConfig("ProcStatusControlStandby", "IPAddr", oam::UnassignedIpAddr);
|
||||
|
||||
//update Calpont Config table
|
||||
try {
|
||||
sysConfig->write();
|
||||
}
|
||||
catch(...)
|
||||
{
|
||||
log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
if ( moduletypeconfig.RunType == SIMPLEX ) {
|
||||
//start SIMPLEX runtype processes on a SIMPLEX runtype module
|
||||
string moduletype = moduleName.substr(0,MAX_MODULE_TYPE_SIZE);
|
||||
DeviceNetworkList::iterator pt = moduletypeconfig.ModuleNetworkList.begin();
|
||||
for( ; pt != moduletypeconfig.ModuleNetworkList.end() ; pt++)
|
||||
{
|
||||
string launchModuleName = (*pt).DeviceName;
|
||||
string launchModuletype = launchModuleName.substr(0,MAX_MODULE_TYPE_SIZE);
|
||||
if ( moduletype != launchModuletype )
|
||||
continue;
|
||||
|
||||
//skip if active pm module (local module)
|
||||
if ( launchModuleName == config.moduleName() )
|
||||
continue;
|
||||
|
||||
//check if module is active before starting any SIMPLEX STANDBY apps
|
||||
try{
|
||||
int launchopState = oam::ACTIVE;
|
||||
bool degraded;
|
||||
oam.getModuleStatus(launchModuleName, launchopState, degraded);
|
||||
|
||||
if (launchopState != oam::ACTIVE && launchopState != oam::STANDBY ) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
catch (exception& ex)
|
||||
{
|
||||
// string error = ex.what();
|
||||
// log.writeLog(__LINE__, "EXCEPTION ERROR on : " + error, LOG_TYPE_ERROR);
|
||||
}
|
||||
catch(...)
|
||||
{
|
||||
// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR);
|
||||
}
|
||||
|
||||
int status;
|
||||
log.writeLog(__LINE__, "Starting up STANDBY process on module " + launchModuleName, LOG_TYPE_DEBUG);
|
||||
for ( int j = 0 ; j < 20 ; j ++ )
|
||||
{
|
||||
status = processManager.startModule(launchModuleName, oam::FORCEFUL, oam::AUTO_OFFLINE);
|
||||
if ( status == API_SUCCESS)
|
||||
break;
|
||||
}
|
||||
log.writeLog(__LINE__, "pingDeviceThread: ACK received from '" + launchModuleName + "' Process-Monitor, return status = " + oam.itoa(status), LOG_TYPE_DEBUG);
|
||||
}
|
||||
}
|
||||
|
||||
//clear count
|
||||
moduleInfoList[moduleName] = 0;
|
||||
}
|
||||
|
||||
@@ -3439,15 +3439,23 @@ void ProcessManager::recycleProcess(string module)
|
||||
|
||||
//restart ExeMgrs/mysql if module is a pm
|
||||
if ( moduleType == "pm" ) {
|
||||
// restartProcessType("DBRMWorkerNode");
|
||||
// restartProcessType("PrimProc");
|
||||
// restartProcessType("WriteEngineServer");
|
||||
restartProcessType("DBRMControllerNode", module);
|
||||
restartProcessType("DBRMWorkerNode");
|
||||
stopProcessType("DDLProc");
|
||||
stopProcessType("DMLProc");
|
||||
stopProcessType("ExeMgr");
|
||||
restartProcessType("PrimProc");
|
||||
sleep(1);
|
||||
restartProcessType("ExeMgr");
|
||||
sleep(1);
|
||||
restartProcessType("mysql");
|
||||
}
|
||||
else
|
||||
{
|
||||
restartProcessType("DBRMControllerNode", module);
|
||||
restartProcessType("DBRMWorkerNode");
|
||||
restartProcessType("ExeMgr");
|
||||
|
||||
}
|
||||
if ( PrimaryUMModuleName == module )
|
||||
{
|
||||
restartProcessType("DDLProc", module);
|
||||
@@ -3457,54 +3465,13 @@ void ProcessManager::recycleProcess(string module)
|
||||
|
||||
if( moduleType == "pm" && PrimaryUMModuleName != module)
|
||||
{
|
||||
// restartProcessType("DBRMControllerNode", module);
|
||||
// sleep(1);
|
||||
reinitProcessType("DDLProc");
|
||||
restartProcessType("WriteEngineServer");
|
||||
sleep(1);
|
||||
restartProcessType("DDLProc");
|
||||
sleep(1);
|
||||
restartProcessType("DMLProc", module);
|
||||
}
|
||||
|
||||
//wait for DMLProc to go ACTIVE
|
||||
/* uint16_t rtn = 0;
|
||||
bool bfirst = true;
|
||||
while (rtn == 0)
|
||||
{
|
||||
ProcessStatus DMLprocessstatus;
|
||||
try {
|
||||
oam.getProcessStatus("DMLProc", PrimaryUMModuleName, DMLprocessstatus);
|
||||
}
|
||||
catch (exception& ex)
|
||||
{
|
||||
// string error = ex.what();
|
||||
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR);
|
||||
}
|
||||
catch(...)
|
||||
{
|
||||
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR);
|
||||
}
|
||||
|
||||
if (DMLprocessstatus.ProcessOpState == oam::BUSY_INIT) {
|
||||
if (bfirst)
|
||||
{
|
||||
log.writeLog(__LINE__, "Waiting for DMLProc to finish rollback" , LOG_TYPE_INFO);
|
||||
bfirst = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (DMLprocessstatus.ProcessOpState == oam::ACTIVE) {
|
||||
rtn = oam::ACTIVE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (DMLprocessstatus.ProcessOpState == oam::FAILED) {
|
||||
rtn = oam::FAILED;
|
||||
break;
|
||||
}
|
||||
|
||||
// wait some more
|
||||
sleep(2);
|
||||
}
|
||||
*/
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -4291,6 +4258,7 @@ int ProcessManager::restartProcessType( std::string processName, std::string ski
|
||||
SystemProcessStatus systemprocessstatus;
|
||||
ProcessStatus processstatus;
|
||||
int retStatus = API_SUCCESS;
|
||||
bool setPMProcIPs = true;
|
||||
|
||||
log.writeLog(__LINE__, "restartProcessType: Restart all " + processName, LOG_TYPE_DEBUG);
|
||||
|
||||
@@ -4338,8 +4306,8 @@ int ProcessManager::restartProcessType( std::string processName, std::string ski
|
||||
( systemprocessstatus.processstatus[i].ProcessOpState == oam::COLD_STANDBY && !manualFlag ) )
|
||||
continue;
|
||||
|
||||
if( processName.find("DDLProc") == 0 ||
|
||||
processName.find("DMLProc") == 0 ) {
|
||||
if ( (processName.find("DDLProc") == 0 || processName.find("DMLProc") == 0) && setPMProcIPs )
|
||||
{
|
||||
string procModuleType = systemprocessstatus.processstatus[i].Module.substr(0,MAX_MODULE_TYPE_SIZE);
|
||||
if ( procModuleType == "pm" && PMwithUM == "y" )
|
||||
continue;
|
||||
@@ -4366,11 +4334,11 @@ int ProcessManager::restartProcessType( std::string processName, std::string ski
|
||||
// if DDL or DMLProc, change IP Address
|
||||
if ( retStatus == oam::API_SUCCESS )
|
||||
{
|
||||
if( processName.find("DDLProc") == 0 ||
|
||||
processName.find("DMLProc") == 0 ) {
|
||||
|
||||
if ( (processName.find("DDLProc") == 0 || processName.find("DMLProc") == 0) && setPMProcIPs )
|
||||
{
|
||||
processManager.setPMProcIPs(systemprocessstatus.processstatus[i].Module, processName);
|
||||
return retStatus;
|
||||
setPMProcIPs = false;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -8220,7 +8188,7 @@ int ProcessManager::setPMProcIPs( std::string moduleName, std::string processNam
|
||||
|
||||
pthread_mutex_unlock(&THREAD_LOCK);
|
||||
|
||||
log.writeLog(__LINE__, "setPMProcIPs failed", LOG_TYPE_DEBUG);
|
||||
//log.writeLog(__LINE__, "setPMProcIPs failed", LOG_TYPE_DEBUG);
|
||||
|
||||
return API_SUCCESS;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user