You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-10-31 18:30:33 +03:00
Merge pull request #312 from mariadb-corporation/MCOL-976
MCOL-976: Change how processes are restarted after losing or regaining…
This commit is contained in:
@@ -1635,9 +1635,16 @@ void pingDeviceThread()
|
|||||||
processManager.distributeConfigFile("system");
|
processManager.distributeConfigFile("system");
|
||||||
sleep(1);
|
sleep(1);
|
||||||
|
|
||||||
// if a PM module was started successfully, restart ACTIVE ExeMgr(s) / mysqld
|
// if a PM module was started successfully, restart ACTIVE DBRM(s), ExeMgr(s) / mysqld
|
||||||
if( moduleName.find("pm") == 0 ) {
|
if( moduleName.find("pm") == 0 ) {
|
||||||
processManager.restartProcessType("ExeMgr", moduleName);
|
processManager.restartProcessType("DBRMControllerNode", moduleName);
|
||||||
|
processManager.restartProcessType("DBRMWorkerNode");
|
||||||
|
processManager.stopProcessType("DDLProc");
|
||||||
|
processManager.stopProcessType("DMLProc");
|
||||||
|
processManager.stopProcessType("ExeMgr");
|
||||||
|
processManager.restartProcessType("PrimProc");
|
||||||
|
sleep(1);
|
||||||
|
processManager.restartProcessType("ExeMgr");
|
||||||
}
|
}
|
||||||
|
|
||||||
string moduleType = moduleName.substr(0,MAX_MODULE_TYPE_SIZE);
|
string moduleType = moduleName.substr(0,MAX_MODULE_TYPE_SIZE);
|
||||||
@@ -1667,9 +1674,11 @@ void pingDeviceThread()
|
|||||||
|
|
||||||
// if a PM module was started successfully, DMLProc/DDLProc
|
// if a PM module was started successfully, DMLProc/DDLProc
|
||||||
if( moduleName.find("pm") == 0 ) {
|
if( moduleName.find("pm") == 0 ) {
|
||||||
processManager.restartProcessType("DDLProc", moduleName);
|
processManager.restartProcessType("WriteEngineServer");
|
||||||
sleep(1);
|
sleep(1);
|
||||||
processManager.restartProcessType("DMLProc", moduleName);
|
processManager.restartProcessType("DDLProc");
|
||||||
|
sleep(1);
|
||||||
|
processManager.restartProcessType("DMLProc");
|
||||||
}
|
}
|
||||||
|
|
||||||
//enable query stats
|
//enable query stats
|
||||||
@@ -1680,6 +1689,78 @@ void pingDeviceThread()
|
|||||||
|
|
||||||
processManager.setSystemState(oam::ACTIVE);
|
processManager.setSystemState(oam::ACTIVE);
|
||||||
|
|
||||||
|
//reset standby module
|
||||||
|
string newStandbyModule = processManager.getStandbyModule();
|
||||||
|
|
||||||
|
//send message to start new Standby Process-Manager, if needed
|
||||||
|
if ( !newStandbyModule.empty() && newStandbyModule != "NONE") {
|
||||||
|
processManager.setStandbyModule(newStandbyModule);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Config* sysConfig = Config::makeConfig();
|
||||||
|
|
||||||
|
// clear Standby OAM Module
|
||||||
|
sysConfig->setConfig("SystemConfig", "StandbyOAMModuleName", oam::UnassignedName);
|
||||||
|
sysConfig->setConfig("ProcStatusControlStandby", "IPAddr", oam::UnassignedIpAddr);
|
||||||
|
|
||||||
|
//update Calpont Config table
|
||||||
|
try {
|
||||||
|
sysConfig->write();
|
||||||
|
}
|
||||||
|
catch(...)
|
||||||
|
{
|
||||||
|
log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( moduletypeconfig.RunType == SIMPLEX ) {
|
||||||
|
//start SIMPLEX runtype processes on a SIMPLEX runtype module
|
||||||
|
string moduletype = moduleName.substr(0,MAX_MODULE_TYPE_SIZE);
|
||||||
|
DeviceNetworkList::iterator pt = moduletypeconfig.ModuleNetworkList.begin();
|
||||||
|
for( ; pt != moduletypeconfig.ModuleNetworkList.end() ; pt++)
|
||||||
|
{
|
||||||
|
string launchModuleName = (*pt).DeviceName;
|
||||||
|
string launchModuletype = launchModuleName.substr(0,MAX_MODULE_TYPE_SIZE);
|
||||||
|
if ( moduletype != launchModuletype )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
//skip if active pm module (local module)
|
||||||
|
if ( launchModuleName == config.moduleName() )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
//check if module is active before starting any SIMPLEX STANDBY apps
|
||||||
|
try{
|
||||||
|
int launchopState = oam::ACTIVE;
|
||||||
|
bool degraded;
|
||||||
|
oam.getModuleStatus(launchModuleName, launchopState, degraded);
|
||||||
|
|
||||||
|
if (launchopState != oam::ACTIVE && launchopState != oam::STANDBY ) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (exception& ex)
|
||||||
|
{
|
||||||
|
// string error = ex.what();
|
||||||
|
// log.writeLog(__LINE__, "EXCEPTION ERROR on : " + error, LOG_TYPE_ERROR);
|
||||||
|
}
|
||||||
|
catch(...)
|
||||||
|
{
|
||||||
|
// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
int status;
|
||||||
|
log.writeLog(__LINE__, "Starting up STANDBY process on module " + launchModuleName, LOG_TYPE_DEBUG);
|
||||||
|
for ( int j = 0 ; j < 20 ; j ++ )
|
||||||
|
{
|
||||||
|
status = processManager.startModule(launchModuleName, oam::FORCEFUL, oam::AUTO_OFFLINE);
|
||||||
|
if ( status == API_SUCCESS)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
log.writeLog(__LINE__, "pingDeviceThread: ACK received from '" + launchModuleName + "' Process-Monitor, return status = " + oam.itoa(status), LOG_TYPE_DEBUG);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//clear count
|
//clear count
|
||||||
moduleInfoList[moduleName] = 0;
|
moduleInfoList[moduleName] = 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3439,15 +3439,23 @@ void ProcessManager::recycleProcess(string module)
|
|||||||
|
|
||||||
//restart ExeMgrs/mysql if module is a pm
|
//restart ExeMgrs/mysql if module is a pm
|
||||||
if ( moduleType == "pm" ) {
|
if ( moduleType == "pm" ) {
|
||||||
// restartProcessType("DBRMWorkerNode");
|
restartProcessType("DBRMControllerNode", module);
|
||||||
// restartProcessType("PrimProc");
|
restartProcessType("DBRMWorkerNode");
|
||||||
// restartProcessType("WriteEngineServer");
|
stopProcessType("DDLProc");
|
||||||
|
stopProcessType("DMLProc");
|
||||||
|
stopProcessType("ExeMgr");
|
||||||
|
restartProcessType("PrimProc");
|
||||||
|
sleep(1);
|
||||||
restartProcessType("ExeMgr");
|
restartProcessType("ExeMgr");
|
||||||
|
sleep(1);
|
||||||
restartProcessType("mysql");
|
restartProcessType("mysql");
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
|
restartProcessType("DBRMControllerNode", module);
|
||||||
|
restartProcessType("DBRMWorkerNode");
|
||||||
restartProcessType("ExeMgr");
|
restartProcessType("ExeMgr");
|
||||||
|
}
|
||||||
if ( PrimaryUMModuleName == module )
|
if ( PrimaryUMModuleName == module )
|
||||||
{
|
{
|
||||||
restartProcessType("DDLProc", module);
|
restartProcessType("DDLProc", module);
|
||||||
@@ -3457,54 +3465,13 @@ void ProcessManager::recycleProcess(string module)
|
|||||||
|
|
||||||
if( moduleType == "pm" && PrimaryUMModuleName != module)
|
if( moduleType == "pm" && PrimaryUMModuleName != module)
|
||||||
{
|
{
|
||||||
// restartProcessType("DBRMControllerNode", module);
|
restartProcessType("WriteEngineServer");
|
||||||
// sleep(1);
|
sleep(1);
|
||||||
reinitProcessType("DDLProc");
|
restartProcessType("DDLProc");
|
||||||
sleep(1);
|
sleep(1);
|
||||||
restartProcessType("DMLProc", module);
|
restartProcessType("DMLProc", module);
|
||||||
}
|
}
|
||||||
|
|
||||||
//wait for DMLProc to go ACTIVE
|
|
||||||
/* uint16_t rtn = 0;
|
|
||||||
bool bfirst = true;
|
|
||||||
while (rtn == 0)
|
|
||||||
{
|
|
||||||
ProcessStatus DMLprocessstatus;
|
|
||||||
try {
|
|
||||||
oam.getProcessStatus("DMLProc", PrimaryUMModuleName, DMLprocessstatus);
|
|
||||||
}
|
|
||||||
catch (exception& ex)
|
|
||||||
{
|
|
||||||
// string error = ex.what();
|
|
||||||
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR);
|
|
||||||
}
|
|
||||||
catch(...)
|
|
||||||
{
|
|
||||||
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (DMLprocessstatus.ProcessOpState == oam::BUSY_INIT) {
|
|
||||||
if (bfirst)
|
|
||||||
{
|
|
||||||
log.writeLog(__LINE__, "Waiting for DMLProc to finish rollback" , LOG_TYPE_INFO);
|
|
||||||
bfirst = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (DMLprocessstatus.ProcessOpState == oam::ACTIVE) {
|
|
||||||
rtn = oam::ACTIVE;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (DMLprocessstatus.ProcessOpState == oam::FAILED) {
|
|
||||||
rtn = oam::FAILED;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// wait some more
|
|
||||||
sleep(2);
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4291,6 +4258,7 @@ int ProcessManager::restartProcessType( std::string processName, std::string ski
|
|||||||
SystemProcessStatus systemprocessstatus;
|
SystemProcessStatus systemprocessstatus;
|
||||||
ProcessStatus processstatus;
|
ProcessStatus processstatus;
|
||||||
int retStatus = API_SUCCESS;
|
int retStatus = API_SUCCESS;
|
||||||
|
bool setPMProcIPs = true;
|
||||||
|
|
||||||
log.writeLog(__LINE__, "restartProcessType: Restart all " + processName, LOG_TYPE_DEBUG);
|
log.writeLog(__LINE__, "restartProcessType: Restart all " + processName, LOG_TYPE_DEBUG);
|
||||||
|
|
||||||
@@ -4338,8 +4306,8 @@ int ProcessManager::restartProcessType( std::string processName, std::string ski
|
|||||||
( systemprocessstatus.processstatus[i].ProcessOpState == oam::COLD_STANDBY && !manualFlag ) )
|
( systemprocessstatus.processstatus[i].ProcessOpState == oam::COLD_STANDBY && !manualFlag ) )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if( processName.find("DDLProc") == 0 ||
|
if ( (processName.find("DDLProc") == 0 || processName.find("DMLProc") == 0) && setPMProcIPs )
|
||||||
processName.find("DMLProc") == 0 ) {
|
{
|
||||||
string procModuleType = systemprocessstatus.processstatus[i].Module.substr(0,MAX_MODULE_TYPE_SIZE);
|
string procModuleType = systemprocessstatus.processstatus[i].Module.substr(0,MAX_MODULE_TYPE_SIZE);
|
||||||
if ( procModuleType == "pm" && PMwithUM == "y" )
|
if ( procModuleType == "pm" && PMwithUM == "y" )
|
||||||
continue;
|
continue;
|
||||||
@@ -4366,11 +4334,11 @@ int ProcessManager::restartProcessType( std::string processName, std::string ski
|
|||||||
// if DDL or DMLProc, change IP Address
|
// if DDL or DMLProc, change IP Address
|
||||||
if ( retStatus == oam::API_SUCCESS )
|
if ( retStatus == oam::API_SUCCESS )
|
||||||
{
|
{
|
||||||
if( processName.find("DDLProc") == 0 ||
|
if ( (processName.find("DDLProc") == 0 || processName.find("DMLProc") == 0) && setPMProcIPs )
|
||||||
processName.find("DMLProc") == 0 ) {
|
{
|
||||||
|
|
||||||
processManager.setPMProcIPs(systemprocessstatus.processstatus[i].Module, processName);
|
processManager.setPMProcIPs(systemprocessstatus.processstatus[i].Module, processName);
|
||||||
return retStatus;
|
setPMProcIPs = false;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -8220,7 +8188,7 @@ int ProcessManager::setPMProcIPs( std::string moduleName, std::string processNam
|
|||||||
|
|
||||||
pthread_mutex_unlock(&THREAD_LOCK);
|
pthread_mutex_unlock(&THREAD_LOCK);
|
||||||
|
|
||||||
log.writeLog(__LINE__, "setPMProcIPs failed", LOG_TYPE_DEBUG);
|
//log.writeLog(__LINE__, "setPMProcIPs failed", LOG_TYPE_DEBUG);
|
||||||
|
|
||||||
return API_SUCCESS;
|
return API_SUCCESS;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user