1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-11-25 20:23:16 +03:00

MCOL-3842: Fix how processes are restarted in failover event.

This commit is contained in:
benthompson15
2020-04-03 12:35:11 -05:00
parent 89e3f6121b
commit 7c65d7fda0
3 changed files with 47 additions and 31 deletions

View File

@@ -2095,11 +2095,6 @@ void pingDeviceThread()
oam.dbrmctl("halt");
log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG);
processManager.setSystemState(oam::BUSY_INIT);
//string cmd = "/etc/init.d/glusterd restart > /dev/null 2>&1";
//system(cmd.c_str());
//send notification
oam.sendDeviceNotification(moduleName, MODULE_DOWN);
@@ -2112,9 +2107,7 @@ void pingDeviceThread()
//set module to disable state
processManager.disableModule(moduleName, false);
//call dbrm control
oam.dbrmctl("reload");
log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG);
// if pm, move dbroots to other pms
if ( ( moduleName.find("pm") == 0 && !amazon && ( DBRootStorageType != "internal") ) ||
@@ -2145,6 +2138,9 @@ void pingDeviceThread()
{
processManager.setModuleState(moduleName, oam::AUTO_DISABLED);
//call dbrm control
oam.dbrmctl("reload");
log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG);
// resume the dbrm
oam.dbrmctl("resume");
log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG);
@@ -2163,6 +2159,9 @@ void pingDeviceThread()
{
if ( moduleName.find("um") == 0 )
{
//call dbrm control
oam.dbrmctl("reload");
log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG);
// resume the dbrm
oam.dbrmctl("resume");
log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG);
@@ -2353,8 +2352,8 @@ void pingDeviceThread()
}
}
//set recycle process
processManager.recycleProcess(moduleName);
//set reinit process
processManager.reinitProcesses();
//set query system state ready
processManager.setQuerySystemState(true);
@@ -2368,6 +2367,9 @@ void pingDeviceThread()
( opState != oam::AUTO_DISABLED ) )
{
//call dbrm control
oam.dbrmctl("reload");
log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG);
// resume the dbrm
oam.dbrmctl("resume");
log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG);
@@ -2378,13 +2380,16 @@ void pingDeviceThread()
}
else
{
//call dbrm control
oam.dbrmctl("reload");
log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG);
// non-amazon
// resume the dbrm
oam.dbrmctl("resume");
log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG);
//set recycle process
processManager.recycleProcess(moduleName);
//set reinit process
processManager.reinitProcesses();
//set query system state ready
processManager.setQuerySystemState(true);

View File

@@ -921,8 +921,6 @@ void processMSG(messageqcpp::IOSocket* cfIos)
status = processManager.disableModule(moduleName, true);
log.writeLog(__LINE__, "Disable Module Completed on " + moduleName, LOG_TYPE_INFO);
processManager.recycleProcess(moduleName);
//check for SIMPLEX Processes on mate might need to be started
processManager.checkSimplexModule(moduleName);
@@ -3739,12 +3737,6 @@ int ProcessManager::disableModule(string target, bool manualFlag)
bool degraded;
oam.getModuleStatus(target, opState, degraded);
if (opState == newState || opState == oam::MAN_DISABLED)
{
pthread_mutex_unlock(&THREAD_LOCK);
return API_SUCCESS;
}
// if current state is AUTO_DISABLED and new state is MAN_DISABLED
// update state to MAN_DISABLED
@@ -3822,7 +3814,6 @@ int ProcessManager::disableModule(string target, bool manualFlag)
{
return API_FAILURE;
}
processManager.recycleProcess(target);
//check for SIMPLEX Processes on mate might need to be started
processManager.checkSimplexModule(target);
@@ -3830,11 +3821,31 @@ int ProcessManager::disableModule(string target, bool manualFlag)
//distribute config file
distributeConfigFile("system");
processManager.reinitProcesses();
log.writeLog(__LINE__, "disableModule successfully complete for " + target, LOG_TYPE_DEBUG);
return API_SUCCESS;
}
/******************************************************************************************
* @brief reinitProcesses
*
* purpose: Restart / reinitialize / start a fixed list of process types, in a fixed
*          order, logging a debug message before the first step and after the last.
*
* Takes no arguments and returns nothing; the results of the individual
* restart/reinit/start calls are not checked here.
*
* NOTE(review): the exact semantics of restartProcessType(), reinitProcessType() and
* startProcessType() are defined elsewhere and are not visible from this function.
******************************************************************************************/
void ProcessManager::reinitProcesses()
{
log.writeLog(__LINE__, "reinitProcesses... ", LOG_TYPE_DEBUG);
// DBRM controller is restarted first, before the processes below are touched.
restartProcessType("DBRMControllerNode");
// ExeMgr and the DBRM worker are reinitialized rather than restarted.
reinitProcessType("ExeMgr");
reinitProcessType("DBRMWorkerNode");
restartProcessType("WriteEngineServer");
// presumably gives WriteEngineServer time to come up before DDLProc starts -- TODO confirm
sleep(1);
startProcessType("DDLProc");
// presumably gives DDLProc time to come up before DMLProc starts -- TODO confirm
sleep(1);
startProcessType("DMLProc");
// NOTE(review): DDLProc and DMLProc are started above and then also reinitialized
// here; confirm that both steps are required.
reinitProcessType("DDLProc");
reinitProcessType("DMLProc");
log.writeLog(__LINE__, "reinitProcesses complete", LOG_TYPE_DEBUG);
}
/******************************************************************************************
* @brief recycleProcess
*
@@ -10040,8 +10051,12 @@ int ProcessManager::OAMParentModuleChange()
for ( ; pt1 != downModuleList.end() ; pt1++)
{
disableModule(*pt1, false);
processManager.setProcessStates(*pt1, oam::AUTO_OFFLINE);
// Don't do this again for downOAMParentName we just did it 3 lines ago
if (*pt1 != downOAMParentName)
{
disableModule(*pt1, false);
processManager.setProcessStates(*pt1, oam::AUTO_OFFLINE);
}
}
//distribute config file
@@ -10090,9 +10105,6 @@ int ProcessManager::OAMParentModuleChange()
sleep(2);
}
//set recycle process
processManager.recycleProcess(downOAMParentName);
//restart/reinit processes to force their release of the controller node port
if ( ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM) &&
( moduleNameList.size() <= 0 && config.moduleType() == "pm") )
@@ -10189,9 +10201,6 @@ int ProcessManager::OAMParentModuleChange()
processManager.setMySQLReplication(devicenetworklist, config.moduleName());
}
//set query system state not ready
processManager.setQuerySystemState(true);
// clear alarm
aManager.sendAlarmReport(config.moduleName().c_str(), MODULE_SWITCH_ACTIVE, CLEAR);
@@ -10363,8 +10372,6 @@ std::string ProcessManager::getStandbyModule()
//not gluster, check by status
try
{
oam.getProcessStatus(systemprocessstatus);
for ( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++)
{
if ( systemprocessstatus.processstatus[i].ProcessName == "ProcessManager" &&

View File

@@ -299,6 +299,10 @@ public:
*/
int disableModule(std::string target, bool manualFlag);
/**
*@brief Restart, reinitialize or start the DBRMControllerNode, ExeMgr,
*       DBRMWorkerNode, WriteEngineServer, DDLProc and DMLProc process types
*
* Intended as a replacement for recycleProcess.
*/
void reinitProcesses();
/**
*@brief recycle Processes
*/