1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-11-25 20:23:16 +03:00

MCOL-3842: Fix how processes are restarted in a failover event.

This commit is contained in:
benthompson15
2020-04-03 12:35:11 -05:00
parent 89e3f6121b
commit 7c65d7fda0
3 changed files with 47 additions and 31 deletions

View File

@@ -2095,11 +2095,6 @@ void pingDeviceThread()
oam.dbrmctl("halt"); oam.dbrmctl("halt");
log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG); log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG);
processManager.setSystemState(oam::BUSY_INIT);
//string cmd = "/etc/init.d/glusterd restart > /dev/null 2>&1";
//system(cmd.c_str());
//send notification //send notification
oam.sendDeviceNotification(moduleName, MODULE_DOWN); oam.sendDeviceNotification(moduleName, MODULE_DOWN);
@@ -2112,9 +2107,7 @@ void pingDeviceThread()
//set module to disable state //set module to disable state
processManager.disableModule(moduleName, false); processManager.disableModule(moduleName, false);
//call dbrm control
oam.dbrmctl("reload");
log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG);
// if pm, move dbroots to other pms // if pm, move dbroots to other pms
if ( ( moduleName.find("pm") == 0 && !amazon && ( DBRootStorageType != "internal") ) || if ( ( moduleName.find("pm") == 0 && !amazon && ( DBRootStorageType != "internal") ) ||
@@ -2145,6 +2138,9 @@ void pingDeviceThread()
{ {
processManager.setModuleState(moduleName, oam::AUTO_DISABLED); processManager.setModuleState(moduleName, oam::AUTO_DISABLED);
//call dbrm control
oam.dbrmctl("reload");
log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG);
// resume the dbrm // resume the dbrm
oam.dbrmctl("resume"); oam.dbrmctl("resume");
log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG);
@@ -2163,6 +2159,9 @@ void pingDeviceThread()
{ {
if ( moduleName.find("um") == 0 ) if ( moduleName.find("um") == 0 )
{ {
//call dbrm control
oam.dbrmctl("reload");
log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG);
// resume the dbrm // resume the dbrm
oam.dbrmctl("resume"); oam.dbrmctl("resume");
log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG);
@@ -2353,8 +2352,8 @@ void pingDeviceThread()
} }
} }
//set recycle process //set reinit process
processManager.recycleProcess(moduleName); processManager.reinitProcesses();
//set query system state ready //set query system state ready
processManager.setQuerySystemState(true); processManager.setQuerySystemState(true);
@@ -2368,6 +2367,9 @@ void pingDeviceThread()
( opState != oam::AUTO_DISABLED ) ) ( opState != oam::AUTO_DISABLED ) )
{ {
//call dbrm control
oam.dbrmctl("reload");
log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG);
// resume the dbrm // resume the dbrm
oam.dbrmctl("resume"); oam.dbrmctl("resume");
log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG);
@@ -2378,13 +2380,16 @@ void pingDeviceThread()
} }
else else
{ {
//call dbrm control
oam.dbrmctl("reload");
log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG);
// non-amazon // non-amazon
// resume the dbrm // resume the dbrm
oam.dbrmctl("resume"); oam.dbrmctl("resume");
log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG);
//set recycle process //set reinit process
processManager.recycleProcess(moduleName); processManager.reinitProcesses();
//set query system state ready //set query system state ready
processManager.setQuerySystemState(true); processManager.setQuerySystemState(true);

View File

@@ -921,8 +921,6 @@ void processMSG(messageqcpp::IOSocket* cfIos)
status = processManager.disableModule(moduleName, true); status = processManager.disableModule(moduleName, true);
log.writeLog(__LINE__, "Disable Module Completed on " + moduleName, LOG_TYPE_INFO); log.writeLog(__LINE__, "Disable Module Completed on " + moduleName, LOG_TYPE_INFO);
processManager.recycleProcess(moduleName);
//check for SIMPLEX Processes on mate might need to be started //check for SIMPLEX Processes on mate might need to be started
processManager.checkSimplexModule(moduleName); processManager.checkSimplexModule(moduleName);
@@ -3739,12 +3737,6 @@ int ProcessManager::disableModule(string target, bool manualFlag)
bool degraded; bool degraded;
oam.getModuleStatus(target, opState, degraded); oam.getModuleStatus(target, opState, degraded);
if (opState == newState || opState == oam::MAN_DISABLED)
{
pthread_mutex_unlock(&THREAD_LOCK);
return API_SUCCESS;
}
// if current state is AUTO_DISABLED and new state is MAN_DISABLED // if current state is AUTO_DISABLED and new state is MAN_DISABLED
// update state to MAN_DISABLED // update state to MAN_DISABLED
@@ -3822,7 +3814,6 @@ int ProcessManager::disableModule(string target, bool manualFlag)
{ {
return API_FAILURE; return API_FAILURE;
} }
processManager.recycleProcess(target);
//check for SIMPLEX Processes on mate might need to be started //check for SIMPLEX Processes on mate might need to be started
processManager.checkSimplexModule(target); processManager.checkSimplexModule(target);
@@ -3830,11 +3821,31 @@ int ProcessManager::disableModule(string target, bool manualFlag)
//distribute config file //distribute config file
distributeConfigFile("system"); distributeConfigFile("system");
processManager.reinitProcesses();
log.writeLog(__LINE__, "disableModule successfully complete for " + target, LOG_TYPE_DEBUG); log.writeLog(__LINE__, "disableModule successfully complete for " + target, LOG_TYPE_DEBUG);
return API_SUCCESS; return API_SUCCESS;
} }
/******************************************************************************************
* @brief	reinitProcesses
*
* purpose:	Run a fixed, ordered sequence of restart / start / reinit requests
*		against a hard-coded list of process types, logging a DEBUG message
*		on entry and on completion.
*
* Takes no arguments and returns nothing; any result produced by the individual
* restartProcessType / reinitProcessType / startProcessType calls is ignored here.
*
* NOTE(review): the header declaration describes this as an intended replacement
* for recycleProcess - confirm it covers every case recycleProcess handled.
******************************************************************************************/
void ProcessManager::reinitProcesses()
{
log.writeLog(__LINE__, "reinitProcesses... ", LOG_TYPE_DEBUG);
// DBRMControllerNode is restarted first, before any other process type is touched.
// NOTE(review): presumably the rest depend on the controller node - confirm.
restartProcessType("DBRMControllerNode");
// ExeMgr and DBRMWorkerNode are only reinitialized, not restarted.
reinitProcessType("ExeMgr");
reinitProcessType("DBRMWorkerNode");
restartProcessType("WriteEngineServer");
// One-second pause before each of the two start requests below.
// NOTE(review): presumably gives the previously restarted process time to come up - confirm.
sleep(1);
startProcessType("DDLProc");
sleep(1);
startProcessType("DMLProc");
// DDLProc and DMLProc each get a reinit request right after their start requests.
// NOTE(review): whether start is a no-op for an already-running process is not visible here - verify.
reinitProcessType("DDLProc");
reinitProcessType("DMLProc");
log.writeLog(__LINE__, "reinitProcesses complete", LOG_TYPE_DEBUG);
}
/****************************************************************************************** /******************************************************************************************
* @brief recycleProcess * @brief recycleProcess
* *
@@ -10039,10 +10050,14 @@ int ProcessManager::OAMParentModuleChange()
vector<string>::iterator pt1 = downModuleList.begin(); vector<string>::iterator pt1 = downModuleList.begin();
for ( ; pt1 != downModuleList.end() ; pt1++) for ( ; pt1 != downModuleList.end() ; pt1++)
{
// Don't do this again for downOAMParentName we just did it 3 lines ago
if (*pt1 != downOAMParentName)
{ {
disableModule(*pt1, false); disableModule(*pt1, false);
processManager.setProcessStates(*pt1, oam::AUTO_OFFLINE); processManager.setProcessStates(*pt1, oam::AUTO_OFFLINE);
} }
}
//distribute config file //distribute config file
distributeConfigFile("system"); distributeConfigFile("system");
@@ -10090,9 +10105,6 @@ int ProcessManager::OAMParentModuleChange()
sleep(2); sleep(2);
} }
//set recycle process
processManager.recycleProcess(downOAMParentName);
//restart/reinit processes to force their release of the controller node port //restart/reinit processes to force their release of the controller node port
if ( ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM) && if ( ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM) &&
( moduleNameList.size() <= 0 && config.moduleType() == "pm") ) ( moduleNameList.size() <= 0 && config.moduleType() == "pm") )
@@ -10189,9 +10201,6 @@ int ProcessManager::OAMParentModuleChange()
processManager.setMySQLReplication(devicenetworklist, config.moduleName()); processManager.setMySQLReplication(devicenetworklist, config.moduleName());
} }
//set query system state not ready
processManager.setQuerySystemState(true);
// clear alarm // clear alarm
aManager.sendAlarmReport(config.moduleName().c_str(), MODULE_SWITCH_ACTIVE, CLEAR); aManager.sendAlarmReport(config.moduleName().c_str(), MODULE_SWITCH_ACTIVE, CLEAR);
@@ -10363,8 +10372,6 @@ std::string ProcessManager::getStandbyModule()
//not gluster, check by status //not gluster, check by status
try try
{ {
oam.getProcessStatus(systemprocessstatus);
for ( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++) for ( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++)
{ {
if ( systemprocessstatus.processstatus[i].ProcessName == "ProcessManager" && if ( systemprocessstatus.processstatus[i].ProcessName == "ProcessManager" &&

View File

@@ -299,6 +299,10 @@ public:
*/ */
int disableModule(std::string target, bool manualFlag); int disableModule(std::string target, bool manualFlag);
/**
*@brief reinit Processes; intended to replace recycleProcess
*/
void reinitProcesses();
/** /**
*@brief recycle Processes *@brief recycle Processes
*/ */