From e5f18964f06e8624bf27bea088fdee792447c40c Mon Sep 17 00:00:00 2001 From: David Hill Date: Tue, 4 Sep 2018 16:41:44 -0500 Subject: [PATCH 1/4] MCOL-1523 --- oam/install_scripts/columnstoreAlias | 3 + oamapps/mcsadmin/mcsadmin.cpp | 30 +++- oamapps/postConfigure/postConfigure.cpp | 188 +++++++++++------------- procmgr/processmanager.cpp | 149 ++++++++++++------- 4 files changed, 212 insertions(+), 158 deletions(-) diff --git a/oam/install_scripts/columnstoreAlias b/oam/install_scripts/columnstoreAlias index cd225c1a9..255eb7e7e 100644 --- a/oam/install_scripts/columnstoreAlias +++ b/oam/install_scripts/columnstoreAlias @@ -10,5 +10,8 @@ alias core='cd /var/log/mariadb/columnstore/corefiles' alias tmsg='tail -f /var/log/messages' alias tdebug='tail -f /var/log/mariadb/columnstore/debug.log' alias tinfo='tail -f /var/log/mariadb/columnstore/info.log' +alias terror='tail -f /var/log/mariadb/columnstore/err.log' +alias twarning='tail -f /var/log/mariadb/columnstore/warning.log' +alias tcrit='tail -f /var/log/mariadb/columnstore/crit.log' alias dbrm='cd /usr/local/mariadb/columnstore/data1/systemFiles/dbrm' alias module='cat /usr/local/mariadb/columnstore/local/module' diff --git a/oamapps/mcsadmin/mcsadmin.cpp b/oamapps/mcsadmin/mcsadmin.cpp index 935d081cc..6090058d4 100644 --- a/oamapps/mcsadmin/mcsadmin.cpp +++ b/oamapps/mcsadmin/mcsadmin.cpp @@ -7036,15 +7036,33 @@ int processCommand(string* arguments) if (systemstatus.SystemOpState == oam::ACTIVE ) { try { - cout << endl << " Starting Modules" << endl; - oam.startModule(devicenetworklist, ackTemp); +// cout << endl << " Starting Modules" << endl; +// oam.startModule(devicenetworklist, ackTemp); //reload DBRM with new configuration, needs to be done here after startModule - cmd = startup::StartUp::installDir() + "/bin/dbrmctl reload > /dev/null 2>&1"; - system(cmd.c_str()); - sleep(15); +// cmd = startup::StartUp::installDir() + "/bin/dbrmctl reload > /dev/null 2>&1"; +// system(cmd.c_str()); +// sleep(15); - 
cout << " Successful start of Modules " << endl; +// cout << " Successful start of Modules " << endl; + + cout << endl << " Restarting System "; + int returnStatus = oam.restartSystem(gracefulTemp, ackTemp); + switch (returnStatus) + { + case API_SUCCESS: + if ( waitForActive() ) + cout << endl << " Successful restart of System " << endl << endl; + else + cout << endl << "**** restartSystem Failed : check log files" << endl; + break; + case API_CANCELLED: + cout << endl << " Restart of System canceled" << endl << endl; + break; + default: + cout << endl << "**** restartSystem Failed : Check system logs" << endl; + break; + } } catch (exception& e) { diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp index 98227da9d..e69677039 100644 --- a/oamapps/postConfigure/postConfigure.cpp +++ b/oamapps/postConfigure/postConfigure.cpp @@ -1247,26 +1247,16 @@ int main(int argc, char *argv[]) //amazon install setup check bool amazonInstall = false; string cloud = oam::UnassignedName; - system("aws --version > /tmp/amazon.log 2>&1"); - - ifstream in("/tmp/amazon.log"); - - in.seekg(0, std::ios::end); - int size = in.tellg(); - if ( size == 0 || oam.checkLogStatus("/tmp/amazon.log", "not found")) + + if (!multi_server_quick_install) { - // not running on amazon with ec2-api-tools - if (amazon_quick_install) - { - cout << "ERROR: Amazon Quick Installer was specified, bu the AMazon CLI API packages isnt installed, exiting" << endl; - exit(1); - } + system("aws --version > /tmp/amazon.log 2>&1"); - amazonInstall = false; - } - else - { - if ( size == 0 || oam.checkLogStatus("/tmp/amazon.log", "not installed")) + ifstream in("/tmp/amazon.log"); + + in.seekg(0, std::ios::end); + int size = in.tellg(); + if ( size == 0 || oam.checkLogStatus("/tmp/amazon.log", "not found")) { // not running on amazon with ec2-api-tools if (amazon_quick_install) @@ -1278,9 +1268,23 @@ int main(int argc, char *argv[]) amazonInstall = false; } else - amazonInstall 
= true; - } + { + if ( size == 0 || oam.checkLogStatus("/tmp/amazon.log", "not installed")) + { + // not running on amazon with ec2-api-tools + if (amazon_quick_install) + { + cout << "ERROR: Amazon Quick Installer was specified, bu the AMazon CLI API packages isnt installed, exiting" << endl; + exit(1); + } + amazonInstall = false; + } + else + amazonInstall = true; + } + } + try { cloud = sysConfig->getConfig(InstallSection, "Cloud"); } @@ -3090,7 +3094,9 @@ int main(int argc, char *argv[]) //check if dbrm data resides in older directory path and inform user if it does dbrmDirCheck(); - if ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM && pmNumber == 1) { + if ( ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) || + ( (IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM) && pmwithum ) ) + { //run the mysql / mysqld setup scripts cout << endl << "===== Running the MariaDB ColumnStore MariaDB Server setup scripts =====" << endl << endl; @@ -3098,7 +3104,57 @@ int main(int argc, char *argv[]) // call the mysql setup scripts mysqlSetup(); - sleep(5); + sleep(3); + } + + if ( IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM || + pmNumber > 1 ) + { + if ( password.empty() ) + { + cout << endl; + cout << "Next step is to enter the password to access the other Servers." << endl; + cout << "This is either your password or you can default to using a ssh key" << endl; + cout << "If using a password, the password needs to be the same on all Servers." 
<< endl << endl; + + if ( noPrompting ) { + cout << "Enter password, hit 'enter' to default to using a ssh key, or 'exit' > " << endl; + password = "ssh"; + } + else + { + while(true) + { + char *pass1, *pass2; + + pass1=getpass("Enter password, hit 'enter' to default to using a ssh key, or 'exit' > "); + if ( strcmp(pass1, "") == 0 ) { + password = "ssh"; + break; + } + + string p1 = pass1; + if ( p1 == "exit") + exit(0); + + pass2=getpass("Confirm password > "); + string p2 = pass2; + if ( p1 == p2 ) { + password = p2; + break; + } + else + cout << "Password mismatch, please re-enter" << endl; + } + + //add single quote for special characters + if ( password != "ssh" ) + { + password = "'" + password + "'"; + } + + } + } } int thread_id = 0; @@ -3116,7 +3172,7 @@ int main(int argc, char *argv[]) //skip interface with remote servers and perform install if ( !nonDistribute ) { - // + // // perform remote install of other servers in the system // cout << endl << "===== System Installation =====" << endl << endl; @@ -3173,67 +3229,8 @@ int main(int argc, char *argv[]) if( !pkgCheck(columnstorePackage) ) exit(1); - if ( password.empty() ) - { - cout << endl; - cout << "Next step is to enter the password to access the other Servers." << endl; - cout << "This is either your password or you can default to using a ssh key" << endl; - cout << "If using a password, the password needs to be the same on all Servers." 
<< endl << endl; - } - - while(true) - { - char *pass1, *pass2; - - if ( noPrompting ) { - cout << "Enter password, hit 'enter' to default to using a ssh key, or 'exit' > " << endl; - if ( password.empty() ) - password = "ssh"; - break; - } - - //check for command line option password - if ( !password.empty() ) - break; - - pass1=getpass("Enter password, hit 'enter' to default to using a ssh key, or 'exit' > "); - if ( strcmp(pass1, "") == 0 ) { - password = "ssh"; - break; - } - - if ( pass1 == "exit") - exit(0); - - string p1 = pass1; - pass2=getpass("Confirm password > "); - string p2 = pass2; - if ( p1 == p2 ) { - password = p2; - break; - } - else - cout << "Password mismatch, please re-enter" << endl; - } - - //add single quote for special characters - if ( password != "ssh" ) - { - password = "'" + password + "'"; - } - checkSystemMySQLPort(mysqlPort, sysConfig, USER, password, childmodulelist, IserverTypeInstall, pmwithum); - if ( ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) || - ( (IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM) && pmwithum ) ) - { - cout << endl << "===== Running the MariaDB ColumnStore MariaDB ColumnStore setup scripts =====" << endl << endl; - - // call the mysql setup scripts - mysqlSetup(); - sleep(5); - } - string AmazonInstall = "0"; if ( amazonInstall ) AmazonInstall = "1"; @@ -3411,19 +3408,7 @@ int main(int argc, char *argv[]) cout << " DONE" << endl; } } - else - { - if ( ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) || - ( (IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM) && pmwithum ) ) - { - cout << endl << "===== Running the MariaDB ColumnStore MariaDB ColumnStore setup scripts =====" << endl << endl; - - // call the mysql setup scripts - mysqlSetup(); - sleep(5); - } - } - + //configure data redundancy if (DataRedundancy) { @@ -3641,9 +3626,6 @@ int main(int argc, char *argv[]) } //set mysql replication, if wasn't setup before on system -// if ( ( mysqlRep && pmwithum ) || -// ( mysqlRep && 
(umNumber > 1) ) || -// ( mysqlRep && (pmNumber > 1) && (IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM) ) ) if ( mysqlRep ) { cout << endl << "Run MariaDB ColumnStore Replication Setup.. "; @@ -3665,7 +3647,10 @@ int main(int argc, char *argv[]) cout << "Enter the following command to define MariaDB ColumnStore Alias Commands" << endl << endl; - cout << ". " + installDir + "/bin/columnstoreAlias" << endl << endl; + if ( !rootUser ) + cout << ". /etc/profile.d/columnstoreEnv.sh" << endl; + + cout << ". /etc/profile.d/columnstoreAlias.sh" << endl << endl; cout << "Enter 'mcsmysql' to access the MariaDB ColumnStore SQL console" << endl; cout << "Enter 'mcsadmin' to access the MariaDB ColumnStore Admin console" << endl << endl; @@ -3682,7 +3667,10 @@ int main(int argc, char *argv[]) cout << "Enter the following command to define MariaDB ColumnStore Alias Commands" << endl << endl; - cout << ". " + installDir + "/bin/columnstoreAlias" << endl << endl; + if ( !rootUser ) + cout << ". /etc/profile.d/columnstoreEnv.sh" << endl; + + cout << ". 
/etc/profile.d/columnstoreAlias.sh" << endl << endl; cout << "Enter 'mcsmysql' to access the MariaDB ColumnStore SQL console" << endl; cout << "Enter 'mcsadmin' to access the MariaDB ColumnStore Admin console" << endl << endl; diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 0a054f9c3..89f9a145c 100755 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -575,10 +575,12 @@ void processMSG(messageqcpp::IOSocket* cfIos) if ( count > 0 ) { + string module = oam::UnassignedName; for (int i = 0; i < count; i++) { msg >> value; devicenetworkconfig.DeviceName = value; + module = value; msg >> value; devicenetworkconfig.UserTempDeviceName = value; msg >> value; @@ -606,11 +608,24 @@ void processMSG(messageqcpp::IOSocket* cfIos) } if( status == API_SUCCESS) { + processManager.setSystemState(oam::BUSY_INIT); + + //set query system state not ready + processManager.setQuerySystemState(false); + + //set recycle process + processManager.recycleProcess(target, true); + //distribute config file processManager.distributeConfigFile("system"); + processManager.setSystemState(oam::ACTIVE); + + //set query system state ready + processManager.setQuerySystemState(true); + //call dbrm control - oam.dbrmctl("halt"); +/* oam.dbrmctl("halt"); log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG); oam.dbrmctl("reload"); @@ -618,13 +633,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) oam.dbrmctl("resume"); log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - -// processManager.restartProcessType("ExeMgr"); - - //setup MySQL Replication for started modules -// log.writeLog(__LINE__, "Setup MySQL Replication for module being started", LOG_TYPE_DEBUG); -// processManager.setMySQLReplication(startdevicenetworklist); - } +*/ } } else { @@ -829,8 +838,10 @@ void processMSG(messageqcpp::IOSocket* cfIos) if (opState == oam::MAN_OFFLINE || opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED || opState == oam::AUTO_OFFLINE) { - 
oam.dbrmctl("halt"); - log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG); + processManager.setSystemState(oam::BUSY_INIT); + + //set query system state not ready + processManager.setQuerySystemState(false); status = processManager.disableModule(moduleName, true); log.writeLog(__LINE__, "Disable Module Completed on " + moduleName, LOG_TYPE_INFO); @@ -839,14 +850,11 @@ void processMSG(messageqcpp::IOSocket* cfIos) //check for SIMPLEX Processes on mate might need to be started processManager.checkSimplexModule(moduleName); + + processManager.setSystemState(oam::ACTIVE); - //call dbrm control -// oam.dbrmctl("reload"); -// log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); - - // resume the dbrm - oam.dbrmctl("resume"); - log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); + //set query system state ready + processManager.setQuerySystemState(true); } else { @@ -910,7 +918,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) DeviceNetworkList::iterator listPT = devicenetworklist.begin(); - //stopModules being removed with the REMOVE option, which will stop process + // do stopmodule then enable for( ; listPT != devicenetworklist.end() ; listPT++) { string moduleName = (*listPT).DeviceName; @@ -933,6 +941,9 @@ void processMSG(messageqcpp::IOSocket* cfIos) } if (opState == oam::MAN_DISABLED) { + processManager.stopModule(moduleName, graceful, manualFlag); + log.writeLog(__LINE__, "stop Module Completed on " + moduleName, LOG_TYPE_INFO); + status = processManager.enableModule(moduleName, oam::MAN_OFFLINE); log.writeLog(__LINE__, "Enable Module Completed on " + moduleName, LOG_TYPE_INFO); } @@ -1246,6 +1257,9 @@ void processMSG(messageqcpp::IOSocket* cfIos) log.writeLog(__LINE__, "STOPSYSTEM: ACK back to sender"); } + //set query system state ready + processManager.setQuerySystemState(true); + startsystemthreadStop = false; break; @@ -2758,9 +2772,6 @@ void processMSG(messageqcpp::IOSocket* cfIos) log.writeLog(__LINE__, "MSG RECEIVED: 
Process Restarted on " + moduleName + "/" + processName); //set query system states not ready - BRM::DBRM dbrm; - dbrm.setSystemQueryReady(false); - processManager.setQuerySystemState(false); processManager.setSystemState(oam::BUSY_INIT); @@ -2841,12 +2852,14 @@ void processMSG(messageqcpp::IOSocket* cfIos) break; sleep(1); } - dbrm.setSystemQueryReady(true); + processManager.setQuerySystemState(true); + } // if a DDLProc was restarted, reinit DMLProc if( processName == "DDLProc") { processManager.reinitProcessType("DMLProc"); + processManager.setQuerySystemState(true); } //only run on auto process restart @@ -2893,9 +2906,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) } } - //enable query stats - dbrm.setSystemQueryReady(true); - + //set query system states ready processManager.setQuerySystemState(true); processManager.setSystemState(oam::ACTIVE); @@ -3386,7 +3397,7 @@ int ProcessManager::disableModule(string target, bool manualFlag) /****************************************************************************************** * @brief recycleProcess * -* purpose: recyle process, generally after some disable module is run +* purpose: recyle process, done after disable/enable module * ******************************************************************************************/ void ProcessManager::recycleProcess(string module, bool enableModule) @@ -3410,48 +3421,65 @@ void ProcessManager::recycleProcess(string module, bool enableModule) //recycle DBRM processes in all cases restartProcessType("DBRMControllerNode"); restartProcessType("DBRMWorkerNode"); + sleep(5); restartProcessType("DMLProc"); return; } //recycle DBRM processes in all cases - restartProcessType("DBRMControllerNode", module); - restartProcessType("DBRMWorkerNode"); +// restartProcessType("DBRMControllerNode", module); +// restartProcessType("DBRMWorkerNode"); - - // only recycle dmlproc, if down/up module is non-parent UM - if ( ( moduleType == "um" ) && - ( PrimaryUMModuleName != module) ) + // 
only recycle ddl/dmlproc, if down/up module is non-parent UM +/* if ( ( moduleType == "um" ) && + if ( PrimaryUMModuleName != module) { + restartProcessType("DDLProc",module); restartProcessType("DMLProc",module); return; } - - if( PrimaryUMModuleName == module) - { - stopProcessType("DDLProc"); - stopProcessType("DMLProc"); - } +*/ +// if( PrimaryUMModuleName == module) +// { +// stopProcessType("DDLProc"); +// stopProcessType("DMLProc"); +// } + + stopProcessType("WriteEngineServer"); stopProcessType("ExeMgr"); + + stopProcessType("PrimProc"); - restartProcessType("PrimProc"); - sleep(1); + stopProcessType("DBRMControllerNode"); + stopProcessType("DBRMWorkerNode"); + + stopProcessType("DDLProc"); + stopProcessType("DMLProc"); - restartProcessType("mysqld"); + stopProcessType("mysqld"); - restartProcessType("WriteEngineServer"); - sleep(1); +// restartProcessType("mysqld"); + + startProcessType("DBRMControllerNode"); + startProcessType("DBRMWorkerNode"); + + startProcessType("PrimProc"); + sleep(5); + + startProcessType("WriteEngineServer"); + sleep(3); startProcessType("ExeMgr"); - sleep(1); startProcessType("DDLProc"); sleep(1); startProcessType("DMLProc"); + startProcessType("mysqld"); + return; } @@ -3500,8 +3528,8 @@ int ProcessManager::enableModule(string target, int state, bool failover) setStandbyModule(newStandbyModule); //set recycle process - if (!failover) - recycleProcess(target); +// if (!failover) +// recycleProcess(target); log.writeLog(__LINE__, "enableModule request for " + target + " completed", LOG_TYPE_DEBUG); @@ -3774,6 +3802,7 @@ void ProcessManager::setSystemState(uint16_t state) Oam oam; ALARMManager aManager; Configuration config; + ProcessManager processManager(config, log); log.writeLog(__LINE__, "Set System State = " + oamState[state], LOG_TYPE_DEBUG); @@ -3794,6 +3823,9 @@ void ProcessManager::setSystemState(uint16_t state) // Process Alarms string system = "System"; if( state == oam::ACTIVE ) { + //set query system states ready + 
processManager.setQuerySystemState(true); + //clear alarms if set aManager.sendAlarmReport(system.c_str(), SYSTEM_DOWN_AUTO, CLEAR); aManager.sendAlarmReport(system.c_str(), SYSTEM_DOWN_MANUAL, CLEAR); @@ -6244,7 +6276,7 @@ int ProcessManager::sendMsgProcMon( std::string module, ByteStream msg, int requ string IPAddr = sysConfig->getConfig(msgPort, "IPAddr"); if ( IPAddr == oam::UnassignedIpAddr ) { - log.writeLog(__LINE__, "sendMsgProcMon ping failure", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "sendMsgProcMon ping failure " + module + " " + IPAddr, LOG_TYPE_ERROR); return oam::API_SUCCESS; } @@ -6253,7 +6285,7 @@ int ProcessManager::sendMsgProcMon( std::string module, ByteStream msg, int requ string cmd = cmdLine + IPAddr + cmdOption; if ( system(cmd.c_str()) != 0) { //ping failure - log.writeLog(__LINE__, "sendMsgProcMon ping failure", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "sendMsgProcMon ping failure " + module + " " + IPAddr, LOG_TYPE_ERROR); return oam::API_SUCCESS; } } @@ -6490,12 +6522,22 @@ void ProcessManager::setQuerySystemState(bool set) try { dbrm.setSystemQueryReady(set); - log.writeLog(__LINE__, "setQuerySystemState successful", LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "setSystemQueryReady successful", LOG_TYPE_DEBUG); + + try { + dbrm.setSystemReady(set); + log.writeLog(__LINE__, "setSystemReady successful", LOG_TYPE_DEBUG); + } + catch(...) + { + log.writeLog(__LINE__, "setSystemReady failed", LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "setSystemReady failed", LOG_TYPE_ERROR); + } } catch(...) 
{ - log.writeLog(__LINE__, "setQuerySystemState failed", LOG_TYPE_DEBUG); - log.writeLog(__LINE__, "setQuerySystemState failed", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "setSystemQueryReady failed", LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "setSystemQueryReady failed", LOG_TYPE_ERROR); } } @@ -6993,7 +7035,7 @@ void startSystemThread(oam::DeviceNetworkList Devicenetworklist) } //set query system state not ready - processManager.setQuerySystemState(true); + processManager.setQuerySystemState(false); // Bug 4554: Wait until DMLProc is finished with rollback if (status == oam::API_SUCCESS) @@ -7062,6 +7104,9 @@ void startSystemThread(oam::DeviceNetworkList Devicenetworklist) processManager.setSystemState(rtn); } + //set query system state ready + processManager.setQuerySystemState(true); + // exit thread log.writeLog(__LINE__, "startSystemThread Exit", LOG_TYPE_DEBUG); startsystemthreadStatus = status; From 8b0507b9872ce3946006e3de90448a39606b3fda Mon Sep 17 00:00:00 2001 From: David Hill Date: Wed, 5 Sep 2018 14:53:13 -0500 Subject: [PATCH 2/4] MCOL-1523 --- oamapps/mcsadmin/mcsadmin.cpp | 11 +-- oamapps/postConfigure/postConfigure.cpp | 2 +- procmgr/main.cpp | 10 +-- procmgr/processmanager.cpp | 113 ++++++++---------------- procmon/main.cpp | 22 ++--- 5 files changed, 52 insertions(+), 106 deletions(-) diff --git a/oamapps/mcsadmin/mcsadmin.cpp b/oamapps/mcsadmin/mcsadmin.cpp index 6090058d4..befcb68fa 100644 --- a/oamapps/mcsadmin/mcsadmin.cpp +++ b/oamapps/mcsadmin/mcsadmin.cpp @@ -7036,17 +7036,8 @@ int processCommand(string* arguments) if (systemstatus.SystemOpState == oam::ACTIVE ) { try { -// cout << endl << " Starting Modules" << endl; -// oam.startModule(devicenetworklist, ackTemp); - - //reload DBRM with new configuration, needs to be done here after startModule -// cmd = startup::StartUp::installDir() + "/bin/dbrmctl reload > /dev/null 2>&1"; -// system(cmd.c_str()); -// sleep(15); - -// cout << " Successful start of Modules " << endl; - cout << endl << 
" Restarting System "; + gracefulTemp = oam::FORCEFUL; int returnStatus = oam.restartSystem(gracefulTemp, ackTemp); switch (returnStatus) { diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp index e69677039..fe7a3b337 100644 --- a/oamapps/postConfigure/postConfigure.cpp +++ b/oamapps/postConfigure/postConfigure.cpp @@ -3172,7 +3172,7 @@ int main(int argc, char *argv[]) //skip interface with remote servers and perform install if ( !nonDistribute ) { - // + // // perform remote install of other servers in the system // cout << endl << "===== System Installation =====" << endl << endl; diff --git a/procmgr/main.cpp b/procmgr/main.cpp index 2747fda16..5ef5113f1 100644 --- a/procmgr/main.cpp +++ b/procmgr/main.cpp @@ -1489,7 +1489,7 @@ void pingDeviceThread() if (moduleInfoList[moduleName] >= ModuleHeartbeatCount || opState == oam::DOWN || opState == oam::AUTO_DISABLED) { - log.writeLog(__LINE__, "Module alive, bring it back online: " + moduleName, LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "*** Module alive, bring it back online: " + moduleName, LOG_TYPE_DEBUG); string PrimaryUMModuleName = config.moduleName(); try { @@ -1927,7 +1927,7 @@ void pingDeviceThread() { //Log failure, issue alarm, set moduleOpState Configuration config; - log.writeLog(__LINE__, "module is down: " + moduleName, LOG_TYPE_CRITICAL); + log.writeLog(__LINE__, "*** module is down: " + moduleName, LOG_TYPE_CRITICAL); //set query system state not ready BRM::DBRM dbrm; @@ -2013,9 +2013,6 @@ void pingDeviceThread() // resume the dbrm oam.dbrmctl("resume"); log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - - //set recycle process - processManager.recycleProcess(moduleName); } // return values = 'ip address' for running or rebooting, stopped or terminated @@ -2234,9 +2231,6 @@ void pingDeviceThread() oam.dbrmctl("resume"); log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - //set recycle process - 
processManager.recycleProcess(moduleName); - //enable query stats dbrm.setSystemQueryReady(true); diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 89f9a145c..daacdbf10 100755 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -619,21 +619,11 @@ void processMSG(messageqcpp::IOSocket* cfIos) //distribute config file processManager.distributeConfigFile("system"); - processManager.setSystemState(oam::ACTIVE); - //set query system state ready processManager.setQuerySystemState(true); - //call dbrm control -/* oam.dbrmctl("halt"); - log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG); - - oam.dbrmctl("reload"); - log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); - - oam.dbrmctl("resume"); - log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); -*/ } + processManager.setSystemState(oam::ACTIVE); + } } else { @@ -846,15 +836,10 @@ void processMSG(messageqcpp::IOSocket* cfIos) status = processManager.disableModule(moduleName, true); log.writeLog(__LINE__, "Disable Module Completed on " + moduleName, LOG_TYPE_INFO); - processManager.recycleProcess(moduleName); - - //check for SIMPLEX Processes on mate might need to be started - processManager.checkSimplexModule(moduleName); - - processManager.setSystemState(oam::ACTIVE); - //set query system state ready processManager.setQuerySystemState(true); + + processManager.setSystemState(oam::ACTIVE); } else { @@ -1611,6 +1596,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) status = retStatus; } } + //now stop local module processManager.stopModule(config.moduleName(), graceful, manualFlag ); @@ -1627,7 +1613,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) oam::DeviceNetworkList devicenetworklist; pthread_t startsystemthread; - pthread_create (&startsystemthread, NULL, (void*(*)(void*)) &startSystemThread, &devicenetworklist); + status = pthread_create (&startsystemthread, NULL, (void*(*)(void*)) &startSystemThread, &devicenetworklist); if ( status 
!= 0 ) { log.writeLog(__LINE__, "STARTMODULE: pthread_create failed, return status = " + oam.itoa(status)); @@ -1636,20 +1622,19 @@ void processMSG(messageqcpp::IOSocket* cfIos) if (status == 0 && ackIndicator) { - // BUG 4554 We don't need the join because calpont console is now looking for "Active" - // We need to return the ack right away to let console know we got the message. -// pthread_join(startsystemthread, NULL); -// status = startsystemthreadStatus; + pthread_join(startsystemthread, NULL); + status = startsystemthreadStatus; } - - // setup MySQL Replication after switchover command -/* if (graceful == FORCEFUL) + + // setup MySQL Replication after FORCE restart command + if ( (status == API_SUCCESS) && + (graceful == oam::FORCEFUL) ) { - log.writeLog(__LINE__, "Setup MySQL Replication for restartSystem FORCE, used by switch-parent command", LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "Setup MySQL Replication for restartSystem FORCE", LOG_TYPE_DEBUG); oam::DeviceNetworkList devicenetworklist; processManager.setMySQLReplication(devicenetworklist); } -*/ + log.writeLog(__LINE__, "RESTARTSYSTEM: Start System Request Completed", LOG_TYPE_INFO); } @@ -3277,6 +3262,7 @@ int ProcessManager::shutdownModule(string target, ByteStream::byte actionIndicat int ProcessManager::disableModule(string target, bool manualFlag) { Oam oam; + ProcessManager processManager(config, log); ModuleConfig moduleconfig; log.writeLog(__LINE__, "disableModule request for " + target, LOG_TYPE_DEBUG); @@ -3386,6 +3372,11 @@ int ProcessManager::disableModule(string target, bool manualFlag) if ( updateWorkerNodeconfig() != API_SUCCESS ) return API_FAILURE; + processManager.recycleProcess(target); + + //check for SIMPLEX Processes on mate might need to be started + processManager.checkSimplexModule(target); + //distribute config file distributeConfigFile("system"); @@ -3414,37 +3405,6 @@ void ProcessManager::recycleProcess(string module, bool enableModule) 
oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName); } catch(...) {} - - // restart DBRM Process and DMLProc and return if enable module is being done - if (enableModule) - { - //recycle DBRM processes in all cases - restartProcessType("DBRMControllerNode"); - restartProcessType("DBRMWorkerNode"); - sleep(5); - - restartProcessType("DMLProc"); - return; - } - - //recycle DBRM processes in all cases -// restartProcessType("DBRMControllerNode", module); -// restartProcessType("DBRMWorkerNode"); - - // only recycle ddl/dmlproc, if down/up module is non-parent UM -/* if ( ( moduleType == "um" ) && - if ( PrimaryUMModuleName != module) - { - restartProcessType("DDLProc",module); - restartProcessType("DMLProc",module); - return; - } -*/ -// if( PrimaryUMModuleName == module) -// { -// stopProcessType("DDLProc"); -// stopProcessType("DMLProc"); -// } stopProcessType("WriteEngineServer"); @@ -3526,10 +3486,6 @@ int ProcessManager::enableModule(string target, int state, bool failover) if ( newStandbyModule == target) setStandbyModule(newStandbyModule); - - //set recycle process -// if (!failover) -// recycleProcess(target); log.writeLog(__LINE__, "enableModule request for " + target + " completed", LOG_TYPE_DEBUG); @@ -6518,15 +6474,15 @@ void ProcessManager::setQuerySystemState(bool set) Oam oam; BRM::DBRM dbrm; - log.writeLog(__LINE__, "setQuerySystemState = " + oam.itoa(set), LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "setQuerySystemState called = " + oam.itoa(set), LOG_TYPE_DEBUG); try { dbrm.setSystemQueryReady(set); - log.writeLog(__LINE__, "setSystemQueryReady successful", LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "setSystemQueryReady = " + oam.itoa(set), LOG_TYPE_DEBUG); try { dbrm.setSystemReady(set); - log.writeLog(__LINE__, "setSystemReady successful", LOG_TYPE_DEBUG); + log.writeLog(__LINE__, "setSystemReady = " + oam.itoa(set), LOG_TYPE_DEBUG); } catch(...) 
{ @@ -7089,23 +7045,28 @@ void startSystemThread(oam::DeviceNetworkList Devicenetworklist) } if (DMLprocessstatus.ProcessOpState == oam::ACTIVE) { - rtn = oam::ACTIVE; + rtn = oam::ACTIVE; break; } if (DMLprocessstatus.ProcessOpState == oam::FAILED) { - rtn = oam::FAILED; + rtn = oam::FAILED; + status = oam::API_FAILURE; break; } - // wait some more - sleep(2); - } - processManager.setSystemState(rtn); + // wait some more + sleep(2); + } + + if ( rtn == oam::ACTIVE ) + //set query system state ready + processManager.setQuerySystemState(true); + + processManager.setSystemState(rtn); } - - //set query system state ready - processManager.setQuerySystemState(true); + else + processManager.setSystemState(oam::FAILED); // exit thread log.writeLog(__LINE__, "startSystemThread Exit", LOG_TYPE_DEBUG); diff --git a/procmon/main.cpp b/procmon/main.cpp index d6edd4ac7..a30de1fa1 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -2451,17 +2451,17 @@ void processStatusMSG(messageqcpp::IOSocket* cfIos) } //if DMLProc set to ACTIVE, set system state to ACTIVE if in an INIT state - if ( processName == "DMLProc" && state == oam::ACTIVE ) - { - if ( fShmSystemStatus[0].OpState == oam::BUSY_INIT || - fShmSystemStatus[0].OpState == oam::MAN_INIT || - fShmSystemStatus[0].OpState == oam::AUTO_INIT ) - { - fShmSystemStatus[0].OpState = state; - memcpy(fShmSystemStatus[0].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); - log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set System State = " + oamState[state], LOG_TYPE_DEBUG); - } - } +// if ( processName == "DMLProc" && state == oam::ACTIVE ) +// { +// if ( fShmSystemStatus[0].OpState == oam::BUSY_INIT || +// fShmSystemStatus[0].OpState == oam::MAN_INIT || +// fShmSystemStatus[0].OpState == oam::AUTO_INIT ) +// { +// fShmSystemStatus[0].OpState = state; +// memcpy(fShmSystemStatus[0].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); +// log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set System
State = " + oamState[state], LOG_TYPE_DEBUG); +// } +// } } break; From 21f108896d02d74a2e6ca9945d40f67e816dd176 Mon Sep 17 00:00:00 2001 From: David Hill Date: Wed, 12 Sep 2018 08:36:13 -0500 Subject: [PATCH 3/4] MCOL-1523 - fix compile issue --- procmon/main.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/procmon/main.cpp b/procmon/main.cpp index a3b8d52ef..ac7c761cc 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -2443,10 +2443,6 @@ void processStatusMSG(messageqcpp::IOSocket* cfIos) memcpy(fShmSystemStatus[0].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set System State = " + oamState[state], LOG_TYPE_DEBUG); } - - BRM::DBRM dbrm; - dbrm.setSystemQueryReady(true); - } } break; From 8ec02bfce5efd124a950e4be706b037df81f147f Mon Sep 17 00:00:00 2001 From: David Hill Date: Wed, 12 Sep 2018 14:31:23 -0500 Subject: [PATCH 4/4] MCOL-1423 --- procmgr/processmanager.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 97e42533f..42d1c167b 100755 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -2846,10 +2846,13 @@ void processMSG(messageqcpp::IOSocket* cfIos) } - // if a DDLProc was restarted, reinit DMLProc + // if a DDLProc was restarted, restart DMLProc if( processName == "DDLProc") { processManager.reinitProcessType("DMLProc"); + //set query system states ready processManager.setQuerySystemState(true); + + processManager.setSystemState(oam::ACTIVE); } //only run on auto process restart @@ -2900,6 +2903,8 @@ void processMSG(messageqcpp::IOSocket* cfIos) processManager.setQuerySystemState(true); processManager.setSystemState(oam::ACTIVE); + + log.writeLog(__LINE__, "MSG RECEIVED: Process Restarted Completed"); } break;