diff --git a/procmgr/main.cpp b/procmgr/main.cpp index 630f97a73..94844092f 100644 --- a/procmgr/main.cpp +++ b/procmgr/main.cpp @@ -49,6 +49,7 @@ string cloud; bool amazon = false; string PMInstanceType; string UMInstanceType; +string AmazonPMFailover = "y"; string GlusterConfig = "n"; bool rootUser = true; string USER = "root"; @@ -143,6 +144,7 @@ int main(int argc, char **argv) { oam.getSystemConfig("PMInstanceType", PMInstanceType); oam.getSystemConfig("UMInstanceType", UMInstanceType); +// oam.getSystemConfig("AmazonPMFailover", AmazonPMFailover); amazon = true; } @@ -1279,8 +1281,9 @@ void pingDeviceThread() int status; // if pm, move dbroots back to pm - if ( ( moduleName.find("pm") == 0 ) || - ( moduleName.find("pm") == 0 && downActiveOAMModule ) ) { + if ( ( moduleName.find("pm") == 0 && !amazon ) || + ( moduleName.find("pm") == 0 && amazon && downActiveOAMModule ) || + ( moduleName.find("pm") == 0 && amazon && AmazonPMFailover == "y") ) { //restart to get the versionbuffer files closed so it can be unmounted processManager.restartProcessType("WriteEngineServer", moduleName); @@ -1556,8 +1559,8 @@ void pingDeviceThread() aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET); // if pm, move dbroots back to pm - if ( ( moduleName.find("pm") == 0 ) || - ( moduleName.find("pm") == 0 && downActiveOAMModule ) ) { + if ( ( moduleName.find("pm") == 0 && !amazon ) || + ( moduleName.find("pm") == 0 && amazon && downActiveOAMModule ) ) { //move dbroots to other modules try { log.writeLog(__LINE__, "Call autoMovePmDbroot", LOG_TYPE_DEBUG); @@ -1590,9 +1593,9 @@ void pingDeviceThread() log.writeLog(__LINE__, "Module failed to auto start: " + moduleName, LOG_TYPE_CRITICAL); -// if ( amazon ) -// processManager.setSystemState(oam::FAILED); -// else + if ( amazon ) + processManager.setSystemState(oam::FAILED); + else processManager.setSystemState(oam::ACTIVE); //set query system state ready @@ -1660,8 +1663,8 @@ void pingDeviceThread() log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); // if pm, move dbroots to other pms -// if ( !amazon || -// ( amazon ) ) { + if ( !amazon || + ( amazon && AmazonPMFailover == "y") ) { if( moduleName.find("pm") == 0 ) { try { log.writeLog(__LINE__, "Call autoMovePmDbroot", LOG_TYPE_DEBUG); @@ -1680,7 +1683,7 @@ void pingDeviceThread() log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: Caught unknown exception!", LOG_TYPE_ERROR); } } -// } + } // if Cloud Instance // state = running, then instance is rebooting, monitor for recovery diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index e77b651ea..4a770fd02 100644 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -53,6 +53,7 @@ extern string USER; extern bool HDFS; extern string localHostName; extern string PMwithUM; +extern string AmazonPMFailover; typedef map moduleList; extern moduleList moduleInfoList; @@ -4398,7 +4399,6 @@ int ProcessManager::addModule(oam::DeviceNetworkList devicenetworklist, std::str string mysqldPackage; string calpontPackage1; string calpontPackage2; - string version; string systemID; string packageType = "rpm"; @@ -4431,7 +4431,6 @@ int ProcessManager::addModule(oam::DeviceNetworkList devicenetworklist, std::str homedir = p; } - version = systemsoftware.Version + "-" + systemsoftware.Release; if ( packageType != "binary") { string separator = "-"; if ( packageType == "deb" ) @@ -5026,7 +5025,7 @@ int ProcessManager::addModule(oam::DeviceNetworkList devicenetworklist, std::str if ( packageType != "binary" ) { log.writeLog(__LINE__, "addModule - user_installer run for " + remoteModuleName, LOG_TYPE_DEBUG); - string cmd = installDir + "/bin/user_installer.sh " + remoteModuleName + " " + remoteModuleIP + " " + password + " " + version + " initial " + packageType + " --nodeps none " + MySQLPort + " 1 > /tmp/user_installer.log"; + string cmd = installDir + "/bin/user_installer.sh " + remoteModuleName + " " + remoteModuleIP + " " + password + " " + calpontPackage + " " + calpontPackage1 + " " + calpontPackage2 + " " + mysqlPackage + " " + mysqldPackage + " initial " + packageType + " --nodeps none " + MySQLPort + " 1 > /tmp/user_installer.log"; log.writeLog(__LINE__, "addModule cmd: " + cmd, LOG_TYPE_DEBUG); @@ -5064,7 +5063,7 @@ int ProcessManager::addModule(oam::DeviceNetworkList devicenetworklist, std::str if ( remoteModuleType == "pm" ) { if ( packageType != "binary" ) { log.writeLog(__LINE__, "addModule - performance_installer run for " + remoteModuleName, LOG_TYPE_DEBUG); - string cmd = installDir + "/bin/performance_installer.sh " + remoteModuleName + " " + remoteModuleIP + " " + password + " " + version + " initial " + packageType + " --nodeps 1 > /tmp/performance_installer.log"; + string cmd = installDir + "/bin/performance_installer.sh " + remoteModuleName + " " + remoteModuleIP + " " + password + " " + calpontPackage + " " + calpontPackage1 + " " + calpontPackage2 + " " + mysqlPackage + " " + mysqldPackage + " initial " + packageType + " --nodeps 1 > /tmp/performance_installer.log"; log.writeLog(__LINE__, "addModule cmd: " + cmd, LOG_TYPE_DEBUG); rtnCode = system(cmd.c_str()); @@ -8625,7 +8624,7 @@ int ProcessManager::OAMParentModuleChange() noAckCount = 0; //if Amazon Parent PM is restarting, monitor when back active and take needed actions -/* if (amazonParentRestart) + if (amazonParentRestart) { log.writeLog(__LINE__, "Amazon Parent pinging, waiting until it's active", LOG_TYPE_DEBUG); sleep(60); @@ -8658,7 +8657,7 @@ int ProcessManager::OAMParentModuleChange() sleep(5); } } -*/ + sleep(1); break; } @@ -8836,7 +8835,7 @@ int ProcessManager::OAMParentModuleChange() {} //do amazon failover -/* if (amazon && AmazonPMFailover == "n") + if (amazon && AmazonPMFailover == "n") { log.writeLog(__LINE__, " ", LOG_TYPE_DEBUG); log.writeLog(__LINE__, "*** OAMParentModule outage, AmazonPMFailover not set, wating for instance to restart ***", LOG_TYPE_DEBUG); @@ -8911,7 +8910,7 @@ int ProcessManager::OAMParentModuleChange() //clear and go monitor again failover = false; - }*/ + } } } @@ -9186,7 +9185,9 @@ int ProcessManager::OAMParentModuleChange() } //restart DDLProc/DMLProc to perform any rollbacks, if needed - if ( ( config.ServerInstallType() != oam::INSTALL_COMBINE_DM_UM_PM ) ) { + //dont rollback in amazon, wait until down pm recovers + if ( ( config.ServerInstallType() != oam::INSTALL_COMBINE_DM_UM_PM ) + && !amazon ) { processManager.restartProcessType("DDLProc", config.moduleName()); sleep(1); processManager.restartProcessType("DMLProc", config.moduleName()); @@ -10066,20 +10067,7 @@ int ProcessManager::setMySQLReplication(oam::DeviceNetworkList devicenetworklist if ( moduleType == "pm" && PMwithUM == "n" ) continue; } - - //check status, skip if module is offline - int opState = oam::ACTIVE; - bool degraded; - try { - oam.getModuleStatus(remoteModuleName, opState, degraded); - } - catch(...) - { - } - - if (opState != oam::ACTIVE) - continue; - + ByteStream msg1; ByteStream::byte requestID = oam::SLAVEREP; if ( !enable ) {