diff --git a/oam/oamcpp/liboamcpp.cpp b/oam/oamcpp/liboamcpp.cpp index 9a405e978..7483ca239 100644 --- a/oam/oamcpp/liboamcpp.cpp +++ b/oam/oamcpp/liboamcpp.cpp @@ -5295,6 +5295,7 @@ namespace oam dbrootList dbroot1; dbroot1.push_back(*pt1); + bool returnDbRoot = false; //send msg to unmount dbroot if module is not offline int opState; @@ -5306,7 +5307,6 @@ namespace oam {} if (opState != oam::AUTO_OFFLINE || opState != oam::AUTO_DISABLED) { -// bool unmountPass = true; try { mountDBRoot(dbroot1, false); @@ -5316,13 +5316,8 @@ namespace oam writeLog("ERROR: dbroot failed to unmount", LOG_TYPE_ERROR ); cout << endl << "ERROR: umountDBRoot api failure" << endl; exceptionControl("manualMovePmDbroot", API_FAILURE); -// unmountPass = false; } -// if ( !unmountPass) { -// dbrootlist.erase(pt1); -// break; -// } } //check for amazon moving required @@ -5340,38 +5335,79 @@ namespace oam //if Gluster, do the assign command if ( DataRedundancyConfig == "y") { - try { + try + { string errmsg; int ret = glusterctl(oam::GLUSTER_ASSIGN, *pt1, toPM, errmsg); - if ( ret != 0 ) + if ( ret == 0 ) + { + todbrootConfigList.push_back(*pt2); + residedbrootConfigList.erase(pt2); + } + else { cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID + ", error: " + errmsg << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); + writeLog("FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID + ", error: " + errmsg, LOG_TYPE_ERROR ); + returnDbRoot = true; } } catch (exception& e) { cout << endl << "**** glusterctl API exception: " << e.what() << endl; cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); + writeLog("FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID, LOG_TYPE_ERROR ); + returnDbRoot = true; } catch (...) { cout << endl << "**** glusterctl API exception: UNKNOWN" << endl; cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); + writeLog("FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID, LOG_TYPE_ERROR ); + returnDbRoot = true; } } - todbrootConfigList.push_back(*pt2); - - residedbrootConfigList.erase(pt2); - + if (returnDbRoot) + { + // something went wrong return it back to original owner + try + { + string errmsg; + writeLog("reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID, LOG_TYPE_ERROR ); + int ret = glusterctl(oam::GLUSTER_ASSIGN, *pt1, residePM, errmsg); + if ( ret != 0 ) + { + cerr << "FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID + ", error: " + errmsg << endl; + writeLog("FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID + ", error: " + errmsg, LOG_TYPE_ERROR ); + exceptionControl("manualMovePmDbroot", API_INVALID_STATE); + } + mountDBRoot(dbroot1); + //get updated Columnstore.xml distributed + distributeConfigFile("system"); + return; + } + catch (exception& e) + { + cout << endl << "**** glusterctl API exception: " << e.what() << endl; + cerr << "FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID << endl; + writeLog("FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID, LOG_TYPE_ERROR ); + exceptionControl("manualMovePmDbroot", API_INVALID_STATE); + } + catch (...) + { + cout << endl << "**** glusterctl API exception: UNKNOWN" << endl; + cerr << "FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID << endl; + writeLog("FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID, LOG_TYPE_ERROR ); + exceptionControl("manualMovePmDbroot", API_INVALID_STATE); + } + } break; } } } + + //set the 2 pms dbroot config try { @@ -5381,7 +5417,7 @@ namespace oam { writeLog("ERROR: setPmDbrootConfig api failure for pm" + residePMID , LOG_TYPE_ERROR ); cout << endl << "ERROR: setPmDbrootConfig api failure for pm" + residePMID << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); + exceptionControl("manualMovePmDbroot", API_INVALID_STATE); } try @@ -5392,7 +5428,7 @@ namespace oam { writeLog("ERROR: setPmDbrootConfig api failure for pm" + toPMID , LOG_TYPE_ERROR ); cout << endl << "ERROR: setPmDbrootConfig api failure for pm" + toPMID << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); + exceptionControl("manualMovePmDbroot", API_INVALID_STATE); } //send msg to mount dbroot @@ -5980,7 +6016,7 @@ namespace oam } if (!found) { - writeLog("No dbroots found in ../Calpont/local/moveDbrootTransactionLog", LOG_TYPE_DEBUG ); + writeLog("No dbroots found in " + InstallDir + "/moveDbrootTransactionLog", LOG_TYPE_DEBUG ); cout << "No dbroots found in " << fileName << endl; } @@ -6518,32 +6554,7 @@ namespace oam for( ; pt3 != dbrootlist.end() ; pt3++) { todbrootConfigList.push_back(*pt3); - -/* if ( DataRedundancyConfig == "y") - { - try { - string errmsg; - int ret = glusterctl(oam::GLUSTER_ASSIGN, itoa(*pt3), toPM, errmsg); - if ( ret != 0 ) - { - cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(*pt3) + " to pm" + toPMID + ", error: " + errmsg << endl; - exceptionControl("assignPmDbrootConfig", API_FAILURE); - } - } - catch (exception& e) - { - cout << endl << "**** glusterctl API exception: " << e.what() << endl; - cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(*pt3) + " to pm" + toPMID << endl; - exceptionControl("assignPmDbrootConfig", API_FAILURE); - } - catch (...) - { - cout << endl << "**** glusterctl API exception: UNKNOWN" << endl; - cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(*pt3) + " to pm" + toPMID << endl; - exceptionControl("assignPmDbrootConfig", API_FAILURE); - } - } -*/ } + } try { @@ -6961,12 +6972,14 @@ namespace oam { cout << endl << "**** glusterctl API exception: " << e.what() << endl; cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID) << endl; + writeLog("FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID), LOG_TYPE_ERROR ); exceptionControl("removeDbroot", API_FAILURE); } catch (...) { cout << endl << "**** glusterctl API exception: UNKNOWN" << endl; cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID) << endl; + writeLog("FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID), LOG_TYPE_ERROR ); exceptionControl("removeDbroot", API_FAILURE); } } diff --git a/procmgr/main.cpp b/procmgr/main.cpp index 2747fda16..995c851c2 100644 --- a/procmgr/main.cpp +++ b/procmgr/main.cpp @@ -1574,7 +1574,7 @@ void pingDeviceThread() { // no dbroots, fail module log.writeLog(__LINE__, "autoUnMovePmDbroot left no dbroots mounted, failing module restart: " + moduleName, LOG_TYPE_WARNING); - + //Issue an alarm aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET); @@ -1597,7 +1597,7 @@ void pingDeviceThread() //set query system state ready processManager.setQuerySystemState(true); - break; + goto break_case; } } catch(...) @@ -1619,25 +1619,24 @@ void pingDeviceThread() if ( retry == 5 ) { log.writeLog(__LINE__, "autoUnMovePmDbroot: Failed. Fail Module", LOG_TYPE_WARNING); - + log.writeLog(__LINE__, "System DBRM READ ONLY - Verify dbroot mounts.", LOG_TYPE_WARNING); //Issue an alarm aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET); //set module to disable state processManager.disableModule(moduleName, true); + // Need to do something here to verify data mounts before resuming + // Best to assume if we reach this you need to put into readonly and verify all dbroots are mounted + //call dbrm control - oam.dbrmctl("reload"); - log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); - - // resume the dbrm - oam.dbrmctl("resume"); - log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); + oam.dbrmctl("readonly"); + log.writeLog(__LINE__, "'dbrmctl readonly' done", LOG_TYPE_DEBUG); //clear count moduleInfoList[moduleName] = 0; - processManager.setSystemState(oam::ACTIVE); + processManager.setSystemState(oam::DEGRADED); //set query system state ready processManager.setQuerySystemState(true); @@ -2358,6 +2357,7 @@ void pingDeviceThread() } } //end of for loop } + break_case: // check and take action if LAN outage is flagged if (LANOUTAGESUPPORT && !LANOUTAGEACTIVE && LOCALNICDOWN) diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 0a054f9c3..3cc094a0a 100755 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -6244,7 +6244,7 @@ int ProcessManager::sendMsgProcMon( std::string module, ByteStream msg, int requ string IPAddr = sysConfig->getConfig(msgPort, "IPAddr"); if ( IPAddr == oam::UnassignedIpAddr ) { - log.writeLog(__LINE__, "sendMsgProcMon ping failure", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "sendMsgProcMon ping failure " + module + " " + IPAddr, LOG_TYPE_ERROR); return oam::API_SUCCESS; } @@ -6253,7 +6253,7 @@ int ProcessManager::sendMsgProcMon( std::string module, ByteStream msg, int requ string cmd = cmdLine + IPAddr + cmdOption; if ( system(cmd.c_str()) != 0) { //ping failure - log.writeLog(__LINE__, "sendMsgProcMon ping failure", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "sendMsgProcMon ping failure " + module + " " + IPAddr, LOG_TYPE_ERROR); return oam::API_SUCCESS; } } diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index 754d6ccf2..8b7b13165 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -6107,10 +6107,13 @@ int ProcessMonitor::glusterAssign(std::string dbrootID) command = "sudo mount -tglusterfs -odirect-io-mode=enable " + moduleIPAddr + ":/dbroot" + dbrootID + " " + startup::StartUp::installDir() + "/data" + dbrootID + " > /tmp/glusterAssign.txt 2>&1"; } + int ret = system(command.c_str()); if ( WEXITSTATUS(ret) != 0 ) { + log.writeLog(__LINE__, "glusterAssign mount failure: dbroot: " + dbrootID + " error: " + oam.itoa(WEXITSTATUS(ret)), LOG_TYPE_ERROR); + ifstream in("/tmp/glusterAssign.txt"); in.seekg(0, std::ios::end); int size = in.tellg(); @@ -6151,9 +6154,13 @@ int ProcessMonitor::glusterUnassign(std::string dbrootID) { command = "sudo umount -f " + startup::StartUp::installDir() + "/data" + dbrootID + " > /tmp/glusterUnassign.txt 2>&1"; } + int ret = system(command.c_str()); + if ( WEXITSTATUS(ret) != 0 ) { + log.writeLog(__LINE__, "glusterUnassign mount failure: dbroot: " + dbrootID + " error: " + oam.itoa(WEXITSTATUS(ret)), LOG_TYPE_ERROR); + ifstream in("/tmp/glusterUnassign.txt"); in.seekg(0, std::ios::end); int size = in.tellg();