From 9283635460a67451a52938a08f8f9c0745947679 Mon Sep 17 00:00:00 2001 From: David Hill Date: Mon, 5 Dec 2016 22:35:01 +0000 Subject: [PATCH] add more checks around the getsystems --- oam/install_scripts/columnstore | 2 +- oam/install_scripts/post-install | 10 +++++----- oam/install_scripts/pre-uninstall | 10 +++++----- oam/oamcpp/liboamcpp.cpp | 6 ++++-- oamapps/mcsadmin/mcsadmin.cpp | 2 +- procmgr/processmanager.cpp | 17 ++++++++++++++--- procmon/main.cpp | 20 +++++++++++--------- procmon/processmonitor.cpp | 7 ++++++- 8 files changed, 47 insertions(+), 27 deletions(-) diff --git a/oam/install_scripts/columnstore b/oam/install_scripts/columnstore index 315256cd9..376a23671 100644 --- a/oam/install_scripts/columnstore +++ b/oam/install_scripts/columnstore @@ -81,7 +81,7 @@ start() { exit 0 fi - (sudo mkdir -p /var/lock/subsys && touch /var/lock/subsys/columnstore) >/dev/null 2>&1 + (sudo mkdir -p /var/lock/subsys && sudo chmod 777 /var/lock/subsys && sudo touch /var/lock/subsys/columnstore) >/dev/null 2>&1 if [ -x $InstallDir/bin/columnstore.pre-start ]; then $InstallDir/bin/columnstore.pre-start diff --git a/oam/install_scripts/post-install b/oam/install_scripts/post-install index 33a3f6e35..238c55f1a 100755 --- a/oam/install_scripts/post-install +++ b/oam/install_scripts/post-install @@ -7,11 +7,11 @@ prefix=/usr/local installdir=$prefix/mariadb/columnstore rpmmode=install -user=$USER -sudo=sudo -if [ -z "$user" ]; then - user=root - sudo=" " +user=root +sudo=" " +if [ -n "$USER" ] && [ "$USER" != "root" ]; then + user=$USER + sudo="sudo " fi quiet=0 diff --git a/oam/install_scripts/pre-uninstall b/oam/install_scripts/pre-uninstall index ee3c72e2b..6b1a05152 100755 --- a/oam/install_scripts/pre-uninstall +++ b/oam/install_scripts/pre-uninstall @@ -7,12 +7,12 @@ prefix=/usr/local installdir=$prefix/mariadb/columnstore rpmmode=install -user=$USER -sudo=sudo -if [ -z "$user" ]; then - user=root - sudo=" " +user=root +sudo=" " +if [ -n "$USER" ] && [ "$USER" != "root" ]; then + user=$USER + sudo="sudo " fi quiet=0 
for arg in "$@"; do diff --git a/oam/oamcpp/liboamcpp.cpp b/oam/oamcpp/liboamcpp.cpp index 19ed72529..7ef10b1cb 100644 --- a/oam/oamcpp/liboamcpp.cpp +++ b/oam/oamcpp/liboamcpp.cpp @@ -9271,7 +9271,7 @@ namespace oam catch(...) { processor.shutdown(); - throw std::runtime_error("error"); + throw std::runtime_error("write error"); } @@ -9282,7 +9282,7 @@ namespace oam catch(...) { processor.shutdown(); - throw std::runtime_error("error"); + throw std::runtime_error("read error"); } ByteStream::byte returnRequestType; @@ -9649,6 +9649,8 @@ namespace oam return true; } } + writeLog("checkSystemRunning - system reported down", LOG_TYPE_DEBUG ); + return false; } } //namespace oam diff --git a/oamapps/mcsadmin/mcsadmin.cpp b/oamapps/mcsadmin/mcsadmin.cpp index 8de8e93b3..f485232c3 100644 --- a/oamapps/mcsadmin/mcsadmin.cpp +++ b/oamapps/mcsadmin/mcsadmin.cpp @@ -4683,7 +4683,7 @@ int processCommand(string* arguments) } cout << " "; cout.width(20); - cout << "---------------"; + //cout << "---------------"; if ( AmazonElasticIPCount > 0 ) { cout.width(20); diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 0eb29bf54..4b9e3d5dd 100644 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -2631,8 +2631,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) case PROCESSALARM: { - string dbroot; - msg >> dbroot; + log.writeLog(__LINE__, "MSG RECEIVED: Process Alarm Message"); ByteStream::byte alarmID; std::string componentID; @@ -2839,6 +2838,18 @@ void processMSG(messageqcpp::IOSocket* cfIos) processManager.restartProcessType("DDLProc"); processManager.restartProcessType("DMLProc"); sleep(1); + + string DMLmodule = config.OAMParentName(); + if ( config.ServerInstallType() != oam::INSTALL_COMBINE_DM_UM_PM ) { + string PrimaryUMModuleName; + try { + oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName); + } + catch(...) 
{} + if ( !PrimaryUMModuleName.empty() ) + DMLmodule = PrimaryUMModuleName; + } + // Wait for DMLProc to be ACTIVE BRM::DBRM dbrm; state = AUTO_OFFLINE; @@ -2848,7 +2859,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) || state == oam::AUTO_INIT || state == oam::ROLLBACK_INIT) { - oam.getProcessStatus("DMLProc", config.OAMParentName(), procstat); + oam.getProcessStatus("DMLProc", DMLmodule, procstat); state = procstat.ProcessOpState; if ( procstat.ProcessOpState == oam::ACTIVE) break; diff --git a/procmon/main.cpp b/procmon/main.cpp index b6ec2799f..aa210ef85 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -138,7 +138,9 @@ int main(int argc, char **argv) if ( cloud == "amazon-ec2" ) { if(!aMonitor.amazonIPCheck()) { - string cmd = startup::StartUp::installDir() + "/bin/infinidb stop > /dev/null 2>&1"; + log.writeLog(__LINE__, "ERROR: amazonIPCheck failed, exiting", LOG_TYPE_CRITICAL); + sleep(2); + string cmd = startup::StartUp::installDir() + "/bin/columnstore stop > /dev/null 2>&1"; system(cmd.c_str()); exit(1); } @@ -377,7 +379,7 @@ int main(int argc, char **argv) if ( config.OAMParentName() == oam::UnassignedName ) { cerr << endl << "OAMParentModuleName == oam::UnassignedName, exiting " << endl; log.writeLog(__LINE__, "OAMParentModuleName == oam::UnassignedName, exiting", LOG_TYPE_CRITICAL); - exit (-1); + exit (1); } //check if module is in a DISABLED state @@ -562,8 +564,8 @@ int main(int argc, char **argv) } catch(...) 
{ - log.writeLog(__LINE__, "Problem getting the ParentOAMModuleName key from the Calpont System Configuration file", LOG_TYPE_ERROR); - exit(-1); + log.writeLog(__LINE__, "Problem getting the ParentOAMModuleName key from the Columnstore System Configuration file", LOG_TYPE_CRITICAL); + exit(1); } } @@ -1673,7 +1675,7 @@ static void statusControlThread() fProcStatMapreg.swap(region); fShmProcessStatus = static_cast(fProcStatMapreg.get_address()); if (fShmProcessStatus == 0) { - log.writeLog(__LINE__, "*****ProcessStatusTable shmat failed.", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "*****ProcessStatusTable shmat failed.", LOG_TYPE_CRITICAL); exit(1); } @@ -1735,7 +1737,7 @@ static void statusControlThread() fSysStatMapreg.swap(region2); fShmSystemStatus = static_cast(fSysStatMapreg.get_address()); if (fShmSystemStatus == 0) { - log.writeLog(__LINE__, "*****SystemStatusTable shmat failed.", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "*****SystemStatusTable shmat failed.", LOG_TYPE_CRITICAL); exit(1); } @@ -1839,7 +1841,7 @@ static void statusControlThread() fShmNICStatus = static_cast(fNICStatMapreg.get_address()); if (fShmNICStatus == 0) { - log.writeLog(__LINE__, "*****NICStatusTable shmat failed.", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "*****NICStatusTable shmat failed.", LOG_TYPE_CRITICAL); exit(1); } @@ -1925,7 +1927,7 @@ static void statusControlThread() fShmExtDeviceStatus = static_cast(fExtStatMapreg.get_address()); if (fShmExtDeviceStatus == 0) { - log.writeLog(__LINE__, "*****ExtDeviceStatusTable shmat failed.", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "*****ExtDeviceStatusTable shmat failed.", LOG_TYPE_CRITICAL); exit(1); } @@ -2003,7 +2005,7 @@ static void statusControlThread() fShmDbrootStatus = static_cast(fdDbrootStatMapreg.get_address()); if (fShmDbrootStatus == 0) { - log.writeLog(__LINE__, "*****DbrootStatusTable shmat failed.", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "*****DbrootStatusTable shmat failed.", LOG_TYPE_CRITICAL); exit(1); } diff 
--git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index 4251b073c..f11598a0d 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -2597,7 +2597,7 @@ pid_t ProcessMonitor::startProcess(string processModuleType, string processName, //Update Process Status: Mark Process INIT state updateProcessInfo(processName, FAILED, newProcessID); - exit(oam::API_FAILURE); + return (oam::API_FAILURE); } return newProcessID; @@ -2812,6 +2812,11 @@ void sendProcessThread(sendProcessInfo_t* t) try { oam.setProcessStatus(processName, config.moduleName(), state, PID); } + catch (exception& ex) + { + string error = ex.what(); + log.writeLog(__LINE__, "EXCEPTION ERROR on setProcessStatus: " + error, LOG_TYPE_ERROR); + } catch(...) { log.writeLog(__LINE__, "EXCEPTION ERROR on setProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR );