From 846f7fb29b29759af679de3d239b1274effa2741 Mon Sep 17 00:00:00 2001 From: benthompson15 Date: Wed, 2 Dec 2020 13:14:25 -0600 Subject: [PATCH] MCOL-4193: Delete unused OAM and applications, ProcMon, ProcMgr, and no longer build all tools for packages --- CMakeLists.txt | 3 +- .../commandpackageprocessor.cpp | 2 - dbcon/joblist/distributedenginecomm.cpp | 2 - dbcon/mysql/ha_mcs_client_udfs.cpp | 1 - ddlproc/ddlproc.cpp | 34 - debian/mariadb-plugin-columnstore.install | 45 +- dmlproc/dmlproc.cpp | 110 - exemgr/main.cpp | 52 - oam/CMakeLists.txt | 2 - oam/cloud/CMakeLists.txt | 4 - oam/etc/AlarmConfig.xml | 328 - oam/etc/CMakeLists.txt | 6 +- oam/etc/ConsoleCmds.xml | 431 - oam/etc/LocalServer-CEC1.xml | 9 - oam/etc/LocalServer-CFE1.xml | 9 - oam/etc/ProcessConfig.xml.in | 142 - oam/install_scripts/CMakeLists.txt | 22 - .../columnstore_module_installer.sh.in | 153 - oam/install_scripts/columnstore_os_check.sh | 44 - .../disable-rep-columnstore.sh.in | 66 - oam/install_scripts/install-columnstore.sh | 13 - .../mariadb-command-line.sh.in | 62 - .../master-rep-columnstore.sh.in | 99 - .../mcs_module_installer.sh.in | 138 - oam/install_scripts/patch_installer.sh | 69 - oam/install_scripts/postInstall.sh.in | 129 - oam/install_scripts/remote_command.sh | 90 - oam/install_scripts/remote_command_verify.sh | 53 - oam/install_scripts/remote_scp_get.sh | 60 - oam/install_scripts/remote_scp_put.sh | 59 - oam/install_scripts/remotessh.exp | 89 - oam/install_scripts/rsync.sh | 83 - .../slave-rep-columnstore.sh.in | 111 - oam/install_scripts/startupTests.sh.in | 31 - oam/oamcpp/liboamcpp.cpp | 10239 +------------- oam/oamcpp/liboamcpp.h | 2163 +-- oam/oamcpp/oamcache.cpp | 81 +- oam/oamcpp/tdriver.cpp | 703 - oam/post/CMakeLists.txt | 7 - oam/post/columnstore_functions | 42 - oam/post/mcstest-001.sh.in | 63 - oam/post/mcstest-002.sh.in | 41 - oam/post/mcstest-003.sh.in | 42 - oam/post/mcstest-004.sh.in | 45 - oamapps/CMakeLists.txt | 5 - oamapps/alarmmanager/CMakeLists.txt | 17 - oamapps/alarmmanager/alarm.cpp | 229 - oamapps/alarmmanager/alarm.h | 172 - oamapps/alarmmanager/alarmglobal.h | 46 - oamapps/alarmmanager/alarmmanager.cpp | 726 - oamapps/alarmmanager/alarmmanager.h | 125 - oamapps/alarmmanager/tdriver.cpp | 124 - oamapps/columnstoreDB/columnstoreDB.cpp | 211 +- .../columnstoreSupport/columnstoreSupport.cpp | 4 +- oamapps/columnstoreSupport/mcsSupportUtil.cpp | 100 +- oamapps/columnstoreSupport/mcsSupportUtil.h | 4 +- oamapps/hardwareMonitor/hardwareMonitor.cpp | 412 - oamapps/hardwareMonitor/hardwareMonitor.h | 71 - oamapps/mcsadmin/CMakeLists.txt | 16 - oamapps/mcsadmin/mcsadmin.cpp | 10036 -------------- oamapps/mcsadmin/mcsadmin.h | 135 - oamapps/postConfigure/CMakeLists.txt | 69 - oamapps/postConfigure/Doxyfile | 275 - oamapps/postConfigure/getMySQLpw.cpp | 128 - oamapps/postConfigure/helpers.cpp | 844 -- oamapps/postConfigure/helpers.h | 52 - oamapps/postConfigure/installer.cpp | 785 -- oamapps/postConfigure/mycnfUpgrade.cpp | 287 - oamapps/postConfigure/postConfigure.cpp | 154 - .../postConfigure/quick_installer_amazon.sh | 67 - .../quick_installer_multi_server.sh | 69 - .../quick_installer_single_server.sh | 26 - oamapps/resourceMonitor/hardwareMonitor.cpp | 213 - oamapps/resourceMonitor/resourceMonitor.cpp | 348 - oamapps/resourceMonitor/tdriver.cpp | 64 - oamapps/serverMonitor/CMakeLists.txt | 26 - oamapps/serverMonitor/UMAutoSync.cpp | 270 - oamapps/serverMonitor/cpuMonitor.cpp | 593 - oamapps/serverMonitor/dbhealthMonitor.cpp | 263 - oamapps/serverMonitor/diskMonitor.cpp | 756 -- oamapps/serverMonitor/hardwareMonitor.cpp | 269 - oamapps/serverMonitor/main.cpp | 364 - oamapps/serverMonitor/memoryMonitor.cpp | 573 - oamapps/serverMonitor/msgProcessor.cpp | 693 - oamapps/serverMonitor/procmonMonitor.cpp | 193 - oamapps/serverMonitor/serverMonitor.cpp | 369 - oamapps/serverMonitor/serverMonitor.h | 250 - primitives/primproc/primitiveserver.cpp | 10 - primitives/primproc/primproc.cpp | 8 - procmgr/CMakeLists.txt | 16 - procmgr/main.cpp | 3170 ----- procmgr/processmanager.cpp | 11262 ---------------- procmgr/processmanager.h | 607 - procmon/CMakeLists.txt | 16 - procmon/main.cpp | 3696 ----- procmon/processmonitor.cpp | 6248 --------- procmon/processmonitor.h | 563 - tools/CMakeLists.txt | 1 - tools/brmtest/CMakeLists.txt | 45 - tools/brmtest/brmtest.cpp | 680 - tools/brmtest/locks.cpp | 82 - tools/configMgt/CMakeLists.txt | 13 - tools/configMgt/CalpontSystems.xml | 137 - tools/configMgt/README | 43 - tools/configMgt/autoBuilder | 119 - tools/configMgt/autoConfigure.cpp | 2126 --- tools/configMgt/autoPackager | 109 - tools/configMgt/autoReleaseNotes.sh | 81 - tools/configMgt/autoReleaseNotes_branch.sh | 90 - tools/configMgt/configure.cpp | 1763 --- tools/configMgt/datdup-build | 44 - tools/configMgt/mysql-build-branch | 8 - tools/configMgt/mysql-build-genii | 8 - tools/configMgt/recreateBranch.sh | 53 - tools/configMgt/remote_command.sh | 52 - tools/configMgt/remote_command_test.sh | 47 - tools/configMgt/remote_scp_get.sh | 38 - tools/configMgt/rpm_txt.sh | 32 - tools/notificationTester/client.cpp | 21 - tools/notificationTester/main.cpp | 313 - tools/setConfig/CMakeLists.txt | 4 - tools/setConfig/configxml.sh.in | 70 - tools/setConfig/main.cpp | 11 - tools/vbgen/CMakeLists.txt | 61 - tools/vbgen/myrand.cpp | 30 - tools/vbgen/myrand.h | 39 - tools/vbgen/vbgen.cpp | 73 - utils/CMakeLists.txt | 1 - utils/clusterTester/CMakeLists.txt | 5 - .../clusterTester/columnstoreClusterTester.sh | 1399 -- utils/clusterTester/os_detect.sh | 40 - versioning/BRM/masterdbrmnode.cpp | 29 - versioning/BRM/masternode.cpp | 33 - versioning/BRM/slavenode.cpp | 33 - writeengine/client/we_clients.cpp | 9 - writeengine/server/we_server.cpp | 45 - writeengine/splitter/we_sdhandler.cpp | 3 - writeengine/splitter/we_splclient.cpp | 9 +- 138 files changed, 311 insertions(+), 69494 deletions(-) delete mode 100644 oam/etc/AlarmConfig.xml delete mode 100644 oam/etc/ConsoleCmds.xml delete mode 100644 oam/etc/LocalServer-CEC1.xml delete mode 100644 oam/etc/LocalServer-CFE1.xml delete mode 100644 oam/etc/ProcessConfig.xml.in delete mode 100755 oam/install_scripts/columnstore_module_installer.sh.in delete mode 100644 oam/install_scripts/columnstore_os_check.sh delete mode 100644 oam/install_scripts/disable-rep-columnstore.sh.in delete mode 100755 oam/install_scripts/install-columnstore.sh delete mode 100755 oam/install_scripts/mariadb-command-line.sh.in delete mode 100644 oam/install_scripts/master-rep-columnstore.sh.in delete mode 100644 oam/install_scripts/mcs_module_installer.sh.in delete mode 100755 oam/install_scripts/patch_installer.sh delete mode 100755 oam/install_scripts/postInstall.sh.in delete mode 100755 oam/install_scripts/remote_command.sh delete mode 100755 oam/install_scripts/remote_command_verify.sh delete mode 100755 oam/install_scripts/remote_scp_get.sh delete mode 100644 oam/install_scripts/remote_scp_put.sh delete mode 100644 oam/install_scripts/remotessh.exp delete mode 100755 oam/install_scripts/rsync.sh delete mode 100644 oam/install_scripts/slave-rep-columnstore.sh.in delete mode 100755 oam/install_scripts/startupTests.sh.in delete mode 100644 oam/oamcpp/tdriver.cpp delete mode 100644 oam/post/CMakeLists.txt delete mode 100755 oam/post/columnstore_functions delete mode 100755 oam/post/mcstest-001.sh.in delete mode 100755 oam/post/mcstest-002.sh.in delete mode 100755 oam/post/mcstest-003.sh.in delete mode 100755 oam/post/mcstest-004.sh.in delete mode 100644 oamapps/alarmmanager/CMakeLists.txt delete mode 100644 oamapps/alarmmanager/alarm.cpp delete mode 100644 oamapps/alarmmanager/alarm.h delete mode 100644 oamapps/alarmmanager/alarmglobal.h delete mode 100644 oamapps/alarmmanager/alarmmanager.cpp delete mode 100644 oamapps/alarmmanager/alarmmanager.h delete mode 100644 oamapps/alarmmanager/tdriver.cpp delete mode 100644 oamapps/hardwareMonitor/hardwareMonitor.cpp delete mode 100644 oamapps/hardwareMonitor/hardwareMonitor.h delete mode 100644 oamapps/mcsadmin/CMakeLists.txt delete mode 100644 oamapps/mcsadmin/mcsadmin.cpp delete mode 100644 oamapps/mcsadmin/mcsadmin.h delete mode 100644 oamapps/postConfigure/CMakeLists.txt delete mode 100644 oamapps/postConfigure/Doxyfile delete mode 100644 oamapps/postConfigure/getMySQLpw.cpp delete mode 100644 oamapps/postConfigure/helpers.cpp delete mode 100644 oamapps/postConfigure/helpers.h delete mode 100644 oamapps/postConfigure/installer.cpp delete mode 100644 oamapps/postConfigure/mycnfUpgrade.cpp delete mode 100644 oamapps/postConfigure/postConfigure.cpp delete mode 100755 oamapps/postConfigure/quick_installer_amazon.sh delete mode 100755 oamapps/postConfigure/quick_installer_multi_server.sh delete mode 100755 oamapps/postConfigure/quick_installer_single_server.sh delete mode 100644 oamapps/resourceMonitor/hardwareMonitor.cpp delete mode 100644 oamapps/resourceMonitor/resourceMonitor.cpp delete mode 100644 oamapps/resourceMonitor/tdriver.cpp delete mode 100644 oamapps/serverMonitor/CMakeLists.txt delete mode 100644 oamapps/serverMonitor/UMAutoSync.cpp delete mode 100644 oamapps/serverMonitor/cpuMonitor.cpp delete mode 100644 oamapps/serverMonitor/dbhealthMonitor.cpp delete mode 100644 oamapps/serverMonitor/diskMonitor.cpp delete mode 100644 oamapps/serverMonitor/hardwareMonitor.cpp delete mode 100644 oamapps/serverMonitor/main.cpp delete mode 100644 oamapps/serverMonitor/memoryMonitor.cpp delete mode 100644 oamapps/serverMonitor/msgProcessor.cpp delete mode 100644 oamapps/serverMonitor/procmonMonitor.cpp delete mode 100644 oamapps/serverMonitor/serverMonitor.cpp delete mode 100644 oamapps/serverMonitor/serverMonitor.h delete mode 100644 procmgr/CMakeLists.txt delete mode 100644 procmgr/main.cpp delete mode 100644 procmgr/processmanager.cpp delete mode 100644 procmgr/processmanager.h delete mode 100644 procmon/CMakeLists.txt delete mode 100644 procmon/main.cpp delete mode 100644 procmon/processmonitor.cpp delete mode 100644 procmon/processmonitor.h delete mode 100644 tools/brmtest/CMakeLists.txt delete mode 100644 tools/brmtest/brmtest.cpp delete mode 100644 tools/brmtest/locks.cpp delete mode 100644 tools/configMgt/CMakeLists.txt delete mode 100644 tools/configMgt/CalpontSystems.xml delete mode 100644 tools/configMgt/README delete mode 100755 tools/configMgt/autoBuilder delete mode 100644 tools/configMgt/autoConfigure.cpp delete mode 100644 tools/configMgt/autoPackager delete mode 100755 tools/configMgt/autoReleaseNotes.sh delete mode 100644 tools/configMgt/autoReleaseNotes_branch.sh delete mode 100644 tools/configMgt/configure.cpp delete mode 100755 tools/configMgt/datdup-build delete mode 100755 tools/configMgt/mysql-build-branch delete mode 100755 tools/configMgt/mysql-build-genii delete mode 100755 tools/configMgt/recreateBranch.sh delete mode 100755 tools/configMgt/remote_command.sh delete mode 100755 tools/configMgt/remote_command_test.sh delete mode 100755 tools/configMgt/remote_scp_get.sh delete mode 100755 tools/configMgt/rpm_txt.sh delete mode 100644 tools/notificationTester/client.cpp delete mode 100644 tools/notificationTester/main.cpp delete mode 100755 tools/setConfig/configxml.sh.in delete mode 100644 tools/vbgen/CMakeLists.txt delete mode 100644 tools/vbgen/myrand.cpp delete mode 100644 tools/vbgen/myrand.h delete mode 100644 tools/vbgen/vbgen.cpp delete mode 100644 utils/clusterTester/CMakeLists.txt delete mode 100755 utils/clusterTester/columnstoreClusterTester.sh delete mode 100755 utils/clusterTester/os_detect.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ed428c3f..238e9d85a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -202,7 +202,7 @@ ENDIF() SET (ENGINE_LDFLAGS "-Wl,--no-as-needed -Wl,--add-needed") SET (ENGINE_DT_LIB datatypes) SET (ENGINE_COMMON_LIBS messageqcpp loggingcpp configcpp idbboot ${Boost_LIBRARIES} xml2 pthread rt libmysql_client ${ENGINE_DT_LIB}) -SET (ENGINE_OAM_LIBS oamcpp alarmmanager) +SET (ENGINE_OAM_LIBS oamcpp) SET (ENGINE_BRM_LIBS brm idbdatafile cacheutils rwlock ${ENGINE_OAM_LIBS} ${ENGINE_COMMON_LIBS}) SET (ENGINE_EXEC_LIBS joblist execplan windowfunction joiner rowgroup funcexp udfsdk regr dataconvert common compress querystats querytele thrift threadpool ${ENGINE_BRM_LIBS}) SET (ENGINE_WRITE_LIBS ddlpackageproc ddlpackage dmlpackageproc dmlpackage writeengine writeengineclient idbdatafile cacheutils ${ENGINE_EXEC_LIBS}) @@ -261,7 +261,6 @@ SET (ENGINE_UTILS_COMMON_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/utils/common" SET (ENGINE_UTILS_DATACONVERT_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/utils/dataconvert") SET (ENGINE_UTILS_RWLOCK_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/utils/rwlock") SET (ENGINE_UTILS_FUNCEXP_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/utils/funcexp") -SET (ENGINE_OAMAPPS_ALARMMANAGER_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/oamapps/alarmmanager") SET (ENGINE_UTILS_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/utils") SET (ENGINE_OAM_OAMCPP_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/oam/oamcpp") SET (ENGINE_DBCON_DDLPKGPROC_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/dbcon/ddlpackageproc") diff --git a/dbcon/dmlpackageproc/commandpackageprocessor.cpp b/dbcon/dmlpackageproc/commandpackageprocessor.cpp index 2a9d2786c..d18f7076c 100644 --- a/dbcon/dmlpackageproc/commandpackageprocessor.cpp +++ b/dbcon/dmlpackageproc/commandpackageprocessor.cpp @@ -36,8 +36,6 @@ #include "we_messages.h" #include "we_ddlcommandclient.h" #include "oamcache.h" -#include "alarmglobal.h" -#include "alarmmanager.h" #include "liboamcpp.h" using namespace std; using namespace WriteEngine; diff --git a/dbcon/joblist/distributedenginecomm.cpp b/dbcon/joblist/distributedenginecomm.cpp index 1cc318ef8..85d9655c2 100644 --- a/dbcon/joblist/distributedenginecomm.cpp +++ b/dbcon/joblist/distributedenginecomm.cpp @@ -61,8 +61,6 @@ using namespace config; using namespace logging; #include "liboamcpp.h" -#include "alarmmanager.h" -using namespace alarmmanager; using namespace oam; #include "jobstep.h" diff --git a/dbcon/mysql/ha_mcs_client_udfs.cpp b/dbcon/mysql/ha_mcs_client_udfs.cpp index 87ba12d84..6dd48d102 100644 --- a/dbcon/mysql/ha_mcs_client_udfs.cpp +++ b/dbcon/mysql/ha_mcs_client_udfs.cpp @@ -290,7 +290,6 @@ extern "C" long long rtn = 0; Oam oam; DBRM dbrm(true); - SystemStatus systemstatus; try { diff --git a/ddlproc/ddlproc.cpp b/ddlproc/ddlproc.cpp index b71116acb..2a6745d3b 100644 --- a/ddlproc/ddlproc.cpp +++ b/ddlproc/ddlproc.cpp @@ -213,40 +213,6 @@ int ServiceDDLProc::Child() NotifyServiceStarted(); - { - Oam oam; - - try - { - oam.processInitComplete("DDLProc", ACTIVE); - } - catch (std::exception& ex) - { - cerr << ex.what() << endl; - LoggingID logid(23, 0, 0); - logging::Message::Args args1; - logging::Message msg(1); - args1.add("DDLProc init caught exception: "); - args1.add(ex.what()); - msg.format( args1 ); - logging::Logger logger(logid.fSubsysID); - logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); - return 1; - } - catch (...) - { - cerr << "Caught unknown exception in init!" << endl; - LoggingID logid(23, 0, 0); - logging::Message::Args args1; - logging::Message msg(1); - args1.add("DDLProc init caught unknown exception"); - msg.format( args1 ); - logging::Logger logger(logid.fSubsysID); - logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); - return 1; - } - } - try { ddlprocessor.process(); diff --git a/debian/mariadb-plugin-columnstore.install b/debian/mariadb-plugin-columnstore.install index b74b47b8f..d028ed3e8 100644 --- a/debian/mariadb-plugin-columnstore.install +++ b/debian/mariadb-plugin-columnstore.install @@ -1,90 +1,52 @@ -etc/columnstore/AlarmConfig.xml etc/columnstore/Columnstore.xml -etc/columnstore/ConsoleCmds.xml etc/columnstore/ErrorMessage.txt etc/columnstore/MessageFile.txt -etc/columnstore/ProcessConfig.xml etc/columnstore/storagemanager.cnf etc/mysql/mariadb.conf.d/columnstore.cnf usr/bin/DDLProc usr/bin/DMLProc usr/bin/ExeMgr -usr/bin/MCSInstanceCmds.sh -usr/bin/MCSVolumeCmds.sh -usr/bin/MCSgetCredentials.sh usr/bin/PrimProc -usr/bin/ServerMonitor usr/bin/StorageManager usr/bin/WriteEngineServer -usr/bin/autoConfigure -usr/bin/bulklogReport.sh usr/bin/clearShm usr/bin/cleartablelock usr/bin/columnstore usr/bin/columnstore-post-install usr/bin/columnstore-pre-uninstall -usr/bin/columnstoreClusterTester.sh usr/bin/columnstoreDBWrite -usr/bin/columnstoreSupport usr/bin/columnstoreSyslogSetup.sh -usr/bin/columnstore_installer -usr/bin/columnstore_module_installer.sh -usr/bin/columnstore_os_check.sh usr/bin/columnstore_run.sh usr/bin/colxml -usr/bin/configReport.sh -usr/bin/configxml.sh usr/bin/controllernode usr/bin/cpimport usr/bin/cpimport.bin usr/bin/cplogger usr/bin/dbbuilder -usr/bin/dbmsReport.sh usr/bin/dbrmctl usr/bin/ddlcleanup -usr/bin/disable-rep-columnstore.sh usr/bin/editem -usr/bin/getMySQLpw -usr/bin/hardwareReport.sh usr/bin/idbmeminfo usr/bin/load_brm -usr/bin/logReport.sh usr/bin/mariadb-columnstore-start.sh usr/bin/mariadb-columnstore-stop.sh -usr/bin/mariadb-command-line.sh -usr/bin/master-rep-columnstore.sh usr/bin/mcs-savebrm.py usr/bin/mcs-loadbrm.py usr/bin/mcs-stop-controllernode.sh usr/bin/mcsGetConfig usr/bin/mcsSetConfig -usr/bin/mcs_module_installer.sh -usr/bin/mycnfUpgrade -usr/bin/os_detect.sh usr/bin/post-mysql-install usr/bin/post-mysqld-install -usr/bin/postConfigure -usr/bin/quick_installer_multi_server.sh -usr/bin/quick_installer_single_server.sh -usr/bin/remote_command.sh -usr/bin/remote_command_verify.sh -usr/bin/remote_scp_get.sh -usr/bin/remote_scp_put.sh -usr/bin/remotessh.exp usr/bin/reset_locks -usr/bin/resourceReport.sh usr/bin/rollback -usr/bin/rsync.sh usr/bin/save_brm -usr/bin/slave-rep-columnstore.sh usr/bin/smcat usr/bin/smls usr/bin/smput usr/bin/smrm -usr/bin/startupTests.sh +usr/bin/testS3Connection usr/bin/viewtablelock usr/bin/workernode -usr/lib/*/libalarmmanager.so usr/lib/*/libbatchloader.so usr/lib/*/libbrm.so usr/lib/*/libcacheutils.so @@ -134,7 +96,6 @@ usr/share/columnstore/columnstoreLogRotate usr/share/columnstore/columnstoreSyslog usr/share/columnstore/columnstoreSyslog-ng usr/share/columnstore/columnstoreSyslog7 -usr/share/columnstore/columnstore_functions usr/share/columnstore/columnstore_info.sql usr/share/columnstore/dumpcat_mysql.sql usr/share/columnstore/gitversionEngine @@ -148,10 +109,6 @@ usr/share/columnstore/mcs-primproc.service usr/share/columnstore/mcs-storagemanager.service usr/share/columnstore/mcs-workernode.service usr/share/columnstore/mcs-writeengineserver.service -usr/share/columnstore/mcstest-001.sh -usr/share/columnstore/mcstest-002.sh -usr/share/columnstore/mcstest-003.sh -usr/share/columnstore/mcstest-004.sh usr/share/columnstore/myCnf-exclude-args.text usr/share/columnstore/myCnf-include-args.text usr/share/columnstore/releasenum diff --git a/dmlproc/dmlproc.cpp b/dmlproc/dmlproc.cpp index f59006036..9736d4b06 100644 --- a/dmlproc/dmlproc.cpp +++ b/dmlproc/dmlproc.cpp @@ -29,8 +29,6 @@ //#include "boost/filesystem/path.hpp" using namespace std; -#include "alarmglobal.h" -#include "alarmmanager.h" #include "liboamcpp.h" #include @@ -216,14 +214,6 @@ void rollbackAll(DBRM* dbrm) { Oam oam; - try - { - alarmmanager::ALARMManager alarmMgr; - alarmMgr.sendAlarmReport("System", oam::ROLLBACK_FAILURE, alarmmanager::CLEAR); - } - catch (...) - {} - //Log a message in info.log logging::Message::Args args; logging::Message message(2); @@ -258,14 +248,6 @@ void rollbackAll(DBRM* dbrm) throw std::runtime_error(IDBErrorInfo::instance()->errorMsg(ERR_HARD_FAILURE)); } - // If there are tables to rollback, set to ROLLBACK_INIT. - // This tells ProcMgr that we are rolling back and will be - // a while. A message to this effect should be printed. - if (tableLocks.size() > 0) - { - oam.processInitComplete("DMLProc", oam::ROLLBACK_INIT); - } - uint64_t uniqueId = dbrm->getUnique64(); RollbackTransactionProcessor rollbackProcessor(dbrm); std::string errorMsg; @@ -368,15 +350,6 @@ void rollbackAll(DBRM* dbrm) oss << " problem with rollback transaction " << tableLocks[i].ownerTxnID << "and DBRM is setting to readonly and table lock is not released: " << errorMsg; rc = dbrm->setReadOnly(true); - //Raise an alarm - try - { - alarmmanager::ALARMManager alarmMgr; - alarmMgr.sendAlarmReport("System", oam::ROLLBACK_FAILURE, alarmmanager::SET); - } - catch (...) - {} - //Log to critical log logging::Message::Args args6; logging::Message message6(2); @@ -464,7 +437,6 @@ void rollbackAll(DBRM* dbrm) if (txnList.size() > 0) { - oam.processInitComplete("DMLProc", oam::ROLLBACK_INIT); ostringstream oss; oss << "DMLProc will rollback " << txnList.size() << " transactions."; logging::Message::Args args2; @@ -509,15 +481,6 @@ void rollbackAll(DBRM* dbrm) oss << " problem with rollback transaction " << txnId.id << "and DBRM is setting to readonly and table lock is not released: " << errorMsg; rc = dbrm->setReadOnly(true); - //Raise an alarm - try - { - alarmmanager::ALARMManager alarmMgr; - alarmMgr.sendAlarmReport("System", oam::ROLLBACK_FAILURE, alarmmanager::SET); - } - catch (...) - {} - //Log to critical log logging::Message::Args args6; logging::Message message6(2); @@ -637,39 +600,6 @@ int ServiceDMLProc::Child() idbdatafile::IDBPolicy::configIDBPolicy(); #endif - try - { - // At first we set to BUSY_INIT - oam.processInitComplete("DMLProc", oam::BUSY_INIT); - } - catch (std::exception& ex) - { - cerr << ex.what() << endl; - LoggingID logid(21, 0, 0); - logging::Message::Args args1; - logging::Message msg(1); - args1.add("DMLProc init caught exception: "); - args1.add(ex.what()); - msg.format( args1 ); - logging::Logger logger(logid.fSubsysID); - logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); - NotifyServiceInitializationFailed(); - return 1; - } - catch (...) - { - cerr << "Caught unknown exception in init!" << endl; - LoggingID logid(21, 0, 0); - logging::Message::Args args1; - logging::Message msg(1); - args1.add("DMLProc init caught unknown exception"); - msg.format( args1 ); - logging::Logger logger(logid.fSubsysID); - logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); - NotifyServiceInitializationFailed(); - return 1; - } - //@Bug 1627 try { @@ -677,15 +607,6 @@ int ServiceDMLProc::Child() } catch ( std::exception& e ) { - //@Bug 2299 Set DMLProc process to fail and log a message - try - { - oam.processInitFailure(); - } - catch (...) - { - } - logging::Message::Args args; logging::Message message(2); args.add("DMLProc failed to start due to :"); @@ -760,37 +681,6 @@ int ServiceDMLProc::Child() DMLServer::fDmlPackagepool.invoke(threadpool::ThreadPoolMonitor(&DMLServer::fDmlPackagepool)); } - //set ACTIVE state - try - { - oam.processInitComplete("DMLProc", ACTIVE); - } - catch (std::exception& ex) - { - cerr << ex.what() << endl; - LoggingID logid(21, 0, 0); - logging::Message::Args args1; - logging::Message msg(1); - args1.add("DMLProc init caught exception: "); - args1.add(ex.what()); - msg.format( args1 ); - logging::Logger logger(logid.fSubsysID); - logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); - return 1; - } - catch (...) - { - cerr << "Caught unknown exception in init!" << endl; - LoggingID logid(21, 0, 0); - logging::Message::Args args1; - logging::Message msg(1); - args1.add("DMLProc init caught unknown exception"); - msg.format( args1 ); - logging::Logger logger(logid.fSubsysID); - logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); - return 1; - } - Dec = DistributedEngineComm::instance(rm); setupChildSignalHandlers(); diff --git a/exemgr/main.cpp b/exemgr/main.cpp index ab6df70c1..950f2cc70 100644 --- a/exemgr/main.cpp +++ b/exemgr/main.cpp @@ -1354,27 +1354,9 @@ void added_a_pm(int) if (ec) { - //set BUSY_INIT state while processing the add pm configuration change - oam::Oam oam; - - try - { - oam.processInitComplete("ExeMgr", oam::BUSY_INIT); - } - catch (...) - {} - oam::OamCache* oamCache = oam::OamCache::makeOamCache(); oamCache->forceReload(); ec->Setup(); - - //set ACTIVE state - try - { - oam.processInitComplete("ExeMgr"); - } - catch (...) - {} } } @@ -1514,19 +1496,6 @@ int ServiceExeMgr::Child() gDebug= m_debug; - //set BUSY_INIT state - { - oam::Oam oam; - - try - { - oam.processInitComplete("ExeMgr", oam::BUSY_INIT); - } - catch (...) - { - } - } - #ifdef _MSC_VER //FIXME: #else @@ -1576,14 +1545,6 @@ int ServiceExeMgr::Child() ml.logCriticalMessage( message ); std::cerr << errMsg << std::endl; - try - { - oam.processInitFailure(); - } - catch (...) - { - } - NotifyServiceInitializationFailed(); return 2; } @@ -1686,19 +1647,6 @@ int ServiceExeMgr::Child() ", qs = " << rm->getEmExecQueueSize() << ", mx = " << maxPct << ", cf = " << rm->getConfig()->configFile() << std::endl; - //set ACTIVE state - { - oam::Oam oam; - - try - { - oam.processInitComplete("ExeMgr"); - } - catch (...) - { - } - } - { BRM::DBRM *dbrm = new BRM::DBRM(); dbrm->setSystemQueryReady(true); diff --git a/oam/CMakeLists.txt b/oam/CMakeLists.txt index 77707fdec..952c7f3c1 100644 --- a/oam/CMakeLists.txt +++ b/oam/CMakeLists.txt @@ -1,6 +1,4 @@ add_subdirectory(etc) -add_subdirectory(post) add_subdirectory(install_scripts) -add_subdirectory(cloud) diff --git a/oam/cloud/CMakeLists.txt b/oam/cloud/CMakeLists.txt index 97b8e5180..e69de29bb 100644 --- a/oam/cloud/CMakeLists.txt +++ b/oam/cloud/CMakeLists.txt @@ -1,4 +0,0 @@ -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/MCSVolumeCmds.sh.in" "${CMAKE_CURRENT_SOURCE_DIR}/MCSVolumeCmds.sh" @ONLY) -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/MCSInstanceCmds.sh.in" "${CMAKE_CURRENT_SOURCE_DIR}/MCSInstanceCmds.sh" @ONLY) - -install(PROGRAMS MCSInstanceCmds.sh MCSVolumeCmds.sh MCSgetCredentials.sh DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine) diff --git a/oam/etc/AlarmConfig.xml b/oam/etc/AlarmConfig.xml deleted file mode 100644 index 2d2439d9d..000000000 --- a/oam/etc/AlarmConfig.xml +++ /dev/null @@ -1,328 +0,0 @@ - - - - - 1 - CPU_USAGE_HIGH - The usage on the indicated CPU has exceeded its high threshold - 1 - 100 - 0 - 0 - - - 2 - CPU_USAGE_MED - The usage on the indicated CPU has exceeded its medium threshold - 2 - 100 - 0 - 0 - - - 3 - CPU_USAGE_LOW - The usage on the indicated CPU has exceeded its low threshold - 3 - 100 - 0 - 0 - - - 4 - DISK_USAGE_HIGH - The usage on the indicated Disk Drive has exceeded its high threshold - 1 - 100 - 0 - 0 - - - 5 - DISK_USAGE_MED - The usage on the indicated DISK Drive has exceeded its medium threshold - 2 - 100 - 0 - 0 - - - 6 - DISK_USAGE_LOW - The usage on the indicated Disk Drive has exceeded its low threshold - 3 - 100 - 0 - 0 - - - 7 - MEMORY_USAGE_HIGH - The usage on the indicated Memory has exceeded its high threshold - 1 - 100 - 0 - 0 - - - 8 - MEMORY_USAGE_MED - The usage on the indicated Memory has exceeded its medium threshold - 2 - 100 - 0 - 0 - - - 9 - MEMORY_USAGE_LOW - The usage on the indicated Memory has exceeded its low threshold - 3 - 100 - 0 - 0 - - - 10 - SWAP_USAGE_HIGH - The usage on the indicated Swap has exceeded its high threshold - 1 - 100 - 0 - 0 - - - 11 - SWAP_USAGE_MED - The usage on the indicated Swap has exceeded its medium threshold - 2 - 100 - 0 - 0 - - - 12 - SWAP_USAGE_LOW - The usage on the indicated Swap has exceeded its low threshold - 3 - NONE - 0 - 0 - - - 13 - PROCESS_DOWN_AUTO - Process is auto out-of-service - 2 - NONE - 0 - 0 - - - 14 - MODULE_DOWN_AUTO - Module is auto out-of-service - 1 - 100 - 0 - 0 - - - 15 - SYSTEM_DOWN_AUTO - System is auto out-of-service - 1 - 100 - 0 - 0 - - - 16 - POWERON_TEST_SEVERE - A Module Power-On Severe Warning error occurred. Test Failures indicate that the system is running in a degraded state - 2 - 100 - 0 - 0 - - - 17 - POWERON_TEST_WARNING - A Module Power-On Warning error occurred. Test Failures indicate that the system is running with little or no degradation in performance - 4 - 100 - 0 - 0 - - - 18 - HARDWARE_HIGH - A Hardware Device's resource has exceeded its high threshold - 1 - 100 - 0 - 0 - - - 19 - HARDWARE_MED - A Hardware Device's resource has exceeded its medium threshold - 2 - 100 - 0 - 0 - - - 20 - HARDWARE_LOW - A Hardware Device's resource has exceeded its low threshold - 3 - 100 - 0 - 0 - - - 21 - PROCESS_DOWN_MANUAL - Process is manually out-of-service - 3 - 100 - 0 - 0 - - - 22 - MODULE_DOWN_MANUAL - Module is manually out-of-service - 2 - 100 - 0 - 0 - - - 23 - SYSTEM_DOWN_MANUAL - System is manually out-of-service - 1 - 100 - 0 - 0 - - - 24 - EXT_DEVICE_DOWN_AUTO - External Device is auto out-of-service - 2 - 100 - 0 - 0 - - - 25 - PROCESS_INIT_FAILURE - Process Initialzation failed or Timed-out - 2 - 100 - 0 - 0 - - - 26 - NIC_DOWN_AUTO - NIC Interface is auto out-of-service - 2 - 100 - 0 - 0 - - - 27 - DBRM_LOAD_DATA_ERROR - Error occurred loading DBRM data - 1 - 100 - 0 - 0 - - - 28 - INVALID_SW_VERSION - Invalid Software Version found on a module. It doesn't match the version running on the Director Module - 1 - 100 - 0 - 0 - - - 29 - STARTUP_DIAGNOTICS_FAILURE - A Failure occurred when the System Startup Diagnostics was executed on a Module. The log files can be viewed for details on the failure. - 1 - 100 - 0 - 0 - - - 30 - CONN_FAILURE - A Connection Failure occurred that could affect the performance of the system processing. - 1 - 100 - 0 - 0 - - - 31 - DBRM_READ_ONLY - The DBRM is now read-only and updates to the database are not possible. - 1 - 100 - 0 - 0 - - - 32 - Unassigned - Unassigned - 0 - 0 - 0 - 0 - - - 33 - MODULE_SWITCH_ACTIVE - An outage of Parent OAM Module or the execution of the switchParentOAMModule command initiated a change of the Parent OAM Module. - 1 - 100 - 0 - 0 - - - 34 - ROLLBACK_FAILURE - A Database Rollback Failure occurred, Contact MariaDB Customer Support. - 1 - 100 - 0 - 0 - - - 35 - GLUSTER_DISK_FAILURE - An Error was reported on a Gluster disk copy. - 1 - 100 - 0 - 0 - - - 36 - INVALID_LOCALE - An Error was reported on setting system locale. - 1 - 100 - 0 - 0 - - diff --git a/oam/etc/CMakeLists.txt b/oam/etc/CMakeLists.txt index 68f5eda61..3d4ae3dfe 100644 --- a/oam/etc/CMakeLists.txt +++ b/oam/etc/CMakeLists.txt @@ -1,7 +1,3 @@ -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/ProcessConfig.xml.in" "${CMAKE_CURRENT_SOURCE_DIR}/ProcessConfig.xml" @ONLY) -install(FILES AlarmConfig.xml - Columnstore.xml - ProcessConfig.xml - ConsoleCmds.xml +install(FILES Columnstore.xml DESTINATION ${ENGINE_SYSCONFDIR}/columnstore COMPONENT columnstore-engine) diff --git a/oam/etc/ConsoleCmds.xml b/oam/etc/ConsoleCmds.xml deleted file mode 100644 index c36f2bcab..000000000 --- a/oam/etc/ConsoleCmds.xml +++ /dev/null @@ -1,431 +0,0 @@ - - - - - help - Get help on the Console Commands - No Argument or '-a' will display a list of Console commands - Optional: '-v' will display a list of Console commands with their descriptions - Optional: Command-name will display the command description - - - ? - Get help on the Console Commands - No Argument, '-a', or '-all' will display a list of Console commands - Optional: '-v' or '-verbose' will display a list of Console commands with their descriptions - Optional: Command-name will display the command description - - - exit - Exit from the Console tool - None - - - quit - Exit from the Console tool - None - - - redistributeData - Redistribute table data accross all dbroots to balance disk usage - START to begin a redistribution - STOP to stop redistribution before completion - STATUS to to view statistics and progress - START REMOVE to redistribute and move data off the given dbroot to other dbroots - - - findObjectFile - Get the name of the directory containing the first file of the object - uses Object ID (OID) or Schema Table Column - Object ID (OID) of object - Schema - Table - Column - - - getModuleTypeConfig - Get Module Type Configuration Information - Can display all Module Type parameters, parameters for a single module type , or a single parameter for a specific module type - No Argument or 'all' will display all Module Type Configuration parameters - Optional: Module-type will display the Module Type Configuration parameters - Optional: Module-type and Parameter-name will display the Module Type Configuration parameter - - - setModuleTypeConfig - Set a Module Type Configuration parameter - Required: Module-type is the parameter to be updated - Required: Parameter-name is the parameter to be updated - Required: Parameter-value is the parameter value - - - getProcessConfig - Get Process Configuration Information - Can display all Process parameters, parameters for a single process, or a single parameter for a specific process - No Argument or 'all' enter will display all Process Configuration parameters - Optional: Process-name and Module-type will display the Process Configuration parameters - Optional: Process-name, Module-type, and Parameter-name will display the Process Configuration parameter - - - setProcessConfig - Set a Process Configuration parameter - Required: Process-name is the Process to be updated - Required: Module-type is the Module Type of the Process to be updated - Required: Parameter-name is the parameter to be updated - Required: Parameter-value is the parameter value - - - getAlarmConfig - Get Alarm Configuration Information - Can display all Alarms or a single Alarm - No Argument or 'all' enter will display all Alarms Configuration parameters - Optional: Alarm-ID will display the Alarm Configuration parameter - - - setAlarmConfig - Set an Alarm Configuration parameter - Required: Alarm-ID is the Alarm to be updated - Required: Parameter-name is the parameter to be updated - Required: Parameter-value is the parameter value - - - getActiveAlarms - Get Active Alarm list - Displays all the Alarms that are currently active - None - - - getStorageConfig - Get System Storage Configuration Information - Displays storage and dbroot assignment Information - None - - - addDbroot - Add DBRoot Disk storage to the MariaDB Columnstore System - Required: Number of DBRoots to be added - - - removeDbroot - Remove DBRoot Disk storage from the MariaDB Columnstore System - Required: List of Dbroot IDs to be removed - - - stopSystem - Stops the processing of the MariaDB Columnstore System - It will leave the Modules powered on and the Operating Systems enable - This command might be used for Software Maintenance reasons - Optional: GRACEFUL/FORCEFUL is the mode the process will be taken down. Default is FORCEFUL - Optional: ACK_YES/ACK_NO is Acknowledgment response or not. Default is ACK_YES - - - shutdownSystem - Shuts down the MariaDB Columnstore System - This command stops the processing of applications on all Modules within the MariaDB Columnstore System - It should be using when performing an upgrade - Optional: GRACEFUL/FORCEFUL is the mode the process will be taken down. Default is FORCEFUL - Optional: ACK_YES/ACK_NO is Acknowledgment response or not. Default is ACK_YES - - - startSystem - Starts a stopped or shutdown MariaDB Columnstore System - If system is shutdown, then a server root password should be entered if a ssh key is not defined - Optional: Server-root-password - - - restartSystem - Restarts a stopped or shutdown MariaDB Columnstore System - If system is shutdown, then a server root password should be entered if a ssh key is not defined - Optional: GRACEFUL/FORCEFUL is the mode the process will be taken down. Default is FORCEFUL - Optional: ACK_YES/ACK_NO is Acknowledgment response or not. Default is ACK_YES - Optional: Server-root-password - - - getSystemStatus - Get System and Modules Status - Get Status of the MariaDB Columnstore System and the modules within the MariaDB Columnstore System - None - - - getProcessStatus - Get MariaDB Columnstore Process Statuses - Get Status of the MariaDB Columnstore Processes witin the MariaDB Columnstore System - None - - - system - Execute a system shell command - Required: UNIX command - - - getAlarmHistory - Get system alarms - Get all set and clear alarms reported on the system on a daily basis - Required: Date to Display in format of MM/DD/YY - - - monitorAlarms - Monitor alarms in realtime mode - Enter "control-C" to exit monitorAlarms command - - - resetAlarm - Resets an Active Alarm - Resetting the Alarm will remove it from the Active Alarm Log file - Required: AlarmID or ALL to reset all active alarms - Required: Reporting Module or ALL to reset all active AlarmID alarms - Required: Reported Device or ALL to reset all active AlarmID/Reporting Module Alarms - - - enableLog - Enable the levels of process and debug logging - Required: 'system' or Module-name where logging is being enabled - Required: 'all' or the specific level to enable - Levels: critical, error, warning, info, and debug - - - disableLog - Disable the levels of process and debug logging - Required: 'system' or Module-name where logging is being disabled - Required: 'all' or the specific level to disable - Levels: critical, error, warning, info, and debug - - - switchParentOAMModule - Switches the Active Parent OAM Module to another Performance Module - A System stop and start is performed as part of the process - Command is recommended to be done while the system is idle - Optional: Performance-Module-name to switch to, default to Standby Parent OAM Module - - - getStorageStatus - Get System Storage Status - Get Status of the MariaDB Columnstore DBRoot Storages within the MariaDB Columnstore System - Only Valid for External Configured disk - None - - - getLogConfig - Get the System log file configuration - Get the process and debug log file configuration on each Module - - - movePmDbrootConfig - Move DBroots from one Performance Module to another - This command would be used before a Performance Module is removed or added - Required: Performance Module Name where the Dbroot are being moved from - Required: List of Dbroot IDs to be moved - Required: Performance Module Name where the Dbroot are being moved to - - - suspendDatabaseWrites - Suspend performing writes to the MariaDB Columnstore Database - None - - - resumeDatabaseWrites - Resume performing writes to the MariaDB Columnstore Database - None - - - unassignDbrootPmConfig - Unassign DBroots from a Performance Module - This command could be used before a DBRoot is removed - Required: List of Dbroot IDs to be unassigned - Required: Performance Module Name - - - assignDbrootPmConfig - Assign unassigned DBroots to Performance Module - This command could be used after a Performance Module and/or DBRoot is added - Required: List of Dbroot IDs to be assigned - Required: Performance Module Name - - - getAlarmSummary - Get Summary counts of Active Alarm - None - - - getSystemInfo - Get the Over-all System Statuses - Get the System, Module, Process, and Active Alarm Statuses - None - - - getModuleConfig - Get Module Name Configuration Information - Can display all Module Name parameters, parameters for a single module name, or a single parameter for a specific module name - No Argument or 'all' will display all Module Name Configuration parameters - Optional: Module-name will display the Module Name Configuration parameters - Optional: Module-name and Parameter-name will display the Module Name Configuration parameter - - - getSystemDirectories - Get System Installation and Temporary Logging Directories - - - AVAILABLE - - - AVAILABLE - - - AVAILABLE - - - assignElasticIPAddress - Assign Amazon Elastic IP Address to a module (deprecated) - Required: Amazon Elastic IP Address - Required: Module Name - - - unassignElasticIPAddress - Unassign Amazon Elastic IP Address (deprecated) - Required: Amazon Elastic IP Address - - - getSystemNetworkConfig - Get System Network Configuration Information - Displays the Network Host Names and IP Addresses for all Modules, Switches, - and Storage Units that make up the system - None - - - enableMySQLReplication - Enable MySQL Replication functionality on the system - Required: User Password, 'root' or 'non-root'. Or 'ssh' is ssh-keys are configured - Required: MySQL Password or 'none' if not configured - - - getSoftwareInfo - Get the MariaDB Columnstore Package information - None - - - addModule - Add a Module within the MariaDB Columnstore System - This command is used to configure a Module to the Configuration - Database within the MariaDB Columnstore System. It will also install the - MariaDB Columnstore Packages and setup the module to make it ready to be restored - Required: Module-type or Module-name being added - Required: Number-of-Modules being added when Module-type is specified - Required: Store hostnames in the configuration instead of IP addresses (y/n) - If not Amazon, then Required: hostnames separated by commas - If Amazon, then Optional: Amazon-Instance-Names separated by commas - Optional: Server-user-password - - - removeModule - Remove a Module within the MariaDB Columnstore System - This command is used to unconfigure a Module from the Configuration - Database within the MariaDB Columnstore System - Required: Module-type or Module-name being removed - Required: Number-of-Modules being remove when Module-type is specified - - - getModuleHostNames - Get a list of Module host names (NIC 1 only) - Optional: um or pm - - - disableMySQLReplication - Disable MySQL Replication functionality on the system - Required: MySQL Password or 'none' if not configured - - - getModuleCpuUsers - Get a Module Top Processes utilizing CPU - The top number of processes using more than 0% will be displayed - Required: Module-name being monitored - Optional: Top number of Process to Display (1-10, default is 5) - - - getSystemCpuUsers - Get System Top Processes utilizing CPU - The top number of processes using more than 0% will be displayed - Optional: Top number of Process to Display (1-10, default is 5) - - - getModuleCpu - Get a Module CPU usage - The Modules CPU Usage percentage will be displayed - Required: Module-name being monitored - - - getSystemCpu - Get System CPU usage on all modules - The System Modules CPU Usage percentage will be displayed - - - getModuleMemoryUsers - Get a Module Top Processes utilizing Memory - The top number of processes using more than 0% will be displayed - Required: Module-name being monitored - Optional: Top number of Process to Display (1-10, default is 5) - - - getSystemMemoryUsers - Get System Top Processes utilizing Memory - The top number of processes using more than 0% will be displayed - Optional: Top number of Process to Display (1-10, default is 5) - - - getModuleMemory - Get a Module Memory usage - The Modules Memory Usage percentage will be displayed - Required: Module-name being monitored - - - getSystemMemory - Get System Memory usage on all modules - The System Modules Memory Usage percentage will be displayed - - - getModuleDisk - Get a Module Disk usage - The Modules Disk Usage will be displayed - Required: Module-name being monitored - - - getSystemDisk - Get System Disk usage on all modules - The System Modules Disk Usage will be displayed - - - getModuleResourceUsage - Get a Module Resource usage - The Modules Resource Usage will be displayed - Resources consist of CPU, Memory, and Disk usage - Required: Module-name being monitored - - - getSystemResourceUsage - Get System Resource usage on all modules - The System Modules Resource Usage will be displayed - Resources consist of CPU, Memory, and Disk usage - - - getActiveSQLStatements - Get List Active SQL Statements within the System - - - alterSystem-disableModule - Disable a Module and Alter the MariaDB Columnstore System - This command is used to place a Module in an disable state - and restart the associated processes the MariaDB Columnstore System. - Required: Module-name(s) being placed disable - - - alterSystem-enableModule - Enable a Module and Alter the MariaDB Columnstore System - This command is used to place a Module in an enable state - and restart the associated processes the MariaDB Columnstore System. - Required: Module-name(s) being placed enable - - - stopModule - Stop a module - Used by columnstore stop script to stop individual modules. - Required: Module-name being stopped - - diff --git a/oam/etc/LocalServer-CEC1.xml b/oam/etc/LocalServer-CEC1.xml deleted file mode 100644 index 81a4418da..000000000 --- a/oam/etc/LocalServer-CEC1.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - ec1 - 1 - 1 - - diff --git a/oam/etc/LocalServer-CFE1.xml b/oam/etc/LocalServer-CFE1.xml deleted file mode 100644 index a071b7bf2..000000000 --- a/oam/etc/LocalServer-CFE1.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - fep1 - 0 - 0 - - diff --git a/oam/etc/ProcessConfig.xml.in b/oam/etc/ProcessConfig.xml.in deleted file mode 100644 index 76ab28466..000000000 --- a/oam/etc/ProcessConfig.xml.in +++ /dev/null @@ -1,142 +0,0 @@ - - - - - ProcessMonitor - ChildExtOAMModule - @ENGINE_BINDIR@/ProcMon - 0 - 1 - - LOADSHARE - off - - - ProcessManager - ParentOAMModule - @ENGINE_BINDIR@/ProcMgr - 1 - 2 - - ACTIVE_STANDBY - off - - - StorageManager - ChildExtOAMModule - @ENGINE_BINDIR@/StorageManager - 2 - 3 - - @ - LOADSHARE - off - - - DBRMControllerNode - ParentOAMModule - @ENGINE_BINDIR@/controllernode - @ENGINE_BINDIR@/controllernode - fg - 2 - 4 - ProcessManager - @ - SIMPLEX - off - - - ServerMonitor - ChildOAMModule - @ENGINE_BINDIR@/ServerMonitor - @ENGINE_BINDIR@/ServerMonitor - 2 - 6 - - LOADSHARE - off - - - DBRMWorkerNode - ChildExtOAMModule - @ENGINE_BINDIR@/workernode - @ENGINE_BINDIR@/workernode - DBRM_Worker - fg - 2 - 7 - - LOADSHARE - off - - - PrimProc - pm - @ENGINE_BINDIR@/PrimProc - 2 - 20 - - LOADSHARE - off - - - ExeMgr - um - @ENGINE_BINDIR@/ExeMgr - 2 - 30 - PrimProc - pm* - LOADSHARE - off - - - WriteEngineServer - pm - @ENGINE_BINDIR@/WriteEngineServer - 2 - 40 - LOADSHARE - off - - - DDLProc - um - @ENGINE_BINDIR@/DDLProc - 2 - 50 - WriteEngineServer - pm* - DBRMWorkerNode - @ - ExeMgr - * - SIMPLEX - off - - - DMLProc - um - @ENGINE_BINDIR@/DMLProc - 2 - 51 - WriteEngineServer - pm* - DBRMWorkerNode - @ - DDLProc - @ - SIMPLEX - off - - - mysqld - um - /usr/bin/mysqld - 0 - 100 - - LOADSHARE - off - - diff --git a/oam/install_scripts/CMakeLists.txt b/oam/install_scripts/CMakeLists.txt index 9ed5144a5..31764fc4b 100644 --- a/oam/install_scripts/CMakeLists.txt +++ b/oam/install_scripts/CMakeLists.txt @@ -6,16 +6,8 @@ configure_file("${CMAKE_CURRENT_SOURCE_DIR}/columnstore-post-install.in" "${CMAK configure_file("${CMAKE_CURRENT_SOURCE_DIR}/columnstore.in" "${CMAKE_CURRENT_SOURCE_DIR}/columnstore" @ONLY) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/columnstore-pre-uninstall.in" "${CMAKE_CURRENT_SOURCE_DIR}/columnstore-pre-uninstall" @ONLY) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/columnstoreLogRotate.in" "${CMAKE_CURRENT_SOURCE_DIR}/columnstoreLogRotate" @ONLY) -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/postInstall.sh.in" "${CMAKE_CURRENT_SOURCE_DIR}/postInstall.sh" @ONLY) -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/startupTests.sh.in" "${CMAKE_CURRENT_SOURCE_DIR}/startupTests.sh" @ONLY) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/mariadb-columnstore.service.in" "${CMAKE_CURRENT_SOURCE_DIR}/mariadb-columnstore.service" @ONLY) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/mariadb-columnstore-start.sh.in" "${CMAKE_CURRENT_SOURCE_DIR}/mariadb-columnstore-start.sh" @ONLY) -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/columnstore_module_installer.sh.in" "${CMAKE_CURRENT_SOURCE_DIR}/columnstore_module_installer.sh" @ONLY) -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/master-rep-columnstore.sh.in" "${CMAKE_CURRENT_SOURCE_DIR}/master-rep-columnstore.sh" @ONLY) -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/disable-rep-columnstore.sh.in" "${CMAKE_CURRENT_SOURCE_DIR}/disable-rep-columnstore.sh" @ONLY) -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/mariadb-command-line.sh.in" "${CMAKE_CURRENT_SOURCE_DIR}/mariadb-command-line.sh" @ONLY) -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/slave-rep-columnstore.sh.in" "${CMAKE_CURRENT_SOURCE_DIR}/slave-rep-columnstore.sh" @ONLY) -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/mcs_module_installer.sh.in" "${CMAKE_CURRENT_SOURCE_DIR}/mcs_module_installer.sh" @ONLY) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/mcs-workernode.service.in" "${CMAKE_CURRENT_SOURCE_DIR}/mcs-workernode.service" @ONLY) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/mcs-controllernode.service.in" "${CMAKE_CURRENT_SOURCE_DIR}/mcs-controllernode.service" @ONLY) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/mcs-primproc.service.in" "${CMAKE_CURRENT_SOURCE_DIR}/mcs-primproc.service" @ONLY) @@ -32,25 +24,11 @@ configure_file("${CMAKE_CURRENT_SOURCE_DIR}/columnstoreSyslog.in" "${CMAKE_CURRE install(PROGRAMS columnstore-post-install columnstore-pre-uninstall - remote_command.sh - columnstore_module_installer.sh - startupTests.sh - remote_scp_get.sh columnstore_run.sh post-mysql-install post-mysqld-install - columnstore_os_check.sh columnstore columnstoreSyslogSetup.sh - remote_scp_put.sh - remotessh.exp - rsync.sh - remote_command_verify.sh - master-rep-columnstore.sh - slave-rep-columnstore.sh - disable-rep-columnstore.sh - mariadb-command-line.sh - mcs_module_installer.sh mcs-stop-controllernode.sh mcs-loadbrm.py mcs-savebrm.py diff --git a/oam/install_scripts/columnstore_module_installer.sh.in b/oam/install_scripts/columnstore_module_installer.sh.in deleted file mode 100755 index 917d027cf..000000000 --- a/oam/install_scripts/columnstore_module_installer.sh.in +++ /dev/null @@ -1,153 +0,0 @@ -#!/bin/bash -# -# $Id: module_installer.sh 421 2007-04-05 15:46:55Z dhill $ -# -# Setup the Custom OS files during a System install on a module -# -# -# append columnstore OS files to Linux OS file -# -# - -rpmmode=install -user=`whoami 2>/dev/null` -quiet=0 -shiftcnt=0 -password=" " - -for arg in "$@"; do - if [ $(expr -- "$arg" : '--rpmmode=') -eq 10 ]; then - rpmmode="$(echo $arg | awk -F= '{print $2}')" - ((shiftcnt++)) - elif [ $(expr -- "$arg" : '--user=') -eq 7 ]; then - user="$(echo $arg | awk -F= '{print $2}')" - ((shiftcnt++)) - elif [ $(expr -- "$arg" : '--quiet') -eq 7 ]; then - quiet=1 - ((shiftcnt++)) - elif [ $(expr -- "$arg" : '--port') -eq 6 ]; then - mysqlPort="$(echo $arg | awk -F= '{print $2}')" - ((shiftcnt++)) - elif [ $(expr -- "$arg" : '--module') -eq 8 ]; then - module="$(echo $arg | awk -F= '{print $2}')" - ((shiftcnt++)) - elif [ $(expr -- "$arg" : '--password') -eq 10 ]; then - password="$(echo $arg | awk -F= '{print $2}')" - ((shiftcnt++)) - fi -done -shift $shiftcnt - -PMwithUM=`mcsGetConfig Installation PMwithUM` -ServerTypeInstall=`mcsGetConfig Installation ServerTypeInstall` - -#get temp directory -tmpDir=`mcsGetConfig SystemConfig SystemTempFileDir` - -cloud=`mcsGetConfig Installation Cloud` -if [ $cloud = "amazon-ec2" ] || [ $cloud = "amazon-vpc" ]; then - echo "Amazon setup on Module" - cp /var/lib/columnstore/local/etc/credentials $HOME/.aws/. > /dev/null 2>&1 - - if [ $module = "pm" ]; then - if test -f /var/lib/columnstore/local/etc/pm1/fstab ; then - echo "Setup fstab on Module" - SUDO="" - if [ $user != "root" ]; then - SUDO="sudo " - fi - touch /etc/fstab - $SUDO chmod 666 /etc/fstab - rm -f /etc/fstab.columnstoreSave - cp /etc/fstab /etc/fstab.columnstoreSave - cat /var/lib/columnstore/local/etc/pm1/fstab >> /etc/fstab - fi - fi -fi - -. @ENGINE_SUPPORTDIR@/columnstore_functions - -mid=`module_id` - -#if um, cloud, separate system type, external um storage, then setup mount -if [ $cloud = "amazon-ec2" ] || [ $cloud = "amazon-vpc" ]; then - if [ $module = "um" ]; then - systemtype=`mcsGetConfig Installation ServerTypeInstall` - if [ $systemtype = "1" ]; then - umstoragetype=`mcsGetConfig Installation UMStorageType` - fi - fi - - #setup rc.local for amazon - RCFILE=/etc/rc.d/rc.local - - if [ $user != "root" ]; then - echo "uncomment runuser in rc.local, amazon AMI" - sudo sed -i -e 's/#runuser/runuser/g' /etc/rc.d/rc.local >/dev/null 2>&1 - fi -fi - -#if pm, create dbroot directories -if [ $module = "pm" ]; then - numdbroots=`mcsGetConfig SystemConfig DBRootCount` - for (( id=1; id<$numdbroots+1; id++ )); do - mkdir -p /var/lib/columnstore/data$id > /dev/null 2>&1 - chmod 755 /var/lib/columnstore/data$id - done -fi - -# if mysqlrep is on and module has a my.cnf file, upgrade it - -MySQLRep=`mcsGetConfig Installation MySQLRep` -if [ $MySQLRep = "y" ]; then - if test -f @MARIADB_MYCNFDIR@/columnstore.cnf ; then - echo "Run Upgrade on my.cnf on Module" - mycnfUpgrade > ${tmpDir}/mycnfUpgrade.log 2>&1 - fi -fi - -if test -f @MARIADB_MYCNFDIR@/columnstore.cnf ; then - mysqlPort=`mcsGetConfig Installation MySQLPort` - echo "Run Mysql Port update on my.cnf on Module" - mycnfUpgrade $mysqlPort > ${tmpDir}/mycnfUpgrade_port.log 2>&1 -fi - -# if um, run mysql install scripts -if [ $module = "um" ] || ( [ $module = "pm" ] && [ $PMwithUM = "y" ] ) || [ $ServerTypeInstall = "2" ]; then - - mysqlPassword=" " - if [[ $password != " " ]]; then - mysqlPassword="--password="$password - fi - - echo "Run post-mysqld-install" - post-mysqld-install $mysqlPassword --tmpdir=${tmpDir} > ${tmpDir}/post-mysqld-install.log 2>&1 - if [ $? -ne 0 ]; then - echo "ERROR: post-mysqld-install failed: check ${tmpDir}/post-mysqld-install.log" - exit 1 - fi - - echo "Run post-mysql-install" - - post-mysql-install --tmpdir=${tmpDir} > ${tmpDir}/post-mysql-install.log 2>&1 - if [ $? -ne 0 ]; then - echo "ERROR: post-mysql-install failed: check ${tmpDir}/post-mysql-install.log" - exit 1 - fi -fi - -if [ $user == "root" ]; then - columnstoreSyslogSetup.sh check > ${tmpDir}/syslogSetup-check.log 2>&1 - if [ $? -ne 0 ]; then - # try setup again - columnstoreSyslogSetup.sh install > ${tmpDir}/syslogSetup-install.log 2>&1 - if [ $? -ne 0 ]; then - echo "WARNING: columnstoreSyslogSetup.sh check failed: check ${tmpDir}/syslogSetup-check.log" - exit 2 - fi - fi -fi - -echo "!!!Module Installation Successfully Completed!!!" - -exit 0 diff --git a/oam/install_scripts/columnstore_os_check.sh b/oam/install_scripts/columnstore_os_check.sh deleted file mode 100644 index 620fe447b..000000000 --- a/oam/install_scripts/columnstore_os_check.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash -# -# Determine the Linux distribution and version that is being run. -# -# Check for GNU/Linux distributions - if [ -f /etc/SuSE-release ]; then - DISTRIBUTION="suse" - elif [ -f /etc/UnitedLinux-release ]; then - DISTRIBUTION="united" - elif [ -f /etc/debian_version ]; then - DISTRIBUTION="debian" - elif [ -f /etc/lsb_version ]; then - DISTRIBUTION="ubuntu" - elif [ -f /etc/redhat-release ]; then - a=`grep -i 'red.*hat.*enterprise.*linux' /etc/redhat-release` - if test $? = 0; then - DISTRIBUTION=rhel - else - a=`grep -i 'red.*hat.*linux' /etc/redhat-release` - if test $? = 0; then - DISTRIBUTION=rh - else - a=`grep -i 'Fedora' /etc/redhat-release` - if test $? = 0; then - DISTRIBUTION=fedora - else - a=`grep -i 'cern.*e.*linux' /etc/redhat-release` - if test $? = 0; then - DISTRIBUTION=cel - else - a=`grep -i 'scientific linux cern' /etc/redhat-release` - if test $? = 0; then - DISTRIBUTION=slc - else - DISTRIBUTION="unknown" - fi - fi - fi - fi - fi - else - DISTRIBUTION="unknown" - fi -echo ${DISTRIBUTION} diff --git a/oam/install_scripts/disable-rep-columnstore.sh.in b/oam/install_scripts/disable-rep-columnstore.sh.in deleted file mode 100644 index ce22a8317..000000000 --- a/oam/install_scripts/disable-rep-columnstore.sh.in +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/bash -# -# $Id$ -# -# generic MariaDB Columnstore Disable Replication script. -# -# Notes: This script gets run by ProcMon: - -# check log for error -checkForError() { - grep ERROR ${tmpdir}/disable-rep-status.log > ${tmpdir}/error.check - if [ `cat ${tmpdir}/error.check | wc -c` -ne 0 ]; then - echo "ERROR: check log file:${tmpdir}/disable-rep-status.log" - rm -f ${tmpdir}/error.check - exit 1 - fi - rm -f ${tmpdir}/error.check -} - -pwprompt= -for arg in "$@"; do - if [ `expr -- "$arg" : '--password='` -eq 11 ]; then - password="`echo $arg | awk -F= '{print $2}'`" - pwprompt="--password=$password" - elif [ $(expr -- "$arg" : '--tmpdir=') -eq 9 ]; then - tmpdir="$(echo $arg | awk -F= '{print $2}')" - fi -done - -. @ENGINE_SUPPORTDIR@/columnstore_functions - ->${tmpdir}/disable-rep-status.log - -# -# Run stop slave command -# -echo "Run stop slave command" >>${tmpdir}/disable-rep-status.log -cat >${tmpdir}/idb_disable-rep.sql <>${tmpdir}/disable-rep-status.log -mysql \ - --user=root $pwprompt \ - calpontsys <${tmpdir}/idb_disable-rep.sql >>${tmpdir}/disable-rep-status.log 2>&1 - -checkForError - -# -# Run reset slave command -# -echo "Run reset slave command" >>${tmpdir}/disable-rep-status.log -cat >${tmpdir}/idb_disable-rep.sql <>${tmpdir}/disable-rep-status.log -mysql \ - --user=root $pwprompt \ - calpontsys <${tmpdir}/idb_disable-rep.sql >>${tmpdir}/disable-rep-status.log 2>&1 - -checkForError - -#alls good, 'OK' for success -echo "OK" -exit 0 diff --git a/oam/install_scripts/install-columnstore.sh b/oam/install_scripts/install-columnstore.sh deleted file mode 100755 index d19d0090d..000000000 --- a/oam/install_scripts/install-columnstore.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -# -# $Id: install-infinidb.sh 421 2007-04-05 15:46:55Z dhill $ -# -# install-infinidb.sh dummy EE version - -echo " " 1>&2 -echo " This install-infinidb.sh script is only used to install the Community Edition." 1>&2 -echo " Please use the 'postConfigure' for installing the Enterprise Edition." 1>&2 -echo " Check the Calpont InfiniDB Installation Document for further details." 1>&2 - -exit 1 - diff --git a/oam/install_scripts/mariadb-command-line.sh.in b/oam/install_scripts/mariadb-command-line.sh.in deleted file mode 100755 index 4c47269d2..000000000 --- a/oam/install_scripts/mariadb-command-line.sh.in +++ /dev/null @@ -1,62 +0,0 @@ -#!/bin/bash -# -# $Id$ -# -# generic MariaDB Columnstore Command Line script. -# -# Notes: This script gets run by ProcMon during installs and upgrades: - -# check log for error -checkForError() { - grep "ERROR 1045" ${tmpdir}/mariadb-command-line.log > ${tmpdir}/error.check - if [ `cat ${tmpdir}/error.check | wc -c` -ne 0 ]; then - echo "ERROR - PASSWORD: check log file: ${tmpdir}/mariadb-command-line.log" - rm -f ${tmpdir}/error.check - exit 2 - fi - - grep ERROR ${tmpdir}/mariadb-command-line.log > ${tmpdir}/error.check - if [ `cat ${tmpdir}/error.check | wc -c` -ne 0 ]; then - echo "ERROR: check log file: ${tmpdir}/mariadb-command-line.log" - rm -f ${tmpdir}/error.check - exit 1 - fi - rm -f ${tmpdir}/error.check -} - -pwprompt= -for arg in "$@"; do - if [ `expr -- "$arg" : '--command='` -eq 10 ]; then - command1="`echo $arg | awk -F= '{print $2}'`" - command2="`echo $arg | awk -F= '{print $3}'`" - command=$command1"="$command2 - elif [ `expr -- "$arg" : '--port='` -eq 7 ]; then - port="`echo $arg | awk -F= '{print $2}'`" - elif [ $(expr -- "$arg" : '--tmpdir=') -eq 9 ]; then - tmpdir="$(echo $arg | awk -F= '{print $2}')" - fi -done - -. @ENGINE_SUPPORTDIR@/columnstore_functions - - ->${tmpdir}/mariadb-command-line.log - -# -# Run command -# -echo "Run command" >>${tmpdir}/mariadb-command-line.log -cat >${tmpdir}/mariadb-command-line.sql <> ${tmpdir}/mariadb-command-line.log -mysql \ - --user=root \ - calpontsys < ${tmpdir}/mariadb-command-line.sql >> ${tmpdir}/mariadb-command-line.log 2>&1 - -checkForError - -#alls good, 'OK' for success -echo "OK" -exit 0 diff --git a/oam/install_scripts/master-rep-columnstore.sh.in b/oam/install_scripts/master-rep-columnstore.sh.in deleted file mode 100644 index f3d2e38d1..000000000 --- a/oam/install_scripts/master-rep-columnstore.sh.in +++ /dev/null @@ -1,99 +0,0 @@ -#!/bin/bash -# -# $Id$ -# -# generic MariaDB Columnstore Master Replication script. -# -# Notes: This script gets run by ProcMon during installs and upgrades: - -# check log for error -checkForError() { - grep ERROR ${tmpdir}/master-rep-status-$hostipaddr.log > ${tmpdir}/error.check - if [ `cat ${tmpdir}/error.check | wc -c` -ne 0 ]; then - echo "ERROR: check log file: ${tmpdir}/master-rep-status-$hostipaddr.log" - rm -f ${tmpdir}/error.check - exit 1 - fi - rm -f ${tmpdir}/error.check -} - -pwprompt= -for arg in "$@"; do - if [ `expr -- "$arg" : '--hostIP='` -eq 9 ]; then - hostipaddr="`echo $arg | awk -F= '{print $2}'`" - elif [ $(expr -- "$arg" : '--tmpdir=') -eq 9 ]; then - tmpdir="$(echo $arg | awk -F= '{print $2}')" - fi -done - -. @ENGINE_SUPPORTDIR@/columnstore_functions - -repUser="idbrep" -password="C0lumnStore!" - ->${tmpdir}/master-rep-status-$hostipaddr.log - -# -# Create Replication User -# -echo "Create Replication User $repUser for node $hostipaddr" >>${tmpdir}/master-rep-status-$hostipaddr.log -cat >${tmpdir}/idb_master-rep.sql <>${tmpdir}/master-rep-status-$hostipaddr.log -mysql \ - --user=root \ - calpontsys <${tmpdir}/idb_master-rep.sql >>${tmpdir}/master-rep-status-$hostipaddr.log 2>&1 - -checkForError - -# -# Grant table access for created user -# -echo "Grant table access for $repUser for node $hostipaddr" >>${tmpdir}/master-rep-status-$hostipaddr.log -cat >${tmpdir}/idb_master-rep.sql <>${tmpdir}/master-rep-status-$hostipaddr.log -mysql \ - --user=root \ - calpontsys <${tmpdir}/idb_master-rep.sql >>${tmpdir}/master-rep-status-$hostipaddr.log 2>&1 - -checkForError - -# -# Run SHOW MASTER STATUS -# -echo "Run SHOW MASTER STATUS to node log" >>${tmpdir}/master-rep-status-$hostipaddr.log -cat >${tmpdir}/idb_master-rep.sql <>${tmpdir}/master-rep-status-$hostipaddr.log -mysql \ - --user=root \ - calpontsys <${tmpdir}/idb_master-rep.sql >>${tmpdir}/master-rep-status-$hostipaddr.log 2>&1 - -checkForError - -echo "Run SHOW MASTER STATUS to master status log ${tmpdir}/show-master-status.log" >>${tmpdir}/master-rep-status-$hostipaddr.log -cat >${tmpdir}/idb_master-rep.sql <${tmpdir}/show-master-status.log -mysql \ - --user=root \ - calpontsys <${tmpdir}/idb_master-rep.sql >>${tmpdir}/show-master-status.log - - -#alls good, 'OK' for success -echo "OK" -exit 0 - - diff --git a/oam/install_scripts/mcs_module_installer.sh.in b/oam/install_scripts/mcs_module_installer.sh.in deleted file mode 100644 index 231c3c008..000000000 --- a/oam/install_scripts/mcs_module_installer.sh.in +++ /dev/null @@ -1,138 +0,0 @@ -#!/usr/bin/expect -# -# Install custom OS files on system -# Argument 0 - Remote Module Name -# Argument 1 - Remote Server Host Name or IP address -# Argument 2 - Root Password of remote server -# Argument 3 - Debug flag 1 for on, 0 for off -# Argument 4 - Username on remote server (root default) -set timeout 30 -set USERNAME root -set MODULE [lindex $argv 0] -set SERVER [lindex $argv 1] -set PASSWORD [lindex $argv 2] -set DEBUG [lindex $argv 3] -set USERNAME "root" -set UNM [lindex $argv 4] -if { $UNM != "" } { - set USERNAME $UNM -} - -set HOME "$env(HOME)" - -log_user $DEBUG -spawn -noecho /bin/bash -# - -#check and see if remote server has ssh keys setup, set PASSWORD if so -send_user " " -send "ssh -v $USERNAME@$SERVER 'time'\n" -set timeout 60 -expect { - "authenticity" { send "yes\n" - exp_continue - } - "word: " { send "$PASSWORD\n" - exp_continue - } - "passphrase" { send "$PASSWORD\n" - exp_continue - } - "Exit status 0" { send_user "DONE"} - "Exit status 1" { send_user "FAILED: Login Failure\n" ; exit 1 } - "Host key verification failed" { send_user "FAILED: Host key verification failed\n" ; exit 1 } - "service not known" { send_user "FAILED: Invalid Host\n" ; exit 1 } - "Permission denied, please try again" { send_user "ERROR: Invalid password\n" ; exit 1 } - "Connection refused" { send_user "ERROR: Connection refused\n" ; exit 1 } - "Connection closed" { send_user "ERROR: Connection closed\n" ; exit 1 } - "No route to host" { send_user "ERROR: No route to host\n" ; exit 1 } - timeout { send_user "ERROR: Timeout to host\n" ; exit 2 } -} -send_user "\n" - -send_user "Stop ColumnStore service " -send "ssh -v $USERNAME@$SERVER 'pkill ProcMon; pkill ProcMgr'\n" -set timeout 60 -# check return -expect { - "word: " { send "$PASSWORD\n" - exp_continue - } - "passphrase" { send "$PASSWORD\n" - exp_continue - } -# "No such file or directory" { send_user "DONE" } - "Exit status 127" { send_user "DONE" } - "Exit status 0" { send_user "DONE" } - "Read-only file system" { send_user "ERROR: local disk - Read-only file system\n" ; exit 1} - timeout { send_user "DONE" } -} -send_user "\n" - -# -# copy over custom OS tmp files -# -send_user "Copy Custom OS files to Module " -send_user " \n" -send "scp -rv @ENGINE_DATADIR@/local/etc $USERNAME@$SERVER:@ENGINE_DATADIR@/local\n" -set timeout 120 -expect { - "word: " { send "$PASSWORD\n" - exp_continue - } - "passphrase" { send "$PASSWORD\n" - exp_continue - } - "Exit status 0" { send_user "DONE" } - "scp :" { send_user "ERROR\n" ; - send_user "\n*** Installation ERROR\n" ; - exit 1 } - "Read-only file system" { send_user "ERROR: local disk - Read-only file system\n" ; exit 1} - timeout { send_user "ERROR: Timeout\n" ; exit 2 } -} -send_user "\n" - -# -# copy over MariaDB Columnstore Module file -# -send_user "Copy MariaDB Columnstore Module file to Module " -send "scp -v @ENGINE_DATADIR@/local/etc/$MODULE/* $USERNAME@$SERVER:@ENGINE_DATADIR@/local/.\n" -set timeout 120 -expect { - "word: " { send "$PASSWORD\n" - exp_continue - } - "passphrase" { send "$PASSWORD\n" - exp_continue - } - "scp :" { send_user "ERROR\n" ; - send_user "\n*** Installation ERROR\n" ; - exit 1 } - "Exit status 0" { send_user "DONE" } - "Exit status 1" { send_user "ERROR: scp failed" ; exit 1 } - timeout { send_user "ERROR: Timeout to host\n" ; exit 2 } -} -send_user "\n" - -send_user "Start ColumnStore service " -send_user " \n" -send "ssh -v $USERNAME@$SERVER 'columnstore start'\n" -set timeout 60 -# check return -expect { - "word: " { send "$PASSWORD\n" - exp_continue - } - "passphrase" { send "$PASSWORD\n" - exp_continue - } - "Exit status 0" { send_user "DONE" } - "Exit status 127" { send_user "ERROR: columnstore Not Found\n" ; exit 1 } - timeout { send_user "ERROR: Timeout to host\n" ; exit 2 } -} - -send_user "\n" - -send_user "\nInstallation Successfully Completed on '$MODULE'\n" -exit 0 - diff --git a/oam/install_scripts/patch_installer.sh b/oam/install_scripts/patch_installer.sh deleted file mode 100755 index 8e21af3a6..000000000 --- a/oam/install_scripts/patch_installer.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/expect -# -# $Id: patch_installer.sh 421 2007-04-05 15:46:55Z dhill $ -# -# Install Software Patch on Module -# Argument 1 - Remote Module Name -# Argument 2 - Remote Server Host Name or IP address -# Argument 3 - Root Password of remote server -# Argument 4 - Patch Directory Location -# Argument 5 - Install Directory Location -# Argument 6 - Software File being installed -# Argument 7 - Debug flag 1 for on, 0 for off -set timeout 20 -set MODULE [lindex $argv 0] -set SERVER [lindex $argv 1] -set PASSWORD [lindex $argv 2] -set PATCHLOCATION [lindex $argv 3] -set INSTALLLOCATION [lindex $argv 4] -set FILE [lindex $argv 5] -set DEBUG [lindex $argv 6] -set USERNAME "root" -set UNM [lindex $argv 7] -if { $UNM != "" } { - set USERNAME $UNM -} -log_user $DEBUG -spawn -noecho /bin/bash -# -# mv file being install -# -send_user "Backup Current File on Module" -expect -re "# " -send "ssh $USERNAME@$SERVER 'mv $INSTALLLOCATION$FILE $INSTALLLOCATION$FILE'.patchSave''\n" -# accept the remote host fingerprint (assuming never logged in before) -expect -re "service not known" { send_user " FAILED: Invalid Host\n" ; exit } -expect -re "authenticity" { send "yes\n" } -expect -re "word: " -# password for ssh -send "$PASSWORD\n" -# check return -expect { - -re "# " { send_user " DONE" } - -re "Permission denied" { send_user " FAILED: Invalid password\n" } exit; - -re "mv" { send_user " FAILED: copy filed\n" ; exit} -} -send_user "\n" -# -# send Patch File -# -send_user "Copy New Calpont Software File to Module" -expect -re "# " -send "scp $PATCHLOCATION$FILE $USERNAME@$SERVER:$INSTALLLOCATION$FILE\n" -expect -re "word: " -# send the password -send "$PASSWORD\n" -# check return -expect { - -re "100%" { send_user " DONE" } - -re "scp" { send_user " FAILED\n" ; - send_user "\n*** Installation Failed\n" ; - exit } - -re "Permission denied" { send_user " FAILED: Invalid password\n" ; exit } - -re "No such file or directory" { send_user " FAILED: Invalid package\n" ; exit } -} -send_user "\n" - -send_user "\n!!!Patch Installation Successfully Completed!!!\n" -exit - diff --git a/oam/install_scripts/postInstall.sh.in b/oam/install_scripts/postInstall.sh.in deleted file mode 100755 index dfc874c2d..000000000 --- a/oam/install_scripts/postInstall.sh.in +++ /dev/null @@ -1,129 +0,0 @@ -#!/usr/bin/expect -# -# $Id$ -# -# Install RPM on system -# Argument 1 - Package name being installed -# Argument 2 - Root Password of remote server -# Argument 3 - Root Password of External Mode - -set timeout 10 -set RPMPACKAGE " " -set PASSWORD " " -set CONFIGFILE " " -set DEBUG 0 -set USERNAME "root" - -spawn -noecho /bin/bash - -for {set i 0} {$i<[llength $argv]} {incr i} { - set arg($i) [lindex $argv $i] -} - -set i 0 -while true { - if { $i == [llength $argv] } { break } - if { $arg($i) == "-h" } { - send_user "\n" - send_user "'postInstaller.sh' performs a system install of a Calpont RPM\n" - send_user "on a system with Calpont already installed or on a new system\n" - send_user "when the -c option is used.\n" - send_user "\n" - send_user "Usage: postInstaller.sh -r 'calpont-rpm' -p 'password' -c 'config-file' -d\n" - send_user " calpont-rpm - Calpont RPM with directory locatation, i.e. /root/calpont.x.x.x.x\n" - send_user " password - root password on the servers being installed'\n" - send_user " config-file - Optional: Columnstore.xml config file with directory location, i.e. /root/Columnstore.xml\n" - send_user " -d - Debug flag\n" - exit - } elseif { $arg($i) == "-r" } { - incr i - set RPMPACKAGE $arg($i) - } elseif { $arg($i) == "-p" } { - incr i - set PASSWORD $arg($i) - } elseif { $arg($i) == "-c" } { - incr i - set CONFIGFILE $arg($i) - } elseif { $arg($i) == "-d" } { - set DEBUG 1 - } elseif { $arg($i) == "-u" } { - incr i - set USERNAME $arg($i) - } - incr i -} - -log_user $DEBUG - -if { $RPMPACKAGE == " " || $PASSWORD == " "} {puts "please enter both RPM and password, enter ./postInstaller.sh -h for additional info"; exit -1} - -if { $CONFIGFILE == " " } { - set CONFIGFILE @ENGINE_SYSCONFDIR@/columnstore/Columnstore.xml.rpmsave -} -if { [catch { open $CONFIGFILE "r"} handle ] } { - puts "Calpont Config file not found: $CONFIGFILE"; exit -1 -} - - -send_user "\nPerforming Calpont RPM System Install\n\n" - -# -# stopSystem -# -send_user "Stop Calpont System " -expect -re "# " -send "mcsadmin stopSystem INSTALL y\n" -expect { - -re "# " { send_user "DONE" } - -re "**** stopSystem Failed" { send_user "INFO: System not running" } -} -send_user "\n" -# -# erase package -# -send_user "Erase Calpont Package on Module " -expect -re "# " -send "rpm -e --nodeps calpont\n" -expect { - -re "# " { send_user "DONE" } - -re "uninstall completed" { send_user "DONE" } - -re "ERROR dependencies" { send_user "ERROR: ERROR dependencies\n" ; exit -1 } - -re "not installed" { send_user "INFO: Package not installed" } -} -send_user "\n" - -set timeout 60 -# -# install package -# -send_user "Install Calpont Package on Module " -send "rpm -ivh $RPMPACKAGE\n" -expect { - -re "completed" { send_user "DONE" } - -re "ERROR dependencies" { send_user "ERROR: ERROR dependencies\n" ; - send_user "\n*** Installation ERROR\n" ; - exit -1 } - -re "error" { send_user "ERROR\n" ; - send_user "\n*** Installation ERROR\n" ; - exit -1 } -} -expect -re "# " -log_user 0 -exec mv -f @ENGINE_SYSCONFDIR@/columnstore/Columnstore.xml @ENGINE_SYSCONFDIR@/columnstore/Columnstore.xml.new > /dev/null 2>&1 -exec mv -f $CONFIGFILE @ENGINE_SYSCONFDIR@/columnstore/Columnstore.xml > /dev/null 2>&1 - -send_user "\n" -set timeout 380 -# -# Run installer -# -send_user "Run System Installer " -send "columnstore_installer $RPMPACKAGE initial $PASSWORD n 0\n" -expect { - -re "reboot request successful" { } - -re "error" { send_user "FAILED" ; exit -1 } -} - -send_user "\nCalpont RPM System Install Completed, System now rebooting\n\n" - -exit 0 diff --git a/oam/install_scripts/remote_command.sh b/oam/install_scripts/remote_command.sh deleted file mode 100755 index 85e76306f..000000000 --- a/oam/install_scripts/remote_command.sh +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/expect -# -# $Id: remote_command.sh 3495 2012-12-17 22:51:40Z dhill $ -# -# Remote command execution script to another server -# Argument 1 - Remote Server Host Name or IP address -# Argument 2 - Remote Server password -# Argument 3 - Command -# Argument 4 - debug flag -# Argument 5 - Remote user name (optional) -# Argument 6 - Force a tty to be allocated (optional) -set stty_init {cols 512 -opost}; -set timeout 30 -set SERVER [lindex $argv 0] -set PASSWORD [lindex $argv 1] -set COMMAND [lindex $argv 2] -set DEBUG [lindex $argv 3] - -exec whoami >whoami.tmp -set USERNAME [exec cat whoami.tmp] -exec rm -f whoami.tmp - -set UNM [lindex $argv 4] -if { $UNM != "" && $UNM != "-" } { - set USERNAME "$UNM" -} -set TTY "" -set TTYOPT [lindex $argv 5] -if { $TTYOPT != "" } { - set TTY "-t" -} -log_user $DEBUG -spawn -noecho /bin/bash -#expect -re {[$#] } - -if { $PASSWORD == "ssh" } { - set PASSWORD "" -} - -# -# send command -# -send "ssh -v $TTY $USERNAME@$SERVER '$COMMAND'\n" -expect { - "cannot access" { exit 1} - "Host key verification failed" { send_user "FAILED: Host key verification failed\n" ; exit 1} - "service not known" { send_user " FAILED: Invalid Host\n" ; exit 1} - "ssh: connect to host" { send_user " FAILED: Invalid Host\n" ; exit 1 } - "Connection refused" { send_user "ERROR: Connection refused\n" ; exit 1 } - "Connection closed" { send_user "ERROR: Connection closed\n" ; exit 1 } - "authenticity" { send "yes\n" - expect { - "word: " { send "$PASSWORD\n" } - "passphrase" { send "$PASSWORD\n" } - } - } - "word: " { send "$PASSWORD\n" } - "passphrase" { send "$PASSWORD\n" } - "command not found" { exit 3 } -# -re {[$#] } { exit 0 } - "Exit status 0" { exit 0 } - "Exit status 1" { exit 1 } - "Exit status 3" { exit 1 } - "Exit status 4" { exit 1 } - timeout { exit 2 } - "Permission denied, please try again" { send_user "FAILED: Invalid password\n" ; exit 1 } -} -expect { - "command not found" { exit 3 } -# -re {[$#] } { exit 0 } - "Exit status 0" { exit 0 } - "Exit status 1" { exit 1 } - "Exit status 3" { exit 1 } - "Exit status 4" { exit 1 } - timeout { exit 2 } - "cannot access" { exit 1} - "Permission denied, please try again" { send_user "FAILED: Invalid password\n" ; exit 1 } - - "(y or n)" { send "y\n" - "command not found" { exit 3 } -# expect -re {[$#] } { exit 0 } - "Exit status 0" { exit 0 } - "Exit status 1" { exit 1 } - "Exit status 3" { exit 1 } - "Exit status 4" { exit 1 } - timeout { exit 2 } - } -} -exit 0 - diff --git a/oam/install_scripts/remote_command_verify.sh b/oam/install_scripts/remote_command_verify.sh deleted file mode 100755 index 22915caa2..000000000 --- a/oam/install_scripts/remote_command_verify.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/expect -# -# $Id: remote_commend.sh 421 2007-04-05 15:46:55Z dhill $ -# -# Remote command execution script to another server -# Argument 1 - Remote Server Host Name or IP address -# Argument 2 - Remote Server root password -# Argument 3 - Command -# Argument 4 - Good Response -# Argument 5 - Bad Response -# Argument 6 - timeout -# Argument 7 - Debug flag -set timeout 30 -set SERVER [lindex $argv 0] -set USERNAME [lindex $argv 1] -set PASSWORD [lindex $argv 2] -set COMMAND [lindex $argv 3] -set GOOD_RESPONSE [lindex $argv 4] -set BAD_RESPONSE [lindex $argv 5] -set timeout [lindex $argv 6] -set DEBUG [lindex $argv 7] -log_user $DEBUG -spawn -noecho /bin/bash -expect -re {[$#] } -# -# send command -# -send "ssh $USERNAME@$SERVER '$COMMAND'\n" -expect { - -re "authenticity" { send "yes\n" - expect { - timeout { send_user "TIMEOUT\n" ; exit 2 } - -re "word: " { send "$PASSWORD\n" } abort - } - } - timeout { send_user "TIMEOUT\n" ; exit 2 } - -re "service not known" { send_user "FAILED: Invalid Host\n" ; exit 1 } - -re "Permission denied" { send_user "FAILED: Invalid Password\n" ; exit 1 } - -re "word: " { send "$PASSWORD\n" } abort - -re $GOOD_RESPONSE { send_user " " ; exit 0 } - -re $BAD_RESPONSE { send_user "FAILED\n" ; exit 1 } - -re "parser error" { send_user "FAILED: System Columnstore.xml parse error\n" ; exit 1 } -} -expect { - timeout { send_user "FAILED-TIMEOUT\n" ; exit 1 } - -re $GOOD_RESPONSE { send_user " " ; exit 0 } - -re $BAD_RESPONSE { send_user "FAILED\n" ; exit 1 } - -re "No such file" { send_user "FAILED\n" ; exit 1 } - -re "parser error" { send_user "FAILED: System Columnstore.xml parse error\n" ; exit 1 } -} - -exit 1 - diff --git a/oam/install_scripts/remote_scp_get.sh b/oam/install_scripts/remote_scp_get.sh deleted file mode 100755 index 96b8d644e..000000000 --- a/oam/install_scripts/remote_scp_get.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/expect -# -# $Id: remote_commend.sh 421 2007-04-05 15:46:55Z dhill $ -# -# Remote command execution script to another server -# Argument 1 - Remote Server Host Name or IP address -# Argument 2 - Remote Server root password -# Argument 3 - Command -set timeout 10 -exec whoami > whoami.tmp -set USERNAME [exec cat whoami.tmp] -exec rm -f whoami.tmp -set SERVER [lindex $argv 0] -set PASSWORD [lindex $argv 1] -set FILE [lindex $argv 2] -set DEBUG [lindex $argv 3] -log_user $DEBUG -spawn -noecho /bin/bash - -if { $PASSWORD == "ssh" } { - set PASSWORD "" -} - -# -# send command -# -#expect -re {[$#] } -send "scp -v $USERNAME@$SERVER:$FILE .\n" -expect { - "Exit status 0" { exit 0 } - "Exit status 1" { exit 1 } - "100%" { send_user "DONE\n" ; exit 0 } - "authenticity" { send "yes\n" - expect { - "word: " { send "$PASSWORD\n" } - "passphrase" { send "$PASSWORD\n" } - } - } - "service not known" { send_user "FAILED: Invalid Host\n" ; exit 1 } - "Connection refused" { send_user "ERROR: Connection refused\n" ; exit 1 } - "Connection timed out" { send_user "FAILED: Connection timed out\n" ; exit 1 } - "lost connection" { send_user "FAILED: Connection refused\n" ; exit 1 } - "Connection closed" { send_user "ERROR: Connection closed\n" ; exit 1 } - "word: " { send "$PASSWORD\n" } - "passphrase" { send "$PASSWORD\n" } - "scp:" { send_user "FAILED\n" ; exit 1 } - "Permission denied, please try again" { send_user "FAILED: Invalid password\n" ; exit 1 } -} -expect { - "Exit status 0" { exit 0 } - "Exit status 1" { exit 1 } - "100%" { send_user "DONE\n" ; exit 0 } - "scp:" { send_user "FAILED\n" ; exit 1 } - "Permission denied, please try again" { send_user "FAILED: Invalid password\n" ; exit 1 } - "No such file or directory" { send_user "FAILED: No such file or directory\n" ; exit 1 } - "Connection refused" { send_user "ERROR: Connection refused\n" ; exit 1 } - "Connection closed" { send_user "ERROR: Connection closed\n" ; exit 1 } -} -exit 0 - diff --git a/oam/install_scripts/remote_scp_put.sh b/oam/install_scripts/remote_scp_put.sh deleted file mode 100644 index cee9211a7..000000000 --- a/oam/install_scripts/remote_scp_put.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/expect -# -# $Id: remote_commend.sh 421 2007-04-05 15:46:55Z dhill $ -# -# Remote command execution script to another server -# Argument 1 - Remote Server Host Name or IP address -# Argument 2 - Remote Server root password -# Argument 3 - Command -set timeout 30 -exec whoami >whoami.tmp -set USERNAME [exec cat whoami.tmp] -exec rm -f whoami.tmp -set SERVER [lindex $argv 0] -set PASSWORD [lindex $argv 1] -set FILE [lindex $argv 2] -set DEBUG [lindex $argv 3] -log_user $DEBUG -spawn -noecho /bin/bash - -if { $PASSWORD == "ssh" } { - set PASSWORD "" -} - -# -# send command -# -send "scp -v $FILE $USERNAME@$SERVER:$FILE\n" -expect { - "Exit status 0" { exit 0 } - "Exit status 1" { exit 1 } - -re "100%" { send_user "DONE\n" ; sleep 2; exit 0 } - -re "authenticity" { send "yes\n" - expect { - -re "word: " { send "$PASSWORD\n" } - -re "passphrase" { send "$PASSWORD\n" } - } - } - -re "service not known" { send_user "FAILED: Invalid Host\n" ; exit 1 } - -re "Connection refused" { send_user "FAILED: Connection refused\n" ; exit 1 } - -re "Connection timed out" { send_user "FAILED: Connection timed out\n" ; exit 1 } - -re "lost connection" { send_user "FAILED: Connection refused\n" ; exit 1 } - -re "Connection closed" { send_user "ERROR: Connection closed\n" ; exit 1 } - -re "word: " { send "$PASSWORD\n" } - -re "passphrase" { send "$PASSWORD\n" } - -re "WARNING:" { send "rm -f /root/.ssh/known_hosts" ; exit 1 } - -re "Permission denied, please try again" { send_user "FAILED: Invalid password\n" ; exit 1 } -} -expect { - "Exit status 0" { exit 0 } - "Exit status 1" { exit 1 } - -re "100%" { send_user "DONE\n" ; sleep 2 ; exit 0 } - -re "scp:" { send_user "FAILED\n" ; exit 1 } - -re "Permission denied, please try again" { send_user "FAILED: Invalid password\n" ; exit 1 } - -re "No such file or directory" { send_user "FAILED: Invalid file\n" ; exit 1 } - -re "Connection refused" { send_user "FAILED: Connection refused\n" ; exit 1 } - -re "Connection closed" { send_user "ERROR: Connection closed\n" ; exit 1 } -} -exit 0 - diff --git a/oam/install_scripts/remotessh.exp b/oam/install_scripts/remotessh.exp deleted file mode 100644 index 050339323..000000000 --- a/oam/install_scripts/remotessh.exp +++ /dev/null @@ -1,89 +0,0 @@ -set PROMPT "(\\$|#) " - -proc auto_ssh_command { username password server commandstr } { - global PROMPT - - # - # send command - # - send "ssh -t $username@$server $commandstr\n" - expect { - -re "Host key verification failed" { send_user "FAILED: Host key verification failed\n" ; exit 1} - -re "service not known" { send_user " FAILED: Invalid Host\n" ; exit 1} - -re "ssh: connect to host" { send_user " FAILED: Invalid Host\n" ; exit 1 } - -re "Connection refused" { send_user "ERROR: Connection refused\n" ; exit 1 } - -re "Connection closed" { send_user "ERROR: Connection closed\n" ; exit 1 } - -re "authenticity" { send "yes\n" - expect { - -re "word: " { send "$password\n" } abort - -re "passphrase" { send "$password\n" } abort - } - } - -re "word: " { send "$password\n" } abort - -re "passphrase" { send "$password\n" } abort - } - - set needsudopwd 0 - expect { - -re $PROMPT { exit 0 } - -re "Permission denied, please try again" { send_user " FAILED: Invalid password\n" ; exit 1 } - -re ": Permission denied" { send_user " FAILED: Privilege error\n" ; exit 1 } - -re "(y or n)" { send "y\n" - expect -re $PROMPT { exit 0 } - } - -re "sudo\\] password for $username: " { - send "$password\n"; - set needsudopwd 1 - } abort - } - - if {$needsudopwd} { - expect { - -re $PROMPT { exit 0 } - -re "try again" { send_user " FAILED: Invalid sudo password\n" ; exit 1 } - } - } -} - -proc auto_scp_command { username password server srcpath destpath } { - global PROMPT - send "scp $srcpath $username@$server:$destpath\n" - expect { - -re "Host key verification failed" { send_user "FAILED: Host key verification failed\n" ; exit 1} - -re "service not known" { send_user " FAILED: Invalid Host\n" ; exit 1} - -re "ssh: connect to host" { send_user " FAILED: Invalid Host\n" ; exit 1 } - -re "Connection refused" { send_user "ERROR: Connection refused\n" ; exit 1 } - -re "Connection closed" { send_user "ERROR: Connection closed\n" ; exit 1 } - -re "authenticity" { send "yes\n" - expect { - -re "word: " { send "$password\n" } abort - -re "passphrase" { send "$password\n" } abort - } - } - -re "word: " { send "$password\n" } abort - -re "passphrase" { send "$password\n" } abort - } - - expect { - -re $PROMPT { exit 0 } - -re "Permission denied, please try again" { send_user " FAILED: Invalid password\n" ; exit 1 } - -re ": Permission denied" { send_user " FAILED: Privilege error\n" ; exit 1 } - -re "(y or n)" { send "y\n" - expect -re $PROMPT { exit 0 } - } - } - - set timeout 30 - expect { - -re "100%" { send_user "DONE" } abort - -re "directory" { send_user "ERROR\n" ; - send_user "\n*** Installation ERROR\n" ; - exit 1 } - -re "Permission denied, please try again" { send_user "ERROR: Invalid password\n" ; exit 1 } - -re "No such file or directory" { send_user "ERROR: Invalid package\n" ; exit 1 } - } - - send_user "\n" - # sleep to make sure it's finished - sleep 5 -} diff --git a/oam/install_scripts/rsync.sh b/oam/install_scripts/rsync.sh deleted file mode 100755 index 6a103e422..000000000 --- a/oam/install_scripts/rsync.sh +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/expect -# -# $Id: rsync.sh 2915 2012-05-23 16:01:34Z dhill $ -# -# Remote Install RPM and custom OS files from postConfigure script -# Argument 1 - Remote Server Host Name or IP address -# Argument 2 - Root Password of remote server -# Argument 3 - Debug flag 1 for on, 0 for off -set SERVER [lindex $argv 0] -set PASSWORD [lindex $argv 1] -set DEBUG 0 -set DEBUG [lindex $argv 3] -log_user $DEBUG -spawn -noecho /bin/bash - -if {[info exists env(USER)]} { - set USERNAME $env(USER) -} else { - set USERNAME "root" -} - - -if { $PASSWORD == "ssh" } { - set PASSWORD "" -} - -# LinuxJedi: Not sure what this is expecting to be copying, but it probably shouldn't be doing it -# set COMMAND "rsync -vopgr -e ssh --exclude=mysql/ --exclude=test/ --exclude=infinidb_vtable/ --exclude=infinidb_querystats/ --exclude=calpontsys/ --include=*/ --include=*/* --exclude=* $INSTALLDIR/mysql/db/ $USERNAME@$SERVER:$INSTALLDIR/mysql/db/" - -# -# run command -# -set timeout 20 -send "$COMMAND\n" -expect { - -re "Host key verification failed" { send_user "FAILED: Host key verification failed\n" ; exit -1} - -re "service not known" { send_user " FAILED: Invalid Host\n" ; exit -1} - -re "ssh: connect to host" { send_user " FAILED: Invalid Host\n" ; exit -1 } - -re "authenticity" { send "yes\n" - expect { - -re "word: " { send "$PASSWORD\n" } - -re "passphrase" { send "$PASSWORD\n" } - } - } - -re "word: " { send "$PASSWORD\n" } - -re "passphrase" { send "$PASSWORD\n" } - -re "failed" { send_user " FAILED: Failure, check tmp log\n" ; exit 1 } - -re "Permission denied" { send_user " FAILED: Invalid password\n" ; exit 1 } - -re "total size" {} abort -} -expect { - -re "failed" { send_user " FAILED: Failure, check tmp log\n" ; exit 1 } - -re "Permission denied" { send_user " FAILED: Invalid password\n" ; exit 1 } - -re "total size" {} abort - -} - -set HOME "$env(HOME)" - -if {[file exist $HOME/.my.cnf]} { - - set COMMAND "rsync -vopgr -e ssh $HOME/.my.cnf $USERNAME@$SERVER:$HOME/" - - # - # run command - # - set timeout 10 - send "$COMMAND\n" - expect { - -re "word: " { send "$PASSWORD\n" } - -re "passphrase" { send "$PASSWORD\n" } - -re "total size" {} abort - -re "failed" { exit 0 } - timeout { exit 0 } - } - expect { - -re "total size" {} abort - -re "failed" { exit 0 } - timeout { exit 0 } - } -} - -exit 0 diff --git a/oam/install_scripts/slave-rep-columnstore.sh.in b/oam/install_scripts/slave-rep-columnstore.sh.in deleted file mode 100644 index 28282ebe8..000000000 --- a/oam/install_scripts/slave-rep-columnstore.sh.in +++ /dev/null @@ -1,111 +0,0 @@ -#!/bin/bash -# -# $Id$ -# -# generic MariaDB Columnstore Slave Replication script. -# -# Notes: This script gets run by ProcMon during installs and upgrades: - -# check log for error -checkForError() { - grep ERROR ${tmpdir}/slave-rep-status.log > ${tmpdir}/error.check - if [ `cat ${tmpdir}/error.check | wc -c` -ne 0 ]; then - echo "ERROR: check log file: ${tmpdir}/slave-rep-status.log" - rm -f ${tmpdir}/error.check - exit 1 - fi - rm -f ${tmpdir}/error.check -} - -pwprompt= -for arg in "$@"; do - if [ `expr -- "$arg" : '--masteripaddr='` -eq 15 ]; then - masteripaddr="`echo $arg | awk -F= '{print $2}'`" - elif [ `expr -- "$arg" : '--masterlogfile='` -eq 16 ]; then - masterlogfile="`echo $arg | awk -F= '{print $2}'`" - elif [ `expr -- "$arg" : '--masterlogpos='` -eq 15 ]; then - masterlogpos="`echo $arg | awk -F= '{print $2}'`" - elif [ `expr -- "$arg" : '--port='` -eq 7 ]; then - port="`echo $arg | awk -F= '{print $2}'`" - elif [ $(expr -- "$arg" : '--tmpdir=') -eq 9 ]; then - tmpdir="$(echo $arg | awk -F= '{print $2}')" - fi -done - -. @ENGINE_SUPPORTDIR@/columnstore_functions - -repUser="idbrep" -password="C0lumnStore!" - ->${tmpdir}/slave-rep-status.log - -# -# Run stop slave command -# -echo "Run stop slave command" >>${tmpdir}/slave-rep-status.log -cat >${tmpdir}/idb_slave-rep.sql <>${tmpdir}/slave-rep-status.log -mysql \ - --user=root \ - calpontsys <${tmpdir}/idb_slave-rep.sql >>${tmpdir}/slave-rep-status.log 2>&1 - -checkForError - -# -# Run Change Master Command -# -echo "Run Change Master Command" >>${tmpdir}/slave-rep-status.log -cat >${tmpdir}/idb_slave-rep.sql <>${tmpdir}/slave-rep-status.log -mysql \ - --user=root \ - calpontsys <${tmpdir}/idb_slave-rep.sql >>${tmpdir}/slave-rep-status.log 2>&1 - -checkForError - -# -# Run start slave command -# -echo "Run start slave command" >>${tmpdir}/slave-rep-status.log -cat >${tmpdir}/idb_slave-rep.sql <>${tmpdir}/slave-rep-status.log -mysql \ - --user=root \ - calpontsys <${tmpdir}/idb_slave-rep.sql >>${tmpdir}/slave-rep-status.log 2>&1 - -checkForError - -# -# Run SHOW SLAVE STATUS -# -echo "Run SHOW SLAVE STATUS to node log" >>${tmpdir}/slave-rep-status.log -cat >${tmpdir}/idb_slave-rep.sql <>${tmpdir}/slave-rep-status.log -mysql \ - --user=root \ - calpontsys <${tmpdir}/idb_slave-rep.sql >>${tmpdir}/slave-rep-status.log 2>&1 - -checkForError - -#alls good, 'OK' for success -echo "OK" -exit 0 diff --git a/oam/install_scripts/startupTests.sh.in b/oam/install_scripts/startupTests.sh.in deleted file mode 100755 index a0a995f5a..000000000 --- a/oam/install_scripts/startupTests.sh.in +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -# -# $Id: startupTests.sh 2937 2012-05-30 18:17:09Z rdempsey $ -# -# startupTests - perform sanity testing on system DB at system startup time -# called by Process-Monitor - -# Source function library. -if [ -f /etc/init.d/functions ]; then - . /etc/init.d/functions -fi - -. @ENGINE_SUPPORTDIR@/columnstore_functions - -for testScript in @ENGINE_SUPPORTDIR@/mcstest*.sh; do - if [ -x $testScript ]; then - eval $testScript - rc=$? - if [ $rc -ne 0 ]; then - cplogger -c 51 $testScript - echo "FAILED, check Critical log for additional info" - exit $rc - fi - fi -done -echo "OK" - -cplogger -i 54 - -exit 0 - diff --git a/oam/oamcpp/liboamcpp.cpp b/oam/oamcpp/liboamcpp.cpp index 296f98be3..a87cf1a4e 100644 --- a/oam/oamcpp/liboamcpp.cpp +++ b/oam/oamcpp/liboamcpp.cpp @@ -76,7 +76,6 @@ static const std::string optim("Build is " namespace fs = boost::filesystem; -using namespace alarmmanager; using namespace config; using namespace std; using namespace messageqcpp; @@ -89,12 +88,6 @@ namespace oam // flag to tell us ctrl-c was hit uint32_t ctrlc = 0; -// flag for using HDFS -// -1: non-hdfs -// 0: unknown -// 1: hdfs -int Oam::UseHdfs = 0; - //------------------------------------------------------------------------------ // Signal handler to catch Control-C signal to terminate the process // while waiting for a shutdown or suspend action @@ -109,25 +102,6 @@ Oam::Oam() { CalpontConfigFile = std::string(MCSSYSCONFDIR) + "/columnstore/Columnstore.xml"; - AlarmConfigFile = std::string(MCSSYSCONFDIR) + "/columnstore/AlarmConfig.xml"; - - ProcessConfigFile = std::string(MCSSYSCONFDIR) + "/columnstore/ProcessConfig.xml"; - - if (UseHdfs == 0) - { - try - { - Config* sysConfig = Config::makeConfig(CalpontConfigFile.c_str()); - string tmp = sysConfig->getConfig("Installation", "DBRootStorageType"); - - if (boost::iequals(tmp, "hdfs")) - UseHdfs = 1; - else - UseHdfs = -1; - } - catch (...) {} // defaulted to false - } - //get user string USER = "root"; char* p = getenv("USER"); @@ -149,66 +123,6 @@ Oam::Oam() Oam::~Oam() {} -/******************************************************************** - * - * get System Software information - * (for backward compatibility only) - ********************************************************************/ - -void Oam::getSystemSoftware(SystemSoftware& systemsoftware) -{ - systemsoftware.Version = columnstore_version; - systemsoftware.Release = columnstore_release; -} -/******************************************************************** - * - * get System Configuration Information - * - ********************************************************************/ - -void Oam::getSystemConfig(SystemConfig& systemconfig) -{ - - Config* sysConfig = Config::makeConfig(CalpontConfigFile.c_str()); - string Section = "SystemConfig"; - - // get numberic variables - systemconfig.DBRootCount = strtol(sysConfig->getConfig(Section, "DBRootCount").c_str(), 0, 0); - systemconfig.ModuleHeartbeatPeriod = strtol(sysConfig->getConfig(Section, "ModuleHeartbeatPeriod").c_str(), 0, 0); - systemconfig.ModuleHeartbeatCount = strtol(sysConfig->getConfig(Section, "ModuleHeartbeatCount").c_str(), 0, 0); -// systemconfig.ProcessHeartbeatPeriod = strtol(sysConfig->getConfig(Section, "ProcessHeartbeatPeriod").c_str(), 0, 0); - systemconfig.ExternalCriticalThreshold = strtol(sysConfig->getConfig(Section, "ExternalCriticalThreshold").c_str(), 0, 0); - systemconfig.ExternalMajorThreshold = strtol(sysConfig->getConfig(Section, "ExternalMajorThreshold").c_str(), 0, 0); - systemconfig.ExternalMinorThreshold = strtol(sysConfig->getConfig(Section, "ExternalMinorThreshold").c_str(), 0, 0); - systemconfig.TransactionArchivePeriod = strtol(sysConfig->getConfig(Section, "TransactionArchivePeriod").c_str(), 0, 0); - - // get string variables - for ( unsigned int dbrootID = 1 ; dbrootID < systemconfig.DBRootCount + 1 ; dbrootID++) - { - systemconfig.DBRoot.push_back(sysConfig->getConfig(Section, "DBRoot" + itoa(dbrootID))); - } - - systemconfig.SystemName = sysConfig->getConfig(Section, "SystemName"); - systemconfig.DBRMRoot = sysConfig->getConfig(Section, "DBRMRoot"); - systemconfig.ParentOAMModule = sysConfig->getConfig(Section, "ParentOAMModuleName"); - systemconfig.StandbyOAMModule = sysConfig->getConfig(Section, "StandbyOAMModuleName"); - - Section = "SessionManager"; - - systemconfig.MaxConcurrentTransactions = strtol(sysConfig->getConfig(Section, "MaxConcurrentTransactions").c_str(), 0, 0); - systemconfig.SharedMemoryTmpFile = sysConfig->getConfig(Section, "SharedMemoryTmpFile"); - - Section = "VersionBuffer"; - - systemconfig.NumVersionBufferFiles = strtol(sysConfig->getConfig(Section, "NumVersionBufferFiles").c_str(), 0, 0); - systemconfig.VersionBufferFileSize = strtol(sysConfig->getConfig(Section, "VersionBufferFileSize").c_str(), 0, 0); - - Section = "OIDManager"; - - systemconfig.OIDBitmapFile = sysConfig->getConfig(Section, "OIDBitmapFile"); - systemconfig.FirstOID = strtol(sysConfig->getConfig(Section, "FirstOID").c_str(), 0, 0); -} - /******************************************************************** * * get System Module Type Configuration Information @@ -541,277 +455,6 @@ void Oam::getSystemConfig(const std::string& module, ModuleConfig& moduleconfig) exceptionControl("getSystemConfig", API_INVALID_PARAMETER); } -/******************************************************************** - * - * get Local Module Configuration Information - * - ********************************************************************/ - -void Oam::getSystemConfig(ModuleConfig& moduleconfig) -{ - // get Local Module Name - - oamModuleInfo_t t = Oam::getModuleInfo(); - - string module = boost::get<0>(t); - - // get Module info - - Oam::getSystemConfig(module, moduleconfig); -} - -/******************************************************************** - * - * get Local Module Type Configuration Information - * - ********************************************************************/ - -void Oam::getSystemConfig(ModuleTypeConfig& moduletypeconfig) -{ - // get Local Module Name - - oamModuleInfo_t t = Oam::getModuleInfo(); - - string module = boost::get<0>(t); - string moduleType = module.substr(0, MAX_MODULE_TYPE_SIZE); - - // get Module info - - Oam::getSystemConfig(moduleType, moduletypeconfig); -} - -/******************************************************************** - * - * get System External Device Configuration information - * - ********************************************************************/ - -void Oam::getSystemConfig(SystemExtDeviceConfig& systemextdeviceconfig) -{ - Config* sysConfig = Config::makeConfig(CalpontConfigFile.c_str()); - const string Section = "SystemExtDeviceConfig"; - const string NAME = "Name"; - const string IPADDR = "IPAddr"; - const string DISABLE_STATE = "DisableState"; - - systemextdeviceconfig.Count = strtol(sysConfig->getConfig(Section, "Count").c_str(), 0, 0); - - int configCount = 0; - - for (int extDeviceID = 1; extDeviceID < MAX_EXT_DEVICE + 1; extDeviceID++) - { - ExtDeviceConfig Extdeviceconfig; - - string name = NAME + itoa(extDeviceID); - - try - { - Extdeviceconfig.Name = sysConfig->getConfig(Section, name); - } - catch (...) - { - continue; - } - - if (Extdeviceconfig.Name == oam::UnassignedName || - Extdeviceconfig.Name.empty()) - continue; - - string ipaddr = IPADDR + itoa(extDeviceID); - string disablestate = DISABLE_STATE + itoa(extDeviceID); - - Extdeviceconfig.IPAddr = sysConfig->getConfig(Section, ipaddr); - Extdeviceconfig.DisableState = sysConfig->getConfig(Section, disablestate); - - systemextdeviceconfig.extdeviceconfig.push_back(Extdeviceconfig); - configCount++; - } - - //correct count if not matching - if ( systemextdeviceconfig.Count != configCount ) - { - systemextdeviceconfig.Count = configCount; - - sysConfig->setConfig(Section, "Count", itoa(configCount)); - - try - { - sysConfig->write(); - } - catch (...) - { - exceptionControl("getSystemConfig", API_FAILURE); - } - } -} - -/******************************************************************** - * - * get System External Device Configuration information - * - ********************************************************************/ - -void Oam::getSystemConfig(const std::string& extDevicename, ExtDeviceConfig& extdeviceconfig) -{ - Config* sysConfig = Config::makeConfig(CalpontConfigFile.c_str()); - const string Section = "SystemExtDeviceConfig"; - const string NAME = "Name"; - const string IPADDR = "IPAddr"; - const string DISABLE_STATE = "DisableState"; - - for (int extDeviceID = 1; extDeviceID < MAX_EXT_DEVICE + 1; extDeviceID++) - { - string name = NAME + itoa(extDeviceID); - - extdeviceconfig.Name = sysConfig->getConfig(Section, name); - - if (extdeviceconfig.Name != extDevicename) - continue; - - string ipaddr = IPADDR + itoa(extDeviceID); - string disablestate = DISABLE_STATE + itoa(extDeviceID); - - extdeviceconfig.IPAddr = sysConfig->getConfig(Section, ipaddr); - extdeviceconfig.DisableState = sysConfig->getConfig(Section, disablestate); - return; - } - - // Ext Device Not found - exceptionControl("getSystemConfig", API_INVALID_PARAMETER); -} - - -/******************************************************************** - * - * set Ext Device Configuration information - * - ********************************************************************/ - -void Oam::setSystemConfig(const std::string deviceName, ExtDeviceConfig extdeviceconfig) -{ - if ( deviceName == oam::UnassignedName ) - return; - - Config* sysConfig = Config::makeConfig(CalpontConfigFile.c_str()); - const string Section = "SystemExtDeviceConfig"; - const string NAME = "Name"; - const string IPADDR = "IPAddr"; - const string DISABLE_STATE = "DisableState"; - - int count = strtol(sysConfig->getConfig(Section, "Count").c_str(), 0, 0); - - int entry = 0; - int extDeviceID = 1; - - for (; extDeviceID < MAX_EXT_DEVICE + 1; extDeviceID++) - { - string name = NAME + itoa(extDeviceID); - - if (sysConfig->getConfig(Section, name) == oam::UnassignedName) - entry = extDeviceID; - - if ((sysConfig->getConfig(Section, name)).empty() && entry == 0) - entry = extDeviceID; - - if (sysConfig->getConfig(Section, name) != deviceName) - continue; - - string ipaddr = IPADDR + itoa(extDeviceID); - string disablestate = DISABLE_STATE + itoa(extDeviceID); - - sysConfig->setConfig(Section, name, extdeviceconfig.Name); - sysConfig->setConfig(Section, ipaddr, extdeviceconfig.IPAddr); - sysConfig->setConfig(Section, disablestate, extdeviceconfig.DisableState); - - if ( extdeviceconfig.Name == oam::UnassignedName ) - { - // entry deleted decrement count - count--; - - if ( count < 0 ) - count = 0 ; - - sysConfig->setConfig(Section, "Count", itoa(count)); - - // - //send message to Process Monitor to remove external device to shared memory - // - try - { - ByteStream obs; - - obs << (ByteStream::byte) REMOVE_EXT_DEVICE; - obs << deviceName; - - sendStatusUpdate(obs, REMOVE_EXT_DEVICE); - } - catch (...) - { - exceptionControl("setSystemConfig", API_INVALID_PARAMETER); - } - - } - - try - { - sysConfig->write(); - } - catch (...) - { - exceptionControl("setSystemConfig", API_FAILURE); - } - - return; - } - - if ( entry == 0 ) - entry = extDeviceID; - - // Ext Device Not found, add it - - sysConfig->setConfig(Section, "Count", itoa(count + 1)); - - string name = NAME + itoa(entry); - string ipaddr = IPADDR + itoa(entry); - string disablestate = DISABLE_STATE + itoa(entry); - - sysConfig->setConfig(Section, name, extdeviceconfig.Name); - sysConfig->setConfig(Section, ipaddr, extdeviceconfig.IPAddr); - - if (extdeviceconfig.DisableState.empty() ) - extdeviceconfig.DisableState = oam::ENABLEDSTATE; - - sysConfig->setConfig(Section, disablestate, extdeviceconfig.DisableState); - - try - { - sysConfig->write(); - } - catch (...) - { - exceptionControl("setSystemConfig", API_FAILURE); - } - - // - //send message to Process Monitor to add new external device to shared memory - // - try - { - ByteStream obs; - - obs << (ByteStream::byte) ADD_EXT_DEVICE; - obs << extdeviceconfig.Name; - - sendStatusUpdate(obs, ADD_EXT_DEVICE); - } - catch (...) - { - exceptionControl("setSystemConfig", API_INVALID_PARAMETER); - } - - return; -} - /******************************************************************** * * get System Configuration String Parameter value @@ -863,1973 +506,12 @@ void Oam::getSystemConfig(const std::string& name, int& value) value = atoi(returnValue.c_str()); } -/******************************************************************** - * - * get Module Name for IP Address - * - ********************************************************************/ - -void Oam::getModuleNameByIPAddr(const std::string IpAddress, std::string& moduleName) -{ - SystemModuleTypeConfig systemmoduletypeconfig; - ModuleTypeConfig moduletypeconfig; - ModuleConfig moduleconfig; - systemmoduletypeconfig.moduletypeconfig.clear(); - string returnValue; - string Argument; - - try - { - Oam::getSystemConfig(systemmoduletypeconfig); - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - // end of list - break; - - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0 ) - // skip if no modules - continue; - - string moduletype = systemmoduletypeconfig.moduletypeconfig[i].ModuleType; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - string modulename = (*pt).DeviceName; - string moduleID = modulename.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE); - - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++) - { - if ( IpAddress == (*pt1).IPAddr ) - { - moduleName = modulename; - return; - } - } - } - } - - moduleName = oam::UnassignedName; - return; - } - catch (exception&) - { - exceptionControl("getModuleNameByIPAddr", API_FAILURE); - } -} - - -/******************************************************************** - * - * set System Configuration String Parameter value - * - ********************************************************************/ - -void Oam::setSystemConfig(const std::string name, const std::string value) -{ - string mem = "Mem"; - string disk = "Disk"; - string swap = "Swap"; - string threshold = "Threshold"; - string critical = "Critical"; - string major = "Major"; - string minor = "Minor"; - - - Config* sysConfig = Config::makeConfig(CalpontConfigFile.c_str()); - string returnValue; - - // find and write new value to disk - - for ( int i = 0;; i++) - { - if ( configSections[i] == "" ) - // end of section list, no match found - exceptionControl("setSystemConfig", API_INVALID_PARAMETER); - - returnValue = sysConfig->getConfig(configSections[i], name); - - if (!(returnValue.empty())) - { - // match found - sysConfig->setConfig(configSections[i], name, value); - - try - { - sysConfig->write(); - } - catch (...) - { - exceptionControl("setSystemConfig", API_FAILURE); - } - - break; - } - } - - return; -} - -/******************************************************************** - * - * set System Configuration Interger Parameter value - * - ********************************************************************/ - -void Oam::setSystemConfig(const std::string name, const int value) -{ - string valueString; - - // convert Incoming Interger Parameter value to String - - valueString = itoa(value); - - // write parameter to disk - - Oam::setSystemConfig(name, valueString); -} - -/******************************************************************** - * - * set System Module Configuration Information by Module Type - * - ********************************************************************/ - -void Oam::setSystemConfig(const std::string moduletype, ModuleTypeConfig moduletypeconfig) -{ - Config* sysConfig = Config::makeConfig(CalpontConfigFile.c_str()); - const string Section = "SystemModuleConfig"; - const string MODULE_TYPE = "ModuleType"; - const string MODULE_DESC = "ModuleDesc"; - const string MODULE_RUN_TYPE = "RunType"; - const string MODULE_COUNT = "ModuleCount"; - const string MODULE_CPU_CRITICAL = "ModuleCPUCriticalThreshold"; - const string MODULE_CPU_MAJOR = "ModuleCPUMajorThreshold"; - const string MODULE_CPU_MINOR = "ModuleCPUMinorThreshold"; - const string MODULE_CPU_MINOR_CLEAR = "ModuleCPUMinorClearThreshold"; - const string MODULE_DISK_CRITICAL = "ModuleDiskCriticalThreshold"; - const string MODULE_DISK_MAJOR = "ModuleDiskMajorThreshold"; - const string MODULE_DISK_MINOR = "ModuleDiskMinorThreshold"; - const string MODULE_MEM_CRITICAL = "ModuleMemCriticalThreshold"; - const string MODULE_MEM_MAJOR = "ModuleMemMajorThreshold"; - const string MODULE_MEM_MINOR = "ModuleMemMinorThreshold"; - const string MODULE_SWAP_CRITICAL = "ModuleSwapCriticalThreshold"; - const string MODULE_SWAP_MAJOR = "ModuleSwapMajorThreshold"; - const string MODULE_SWAP_MINOR = "ModuleSwapMinorThreshold"; - const string MODULE_IP_ADDR = "ModuleIPAddr"; - const string MODULE_SERVER_NAME = "ModuleHostName"; - const string MODULE_DISK_MONITOR_FS = "ModuleDiskMonitorFileSystem"; - const string MODULE_DISABLE_STATE = "ModuleDisableState"; - - for (int moduleTypeID = 1; moduleTypeID < MAX_MODULE_TYPE + 1; moduleTypeID++) - { - string moduleType = MODULE_TYPE + itoa(moduleTypeID); - - if ( sysConfig->getConfig(Section, moduleType) == moduletype) - { - string ModuleType = MODULE_TYPE + itoa(moduleTypeID); - string ModuleDesc = MODULE_DESC + itoa(moduleTypeID); - string ModuleRunType = MODULE_RUN_TYPE + itoa(moduleTypeID); - string ModuleCount = MODULE_COUNT + itoa(moduleTypeID); - string ModuleCPUCriticalThreshold = MODULE_CPU_CRITICAL + itoa(moduleTypeID); - string ModuleCPUMajorThreshold = MODULE_CPU_MAJOR + itoa(moduleTypeID); - string ModuleCPUMinorThreshold = MODULE_CPU_MINOR + itoa(moduleTypeID); - string ModuleCPUMinorClearThreshold = MODULE_CPU_MINOR_CLEAR + itoa(moduleTypeID); - string ModuleDiskCriticalThreshold = MODULE_DISK_CRITICAL + itoa(moduleTypeID); - string ModuleDiskMajorThreshold = MODULE_DISK_MAJOR + itoa(moduleTypeID); - string ModuleDiskMinorThreshold = MODULE_DISK_MINOR + itoa(moduleTypeID); - string ModuleMemCriticalThreshold = MODULE_MEM_CRITICAL + itoa(moduleTypeID); - string ModuleMemMajorThreshold = MODULE_MEM_MAJOR + itoa(moduleTypeID); - string ModuleMemMinorThreshold = MODULE_MEM_MINOR + itoa(moduleTypeID); - string ModuleSwapCriticalThreshold = MODULE_SWAP_CRITICAL + itoa(moduleTypeID); - string ModuleSwapMajorThreshold = MODULE_SWAP_MAJOR + itoa(moduleTypeID); - string ModuleSwapMinorThreshold = MODULE_SWAP_MINOR + itoa(moduleTypeID); - - int oldModuleCount = atoi(sysConfig->getConfig(Section, ModuleCount).c_str()); - - sysConfig->setConfig(Section, ModuleType, moduletypeconfig.ModuleType); - sysConfig->setConfig(Section, ModuleDesc, moduletypeconfig.ModuleDesc); - sysConfig->setConfig(Section, ModuleRunType, moduletypeconfig.RunType); - sysConfig->setConfig(Section, ModuleCount, itoa(moduletypeconfig.ModuleCount)); - sysConfig->setConfig(Section, ModuleCPUCriticalThreshold, itoa(moduletypeconfig.ModuleCPUCriticalThreshold)); - sysConfig->setConfig(Section, ModuleCPUMajorThreshold, itoa(moduletypeconfig.ModuleCPUMajorThreshold)); - sysConfig->setConfig(Section, ModuleCPUMinorThreshold, itoa(moduletypeconfig.ModuleCPUMinorThreshold)); - sysConfig->setConfig(Section, ModuleCPUMinorClearThreshold, itoa(moduletypeconfig.ModuleCPUMinorClearThreshold)); - sysConfig->setConfig(Section, ModuleDiskCriticalThreshold, itoa(moduletypeconfig.ModuleDiskCriticalThreshold)); - sysConfig->setConfig(Section, ModuleDiskMajorThreshold, itoa(moduletypeconfig.ModuleDiskMajorThreshold)); - sysConfig->setConfig(Section, ModuleDiskMinorThreshold, itoa(moduletypeconfig.ModuleDiskMinorThreshold)); - sysConfig->setConfig(Section, ModuleMemCriticalThreshold, itoa(moduletypeconfig.ModuleMemCriticalThreshold)); - sysConfig->setConfig(Section, ModuleMemMajorThreshold, itoa(moduletypeconfig.ModuleMemMajorThreshold)); - sysConfig->setConfig(Section, ModuleMemMinorThreshold, itoa(moduletypeconfig.ModuleMemMinorThreshold)); - sysConfig->setConfig(Section, ModuleSwapCriticalThreshold, itoa(moduletypeconfig.ModuleSwapCriticalThreshold)); - sysConfig->setConfig(Section, ModuleSwapMajorThreshold, itoa(moduletypeconfig.ModuleSwapMajorThreshold)); - sysConfig->setConfig(Section, ModuleSwapMinorThreshold, itoa(moduletypeconfig.ModuleSwapMinorThreshold)); - - // clear out hostConfig info before adding in new contents - if ( oldModuleCount > 0) - { - for (int moduleID = 1; moduleID < MAX_MODULE ; moduleID++) - { - //get NIC IP address/hostnames - for (int nicID = 1; nicID < MAX_NIC + 1 ; nicID++) - { - string ModuleIpAddr = MODULE_IP_ADDR + itoa(moduleID) + "-" + itoa(nicID) + "-" + itoa(moduleTypeID); - - string ipAddr = sysConfig->getConfig(Section, ModuleIpAddr); - - if (ipAddr.empty()) - continue; - - string ModuleHostName = MODULE_SERVER_NAME + itoa(moduleID) + "-" + itoa(nicID) + "-" + itoa(moduleTypeID); - string ModuleDisableState = MODULE_DISABLE_STATE + itoa(moduleID) + "-" + itoa(moduleTypeID); - - sysConfig->setConfig(Section, ModuleIpAddr, UnassignedIpAddr); - sysConfig->setConfig(Section, ModuleHostName, UnassignedName); - sysConfig->setConfig(Section, ModuleDisableState, oam::ENABLEDSTATE); - } - } - } - - if ( moduletypeconfig.ModuleCount > 0 ) - { - DeviceNetworkList::iterator pt = moduletypeconfig.ModuleNetworkList.begin(); - - for ( ; pt != moduletypeconfig.ModuleNetworkList.end() ; pt++) - { - int ModuleID = atoi((*pt).DeviceName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE).c_str()); - - string ModuleDisableState = MODULE_DISABLE_STATE + itoa(ModuleID) + "-" + itoa(moduleTypeID); - sysConfig->setConfig(Section, ModuleDisableState, (*pt).DisableState); - - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++) - { - int nidID = (*pt1).NicID; - string ModuleIpAddr = MODULE_IP_ADDR + itoa(ModuleID) + "-" + itoa(nidID) + "-" + itoa(moduleTypeID); - sysConfig->setConfig(Section, ModuleIpAddr, (*pt1).IPAddr); - - string ModuleHostName = MODULE_SERVER_NAME + itoa(ModuleID) + "-" + itoa(nidID) + "-" + itoa(moduleTypeID); - sysConfig->setConfig(Section, ModuleHostName, (*pt1).HostName); - } - } - } - - DiskMonitorFileSystems::iterator pt = moduletypeconfig.FileSystems.begin(); - int id = 1; - - for ( ; pt != moduletypeconfig.FileSystems.end() ; pt++) - { - string ModuleDiskMonitorFS = MODULE_DISK_MONITOR_FS + itoa(id) + "-" + itoa(moduleTypeID); - sysConfig->setConfig(Section, ModuleDiskMonitorFS, *pt); - ++id; - } - - try - { - sysConfig->write(); - } - catch (...) - { - exceptionControl("getSystemConfig", API_FAILURE); - } - - return; - } - } - - // Module Not found - exceptionControl("getSystemConfig", API_INVALID_PARAMETER); -} - -/******************************************************************** - * - * set System Module Configuration Information by Module Name - * - ********************************************************************/ - -void Oam::setSystemConfig(const std::string module, ModuleConfig moduleconfig) -{ - Config* sysConfig100 = Config::makeConfig(CalpontConfigFile.c_str()); - - const string MODULE_TYPE = "ModuleType"; - const string Section = "SystemModuleConfig"; - const string MODULE_COUNT = "ModuleCount"; - const string MODULE_IP_ADDR = "ModuleIPAddr"; - const string MODULE_SERVER_NAME = "ModuleHostName"; - const string MODULE_DISABLE_STATE = "ModuleDisableState"; - const string MODULE_DBROOTID = "ModuleDBRootID"; - const string MODULE_DBROOT_COUNT = "ModuleDBRootCount"; - - string moduletype = module.substr(0, MAX_MODULE_TYPE_SIZE); - int moduleID = atoi(module.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE).c_str()); - - if ( moduleID < 1 ) - //invalid ID - exceptionControl("setSystemConfig", API_INVALID_PARAMETER); - - for (int moduleTypeID = 1; moduleTypeID < MAX_MODULE_TYPE + 1; moduleTypeID++) - { - string moduleType = MODULE_TYPE + itoa(moduleTypeID); - string ModuleCount = MODULE_COUNT + itoa(moduleTypeID); - - if ( sysConfig100->getConfig(Section, moduleType) == moduletype ) - { - string ModuleDisableState = MODULE_DISABLE_STATE + itoa(moduleID) + "-" + itoa(moduleTypeID); - sysConfig100->setConfig(Section, ModuleDisableState, moduleconfig.DisableState); - - HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); - - for ( ; pt1 != moduleconfig.hostConfigList.end() ; pt1++) - { - string ModuleIpAddr = MODULE_IP_ADDR + itoa(moduleID) + "-" + itoa((*pt1).NicID) + "-" + itoa(moduleTypeID); - sysConfig100->setConfig(Section, ModuleIpAddr, (*pt1).IPAddr); - - string ModuleHostName = MODULE_SERVER_NAME + itoa(moduleID) + "-" + itoa((*pt1).NicID) + "-" + itoa(moduleTypeID); - sysConfig100->setConfig(Section, ModuleHostName, (*pt1).HostName); - } - - int id = 1; - - if ( moduleconfig.dbrootConfigList.size() == 0 ) - { - string ModuleDBrootID = MODULE_DBROOTID + itoa(moduleID) + "-" + itoa((id)) + "-" + itoa(moduleTypeID); - sysConfig100->setConfig(Section, ModuleDBrootID, oam::UnassignedName); - } - else - { - DBRootConfigList::iterator pt2 = moduleconfig.dbrootConfigList.begin(); - - for ( ; pt2 != moduleconfig.dbrootConfigList.end() ; pt2++, id++) - { - string ModuleDBrootID = MODULE_DBROOTID + itoa(moduleID) + "-" + itoa((id)) + "-" + itoa(moduleTypeID); - sysConfig100->setConfig(Section, ModuleDBrootID, itoa((*pt2))); - } - } - - //set entries no longer configured to unsassigned - for ( int extraid = id ; id < MAX_DBROOT ; extraid++ ) - { - string ModuleDBrootID = MODULE_DBROOTID + itoa(moduleID) + "-" + itoa((extraid)) + "-" + itoa(moduleTypeID); - - if ( sysConfig100->getConfig(Section, ModuleDBrootID).empty() || - sysConfig100->getConfig(Section, ModuleDBrootID) == oam::UnassignedName ) - break; - - sysConfig100->setConfig(Section, ModuleDBrootID, oam::UnassignedName); - } - - string ModuleDBRootCount = MODULE_DBROOT_COUNT + itoa(moduleID) + "-" + itoa(moduleTypeID); - sysConfig100->setConfig(Section, ModuleDBRootCount, itoa(moduleconfig.dbrootConfigList.size())); - - try - { - sysConfig100->write(); - } - catch (...) - { - exceptionControl("setSystemConfig", API_FAILURE); - } - - return; - } - } - - // Module Not found - exceptionControl("setSystemConfig", API_INVALID_PARAMETER); -} - - -/******************************************************************** - * - * add Module - * - ********************************************************************/ - -void Oam::addModule(DeviceNetworkList devicenetworklist, const std::string password, const std::string mysqlpw, - bool storeHostnames) -{ - // build and send msg - int returnStatus = sendAddModuleToProcMgr(ADDMODULE, devicenetworklist, FORCEFUL, ACK_YES, storeHostnames, - password, mysqlpw); - - if (returnStatus != API_SUCCESS) - exceptionControl("addModule", returnStatus); - -} - -/******************************************************************** - * - * remove Module - * - ********************************************************************/ - -void Oam::removeModule(DeviceNetworkList devicenetworklist) -{ - DeviceNetworkList::iterator pt = devicenetworklist.begin(); - - for ( ; pt != devicenetworklist.end() ; pt++) - { - // validate Module name - int returnStatus = validateModule((*pt).DeviceName); - - if (returnStatus != API_SUCCESS) - exceptionControl("removeModule", returnStatus); - } - - // build and send msg - int returnStatus = sendMsgToProcMgr2(REMOVEMODULE, devicenetworklist, FORCEFUL, ACK_YES); - - if (returnStatus != API_SUCCESS) - exceptionControl("removeModule", returnStatus); -} - -/******************************************************************** - * - * reconfigure Module - * - ********************************************************************/ - -void Oam::reconfigureModule(DeviceNetworkList devicenetworklist) -{ - DeviceNetworkList::iterator pt = devicenetworklist.begin(); - // validate Module name - int returnStatus = validateModule((*pt).DeviceName); - - if (returnStatus != API_SUCCESS) - exceptionControl("reconfigureModule", returnStatus); - - // build and send msg - returnStatus = sendMsgToProcMgr2(RECONFIGUREMODULE, devicenetworklist, FORCEFUL, ACK_YES); - - if (returnStatus != API_SUCCESS) - exceptionControl("reconfigureModule", returnStatus); -} - - -/******************************************************************** - * - * get System Status Information - * - ********************************************************************/ - -void Oam::getSystemStatus(SystemStatus& systemstatus, bool systemStatusOnly) -{ - if (!checkSystemRunning()) - return; - -#ifdef _MSC_VER - // TODO: Remove when we create OAM for Windows - return; -#endif - ModuleStatus modulestatus; - systemstatus.systemmodulestatus.modulestatus.clear(); - ExtDeviceStatus extdevicestatus; - systemstatus.systemextdevicestatus.extdevicestatus.clear(); - NICStatus nicstatus; - systemstatus.systemnicstatus.nicstatus.clear(); - DbrootStatus dbrootstatus; - systemstatus.systemdbrootstatus.dbrootstatus.clear(); - - for ( int i = 0 ; i < 2 ; i ++) - { - try - { - MessageQueueClient processor("ProcStatusControl"); - // processor.syncProto(false); - ByteStream::byte ModuleNumber; - ByteStream::byte ExtDeviceNumber; - ByteStream::byte dbrootNumber; - ByteStream::byte NICNumber; - ByteStream::byte state; - std::string name; - std::string date; - ByteStream obs, ibs; - - obs << (ByteStream::byte) GET_SYSTEM_STATUS; - - if ( systemStatusOnly ) - obs << (ByteStream::byte) 1; - else - obs << (ByteStream::byte) 2; - - try - { - struct timespec ts = { 3, 0 }; - processor.write(obs, &ts); - } - catch (exception& e) - { - processor.shutdown(); - string error = e.what(); - //writeLog("getSystemStatus: write exception: " + error, LOG_TYPE_ERROR); - exceptionControl("getSystemStatus write", API_FAILURE); - } - catch (...) - { - processor.shutdown(); - //writeLog("getSystemStatus: write exception: unknown", LOG_TYPE_ERROR); - exceptionControl("getSystemStatus write", API_FAILURE); - } - - // wait 30 seconds for ACK from Process Monitor - try - { - struct timespec ts = { 30, 0 }; - ibs = processor.read(&ts); - } - catch (exception& e) - { - processor.shutdown(); - string error = e.what(); - //writeLog("getSystemStatus: read exception: " + error, LOG_TYPE_ERROR); - exceptionControl("getSystemStatus read", API_FAILURE); - } - catch (...) - { - processor.shutdown(); - //writeLog("getSystemStatus: read exception: unknown", LOG_TYPE_ERROR); - exceptionControl("getSystemStatus read", API_FAILURE); - } - - if (ibs.length() > 0) - { - if ( systemStatusOnly ) - { - ibs >> name; - ibs >> state; - ibs >> date; - - if ( name.find("system") != string::npos ) - { - systemstatus.SystemOpState = state; - systemstatus.StateChangeDate = date; - } - } - else - { - ibs >> ModuleNumber; - - for ( int i = 0 ; i < ModuleNumber ; ++i) - { - ibs >> name; - ibs >> state; - ibs >> date; - - if ( name.find("system") != string::npos ) - { - systemstatus.SystemOpState = state; - systemstatus.StateChangeDate = date; - } - else - { - modulestatus.Module = name; - modulestatus.ModuleOpState = state; - modulestatus.StateChangeDate = date; - systemstatus.systemmodulestatus.modulestatus.push_back(modulestatus); - } - } - - ibs >> ExtDeviceNumber; - - for ( int i = 0 ; i < ExtDeviceNumber ; ++i) - { - ibs >> name; - ibs >> state; - ibs >> date; - extdevicestatus.Name = name; - extdevicestatus.OpState = state; - extdevicestatus.StateChangeDate = date; - systemstatus.systemextdevicestatus.extdevicestatus.push_back(extdevicestatus); - } - - ibs >> NICNumber; - - for ( int i = 0 ; i < NICNumber ; ++i) - { - ibs >> name; - ibs >> state; - ibs >> date; - nicstatus.HostName = name; - nicstatus.NICOpState = state; - nicstatus.StateChangeDate = date; - systemstatus.systemnicstatus.nicstatus.push_back(nicstatus); - } - - ibs >> dbrootNumber; - - for ( int i = 0 ; i < dbrootNumber ; ++i) - { - ibs >> name; - ibs >> state; - ibs >> date; - dbrootstatus.Name = name; - dbrootstatus.OpState = state; - dbrootstatus.StateChangeDate = date; - systemstatus.systemdbrootstatus.dbrootstatus.push_back(dbrootstatus); - } - } - - processor.shutdown(); - return; - } - else - { - //writeLog("getSystemStatus: ProcStatusControl returns 0 length", LOG_TYPE_ERROR); - } - - // timeout ocurred, shutdown connection - processor.shutdown(); - //writeLog("getSystemStatus: read 0 length", LOG_TYPE_ERROR); - exceptionControl("getSystemStatus read 0", API_FAILURE); - } - catch (exception& e) - { - string error = e.what(); - //writeLog("getSystemStatus: final exception: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - //writeLog("getSystemStatus: final exception: unknown", LOG_TYPE_ERROR); - } - } - - exceptionControl("getSystemStatus:MessageQueueClient-Error", API_FAILURE); -} - -/******************************************************************** - * - * set System Status information - * - ********************************************************************/ - -void Oam::setSystemStatus(const int state) -{ - //send and wait for ack and resend if not received - //retry 3 time max - for ( int i = 0; i < 3 ; i++) - { - try - { - ByteStream obs; - obs << (ByteStream::byte) SET_SYSTEM_STATUS; - obs << (ByteStream::byte) state; - - sendStatusUpdate(obs, SET_SYSTEM_STATUS); - return; - } - catch (...) - {} - } - - exceptionControl("setSystemStatus", API_FAILURE); -} - -/******************************************************************** - * - * get Module Status information - * - ********************************************************************/ - -void Oam::getModuleStatus(const std::string name, int& state, bool& degraded) -{ - SystemStatus systemstatus; - ModuleConfig moduleconfig; - std::vector NICstates; - degraded = false; - state = oam::UNEQUIP; - - try - { - getSystemStatus(systemstatus, false); - - for ( unsigned int i = 0 ; i < systemstatus.systemmodulestatus.modulestatus.size(); i++) - { - if ( systemstatus.systemmodulestatus.modulestatus[i].Module == name ) - { - state = systemstatus.systemmodulestatus.modulestatus[i].ModuleOpState; - - // get NIC status for degraded state info - try - { - getSystemConfig(name, moduleconfig); - - HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); - - for ( ; pt1 != moduleconfig.hostConfigList.end() ; pt1++) - { - try - { - int state; - getNICStatus((*pt1).HostName, state); - NICstates.push_back(state); - } - catch (exception& e) - { - Oam oam; - ostringstream os; - os << "Oam::getModuleStatus exception while getNICStatus " << (*pt1).HostName << " " << e.what(); - //oam.writeLog(os.str(), logging::LOG_TYPE_ERROR); - } - catch (...) - { - Oam oam; - ostringstream os; - os << "Oam::getModuleStatus exception while getNICStatus " << (*pt1).HostName; - //oam.writeLog(os.str(), logging::LOG_TYPE_ERROR); - } - } - - vector::iterator pt = NICstates.begin(); - - for ( ; pt != NICstates.end() ; pt++) - { - if ( (*pt) == oam::DOWN ) - { - degraded = true; - break; - } - } - - return; - } - catch (exception& e) - { - Oam oam; - ostringstream os; - os << "Oam::getModuleStatus exception while getSystemConfig " << name << " " << e.what(); - //oam.writeLog(os.str(), logging::LOG_TYPE_ERROR); - } - catch (...) - { - Oam oam; - ostringstream os; - os << "Oam::getModuleStatus exception while getSystemConfig " << name; - //oam.writeLog(os.str(), logging::LOG_TYPE_ERROR); - } - } - } - } - catch (exception& e) - { - Oam oam; - ostringstream os; - os << "Oam::getModuleStatus exception while getSystemStatus " << e.what(); - //oam.writeLog(os.str(), logging::LOG_TYPE_ERROR); - } - catch (...) - { - Oam oam; - ostringstream os; - os << "Oam::getModuleStatus exception while getSystemStatus"; - //oam.writeLog(os.str(), logging::LOG_TYPE_ERROR); - } - - // no match found - exceptionControl("getModuleStatus", API_INVALID_PARAMETER); -} - -/******************************************************************** - * - * set Module Status information - * - ********************************************************************/ - -void Oam::setModuleStatus(const std::string name, const int state) -{ - //send and wait for ack and resend if not received - //retry 3 time max - for ( int i = 0; i < 3 ; i++) - { - try - { - ByteStream obs; - - obs << (ByteStream::byte) SET_MODULE_STATUS; - obs << name; - obs << (ByteStream::byte) state; - - sendStatusUpdate(obs, SET_MODULE_STATUS); - return; - } - catch (...) - {} - } - - exceptionControl("setModuleStatus", API_FAILURE); -} - - -/******************************************************************** - * - * get External Device Status information - * - ********************************************************************/ - -void Oam::getExtDeviceStatus(const std::string name, int& state) -{ - SystemStatus systemstatus; - - try - { - getSystemStatus(systemstatus, false); - - for ( unsigned int i = 0 ; i < systemstatus.systemextdevicestatus.extdevicestatus.size(); i++) - { - if ( systemstatus.systemextdevicestatus.extdevicestatus[i].Name == name ) - { - state = systemstatus.systemextdevicestatus.extdevicestatus[i].OpState; - return; - } - } - } - catch (exception&) - { - exceptionControl("getExtDeviceStatus", API_FAILURE); - } - - // no match found - exceptionControl("getExtDeviceStatus", API_INVALID_PARAMETER); -} - -/******************************************************************** - * - * set External Device Status information - * - ********************************************************************/ - -void Oam::setExtDeviceStatus(const std::string name, const int state) -{ - //send and wait for ack and resend if not received - //retry 3 time max - for ( int i = 0; i < 3 ; i++) - { - try - { - ByteStream obs; - - obs << (ByteStream::byte) SET_EXT_DEVICE_STATUS; - obs << name; - obs << (ByteStream::byte) state; - - sendStatusUpdate(obs, SET_EXT_DEVICE_STATUS); - return; - } - catch (...) - {} - } - - exceptionControl("setExtDeviceStatus", API_FAILURE); -} - -/******************************************************************** - * - * get DBroot Status information - * - ********************************************************************/ - -void Oam::getDbrootStatus(const std::string name, int& state) -{ - SystemStatus systemstatus; - - try - { - getSystemStatus(systemstatus, false); - - for ( unsigned int i = 0 ; i < systemstatus.systemdbrootstatus.dbrootstatus.size(); i++) - { - if ( systemstatus.systemdbrootstatus.dbrootstatus[i].Name == name ) - { - state = systemstatus.systemdbrootstatus.dbrootstatus[i].OpState; - return; - } - } - } - catch (exception&) - { - exceptionControl("getDbrootStatus", API_FAILURE); - } - - // no match found - exceptionControl("getDbrootStatus", API_INVALID_PARAMETER); -} - -/******************************************************************** - * - * set DBroot Status information - * - ********************************************************************/ - -void Oam::setDbrootStatus(const std::string name, const int state) -{ - //send and wait for ack and resend if not received - //retry 3 time max - for ( int i = 0; i < 3 ; i++) - { - try - { - ByteStream obs; - - obs << (ByteStream::byte) SET_DBROOT_STATUS; - obs << name; - obs << (ByteStream::byte) state; - - sendStatusUpdate(obs, SET_DBROOT_STATUS); - return; - } - catch (...) - {} - } - - exceptionControl("setDbrootStatus", API_FAILURE); -} - -/******************************************************************** - * - * get NIC Status information - * - ********************************************************************/ - -void Oam::getNICStatus(const std::string name, int& state) -{ - SystemStatus systemstatus; - - try - { - getSystemStatus(systemstatus, false); - - for ( unsigned int i = 0 ; i < systemstatus.systemnicstatus.nicstatus.size(); i++) - { - if ( systemstatus.systemnicstatus.nicstatus[i].HostName == name ) - { - state = systemstatus.systemnicstatus.nicstatus[i].NICOpState; - return; - } - } - } - catch (exception& e) - { - Oam oam; - ostringstream os; - os << "Oam::getNICStatus exception while getSystemStatus for " << name << " " << e.what(); - //oam.writeLog(os.str(), logging::LOG_TYPE_ERROR); - exceptionControl("getNICStatus", API_FAILURE); - } - - // no match found - exceptionControl("getNICStatus", API_INVALID_PARAMETER); -} - -/******************************************************************** - * - * set NIC Status information - * - ********************************************************************/ - -void Oam::setNICStatus(const std::string name, const int state) -{ - //send and wait for ack and resend if not received - //retry 3 time max - for ( int i = 0; i < 3 ; i++) - { - try - { - ByteStream obs; - - obs << (ByteStream::byte) SET_NIC_STATUS; - obs << name; - obs << (ByteStream::byte) state; - - sendStatusUpdate(obs, SET_NIC_STATUS); - return; - } - catch (...) - {} - } - - exceptionControl("setNICStatus", API_FAILURE); -} - -/******************************************************************** - * - * get Process Configuration Information - * - ********************************************************************/ - -void Oam::getProcessConfig(const std::string process, const std::string module, ProcessConfig& processconfig) -{ - - Config* proConfig = Config::makeConfig(ProcessConfigFile.c_str()); - const string SECTION_NAME = "PROCESSCONFIG"; - const string ARG_NAME = "ProcessArg"; - string argName; - const string DEP_NAME = "DepProcessName"; - const string DEP_MDLNAME = "DepModuleName"; - string depName; - string depMdlName; - string moduleType = module.substr(0, MAX_MODULE_TYPE_SIZE); - - for (int processID = 1; processID < MAX_PROCESS + 1; processID++) - { - string sectionName = SECTION_NAME + itoa(processID); - - if ( proConfig->getConfig(sectionName, "ProcessName") == process ) - { - string ModuleType = proConfig->getConfig(sectionName, "ModuleType"); - - if ( ModuleType == "ParentOAMModule" - || ModuleType == "ChildExtOAMModule" - || ( ModuleType == "ChildOAMModule" && moduleType != "xm" ) - || ModuleType == moduleType) - { - // get string variables - processconfig.ProcessName = process; - processconfig.ModuleType = ModuleType; - - processconfig.ProcessLocation = proConfig->getConfig(sectionName, "ProcessLocation"); - processconfig.LogFile = proConfig->getConfig(sectionName, "LogFile");; - - // get Integer variables - processconfig.BootLaunch = strtol(proConfig->getConfig(sectionName, "BootLaunch").c_str(), 0, 0); - processconfig.LaunchID = strtol(proConfig->getConfig(sectionName, "LaunchID").c_str(), 0, 0);; - - // get Auguments - for (int argID = 0; argID < MAX_ARGUMENTS; argID++) - { - argName = ARG_NAME + itoa(argID + 1); - processconfig.ProcessArgs[argID] = proConfig->getConfig(sectionName, argName); - } - - // get process dependencies - for (int depID = 0; depID < MAX_DEPENDANCY; depID++) - { - depName = DEP_NAME + itoa(depID + 1); - processconfig.DepProcessName[depID] = proConfig->getConfig(sectionName, depName); - } - - // get dependent process Module name - for (int moduleID = 0; moduleID < MAX_DEPENDANCY; moduleID++) - { - depMdlName = DEP_MDLNAME + itoa(moduleID + 1); - processconfig.DepModuleName[moduleID] = proConfig->getConfig(sectionName, depMdlName); - } - - // get optional group id and type - try - { - processconfig.RunType = proConfig->getConfig(sectionName, "RunType"); - } - catch (...) - { - processconfig.RunType = "LOADSHARE"; - } - - return; - } - } - } - - // Process Not found - exceptionControl("getProcessConfig", API_INVALID_PARAMETER); -} - -/******************************************************************** - * - * get System Process Configuration Information - * - ********************************************************************/ - -void Oam::getProcessConfig(SystemProcessConfig& systemprocessconfig) -{ - - const string SECTION_NAME = "PROCESSCONFIG"; - systemprocessconfig.processconfig.clear(); - - Config* proConfig = Config::makeConfig(ProcessConfigFile.c_str()); - Config *csConfig = Config::makeConfig(); - string strStorageManagerEnabled = csConfig->getConfig("StorageManager", "Enabled"); - bool storageManagerEnabled = !strStorageManagerEnabled.empty() && (strStorageManagerEnabled[0] == 'Y' || - strStorageManagerEnabled[0] == 'y' || strStorageManagerEnabled[0] == 'T' || strStorageManagerEnabled[0] == 't'); - - for (int processID = 1; processID < MAX_PROCESS + 1; processID++) - { - ProcessConfig processconfig; - - // get process info - - string sectionName = SECTION_NAME + itoa(processID); - - Oam::getProcessConfig(proConfig->getConfig(sectionName, "ProcessName"), - proConfig->getConfig(sectionName, "ModuleType"), - processconfig ); - - // hide StorageManager from everything else if it is disabled - if (processconfig.ProcessName.empty() || - (!storageManagerEnabled && processconfig.ProcessName == "StorageManager")) - continue; - - systemprocessconfig.processconfig.push_back(processconfig); - } -} - -/******************************************************************** - * - * get Process Configuration String Parameter value - * - ********************************************************************/ - -void Oam::getProcessConfig(const std::string process, const std::string module, - const std::string name, std::string& value) -{ - - Config* proConfig = Config::makeConfig(ProcessConfigFile.c_str()); - const string SECTION_NAME = "PROCESSCONFIG"; - string moduleType = module.substr(0, MAX_MODULE_TYPE_SIZE); - - for (int processID = 1; processID < MAX_PROCESS + 1; processID++) - { - string sectionName = SECTION_NAME + itoa(processID); - - if ( proConfig->getConfig(sectionName, "ProcessName") == process ) - { - string ModuleType = proConfig->getConfig(sectionName, "ModuleType"); - - if ( ModuleType == "ParentOAMModule" - || ModuleType == "ChildExtOAMModule" - || ( ModuleType == "ChildOAMModule" && moduleType != "xm" ) - || ModuleType == moduleType) - { - // get string variables - - value = proConfig->getConfig(sectionName, name); - - if (value.empty()) - { - exceptionControl("getProcessConfig", API_INVALID_PARAMETER); - } - - return; - } - } - } - - // Process Not found - - exceptionControl("getProcessConfig", API_INVALID_PARAMETER); -} - -/******************************************************************** - * - * get Process Configuration Integer Parameter value - * - ********************************************************************/ - -void Oam::getProcessConfig(const std::string process, const std::string module, - const std::string name, int& value) -{ - string returnValue; - - Oam::getProcessConfig(process, module, name, returnValue); - - value = atoi(returnValue.c_str()); -} - -/******************************************************************** - * - * set Process Configuration String Parameter value - * - ********************************************************************/ - -void Oam::setProcessConfig(const std::string process, const std::string module, - const std::string name, const std::string value) -{ - - Config* proConfig = Config::makeConfig(ProcessConfigFile.c_str()); - const string SECTION_NAME = "PROCESSCONFIG"; - string returnValue; - string moduleType = module.substr(0, MAX_MODULE_TYPE_SIZE); - - for (int processID = 1; processID < MAX_PROCESS + 1; processID++) - { - string sectionName = SECTION_NAME + itoa(processID); - - if ( proConfig->getConfig(sectionName, "ProcessName") == process ) - { - string ModuleType = proConfig->getConfig(sectionName, "ModuleType"); - - if ( ModuleType == "ParentOAMModule" - || ModuleType == "ChildExtOAMModule" - || ( ModuleType == "ChildOAMModule" && moduleType != "xm" ) - || ModuleType == moduleType) - { - // check if parameter exist - - Oam::getProcessConfig(process, module, name, returnValue); - - // Set string variables - proConfig->setConfig(sectionName, name, value); - - try - { - proConfig->write(); - } - catch (...) - { - exceptionControl("setProcessConfig", API_FAILURE); - } - - // build and send msg to inform Proc-Mgt that Configuration is updated - // don't care if fails, sincet his can be called with Proc-Mgr enable - sendMsgToProcMgr(UPDATECONFIG, "", FORCEFUL, ACK_NO); - return; - } - } - } - - // Process Not found - - exceptionControl("setProcessConfig", API_INVALID_PARAMETER); -} - -/******************************************************************** - * - * set Process Configuration Interger Parameter value - * - ********************************************************************/ - -void Oam::setProcessConfig(const std::string process, const std::string module, - const std::string name, const int value) -{ - string valueString; - - // convert Incoming Interger Parameter value to String - - valueString = itoa(value); - - // write parameter to disk - - Oam::setProcessConfig(process, module, name, valueString); -} - -/******************************************************************** - * - * System Process Status information from the Process status file. - * - ********************************************************************/ - -void Oam::getProcessStatus(SystemProcessStatus& systemprocessstatus, string port) -{ - if (!checkSystemRunning()) - exceptionControl("getProcessStatus", API_FAILURE); - - ProcessStatus processstatus; - systemprocessstatus.processstatus.clear(); - - try - { - MessageQueueClient processor(port); -// processor.syncProto(false); - - ByteStream::quadbyte processNumber; - ByteStream::byte state; - ByteStream::quadbyte PID; - std::string changeDate; - std::string processName; - std::string moduleName; - ByteStream obs, ibs; - - obs << (ByteStream::byte) GET_ALL_PROC_STATUS; - - try - { - struct timespec ts = { 5, 0 }; - processor.write(obs, &ts); - } - catch (std::exception& ex) - { - exceptionControl("getProcessStatus:write", API_FAILURE); - } - catch (...) - { - exceptionControl("getProcessStatus:write", API_TIMEOUT); - } - - // wait 10 seconds for ACK from Process Monitor - struct timespec ts = { 30, 0 }; - - try - { - ibs = processor.read(&ts); - } - catch (std::exception& ex) - { - exceptionControl("getProcessStatus:read", API_FAILURE); - } - catch (...) - { - exceptionControl("getProcessStatus:read", API_TIMEOUT); - } - - if (ibs.length() > 0) - { - ibs >> processNumber; - - for ( unsigned i = 0 ; i < processNumber ; ++i) - { - ibs >> processName; - ibs >> moduleName; - ibs >> state; - ibs >> PID; - ibs >> changeDate; - - processstatus.ProcessName = processName; - processstatus.Module = moduleName; - processstatus.ProcessOpState = state; - processstatus.ProcessID = PID; - processstatus.StateChangeDate = changeDate; - - systemprocessstatus.processstatus.push_back(processstatus); - } - - processor.shutdown(); - return; - } - - // timeout occurred, shutdown connection - processor.shutdown(); - } - catch (std::exception& ex) - { - exceptionControl("getProcessStatus:MessageQueueClient", API_FAILURE, ex.what()); - } - catch (...) - { - exceptionControl("getProcessStatus:MessageQueueClient", API_FAILURE); - } - - exceptionControl("getProcessStatus", API_TIMEOUT); -} - -/******************************************************************** - * - * get Process information from the Process Status file. - * - ********************************************************************/ - -void Oam::getProcessStatus(const std::string process, const std::string module, ProcessStatus& processstatus) -{ -#ifdef _MSC_VER - // TODO: Remove when we create OAM for Windows - return; -#endif - - if (!checkSystemRunning()) - exceptionControl("getProcessStatus", API_FAILURE); - - for ( int i = 0 ; i < 5 ; i ++) - { - try - { - MessageQueueClient processor("ProcStatusControl"); - // processor.syncProto(false); - ByteStream::byte status, state; - ByteStream::quadbyte PID; - std::string changeDate; - ByteStream obs, ibs; - - obs << (ByteStream::byte) GET_PROC_STATUS; - obs << module; - obs << process; - - try - { - struct timespec ts = { 5, 0 }; - processor.write(obs, &ts); - } - catch (std::exception& ex) - { - processor.shutdown(); - exceptionControl("getProcessStatus:write", API_FAILURE, ex.what()); - } - catch (...) - { - processor.shutdown(); - exceptionControl("getProcessStatus:write", API_TIMEOUT); - } - - // wait 10 seconds for ACK from Process Monitor - struct timespec ts = { 15, 0 }; - - try - { - ibs = processor.read(&ts); - } - catch (std::exception& ex) - { - processor.shutdown(); - exceptionControl("getProcessStatus:read", API_FAILURE, ex.what()); - } - catch (...) - { - processor.shutdown(); - exceptionControl("getProcessStatus:read", API_TIMEOUT); - } - - if (ibs.length() > 0) - { - ibs >> status; - - if ( status == oam::API_SUCCESS ) - { - ibs >> state; - ibs >> PID; - ibs >> changeDate; - } - else - { - // shutdown connection - processor.shutdown(); - exceptionControl("getProcessStatus:status", API_FAILURE); - } - - processstatus.ProcessName = process; - processstatus.Module = module; - processstatus.ProcessOpState = state; - processstatus.ProcessID = PID; - processstatus.StateChangeDate = changeDate; - - processor.shutdown(); - return; - } - - // timeout occurred, shutdown connection - processor.shutdown(); - exceptionControl("getProcessStatus:status", API_TIMEOUT); - } - catch (...) - {} - } - - exceptionControl("getProcessStatus:MessageQueueClient-Error", API_FAILURE); - -} - - -/******************************************************************** - * - * set Process Status String Parameter from the Process Status file. - * - ********************************************************************/ - -void Oam::setProcessStatus(const std::string process, const std::string module, const int state, pid_t PID) -{ - if (!checkSystemRunning()) - exceptionControl("setProcessStatus", API_FAILURE); - - //send and wait for ack and resend if not received - //retry 5 time max - for ( int i = 0; i < 5 ; i++) - { - try - { - ByteStream obs; - - obs << (ByteStream::byte) SET_PROC_STATUS; - obs << module; - obs << process; - obs << (ByteStream::byte) state; - obs << (ByteStream::quadbyte) PID; - - sendStatusUpdate(obs, SET_PROC_STATUS); - return; - } - catch (...) - {} - -#ifdef _MSC_VER - Sleep(1 * 1000); -#else - sleep(1); -#endif - } - - exceptionControl("setProcessStatus", API_TIMEOUT); -} - -/******************************************************************** - * - * Process Initization Successful Completed, Mark Process ACTIVE - * - ********************************************************************/ - -void Oam::processInitComplete(std::string processName, int state) -{ - //This method takes too long on Windows and doesn't do anything there anyway... - // Disable legacy OAM - return; - // get current Module name - string moduleName; - oamModuleInfo_t st; - - try - { - st = getModuleInfo(); - moduleName = boost::get<0>(st); - } - catch (...) - { - //system("touch /var/log/mariadb/columnstore/test2"); - } - - for ( int i = 0 ; i < 5 ; i++) - { - //set process - try - { - setProcessStatus(processName, moduleName, state, getpid()); - - //verify it's set - try - { - ProcessStatus procstat; - getProcessStatus(processName, moduleName, procstat); - - if ( procstat.ProcessOpState == state) - return; - } - catch (...) - {} - } - catch (...) - { - //system("touch /var/log/mariadb/columnstore/test3"); - } - - sleep(1); - } - - writeLog("processInitComplete: Status update failed", LOG_TYPE_ERROR ); - exceptionControl("processInitComplete", API_FAILURE); -} - -/******************************************************************** - * - * Process Initization Failed, Mark Process FAILED - * - ********************************************************************/ - -void Oam::processInitFailure() -{ - // get current process name - string processName; - myProcessStatus_t t; - - try - { - t = getMyProcessStatus(); - processName = boost::get<1>(t); - } - catch (...) - { - exceptionControl("processInitFailure", API_FAILURE); - } - - // get current Module name - string moduleName; - oamModuleInfo_t st; - - try - { - st = getModuleInfo(); - moduleName = boost::get<0>(st); - } - catch (...) - { - exceptionControl("processInitFailure", API_FAILURE); - } - - //set process to FAILED - try - { - setProcessStatus(processName, moduleName, FAILED, 0); - } - catch (...) - { - exceptionControl("processInitFailure", API_FAILURE); - } - - //set MODULE to FAILED - try - { - setModuleStatus(moduleName, FAILED); - } - catch (...) - { - exceptionControl("processInitFailure", API_FAILURE); - } -} - -/******************************************************************** - * - * get Alarm Configuration Information by Alarm ID - * - ********************************************************************/ - -void Oam::getAlarmConfig(const int alarmid, AlarmConfig& alarmconfig) -{ - - Config* alaConfig = Config::makeConfig(AlarmConfigFile.c_str()); - string temp; - string Section = "AlarmConfig"; - - // validate Alarm ID - - if ( alarmid > MAX_ALARM_ID ) - exceptionControl("getAlarmConfig", API_INVALID_PARAMETER); - - // convert Alarm ID to ASCII - - Section.append(itoa(alarmid)); - - // get string variables - - temp = alaConfig->getConfig(Section, "AlarmID"); - - if ( temp.empty()) - { - exceptionControl("getAlarmConfig", API_INVALID_PARAMETER); - } - - alarmconfig.BriefDesc = alaConfig->getConfig(Section, "BriefDesc"); - alarmconfig.DetailedDesc = alaConfig->getConfig(Section, "DetailedDesc"); - - // get numberic variables - - alarmconfig.AlarmID = strtol(alaConfig->getConfig(Section, "alarmid").c_str(), 0, 0); - alarmconfig.Severity = strtol(alaConfig->getConfig(Section, "Severity").c_str(), 0, 0); - alarmconfig.Threshold = strtol(alaConfig->getConfig(Section, "Threshold").c_str(), 0, 0); - alarmconfig.Occurrences = strtol(alaConfig->getConfig(Section, "Occurrences").c_str(), 0, 0); - alarmconfig.LastIssueTime = strtol(alaConfig->getConfig(Section, "LastIssueTime").c_str(), 0, 0); - -} - -/******************************************************************** - * - * get Alarm Configuration String Parameter value - * - ********************************************************************/ - -void Oam::getAlarmConfig(const int alarmid, const std::string name, std::string& value) -{ - - Config* alaConfig = Config::makeConfig(AlarmConfigFile.c_str()); - string Section = "AlarmConfig"; - - // validate Alarm ID - - if ( alarmid > MAX_ALARM_ID ) - exceptionControl("getSystemConfig", API_INVALID_PARAMETER); - - // convert Alarm ID to ASCII - - Section.append(itoa(alarmid)); - - // get string variables - - value = alaConfig->getConfig(Section, name); - - if (value.empty()) - { - exceptionControl("getSystemConfig", API_INVALID_PARAMETER); - } -} - -/******************************************************************** - * - * get Alarm Configuration Integer Parameter value - * - ********************************************************************/ - -void Oam::getAlarmConfig(const int alarmid, const std::string name, int& value) -{ - string returnValue; - - // get string variables - - Oam::getAlarmConfig(alarmid, name, returnValue); - - value = atoi(returnValue.c_str()); -} - -/******************************************************************** - * - * set Alarm Configuration String Parameter value by Alarm ID - * - ********************************************************************/ - -void Oam::setAlarmConfig(const int alarmid, const std::string name, const std::string value) -{ - string Section = "AlarmConfig"; - int returnValue; - - struct flock fl; - int fd; - - // validate Alarm ID - - if ( alarmid > MAX_ALARM_ID ) - exceptionControl("setAlarmConfig", API_INVALID_PARAMETER); - - // convert Alarm ID to ASCII - - Section.append(itoa(alarmid)); - - // check if parameter exist - - Oam::getAlarmConfig(alarmid, name, returnValue); - - // only allow user to change these levels - if ( name != "Threshold" && - name != "Occurrences" && - name != "LastIssueTime" ) - exceptionControl("setAlarmConfig", API_READONLY_PARAMETER); - - string fileName = AlarmConfigFile; - - memset(&fl, 0, sizeof(fl)); - fl.l_type = F_RDLCK; // read lock - fl.l_whence = SEEK_SET; - fl.l_start = 0; - fl.l_len = 0; //lock whole file - - // open config file - if ((fd = open(fileName.c_str(), O_RDWR)) >= 0) - { - // lock file - if (fcntl(fd, F_SETLKW, &fl) != 0) - { - ostringstream oss; - oss << "Oam::setAlarmConfig: error locking file " << - fileName << - ": " << - strerror(errno) << - ", proceding anyway."; - cerr << oss.str() << endl; - } - - // write parameter to disk - - Config* alaConfig = Config::makeConfig(AlarmConfigFile.c_str()); - alaConfig->setConfig(Section, name, value); - - try - { - alaConfig->write(); - } - catch (...) - {} - - fl.l_type = F_UNLCK; //unlock - fcntl(fd, F_SETLK, &fl); - - close(fd); - } - else - { - ostringstream oss; - oss << "Oam::setAlarmConfig: error opening file " << - fileName << - ": " << - strerror(errno); - throw runtime_error(oss.str()); - } - - return; -} - -/******************************************************************** - * - * set Alarm Configuration Interger Parameter value by Alarm ID - * - ********************************************************************/ - -void Oam::setAlarmConfig(const int alarmid, const std::string name, const int value) -{ - string Section = "AlarmConfig"; - string valueString; - - // convert Incoming Interger Parameter value to String - - valueString = itoa(value); - - // write parameter to disk - - Oam::setAlarmConfig(alarmid, name, valueString); -} - -/******************************************************************** - * - * get Active Alarm List - * - ********************************************************************/ - -void Oam::getActiveAlarms(AlarmList& activeAlarm) -{ - // check if on Active OAM Parent - bool OAMParentModuleFlag; - oamModuleInfo_t st; - - try - { - st = getModuleInfo(); - OAMParentModuleFlag = boost::get<4>(st); - - if (OAMParentModuleFlag) - { - //call getAlarm API directly - ALARMManager sm; - sm.getActiveAlarm(activeAlarm); - return; - } - } - catch (...) - { - exceptionControl("getActiveAlarms", API_FAILURE); - } - - int returnStatus = API_SUCCESS; - - if (UseHdfs > 0) - { - // read from HDFS files - returnStatus = readHdfsActiveAlarms(activeAlarm); - } - else - { - // build and send msg - returnStatus = sendMsgToProcMgr3(GETACTIVEALARMDATA, activeAlarm, ""); - } - - if (returnStatus != API_SUCCESS) - exceptionControl("getActiveAlarms", returnStatus); -} - -/******************************************************************** - * - * get Historical Alarm List - * - ********************************************************************/ - -void Oam::getAlarms(std::string date, AlarmList& alarmlist) -{ - // check if on Active OAM Parent - bool OAMParentModuleFlag; - oamModuleInfo_t st; - - try - { - st = getModuleInfo(); - OAMParentModuleFlag = boost::get<4>(st); - - if (OAMParentModuleFlag) - { - //call getAlarm API directly - ALARMManager sm; - sm.getAlarm(date, alarmlist); - return; - } - } - catch (...) - { - exceptionControl("getAlarms", API_FAILURE); - } - - // build and send msg - int returnStatus = sendMsgToProcMgr3(GETALARMDATA, alarmlist, date); - - if (returnStatus != API_SUCCESS) - exceptionControl("getAlarms", returnStatus); -} - -/******************************************************************** - * - * check Active Alarm - * - ********************************************************************/ - -bool Oam::checkActiveAlarm(const int alarmid, const std::string moduleName, - const std::string deviceName) -{ - AlarmList activeAlarm; - - // check if on Active OAM Parent - bool OAMParentModuleFlag; - oamModuleInfo_t st; - - try - { - st = getModuleInfo(); - OAMParentModuleFlag = boost::get<4>(st); - - if (OAMParentModuleFlag) - { - //call getAlarm API directly - ALARMManager sm; - sm.getActiveAlarm(activeAlarm); - } - else if (UseHdfs > 0) - { - // read from HDFS files - if (readHdfsActiveAlarms(activeAlarm) != API_SUCCESS) - return false; - } - else - { - // build and send msg - int returnStatus = sendMsgToProcMgr3(GETACTIVEALARMDATA, activeAlarm, ""); - - if (returnStatus != API_SUCCESS) - return false; - } - } - catch (...) - { - return false; - } - - for (AlarmList::iterator i = activeAlarm.begin(); i != activeAlarm.end(); ++i) - { - // check if matching ID - if (alarmid != (i->second).getAlarmID() ) - continue; - - //check for moduleName of wildcard "*", if so return if alarm set on any module - if (deviceName.compare((i->second).getComponentID()) == 0 && - moduleName == "*") - return true; - - // check if the same fault component on same Module - if (deviceName.compare((i->second).getComponentID()) == 0 && - moduleName.compare((i->second).getSname()) == 0) - return true; - } - - return false; -} - /******************************************************************** * * get Local Module Information from Local Module Configuration file * * Returns: Local Module Name, Local Module Type, Local Module ID, - * OAM Parent Module Name, and OAM Parent Flag + * OAM Parent Module Name, and OAM Parent Flag * ********************************************************************/ @@ -2894,2287 +576,6 @@ oamModuleInfo_t Oam::getModuleInfo() return boost::make_tuple(localModule, localModuleType, localModuleID, ParentOAMModule, parentOAMModuleFlag, serverTypeInstall, StandbyOAMModule, standbyOAMModuleFlag); } -/******************************************************************** - * - * get My Process Status from Process Status file - * - ********************************************************************/ - -myProcessStatus_t Oam::getMyProcessStatus(pid_t processID) -{ - string returnValue; - ByteStream::quadbyte pid; - - if ( processID == 0 ) - // get current process PID - pid = getpid(); - else - pid = processID; - - // get process current Module - string moduleName; - oamModuleInfo_t st; - - try - { - st = getModuleInfo(); - moduleName = boost::get<0>(st); - } - catch (...) - { - //system("touch /var/log/mariadb/columnstore/test4"); - exceptionControl("getMyProcessStatus", API_FAILURE); - } - - if (!checkSystemRunning()) - exceptionControl("getMyProcessStatus", API_FAILURE); - - for ( int i = 0 ; i < 5 ; i ++) - { - try - { - MessageQueueClient processor("ProcStatusControl"); - // processor.syncProto(false); - ByteStream::byte status, state; - std::string processName; - ByteStream obs, ibs; - - obs << (ByteStream::byte) GET_PROC_STATUS_BY_PID; - obs << moduleName; - obs << pid; - - try - { - struct timespec ts = { 5, 0 }; - processor.write(obs, &ts); - - try - { - // wait 10 seconds for ACK from Process Monitor - struct timespec ts = { 10, 0 }; - - ibs = processor.read(&ts); - - if (ibs.length() > 0) - { - ibs >> status; - - if ( status == oam::API_SUCCESS ) - { - ibs >> state; - ibs >> processName; - } - else - { - // shutdown connection - processor.shutdown(); - //system("touch /var/log/mariadb/columnstore/test5"); - exceptionControl("getMyProcessStatus", API_FAILURE); - } - - // shutdown connection - processor.shutdown(); - - return boost::make_tuple((pid_t) pid, processName, state); - } - } - catch (...) - { - //system("touch /var/log/mariadb/columnstore/test6"); - processor.shutdown(); - exceptionControl("getMyProcessStatus", API_INVALID_PARAMETER); - } - } - catch (...) - { - //system("touch /var/log/mariadb/columnstore/test7"); - processor.shutdown(); - exceptionControl("getMyProcessStatus", API_INVALID_PARAMETER); - } - - // timeout occurred, shutdown connection - processor.shutdown(); - exceptionControl("getMyProcessStatus", API_TIMEOUT); - } - catch (...) - {} - } - - //system("touch /var/log/mariadb/columnstore/test9"); - exceptionControl("getMyProcessStatus", API_FAILURE); - - return boost::make_tuple(-1, "", -1); -} - -/******************************************************************** - * - * Stop Module - * - ********************************************************************/ - -void Oam::stopModule(DeviceNetworkList devicenetworklist, GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag) -{ - DeviceNetworkList::iterator pt = devicenetworklist.begin(); - - for ( ; pt != devicenetworklist.end() ; pt++) - { - // validate Module name - int returnStatus = validateModule((*pt).DeviceName); - - if (returnStatus != API_SUCCESS) - exceptionControl("stopModule", returnStatus); - } - - // build and send msg - int returnStatus = sendMsgToProcMgr2(STOPMODULE, devicenetworklist, gracefulflag, ackflag); - - if (returnStatus != API_SUCCESS) - exceptionControl("stopModule", returnStatus); -} - -/******************************************************************** - * - * Shutdown Module - build and send message to Process Manager - * - ********************************************************************/ - -void Oam::shutdownModule(DeviceNetworkList devicenetworklist, GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag) -{ - DeviceNetworkList::iterator pt = devicenetworklist.begin(); - - for ( ; pt != devicenetworklist.end() ; pt++) - { - // validate Module name - int returnStatus = validateModule((*pt).DeviceName); - - if (returnStatus != API_SUCCESS) - exceptionControl("shutdownModule", returnStatus); - } - - // build and send msg - int returnStatus = sendMsgToProcMgr2(SHUTDOWNMODULE, devicenetworklist, gracefulflag, ackflag); - - if (returnStatus != API_SUCCESS) - exceptionControl("shutdownModule", returnStatus); -} - -/******************************************************************** - * - * Start Module - build and send message to Process Manager - * - ********************************************************************/ - -void Oam::startModule(DeviceNetworkList devicenetworklist, ACK_FLAG ackflag) -{ - DeviceNetworkList::iterator pt = devicenetworklist.begin(); - - for ( ; pt != devicenetworklist.end() ; pt++) - { - // validate Module name - int returnStatus = validateModule((*pt).DeviceName); - - if (returnStatus != API_SUCCESS) - exceptionControl("startModule", returnStatus); - } - - // build and send msg - int returnStatus = sendMsgToProcMgr2(STARTMODULE, devicenetworklist, FORCEFUL, ackflag); - - if (returnStatus != API_SUCCESS) - exceptionControl("startModule", returnStatus); -} - -/******************************************************************** - * - * Restart Module - build and send message to Process Manager - * - ********************************************************************/ - -void Oam::restartModule(DeviceNetworkList devicenetworklist, GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag) -{ - DeviceNetworkList::iterator pt = devicenetworklist.begin(); - - for ( ; pt != devicenetworklist.end() ; pt++) - { - // validate Module name - int returnStatus = validateModule((*pt).DeviceName); - - if (returnStatus != API_SUCCESS) - exceptionControl("restartModule", returnStatus); - } - - // build and send msg - int returnStatus = sendMsgToProcMgr2(RESTARTMODULE, devicenetworklist, gracefulflag, ackflag); - - if (returnStatus != API_SUCCESS) - exceptionControl("restartModule", returnStatus); -} - -/******************************************************************** - * - * Disable Module - build and send message to Process Manager - * - ********************************************************************/ - -void Oam::disableModule(DeviceNetworkList devicenetworklist) -{ - DeviceNetworkList::iterator pt = devicenetworklist.begin(); - - for ( ; pt != devicenetworklist.end() ; pt++) - { - // validate Module name - int returnStatus = validateModule((*pt).DeviceName); - - if (returnStatus != API_SUCCESS) - exceptionControl("disableModule", returnStatus); - } - - // build and send msg - int returnStatus = sendMsgToProcMgr2(DISABLEMODULE, devicenetworklist, FORCEFUL, ACK_YES); - - if (returnStatus != API_SUCCESS) - exceptionControl("disableModule", returnStatus); -} - -/******************************************************************** - * - * Enable Module - build and send message to Process Manager - * - ********************************************************************/ - -void Oam::enableModule(DeviceNetworkList devicenetworklist) -{ - DeviceNetworkList::iterator pt = devicenetworklist.begin(); - - for ( ; pt != devicenetworklist.end() ; pt++) - { - // validate Module name - int returnStatus = validateModule((*pt).DeviceName); - - if (returnStatus != API_SUCCESS) - exceptionControl("enableModule", returnStatus); - } - - // build and send msg - int returnStatus = sendMsgToProcMgr2(ENABLEMODULE, devicenetworklist, FORCEFUL, ACK_YES); - - if (returnStatus != API_SUCCESS) - exceptionControl("enableModule", returnStatus); -} - -/******************************************************************** - * - * Stop System - build and send message to Process Manager - * - ********************************************************************/ - -void Oam::stopSystem(GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag) -{ - // build and send msg - int returnStatus = sendMsgToProcMgrWithStatus(STOPSYSTEM, "stopped", gracefulflag, ackflag); - - if (returnStatus != API_SUCCESS) - exceptionControl("stopSystem", returnStatus); -} - -/******************************************************************** - * - * Shutdown System - build and send message to Process Manager - * - ********************************************************************/ - -void Oam::shutdownSystem(GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag) -{ - int returnStatus = sendMsgToProcMgrWithStatus(SHUTDOWNSYSTEM, "shutdown", gracefulflag, ackflag); - - //Wait for everything to settle down - sleep(10); - - switch (returnStatus) - { - case API_SUCCESS: - cout << endl << " Successful shutdown of System " << endl << endl; - break; - - case API_CANCELLED: - cout << endl << " Shutdown of System canceled" << endl << endl; - break; - - default: - exceptionControl("shutdownSystem", returnStatus); - break; - } -} - -/******************************************************************** - * - * Suspend Database Writes - build and send message to Process Manager - * - ********************************************************************/ - -void Oam::SuspendWrites(GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag) -{ - SystemProcessStatus systemprocessstatus; - - // Send the message to suspend and wait for it to finish - int returnStatus = sendMsgToProcMgrWithStatus(SUSPENDWRITES, "write suspended", gracefulflag, ackflag); - - // An error throws here. - switch (returnStatus) - { - case API_SUCCESS: - cout << endl << "Suspend MariaDB Columnstore Database Writes Request successfully completed" << endl; - break; - - case API_FAILURE_DB_ERROR: - cout << endl << "**** stopDatabaseWrites Failed: save_brm Failed" << endl; - break; - - case API_CANCELLED: - cout << endl << " Suspension of database writes canceled" << endl << endl; - break; - - case API_FAILURE: - cout << endl << " Suspension of database writes failed: Filesystem sync failed" << endl << endl; - break; - - default: - exceptionControl("suspendWrites", returnStatus); - break; - } -} - -/******************************************************************** - * - * Start System - build and send message to Process Manager - * - ********************************************************************/ - -void Oam::startSystem(ACK_FLAG ackflag) -{ - // build and send msg - int returnStatus = sendMsgToProcMgr(STARTSYSTEM, "", FORCEFUL, ackflag); - - if (returnStatus != API_SUCCESS) - exceptionControl("startSystem", returnStatus); -} - -/******************************************************************** - * - * Restart System - build and send message to Process Manager - * - ********************************************************************/ - -int Oam::restartSystem(GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag) -{ - // Send the restart message (waits for completion) - int returnStatus = sendMsgToProcMgrWithStatus(RESTARTSYSTEM, "restarted", gracefulflag, ackflag); - - if (returnStatus != API_SUCCESS && returnStatus != API_CANCELLED) - { - exceptionControl("restartSystem", returnStatus); - } - - return returnStatus; -} - -/******************************************************************** - * - * Stop Process - build and send message to Process Manager - * - ********************************************************************/ - -void Oam::stopProcess(const std::string moduleName, const std::string processName, GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag) -{ - // validate Process name - int returnStatus = validateProcess(moduleName, processName); - - if (returnStatus != API_SUCCESS) - exceptionControl("stopProcess", returnStatus); - - // validate Process Name, don't allow Process-Monitor / Process-Manager - - if ( processName == "ProcessMonitor" || processName == "ProcessManager" ) - exceptionControl("stopProcess", API_INVALID_PARAMETER); - - // validate Process Name, don't allow COLD-STANDBY process - ProcessStatus procstat; - getProcessStatus(processName, moduleName, procstat); - - if ( procstat.ProcessOpState == oam::COLD_STANDBY ) - exceptionControl("stopProcess", API_INVALID_STATE); - - // build and send msg - returnStatus = sendMsgToProcMgr(STOPPROCESS, processName, gracefulflag, ackflag, moduleName); - - if (returnStatus != API_SUCCESS) - exceptionControl("stopProcess", returnStatus); -} - -/******************************************************************** - * - * Start Process - build and send message to Process Manager - * - ********************************************************************/ - -void Oam::startProcess(const std::string moduleName, const std::string processName, GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag) -{ - // validate Process name - int returnStatus = validateProcess(moduleName, processName); - - if (returnStatus != API_SUCCESS) - exceptionControl("startProcess", returnStatus); - - // validate Process Name, don't allow COLD-STANDBY process -// ProcessStatus procstat; -// getProcessStatus(processName, moduleName, procstat); -// if ( procstat.ProcessOpState == oam::COLD_STANDBY ) -// exceptionControl("startProcess", API_INVALID_STATE); - - // build and send msg - returnStatus = sendMsgToProcMgr(STARTPROCESS, processName, gracefulflag, ackflag, moduleName); - - if (returnStatus != API_SUCCESS) - exceptionControl("startProcess", returnStatus); -} - -/******************************************************************** - * - * Restart Process - build and send message to Process Manager - * - ********************************************************************/ - -void Oam::restartProcess(const std::string moduleName, const std::string processName, GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag) -{ - // validate Process name - int returnStatus = validateProcess(moduleName, processName); - - if (returnStatus != API_SUCCESS) - exceptionControl("restartProcess", returnStatus); - - // build and send msg - returnStatus = sendMsgToProcMgr(RESTARTPROCESS, processName, gracefulflag, ackflag, moduleName); - - if (returnStatus != API_SUCCESS) - exceptionControl("restartProcess", returnStatus); -} - -/******************************************************************** - * - * Stop Process - build and send message to Process Manager - * - ********************************************************************/ - -void Oam::stopProcessType(std::string type) -{ - // build and send msg - int returnStatus = sendMsgToProcMgr(STOPPROCESSTYPE, type); - - if (returnStatus != API_SUCCESS) - exceptionControl("stopProcessType", returnStatus); -} - -/******************************************************************** - * - * Start Processes - build and send message to Process Manager - * - ********************************************************************/ - -void Oam::startProcessType(std::string type) -{ - // build and send msg - int returnStatus = sendMsgToProcMgr(STARTPROCESSTYPE, type); - - if (returnStatus != API_SUCCESS) - exceptionControl("startProcessType", returnStatus); -} - -/******************************************************************** - * - * Restart Process Type- build and send message to Process Manager - * - ********************************************************************/ - -void Oam::restartProcessType(std::string type) -{ - // build and send msg - int returnStatus = sendMsgToProcMgr(RESTARTPROCESSTYPE, type); - - if (returnStatus != API_SUCCESS) - exceptionControl("restartProcessType", returnStatus); -} - -/******************************************************************** - * - * Reinit Process Type- build and send message to Process Manager - * - ********************************************************************/ - -void Oam::reinitProcessType(std::string type) -{ - // build and send msg - int returnStatus = sendMsgToProcMgr(REINITPROCESSTYPE, type, FORCEFUL); - - if (returnStatus != API_SUCCESS) - exceptionControl("reinitProcessType", returnStatus); -} - -/******************************************************************** - * - * Update Logging - Enable/Disable Logging with the system or on a specific - * Module at a specific level - * - ********************************************************************/ -void Oam::updateLog(const std::string action, const std::string deviceid, const std::string loglevel) -{ - // validate the loglevel - for ( int i = 0;; i++) - { - if ( LogLevel[i] == "" ) - { - // end of section list - exceptionControl("updateLog", API_INVALID_PARAMETER); - } - - if ( loglevel == LogLevel[i] ) - { - // build and send msg - int returnStatus = sendMsgToProcMgr(UPDATELOG, deviceid, FORCEFUL, ACK_YES, action, loglevel); - - if (returnStatus != API_SUCCESS) - exceptionControl("updateLog", returnStatus); - - return; - } - } -} - -/******************************************************************** - * - * Get Log File - Get Log file location for specific Module at a specific level - * - ********************************************************************/ -void Oam::getLogFile(const std::string moduleName, const std::string loglevel, std::string& filelocation) -{ - // validate Module name - int returnStatus = validateModule(moduleName); - - if (returnStatus != API_SUCCESS) - exceptionControl("getLogFile", returnStatus); - - string path; - - // Get Parent OAM Module name - - Config* sysConfig = Config::makeConfig(CalpontConfigFile.c_str()); - string Section = "SystemConfig"; - string ParentOAMModule = sysConfig->getConfig(Section, "ParentOAMModuleName"); - - if (moduleName == ParentOAMModule) - path = "//"; - else - path = "/mnt/" + moduleName; - - // get log file name for level - string logFile; - - for ( int i = 0;; i++) - { - if ( LogLevel[i] == "" ) - { - // end of list - exceptionControl("getLogFile", API_INVALID_PARAMETER); - break; - } - - if ( loglevel == LogLevel[i] ) - { - // match found, get and strip off to '/' - logFile = LogFile[i]; - string::size_type pos = logFile.find('/', 0); - - if (pos != string::npos) - { - logFile = logFile.substr(pos, 200); - break; - } - } - } - - filelocation = path + logFile; - -} - -/******************************************************************** - * - * Get Log File - Get Log file location for specific Module at a specific level - * - ********************************************************************/ -void Oam::getLogFile(const std::string moduleName, const std::string loglevel, const std::string date, - std::string& filelocation) -{ - // validate Module name - int returnStatus = validateModule(moduleName); - - if (returnStatus != API_SUCCESS) - exceptionControl("getLogFile", returnStatus); - - string path; - - // Get Parent OAM Module name - - Config* sysConfig = Config::makeConfig(CalpontConfigFile.c_str()); - string Section = "SystemConfig"; - string ParentOAMModule = sysConfig->getConfig(Section, "ParentOAMModuleName"); - - if (moduleName == ParentOAMModule) - path = "/"; - else - path = "/mnt/" + moduleName; - - // get log file name for level - string logFile; - string logFileName; - - for ( int i = 0;; i++) - { - if ( LogLevel[i] == "" ) - { - // end of list - exceptionControl("getLogFile", API_INVALID_PARAMETER); - break; - } - - if ( loglevel == LogLevel[i] ) - { - // match found, get and strip off to '/' - logFile = LogFile[i]; - string::size_type pos = logFile.find('/', 0); - - if (pos != string::npos) - { - logFile = logFile.substr(pos, 200); - - pos = logFile.rfind('/', 200); - logFileName = logFile.substr(pos + 1, 200); - break; - } - } - } - - logFile = path + logFile; - - string tempLogFile = tmpdir +"/logs"; - - //make 1 log file made up of archive and current *.log - (void)system(tempLogFile.c_str()); - - string logdir("/var/log/mariadb/columnstore"); - - if (access(logdir.c_str(), W_OK) != 0) logdir = tmpdir; - - string cmd = "ls " + path + logdir + "/archive | grep '" + logFileName + "' > " + tmpdir + "/logfiles"; - (void)system(cmd.c_str()); - - string fileName = tmpdir + "/logfiles"; - - ifstream oldFile (fileName.c_str()); - - if (oldFile) - { - char line[400]; - string buf; - - while (oldFile.getline(line, 400)) - { - buf = line; - cmd = "cat " + path + logdir + "/archive/" + buf + " >> " + tempLogFile; - (void)system(cmd.c_str()); - } - - oldFile.close(); - unlink (fileName.c_str()); - } - - cmd = "cat " + logFile + " >> " + tempLogFile; - (void)system(cmd.c_str()); - - //validate and get mm / dd from incoming date - if ( date.substr(2, 1) != "/" ) - exceptionControl("getLogFile", oam::API_INVALID_PARAMETER); - - string dd = date.substr(3, 2); - - if (dd.substr(0, 1) == "0" ) - dd = " " + dd.substr(1, 1); - - int mmName = atoi(date.substr(0, 2).c_str()); - string mm; - - switch ( mmName ) - { - case (1): - { - mm = "Jan"; - break; - } - - case (2): - { - mm = "Feb"; - break; - } - - case (3): - { - mm = "Mar"; - break; - } - - case (4): - { - mm = "Apr"; - break; - } - - case (5): - { - mm = "May"; - break; - } - - case (6): - { - mm = "Jun"; - break; - } - - case (7): - { - mm = "Jul"; - break; - } - - case (8): - { - mm = "Aug"; - break; - } - - case (9): - { - mm = "Sep"; - break; - } - - case (10): - { - mm = "Oct"; - break; - } - - case (11): - { - mm = "Nov"; - break; - } - - case (12): - { - mm = "Dec"; - break; - } - - default: - { - filelocation = ""; - return; - } - } - - string findDate = mm + " " + dd; - - ifstream file (tempLogFile.c_str()); - vector lines; - - if (file) - { - char line[400]; - string buf; - - while (file.getline(line, 400)) - { - buf = line; - string::size_type pos = buf.find(findDate, 0); - - if (pos != string::npos) - lines.push_back(buf); - } - - unlink (tempLogFile.c_str()); - } - - fileName = tmpdir + "/logsByDate"; - ofstream newFile (fileName.c_str()); - - //create new file - int fd = open(fileName.c_str(), O_RDWR | O_CREAT, 0664); - - copy(lines.begin(), lines.end(), ostream_iterator(newFile, "\n")); - newFile.close(); - - close(fd); - - filelocation = fileName; -} - -/******************************************************************** - * - * Get Log Config - Get Log Config data, which is the File IDs in the - * Module syslog.conf file - * - ********************************************************************/ -void Oam::getLogConfig(SystemLogConfigData& configdata ) -{ - SystemModuleTypeConfig systemmoduletypeconfig; - LogConfigData logconfigdata; - - try - { - Oam::getSystemConfig(systemmoduletypeconfig); - } - catch (...) - { - exceptionControl("getLogConfig", API_FAILURE); - } - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - //end of file - break; - - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleCount == 0 ) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - string moduleName = (*pt).DeviceName; - - int returnStatus = sendMsgToProcMgr(GETCONFIGLOG, - moduleName, - FORCEFUL, - ACK_YES); - - logconfigdata.moduleName = moduleName; - logconfigdata.configData = returnStatus; - - configdata.push_back(logconfigdata); - } - } -} - -/****************************************************************************************** - * @brief DisplayLockedTables - * - * purpose: Show the details of all the locks in tableLocks - * Used when attempting to suspend or stop the - * database, but there are table locks. - * - ******************************************************************************************/ -void Oam::DisplayLockedTables(std::vector& tableLocks, BRM::DBRM* pDBRM) -{ - cout << "The following tables are locked:" << endl; - - // Initial widths of columns to display. We pass thru the table - // and see if we need to grow any of these. - unsigned int lockIDColumnWidth = 6; // "LockID" - unsigned int tableNameColumnWidth = 12; // "Name" - unsigned int ownerColumnWidth = 7; // "Process" - unsigned int pidColumnWidth = 3; // "PID" - unsigned int sessionIDColumnWidth = 7; // "Session" - unsigned int createTimeColumnWidth = 12; // "CreationTime" - unsigned int dbrootColumnWidth = 7; // "DBRoots" - unsigned int stateColumnWidth = 9; // "State" - - // Initialize System Catalog object used to get table name - boost::shared_ptr systemCatalogPtr = - execplan::CalpontSystemCatalog::makeCalpontSystemCatalog(0); - - std::string fullTblName; - const char* tableState; - - // Make preliminary pass through the table locks in order to determine our - // output column widths based on the data. Min column widths are based on - // the width of the column heading (except for the 'state' column). - uint64_t maxLockID = 0; - uint32_t maxPID = 0; - int32_t maxSessionID = 0; - int32_t minSessionID = 0; - std::vector createTimes; - std::vector pms; - char cTimeBuffer[64]; - - execplan::CalpontSystemCatalog::TableName tblName; - - for (unsigned idx = 0; idx < tableLocks.size(); idx++) - { - if (tableLocks[idx].id > maxLockID) - { - maxLockID = tableLocks[idx].id; - } - - try - { - tblName = systemCatalogPtr->tableName(tableLocks[idx].tableOID); - } - catch (...) - { - tblName.schema.clear(); - tblName.table.clear(); - } - - fullTblName = tblName.toString(); - - if (fullTblName.size() > tableNameColumnWidth) - { - tableNameColumnWidth = fullTblName.size(); - } - - if (tableLocks[idx].ownerName.length() > ownerColumnWidth) - { - ownerColumnWidth = tableLocks[idx].ownerName.length(); - } - - if (tableLocks[idx].ownerPID > maxPID) - { - maxPID = tableLocks[idx].ownerPID; - } - - if (tableLocks[idx].ownerSessionID > maxSessionID) - { - maxSessionID = tableLocks[idx].ownerSessionID; - } - - if (tableLocks[idx].ownerSessionID < minSessionID) - { - minSessionID = tableLocks[idx].ownerSessionID; - } - - // Creation Time. - // While we're at it, we save the time string off into a vector - // so we can display it later without recalcing it. - struct tm timeTM; - localtime_r(&tableLocks[idx].creationTime, &timeTM); - ctime_r(&tableLocks[idx].creationTime, cTimeBuffer); - strftime(cTimeBuffer, 64, "%F %r:", &timeTM); - cTimeBuffer[strlen(cTimeBuffer) - 1] = '\0'; // strip trailing '\n' - std::string cTimeStr( cTimeBuffer ); - - if (cTimeStr.length() > createTimeColumnWidth) - { - createTimeColumnWidth = cTimeStr.length(); - } - - createTimes.push_back(cTimeStr); - } - - tableNameColumnWidth += 1; - ownerColumnWidth += 1; - createTimeColumnWidth += 1; - - std::ostringstream idString; - idString << maxLockID; - - if (idString.str().length() > lockIDColumnWidth) - lockIDColumnWidth = idString.str().length(); - - lockIDColumnWidth += 1; - - std::ostringstream pidString; - pidString << maxPID; - - if (pidString.str().length() > pidColumnWidth) - pidColumnWidth = pidString.str().length(); - - pidColumnWidth += 1; - - const std::string sessionNoneStr("BulkLoad"); - std::ostringstream sessionString; - sessionString << maxSessionID; - - if (sessionString.str().length() > sessionIDColumnWidth) - sessionIDColumnWidth = sessionString.str().length(); - - if ((minSessionID < 0) && - (sessionNoneStr.length() > sessionIDColumnWidth)) - sessionIDColumnWidth = sessionNoneStr.length(); - - sessionIDColumnWidth += 1; - - // write the column headers before the first entry - cout.setf(ios::left, ios::adjustfield); - cout << setw(lockIDColumnWidth) << "LockID" << - setw(tableNameColumnWidth) << "Name" << - setw(ownerColumnWidth) << "Process" << - setw(pidColumnWidth) << "PID" << - setw(sessionIDColumnWidth) << "Session" << - setw(createTimeColumnWidth) << "CreationTime" << - setw(stateColumnWidth) << "State" << - setw(dbrootColumnWidth) << "DBRoots" << endl; - - for (unsigned idx = 0; idx < tableLocks.size(); idx++) - { - try - { - - tblName = systemCatalogPtr->tableName(tableLocks[idx].tableOID); - } - catch (...) - { - tblName.schema.clear(); - tblName.table.clear(); - } - - fullTblName = tblName.toString(); - cout << - setw(lockIDColumnWidth) << tableLocks[idx].id << - setw(tableNameColumnWidth) << fullTblName << - setw(ownerColumnWidth) << tableLocks[idx].ownerName << - setw(pidColumnWidth) << tableLocks[idx].ownerPID; - - // Log session ID, or "BulkLoad" if session is -1 - if (tableLocks[idx].ownerSessionID < 0) - cout << setw(sessionIDColumnWidth) << sessionNoneStr; - else - cout << setw(sessionIDColumnWidth) << - tableLocks[idx].ownerSessionID; - - // Creation Time - cout << setw(createTimeColumnWidth) << createTimes[idx]; - - // Processor State - if (pDBRM && !pDBRM->checkOwner(tableLocks[idx].id)) - { - tableState = "Abandoned"; - } - else - { - tableState = ((tableLocks[idx].state == BRM::LOADING) ? - "LOADING" : "CLEANUP"); - } - - cout << setw(stateColumnWidth) << tableState; - - // PM List - cout << setw(dbrootColumnWidth); - - for (unsigned k = 0; k < tableLocks[idx].dbrootList.size(); k++) - { - if (k > 0) - cout << ','; - - cout << tableLocks[idx].dbrootList[k]; - } - - cout << endl; - } // end of loop through table locks -} - -/****************************************************************************************** - * @brief getCurrentTime - * - * purpose: get time/date in string format - * - ******************************************************************************************/ -string Oam::getCurrentTime() -{ - time_t cal; - time (&cal); - string stime; - char ctime[26]; - ctime_r (&cal, ctime); - stime = ctime; -// string stime = ctime_r (&cal); - // strip off cr/lf - stime = stime.substr (0, 24); - return stime; -} - -/****************************************************************************************** - * @brief Get Local DBRM ID - * - * purpose: Get Local DBRM ID for Module - * - ******************************************************************************************/ -int Oam::getLocalDBRMID(const std::string moduleName) -{ - string cmd = "touch " + CalpontConfigFile; - (void)system(cmd.c_str()); - - string SECTION = "DBRM_Worker"; - - Config* sysConfig = Config::makeConfig(CalpontConfigFile.c_str()); - - int numWorker = atoi(sysConfig->getConfig("DBRM_Controller", "NumWorkers").c_str()); - - for (int workerID = 1; workerID < numWorker + 1; workerID++) - { - string section = SECTION + itoa(workerID); - - if ( sysConfig->getConfig(section, "Module") == moduleName ) - return workerID; - } - - // not found - exceptionControl("getLocalDBRMID", API_INVALID_PARAMETER); - return -1; -} - -/****************************************************************************************** - * @brief build empty set of System Tables - * - * purpose: build empty set of System Tables - * - ******************************************************************************************/ -void Oam::buildSystemTables() -{ - //determine active PM (DDLProc is ACTIVE) to send request to - SystemProcessStatus systemprocessstatus; - string PMmodule; - int returnStatus = API_FAILURE; - - try - { - getProcessStatus(systemprocessstatus); - - for ( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++) - { - if ( systemprocessstatus.processstatus[i].ProcessName == "DDLProc" && - systemprocessstatus.processstatus[i].ProcessOpState == oam::ACTIVE) - { - PMmodule = systemprocessstatus.processstatus[i].Module; - - // build and send msg - returnStatus = sendMsgToProcMgr(BUILDSYSTEMTABLES, PMmodule, FORCEFUL, ACK_YES); - } - } - } - catch (...) - { - exceptionControl("buildSystemTables", API_FAILURE); - } - - if (returnStatus != API_SUCCESS) - exceptionControl("buildSystemTables", returnStatus); - else - return; -} - -/****************************************************************************************** - * @brief Get Network IP Address for Host Name - * - * purpose: Get Network IP Address for Host Name - * - ******************************************************************************************/ -string Oam::getIPAddress(string hostName) -{ - static uint32_t my_bind_addr; - struct hostent* ent; - string IPAddr = ""; - - ent = gethostbyname(hostName.c_str()); - - if (ent != 0) - { - my_bind_addr = (uint32_t) ((in_addr*)ent->h_addr_list[0])->s_addr; - - uint8_t split[4]; - uint32_t ip = my_bind_addr; - split[0] = (ip & 0xff000000) >> 24; - split[1] = (ip & 0x00ff0000) >> 16; - split[2] = (ip & 0x0000ff00) >> 8; - split[3] = (ip & 0x000000ff); - - IPAddr = itoa(split[3]) + "." + itoa(split[2]) + "." + itoa(split[1]) + "." + itoa(split[0]); - } - - return IPAddr; -} - -/****************************************************************************************** - * @brief Get System TOP Process CPU Users - * - * purpose: Get System TOP Process CPU Users - * - ******************************************************************************************/ -void Oam::getTopProcessCpuUsers(int topNumber, SystemTopProcessCpuUsers& systemtopprocesscpuusers) -{ - SystemModuleTypeConfig systemmoduletypeconfig; - TopProcessCpuUsers Topprocesscpuusers; - - try - { - Oam::getSystemConfig(systemmoduletypeconfig); - } - catch (...) - { - exceptionControl("getTopProcessCpuUsers", API_FAILURE); - } - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - //end of file - break; - - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleCount == 0 ) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - string moduleName = (*pt).DeviceName; - - try - { - getTopProcessCpuUsers(moduleName, topNumber, Topprocesscpuusers); - - systemtopprocesscpuusers.topprocesscpuusers.push_back(Topprocesscpuusers); - } - catch (exception&) - { - } - } - } -} - -/****************************************************************************************** - * @brief Get Module TOP Process CPU Users - * - * purpose: Get SModule TOP Process CPU Users - * - ******************************************************************************************/ -void Oam::getTopProcessCpuUsers(const std::string module, int topNumber, TopProcessCpuUsers& topprocesscpuusers) -{ - ByteStream msg; - ByteStream receivedMSG; - ByteStream::byte count; - string processName; - ByteStream::quadbyte cpuUsage; - ProcessCpuUser Processcpuuser; - topprocesscpuusers.processcpuuser.clear(); - - // validate Module name - if ( module.find("xm") != string::npos ) - exceptionControl("getTopProcessCpuUsers", API_INVALID_PARAMETER); - - int returnStatus = validateModule(module); - - if (returnStatus != API_SUCCESS) - exceptionControl("getTopProcessCpuUsers", returnStatus); - - // setup message - msg << (ByteStream::byte) GET_PROC_CPU_USAGE; - msg << (ByteStream::byte) topNumber; - - topprocesscpuusers.ModuleName = module; - topprocesscpuusers.numberTopUsers = topNumber; - - try - { - //send the msg to Server Monitor - MessageQueueClient servermonitor(module + "_ServerMonitor"); - servermonitor.write(msg); - - // wait 10 seconds for ACK from Server Monitor - struct timespec ts = { 30, 0 }; - - receivedMSG = servermonitor.read(&ts); - - if (receivedMSG.length() > 0) - { - receivedMSG >> count; - - for ( int i = 0 ; i < count ; i++) - { - receivedMSG >> processName; - receivedMSG >> cpuUsage; - - Processcpuuser.ProcessName = processName; - Processcpuuser.CpuUsage = cpuUsage; - - topprocesscpuusers.processcpuuser.push_back(Processcpuuser); - } - - } - else// timeout - exceptionControl("getTopProcessCpuUsers", API_TIMEOUT); - - // shutdown connection - servermonitor.shutdown(); - } - catch (...) - { - exceptionControl("getTopProcessCpuUsers", API_FAILURE); - } -} - -/****************************************************************************************** - * @brief get System CPU Usage - * - * purpose: get System CPU Usage - * - ******************************************************************************************/ -void Oam::getSystemCpuUsage(SystemCpu& systemcpu) -{ - SystemModuleTypeConfig systemmoduletypeconfig; - ModuleCpu Modulecpu; - - try - { - Oam::getSystemConfig(systemmoduletypeconfig); - } - catch (...) - { - exceptionControl("getSystemCpuUsage", API_FAILURE); - } - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - //end of file - break; - - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleCount == 0 ) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - string moduleName = (*pt).DeviceName; - - try - { - getModuleCpuUsage(moduleName, Modulecpu); - - systemcpu.modulecpu.push_back(Modulecpu); - } - catch (exception&) - { - } - } - } -} - -/****************************************************************************************** - * @brief get Module CPU Usage - * - * purpose: get Module CPU Usage - * - ******************************************************************************************/ -void Oam::getModuleCpuUsage(const std::string module, ModuleCpu& modulecpu) -{ - ByteStream msg; - ByteStream receivedMSG; - string processName; - ByteStream::byte cpuUsage; - - // validate Module name - if ( module.find("xm") != string::npos ) - exceptionControl("getModuleCpuUsage", API_INVALID_PARAMETER); - - int returnStatus = validateModule(module); - - if (returnStatus != API_SUCCESS) - exceptionControl("getModuleCpuUsage", returnStatus); - - // setup message - msg << (ByteStream::byte) GET_MODULE_CPU_USAGE; - - modulecpu.ModuleName = module; - - try - { - //send the msg to Server Monitor - MessageQueueClient servermonitor(module + "_ServerMonitor"); - servermonitor.write(msg); - - // wait 30 seconds for ACK from Server Monitor - struct timespec ts = { 30, 0 }; - - receivedMSG = servermonitor.read(&ts); - - if (receivedMSG.length() > 0) - { - receivedMSG >> cpuUsage; - - modulecpu.CpuUsage = cpuUsage; - } - else // timeout - exceptionControl("getModuleCpuUsage", API_TIMEOUT); - - // shutdown connection - servermonitor.shutdown(); - } - catch (...) - { - exceptionControl("getModuleCpuUsage", API_FAILURE); - } -} - -/****************************************************************************************** - * @brief get System TOP Process Memory Users - * - * purpose: get System TOP Process Memory Users - * - ******************************************************************************************/ -void Oam::getTopProcessMemoryUsers(int topNumber, SystemTopProcessMemoryUsers& systemtopprocessmemoryusers) -{ - SystemModuleTypeConfig systemmoduletypeconfig; - TopProcessMemoryUsers Topprocessmemoryusers; - - try - { - Oam::getSystemConfig(systemmoduletypeconfig); - } - catch (...) - { - exceptionControl("getTopProcessMemoryUsers", API_FAILURE); - } - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - //end of file - break; - - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleCount == 0 ) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - string moduleName = (*pt).DeviceName; - - try - { - getTopProcessMemoryUsers(moduleName, topNumber, Topprocessmemoryusers); - - systemtopprocessmemoryusers.topprocessmemoryusers.push_back(Topprocessmemoryusers); - } - catch (exception&) - { - } - } - } -} - -/****************************************************************************************** - * @brief get Module TOP Process Memory Users - * - * purpose: get Module TOP Process Memory Users - * - ******************************************************************************************/ -void Oam::getTopProcessMemoryUsers(const std::string module, int topNumber, TopProcessMemoryUsers& topprocessmemoryusers) -{ - ByteStream msg; - ByteStream receivedMSG; - ByteStream::byte count; - string processName; - ByteStream::quadbyte memoryUsed; - ByteStream::byte memoryUsage; - ProcessMemoryUser Processmemoryuser; - topprocessmemoryusers.processmemoryuser.clear(); - - // validate Module name - if ( module.find("xm") != string::npos ) - exceptionControl("getTopProcessMemoryUsers", API_INVALID_PARAMETER); - - int returnStatus = validateModule(module); - - if (returnStatus != API_SUCCESS) - exceptionControl("getTopProcessMemoryUsers", returnStatus); - - // setup message - msg << (ByteStream::byte) GET_PROC_MEMORY_USAGE; - msg << (ByteStream::byte) topNumber; - - topprocessmemoryusers.ModuleName = module; - topprocessmemoryusers.numberTopUsers = topNumber; - - try - { - //send the msg to Server Monitor - MessageQueueClient servermonitor(module + "_ServerMonitor"); - servermonitor.write(msg); - - // wait 30 seconds for ACK from Server Monitor - struct timespec ts = { 30, 0 }; - - receivedMSG = servermonitor.read(&ts); - - if (receivedMSG.length() > 0) - { - receivedMSG >> count; - - for ( int i = 0 ; i < count ; i++) - { - receivedMSG >> processName; - receivedMSG >> memoryUsed; - receivedMSG >> memoryUsage; - - Processmemoryuser.ProcessName = processName; - Processmemoryuser.MemoryUsed = memoryUsed; - Processmemoryuser.MemoryUsage = memoryUsage; - - topprocessmemoryusers.processmemoryuser.push_back(Processmemoryuser); - } - - } - else // timeout - exceptionControl("getTopProcessMemoryUsers", API_TIMEOUT); - - // shutdown connection - servermonitor.shutdown(); - } - catch (...) - { - exceptionControl("getTopProcessMemoryUsers", API_FAILURE); - } -} - -/****************************************************************************************** - * @brief get System Memory Usage - * - * purpose: get System Memory Usage - * - ******************************************************************************************/ -void Oam::getSystemMemoryUsage(SystemMemory& systemmemory) -{ - SystemModuleTypeConfig systemmoduletypeconfig; - ModuleMemory Modulememory; - - try - { - Oam::getSystemConfig(systemmoduletypeconfig); - } - catch (...) - { - exceptionControl("getSystemMemoryUsage", API_FAILURE); - } - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - //end of file - break; - - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleCount == 0 ) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - string moduleName = (*pt).DeviceName; - - try - { - getModuleMemoryUsage(moduleName, Modulememory); - - systemmemory.modulememory.push_back(Modulememory); - } - catch (exception&) - { - } - } - } -} - -/****************************************************************************************** - * @brief get Module Memory Usage - * - * purpose: get Module Memory Usage - * - ******************************************************************************************/ -void Oam::getModuleMemoryUsage(const std::string module, ModuleMemory& modulememory) -{ - ByteStream msg; - ByteStream receivedMSG; - string processName; - ByteStream::quadbyte mem_total; - ByteStream::quadbyte mem_used; - ByteStream::quadbyte cache; - ByteStream::byte memoryUsagePercent; - ByteStream::quadbyte swap_total; - ByteStream::quadbyte swap_used; - ByteStream::byte swapUsagePercent; - - // validate Module name - if ( module.find("xm") != string::npos ) - exceptionControl("getModuleMemoryUsage", API_INVALID_PARAMETER); - - // validate Module name - int returnStatus = validateModule(module); - - if (returnStatus != API_SUCCESS) - exceptionControl("getModuleMemoryUsage", returnStatus); - - // setup message - msg << (ByteStream::byte) GET_MODULE_MEMORY_USAGE; - - modulememory.ModuleName = module; - - try - { - //send the msg to Server Monitor - MessageQueueClient servermonitor(module + "_ServerMonitor"); - servermonitor.write(msg); - - // wait 30 seconds for ACK from Server Monitor - struct timespec ts = { 30, 0 }; - - receivedMSG = servermonitor.read(&ts); - - if (receivedMSG.length() > 0) - { - receivedMSG >> mem_total; - receivedMSG >> mem_used; - receivedMSG >> cache; - receivedMSG >> memoryUsagePercent; - receivedMSG >> swap_total; - receivedMSG >> swap_used; - receivedMSG >> swapUsagePercent; - - modulememory.MemoryTotal = mem_total; - modulememory.MemoryUsed = mem_used; - modulememory.cache = cache; - modulememory.MemoryUsage = memoryUsagePercent; - modulememory.SwapTotal = swap_total; - modulememory.SwapUsed = swap_used; - modulememory.SwapUsage = swapUsagePercent; - } - else // timeout - exceptionControl("getModuleMemoryUsage", API_TIMEOUT); - - // shutdown connection - servermonitor.shutdown(); - } - catch (...) - { - exceptionControl("getModuleMemoryUsage", API_FAILURE); - } -} - -/****************************************************************************************** - * @brief get System Disk Usage - * - * purpose: get System Disk Usage - * - ******************************************************************************************/ -void Oam::getSystemDiskUsage(SystemDisk& systemdisk) -{ - SystemModuleTypeConfig systemmoduletypeconfig; - ModuleDisk Moduledisk; - - try - { - Oam::getSystemConfig(systemmoduletypeconfig); - } - catch (...) - { - exceptionControl("getSystemMemoryUsage", API_FAILURE); - } - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - //end of file - break; - - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleCount == 0 ) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - string moduleName = (*pt).DeviceName; - - try - { - getModuleDiskUsage(moduleName, Moduledisk); - - systemdisk.moduledisk.push_back(Moduledisk); - } - catch (exception&) - { - } - } - } -} - -/****************************************************************************************** - * @brief get Module Disk Usage - * - * purpose: get Module Disk Usage - * - ******************************************************************************************/ -void Oam::getModuleDiskUsage(const std::string module, ModuleDisk& moduledisk) -{ - ByteStream msg; - ByteStream receivedMSG; - string processName; - DiskUsage Diskusage; - moduledisk.diskusage.clear(); - - // validate Module name - if ( module.find("xm") != string::npos ) - exceptionControl("getModuleDiskUsage", API_INVALID_PARAMETER); - - // validate Module name - int returnStatus = validateModule(module); - - if (returnStatus != API_SUCCESS) - exceptionControl("getModuleDiskUsage", returnStatus); - - ByteStream::byte entries; - string deviceName; - uint64_t totalBlocks; - uint64_t usedBlocks; - uint8_t diskUsage; - - // setup message - msg << (ByteStream::byte) GET_MODULE_DISK_USAGE; - - moduledisk.ModuleName = module; - - try - { - //send the msg to Server Monitor - MessageQueueClient servermonitor(module + "_ServerMonitor"); - servermonitor.write(msg); - - // wait 30 seconds for ACK from Server Monitor - struct timespec ts = { 30, 0 }; - - receivedMSG = servermonitor.read(&ts); - - if (receivedMSG.length() > 0) - { - receivedMSG >> entries; - - for ( int i = 0 ; i < entries ; i++) - { - receivedMSG >> deviceName; - receivedMSG >> totalBlocks; - receivedMSG >> usedBlocks; - receivedMSG >> diskUsage; - - Diskusage.DeviceName = deviceName; - Diskusage.TotalBlocks = totalBlocks; - Diskusage.UsedBlocks = usedBlocks; - Diskusage.DiskUsage = diskUsage; - - moduledisk.diskusage.push_back(Diskusage); - } - } - else // timeout - exceptionControl("getModuleDiskUsage", API_TIMEOUT); - - // shutdown connection - servermonitor.shutdown(); - } - catch (...) - { - exceptionControl("getModuleDiskUsage", API_FAILURE); - } -} - -/****************************************************************************************** - * @brief get Active SQL Statements - * - * purpose: get Active SQL Statements - * - ******************************************************************************************/ -void Oam::getActiveSQLStatements(ActiveSqlStatements& activesqlstatements) -{ - SystemModuleTypeConfig systemmoduletypeconfig; - ByteStream msg; - ByteStream receivedMSG; - ByteStream::byte entries; - ByteStream::byte retStatus; - - try - { - Oam::getSystemConfig(systemmoduletypeconfig); - - // get Server Type Install ID - int serverTypeInstall = oam::INSTALL_NORMAL; - oamModuleInfo_t st; - st = getModuleInfo(); - serverTypeInstall = boost::get<5>(st); - - string sendModule; - - switch (serverTypeInstall) - { - case oam::INSTALL_NORMAL: - case oam::INSTALL_COMBINE_DM_UM: - sendModule = "um"; - break; - - case oam::INSTALL_COMBINE_PM_UM: - case oam::INSTALL_COMBINE_DM_UM_PM: - sendModule = "pm"; - break; - } - - //send request to modules - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - //end of file - break; - - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType == sendModule ) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleCount == 0 ) - break; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - string module = (*pt).DeviceName; - - // setup message - msg << (ByteStream::byte) GET_ACTIVE_SQL_QUERY; - - //send the msg to Server Monitor - MessageQueueClient servermonitor(module + "_ServerMonitor"); - servermonitor.write(msg); - - // wait 30 seconds for ACK from Server Monitor - struct timespec ts = { 30, 0 }; - - receivedMSG = servermonitor.read(&ts); - - if (receivedMSG.length() > 0) - { - receivedMSG >> retStatus; - - if ( retStatus != oam::API_SUCCESS ) - { - // shutdown connection - servermonitor.shutdown(); - exceptionControl("getActiveSQLStatements", (int) retStatus); - } - - receivedMSG >> entries; - ActiveSqlStatement activeSqlStatement; - - for (int i = 0; i < entries; i++) - { - receivedMSG >> activeSqlStatement.sqlstatement; - receivedMSG >> activeSqlStatement.starttime; - receivedMSG >> activeSqlStatement.sessionid; - activesqlstatements.push_back(activeSqlStatement); - } - } - else - { - // timeout - exceptionControl("getActiveSQLStatements", API_TIMEOUT); - } - - // shutdown connection - servermonitor.shutdown(); - } - - break; - } - } - } - catch (std::exception& ex) - { - exceptionControl("getActiveSQLStatements", API_FAILURE, ex.what()); - } - catch (...) - { - exceptionControl("getActiveSQLStatements", API_FAILURE); - } -} - -/******************************************************************** - * - * IsValidIP - Validate IP Address format - * - ********************************************************************/ -bool Oam::isValidIP(const std::string ipAddress) -{ - int currentPos = 0; - - for ( int i = 0 ; i < 4 ; i++) - { - string::size_type pos = ipAddress.find(".", currentPos); - - if (pos != string::npos) - { - if ( (pos - currentPos) > 3 || (pos - currentPos) <= 0) - return false; - - currentPos = pos + 1; - } - else - { - if ( i < 3 ) - return false; - - if ( (ipAddress.size() - currentPos) > 3 || (ipAddress.size() - currentPos) <= 0) - return false; - else - return true; - } - } - - return false; -} - - -/******************************************************************** - * - * incrementIPAddress - Increment IP Address - * - ********************************************************************/ -std::string Oam::incrementIPAddress(const std::string ipAddress) -{ - string newipAddress = ipAddress; - string::size_type pos = ipAddress.rfind(".", 80); - - if (pos != string::npos) - { - string last = ipAddress.substr(pos + 1, 80); - int Ilast = atoi(last.c_str()); - Ilast++; - - if ( Ilast > 255 ) - { - writeLog("incrementIPAddress: new address invalid, larger than 255", LOG_TYPE_ERROR ); - exceptionControl("incrementIPAddress", API_FAILURE); - } - - last = itoa(Ilast); - newipAddress = ipAddress.substr(0, pos + 1); - newipAddress = newipAddress + last; - } - else - { - writeLog("incrementIPAddress: passed address invalid: " + ipAddress, LOG_TYPE_ERROR ); - exceptionControl("incrementIPAddress", API_FAILURE); - } - - return newipAddress; -} - -/******************************************************************** - * - * checkLogStatus - Check for a phrase in a log file and return status - * - ********************************************************************/ -bool Oam::checkLogStatus(std::string fileName, std::string phrase ) -{ - ifstream file (fileName.c_str()); - - if (!file.is_open()) - { - return false; - } - - string buf; - - while (getline(file, buf)) - { - string::size_type pos = buf.find(phrase, 0); - - if (pos != string::npos) - //found phrase - return true; - } - - if (file.bad()) - { - return false; - } - - file.close(); - return false; -} - -/******************************************************************** - * - * fixRSAkey - Fix RSA key - * - ********************************************************************/ -void Oam::fixRSAkey(std::string logFile) -{ - ifstream file (logFile.c_str()); - - char line[400]; - string buf; - - while (file.getline(line, 400)) - { - buf = line; - - string::size_type pos = buf.find("Offending", 0); - - if (pos != string::npos) - { - // line ID - pos = buf.find(":", 0); - string lineID = buf.substr(pos + 1, 80); - - //remove non alphanumber characters - for (size_t i = 0; i < lineID.length();) - { - if (!isdigit(lineID[i])) - lineID.erase(i, 1); - else - i++; - } - - //get user - string USER = "root"; - char* p = getenv("USER"); - - if (p && *p) - USER = p; - - string userDir = USER; - - if ( USER != "root") - userDir = "home/" + USER; - - string cmd = "sed '" + lineID + "d' /" + userDir + "/.ssh/known_hosts > /" + userDir + "/.ssh/known_hosts"; - cout << cmd << endl; - system(cmd.c_str()); - return; - } - - } - - file.close(); - - return; -} - -/******************************************************************** - * - * getWritablePM - Get PM with read-write mount - * - ********************************************************************/ -string Oam::getWritablePM() -{ - string moduleName; - oamModuleInfo_t st; - - try - { - st = getModuleInfo(); - moduleName = boost::get<3>(st); - - if ( moduleName == oam::UnassignedName ) - return ""; - - return moduleName; - } - catch (...) - { - exceptionControl("getWritablePM", API_FAILURE); - } - - return ""; -} - -/******************************************************************** - * - * getHotStandbyPM - * - ********************************************************************/ -string Oam::getHotStandbyPM() -{ - string fileName = "/var/lib/columnstore/local/hotStandbyPM"; - string module; - - ifstream oldFile (fileName.c_str()); - - if (!oldFile) - return module; - - char line[400]; - - while (oldFile.getline(line, 400)) - { - module = line; - break; - } - - oldFile.close(); - - return module; - -} - -/******************************************************************** - * - * setHotStandbyPM - * - ********************************************************************/ -void Oam::setHotStandbyPM(std::string moduleName) -{ - string fileName = "/var/lib/columnstore/local/hotStandbyPM"; - - unlink (fileName.c_str()); - - if ( moduleName.empty() || moduleName == " " ) - return; - - ofstream newFile (fileName.c_str()); - - string cmd = "echo " + moduleName + " > " + fileName; - (void)system(cmd.c_str()); - - newFile.close(); - - return; -} - -/******************************************************************** - * - * Distribute Calpont Configure File - * - ********************************************************************/ -void Oam::distributeConfigFile(std::string name, std::string file) -{ - ACK_FLAG ackflag = oam::ACK_YES; - - if ( name == "system" ) - ackflag = oam::ACK_NO; - - // build and send msg - int returnStatus = sendMsgToProcMgr(DISTRIBUTECONFIG, name, oam::FORCEFUL, ackflag, file, "", 30); - - if (returnStatus != API_SUCCESS) - exceptionControl("distributeConfigFile", returnStatus); - - return; -} - -/******************************************************************** - * - * Switch Parent OAM Module - * - ********************************************************************/ -bool Oam::switchParentOAMModule(std::string moduleName, GRACEFUL_FLAG gracefulflag) -{ - if (!checkSystemRunning()) - exceptionControl("switchParentOAMModule", API_FAILURE); - - int returnStatus; - // We assume that moduleName is a valid pm - - // check if current Active Parent Process-Manager is down and running on Standby Module - // if so, send signal to Standby Process-Manager to start failover - Config* sysConfig = Config::makeConfig(); - - string IPAddr = sysConfig->getConfig("ProcStatusControl", "IPAddr"); - - string cmdLine = "ping "; - string cmdOption = " -w 1 >> /dev/null"; - string cmd = cmdLine + IPAddr + cmdOption; - - if ( system(cmd.c_str()) != 0 ) - { - //ping failure - try - { - string standbyOAMModule; - getSystemConfig("StandbyOAMModuleName", standbyOAMModule); - - oamModuleInfo_t t = Oam::getModuleInfo(); - string localModule = boost::get<0>(t); - - if (standbyOAMModule == localModule ) - // send SIGUSR1 - system("pkill -SIGUSR1 ProcMgr"); - } - catch (...) - { - exceptionControl("switchParentOAMModule", API_FAILURE); - } - - return false; - } - - // only make call if system is ACTIVE and module switching to is ACTIVE - SystemStatus systemstatus; - - try - { - getSystemStatus(systemstatus); - } - catch (exception& ) - {} - - if (systemstatus.SystemOpState == oam::MAN_INIT || - systemstatus.SystemOpState == oam::AUTO_INIT || - systemstatus.SystemOpState == oam::UP || - systemstatus.SystemOpState == oam::BUSY_INIT || - systemstatus.SystemOpState == oam::UP ) - exceptionControl("switchParentOAMModule", API_INVALID_STATE); - - if (systemstatus.SystemOpState == oam::ACTIVE || - systemstatus.SystemOpState == oam::FAILED ) - { - // build and send msg to stop system - returnStatus = sendMsgToProcMgrWithStatus(STOPSYSTEM, "OAM Module switched", gracefulflag, ACK_YES); - - if ( returnStatus != API_SUCCESS ) - exceptionControl("stopSystem", returnStatus); - } - - // build and send msg to switch configuration - cout << endl << " Switch Active Parent OAM to Module '" << moduleName << "', please wait..."; - returnStatus = sendMsgToProcMgr(SWITCHOAMPARENT, moduleName, FORCEFUL, ACK_YES); - - if (returnStatus != API_SUCCESS) - exceptionControl("switchParentOAMModule", returnStatus); - - if (systemstatus.SystemOpState == oam::ACTIVE || - systemstatus.SystemOpState == oam::FAILED ) - { - //give time for ProcMon/ProcMgr to get fully active on new pm - sleep(10); - - // build and send msg to restart system - returnStatus = sendMsgToProcMgr(RESTARTSYSTEM, "", FORCEFUL, ACK_YES); - - if (returnStatus != API_SUCCESS) - exceptionControl("startSystem", returnStatus); - - return true; // Caller should wait for system to come up. - } - - return false; // Caller should not wait for system to come up. -} - /******************************************************************** * * Get Storage Config Data @@ -5332,27 +733,6 @@ void Oam::getDbrootPmConfig(const int dbrootid, int& pmid) exceptionControl("getDbrootPmConfig", API_INVALID_PARAMETER); } -/******************************************************************** - * - * Get DBRoot - PM Config data - * - ********************************************************************/ -void Oam::getDbrootPmConfig(const int dbrootid, std::string& pmid) -{ - try - { - int PMid; - getDbrootPmConfig(dbrootid, PMid); - pmid = itoa(PMid); - return; - } - catch (exception& ) - {} - - // dbrootid not found, return with error - exceptionControl("getDbrootPmConfig", API_INVALID_PARAMETER); -} - /******************************************************************** * * Get System DBRoot Config data @@ -5405,2814 +785,6 @@ void Oam::getSystemDbrootConfig(DBRootConfigList& dbrootconfiglist) return; } -/******************************************************************** - * - * Set PM - DBRoot Config data - * - ********************************************************************/ -void Oam::setPmDbrootConfig(const int pmid, DBRootConfigList& dbrootconfiglist) -{ - ModuleConfig moduleconfig; - - string module = "pm" + itoa(pmid); - - try - { - getSystemConfig(module, moduleconfig); - - moduleconfig.dbrootConfigList = dbrootconfiglist; - - try - { - setSystemConfig(module, moduleconfig); - return; - } - catch (...) - { -// writeLog("ERROR: setSystemConfig api failure for " + module , LOG_TYPE_ERROR ); -// cout << endl << "ERROR: setSystemConfig api failure for " + module << endl; - exceptionControl("getSystemDbrootConfig", API_INVALID_PARAMETER); - } - } - catch (...) - { -// writeLog("ERROR: getSystemConfig api failure for " + module , LOG_TYPE_ERROR ); -// cout << endl << "ERROR: getSystemConfig api failure for " + module << endl; - exceptionControl("getSystemDbrootConfig", API_INVALID_PARAMETER); - } - - //set System DBRoot Count - try - { - setSystemDBrootCount(); - } - catch (exception& ) - { - cout << endl << "**** setSystemDBrootCount Failed" << endl; - exceptionControl("assignPmDbrootConfig", API_FAILURE); - } -} - -/******************************************************************** - * - * Manual Move PM - DBRoot data - * - ********************************************************************/ -void Oam::manualMovePmDbroot(std::string residePM, std::string dbrootIDs, std::string toPM) -{ - typedef std::vector dbrootList; - dbrootList dbrootlist; - dbrootList tempdbrootlist; - - writeLog("manualMovePmDbroot: " + dbrootIDs + " from " + residePM + " to " + toPM, LOG_TYPE_DEBUG ); - - string DataRedundancyConfig = "n"; - - try - { - getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); - } - catch (...) - { - DataRedundancyConfig = "n"; - } - - boost::char_separator sep(", "); - boost::tokenizer< boost::char_separator > tokens(dbrootIDs, sep); - - for ( boost::tokenizer< boost::char_separator >::iterator it = tokens.begin(); - it != tokens.end(); - ++it) - { - //if gluster, check if there are copies on the to-pm - if ( DataRedundancyConfig == "y") - { - string pmList = ""; - - try - { - string errmsg; - int ret = glusterctl(oam::GLUSTER_WHOHAS, *it, pmList, errmsg); - - if ( ret != 0 ) - { - writeLog("ERROR: glusterctl failure getting pm list for dbroot " + *it + " , error: " + errmsg, LOG_TYPE_ERROR ); - exceptionControl("autoMovePmDbroot", API_INVALID_PARAMETER); - } - } - catch (exception& ) - { - writeLog("ERROR: glusterctl failure getting pm list for dbroot " + *it, LOG_TYPE_ERROR ); - exceptionControl("autoMovePmDbroot", API_INVALID_PARAMETER); - } - catch (...) - { - writeLog("ERROR: glusterctl failure getting pm list for dbroot " + *it, LOG_TYPE_ERROR ); - exceptionControl("autoMovePmDbroot", API_INVALID_PARAMETER); - } - - boost::char_separator sep(" "); - boost::tokenizer< boost::char_separator > tokens(pmList, sep); - - for ( boost::tokenizer< boost::char_separator >::iterator it1 = tokens.begin(); - it1 != tokens.end(); - ++it1) - { - string pmModule = "pm" + *it1; - - if ( pmModule == toPM ) - { - dbrootlist.push_back(*it); - tempdbrootlist.push_back(*it); - } - } - - if ( dbrootlist.size() == 0 ) - { - writeLog("ERROR: No DBROOTs to move to-pm, no gluster copies", LOG_TYPE_ERROR ); - exceptionControl("autoMovePmDbroot", API_INVALID_PARAMETER); - } - } - else - { - dbrootlist.push_back(*it); - tempdbrootlist.push_back(*it); - } - } - - string residePMID = residePM.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE);; - string toPMID = toPM.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE);; - - //get dbroots ids for reside PM - DBRootConfigList residedbrootConfigList; - - try - { - getPmDbrootConfig(atoi(residePMID.c_str()), residedbrootConfigList); - - DBRootConfigList::iterator pt = residedbrootConfigList.begin(); - - for ( ; pt != residedbrootConfigList.end() ; pt++) - { - //check if entered dbroot id is in residing pm - dbrootList::iterator pt1 = tempdbrootlist.begin(); - - for ( ; pt1 != tempdbrootlist.end() ; pt1++) - { - if ( itoa(*pt) == *pt1 ) - { - tempdbrootlist.erase(pt1); - break; - } - } - } - - if ( !tempdbrootlist.empty() ) - { - // there is a entered dbroot id not in the residing pm - writeLog("ERROR: dbroot IDs not assigned to " + residePM, LOG_TYPE_ERROR ); - cout << endl << "ERROR: these dbroot IDs not assigned to '" << residePM << "' : "; - dbrootList::iterator pt1 = tempdbrootlist.begin(); - - for ( ; pt1 != tempdbrootlist.end() ;) - { - cout << *pt1; - pt1++; - - if (pt1 != tempdbrootlist.end()) - cout << ", "; - } - - cout << endl << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); - } - } - catch (exception& ) - { - writeLog("ERROR: getPmDbrootConfig api failure for pm" + residePMID, LOG_TYPE_ERROR ); - cout << endl << "ERROR: getPmDbrootConfig api failure for pm" + residePMID << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); - } - - //get dbroots ids for reside PM - DBRootConfigList todbrootConfigList; - - try - { - getPmDbrootConfig(atoi(toPMID.c_str()), todbrootConfigList); - } - catch (exception& ) - { - writeLog("ERROR: getPmDbrootConfig api failure for pm" + toPMID, LOG_TYPE_ERROR ); - cout << endl << "ERROR: getPmDbrootConfig api failure for pm" + toPMID << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); - } - - //remove entered dbroot IDs from reside PM list - dbrootList::iterator pt1 = dbrootlist.begin(); - - for ( ; pt1 != dbrootlist.end() ; pt1++) - { - DBRootConfigList::iterator pt2 = residedbrootConfigList.begin(); - - for ( ; pt2 != residedbrootConfigList.end() ; pt2++) - { - if ( itoa(*pt2) == *pt1 ) - { - - dbrootList dbroot1; - dbroot1.push_back(*pt1); - bool returnDbRoot = false; - - //send msg to unmount dbroot if module is not offline - int opState; - bool degraded; - - try - { - getModuleStatus(residePM, opState, degraded); - } - catch (...) - {} - - if (opState != oam::AUTO_OFFLINE || opState != oam::AUTO_DISABLED) - { - try - { - mountDBRoot(dbroot1, false); - } - catch (exception& ) - { - writeLog("ERROR: dbroot failed to unmount", LOG_TYPE_ERROR ); - cout << endl << "ERROR: umountDBRoot api failure" << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); - } - } - - //check for amazon moving required - try - { - amazonReattach(toPM, dbroot1); - } - catch (exception& ) - { - writeLog("ERROR: amazonReattach api failure", LOG_TYPE_ERROR ); - cout << endl << "ERROR: amazonReattach api failure" << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); - } - - //if Gluster, do the assign command - if ( DataRedundancyConfig == "y") - { - try - { - string errmsg; - int ret = glusterctl(oam::GLUSTER_ASSIGN, *pt1, toPM, errmsg); - if ( ret == 0 ) - { - todbrootConfigList.push_back(*pt2); - residedbrootConfigList.erase(pt2); - } - else - { - cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID + ", error: " + errmsg << endl; - writeLog("FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID + ", error: " + errmsg, LOG_TYPE_ERROR ); - returnDbRoot = true; - } - } - catch (exception& e) - { - cout << endl << "**** glusterctl API exception: " << e.what() << endl; - cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID << endl; - writeLog("FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID, LOG_TYPE_ERROR ); - returnDbRoot = true; - } - catch (...) - { - cout << endl << "**** glusterctl API exception: UNKNOWN" << endl; - cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID << endl; - writeLog("FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID, LOG_TYPE_ERROR ); - returnDbRoot = true; - } - } - - if (returnDbRoot) - { - // something went wrong return it back to original owner - try - { - string errmsg; - writeLog("reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID, LOG_TYPE_ERROR ); - int ret = glusterctl(oam::GLUSTER_ASSIGN, *pt1, residePM, errmsg); - if ( ret != 0 ) - { - cerr << "FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID + ", error: " + errmsg << endl; - writeLog("FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID + ", error: " + errmsg, LOG_TYPE_ERROR ); - exceptionControl("manualMovePmDbroot", API_INVALID_STATE); - } - mountDBRoot(dbroot1); - //get updated Columnstore.xml distributed - distributeConfigFile("system"); - return; - } - catch (exception& e) - { - cout << endl << "**** glusterctl API exception: " << e.what() << endl; - cerr << "FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID << endl; - writeLog("FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID, LOG_TYPE_ERROR ); - exceptionControl("manualMovePmDbroot", API_INVALID_STATE); - } - catch (...) - { - cout << endl << "**** glusterctl API exception: UNKNOWN" << endl; - cerr << "FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID << endl; - writeLog("FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID, LOG_TYPE_ERROR ); - exceptionControl("manualMovePmDbroot", API_INVALID_STATE); - } - } - break; - } - } - } - - - - //set the 2 pms dbroot config - try - { - setPmDbrootConfig(atoi(residePMID.c_str()), residedbrootConfigList); - } - catch (exception& ) - { - writeLog("ERROR: setPmDbrootConfig api failure for pm" + residePMID, LOG_TYPE_ERROR ); - cout << endl << "ERROR: setPmDbrootConfig api failure for pm" + residePMID << endl; - exceptionControl("manualMovePmDbroot", API_INVALID_STATE); - } - - try - { - setPmDbrootConfig(atoi(toPMID.c_str()), todbrootConfigList); - } - catch (exception& ) - { - writeLog("ERROR: setPmDbrootConfig api failure for pm" + toPMID, LOG_TYPE_ERROR ); - cout << endl << "ERROR: setPmDbrootConfig api failure for pm" + toPMID << endl; - exceptionControl("manualMovePmDbroot", API_INVALID_STATE); - } - - //send msg to mount dbroot - try - { - mountDBRoot(dbrootlist); - } - catch (exception& ) - { - writeLog("ERROR: mountDBRoot api failure", LOG_TYPE_DEBUG ); - cout << endl << "ERROR: mountDBRoot api failure" << endl; - } - - //get updated Columnstore.xml distributed - distributeConfigFile("system"); - - return; - -} - - -bool comparex(const PmDBRootCount_s& x, const PmDBRootCount_s& y) -{ - return x.count < y.count; -} - -/******************************************************************** - * - * Auto Move PM - DBRoot data - * - ********************************************************************/ -bool Oam::autoMovePmDbroot(std::string residePM) -{ - writeLog("autoMovePmDbroot: " + residePM, LOG_TYPE_DEBUG ); - - string DBRootStorageType; - - try - { - getSystemConfig("DBRootStorageType", DBRootStorageType); - } - catch (...) {} - - string DataRedundancyConfig = "n"; - - try - { - getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); - } - catch (...) - { - DataRedundancyConfig = "n"; - } - - if (DBRootStorageType == "internal" && DataRedundancyConfig == "n") - return 1; - - // get current Module name - string localModuleName; - oamModuleInfo_t st; - - try - { - st = getModuleInfo(); - localModuleName = boost::get<0>(st); - } - catch (...) - {} - - int localPMID = atoi(localModuleName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE).c_str()); - string localPM = localModuleName; - - int residePMID = atoi(residePM.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE).c_str()); - - //get dbroot ids for reside PM - DBRootConfigList residedbrootConfigList; - - try - { - getPmDbrootConfig(residePMID, residedbrootConfigList); - - if ( residedbrootConfigList.empty() ) - { - writeLog("ERROR: residedbrootConfigList empty", LOG_TYPE_ERROR ); - exceptionControl("autoMovePmDbroot", API_INVALID_PARAMETER); - } - } - catch (...) - { - writeLog("ERROR: getPmDbrootConfig failure", LOG_TYPE_ERROR ); - exceptionControl("autoMovePmDbroot", API_INVALID_PARAMETER); - } - - //detach first to make sure DBS can be detach before trying to move to another pm - DBRootConfigList::iterator pt3 = residedbrootConfigList.begin(); - - for ( ; pt3 != residedbrootConfigList.end() ; pt3++ ) - { - int dbrootID = *pt3; - - try - { - typedef std::vector dbrootList; - dbrootList dbrootlist; - dbrootlist.push_back(itoa(dbrootID)); - - amazonDetach(dbrootlist); - } - catch (exception& ) - { - writeLog("ERROR: amazonDetach failure", LOG_TYPE_ERROR ); - - //reattach - typedef std::vector dbrootList; - dbrootList dbrootlist; - dbrootlist.push_back(itoa(dbrootID)); - - amazonAttach(residePM, dbrootlist); - - exceptionControl("autoMovePmDbroot", API_DETACH_FAILURE); - } - } - - //get dbroot id for other PMs - systemStorageInfo_t t; - DeviceDBRootList moduledbrootlist; - - try - { - t = getStorageConfig(); - moduledbrootlist = boost::get<2>(t); - } - catch (exception& ) - { - writeLog("ERROR: getStorageConfig failure", LOG_TYPE_ERROR ); - exceptionControl("autoMovePmDbroot", API_FAILURE); - } - - // get list of dbroot count for each pm - typedef std::vector PMdbrootList; - PMdbrootList pmdbrootList; - PmDBRootCount_s pmdbroot; - - DeviceDBRootList::iterator pt = moduledbrootlist.begin(); - - for ( ; pt != moduledbrootlist.end() ; pt++) - { - // only put pms with dbroots assigned, if 0 then that pm is disabled - if ( (*pt).dbrootConfigList.size() > 0 ) - { - pmdbroot.pmID = (*pt).DeviceID; - pmdbroot.count = (*pt).dbrootConfigList.size(); - pmdbrootList.push_back(pmdbroot); - } - } - - sort ( pmdbrootList.begin(), pmdbrootList.end(), comparex ); - - //clear reside IDs - DBRootConfigList clearresidedbrootConfigList; - - try - { - setPmDbrootConfig(residePMID, clearresidedbrootConfigList); - } - catch (...) - { - writeLog("ERROR: setPmDbrootConfig failure - clear reside ID", LOG_TYPE_ERROR ); - exceptionControl("autoMovePmDbroot", API_FAILURE); - } - - //distribute dbroot IDs to other PMs starting with lowest count - bool exceptionFailure = false; - bool dbroot1 = false; - DBRootConfigList::iterator pt2 = residedbrootConfigList.begin(); - - for ( ; pt2 != residedbrootConfigList.end() ; ) - { - int dbrootID = *pt2; - - //dbroot #1 always get moved to local module - if ( dbrootID == 1 ) - { - dbroot1 = true; - //get dbroot ids for PM - DBRootConfigList todbrootConfigList; - - try - { - getPmDbrootConfig(localPMID, todbrootConfigList); - } - catch (...) - { - writeLog("ERROR: getPmDbrootConfig failure", LOG_TYPE_ERROR ); - exceptionControl("autoMovePmDbroot", API_INVALID_PARAMETER); - } - - //get the first dbroot assigned to this pm, so it can be auto unmoved later instead of dbroot1 - DBRootConfigList::iterator pt = todbrootConfigList.begin(); - int subDBRootID = *pt; - - todbrootConfigList.push_back(dbrootID); - - try - { - setPmDbrootConfig(localPMID, todbrootConfigList); - writeLog("autoMovePmDbroot/setPmDbrootConfig : " + localModuleName + ":" + itoa(dbrootID), LOG_TYPE_DEBUG); - sleep(5); - - //send msg to toPM to mount dbroot - try - { - typedef std::vector dbrootList; - dbrootList dbrootlist; - dbrootlist.push_back(itoa(dbrootID)); - mountDBRoot(dbrootlist); - } - catch (exception& ) - { - writeLog("ERROR: mountDBRoot api failure", LOG_TYPE_DEBUG ); - cout << endl << "ERROR: mountDBRoot api failure" << endl; - } - } - catch (...) - { - writeLog("ERROR: setPmDbrootConfig failure", LOG_TYPE_ERROR ); - exceptionControl("autoMovePmDbroot", API_FAILURE); - } - - if ( DataRedundancyConfig == "y") - { - try - { - string errmsg; - int ret = glusterctl(oam::GLUSTER_ASSIGN, itoa(dbrootID), localPM, errmsg); - - if ( ret != 0 ) - { - writeLog("FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID) +" ret: " + itoa(ret), LOG_TYPE_ERROR ); - } - } - catch (...) - { - writeLog("EXCEPTION FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID), LOG_TYPE_ERROR ); - } - - // check if a copy is available when residePM returns - string pmList = ""; - - try - { - string errmsg; - int ret = glusterctl(oam::GLUSTER_WHOHAS, itoa(subDBRootID), pmList, errmsg); - - if ( ret != 0 ) - { - writeLog("ERROR: glusterctl failure getting pm list for dbroot " + itoa(subDBRootID) + " , error: " + errmsg, LOG_TYPE_ERROR ); - exceptionControl("autoMovePmDbroot", API_INVALID_PARAMETER); - } - } - catch (exception& ) - { - writeLog("ERROR: glusterctl failure getting pm list for dbroot " + itoa(subDBRootID), LOG_TYPE_ERROR ); - exceptionControl("autoMovePmDbroot", API_INVALID_PARAMETER); - } - catch (...) - { - writeLog("ERROR: glusterctl failure getting pm list for dbroot " + itoa(subDBRootID), LOG_TYPE_ERROR ); - exceptionControl("autoMovePmDbroot", API_INVALID_PARAMETER); - } - - bool found = false; - boost::char_separator sep(" "); - boost::tokenizer< boost::char_separator > tokens(pmList, sep); - - for ( boost::tokenizer< boost::char_separator >::iterator it = tokens.begin(); - it != tokens.end(); - ++it) - { - if ( atoi((*it).c_str()) == residePMID ) - { - // found it and can assign the subDBRoot back to residedPM no switch back is needed - found = true; - } - } - - // We will go back to this PM - if (!found) - { - subDBRootID = dbrootID; - } - } - - //store in move dbroot transaction file - string fileName = "/var/lib/columnstore/local/moveDbrootTransactionLog"; - - string cmd = "echo '" + residePM + "|" + localModuleName + "|" + itoa(subDBRootID) + "' >> " + fileName; - system(cmd.c_str()); - writeLog("WRITE1: " + cmd, LOG_TYPE_DEBUG ); - - //check for amazon moving required - try - { - typedef std::vector dbrootList; - dbrootList dbrootlist; - dbrootlist.push_back(itoa(dbrootID)); - - amazonReattach(localModuleName, dbrootlist, true); - } - catch (exception& ) - { - writeLog("ERROR: amazonReattach failure", LOG_TYPE_ERROR ); - exceptionControl("autoMovePmDbroot", API_FAILURE); - } - - pt2++; - - if ( pt2 == residedbrootConfigList.end() ) - break; - } - else - { - //if Gluster, get it's list for DBroot and move to one of those - string toPmID; - - if ( DataRedundancyConfig == "y") - { - string pmList = ""; - - try - { - string errmsg; - int ret = glusterctl(oam::GLUSTER_WHOHAS, itoa(dbrootID), pmList, errmsg); - - if ( ret != 0 ) - { - writeLog("ERROR: glusterctl failure getting pm list for dbroot " + itoa(dbrootID) + " , error: " + errmsg, LOG_TYPE_ERROR ); - exceptionControl("autoMovePmDbroot", API_INVALID_PARAMETER); - } - } - catch (exception& ) - { - writeLog("ERROR: glusterctl failure getting pm list for dbroot " + itoa(dbrootID), LOG_TYPE_ERROR ); - exceptionControl("autoMovePmDbroot", API_INVALID_PARAMETER); - } - catch (...) - { - writeLog("ERROR: glusterctl failure getting pm list for dbroot " + itoa(dbrootID), LOG_TYPE_ERROR ); - exceptionControl("autoMovePmDbroot", API_INVALID_PARAMETER); - } - - bool found = false; - boost::char_separator sep(" "); - boost::tokenizer< boost::char_separator > tokens(pmList, sep); - - for ( boost::tokenizer< boost::char_separator >::iterator it = tokens.begin(); - it != tokens.end(); - ++it) - { - if ( atoi((*it).c_str()) != residePMID ) - { - found = true; - toPmID = *it; - - string toPM = "pm" + toPmID; - - try - { - string errmsg; - int ret = glusterctl(oam::GLUSTER_ASSIGN, itoa(dbrootID), toPM, errmsg); - - if ( ret != 0 ) - { - writeLog("FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID), LOG_TYPE_ERROR ); - } - } - catch (...) - { - writeLog("FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID), LOG_TYPE_ERROR ); - } - - DBRootConfigList todbrootConfigList; - - try - { - getPmDbrootConfig(atoi(toPmID.c_str()), todbrootConfigList); - } - catch (...) - { - writeLog("ERROR: getPmDbrootConfig failure", LOG_TYPE_ERROR ); - exceptionControl("autoMovePmDbroot", API_INVALID_PARAMETER); - } - - todbrootConfigList.push_back(dbrootID); - - try - { - setPmDbrootConfig(atoi(toPmID.c_str()), todbrootConfigList); - writeLog("autoMovePmDbroot/setPmDbrootConfig : " + toPM + ":" + itoa(dbrootID), LOG_TYPE_DEBUG); - sleep(5); - - //send msg to toPM to mount dbroot - try - { - typedef std::vector dbrootList; - dbrootList dbrootlist; - dbrootlist.push_back(itoa(dbrootID)); - - mountDBRoot(dbrootlist); - } - catch (exception& ) - { - writeLog("ERROR: mountDBRoot api failure", LOG_TYPE_DEBUG ); - cout << endl << "ERROR: mountDBRoot api failure" << endl; - } - } - catch (...) - { - writeLog("ERROR: setPmDbrootConfig failure", LOG_TYPE_ERROR ); - exceptionFailure = true; - } - - //store in move dbroot transaction file - string fileName = "/var/lib/columnstore/local/moveDbrootTransactionLog"; - - string cmd = "echo '" + residePM + "|" + toPM + "|" + itoa(dbrootID) + "' >> " + fileName; - system(cmd.c_str()); - writeLog("WRITE2: " + cmd, LOG_TYPE_DEBUG ); - - pt2++; - - if ( pt2 == residedbrootConfigList.end() ) - break; - - dbrootID = *pt2; - } - } - - if (!found) - { - writeLog("ERROR: no available pm found for DBRoot " + itoa(dbrootID), LOG_TYPE_ERROR ); - exceptionControl("autoMovePmDbroot", API_INVALID_PARAMETER); - } - } - else - { - // not gluster, pmdbrootList = available pms for assigning - PMdbrootList::iterator pt1 = pmdbrootList.begin(); - - for ( ; pt1 != pmdbrootList.end() ; pt1++) - { - //if dbroot1 was moved, skip local module the first time through - if ( dbroot1 ) - { - if ( (*pt1).pmID == localPMID ) - { - dbroot1 = false; - continue; - } - } - - if ( (*pt1).pmID != residePMID ) - { - - string toPM = "pm" + itoa((*pt1).pmID); - - //get dbroot ids for PM - DBRootConfigList todbrootConfigList; - - try - { - getPmDbrootConfig((*pt1).pmID, todbrootConfigList); - } - catch (...) - { - writeLog("ERROR: getPmDbrootConfig failure", LOG_TYPE_ERROR ); - exceptionControl("autoMovePmDbroot", API_INVALID_PARAMETER); - } - - todbrootConfigList.push_back(dbrootID); - - //check for amazon moving required - try - { - typedef std::vector dbrootList; - dbrootList dbrootlist; - dbrootlist.push_back(itoa(dbrootID)); - - amazonReattach(toPM, dbrootlist, true); - } - catch (exception& ) - { - writeLog("ERROR: amazonReattach failure", LOG_TYPE_ERROR ); - exceptionFailure = true; - } - - try - { - setPmDbrootConfig((*pt1).pmID, todbrootConfigList); - writeLog("autoMovePmDbroot/setPmDbrootConfig : " + toPM + ":" + itoa(dbrootID), LOG_TYPE_DEBUG); - sleep(5); - - //send msg to toPM to mount dbroot - try - { - typedef std::vector dbrootList; - dbrootList dbrootlist; - dbrootlist.push_back(itoa(dbrootID)); - - mountDBRoot(dbrootlist); - } - catch (exception& ) - { - writeLog("ERROR: mountDBRoot api failure", LOG_TYPE_DEBUG ); - cout << endl << "ERROR: mountDBRoot api failure" << endl; - } - } - catch (...) - { - writeLog("ERROR: setPmDbrootConfig failure", LOG_TYPE_ERROR ); - exceptionFailure = true; - } - - //store in move dbroot transaction file - string fileName = "/var/lib/columnstore/local/moveDbrootTransactionLog"; - - string cmd = "echo '" + residePM + "|" + toPM + "|" + itoa(dbrootID) + "' >> " + fileName; - system(cmd.c_str()); - writeLog("WRITE3: " + cmd, LOG_TYPE_DEBUG ); - - pt2++; - - if ( pt2 == residedbrootConfigList.end() ) - break; - - dbrootID = *pt2; - } - } - } - } - } - - if (exceptionFailure) - exceptionControl("autoMovePmDbroot", API_FAILURE); - - return 0; - -} - -/******************************************************************** - * - * Auto Move PM - DBRoot data - * - ********************************************************************/ -bool Oam::autoUnMovePmDbroot(std::string toPM) -{ - writeLog("autoUnMovePmDbroot: " + toPM, LOG_TYPE_DEBUG ); - - string residePM; - string fromPM; - string dbrootIDs; - - string DBRootStorageType; - - try - { - getSystemConfig("DBRootStorageType", DBRootStorageType); - } - catch (...) {} - - string DataRedundancyConfig = "n"; - - try - { - getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); - } - catch (...) - { - DataRedundancyConfig = "n"; - } - - if (DBRootStorageType == "internal" && DataRedundancyConfig == "n") - return 1; - - //store in move dbroot transaction file - string fileName = "/var/lib/columnstore/local/moveDbrootTransactionLog"; - - ifstream oldFile (fileName.c_str()); - - if (!oldFile) - { - ofstream newFile (fileName.c_str()); - int fd = open(fileName.c_str(), O_RDWR | O_CREAT, 0664); - newFile.close(); - close(fd); - } - - vector lines; - char line[200]; - string buf; - string newLine; - bool found = false; - - while (oldFile.getline(line, 200)) - { - buf = line; - writeLog("READ: " + buf, LOG_TYPE_DEBUG ); - string::size_type pos = buf.find("|", 0); - - if (pos != string::npos) - { - residePM = buf.substr(0, pos); - - if ( residePM == toPM ) - { - string::size_type pos1 = buf.find("|", pos + 1); - - if (pos1 != string::npos) - { - fromPM = buf.substr(pos + 1, pos1 - pos - 1); - dbrootIDs = buf.substr(pos1 + 1, 80); - found = true; - - try - { - manualMovePmDbroot(fromPM, dbrootIDs, toPM); - writeLog("autoUnMovePmDbroot/manualMovePmDbroot : " + fromPM + ":" + dbrootIDs + ":" + toPM, LOG_TYPE_DEBUG); - } - catch (...) - { - writeLog("ERROR: manualMovePmDbroot failure: " + fromPM + ":" + dbrootIDs + ":" + toPM, LOG_TYPE_ERROR ); - cout << "ERROR: manualMovePmDbroot failure" << endl; - exceptionControl("autoUnMovePmDbroot", API_FAILURE); - } - } - } - else - lines.push_back(buf); - } - } - - if (!found) - { - writeLog("No dbroots found in /var/lib/columnstore/moveDbrootTransactionLog", LOG_TYPE_DEBUG ); - - cout << "No dbroots found in " << fileName << endl; - } - - oldFile.close(); - unlink (fileName.c_str()); - ofstream newFile (fileName.c_str()); - -//create new file - int fd = open(fileName.c_str(), O_RDWR | O_CREAT, 0664); - - copy(lines.begin(), lines.end(), ostream_iterator(newFile, "\n")); - newFile.close(); - - close(fd); - - return 0; -} - -/*************************************************************************** - * - * Function: addUMdisk - * - * Purpose: add UM disk - * - ****************************************************************************/ - -void Oam::addUMdisk(const int moduleID, std::string& volumeName, std::string& device, string EBSsize) -{ - string UMVolumeSize = "10"; - - try - { - getSystemConfig("UMVolumeSize", UMVolumeSize); - } - catch (...) {} - - writeLog("addUMdisk - Create new Volume for um" + itoa(moduleID), LOG_TYPE_DEBUG); - - cout << " Create AWS Volume for UM #" << itoa(moduleID) << endl; - - int retry = 0; - - for ( ; retry < 5 ; retry++ ) - { - volumeName = createEC2Volume(UMVolumeSize, "um"); - - if ( volumeName == "failed" || volumeName.empty() ) - retry = retry; - else - break; - } - - if ( retry >= 5 ) - { - cout << " *** ERROR: Failed to create a Volume for um1 " << moduleID << endl; - exceptionControl("addUMdisk", API_FAILURE); - } - - //attach and format volumes - device = "/dev/xvdf"; - - string localInstance = getEC2LocalInstance(); - - //attach volumes to local instance - writeLog("addUMdisk - Attach new Volume to local instance: " + volumeName, LOG_TYPE_DEBUG); - - retry = 0; - - for ( ; retry < 5 ; retry++ ) - { - if (!attachEC2Volume(volumeName, device, localInstance)) - detachEC2Volume(volumeName); - else - break; - } - - if ( retry >= 5 ) - { - cout << " *** ERROR: Volume " << volumeName << " failed to attach to local instance" << endl; - exceptionControl("addUMdisk", API_FAILURE); - } - - //format attached volume - writeLog("addUMdisk - Format new Volume for: " + volumeName, LOG_TYPE_DEBUG); - cout << " Formatting disk for UM #" << itoa(moduleID) << ", please wait..." << endl; - - string cmd; - int user; - user = getuid(); - - if ( user == 0 ) - cmd = "mkfs.ext2 -F " + device + " > " + tmpdir + "/format.log 2>&1"; - else - cmd = "sudo mkfs.ext2 -F " + device + " > " + tmpdir + "/format.log 2>&1"; - - system(cmd.c_str()); - - //detach volume - writeLog("addUMdisk - detach new Volume from local instance: " + volumeName, LOG_TYPE_DEBUG); - - if (!detachEC2Volume(volumeName)) - { - exceptionControl("addUMdisk", API_FAILURE); - } - - // add instance tag - string AmazonAutoTagging; - string systemName; - - try - { - getSystemConfig("AmazonAutoTagging", AmazonAutoTagging); - getSystemConfig("SystemName", systemName); - } - catch (...) {} - - if ( AmazonAutoTagging == "y" ) - { - string tagValue = systemName + "-um" + itoa(moduleID); - createEC2tag( volumeName, "Name", tagValue ); - } -} - -/*************************************************************************** - * - * Function: addDbroot - * - * Purpose: add DBRoot - * - ****************************************************************************/ - -void Oam::addDbroot(const int dbrootNumber, DBRootConfigList& dbrootlist, string EBSsize) -{ - int SystemDBRootCount = 0; - string cloud; - string DBRootStorageType; - string volumeSize; - Config* sysConfig = Config::makeConfig(CalpontConfigFile.c_str()); - string Section = "SystemConfig"; - - try - { - getSystemConfig("DBRootCount", SystemDBRootCount); - getSystemConfig("Cloud", cloud); - getSystemConfig("DBRootStorageType", DBRootStorageType); - getSystemConfig("PMVolumeSize", volumeSize); - } - catch (...) {} - - int newSystemDBRootCount = SystemDBRootCount + dbrootNumber; - - if ( newSystemDBRootCount > MAX_DBROOT ) - { - cout << "ERROR: Failed add, total Number of DBRoots would be over maximum of " << MAX_DBROOT << endl; - exceptionControl("addDbroot", API_INVALID_PARAMETER); - } - - if ( (cloud == "amazon-ec2" || cloud == "amazon-vpc") && - DBRootStorageType == "external" ) - { - if ( volumeSize == oam::UnassignedName ) - { - if ( EBSsize != oam::UnassignedName ) - { - volumeSize = EBSsize; - setSystemConfig("PMVolumeSize", volumeSize); - } - } - else - { - if ( EBSsize != oam::UnassignedName ) - { - volumeSize = EBSsize; - } - } - - if ( newSystemDBRootCount > MAX_DBROOT_AMAZON ) - { - cout << "ERROR: Failed add, total Number of DBRoots would be over maximum of " << MAX_DBROOT_AMAZON << endl; - exceptionControl("addDbroot", API_INVALID_PARAMETER); - } - } - - //get assigned DBRoots IDs - DBRootConfigList dbrootConfigList; - - try - { - getSystemDbrootConfig(dbrootConfigList); - } - catch (...) {} - - //get unassigned DBRoots IDs - DBRootConfigList undbrootlist; - - try - { - getUnassignedDbroot(undbrootlist); - } - catch (...) {} - - //combined list - DBRootConfigList::iterator pt1 = undbrootlist.begin(); - - for ( ; pt1 != undbrootlist.end() ; pt1++) - { - dbrootConfigList.push_back(*pt1); - } - - if ( dbrootlist.empty() ) - { - int newID = 1; - - for ( int count = 0 ; count < dbrootNumber ; count++ ) - { - //check for match - while (true) - { - bool found = false; - DBRootConfigList::iterator pt = dbrootConfigList.begin(); - - for ( ; pt != dbrootConfigList.end() ; pt++) - { - if ( newID == *pt ) - { - newID++; - found = true; - break; - } - } - - if (!found) - { - dbrootlist.push_back(newID); - newID++; - break; - } - } - } - } - - if ( dbrootlist.empty() ) - { - cout << "ERROR: Failed add, No DBRoot IDs available" << endl; - exceptionControl("addDbroot", API_INVALID_PARAMETER); - } - - //if amazon cloud with external volumes, create AWS volumes - if ( (cloud == "amazon-ec2" || cloud == "amazon-vpc") && - DBRootStorageType == "external" ) - { - //get local instance name (pm1) - string localInstance = getEC2LocalInstance(); - - if ( localInstance == "failed" || localInstance.empty() || localInstance == "") - { - cout << endl << "ERROR: Failed to get Instance ID" << endl; - exceptionControl("addDbroot", API_INVALID_PARAMETER); - } - - string Section = "Installation"; - - DBRootConfigList::iterator pt1 = dbrootlist.begin(); - - for ( ; pt1 != dbrootlist.end() ; pt1++) - { - cout << " Create AWS Volume for DBRoot #" << itoa(*pt1) << endl; - //create volume - string volumeName; - int retry = 0; - - for ( ; retry < 5 ; retry++ ) - { - volumeName = createEC2Volume(volumeSize, "pm"); - - if ( volumeName == "failed" || volumeName.empty() ) - retry = retry; - else - break; - } - - if ( retry >= 5 ) - { - cout << " *** ERROR: Failed to create a Volume for dbroot " << *pt1 << endl; - exceptionControl("addDbroot", API_FAILURE); - } - - string autoTagging; - string systemName; - - try - { - getSystemConfig("AmazonAutoTagging", autoTagging); - getSystemConfig("SystemName", systemName); - } - catch (...) {} - - if ( autoTagging == "y" ) - { - string tagValue = systemName + "-dbroot" + itoa(*pt1); - createEC2tag( volumeName, "Name", tagValue ); - } - - //get device name based on dbroot ID - storageID_t st; - - try - { - st = getAWSdeviceName( *pt1 ); - } - catch (...) {} - - string deviceName = boost::get<0>(st); - string amazonDeviceName = boost::get<1>(st); - - //attach volumes to local instance - retry = 0; - - for ( ; retry < 5 ; retry++ ) - { - if (!attachEC2Volume(volumeName, deviceName, localInstance)) - { - detachEC2Volume(volumeName); - } - else - break; - } - - if ( retry >= 5 ) - { - cout << " *** ERROR: Volume " << volumeName << " failed to attach to local instance" << endl; - exceptionControl("addDbroot", API_FAILURE); - } - - //format attached volume - cout << " Formatting DBRoot #" << itoa(*pt1) << ", please wait..." << endl; - string cmd; - int user; - user = getuid(); - - if (user == 0 ) - cmd = "mkfs.ext2 -F " + amazonDeviceName + " > " + tmpdir + "/format.log 2>&1"; - else - cmd = "sudo mkfs.ext2 -F " + amazonDeviceName + " > " + tmpdir + "/format.log 2>&1"; - - writeLog("addDbroot format cmd: " + cmd, LOG_TYPE_DEBUG ); - - system(cmd.c_str()); - - //detach - detachEC2Volume(volumeName); - - string volumeNameID = "PMVolumeName" + itoa(*pt1); - string deviceNameID = "PMVolumeDeviceName" + itoa(*pt1); - string amazonDeviceNameID = "PMVolumeAmazonDeviceName" + itoa(*pt1); - - //write volume and device name - try - { - sysConfig->setConfig(Section, volumeNameID, volumeName); - sysConfig->setConfig(Section, deviceNameID, deviceName); - sysConfig->setConfig(Section, amazonDeviceNameID, amazonDeviceName); - } - catch (...) - {} - - // fstabs - string entry = updateFstab( amazonDeviceName, itoa(*pt1)); - - //send update pms - if (entry != "" ) - distributeFstabUpdates(entry); - } - } - - //update Columnstore.xml entries - DBRootConfigList::iterator pt2 = dbrootlist.begin(); - - for ( ; pt2 != dbrootlist.end() ; pt2++) - { - string DBrootID = "DBRoot" + itoa(*pt2); - string pathID = "/var/lib/columnstore/data" + itoa(*pt2); - - try - { - sysConfig->setConfig(Section, DBrootID, pathID); - } - catch (...) - { - cout << "ERROR: Problem setting DBRoot in the MariaDB Columnstore System Configuration file" << endl; - exceptionControl("setConfig", API_FAILURE); - } - } - - try - { - sysConfig->write(); - } - catch (...) - { - exceptionControl("sysConfig->write", API_FAILURE); - } - - if (!checkSystemRunning()) - return; - - //get updated Columnstore.xml distributed - distributeConfigFile("system"); - - // - //send message to Process Monitor to add new dbroot to shared memory - // - pt2 = dbrootlist.begin(); - - for ( ; pt2 != dbrootlist.end() ; pt2++) - { - try - { - ByteStream obs; - - obs << (ByteStream::byte) ADD_DBROOT; - obs << itoa(*pt2); - - sendStatusUpdate(obs, ADD_DBROOT); - } - catch (...) - { - exceptionControl("setSystemConfig", API_INVALID_PARAMETER); - } - } - - return; -} - -/*************************************************************************** - * - * Function: distributeFstabUpdates - * - * Purpose: distribute Fstab Updates - * - ****************************************************************************/ - -void Oam::distributeFstabUpdates(std::string entry, std::string toPM) -{ - if (!checkSystemRunning()) - return; - - ACK_FLAG ackflag = oam::ACK_YES; - // build and send msg - int returnStatus = sendMsgToProcMgr(FSTABUPDATE, toPM, FORCEFUL, ackflag, entry); - - if (returnStatus != API_SUCCESS) - exceptionControl("distributeFstabUpdates", returnStatus); -} - -/*************************************************************************** - * - * Function: assignDbroot - * - * Purpose: assign DBRoot - * - ****************************************************************************/ - -void Oam::assignDbroot(std::string toPM, DBRootConfigList& dbrootlist) -{ - //make sure this new DBroot IDs aren't being used already - try - { - systemStorageInfo_t t; - t = getStorageConfig(); - - DeviceDBRootList moduledbrootlist = boost::get<2>(t); - - DBRootConfigList::iterator pt3 = dbrootlist.begin(); - - for ( ; pt3 != dbrootlist.end() ; pt3++) - { - DeviceDBRootList::iterator pt = moduledbrootlist.begin(); - - for ( ; pt != moduledbrootlist.end() ; pt++) - { - string moduleID = itoa((*pt).DeviceID); - DBRootConfigList::iterator pt1 = (*pt).dbrootConfigList.begin(); - - for ( ; pt1 != (*pt).dbrootConfigList.end() ; pt1++) - { - if ( *pt3 == *pt1) - { - cout << endl << "**** assignPmDbrootConfig Failed : DBRoot ID " + itoa(*pt3) + " already assigned to 'pm" + moduleID << "'" << endl; - exceptionControl("assignPmDbrootConfig", API_INVALID_PARAMETER); - } - } - } - } - } - catch (exception& e) - { - cout << endl << "**** getStorageConfig Failed : " << e.what() << endl; - } - - //make sure it's exist and unassigned - DBRootConfigList undbrootlist; - - try - { - getUnassignedDbroot(undbrootlist); - } - catch (...) {} - - if ( undbrootlist.empty() ) - { - cout << endl << "**** assignPmDbrootConfig Failed : no available dbroots are unassigned" << endl; - exceptionControl("assignPmDbrootConfig", API_INVALID_PARAMETER); - } - - DBRootConfigList::iterator pt1 = dbrootlist.begin(); - - for ( ; pt1 != dbrootlist.end() ; pt1++) - { - bool found = false; - DBRootConfigList::iterator pt2 = undbrootlist.begin(); - - for ( ; pt2 != undbrootlist.end() ; pt2++) - { - if ( *pt1 == * pt2 ) - { - found = true; - break; - } - } - - if (!found) - { - cout << endl << "**** assignPmDbrootConfig Failed : dbroot " << *pt1 << " doesn't exist" << endl; - exceptionControl("assignPmDbrootConfig", API_INVALID_PARAMETER); - } - } - - string toPMID = toPM.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE);; - - //get dbroots ids for to PM - DBRootConfigList todbrootConfigList; - - try - { - getPmDbrootConfig(atoi(toPMID.c_str()), todbrootConfigList); - - cout << "DBRoot IDs assigned to '" + toPM + "' = "; - - DBRootConfigList::iterator pt = todbrootConfigList.begin(); - - for ( ; pt != todbrootConfigList.end() ;) - { - cout << itoa(*pt); - pt++; - - if (pt != todbrootConfigList.end()) - cout << ", "; - } - - cout << endl; - } - catch (exception& e) - { - cout << endl << "**** getPmDbrootConfig Failed for '" << toPM << "' : " << e.what() << endl; - exceptionControl("assignPmDbrootConfig", API_FAILURE); - } - - cout << endl << "Changes being applied" << endl << endl; - - //added entered dbroot IDs to to-PM list and do Gluster assign if needed - string DataRedundancyConfig = "n"; - - try - { - getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); - } - catch (...) - { - DataRedundancyConfig = "n"; - } - - DBRootConfigList::iterator pt3 = dbrootlist.begin(); - - for ( ; pt3 != dbrootlist.end() ; pt3++) - { - todbrootConfigList.push_back(*pt3); - } - - try - { - setPmDbrootConfig(atoi(toPMID.c_str()), todbrootConfigList); - } - catch (exception& e) - { - cout << endl << "**** setPmDbrootConfig Failed for '" << toPM << "' : " << e.what() << endl; - exceptionControl("assignPmDbrootConfig", API_FAILURE); - } - - //get dbroots ids for to-PM - try - { - todbrootConfigList.clear(); - getPmDbrootConfig(atoi(toPMID.c_str()), todbrootConfigList); - - cout << "DBRoot IDs assigned to '" + toPM + "' = "; - - DBRootConfigList::iterator pt = todbrootConfigList.begin(); - - for ( ; pt != todbrootConfigList.end() ;) - { - cout << itoa(*pt); - pt++; - - if (pt != todbrootConfigList.end()) - cout << ", "; - } - - cout << endl; - } - catch (exception& e) - { - cout << endl << "**** getPmDbrootConfig Failed for '" << toPM << "' : " << e.what() << endl; - exceptionControl("assignPmDbrootConfig", API_FAILURE); - } - - //get old System DBRoot Count - int oldSystemDbRootCount = 0; - - try - { - getSystemConfig("DBRootCount", oldSystemDbRootCount); - - if (oldSystemDbRootCount < 1) - throw runtime_error("SystemDbRootCount not > 0"); - } - catch (exception& e) - { - cout << endl << "**** getSystemConfig for DBRootCount failed; " << - e.what() << endl; - exceptionControl("assignPmDbrootConfig", API_FAILURE); - } - - //set new System DBRoot Count - try - { - setSystemDBrootCount(); - } - catch (exception& ) - { - cout << endl << "**** setSystemDBrootCount Failed" << endl; - exceptionControl("assignPmDbrootConfig", API_FAILURE); - } - - //set FilesPerColumnPartition - try - { - setFilesPerColumnPartition( oldSystemDbRootCount ); - } - catch (exception& ) - { - cout << endl << "**** setFilesPerColumnPartition Failed" << endl; - exceptionControl("assignPmDbrootConfig", API_FAILURE); - } - - //get updated Columnstore.xml distributed - distributeConfigFile("system"); - - return; -} - -/*************************************************************************** - * - * Function: unassignDbroot - * - * Purpose: unassign DBRoot - * - ****************************************************************************/ - -void Oam::unassignDbroot(std::string residePM, DBRootConfigList& dbrootlist) -{ - string residePMID = residePM.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE);; - - //get dbroots ids for reside PM - DBRootConfigList residedbrootConfigList; - - try - { - getPmDbrootConfig(atoi(residePMID.c_str()), residedbrootConfigList); - - cout << endl << "DBRoot IDs assigned to '" + residePM + "' = "; - - DBRootConfigList::iterator pt = residedbrootConfigList.begin(); - - for ( ; pt != residedbrootConfigList.end() ;) - { - cout << itoa(*pt); - pt++; - - if (pt != residedbrootConfigList.end()) - cout << ", "; - } - - cout << endl; - } - catch (exception& e) - { - cout << endl << "**** getPmDbrootConfig Failed for '" << residePM << "' : " << e.what() << endl; - exceptionControl("unassignPmDbrootConfig", API_FAILURE); - } - - cout << "Changes being applied..." << endl; - - //remove entered dbroot IDs from reside PM list - DBRootConfigList::iterator pt1 = dbrootlist.begin(); - - for ( ; pt1 != dbrootlist.end() ; pt1++) - { - DBRootConfigList::iterator pt2 = residedbrootConfigList.begin(); - - for ( ; pt2 != residedbrootConfigList.end() ; pt2++) - { - if ( *pt2 == *pt1 ) - { - - dbrootList dbroot1; - dbroot1.push_back(itoa(*pt1)); - - //send msg to unmount dbroot if module is not offline - int opState; - bool degraded; - - try - { - getModuleStatus(residePM, opState, degraded); - } - catch (...) - {} - - if (opState != oam::AUTO_OFFLINE || opState != oam::AUTO_DISABLED) - { - try - { - mountDBRoot(dbroot1, false); - } - catch (exception& ) - { - writeLog("ERROR: dbroot failed to unmount", LOG_TYPE_ERROR ); - cout << endl << "ERROR: umountDBRoot api failure" << endl; - exceptionControl("unassignPmDbrootConfig", API_FAILURE); - } - } - - //get volume name and detach it - - string volumeNameID = "PMVolumeName" + itoa(*pt1); - string volumeName = oam::UnassignedName; - - try - { - getSystemConfig( volumeNameID, volumeName); - } - catch (...) - {} - - if ( volumeName != oam::UnassignedName ) - detachEC2Volume(volumeName); - - residedbrootConfigList.erase(pt2); - cout << "DBRoot IDs unassigned from '" + residePM + "' = " + itoa(*pt1) << endl; - - break; - } - } - } - - try - { - setPmDbrootConfig(atoi(residePMID.c_str()), residedbrootConfigList); - } - catch (exception& e) - { - cout << endl << "**** setPmDbrootConfig Failed for '" << residePM << "' : " << e.what() << endl; - exceptionControl("unassignPmDbrootConfig", API_FAILURE); - } - - //get dbroots ids for reside-PM - try - { - residedbrootConfigList.clear(); - getPmDbrootConfig(atoi(residePMID.c_str()), residedbrootConfigList); - - cout << "DBRoot IDs assigned to '" + residePM + "' = "; - - DBRootConfigList::iterator pt = residedbrootConfigList.begin(); - - for ( ; pt != residedbrootConfigList.end() ;) - { - cout << itoa(*pt); - pt++; - - if (pt != residedbrootConfigList.end()) - cout << ", "; - } - - cout << endl << endl; - } - catch (exception& e) - { - cout << endl << "**** getPmDbrootConfig Failed for '" << residePM << "' : " << e.what() << endl; - exceptionControl("unassignPmDbrootConfig", API_FAILURE); - } - - //get old System DBRoot Count - int oldSystemDbRootCount = 0; - - try - { - getSystemConfig("DBRootCount", oldSystemDbRootCount); - - if (oldSystemDbRootCount < 1) - throw runtime_error("SystemDbRootCount not > 0"); - } - catch (exception& e) - { - cout << endl << "**** getSystemConfig for DBRootCount failed; " << - e.what() << endl; - exceptionControl("unassignPmDbrootConfig", API_FAILURE); - } - - //set new System DBRoot Count - try - { - setSystemDBrootCount(); - } - catch (exception& ) - { - cout << endl << "**** setSystemDBrootCount Failed" << endl; - exceptionControl("unassignPmDbrootConfig", API_FAILURE); - } - - //set FilesPerColumnPartition - try - { - setFilesPerColumnPartition( oldSystemDbRootCount ); - } - catch (exception& ) - { - cout << endl << "**** setFilesPerColumnPartition Failed" << endl; - exceptionControl("unassignPmDbrootConfig", API_FAILURE); - } - - return; -} - -/*************************************************************************** - * - * Function: getUnassignedDbroot - * - * Purpose: get unassigned DBRoot list - * - ****************************************************************************/ - -void Oam::getUnassignedDbroot(DBRootConfigList& dbrootlist) -{ - - //get assigned dbroots IDs - DBRootConfigList dbrootConfigList; - - try - { - getSystemDbrootConfig(dbrootConfigList); - - } - catch (...) {} - - // get string variables - Config* sysConfig = Config::makeConfig(CalpontConfigFile.c_str()); - string Section = "SystemConfig"; - - for ( int dbrootID = 1 ; dbrootID < MAX_DBROOT ; dbrootID++) - { - string dbrootPath; - - try - { - dbrootPath = sysConfig->getConfig(Section, "DBRoot" + itoa(dbrootID)); - } - catch (...) {} - - if (dbrootPath.empty() || dbrootPath == oam::UnassignedName) - continue; - - bool found = false; - DBRootConfigList::iterator pt = dbrootConfigList.begin(); - - for ( ; pt != dbrootConfigList.end() ; pt++) - { - if ( dbrootID == *pt ) - { - found = true; - break; - } - } - - if (!found) - dbrootlist.push_back(dbrootID); - } - - return; -} - -/*************************************************************************** - * - * Function: removeDbroot - * - * Purpose: remove DBRoot - * - ****************************************************************************/ - -void Oam::removeDbroot(DBRootConfigList& dbrootlist) -{ - int SystemDBRootCount = 0; - string cloud; - string DBRootStorageType; - string DataRedundancyConfig = "n"; - - try - { - getSystemConfig("DBRootCount", SystemDBRootCount); - getSystemConfig("Cloud", cloud); - getSystemConfig("DBRootStorageType", DBRootStorageType); - getSystemConfig("DataRedundancyConfig", DataRedundancyConfig); - } - catch (...) {} - - int dbrootNumber = dbrootlist.size(); - - if ( dbrootNumber < 1 ) - { - cout << "ERROR: Failed remove, total Number of DBRoots to remove is less than 1 " << endl; - exceptionControl("removeDbroot", API_INVALID_PARAMETER); - } - - Config* sysConfig = Config::makeConfig(CalpontConfigFile.c_str()); - string Section = "SystemConfig"; - - //check if dbroot requested to be removed is empty and dboot #1 is requested to be removed - DBRootConfigList::iterator pt = dbrootlist.begin(); - - for ( ; pt != dbrootlist.end() ; pt++) - { - int dbrootID = *pt; - - //see if dbroot exist - string DBrootpath = "DBRoot" + itoa(dbrootID); - string dbrootdir; - - try - { - dbrootdir = sysConfig->getConfig(Section, DBrootpath); - } - catch (...) - {} - - if ( dbrootdir.empty() || dbrootdir == oam::UnassignedName ) - { - cout << "ERROR: DBRoot doesn't exist: " << itoa(dbrootID) << endl; - exceptionControl("removeDbroot", API_FAILURE); - } - - if ( dbrootID == 1 ) - { - cout << "ERROR: Failed remove, can't remove dbroot #1" << endl; - exceptionControl("removeDbroot", API_INVALID_PARAMETER); - } - - //check if dbroot is empty - bool isEmpty = false; - string errMsg; - - try - { - BRM::DBRM dbrm; - - if ( dbrm.isDBRootEmpty(dbrootID, isEmpty, errMsg) != 0) - { - cout << "ERROR: isDBRootEmpty API error, dbroot #" << itoa(dbrootID) << " :" << errMsg << endl; - exceptionControl("removeDbroot", API_FAILURE); - } - } - catch (exception& ) - {} - - if (!isEmpty) - { - cout << "ERROR: Failed remove, dbroot #" << itoa(dbrootID) << " is not empty" << endl; - exceptionControl("removeDbroot", API_FAILURE); - } - - //check if dbroot is assigned to a pm and if so, unassign it - int pmid = 0; - - try - { - getDbrootPmConfig(dbrootID, pmid); - } - catch (exception& ) - {} - - if ( pmid > 0 ) - { - //unassign dbroot from pm - DBRootConfigList pmdbrootlist; - pmdbrootlist.push_back(dbrootID); - - try - { - unassignDbroot("pm" + itoa(pmid), pmdbrootlist); - } - catch (exception& ) - { - cout << endl << "**** unassignDbroot Failed" << endl; - exceptionControl("removeDbroot", API_FAILURE); - } - } - - // if gluster, request volume delete - if ( DataRedundancyConfig == "y") - { - try - { - string errmsg1; - string errmsg2; - int ret = glusterctl(oam::GLUSTER_DELETE, itoa(dbrootID), errmsg1, errmsg2); - - if ( ret != 0 ) - { - cerr << "FAILURE: Error deleting gluster dbroot# " + itoa(dbrootID) + ", error: " + errmsg1 << endl; - exceptionControl("removeDbroot", API_FAILURE); - } - } - catch (exception& e) - { - cout << endl << "**** glusterctl API exception: " << e.what() << endl; - cerr << "FAILURE: Error deleting gluster dbroot# " + itoa(dbrootID) << endl; - writeLog("FAILURE: Error deleting gluster dbroot# " + itoa(dbrootID), LOG_TYPE_ERROR ); - exceptionControl("removeDbroot", API_FAILURE); - } - catch (...) - { - cout << endl << "**** glusterctl API exception: UNKNOWN" << endl; - cerr << "FAILURE: Error deleting gluster dbroot# " + itoa(dbrootID) << endl; - writeLog("FAILURE: Error deleting gluster dbroot# " + itoa(dbrootID), LOG_TYPE_ERROR ); - exceptionControl("removeDbroot", API_FAILURE); - } - } - - try - { - sysConfig->delConfig(Section, DBrootpath); - } - catch (...) - { - cout << "ERROR: Problem deleting DBRoot in the MariaDB Columnstore System Configuration file" << endl; - exceptionControl("deleteConfig", API_FAILURE); - } - } - - try - { - sysConfig->write(); - } - catch (...) - { - exceptionControl("sysConfig->write", API_FAILURE); - } - - //get updated Columnstore.xml distributed - distributeConfigFile("system"); - - // - //send message to Process Monitor to remove dbroot to shared memory - // - pt = dbrootlist.begin(); - - for ( ; pt != dbrootlist.end() ; pt++) - { - try - { - ByteStream obs; - - obs << (ByteStream::byte) REMOVE_DBROOT; - obs << itoa(*pt); - - sendStatusUpdate(obs, REMOVE_DBROOT); - } - catch (...) - { - exceptionControl("setSystemConfig", API_INVALID_PARAMETER); - } - } - - return; -} - -//current amazon max dbroot id support = 190; -string PMdeviceName = "/dev/sd"; -string deviceLetter[] = {"g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "end"}; - -/*************************************************************************** - * - * Function: getAWSdeviceName - * - * Purpose: get AWS Device Name for DBRoot ID - * - ****************************************************************************/ - -storageID_t Oam::getAWSdeviceName( const int dbrootid) -{ - string amazondeviceName = "/dev/xvd"; - - try - { - getSystemConfig( "AmazonDeviceName", amazondeviceName ); - } - catch (...) - {} - - if ( amazondeviceName.empty() || amazondeviceName == "" ) - amazondeviceName = "/dev/xvd"; - - //calulate id numbers from DBRoot ID -// int lid = (dbrootid-1) / 10; -// int did = dbrootid - (dbrootid * lid); - -// return boost::make_tuple(PMdeviceName + deviceLetter[lid] + itoa(did), amazondeviceName + deviceLetter[lid] + itoa(did)); - return boost::make_tuple(PMdeviceName + deviceLetter[dbrootid - 1], amazondeviceName + deviceLetter[dbrootid - 1]); -} - -/*************************************************************************** - * - * Function: setSystemDBrootCount - * - * Purpose: set System DBRoot Count - * - ****************************************************************************/ - -void Oam::setSystemDBrootCount() -{ - sleep(1); //let other updates get applied to the config file - - //set the system dbroot number - try - { - DBRootConfigList dbrootConfigList; - getSystemDbrootConfig(dbrootConfigList); - - try - { - setSystemConfig("DBRootCount", dbrootConfigList.size()); - } - catch (...) - { - writeLog("ERROR: setSystemConfig DBRootCount " + dbrootConfigList.size(), LOG_TYPE_ERROR ); - cout << endl << "ERROR: setSystemConfig DBRootCount " + dbrootConfigList.size() << endl; - exceptionControl("setSystemConfig", API_FAILURE); - } - } - catch (...) - { - writeLog("ERROR: getSystemDbrootConfig ", LOG_TYPE_ERROR ); - cout << endl << "ERROR: getSystemDbrootConfig " << endl; - exceptionControl("getSystemDbrootConfig", API_INVALID_PARAMETER); - } - - return; -} - -/*************************************************************************** - * - * Function: setFilesPerColumnPartition - * - * Purpose: set FilesPerColumnPartition - * This function takes the old DBRootCount as an input arg - * and expects that the new DBRootCount has been set in the - * Columnstore.xml file. Function assumes oldSystemDBRootCount - * has already been validated to be > 0 (else we could get a - * divide by 0 error). - * - ****************************************************************************/ - -void Oam::setFilesPerColumnPartition( int oldSystemDBRootCount ) -{ - int newSystemDBRootCount = 0; - int oldFilesPerColumnPartition = 4; - - try - { - getSystemConfig("DBRootCount", newSystemDBRootCount); - } - catch (...) - { - writeLog("ERROR: getSystemConfig DBRootCount ", LOG_TYPE_ERROR ); - cout << endl << "ERROR: getSystemConfig DBRootCount" << endl; - exceptionControl("setFilesPerColumnPartition", API_INVALID_PARAMETER); - } - - try - { - getSystemConfig("FilesPerColumnPartition", oldFilesPerColumnPartition); - } - catch (...) - { - writeLog("ERROR: getSystemConfig FilesPerColumnPartition ", LOG_TYPE_ERROR ); - cout << endl << "ERROR: getSystemConfig FilesPerColumnPartition" << endl; - exceptionControl("setFilesPerColumnPartition", API_INVALID_PARAMETER); - } - - if ( oldFilesPerColumnPartition != oldSystemDBRootCount * - (oldFilesPerColumnPartition / oldSystemDBRootCount) ) - { - writeLog("ERROR: old FilesPerColumnPartition not a multiple of DBRootCount", LOG_TYPE_ERROR ); - cout << endl << "ERROR: old FilesPerColumnPartition not a multiple of DBRootCount " << endl; - exceptionControl("setFilesPerColumnPartition", API_INVALID_PARAMETER); - } - - int newFilesPerColumnPartition = (oldFilesPerColumnPartition / oldSystemDBRootCount) * newSystemDBRootCount; - - try - { - setSystemConfig("FilesPerColumnPartition", newFilesPerColumnPartition); - } - catch (...) - { - writeLog("ERROR: setSystemConfig FilesPerColumnPartition " + newFilesPerColumnPartition, LOG_TYPE_ERROR ); - cout << endl << "ERROR: setSystemConfig FilesPerColumnPartition " + newFilesPerColumnPartition << endl; - exceptionControl("setFilesPerColumnPartition", API_FAILURE); - } -} - -#pragma pack(push,1) -struct NotifyMsgStruct -{ - uint32_t magic; - uint32_t msgno; - char node[8]; - uint32_t paylen; -}; -#pragma pack(pop) - -/*************************************************************************** - * - * Function: sendDeviceNotification - * - * Purpose: send Device Notification Msg - * - ****************************************************************************/ - -int Oam::sendDeviceNotification(std::string deviceName, NOTIFICATION_TYPE type, std::string payload) -{ - //first check if there are any CMP entries configured - try - { - Config* sysConfig = Config::makeConfig(CalpontConfigFile.c_str()); - string CMPsection = "CMP"; - - for ( int id = 1 ;; id++) - { - string CMP = CMPsection + itoa(id); - - try - { - string ipaddr = sysConfig->getConfig(CMP, "IPAddr"); - - if (ipaddr.empty()) - return API_SUCCESS; - - string port = sysConfig->getConfig(CMP, "Port"); - - NotifyMsgStruct msg; - memset(&msg, 0, sizeof(msg)); - msg.magic = NOTIFICATIONKEY; - msg.msgno = type; - strncpy(msg.node, deviceName.c_str(), 7); - - if (!payload.empty()) - msg.paylen = payload.length() + 1; - - // send notification msg to this exchange - try - { - int ds = -1; - ds = ::socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); - struct sockaddr_in serv_addr; - struct in_addr la; - ::inet_aton(ipaddr.c_str(), &la); - memset(&serv_addr, 0, sizeof(serv_addr)); - serv_addr.sin_family = AF_INET; - serv_addr.sin_addr.s_addr = la.s_addr; - serv_addr.sin_port = htons(atoi(port.c_str())); - int rc = -1; - rc = ::connect(ds, (struct sockaddr*)&serv_addr, sizeof(serv_addr)); - - if (rc < 0) throw runtime_error("socket connect error"); - - rc = ::write(ds, &msg, sizeof(msg)); - - if (rc < 0) throw runtime_error("socket write error"); - - if (msg.paylen > 0) - { - rc = ::write(ds, payload.c_str(), msg.paylen); - - if (rc < 0) throw runtime_error("socket write error"); - } - - ::shutdown(ds, SHUT_RDWR); - ::close(ds); - } - catch (std::runtime_error&) - { - //There's other reasons, but this is the most likely... - return API_CONN_REFUSED; - } - catch (std::exception&) - { - return API_FAILURE; - } - catch (...) - { - return API_FAILURE; - } - } - catch (...) - { - return API_SUCCESS; - } - } - } - catch (...) {} - - return API_SUCCESS; -} - -/*************************************************************************** - * - * Function: actionMysqlCalpont - * - * Purpose: systemctl mariadb.service command - * - ****************************************************************************/ - -void Oam::actionMysqlCalpont(MYSQLCALPONT_ACTION action) -{ - // check system type to see if mysqld should be accessed on this module - int serverTypeInstall = oam::INSTALL_NORMAL; - string moduleType; - string moduleName; - oamModuleInfo_t st; - - try - { - st = getModuleInfo(); - moduleType = boost::get<1>(st); - serverTypeInstall = boost::get<5>(st); - moduleName = boost::get<0>(st); - } - catch (...) {} - - //PMwithUM config - string PMwithUM = "n"; - - try - { - getSystemConfig( "PMwithUM", PMwithUM); - } - catch (...) - { - PMwithUM = "n"; - } - - if ( ( serverTypeInstall == oam::INSTALL_NORMAL && moduleType == "um" ) || - ( serverTypeInstall == oam::INSTALL_NORMAL && moduleType == "pm" && PMwithUM == "y") || - ( serverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) ) - PMwithUM = PMwithUM; - else - return; - - string mysqlscript = "systemctl"; - - string command; - - int no_systemd = -1; - - // This is here because calling system() is problematic with ProcMon - // which has its own signalHandler for SIGCHLD. Therefore since this - // is only needed when doing non MYSQL_STATUS commands, only check - // for systemctl and mariadb.service when doing other commands. - if (action != MYSQL_STATUS) - { - pid_t cPid; - int status; - - cPid = fork(); - - if (cPid == 0) - { - execlp("systemctl","systemctl","cat","mariadb.service",NULL); - } - else if (cPid > 0) - { - waitpid(cPid, &status, 0); - if (!WIFEXITED(status)) - { - writeLog("Oam::actionMysqlCalpont: systemctl Failed", LOG_TYPE_ERROR); - exceptionControl("actionMysqlCalpont", API_FAILURE); - } - no_systemd = WEXITSTATUS(status); - } - else - { - writeLog("Oam::actionMysqlCalpont: Fork Failed", LOG_TYPE_ERROR); - exceptionControl("actionMysqlCalpont", API_FAILURE); - } - } - - switch (action) - { - case MYSQL_START: - { - if (no_systemd) - { - system("/usr/bin/mysqld_safe &"); - } - command = "start"; - break; - } - - case MYSQL_STOP: - { - if (no_systemd) - { - system("pkill mysqld"); - } - command = "stop"; - - //set process status - try - { - setProcessStatus("mysqld", moduleName, MAN_OFFLINE, 0); - } - catch (...) - {} - - break; - } - - case MYSQL_RESTART: - { - if (no_systemd) - { - system("pkill mysqld"); - system("/usr/bin/mysqld_safe &"); - } - command = "restart"; - break; - } - - case MYSQL_RELOAD: - { - if (no_systemd) - { - system("pkill -HUP mysqld"); - } - command = "reload"; - break; - } - - case MYSQL_FORCE_RELOAD: - { - if (no_systemd) - { - system("pkill -HUP mysqld"); - } - command = "force-reload"; - break; - } - - case MYSQL_STATUS: - { - command = "status"; - break; - } - - default: - { - writeLog("***DEFAULT OPTION", LOG_TYPE_ERROR); - exceptionControl("actionMysqlCalpont", API_INVALID_PARAMETER); - } - } - - //RUN COMMAND - if (!no_systemd) - { - string cmd = mysqlscript + " " + command + " mariadb.service > " + tmpdir + "/" + command + ".log 2>&1"; - system(cmd.c_str()); - } - - if (action == MYSQL_START || action == MYSQL_RESTART) - { - pid_t pid = 0; - // Loop check because we mysqld may not start immediately - for (int i=0; i < 10; i++) - { - //get pid - char buf[512]; - FILE *cmd_pipe = popen("pidof -s mysqld", "r"); - - fgets(buf, 512, cmd_pipe); - pid = strtoul(buf, NULL, 10); - - pclose( cmd_pipe ); - - if (pid) - { - break; - } - else - { - sleep(2); - } - } - - if (!pid) - { - // mysql not started - writeLog("***mysqld NOT RUNNING", LOG_TYPE_ERROR); - exceptionControl("actionMysqlCalpont", API_FAILURE); - } - - //set process status - try - { - setProcessStatus("mysqld", moduleName, ACTIVE, pid); - } - catch (...) - {} - } - - if (action == MYSQL_STATUS ) - { - ProcessStatus procstat; - getProcessStatus("mysqld", moduleName, procstat); - int state = procstat.ProcessOpState; - pid_t pidStatus = procstat.ProcessID; - pid_t pid = 0; - if ( state != ACTIVE ) - { - for (int i=0; i < 10; i++) - { - //get pid - char buf[512]; - FILE *cmd_pipe = popen("pidof -s mysqld", "r"); - - fgets(buf, 512, cmd_pipe); - pid = strtoul(buf, NULL, 10); - - pclose( cmd_pipe ); - - if (pid) - { - //set process status - try - { - setProcessStatus("mysqld", moduleName, ACTIVE, pid); - } - catch (...) - {} - return; - } - else - { - sleep(2); - } - } - } - else - { - for (int i=0; i < 10; i++) - { - //check if pid has changed - char buf[512]; - FILE *cmd_pipe = popen("pidof -s mysqld", "r"); - - fgets(buf, 512, cmd_pipe); - pid = strtoul(buf, NULL, 10); - - pclose( cmd_pipe ); - - if (pid) - { - if ( pidStatus != pid ) - { - //set process status - try - { - setProcessStatus("mysqld", moduleName, ACTIVE, pid); - } - catch (...) - {} - break; - } - } - else - { - sleep(2); - } - } - } - if (pid) - { - //check module status, if DEGRADED set to ACTIVE - int opState; - bool degraded; - - try - { - getModuleStatus(moduleName, opState, degraded); - } - catch (...) - {} - - if (opState == oam::DEGRADED) - { - try - { - setModuleStatus(moduleName, ACTIVE); - } - catch (...) - {} - } - } - else - { - if ( state == ACTIVE ) - { - //set process status - try - { - setProcessStatus("mysqld", moduleName, MAN_OFFLINE, 0); - } - catch (...) - {} - } - - //check module status, if ACTIVE set to DEGRADED - int opState; - bool degraded; - - try - { - getModuleStatus(moduleName, opState, degraded); - } - catch (...) - {} - - if (opState == oam::ACTIVE) - { - try - { - setModuleStatus(moduleName, DEGRADED); - } - catch (...) - {} - } - - return; - } - } - - return; -} - -/****************************************************************************************** - * @brief run DBHealth Check - * - * purpose: test the health of the DB - * - ******************************************************************************************/ -void Oam::checkDBFunctional(bool action) -{ - ByteStream msg; - ByteStream receivedMSG; - - // only make call if system is active - SystemStatus systemstatus; - - try - { - getSystemStatus(systemstatus); - } - catch (exception& ) - {} - - if (systemstatus.SystemOpState != oam::ACTIVE ) - exceptionControl("checkDBHealth", API_INVALID_STATE); - - SystemModuleTypeConfig systemmoduletypeconfig; - - try - { - Oam::getSystemConfig(systemmoduletypeconfig); - } - catch (...) - { - exceptionControl("checkDBHealth", API_FAILURE); - } - - // get Server Type Install ID - int serverTypeInstall = oam::INSTALL_NORMAL; - string OAMParentModuleName; - oamModuleInfo_t st; - - try - { - st = getModuleInfo(); - OAMParentModuleName = boost::get<3>(st); - serverTypeInstall = boost::get<5>(st); - } - catch (...) - { - exceptionControl("getMyProcessStatus", API_FAILURE); - } - - string module; - - switch ( serverTypeInstall ) - { - case (oam::INSTALL_NORMAL): - case (oam::INSTALL_COMBINE_DM_UM): - { - module = "um1"; - break; - } - - case (oam::INSTALL_COMBINE_PM_UM): - case (oam::INSTALL_COMBINE_DM_UM_PM): - { - module = OAMParentModuleName; - break; - } - } - - // setup message - msg << (ByteStream::byte) RUN_DBHEALTH_CHECK; - msg << (ByteStream::byte) action; - - try - { - //send the msg to Server Monitor - MessageQueueClient servermonitor(module + "_ServerMonitor"); - servermonitor.write(msg); - - // wait 30 seconds for ACK from Server Monitor - struct timespec ts = { 30, 0 }; - - receivedMSG = servermonitor.read(&ts); - - if (receivedMSG.length() > 0) - { - ByteStream::byte returnType; - receivedMSG >> returnType; - - if ( returnType == RUN_DBHEALTH_CHECK ) - { - ByteStream::byte returnStatus; - receivedMSG >> returnStatus; - - if ( returnStatus == oam::API_SUCCESS ) - { - // succesfull - servermonitor.shutdown(); - return; - } - } - - // shutdown connection - servermonitor.shutdown(); - - exceptionControl("checkDBHealth", API_FAILURE); - } - else - { - // timeout - // shutdown connection - servermonitor.shutdown(); - - exceptionControl("checkDBHealth", API_TIMEOUT); - } - } - catch (...) - { - exceptionControl("checkDBHealth", API_FAILURE); - return; - } - - return; -} - /*************************************************************************** * * Function: validateModule @@ -8265,972 +837,6 @@ int Oam::validateModule(const std::string name) return API_INVALID_PARAMETER; } -/*************************************************************************** - * - * Function: getEC2InstanceIpAddress - * - * Purpose: Check Amazon EC2 is running and returns Private IP address - * - ****************************************************************************/ - -std::string Oam::getEC2InstanceIpAddress(std::string instanceName) -{ - // run script to get Instance status and IP Address - string tmplog = tmpdir + "/getCloudIP_" + instanceName; - string cmd = "MCSInstanceCmds.sh getPrivateIP " + instanceName + " > " + tmplog; - system(cmd.c_str()); - - if (checkLogStatus(tmplog, "stopped") ) - return "stopped"; - - if (checkLogStatus(tmplog, "terminated") ) - return "terminated"; - - // get IP Address - string IPAddr; - ifstream oldFile (tmplog.c_str()); - char line[400]; - - while (oldFile.getline(line, 400)) - { - IPAddr = line; - } - - oldFile.close(); - - if (isValidIP(IPAddr)) - return IPAddr; - - return "terminated"; -} - -/*************************************************************************** - * - * Function: getEC2LocalInstance - * - * Purpose: Get Amazon EC2 local Instance Name - * - ****************************************************************************/ - -std::string Oam::getEC2LocalInstance(std::string name) -{ - // run script to get Instance status and IP Address - string file = tmpdir + "/getInstanceInfo_" + name; - string cmd = "MCSInstanceCmds.sh getInstance > " + file; - int status = system(cmd.c_str()); - if (WEXITSTATUS(status) == 1 ) - return "failed"; - - // get Instance Name - string instanceName; - ifstream oldFile (file.c_str()); - char line[400]; - - while (oldFile.getline(line, 400)) - { - instanceName = line; - } - - oldFile.close(); - - return instanceName; - -} - -/*************************************************************************** - * - * Function: getEC2LocalInstanceType - * - * Purpose: Get Amazon EC2 local Instance Type - * - ****************************************************************************/ - -std::string Oam::getEC2LocalInstanceType(std::string name) -{ - // run script to get Instance status and IP Address - string file = tmpdir + "/getInstanceType_" + name; - string cmd = "MCSInstanceCmds.sh getType > " + file; - int status = system(cmd.c_str()); - if (WEXITSTATUS(status) == 1 ) - return "failed"; - - // get Instance Name - string instanceType; - ifstream oldFile (file.c_str()); - char line[400]; - - while (oldFile.getline(line, 400)) - { - instanceType = line; - } - - oldFile.close(); - - return instanceType; - -} - -/*************************************************************************** - * - * Function: getEC2LocalInstanceSubnet - * - * Purpose: Get Amazon EC2 local Instance Subnet - * - ****************************************************************************/ - -std::string Oam::getEC2LocalInstanceSubnet(std::string name) -{ - // run script to get Instance Subnet - string file = tmpdir + "/getInstanceSubnet_" + name; - string cmd = "MCSInstanceCmds.sh getSubnet > " + file; - int status = system(cmd.c_str()); - if (WEXITSTATUS(status) == 1 ) - return "failed"; - - // get Instance Name - string instanceSubnet; - ifstream oldFile (file.c_str()); - char line[400]; - - while (oldFile.getline(line, 400)) - { - instanceSubnet = line; - } - - oldFile.close(); - - return instanceSubnet; - -} - - -/*************************************************************************** - * - * Function: launchEC2Instance - * - * Purpose: Launch Amazon EC2 Instance - * - ****************************************************************************/ - -std::string Oam::launchEC2Instance( const std::string name, const std::string IPAddress, const std::string type, const std::string group) -{ - // run script to get Instance status and IP Address - string file = tmpdir + "/getInstance_" + name; - string cmd = "MCSInstanceCmds.sh launchInstance " + IPAddress + " " + type + " " + group + " > " + file; - int status = system(cmd.c_str()); - if (WEXITSTATUS(status) == 1 ) - return "failed"; - - if (checkLogStatus(file, "Required") ) - return "failed"; - - // get Instance ID - string instance; - ifstream oldFile (file.c_str()); - char line[400]; - - while (oldFile.getline(line, 400)) - { - instance = line; - } - - oldFile.close(); - - if (instance.empty()) - return "failed"; - - if (instance == "unknown") - return "failed"; - - if (instance.find("i-") == string::npos) - return "failed"; - - return instance; -} - -/*************************************************************************** - * - * Function: terminateEC2Instance - * - * Purpose: Terminate Amazon EC2 Instance - * - ****************************************************************************/ - -void Oam::terminateEC2Instance(std::string instanceName) -{ - // run script to get Instance status and IP Address - string cmd = "MCSInstanceCmds.sh terminateInstance " + instanceName + " > " + tmpdir + "/terminateEC2Instance_" + instanceName; - system(cmd.c_str()); - - return; -} - -/*************************************************************************** - * - * Function: stopEC2Instance - * - * Purpose: Terminate Amazon EC2 Instance - * - ****************************************************************************/ - -void Oam::stopEC2Instance(std::string instanceName) -{ - // run script to get Instance status and IP Address - string cmd = "MCSInstanceCmds.sh stopInstance " + instanceName + " > " + tmpdir + "/stopEC2Instance_" + instanceName; - system(cmd.c_str()); - - return; -} - -/*************************************************************************** - * - * Function: startEC2Instance - * - * Purpose: Start Amazon EC2 Instance - * - ****************************************************************************/ - -bool Oam::startEC2Instance(std::string instanceName) -{ - // run script to get Instance status and IP Address - string cmd = "MCSInstanceCmds.sh startInstance " + instanceName + " > " + tmpdir + "/startEC2Instance_" + instanceName; - int ret = system(cmd.c_str()); - if (WEXITSTATUS(ret) == 1 ) - return false; - - return true; -} - -/*************************************************************************** - * - * Function: assignElasticIP - * - * Purpose: assign Elastic IP Address on Amazon - * - ****************************************************************************/ - -bool Oam::assignElasticIP(std::string instanceName, std::string IpAddress) -{ - // run script to get Instance status and IP Address - string cmd = "MCSInstanceCmds.sh assignElasticIP " + instanceName + " " + IpAddress + " > " + tmpdir + "/assignElasticIP_" + instanceName; - int ret = system(cmd.c_str()); - if (WEXITSTATUS(ret) == 1 ) - exceptionControl("assignElasticIP", oam::API_FAILURE); - - return true; -} - -/*************************************************************************** - * - * Function: deassignElasticIP - * - * Purpose: deassign Elastic IP Address on Amazon - * - ****************************************************************************/ - -bool Oam::deassignElasticIP(std::string IpAddress) -{ - // run script to get Instance status and IP Address - string cmd = "MCSInstanceCmds.sh deassignElasticIP " + IpAddress + " > " + tmpdir + "/deassignElasticIP_" + IpAddress; - int ret = system(cmd.c_str()); - if (WEXITSTATUS(ret) == 1 ) - exceptionControl("deassignElasticIP", oam::API_FAILURE); - - return true; -} - -/*************************************************************************** - * - * Function: getEC2VolumeStatus - * - * Purpose: get Volume Status - * - ****************************************************************************/ - -std::string Oam::getEC2VolumeStatus(std::string volumeName) -{ - // run script to get Volume Status - string cmd = "MCSVolumeCmds.sh describe " + volumeName + " > " + tmpdir + "/getVolumeStatus_" + volumeName; - int ret = system(cmd.c_str()); - if (WEXITSTATUS(ret) == 1 ){ - return "failed"; - } - - // get status - string status; - string file = tmpdir + "/getVolumeStatus_" + volumeName; - ifstream oldFile (file.c_str()); - char line[400]; - - while (oldFile.getline(line, 400)) - { - status = line; - break; - } - - oldFile.close(); - - return status; -} - -/*************************************************************************** - * - * Function: createEC2Volume - * - * Purpose: create a EC2 Volume - * - ****************************************************************************/ - -std::string Oam::createEC2Volume(std::string size, std::string name) -{ - // run script to get Volume Status - string file = tmpdir + "/createVolumeStatus_" + name; - string cmd = "MCSVolumeCmds.sh create " + size + " " + name + " > " + file; - int ret = system(cmd.c_str()); - if (WEXITSTATUS(ret) == 1 ) - return "failed"; - - // get status - string volumeName; - ifstream oldFile (file.c_str()); - char line[400]; - - while (oldFile.getline(line, 400)) - { - volumeName = line; - } - - oldFile.close(); - - if ( volumeName == "unknown" ) - return "failed"; - - if (volumeName.find("vol-") == string::npos) - return "failed"; - - return volumeName; -} - -/*************************************************************************** - * - * Function: attachEC2Volume - * - * Purpose: attach EC2 Volume - * - ****************************************************************************/ - -bool Oam::attachEC2Volume(std::string volumeName, std::string deviceName, std::string instanceName) -{ - // add 1 retry if it fails by dettaching then attaching - int ret = 0; - string status; - - for ( int retry = 0 ; retry < 2 ; retry++ ) - { - // run script to attach Volume - string cmd = "MCSVolumeCmds.sh attach " + volumeName + " " + instanceName + " " + deviceName + " > " + tmpdir + "/attachVolumeStatus_" + volumeName; - ret = system(cmd.c_str()); - - if (WEXITSTATUS(ret) == 1 ) - { - //failing to attach, dettach and retry - writeLog("attachEC2Volume: Attach failed, call detach:" + volumeName + " " + instanceName + " " + deviceName, LOG_TYPE_ERROR ); - - detachEC2Volume(volumeName); - } - else - return true; - } - - if (ret == 0 ) - return true; - else - return false; -} - -/*************************************************************************** - * - * Function: detachEC2Volume - * - * Purpose: detach EC2 Volume - * - ****************************************************************************/ - -bool Oam::detachEC2Volume(std::string volumeName) -{ - // run script to attach Volume - string cmd = "MCSVolumeCmds.sh detach " + volumeName + " > " + tmpdir + "/detachVolumeStatus_" + volumeName; - int ret = system(cmd.c_str()); - if (WEXITSTATUS(ret) == 1 ) - return false; - - return true; -} - -/*************************************************************************** - * - * Function: deleteEC2Volume - * - * Purpose: detach EC2 Volume - * - ****************************************************************************/ - -bool Oam::deleteEC2Volume(std::string volumeName) -{ - // run script to delete Volume - string cmd = "MCSVolumeCmds.sh delete " + volumeName + " > " + tmpdir + "/deleteVolumeStatus_" + volumeName; - int ret = system(cmd.c_str()); - if (WEXITSTATUS(ret) == 1 ) - return false; - - return true; -} - -/*************************************************************************** - * - * Function: createEC2tag - * - * Purpose: create EC2 tag - * - ****************************************************************************/ - -bool Oam::createEC2tag(std::string resourceName, std::string tagName, std::string tagValue) -{ - // run script to create a tag - string cmd = "MCSVolumeCmds.sh createTag " + resourceName + " " + tagName + " " + tagValue + " > " + tmpdir + "createTagStatus_" + resourceName; - int ret = system(cmd.c_str()); - if (WEXITSTATUS(ret) == 1 ) - return false; - - return true; -} - -/****************************************************************************************** -* @brief syslogAction -* -* purpose: Take Action on Syslog Process -* -******************************************************************************************/ -void Oam::syslogAction( std::string action) -{ -#ifndef _MSC_VER - writeLog("syslogAction: " + action, LOG_TYPE_DEBUG ); - - string systemlog = "syslog"; - - string fileName; - getSystemConfig("SystemLogConfigFile", fileName); - - if (fileName == oam::UnassignedName ) - { - return; - } - - string::size_type pos = fileName.find("syslog-ng", 0); - - if (pos != string::npos) - systemlog = "syslog-ng"; - else - { - pos = fileName.find("rsyslog", 0); - - if (pos != string::npos) - systemlog = "rsyslog"; - } - - string cmd; - - if ( action == "sighup" ) - { - if ( systemlog == "syslog" || systemlog == "rsyslog") - systemlog = systemlog + "d"; - - cmd = "pkill -hup " + systemlog + " > /dev/null 2>&1"; - } - else - { - cmd = "systemctl " + action + " " + systemlog + ".service > /dev/null 2>&1"; - system(cmd.c_str()); - - cmd = "service " + systemlog + " " + action + " > /dev/null 2>&1"; - } - - // take action on syslog service - writeLog("syslogAction cmd: " + cmd, LOG_TYPE_DEBUG ); - system(cmd.c_str()); - - // delay to give time for syslog to get up and going - sleep(2); -#endif -} - -/****************************************************************************************** -* @brief dbrmctl -* -* purpose: call dbrm control -* -******************************************************************************************/ -void Oam::dbrmctl(std::string command) -{ - //reload DBRM with new configuration - string cmd = "dbrmctl " + command + " > /dev/null 2>&1"; - system(cmd.c_str()); - - return; -} - -/****************************************************************************************** -* @brief glusterctl -* -* purpose: gluster control and glusteradd -* -* commands: status - Used to check status of gluster and disk good/bad -* returns -* NOTINSTALLED -* OK -* FAILED! erorrmsg -* setddc - Set Number of gluster disk copies -* argument = # -* returns -* NOTINSTALLED -* OK -* FAILED! erorrmsg -* assign - Used to assign a dbroot to a primary pm -* argument1 = dbroot# -* argument2 = pm# -* returns -* NOTINSTALLED -* OK -* FAILED! erorrmsg -* unassign - Used to assign a dbroot to a primary pm -* argument1 = dbroot# -* argument2 = pm# -* returns -* NOTINSTALLED -* OK -* FAILED! erorrmsg -* whohas - Used to get secondary pm for a dbroot for moving -* argument1 = dbroot# -* return argument #2 - pm# -* returns -* NOTINSTALLED -* OK -* FAILED! erorrmsg -* add - Used to add new gluster pm and dbroot -* argument1 = firstnewpm# -* argument2 = firstnewdbroot# -* returns -* NOTINSTALLED -* OK -* FAILED! erorrmsg -* delete - Used to delete dbroot volumes -* argument1 = dbroot# -* returns -* NOTINSTALLED -* OK -* FAILED! erorrmsg -* -******************************************************************************************/ -int Oam::glusterctl(GLUSTER_COMMANDS command, std::string argument1, std::string& argument2, std::string& errmsg) -{ -#ifndef _MSC_VER - int user, group; - user = getuid(); - group = getgid(); - - string glustercmd = "gluster "; - - if (user != 0) - { - glustercmd = "sudo " + glustercmd; - } - - errmsg = ""; - - switch ( command ) - { - case (oam::GLUSTER_STATUS): - { - string command = glustercmd + "volume status"; - - char buffer[128]; - string result = ""; - FILE* pipe = popen(command.c_str(), "r"); - - if (!pipe) exceptionControl("GLUSTER_STATUS", API_FAILURE, "popen() failed"); - - try - { - while (!feof(pipe)) - { - if (fgets(buffer, 128, pipe) != NULL) - result += buffer; - } - } - catch (...) - { - exceptionControl("GLUSTER_STATUS", API_FAILURE); - throw; - } - - pclose(pipe); - argument2 = result; - return 0; - } - - case (oam::GLUSTER_SETDDC): - { - // The way this was implemented it doesn't seem possible to actually change the value.. - return 0; - } - - case (oam::GLUSTER_ASSIGN): - { - string dbrootID = argument1; - string pm = argument2; - - // build and send msg - int returnStatus = sendMsgToProcMgr(GLUSTERASSIGN, pm, FORCEFUL, ACK_YES, dbrootID); - - if (returnStatus != API_SUCCESS) - exceptionControl("GLUSTER_ASSIGN", returnStatus); - - break; - } - - case (oam::GLUSTER_UNASSIGN): - { - string dbrootID = argument1; - string pm = argument2; - - // build and send msg - int returnStatus = sendMsgToProcMgr(GLUSTERUNASSIGN, pm, FORCEFUL, ACK_YES, dbrootID); - - if (returnStatus != API_SUCCESS) - exceptionControl("GLUSTER_UNASSIGN", returnStatus); - - break; - } - - case (oam::GLUSTER_WHOHAS): - { - - // Script returned a string of space separated PMIDs everywhere this is called it expects that return form. - // When configuring or adding Modules we need to update DataRedundancyConfig section of configuration file. - Config* sysConfig = Config::makeConfig(CalpontConfigFile.c_str()); - string dbrootID = argument1; - string moduleDBRootIDPMs = "DBRoot" + dbrootID + "PMs"; - string msg = sysConfig->getConfig("DataRedundancyConfig", moduleDBRootIDPMs); - - if (msg.empty()) - { - exceptionControl("glusterctl", API_FAILURE); - } - - argument2 = msg; - return 0; - break; - } - - case (oam::GLUSTER_ADD): - { - int pmID = atoi(argument1.c_str()); - int dbrootID = atoi(argument2.c_str()); - string password = errmsg; - string command = ""; - int status; - writeLog("glusterctl: GLUSTER_ADD: dbroot = " + argument2 + " pm = " + argument1, LOG_TYPE_DEBUG ); - - Config* sysConfig = Config::makeConfig(); - ModuleTypeConfig moduletypeconfig; - getSystemConfig("pm", moduletypeconfig); - int dataRedundancyCopies; - int dbrootCount; - getSystemConfig("DataRedundancyCopies", dataRedundancyCopies); - getSystemConfig("DBRootCount", dbrootCount); - - int numberPMs = moduletypeconfig.ModuleCount; - int numberNewPMs = numberPMs - pmID + 1; - int numberNewDBRoots = (dbrootCount - dbrootID) + 1; - int numberDBRootsPerPM = numberNewDBRoots / numberNewPMs; - - utils::VLArray > dbrootPms(dbrootCount); - utils::VLArray DataRedundancyConfigs(numberPMs); - int startDBRootID = dbrootID; - - for (int pm = (pmID - 1); pm < numberPMs; pm++, startDBRootID++) - { - DataRedundancyConfigs[pm].pmID = (pm + 1); - string moduleHostName = "ModuleHostName" + itoa(pm + 1) + "-1-3"; - string moduleIpAddr = "ModuleIPAddr" + itoa(pm + 1) + "-1-3"; - - DataRedundancyConfigs[pm].pmHostname = sysConfig->getConfig("DataRedundancyConfig", moduleHostName); - DataRedundancyConfigs[pm].pmIpAddr = sysConfig->getConfig("DataRedundancyConfig", moduleIpAddr); - - if (DataRedundancyConfigs[pm].pmHostname.empty()) - { - DataRedundancyConfigs[pm].pmHostname = sysConfig->getConfig("SystemModuleConfig", moduleHostName); - } - - if (DataRedundancyConfigs[pm].pmIpAddr.empty()) - { - DataRedundancyConfigs[pm].pmIpAddr = sysConfig->getConfig("SystemModuleConfig", moduleIpAddr); - } - - int nextPM = (pm + 1); - int dbrootCopy = startDBRootID; - - if (nextPM >= numberPMs) - { - nextPM = (pmID - 1); - } - - for ( int i = 0; i < numberDBRootsPerPM; i++) - { - DataRedundancyConfigs[pm].dbrootCopies.push_back(dbrootCopy); - - for ( int copies = 0; copies < (dataRedundancyCopies - 1); copies++) - { - DataRedundancyConfigs[nextPM].dbrootCopies.push_back(dbrootCopy); - nextPM++; - - if (nextPM >= numberPMs) - { - nextPM = (pmID - 1); - } - - if (nextPM == pm) - { - nextPM++; - } - - if (nextPM >= numberPMs) - { - nextPM = (pmID - 1); - } - } - - dbrootCopy += numberNewPMs; - } - } - - for (int db = (dbrootID - 1); db < dbrootCount; db++) - { - int newDBrootID = db + 1; - string dbrootIDPMs = ""; - string moduleDBRootIDPMs = "DBRoot" + itoa(newDBrootID) + "PMs"; - - for (int pm = (pmID - 1); pm < numberPMs; pm++) - { - if (find(DataRedundancyConfigs[pm].dbrootCopies.begin(), DataRedundancyConfigs[pm].dbrootCopies.end(), newDBrootID) != DataRedundancyConfigs[pm].dbrootCopies.end()) - { - dbrootPms[db].push_back(DataRedundancyConfigs[pm].pmID); - dbrootIDPMs += itoa(DataRedundancyConfigs[pm].pmID); - dbrootIDPMs += " "; - } - } - - // Store to config and distribute so that GLUSTER_WHOHAS will work - sysConfig->setConfig("DataRedundancyConfig", moduleDBRootIDPMs, dbrootIDPMs); - } - - for (int pm = (pmID - 1); pm < numberPMs; pm++) - { - cout << "gluster peer probe " + DataRedundancyConfigs[pm].pmIpAddr << endl; - status = system(command.c_str()); - - if (WEXITSTATUS(status) != 0 ) - { - cout << "ERROR: peer probe command failed." << endl; - command = "remote_command.sh " + DataRedundancyConfigs[pm].pmIpAddr + " " + password + " 'stat /var/run/glusterd.pid > /dev/null 2>&1'"; - status = system(command.c_str()); - - if (WEXITSTATUS(status) != 0 ) - { - cout << "ERROR: No glusterd process detected at " << DataRedundancyConfigs[pm].pmIpAddr << "." << endl; - cout << " Start and enable glusterd at " << DataRedundancyConfigs[pm].pmIpAddr << "." << endl; - } - - exceptionControl("GLUSTER_ADD", API_FAILURE); - } - } - - sleep(5); - command = glustercmd + "peer status " + " >> /tmp/glusterCommands.txt 2>&1"; - status = system(command.c_str()); - - if (WEXITSTATUS(status) != 0 ) - { - cout << "ERROR: command failed: " << command << endl; - exit(1); - } - - //Need to wait since peer probe success does not always mean it is ready for volume create command - sleep(10); - utils::VLArray pmnextbrick(numberPMs); - - for (int pm = (pmID - 1); pm < numberPMs; pm++) - { - pmnextbrick[pm] = 1; - } - - for (int db = (dbrootID - 1); db < dbrootCount; db++) - { - int newDbrootID = db + 1; - - command = glustercmd + "volume create dbroot" + itoa(newDbrootID) + " transport tcp replica " + itoa(dataRedundancyCopies) + " "; - - vector::iterator dbrootPmIter = dbrootPms[db].begin(); - - for (; dbrootPmIter < dbrootPms[db].end(); dbrootPmIter++ ) - { - int pm = (*dbrootPmIter) - 1; - command += DataRedundancyConfigs[pm].pmIpAddr + ":/var/lib/columnstore/gluster/brick" + itoa(pmnextbrick[pm]) + " "; - pmnextbrick[pm]++; - } - - command += "force >> /tmp/glusterCommands.txt 2>&1"; - cout << "Gluster create and start volume dbroot" << itoa(newDbrootID) << "..."; - status = system(command.c_str()); - - if (WEXITSTATUS(status) != 0 ) - { - writeLog("ERROR: command failed: " + command, LOG_TYPE_DEBUG); - exceptionControl("GLUSTER_ADD", API_FAILURE); - } - - if (user != 0) - { - command = "sudo gluster volume set dbroot" + itoa(newDbrootID) + " storage.owner-uid " + itoa(user) + " >> /tmp/glusterCommands.txt 2>&1";; - status = system(command.c_str()); - - if (WEXITSTATUS(status) != 0 ) - { - writeLog("ERROR: command failed: ", LOG_TYPE_DEBUG); - exceptionControl("GLUSTER_ADD", API_FAILURE); - } - - command = "sudo gluster volume set dbroot" + itoa(newDbrootID) + " storage.owner-gid " + itoa(group) + " >> /tmp/glusterCommands.txt 2>&1";; - status = system(command.c_str()); - - if (WEXITSTATUS(status) != 0 ) - { - writeLog("ERROR: command failed: ", LOG_TYPE_DEBUG); - exceptionControl("GLUSTER_ADD", API_FAILURE); - } - } - - command = glustercmd + "volume start dbroot" + itoa(newDbrootID) + " >> /tmp/glusterCommands.txt 2>&1"; - status = system(command.c_str()); - - if (WEXITSTATUS(status) != 0 ) - { - writeLog("ERROR: command failed: ", LOG_TYPE_DEBUG); - exceptionControl("GLUSTER_ADD", API_FAILURE); - } - - cout << "DONE" << endl; - } - - try - { - sysConfig->write(); - } - catch (...) - { - exceptionControl("sysConfig->write", API_FAILURE); - } - - distributeConfigFile("system"); - - for (int pm = (pmID - 1); pm < numberPMs; pm++) - { - for (int i = 0; i < numberDBRootsPerPM; i++) - { - string ModuleDBRootID = "ModuleDBRootID" + itoa(pm + 1) + "-" + itoa(i + 1) + "-3"; - string dbr = sysConfig->getConfig("SystemModuleConfig", ModuleDBRootID); - string command = "" + DataRedundancyConfigs[pm].pmIpAddr + - ":/dbroot" + dbr + " /var/lib/columnstore/data" + dbr + - " glusterfs defaults,direct-io-mode=enable 0 0"; - string toPM = "pm" + itoa(pm + 1); - distributeFstabUpdates(command, toPM); - } - } - - break; - } - - case (oam::GLUSTER_DELETE): - { - string dbrootID = argument1; - string command = ""; - int status; - writeLog("glusterctl: GLUSTER_DELETE: dbroot = " + dbrootID, LOG_TYPE_DEBUG ); - - command = glustercmd + "--mode=script volume stop dbroot" + dbrootID + " >> /tmp/glusterCommands.txt 2>&1"; - - status = system(command.c_str()); - - if (WEXITSTATUS(status) != 0 ) - { - writeLog("ERROR: command failed: ", LOG_TYPE_DEBUG); - exceptionControl("GLUSTER_DELETE", API_FAILURE); - } - - // give time for transaction to finish after stopping - sleep(10); - - command = glustercmd + " --mode=script volume delete dbroot" + dbrootID + " >> /tmp/glusterCommands.txt 2>&1"; - - status = system(command.c_str()); - - if (WEXITSTATUS(status) != 0 ) - { - writeLog("ERROR: command failed: ", LOG_TYPE_DEBUG); - exceptionControl("GLUSTER_DELETE", API_FAILURE); - } - - break; - } - - case (oam::GLUSTER_PEERPROBE): - { - string ipAddress = argument1; - string password = argument2; - string command = ""; - int status; - - command = glustercmd + "peer probe " + ipAddress + " >> /tmp/glusterCommands.txt 2>&1"; - - cout << "gluster peer probe " + ipAddress << endl; - status = system(command.c_str()); - - if (WEXITSTATUS(status) != 0 ) - { - cout << "ERROR: peer probe command failed." << endl; - command = "remote_command.sh " + ipAddress + " " + password + " 'stat /var/run/glusterd.pid > /dev/null 2>&1'"; - status = system(command.c_str()); - - if (WEXITSTATUS(status) != 0 ) - { - cout << "ERROR: No glusterd process detected at " << ipAddress << "." << endl; - cout << " Start and enable glusterd at " << ipAddress << "." << endl; - } - - return 1; - } - - break; - } - - default: - break; - } - -#endif - return 0; -} - /****************************************************************************************** * @brief changeMyCnf * @@ -9311,348 +917,6 @@ bool Oam::changeMyCnf( std::string paramater, std::string value ) return true; } -/****************************************************************************************** -* @brief enableMySQLRep -* -* purpose: enable MySQL Replication on the system -* -******************************************************************************************/ -bool Oam::enableMySQLRep( std::string password ) -{ - // build and send msg - int returnStatus = sendMsgToProcMgr(ENABLEMYSQLREP, password, FORCEFUL, ACK_YES); - - if (returnStatus != API_SUCCESS) - exceptionControl("enableMySQLRep", returnStatus); - - return true; -} - -/****************************************************************************************** -* @brief disableMySQLRep -* -* purpose: enable MySQL Replication on the system -* -******************************************************************************************/ -bool Oam::disableMySQLRep() -{ - // build and send msg - int returnStatus = sendMsgToProcMgr(DISABLEMYSQLREP, oam::UnassignedName, FORCEFUL, ACK_YES); - - if (returnStatus != API_SUCCESS) - exceptionControl("disableMySQLRep", returnStatus); - - return true; -} - -/*************************************************************************** - * - * Function: checkGlusterLog - * - * Purpose: check Gluster Log after a Gluster control call - * - ****************************************************************************/ -int Oam::checkGlusterLog(std::string logFile, std::string& msg) -{ - if (checkLogStatus(logFile, "OK")) - { - if ( logFile == tmpdir + "/gluster_howhas.log" ) - { - ifstream File(logFile.c_str()); - - char line[100]; - string buf; - - while (File.getline(line, 100)) - { - buf = line; - string::size_type pos = buf.find("OK", 0); - - if (pos != string::npos) - { - msg = buf.substr(3, 100); - return 0; - } - } - - msg = ""; - return 1; - } - - msg = ""; - return 0; - } - - if (checkLogStatus(logFile, "NOTINSTALLED")) - { - writeLog("checkGlusterLog: NOTINSTALLED", LOG_TYPE_DEBUG ); - exceptionControl("glusterctl", API_DISABLED); - } - - if (checkLogStatus(logFile, "FAILED")) - { - ifstream File(logFile.c_str()); - - char line[100]; - string buf; - - while (File.getline(line, 100)) - { - buf = line; - string::size_type pos = buf.find("FAILED", 0); - - if (pos != string::npos) - { - msg = buf.substr(7, 100); - writeLog("checkGlusterLog: " + buf, LOG_TYPE_ERROR); - return 1; - } - } - - writeLog("checkGlusterLog: FAILURE", LOG_TYPE_ERROR); - - if ( logFile == tmpdir + "/gluster_howhas.log" ) - return 2; - else - exceptionControl("glusterctl", API_FAILURE); - } - - writeLog("checkGlusterLog: FAILURE - no log file match: " + logFile, LOG_TYPE_ERROR); - exceptionControl("glusterctl", API_FAILURE); - - return 1; -} - - -/****************************************************************************************** -* @brief getMySQLPassword -* -* purpose: check and get mysql user password -* -******************************************************************************************/ -std::string Oam::getMySQLPassword() -{ - string mysqlUser = "root"; - - string USER = "root"; - char* p = getenv("USER"); - - if (p && *p) - USER = p; - - string HOME = "/root"; - p = getenv("HOME"); - - if (p && *p) - HOME = p; - - string fileName = HOME + "/.my.cnf"; - - writeLog("getMySQLPassword: checking: " + fileName, LOG_TYPE_DEBUG); - - ifstream file (fileName.c_str()); - - if (!file) - { - writeLog("getMySQLPassword: doesn't exist: " + fileName, LOG_TYPE_DEBUG); - return oam::UnassignedName; - } - - char line[400]; - string buf; - - while (file.getline(line, 400)) - { - buf = line; - string::size_type pos = buf.find(mysqlUser, 0); - - if (pos != string::npos) - { - file.getline(line, 400); - buf = line; - - pos = buf.find("password", 0); - - if (pos != string::npos) - { - string::size_type pos1 = buf.find("=", pos); - - if (pos1 != string::npos) - { - //password found - - string password = buf.substr(pos1 + 1, 80); - password.erase(remove_if(password.begin(), password.end(), ::isspace), password.end()); - - writeLog("getMySQLPassword: password found", LOG_TYPE_DEBUG); - return password; - } - } - - break; - } - } - - file.close(); - - writeLog("getMySQLPassword: no password found", LOG_TYPE_DEBUG); - exceptionControl("getMySQLPassword", API_FAILURE); - - return oam::UnassignedName; -} - - -/****************************************************************************************** -* @brief updateFstab -* -* purpose: Update Fstabs for Amazon EBS setup -* -******************************************************************************************/ -std::string Oam::updateFstab(std::string device, std::string dbrootID) -{ - writeLog("updateFstab called: " + device + ":" + dbrootID, LOG_TYPE_DEBUG ); - - //check if entry already exist - int user; - user = getuid(); - - string entry; - - if (user == 0) - entry = device + " /var/lib/columnstore/data" + dbrootID + " ext2 noatime,nodiratime,noauto 0 0"; - else - entry = device + " /var/lib/columnstore/data" + dbrootID + " ext2 noatime,nodiratime,noauto,user 0 0"; - - string cmd; - - cmd = "grep /data" + dbrootID + " /etc/fstab > /dev/null 2>&1"; - - int status = system(cmd.c_str()); - - if (WEXITSTATUS(status) != 0 ) - { - //chmod before update, used on amazon ami EBS. not other systems - system("sudo chmod 666 /etc/fstab"); - - //update local fstab - cmd = "echo " + entry + " >> /etc/fstab"; - - system(cmd.c_str()); - } - - cmd = "grep /data" + dbrootID + "/var/lib/columnstore/local/etc/pm1/fstab > /dev/null 2>&1"; - - status = system(cmd.c_str()); - - if (WEXITSTATUS(status) != 0 ) - { - //use from addmodule later - cmd = "touch /var/lib/columnstore/local/etc/pm1/fstab;echo " + entry + " >> /var/lib/columnstore/local/etc/pm1/fstab"; - system(cmd.c_str()); - } - - return entry; -} - -/****************************************************************************************** -* @brief waitForActive -* -* purpose: wait for system to be active -* -******************************************************************************************/ -void Oam::waitForActive() -{ - SystemStatus systemstatus; - SystemProcessStatus systemprocessstatus; - bool bfirst = true; - int dot = 0; - - for (int i = 0 ; i < 120 ; i ++, dot ++) - { - sleep (3); - - try - { - getSystemStatus(systemstatus); - - if (systemstatus.SystemOpState == ACTIVE) - { - BRM::DBRM dbrm; - - try - { - int rc = dbrm.getSystemQueryReady(); - - if (rc == -1 ) - { - writeLog("waitForActive: getSystemQueryReady error return: startSystem failed", LOG_TYPE_ERROR); - exceptionControl("waitForActive", API_FAILURE); - } - - if ( rc != 0 ) - return; - - writeLog("waitForActive: getSystemQueryReady not ready", LOG_TYPE_DEBUG); - } - catch (...) - {} - } - - if (systemstatus.SystemOpState == FAILED) - { - exceptionControl("waitForActive", API_FAILURE); - } - - if (systemstatus.SystemOpState == MAN_OFFLINE) - { - exceptionControl("waitForActive", API_FAILURE); - } - - if (dot >= 3 ) - { - cout << "." << flush; - dot = 0; - } - - // Check DMLProc for a switch to BUSY_INIT. - // In such a case, we need to print a message that rollbacks - // are occurring and will take some time. - if (bfirst) // Once we've printed our message, no need to waste cpu looking - { - getProcessStatus(systemprocessstatus); - - for (unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++) - { - if (systemprocessstatus.processstatus[i].ProcessName == "DMLProc") - { - if (systemprocessstatus.processstatus[i].ProcessOpState == oam::ROLLBACK_INIT) - { - cout << endl << endl << " System Not Ready, DMLProc is checking/processing rollback of abandoned transactions. Processing could take some time, please wait..." << flush; - bfirst = false; - } - - // At this point, we've found our DMLProc, so there's no need to spin the for loop - // any further. - break; - } - } - } - } - catch (...) - { - // At some point, we need to give up, ProcMon just isn't going to respond. - if (i > 60) // 3 minutes - { - cout << endl << endl << "TIMEOUT: ProcMon not responding to getSystemStatus"; - break; - } - } - } - - exceptionControl("waitForActive", API_FAILURE); -} - - /*************************************************************************** * PRIVATE FUNCTIONS ***************************************************************************/ @@ -9769,39 +1033,6 @@ void Oam::exceptionControl(std::string function, int returnStatus, const char* e throw runtime_error(msg); } -/*************************************************************************** - * - * Function: getFreeSpace - * - * Purpose: get free disk space in bytes - * - ****************************************************************************/ -double Oam::getFreeSpace(std::string path) -{ - double free_space = 0.0; -#ifdef _MSC_VER - ULARGE_INTEGER freeBytesAvail; - - if (GetDiskFreeSpaceEx(path.c_str(), &freeBytesAvail, 0, 0) != 0) - free_space = (double)freeBytesAvail.QuadPart; - -#else - struct statfs statBuf; - - if ( statfs(path.c_str(), &statBuf) == 0) - { - free_space = ((double)statBuf.f_bavail) * ((double)statBuf.f_bsize); - return free_space; - } - else - { - exceptionControl("statvfs failed", API_FAILURE ); - } - -#endif - return free_space; -} - /*************************************************************************** * * Function: itoa @@ -9816,1473 +1047,5 @@ std::string Oam::itoa(const int i) ss << i; return ss.str(); } - -/*************************************************************************** - * - * Function: sendMsgToProcMgr - * - * Purpose: Build and send request message to Process Manager - * - ****************************************************************************/ - -int Oam::sendMsgToProcMgr(messageqcpp::ByteStream::byte requestType, const std::string name, - GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag, const std::string argument1, - const std::string argument2, int timeout) -{ - if (!checkSystemRunning()) - return API_CONN_REFUSED; - - int returnStatus = API_SUCCESS; //default - ByteStream msg; - ByteStream receivedMSG; - ByteStream::byte msgType; - ByteStream::byte actionType; - string target; - ByteStream::byte status; - - // get current requesting process, an error will occur if process is a UI tool (not kept in Status Table) - // this will be used to determine if this is a manually or auto request down within Process-Monitor - bool requestManual; - myProcessStatus_t t; - - try - { - t = getMyProcessStatus(); - requestManual = false; // set to auto - } - catch (...) - { - requestManual = true; // set to manual - } - - // setup message - msg << (ByteStream::byte) REQUEST; - msg << requestType; - msg << name; - msg << (ByteStream::byte) gracefulflag; - msg << (ByteStream::byte) ackflag; - msg << (ByteStream::byte) requestManual; - - if (!argument1.empty()) - msg << argument1; - - if (!argument2.empty()) - msg << argument2; - - try - { - //send the msg to Process Manager - MessageQueueClient procmgr("ProcMgr"); - procmgr.write(msg); - - // check for Ack msg if needed - if ( ackflag == ACK_YES ) - { - // wait for ACK from Process Manager - struct timespec ts = { timeout, 0 }; - - receivedMSG = procmgr.read(&ts); - - if (receivedMSG.length() > 0) - { - receivedMSG >> msgType; - receivedMSG >> actionType; - receivedMSG >> target; - receivedMSG >> status; - - if ( msgType == oam::ACK && actionType == requestType && target == name) - { - // ACK for this request - returnStatus = status; - } - } - else // timeout - returnStatus = API_TIMEOUT; - } - else - // No ACK, assume write success - returnStatus = API_SUCCESS; - - // shutdown connection - procmgr.shutdown(); - } - catch (std::runtime_error&) - { - //There's other reasons, but this is the most likely... - returnStatus = API_CONN_REFUSED; - } - catch (std::exception&) - { - returnStatus = API_FAILURE; - } - catch (...) - { - returnStatus = API_FAILURE; - } - - return returnStatus; -} - - -/*************************************************************************** - * - * Function: sendMsgToProcMgr2 - * - * Purpose: Build and send request message to Process Manager - * - ****************************************************************************/ - -int Oam::sendMsgToProcMgr2(messageqcpp::ByteStream::byte requestType, DeviceNetworkList devicenetworklist, - GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag, const std::string password, const std::string mysqlpw) -{ - if (!checkSystemRunning()) - return API_CONN_REFUSED; - - int returnStatus = API_TIMEOUT; //default - ByteStream msg; - ByteStream receivedMSG; - ByteStream::byte msgType; - ByteStream::byte actionType; - ByteStream::byte status; - - // get current requesting process, an error will occur if process is a UI tool (not kept in Status Table) - // this will be used to determine if this is a manually or auto request down within Process-Monitor - bool requestManual; - myProcessStatus_t t; - - try - { - t = getMyProcessStatus(); - requestManual = false; // set to auto - } - catch (...) - { - requestManual = true; // set to manual - } - - // setup message - msg << (ByteStream::byte) REQUEST; - msg << requestType; - msg << (std::string) " "; - msg << (ByteStream::byte) gracefulflag; - msg << (ByteStream::byte) ackflag; - msg << (ByteStream::byte) requestManual; - - msg << (uint16_t) devicenetworklist.size(); - - DeviceNetworkList::iterator pt = devicenetworklist.begin(); - - for ( ; pt != devicenetworklist.end() ; pt++) - { - msg << (*pt).DeviceName; - - if ( (*pt).UserTempDeviceName.empty() ) - msg << " "; - else - msg << (*pt).UserTempDeviceName; - - if ( (*pt).DisableState.empty() ) - msg << " "; - else - msg << (*pt).DisableState; - - msg << (uint16_t) (*pt).hostConfigList.size(); - - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++) - { - msg << (*pt1).IPAddr; - msg << (*pt1).HostName; - msg << (*pt1).NicID; - } - } - - msg << password; - msg << mysqlpw; - - try - { - //send the msg to Process Manager - MessageQueueClient procmgr("ProcMgr"); - procmgr.write(msg); - - // check for Ack msg if needed - if ( ackflag == ACK_YES ) - { - // wait 15 minutes for ACK from Process Manager - struct timespec ts = { 900, 0 }; - - receivedMSG = procmgr.read(&ts); - - if (receivedMSG.length() > 0) - { - receivedMSG >> msgType; - receivedMSG >> actionType; - receivedMSG >> status; - - if ( msgType == oam::ACK && actionType == requestType) - { - // ACK for this request - returnStatus = status; - } - } - else // timeout - returnStatus = API_TIMEOUT; - } - else - // No ACK, assume write success - returnStatus = API_SUCCESS; - - // shutdown connection - procmgr.shutdown(); - } - catch (...) - { - returnStatus = API_FAILURE; - } - - return returnStatus; -} - -/* A slightly different version of sendMsgToProcMgr2. Add-module needs to send one add'l - parameter, and this was the best of a couple bad options. */ -int Oam::sendAddModuleToProcMgr(messageqcpp::ByteStream::byte requestType, DeviceNetworkList devicenetworklist, - GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag, bool storeHostnames, const std::string password, - const std::string mysqlpw) -{ - if (!checkSystemRunning()) - return API_CONN_REFUSED; - - int returnStatus = API_TIMEOUT; //default - ByteStream msg; - ByteStream receivedMSG; - ByteStream::byte msgType; - ByteStream::byte actionType; - ByteStream::byte status; - - // get current requesting process, an error will occur if process is a UI tool (not kept in Status Table) - // this will be used to determine if this is a manually or auto request down within Process-Monitor - bool requestManual; - myProcessStatus_t t; - - try - { - t = getMyProcessStatus(); - requestManual = false; // set to auto - } - catch (...) - { - requestManual = true; // set to manual - } - - // setup message - msg << (ByteStream::byte) REQUEST; - msg << requestType; - msg << (std::string) " "; - msg << (ByteStream::byte) gracefulflag; - msg << (ByteStream::byte) ackflag; - msg << (ByteStream::byte) requestManual; - msg << (uint8_t) storeHostnames; - msg << (uint16_t) devicenetworklist.size(); - - DeviceNetworkList::iterator pt = devicenetworklist.begin(); - - for ( ; pt != devicenetworklist.end() ; pt++) - { - msg << (*pt).DeviceName; - - if ( (*pt).UserTempDeviceName.empty() ) - msg << " "; - else - msg << (*pt).UserTempDeviceName; - - if ( (*pt).DisableState.empty() ) - msg << " "; - else - msg << (*pt).DisableState; - - msg << (uint16_t) (*pt).hostConfigList.size(); - - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++) - { - msg << (*pt1).IPAddr; - msg << (*pt1).HostName; - msg << (*pt1).NicID; - } - } - - msg << password; - msg << mysqlpw; - - try - { - //send the msg to Process Manager - MessageQueueClient procmgr("ProcMgr"); - procmgr.write(msg); - - // check for Ack msg if needed - if ( ackflag == ACK_YES ) - { - // wait 15 minutes for ACK from Process Manager - struct timespec ts = { 900, 0 }; - - receivedMSG = procmgr.read(&ts); - - if (receivedMSG.length() > 0) - { - receivedMSG >> msgType; - receivedMSG >> actionType; - receivedMSG >> status; - - if ( msgType == oam::ACK && actionType == requestType) - { - // ACK for this request - returnStatus = status; - } - } - else // timeout - returnStatus = API_TIMEOUT; - } - else - // No ACK, assume write success - returnStatus = API_SUCCESS; - - // shutdown connection - procmgr.shutdown(); - } - catch (...) - { - returnStatus = API_FAILURE; - } - - return returnStatus; -} - - - -/*************************************************************************** - * - * Function: sendMsgToProcMgr3 - * - * Purpose: Build and send Alarm request message to Process Manager - * - ****************************************************************************/ - -int Oam::sendMsgToProcMgr3(messageqcpp::ByteStream::byte requestType, AlarmList& alarmlist, const std::string date) -{ - if (!checkSystemRunning()) - return API_CONN_REFUSED; - - int returnStatus = API_SUCCESS; //default - ByteStream msg; - ByteStream receivedMSG; - ByteStream::byte msgType; - ByteStream::byte actionType; - ByteStream::byte status; - - // setup message - msg << requestType; - msg << date; - - try - { - //send the msg to Process Manager - MessageQueueClient procmgr("ProcMgr"); - procmgr.write(msg); - - // wait 30 seconds for ACK from Process Manager - struct timespec ts = { 30, 0 }; - - receivedMSG = procmgr.read(&ts); - - if (receivedMSG.length() > 0) - { - receivedMSG >> msgType; - receivedMSG >> actionType; - receivedMSG >> status; - - if ( msgType == oam::ACK && actionType == requestType && status == API_SUCCESS ) - { - ByteStream::byte numAlarms; - - while (true) - { - //number of alarms - receivedMSG >> numAlarms; - - //check for end-of-list - if ( numAlarms == 0) - break; - - for ( int i = 0 ; i < numAlarms ; i++ ) - { - Alarm alarm; - ByteStream::doublebyte value; - string svalue; - - receivedMSG >> value; - alarm.setAlarmID(value); - receivedMSG >> svalue; - alarm.setDesc(svalue); - receivedMSG >> value; - alarm.setSeverity(value); - receivedMSG >> svalue; - alarm.setTimestamp(svalue); - receivedMSG >> svalue; - alarm.setSname(svalue); - receivedMSG >> svalue; - alarm.setPname(svalue); - receivedMSG >> svalue; - alarm.setComponentID(svalue); - - alarmlist.insert (AlarmList::value_type(alarm.getTimestampSeconds(), alarm)); - } - - break; - } - } - else - returnStatus = API_FAILURE; - } - else // timeout - returnStatus = API_TIMEOUT; - - // shutdown connection - procmgr.shutdown(); - } - catch (...) - { - returnStatus = API_FAILURE; - } - - return returnStatus; -} - -/*************************************************************************** - * - * Function: sendMsgToProcMgrWithStatus - * - * Purpose: Build and send a request message to Process Manager - * Check for status messages and display on stdout. - * - * This is used only in manual mode. - * - ****************************************************************************/ - -int Oam::sendMsgToProcMgrWithStatus(messageqcpp::ByteStream::byte requestType, const std::string name, - GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag, - const std::string argument1, const std::string argument2, int timeout) -{ - if (!checkSystemRunning()) - return API_CONN_REFUSED; - - int returnStatus = API_STILL_WORKING; - ByteStream msg; - ByteStream receivedMSG; - ByteStream::byte msgType; - ByteStream::byte actionType; - string target; - ByteStream::byte status; - struct timespec ts = {timeout, 0}; - bool requestManual = true; - std::stringstream buffer; - BRM::DBRM dbrm; -#ifndef _MSC_VER - struct sigaction ctrlcHandler; - struct sigaction oldCtrlcHandler; - memset(&ctrlcHandler, 0, sizeof(ctrlcHandler)); -#endif - // setup message - msg << (ByteStream::byte) REQUEST; - msg << requestType; - msg << name; - msg << (ByteStream::byte) gracefulflag; - msg << (ByteStream::byte) ackflag; - msg << (ByteStream::byte) requestManual; - - if (!argument1.empty()) - msg << argument1; - - if (!argument2.empty()) - msg << argument2; - - if (gracefulflag == GRACEFUL_WAIT) - { - // Control-C signal to terminate the shutdown command - ctrlc = 0; -#ifdef _MSC_VER - //FIXME: -#else - ctrlcHandler.sa_handler = handleControlC; - sigaction(SIGINT, &ctrlcHandler, &oldCtrlcHandler); -#endif - } - - try - { - //send the msg to Process Manager - MessageQueueClient procmgr("ProcMgr"); - procmgr.write(msg); - - // check for Ack msg if needed - if (ackflag == ACK_YES) - { - while (returnStatus == API_STILL_WORKING) - { - // wait for ACK from Process Manager - receivedMSG = procmgr.read(&ts); - - // If user hit ctrl-c, we've been cancelled - if (ctrlc == 1) - { - writeLog("Clearing System Shutdown pending", LOG_TYPE_INFO ); - dbrm.setSystemShutdownPending(false); - dbrm.setSystemSuspendPending(false); - returnStatus = API_CANCELLED; - break; - } - - if (receivedMSG.length() > 0) - { - receivedMSG >> msgType; - receivedMSG >> actionType; - receivedMSG >> target; - receivedMSG >> status; - - if ( msgType == oam::ACK && actionType == requestType && target == name) - { - if (status == API_TRANSACTIONS_COMPLETE) - { - cout << endl << " System being " << name << ", please wait..." << flush; - - // More work to wait on.... - // At this point, the requirement is to have ctrl-c drop us out of calpont console - // so we'll restore the handler to default. - if (gracefulflag == GRACEFUL_WAIT) - { -#ifdef _MSC_VER - //FIXME: -#else - sigaction(SIGINT, &oldCtrlcHandler, NULL); -#endif - } - } - else - { - returnStatus = status; - } - } - - if (returnStatus == API_STILL_WORKING) - { - cout << "." << flush; - } - } - else // timeout - { - returnStatus = API_TIMEOUT; - } - } - } - else - { - // No ACK, assume write success - returnStatus = API_SUCCESS; - } - - // shutdown connection - procmgr.shutdown(); - } - catch (std::runtime_error&) - { - //There's other reasons, but this is the most likely... - returnStatus = API_CONN_REFUSED; - } - catch (std::exception&) - { - returnStatus = API_FAILURE; - } - catch (...) - { - returnStatus = API_FAILURE; - } - - if (gracefulflag == GRACEFUL_WAIT) - { - // Just in case we errored out and bypassed the normal restore, - // restore ctrl-c to previous handler. -#ifdef _MSC_VER - //FIXME: -#else - sigaction(SIGINT, &oldCtrlcHandler, NULL); -#endif - } - - return returnStatus; -} - -/*************************************************************************** - * - * Function: validateProcess - * - * Purpose: Validate Process Name - * - ****************************************************************************/ - -int Oam::validateProcess(const std::string moduleName, std::string processName) -{ - SystemProcessStatus systemprocessstatus; - ProcessStatus processstatus; - - try - { - getProcessStatus(systemprocessstatus); - - for ( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++) - { - if ( systemprocessstatus.processstatus[i].Module == moduleName && - systemprocessstatus.processstatus[i].ProcessName == processName) - // found it - return API_SUCCESS; - } - } - catch (...) - { - return API_INVALID_PARAMETER; - } - - return API_INVALID_PARAMETER; -} - -/*************************************************************************** - * - * Function: sendStatusUpdate - * - * Purpose: Send Status Update to Process Monitor - * - ****************************************************************************/ - -void Oam::sendStatusUpdate(ByteStream obs, ByteStream::byte returnRequestType) -{ - if (!checkSystemRunning()) - return; - - for ( int i = 0 ; i < 5 ; i ++) - { - try - { - MessageQueueClient processor("ProcStatusControl"); -// processor.syncProto(false); - ByteStream ibs; - - try - { - struct timespec ts = { 3, 0 }; - processor.write(obs, &ts); - } - catch (...) - { - processor.shutdown(); - throw std::runtime_error("write error"); - } - - - try - { - struct timespec ts1 = { 15, 0 }; - ibs = processor.read(&ts1); - } - catch (...) - { - processor.shutdown(); - throw std::runtime_error("read error"); - } - - ByteStream::byte returnRequestType; - - if (ibs.length() > 0) - { - ibs >> returnRequestType; - processor.shutdown(); - return; - } - else - { - // timeout occurred, shutdown connection and retry - processor.shutdown(); - throw std::runtime_error("timeout"); - return; - } - } - catch (...) - {} - } - - return; -} - -/*************************************************************************** - * - * Function: amazonDetach - * - * Purpose: Amazon EC2 volume deattach needed - * - ****************************************************************************/ - -void Oam::amazonDetach(dbrootList dbrootConfigList) -{ - //if amazon cloud with external volumes, do the detach/attach moves - string cloud; - string DBRootStorageType; - - try - { - getSystemConfig("Cloud", cloud); - getSystemConfig("DBRootStorageType", DBRootStorageType); - } - catch (...) {} - - if ( (cloud == "amazon-ec2" || cloud == "amazon-vpc") && - DBRootStorageType == "external" ) - { - writeLog("amazonDetach function started ", LOG_TYPE_DEBUG ); - - dbrootList::iterator pt3 = dbrootConfigList.begin(); - - for ( ; pt3 != dbrootConfigList.end() ; pt3++) - { - string dbrootid = *pt3; - string volumeNameID = "PMVolumeName" + dbrootid; - string volumeName = oam::UnassignedName; - string deviceNameID = "PMVolumeDeviceName" + dbrootid; - string deviceName = oam::UnassignedName; - - try - { - getSystemConfig( volumeNameID, volumeName); - getSystemConfig( deviceNameID, deviceName); - } - catch (...) - {} - - if ( volumeName == oam::UnassignedName || deviceName == oam::UnassignedName ) - { - cout << " ERROR: amazonDetach, invalid configure " + volumeName + ":" + deviceName << endl; - writeLog("ERROR: amazonDetach, invalid configure " + volumeName + ":" + deviceName, LOG_TYPE_ERROR ); - exceptionControl("amazonDetach", API_INVALID_PARAMETER); - } - - //send msg to to-pm to umount volume - int returnStatus = sendMsgToProcMgr(UNMOUNT, dbrootid, FORCEFUL, ACK_YES); - - if (returnStatus != API_SUCCESS) - { - writeLog("ERROR: amazonDetach, umount failed on " + dbrootid, LOG_TYPE_ERROR ); - } - - if (!detachEC2Volume(volumeName)) - { - cout << " ERROR: amazonDetach, detachEC2Volume failed on " + volumeName << endl; - writeLog("ERROR: amazonDetach, detachEC2Volume failed on " + volumeName, LOG_TYPE_ERROR ); - exceptionControl("amazonDetach", API_FAILURE); - } - - writeLog("amazonDetach, detachEC2Volume passed on " + volumeName, LOG_TYPE_DEBUG ); - } - } -} - -/*************************************************************************** - * - * Function: amazonAttach - * - * Purpose: Amazon EC2 volume Attach needed - * - ****************************************************************************/ - -void Oam::amazonAttach(std::string toPM, dbrootList dbrootConfigList) -{ - //if amazon cloud with external volumes, do the detach/attach moves - string cloud; - string DBRootStorageType; - - try - { - getSystemConfig("Cloud", cloud); - getSystemConfig("DBRootStorageType", DBRootStorageType); - } - catch (...) {} - - if ( (cloud == "amazon-ec2" || cloud == "amazon-vpc") && - DBRootStorageType == "external" ) - { - writeLog("amazonAttach function started ", LOG_TYPE_DEBUG ); - - //get Instance Name for to-pm - string toInstanceName = oam::UnassignedName; - - try - { - ModuleConfig moduleconfig; - getSystemConfig(toPM, moduleconfig); - HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); - toInstanceName = (*pt1).HostName; - } - catch (...) - {} - - if ( toInstanceName == oam::UnassignedName || toInstanceName.empty() ) - { - cout << " ERROR: amazonAttach, invalid Instance Name for " << toPM << endl; - writeLog("ERROR: amazonAttach, invalid Instance Name " + toPM, LOG_TYPE_ERROR ); - exceptionControl("amazonAttach", API_INVALID_PARAMETER); - } - - dbrootList::iterator pt3 = dbrootConfigList.begin(); - - for ( ; pt3 != dbrootConfigList.end() ; pt3++) - { - string dbrootid = *pt3; - string volumeNameID = "PMVolumeName" + dbrootid; - string volumeName = oam::UnassignedName; - string deviceNameID = "PMVolumeDeviceName" + dbrootid; - string deviceName = oam::UnassignedName; - - try - { - getSystemConfig( volumeNameID, volumeName); - getSystemConfig( deviceNameID, deviceName); - } - catch (...) - {} - - if ( volumeName == oam::UnassignedName || deviceName == oam::UnassignedName ) - { - cout << " ERROR: amazonAttach, invalid configure " + volumeName + ":" + deviceName << endl; - writeLog("ERROR: amazonAttach, invalid configure " + volumeName + ":" + deviceName, LOG_TYPE_ERROR ); - exceptionControl("amazonAttach", API_INVALID_PARAMETER); - } - - if (!attachEC2Volume(volumeName, deviceName, toInstanceName)) - { - cout << " ERROR: amazonAttach, attachEC2Volume failed on " + volumeName + ":" + deviceName + ":" + toInstanceName << endl; - writeLog("ERROR: amazonAttach, attachEC2Volume failed on " + volumeName + ":" + deviceName + ":" + toInstanceName, LOG_TYPE_ERROR ); - exceptionControl("amazonAttach", API_FAILURE); - } - - writeLog("amazonAttach, attachEC2Volume passed on " + volumeName + ":" + toPM, LOG_TYPE_DEBUG ); - } - } -} - - -/*************************************************************************** -* -* Function: amazonReattach -* -* Purpose: Amazon EC2 volume reattach needed -* -****************************************************************************/ - -void Oam::amazonReattach(std::string toPM, dbrootList dbrootConfigList, bool attach) -{ - //if amazon cloud with external volumes, do the detach/attach moves - string cloud; - string DBRootStorageType; - - try - { - getSystemConfig("Cloud", cloud); - getSystemConfig("DBRootStorageType", DBRootStorageType); - } - catch (...) {} - - if ( (cloud == "amazon-ec2" || cloud == "amazon-vpc") && - DBRootStorageType == "external" ) - { - writeLog("amazonReattach function started ", LOG_TYPE_DEBUG ); - - //get Instance Name for to-pm - string toInstanceName = oam::UnassignedName; - - try - { - ModuleConfig moduleconfig; - getSystemConfig(toPM, moduleconfig); - HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); - toInstanceName = (*pt1).HostName; - } - catch (...) - {} - - if ( toInstanceName == oam::UnassignedName || toInstanceName.empty() ) - { - cout << " ERROR: amazonReattach, invalid Instance Name for " << toPM << endl; - writeLog("ERROR: amazonReattach, invalid Instance Name " + toPM, LOG_TYPE_ERROR ); - exceptionControl("amazonReattach", API_INVALID_PARAMETER); - } - - dbrootList::iterator pt3 = dbrootConfigList.begin(); - - for ( ; pt3 != dbrootConfigList.end() ; pt3++) - { - string dbrootid = *pt3; - string volumeNameID = "PMVolumeName" + dbrootid; - string volumeName = oam::UnassignedName; - string deviceNameID = "PMVolumeDeviceName" + dbrootid; - string deviceName = oam::UnassignedName; - - try - { - getSystemConfig( volumeNameID, volumeName); - getSystemConfig( deviceNameID, deviceName); - } - catch (...) - {} - - if ( volumeName == oam::UnassignedName || deviceName == oam::UnassignedName ) - { - cout << " ERROR: amazonReattach, invalid configure " + volumeName + ":" + deviceName << endl; - writeLog("ERROR: amazonReattach, invalid configure " + volumeName + ":" + deviceName, LOG_TYPE_ERROR ); - exceptionControl("amazonReattach", API_INVALID_PARAMETER); - } - - if (!attach) - { - //send msg to to-pm to umount volume - int returnStatus = sendMsgToProcMgr(UNMOUNT, dbrootid, FORCEFUL, ACK_YES); - - if (returnStatus != API_SUCCESS) - { - writeLog("ERROR: amazonReattach, umount failed on " + dbrootid, LOG_TYPE_ERROR ); - } - } - - if (!detachEC2Volume(volumeName)) - { - cout << " ERROR: amazonReattach, detachEC2Volume failed on " + volumeName << endl; - writeLog("ERROR: amazonReattach, detachEC2Volume failed on " + volumeName, LOG_TYPE_ERROR ); - exceptionControl("amazonReattach", API_FAILURE); - } - - writeLog("amazonReattach, detachEC2Volume passed on " + volumeName, LOG_TYPE_DEBUG ); - - if (!attachEC2Volume(volumeName, deviceName, toInstanceName)) - { - cout << " ERROR: amazonReattach, attachEC2Volume failed on " + volumeName + ":" + deviceName + ":" + toInstanceName << endl; - writeLog("ERROR: amazonReattach, attachEC2Volume failed on " + volumeName + ":" + deviceName + ":" + toInstanceName, LOG_TYPE_ERROR ); - exceptionControl("amazonReattach", API_FAILURE); - } - - writeLog("amazonReattach, attachEC2Volume passed on " + volumeName + ":" + toPM, LOG_TYPE_DEBUG ); - } - } -} - - -/*************************************************************************** - * - * Function: mountDBRoot - * - * Purpose: Send msg to ProcMon to mount/unmount a external DBRoot - * - ****************************************************************************/ - -void Oam::mountDBRoot(dbrootList dbrootConfigList, bool mount) -{ - //if external volumes, mount to device - string DBRootStorageType; - - try - { - getSystemConfig("DBRootStorageType", DBRootStorageType); - } - catch (...) {} - - // nothing to do here - if (DBRootStorageType == "storagemanager") - return; - - string DataRedundancyConfig = "n"; - - try - { - getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); - } - catch (...) - { - DataRedundancyConfig = "n"; - } - - if ( (DBRootStorageType == "external" && DataRedundancyConfig == "n") - || - (DataRedundancyConfig == "y" && !mount) ) - { - dbrootList::iterator pt3 = dbrootConfigList.begin(); - - for ( ; pt3 != dbrootConfigList.end() ; pt3++) - { - string dbrootid = *pt3; - - int mountCmd = oam::MOUNT; - - if (!mount) - { - mountCmd = oam::UNMOUNT; - writeLog("mountDBRoot api, umount dbroot" + dbrootid, LOG_TYPE_DEBUG); - } - else - writeLog("mountDBRoot api, mount dbroot" + dbrootid, LOG_TYPE_DEBUG); - - //send msg to to-pm to umount volume - int returnStatus = sendMsgToProcMgr(mountCmd, dbrootid, FORCEFUL, ACK_YES); - - if (returnStatus != API_SUCCESS) - { - if ( mountCmd == oam::MOUNT ) - { - writeLog("ERROR: mount failed on dbroot" + dbrootid, LOG_TYPE_ERROR ); - cout << " ERROR: mount failed on dbroot" + dbrootid << endl; - } - else - { - writeLog("ERROR: unmount failed on dbroot" + dbrootid, LOG_TYPE_ERROR ); - cout << " ERROR: unmount failed on dbroot" + dbrootid << endl; - exceptionControl("mountDBRoot", API_FAILURE); - } - } - } - } - - return; -} - -/****************************************************************************************** -* @brief writeLog -* -* purpose: Write the message to the log -* -******************************************************************************************/ -void Oam::writeLog(const string logContent, const LOG_TYPE logType) -{ - LoggingID lid(8); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add(logContent); - msg.format(args); - - switch (logType) - { - case LOG_TYPE_DEBUG: - ml.logDebugMessage(msg); - break; - - case LOG_TYPE_INFO: - ml.logInfoMessage(msg); - break; - - case LOG_TYPE_WARNING: - ml.logWarningMessage(msg); - break; - - case LOG_TYPE_ERROR: - ml.logErrorMessage(msg); - break; - - case LOG_TYPE_CRITICAL: - ml.logCriticalMessage(msg); - break; - } - - return; -} - -/*************************************************************************** - * - * Function: waitForSystem - * - * Purpose: When a Shutdown, stop, restart or suspend - * operation is requested but there are active - * transactions of some sort, We wait for all - * transactions to close before performing the - * action. - ****************************************************************************/ -bool Oam::waitForSystem(PROC_MGT_MSG_REQUEST request, messageqcpp::IOSocket& ios, messageqcpp::ByteStream& stillWorkingMsg) -{ - // Use ios to send back periodic still working messages - BRM::DBRM dbrm; - execplan::SessionManager sessionManager; - bool bIsDbrmUp; - BRM::SIDTIDEntry blockingsid; - std::vector tableLocks; - bool bActiveTransactions = true; - bool bRollback; - bool bForce; - bool ret = false; - size_t idx; - - try - { - while (bActiveTransactions) - { - sleep(3); - ios.write(stillWorkingMsg); - - bActiveTransactions = false; - // Any table locks still set? - tableLocks = dbrm.getAllTableLocks(); - - for (idx = 0; idx < tableLocks.size(); ++idx) - { - if (dbrm.checkOwner(tableLocks[idx].id)) - { - bActiveTransactions = true; - break; - } - } - - // Any active transactions? - if (sessionManager.checkActiveTransaction(0, bIsDbrmUp, blockingsid)) - { - bActiveTransactions = true; - } - - // check to see if the user canceled the request. - if (request == SUSPENDWRITES) - { - if (dbrm.getSystemSuspendPending(bRollback) == 0) // Means we no longer are going to suspend - { - writeLog("System Suspend Canceled in wait", LOG_TYPE_INFO ); - break; - } - } - else - { - if (dbrm.getSystemShutdownPending(bRollback, bForce) == 0) // Means we no longer are going to shutdown - { - writeLog("System Shutdown Canceled in wait", LOG_TYPE_INFO ); - break; - } - } - - if (!bActiveTransactions) - { - ret = true; - } - } - } - catch (...) - { - writeLog("Communication with MariaDB ColumnStore Admin console failed while waiting for transactions", LOG_TYPE_ERROR); - } - -// writeLog("Returning from wait with value " + itoa(ret), LOG_TYPE_INFO ); - return ret; -} - -int Oam::readHdfsActiveAlarms(AlarmList& alarmList) -{ - int returnStatus = API_FAILURE; - Alarm alarm; - - // the alarm file will be pushed to all nodes every 10 seconds, retry 1 second - for (int i = 0; i < 10 && returnStatus != API_SUCCESS; i++) - { - try - { - ifstream activeAlarm(ACTIVE_ALARM_FILE.c_str(), ios::in); - - if (!activeAlarm.is_open()) - { - // file may be temporary not available due to dpcp. - usleep(10000); - - activeAlarm.open(ACTIVE_ALARM_FILE.c_str(), ios::in); - - if (!activeAlarm.is_open()) - { - // still cannot open, treat as no activeAlarms file. - returnStatus = API_SUCCESS; - continue; - } - } - - // read from opened file. - while (!activeAlarm.eof() && activeAlarm.good()) - { - activeAlarm >> alarm; - - if (alarm.getAlarmID() != INVALID_ALARM_ID) - alarmList.insert (AlarmList::value_type(INVALID_ALARM_ID, alarm)); - } - - activeAlarm.close(); - - returnStatus = API_SUCCESS; - } - catch (...) - { - } - - // wait a second and try again - if (returnStatus != API_SUCCESS) - usleep (100000); - } - - return returnStatus; -} - -bool Oam::checkSystemRunning() -{ - struct stat st; - - string lockFileDir = "/var/subsys/lock"; - - try - { - Config* sysConfig = Config::makeConfig(CalpontConfigFile.c_str()); - lockFileDir = sysConfig->getConfig("Installation", "LockFileDirectory"); - } - catch (...) - {} // defaulted to false - - string lockFile = lockFileDir + "/columnstore"; - - if (stat(lockFile.c_str(), &st) == 0) - { - return true; - } - - if (geteuid() != 0) - { - // not root user - // The stat above may fail for non-root because of permissions - // This is a non-optimal solution - string cmd = "pgrep ProcMon > /dev/null 2>&1"; - - if (system(cmd.c_str()) == 0) - { - return true; - } - } - - return false; -} } //namespace oam - - -namespace procheartbeat -{ -/* - ProcHeartbeat::ProcHeartbeat() - {} - - ProcHeartbeat::~ProcHeartbeat() - {} -*/ -/******************************************************************** - * - * Register for Proc Heartbeat - * - ********************************************************************/ -/* - void ProcHeartbeat::registerHeartbeat(int ID) - { - Oam oam; - // get current Module name - string Module; - oamModuleInfo_t st; - try { - st = getModuleInfo(); - Module = boost::get<0>(st); - } - catch (...) { - exceptionControl("registerHeartbeat", API_FAILURE); - } - - // get current process Name - string processName; - myProcessStatus_t t; - try { - t = getMyProcessStatus(); - processName = boost::get<1>(t); - } - catch (...) { - exceptionControl("registerHeartbeat", API_FAILURE); - } - - ByteStream msg; - - // setup message - msg << (ByteStream::byte) HEARTBEAT_REGISTER; - msg << Module; - msg << processName; - msg << (ByteStream::byte) ID; - - try - { - //send the msg to Process Manager - MessageQueueClient procmgr("ProcHeartbeatControl"); - procmgr.write(msg); - procmgr.shutdown(); - } - catch(...) - { - exceptionControl("registerHeartbeat", API_FAILURE); - } - - } -*/ -/******************************************************************** - * - * De-Register for Proc Heartbeat - * - ********************************************************************/ -/* - void ProcHeartbeat::deregisterHeartbeat(int ID) - { - Oam oam; - // get current Module name - string Module; - oamModuleInfo_t st; - try { - st = getModuleInfo(); - Module = boost::get<0>(st); - } - catch (...) { - exceptionControl("deregisterHeartbeat", API_FAILURE); - } - - // get current process Name - string processName; - myProcessStatus_t t; - try { - t = getMyProcessStatus(); - processName = boost::get<1>(t); - } - catch (...) { - exceptionControl("deregisterHeartbeat", API_FAILURE); - } - - ByteStream msg; - - // setup message - msg << (ByteStream::byte) HEARTBEAT_DEREGISTER; - msg << Module; - msg << processName; - msg << (ByteStream::byte) ID; - - try - { - //send the msg to Process Manager - MessageQueueClient procmgr("ProcHeartbeatControl"); - procmgr.write(msg); - procmgr.shutdown(); - } - catch(...) - { - exceptionControl("deregisterHeartbeat", API_FAILURE); - } - - } -*/ -/******************************************************************** - * - * Send Proc Heartbeat - * - ********************************************************************/ -/* - void ProcHeartbeat::sendHeartbeat(int ID, oam::ACK_FLAG ackFlag) - { - Oam oam; - - // get process heartbeat period - int processHeartbeatPeriod = 60; //default - - try { - getSystemConfig("ProcessHeartbeatPeriod", processHeartbeatPeriod); - } - catch(...) - { - } - - //skip sending if Heartbeat is disable - if( processHeartbeatPeriod == -1 ) - exceptionControl("sendHeartbeat", API_DISABLED); - - // get current Module name - string Module; - oamModuleInfo_t st; - try { - st = getModuleInfo(); - Module = boost::get<0>(st); - } - catch (...) { - exceptionControl("sendHeartbeat", API_FAILURE); - } - - // get current process Name - string processName; - myProcessStatus_t t; - try { - t = getMyProcessStatus(); - processName = boost::get<1>(t); - } - catch (...) { - exceptionControl("sendHeartbeat", API_FAILURE); - } - - ByteStream msg; - - // setup message - msg << (ByteStream::byte) HEARTBEAT_SEND; - msg << Module; - msg << processName; - msg << getCurrentTime(); - msg << (ByteStream::byte) ID; - msg << (ByteStream::byte) ackFlag; - - try - { - //send the msg to Process Manager - MessageQueueClient procmgr("ProcHeartbeatControl"); - procmgr.write(msg); - - //check for ACK - if ( ackFlag == oam::ACK_YES ) { - ByteStream ackMsg; - ByteStream::byte type; - // wait for ACK from Process Manager - struct timespec ts = { processHeartbeatPeriod, 0 }; - - ackMsg = procmgr.read(&ts); - - if (ackMsg.length() > 0) - { - ackMsg >> type; - if ( type != HEARTBEAT_SEND ) { - //Ack not received - procmgr.shutdown(); - exceptionControl("sendHeartbeat", API_TIMEOUT); - } - } - else - { - procmgr.shutdown(); - exceptionControl("sendHeartbeat", API_TIMEOUT); - } - } - procmgr.shutdown(); - } - catch(...) - { - exceptionControl("sendHeartbeat", API_FAILURE); - } - } -*/ -} // end of namespace // vim:ts=4 sw=4: diff --git a/oam/oamcpp/liboamcpp.h b/oam/oamcpp/liboamcpp.h index 51317bb14..53b26ebbd 100644 --- a/oam/oamcpp/liboamcpp.h +++ b/oam/oamcpp/liboamcpp.h @@ -44,7 +44,6 @@ #include "bytestream.h" #include "configcpp.h" #include "boost/tuple/tuple.hpp" -#include "alarmmanager.h" #include "dbrm.h" #include "messagequeue.h" @@ -64,12 +63,12 @@ namespace oam /** @brief Maximum Number of Modules within the Calpont System */ -const int MAX_MODULE = 1024; +//const int MAX_MODULE = 1024; /** @brief Maximum Number of DBRoots within the Calpont System */ const int MAX_DBROOT = 10240; -const int MAX_DBROOT_AMAZON = 190; //DUE TO DEVICE NAME LIMIT +//const int MAX_DBROOT_AMAZON = 190; //DUE TO DEVICE NAME LIMIT /** @brief Maximum Number of Modules Types within the Calpont System */ @@ -77,24 +76,24 @@ const int MAX_MODULE_TYPE = 3; /** @brief Maximum Number of External Devices within the Calpont System */ -const int MAX_EXT_DEVICE = 20; +//const int MAX_EXT_DEVICE = 20; /** @brief Maximum Number of Arguments per process */ -const int MAX_ARGUMENTS = 15; +//const int MAX_ARGUMENTS = 15; /** @brief Maximum Number of Dependancy processes per process */ -const int MAX_DEPENDANCY = 6; +//const int MAX_DEPENDANCY = 6; /** @brief Maximum Number of processes within the Calpont System */ -const int MAX_PROCESS_PER_MODULE = 15; -const int MAX_PROCESS = MAX_MODULE * MAX_PROCESS_PER_MODULE; +//const int MAX_PROCESS_PER_MODULE = 15; +//const int MAX_PROCESS = MAX_MODULE * MAX_PROCESS_PER_MODULE; /** @brief Maximum Number of Parameters per process */ -const int MAX_PARAMS = 13; +//const int MAX_PARAMS = 13; /** @brief Maximum Module Type Size */ @@ -129,57 +128,7 @@ const std::string configSections[] = { "SystemConfig", "" }; -/** @brief gluster control commands - */ -enum GLUSTER_COMMANDS -{ - GLUSTER_STATUS, - GLUSTER_SETDDC, - GLUSTER_ASSIGN, - GLUSTER_WHOHAS, - GLUSTER_UNASSIGN, - GLUSTER_ADD, - GLUSTER_DELETE, - GLUSTER_PEERPROBE -}; - - -/** @brief mysql-Calpont Action - */ -enum MYSQLCALPONT_ACTION -{ - MYSQL_START, - MYSQL_STOP, - MYSQL_RESTART, - MYSQL_RELOAD, - MYSQL_FORCE_RELOAD, - MYSQL_STATUS -}; - -/** @brief Device Notification Type - */ -enum NOTIFICATION_TYPE -{ - NOTIFICATION_TYPE_RESERVED, // 0 = not used - START_PM_MASTER_DOWN, - START_PM_STANDBY_DOWN, - START_PM_COLD_DOWN, - START_UM_DOWN, - MODULE_DOWN, - START_STANDBY_TO_MASTER, - PM_MASTER_ACTIVE, - PM_STANDBY_ACTIVE, - PM_COLD_ACTIVE, - UM_ACTIVE, - PM_MASTER_FAILED_DISABLED, - DBROOT_DOWN, - DBROOT_UP, - DB_HEALTH_CHECK_FAILED, - DBROOT_MOUNT_FAILURE, - MODULE_UP -}; - -const uint32_t NOTIFICATIONKEY = 0x49444231; +//const uint32_t NOTIFICATIONKEY = 0x49444231; /** @brief Server Type Installs */ @@ -193,21 +142,6 @@ enum INSTALLTYPE INSTALL_COMBINE_PM_UM // 4 = pm/um on a same server }; -/** @brief Server Monitor Message Request options - */ - -enum SERVERMONITOR_TYPE_REQUEST -{ - GET_PROC_CPU_USAGE, - GET_MODULE_CPU_USAGE, - GET_PROC_MEMORY_USAGE, - GET_MODULE_MEMORY_USAGE, - GET_MODULE_DISK_USAGE, - GET_ACTIVE_SQL_QUERY, - RUN_DBHEALTH_CHECK -}; - - /** @brief OAM API Return values */ @@ -233,476 +167,6 @@ enum API_STATUS API_MAX }; -/** @brief OAM Parent Module Indicator - */ - -enum OAM_MASTER_MODULE -{ - PARENT_NO, - PARENT_YES -}; - -/** @brief Realtime Linux OS Module Indicator - */ - -/* enum RT_LINUX_Module - { - RT_LINUX_NO, - RT_LINUX_YES - }; -*/ -/** @brief Process and Hardware States - */ - -enum STATE -{ - MAN_OFFLINE, // 0 = Manual disable mode - AUTO_OFFLINE, // 1 = Auto disable, due to a fault - MAN_INIT, // 2 = Manual initialization mode - AUTO_INIT, // 3 = Auto initialization mode - ACTIVE, // 4 = Active mode - LEAVE_BLANK, // when this was standby, 'PuTTY' would show up in the console - STANDBY, // 6 = Hot Standby mode - FAILED, // 7 = Failed restoral mode - UP, // 8 = Up mode, for hardware devices - DOWN, // 9 = Down mode, for hardware devices - COLD_STANDBY, // 10 = Cold Standby mode - UNEQUIP, // 11 = Unequipped mode - EQUIP, // 12 = Equipped mode - DEGRADED, // 13 = Degraded mode - MAN_DISABLED, // 14 = Manual Disabled mode - AUTO_DISABLED, // 15 = Auto Disabled mode - ENABLED, // 16 = Enabled mode - INITIAL, // 17 = Initial mode - STANDBY_INIT, // 18 = Standby init - BUSY_INIT, // 19 = Busy init - ROLLBACK_INIT, // 20 = Rollback during DML init - PID_UPDATE, // 21 = Assigning the pid - STATE_MAX // 22 = Max value -}; - -/** @brief String State - */ -const std::string oamState[] = -{ - "MAN_OFFLINE", - "AUTO_OFFLINE", - "MAN_INIT", - "AUTO_INIT", - "ACTIVE", - "LEAVE_BLANK", - "HOT_STANDBY", - "FAILED", - "UP", - "DOWN", - "COLD_STANDBY", - "UNEQUIP", - "EQUIP", - "DEGRADED", - "MAN_DISABLED", - "AUTO_DISABLED", - "ENABLED", - "INITIAL", - "STANDBY_INIT", - "BUSY_INIT", - "ROLLBACK_INIT", - "PID_UPDATE" - "" -}; -/** @brief Process and Hardware String States - */ - -const std::string MANOFFLINE = "MAN_OFFLINE"; -const std::string AUTOOFFLINE = "AUTO_OFFLINE"; -const std::string MANINIT = "MAN_INIT"; -const std::string AUTOINIT = "AUTO_INIT"; -const std::string ACTIVESTATE = "ACTIVE"; -const std::string STANDBYSTATE = "HOT_STANDBY"; -const std::string FAILEDSTATE = "FAILED"; -const std::string UPSTATE = "UP"; -const std::string DOWNSTATE = "DOWN"; -const std::string COLDSTANDBYSTATE = "COLD_STANDBY"; -const std::string INITIALSTATE = "INITIAL"; -const std::string DEGRADEDSTATE = "DEGRADED"; -const std::string ENABLEDSTATE = "ENABLED"; -const std::string MANDISABLEDSTATE = "MAN_DISABLED"; -const std::string AUTODISABLEDSTATE = "AUTO_DISABLED"; -const std::string STANDBYINIT = "STANDBY_INIT"; -const std::string BUSYINIT = "BUSY_INIT"; - -/** @brief Module/Process Run Types - */ - -const std::string ACTIVE_STANDBY = "ACTIVE_STANDBY"; -const std::string LOADSHARE = "LOADSHARE"; -const std::string BROADCAST = "BROADCAST"; -const std::string SIMPLEX = "SIMPLEX"; - -/** @brief Module Equippage states - */ - -const std::string EQUIP_YES = "EQ"; -const std::string EQUIP_NO = "NE"; - - -/** @brief Update Logging Levels - */ -const std::string LogLevel[] = -{ - "critical", - "error", - "warning", - "info", - "debug", - "all", - "" -}; - -/** @brief Logging Level file name - * - * NOTE: make sure this list is insync with above LogLevel list - */ -const std::string LogFile[] = -{ - "local1.=crit -/var/log/mariadb/columnstore/crit.log", - "local1.=err -/var/log/mariadb/columnstore/err.log", - "local1.=warning -/var/log/mariadb/columnstore/warning.log", - "local1.=info -/var/log/mariadb/columnstore/info.log", - "local1.=debug -/var/log/mariadb/columnstore/debug.log", - "" -}; - -const std::string LogFile7[] = -{ - "local1.crit -/var/log/mariadb/columnstore/crit.log", - "local1.err -/var/log/mariadb/columnstore/err.log", - "local1.warning -/var/log/mariadb/columnstore/warning.log", - "local1.info -/var/log/mariadb/columnstore/info.log", - "local1.debug -/var/log/mariadb/columnstore/debug.log", - "" -}; - -/** @brief Log Config Data map - */ -typedef struct LogConfigData_struct -{ - std::string moduleName; - int configData; -} -LogConfigData; - -typedef std::vector SystemLogConfigData; - -/** @brief LogConfigData level bitmap - */ -enum LEVEL_FLAGS -{ - LEVEL_CRITICAL = 0x1, - LEVEL_ERROR = 0x2, - LEVEL_WARNING = 0x4, - LEVEL_INFO = 0x8, - LEVEL_DEBUG = 0x10, -}; - -/** @brief Alarm IDs - */ - -enum ALARMS -{ - ALARM_NONE, // 0 = NO ALARM - CPU_USAGE_HIGH, // 1 = CPU Usage High threahold crossed - CPU_USAGE_MED, // 2 = CPU Usage Medium threshold crossed - CPU_USAGE_LOW, // 3 = CPU Usage Low threashold crossed - DISK_USAGE_HIGH, // 4 = DISK Usage High threahold crossed - DISK_USAGE_MED, // 5 = DISK Usage Medium threshold crossed - DISK_USAGE_LOW, // 6 = DISK Usage Low threashold crossed - MEMORY_USAGE_HIGH, // 7 = MEMORY Usage High threahold crossed - MEMORY_USAGE_MED, // 8 = MEMORY Usage Medium threshold crossed - MEMORY_USAGE_LOW, // 9 = MEMORY Usage Low threashold crossed - SWAP_USAGE_HIGH, // 10 = SWAP Usage High threahold crossed - SWAP_USAGE_MED, // 11 = SWAP Usage Medium threshold crossed - SWAP_USAGE_LOW, // 12 = SWAP Usage Low threashold crossed - PROCESS_DOWN_AUTO, // 13 = Process is down due to fault - MODULE_DOWN_AUTO, // 14 = Module is down due to fault - SYSTEM_DOWN_AUTO, // 15 = System is down due to fault - POWERON_TEST_SEVERE, // 16 = Power-On test Module Warning error - POWERON_TEST_WARNING, // 17 = Power-On test Warning error - HARDWARE_HIGH, // 18 = Hardware Critical alarm - HARDWARE_MED, // 19 = Hardware Major alarm - HARDWARE_LOW, // 20 = Hardware Minor alarm - PROCESS_DOWN_MANUAL, // 21 = Process is down due to operator request - MODULE_DOWN_MANUAL, // 22 = Module is down due to operator request - SYSTEM_DOWN_MANUAL, // 23 = System is down due to operator request - EXT_DEVICE_DOWN_AUTO, // 24 = External Device is down due to fault - PROCESS_INIT_FAILURE, // 25 = Process Initization Failure - NIC_DOWN_AUTO, // 26 = NIC is down due to fault - DBRM_LOAD_DATA_ERROR, // 27 = DBRM Load Data error - INVALID_SW_VERSION, // 28 = Invalid Software Version - STARTUP_DIAGNOTICS_FAILURE, // 29 = Module Startup Dianostics Failure - CONN_FAILURE, // 30 = Connect Failure - DBRM_READ_ONLY, // 31 = The DBRM is read-only - EE_LICENSE_EXPIRED, // 32 = Enterprise License has expired - MODULE_SWITCH_ACTIVE, // 33 = PM Failover / Switchover - ROLLBACK_FAILURE, // 34 = DB Rollback Failure - GLUSTER_DISK_FAILURE, // 35 = Gluster Disk Copy Failure - INVALID_LOCALE, // 36 = Locale invalid - MAX_ALARM_ID -}; - -/** @brief Alarm Severity - */ - -enum ALARM_SEVERITY -{ - NO_SEVERITY, // 0 = N/A - CRITICAL, // 1 = CRITICAL - MAJOR, // 2 = MAJOR - MINOR, // 3 = MINOR - WARNING, // 4 = WARNING - INFORMATIONAL // 5 = INFORMATIONAL -}; - -/** @brief OAM Hardware Management User Authorization level - */ - -enum AUTH_LEVEL -{ - ADMINISTRATION, // 0 = Admin Level - MAINTENANCE // 1 = Maintenance Level -}; - -/** @brief Boot Launch flag - */ - -enum LAUNCH_FLAG -{ - INIT_LAUNCH, // 0 = Process launched by OS Init - BOOT_LAUNCH, // 1 = Process launched by ProcMon at boot time - MGR_LAUNCH // 2 = Process lanuched by ProcMgr after System reboot -}; - -/** @brief Process Management API request options - * - * Message from a UI to Process Manager - */ - -enum PROC_MGT_MSG_REQUEST -{ - STOPMODULE, - STARTMODULE, - RESTARTMODULE, - ENABLEMODULE, - DISABLEMODULE, - STARTSYSTEM, - STOPSYSTEM, - RESTARTSYSTEM, - SHUTDOWNMODULE, - SHUTDOWNSYSTEM, - STOPPROCESS, - STARTPROCESS, - RESTARTPROCESS, - UPDATELOG, - GETCONFIGLOG, - REINITPROCESS, - UPDATECONFIG, - BUILDSYSTEMTABLES, - ADDMODULE, - REMOVEMODULE, - RECONFIGUREMODULE, - STOPPROCESSTYPE, - STARTPROCESSTYPE, - RESTARTPROCESSTYPE, - REINITPROCESSTYPE, - DISTRIBUTECONFIG, - SWITCHOAMPARENT, - UNMOUNT, - MOUNT, - SUSPENDWRITES, - FSTABUPDATE, - ENABLEMYSQLREP, - DISABLEMYSQLREP, - GLUSTERASSIGN, - GLUSTERUNASSIGN -}; - -/** @brief Process Management - Mgr to Mon request options - * - * Message from a Process Manager to Process Monitor - */ - -enum PROC_MGR_MSG_REQUEST -{ - STOPALL, - STOP, - START, - RESTART, - STARTALL, - PROCREINITPROCESS -}; - -/** @brief Process Management API type options - * - * Message from Process Manager to Process Monitor - * Process Monitor to Manager to UI - */ - -enum PROC_MGT_TYPE_REQUEST -{ - REQUEST, - ACK, - REPORT_STATUS, - PROCUPDATELOG, - PROCGETCONFIGLOG, - CHECKPOWERON, - PROCUPDATECONFIG, - HEARTBEAT_REGISTER, - HEARTBEAT_DEREGISTER, - HEARTBEAT_SEND, - PROCBUILDSYSTEMTABLES, - LOCALHEARTBEAT, - RECONFIGURE, - PROCESSRESTART, - GETSOFTWAREINFO, - UPDATEEXPORTS, - UPDATEPARENTNFS, - OAMPARENTACTIVE, - UPDATECONFIGFILE, - GETDBRMDATA, - GETPARENTOAMMODULE, - OAMPARENTCOLD, - GETALARMDATA, - GETACTIVEALARMDATA, - PROCUNMOUNT, - PROCMOUNT, - PROCFSTABUPDATE, - MASTERREP, - SLAVEREP, - MASTERDIST, - DISABLEREP, - PROCGLUSTERASSIGN, - PROCGLUSTERUNASSIGN, - CONFIGURE, - SYNCFSALL -}; - - -/** @brief Hardware and process shutdown flag - */ - -enum GRACEFUL_FLAG -{ - GRACEFUL, - FORCEFUL, - INSTALL, - REMOVE, - GRACEFUL_STANDBY, - STATUS_UPDATE, - GRACEFUL_WAIT // Wait for all table locks and transactions to finish. -}; - -/** @brief Acknowledgment indication flag - */ - -enum ACK_FLAG -{ - ACK_NO, - ACK_YES -}; - -/** @brief Responses to cancel/wait/rollback/force question - * - * When a suspend, stop, restart or shutdown of system is - * requested, the user is asked this question. - */ -enum CC_SUSPEND_ANSWER -{ - CANCEL, - WAIT, - ROLLBACK, - FORCE -}; - -/** @brief Process Management Status Request types - */ - -enum STATUS_TYPE_REQUEST -{ - GET_PROC_STATUS, - SET_PROC_STATUS, - GET_ALL_PROC_STATUS, - GET_PROC_STATUS_BY_PID, - GET_SYSTEM_STATUS, - SET_SYSTEM_STATUS, - SET_MODULE_STATUS, - SET_EXT_DEVICE_STATUS, - ADD_MODULE, - REMOVE_MODULE, - RECONFIGURE_MODULE, - SET_NIC_STATUS, - SET_PM_IPS, - ADD_EXT_DEVICE, - REMOVE_EXT_DEVICE, - GET_SHARED_MEM, - SET_DBROOT_STATUS, - ADD_DBROOT, - REMOVE_DBROOT -}; - -/** @brief System Software Package Structure - * - * Structure that is returned by the getSystemSoftware API - */ - -struct SystemSoftware_s -{ - std::string Version; //!< System Software Version - std::string Release; //!< System Software Release -}; -typedef struct SystemSoftware_s SystemSoftware; - -/** @brief System Software Package parse data - */ -const std::string SoftwareData[] = -{ - "version=", - "release=", - "" -}; - -/** @brief System Configuration Structure - * - * Structure that is returned by the getSystemConfigFile API for the - * System Configuration data stored in the System Configuration file - */ - -struct SystemConfig_s -{ - std::string SystemName; //!< System Name - int32_t ModuleHeartbeatPeriod; //!< Module Heartbeat period in minutes - uint32_t ModuleHeartbeatCount; //!< Module Heartbeat failure count -// int32_t ProcessHeartbeatPeriod; //!< Process Heartbeat period in minutes - std::string NMSIPAddr; //!< NMS system IP address - std::string DNSIPAddr; //!< DNS IP address - std::string LDAPIPAddr; //!< LDAP IP address - std::string NTPIPAddr; //!< NTP IP address - uint32_t DBRootCount; //!< Database Root directory Count - std::vector DBRoot; //!< Database Root directories - std::string DBRMRoot; //!< DBRM Root directory - uint32_t ExternalCriticalThreshold; //!< External Disk Critical Threahold % - uint32_t ExternalMajorThreshold; //!< External Disk Major Threahold % - uint32_t ExternalMinorThreshold; //!< External Disk Minor Threahold % - uint32_t MaxConcurrentTransactions; //!< Session Mgr Max Current Trans - std::string SharedMemoryTmpFile; //!< Session Mgr Shared Mem Temp file - uint32_t NumVersionBufferFiles; //!< Version Buffer number of files - uint32_t VersionBufferFileSize; //!< Version Buffer file size - std::string OIDBitmapFile; //!< OID Mgr Bitmap File name - uint32_t FirstOID; //!< OID Mgr First O - std::string ParentOAMModule; //!< Parent OAM Module Name - std::string StandbyOAMModule; //!< Standby Parent OAM Module Name - uint32_t TransactionArchivePeriod; //!< Tranaction Archive Period in minutes -}; -typedef struct SystemConfig_s SystemConfig; - /** @brief Host/IP Address Config Structure * */ @@ -771,18 +235,6 @@ typedef struct DeviceDBRootConfig_s DeviceDBRootConfig; typedef std::vector DeviceDBRootList; -/** @brief Module Type Configuration Structure - * - * Structure that is returned by the getSystemConfigFile API for the - * Module Type Configuration data stored in the System Configuration file - */ - -struct PmDBRootCount_s -{ - uint16_t pmID; //!< PM ID - uint16_t count; //!< DBRoot Count -}; - struct ModuleTypeConfig_s { std::string ModuleType; //!< Module Type @@ -838,237 +290,6 @@ struct ModuleConfig_s }; typedef struct ModuleConfig_s ModuleConfig; - -/** @brief External Device Name Configuration Structure - * - * Structure that is returned by the getSystemConfigFile API for the - * External Device Name Configuration data stored in the System Configuration file - */ - -struct ExtDeviceConfig_s -{ - std::string Name; //!< Name - std::string IPAddr; //!< IP address - std::string DisableState; //!< Disabled State -}; -typedef struct ExtDeviceConfig_s ExtDeviceConfig; - -/** @brief System External Device Configuration Structure - * - * Structure that is returned by the getSystemConfigFile API for the - * External Device Type Configuration data stored in the System Configuration file - */ - -struct SystemExtDeviceConfig_s -{ - uint16_t Count; //!< External Device Equipage Count - std::vector extdeviceconfig; //!< External Device IP Address and name List -}; -typedef struct SystemExtDeviceConfig_s SystemExtDeviceConfig; - -/** @brief Module Status Structure - * - * Structure that is returned by the getSystemStatus API for the - * System Status data stored in the System Status file - */ - -struct ModuleStatus_s -{ - std::string Module; //!< Module Name - uint16_t ModuleOpState; //!< Operational State - std::string StateChangeDate; //!< Last time/date state change -}; -typedef struct ModuleStatus_s ModuleStatus; - -/** @brief System Module Status Structure - * - * Structure that is returned by the getSystemStatus API for the - * System Module Status data stored in the System Status file - */ - -struct SystemModuleStatus_s -{ - std::vector modulestatus; //!< Module Status Structure -}; -typedef struct SystemModuleStatus_s SystemModuleStatus; - - -/** @brief Ext Device Status Structure - * - * Structure that is returned by the getSystemStatus API for the - * System Status data stored in the System Status file - */ - -struct ExtDeviceStatus_s -{ - std::string Name; //!< External Device Name - uint16_t OpState; //!< Operational State - std::string StateChangeDate; //!< Last time/date state change -}; -typedef struct ExtDeviceStatus_s ExtDeviceStatus; - -/** @brief System Ext Device Status Structure - * - * Structure that is returned by the getSystemStatus API for the - * System System Ext Status data stored in the System Status file - */ - -struct SystemExtDeviceStatus_s -{ - std::vector extdevicestatus; //!< External Device Status Structure -}; -typedef struct SystemExtDeviceStatus_s SystemExtDeviceStatus; - - -/** @brief DBRoot Status Structure - * - * Structure that is returned by the getSystemStatus API for the - * System Status data stored in the System Status file - */ - -struct DbrootStatus_s -{ - std::string Name; //!< Dbroot Name - uint16_t OpState; //!< Operational State - std::string StateChangeDate; //!< Last time/date state change -}; -typedef struct DbrootStatus_s DbrootStatus; - -/** @brief Dbroot Status Structure - * - * Structure that is returned by the getSystemStatus API for the - * System System Ext Status data stored in the System Status file - */ - -struct SystemDbrootStatus_s -{ - std::vector dbrootstatus; //!< Dbroot Status Structure -}; -typedef struct SystemDbrootStatus_s SystemDbrootStatus; - -/** @brief NIC Status Structure - * - * Structure that is returned by the getSystemStatus API for the - * System Status data stored in the System Status file - */ - -struct NICStatus_s -{ - std::string HostName; //!< NIC Name - uint16_t NICOpState; //!< Operational State - std::string StateChangeDate; //!< Last time/date state change -}; -typedef struct NICStatus_s NICStatus; - -/** @brief System NIC Status Structure - * - * Structure that is returned by the getSystemStatus API for the - * System NIC Status data stored in the System Status file - */ - -struct SystemNICStatus_s -{ - std::vector nicstatus; //!< NIC Status Structure -}; -typedef struct SystemNICStatus_s SystemNICStatus; - -/** @brief System Status Structure - * - * Structure that is returned by the getSystemStatus API for the - * System Status data stored in the System Status file - */ - -struct SystemStatus_s -{ - uint16_t SystemOpState; //!< System Operational State - std::string StateChangeDate; //!< Last time/date state change - SystemModuleStatus systemmodulestatus; //!< System Module status - SystemExtDeviceStatus systemextdevicestatus; //!< System Ext Device status - SystemNICStatus systemnicstatus; //!< System NIC status - SystemDbrootStatus systemdbrootstatus; //!< System DBroot status -}; -typedef struct SystemStatus_s SystemStatus; - -/** @brief Process Configuration Structure - * - * Structure that is returned by the getSystemProcessConfig API for the - * Process Configuration data stored in the Process Configuration file - */ - -struct ProcessConfig_s -{ - std::string ProcessName; //!< Process Name - std::string ModuleType; //!< Module Type that process is running on - std::string ProcessLocation; //!< Process launch location - std::string ProcessArgs[MAX_ARGUMENTS]; //!< Process Arguments - uint16_t BootLaunch; //!< Boot Launch flag, 0 = init, 1 = boot, 2 = Mgr - uint16_t LaunchID; //!< Launch ID number - std::string DepProcessName[MAX_DEPENDANCY]; //!< Dependent Processes - std::string DepModuleName[MAX_DEPENDANCY];//!< Dependent Process Module Name - std::string RunType; //!< Process Run Type - std::string LogFile; //!< Process Log File Indicator -}; -typedef struct ProcessConfig_s ProcessConfig; - -/** @brief System Process Configuration Structure - * - * Structure that is returned by the getSystemProcessConfig API for the - * System Process Configuration data stored in the Process Configuration file - */ - -struct SystemProcessConfig_s -{ - std::vector processconfig; //!< Process Configuration Structure -}; -typedef struct SystemProcessConfig_s SystemProcessConfig; - -/** @brief Process Status Structure - * - * Structure that is returned by the getProcessStatus API for the - * Process Status data stored in the Process Status file - */ - -struct ProcessStatus_s -{ - std::string ProcessName; //!< Process Name - std::string Module; //!< Module Name that process is running on - pid_t ProcessID; //!< Process ID number - std::string StateChangeDate; //!< Last time/date state change - uint16_t ProcessOpState; //!< Process Operational State -}; -typedef struct ProcessStatus_s ProcessStatus; - - -/** @brief System Process Status Structure - * - * Structure that is returned by the getProcessStatus API for the - * System Process Status data stored in the Process Status file - */ - -struct SystemProcessStatus_s -{ - std::vector processstatus; //!< Process Status Structure -}; -typedef struct SystemProcessStatus_s SystemProcessStatus; - -/** @brief Alarm Configuration Structure - * - * Structure that is returned by the getAlarmConfig API for the - * Alarm Configuration data stored in the Alarm Configuration file - */ - -struct AlarmConfig_s -{ - uint16_t AlarmID; //!< Alarm ID - std::string BriefDesc; //!< Brief Description - std::string DetailedDesc; //!< Detailed Description - uint16_t Severity; //!< Severity - 1=Critical, 2=Major, 3=Minor, 4=Warning, 5=Informational - uint16_t Threshold; //!< Stop reporting threshold - uint16_t Occurrences; //!< Alarm Occurrences within 30 min window - uint32_t LastIssueTime; //!< last time alarms was issued -}; -typedef struct AlarmConfig_s AlarmConfig; - /** @brief Local Module OAM Configuration StructureLOG_ * * Structure that is returned by the getModuleInfo API for the @@ -1082,244 +303,6 @@ typedef struct AlarmConfig_s AlarmConfig; typedef boost::tuple oamModuleInfo_t; -/** @brief My Process OAM Status Structure - * - * Structure that is returned by the getMyProcessStatus API for the - * Local Process OAM Status data stored in the Process Status file - * Returns: Process ID, Process Name, and Process State - */ - -typedef boost::tuple myProcessStatus_t; - -/** @brief Store Device ID Structure - * - * Structure that is returned by the getMyProcessStatus API for the - * Local Process OAM Status data stored in the Process Status file - * Returns: Process ID, Process Name, and Process State - */ - -typedef boost::tuple storageID_t; - -/** @brief User Configuration Structure - * - * Structure that is returned by the getHardwareUserConfig API for the - * User Configuration data - */ - -struct UserConfig_s -{ - std::string UserName; //!< User Name - AUTH_LEVEL UserAuthLevel; //!< User Authorization level - bool UserActiveFlag; //!< User Actively logged in -}; -typedef struct UserConfig_s UserConfig; - -/** @brief System User Configuration Structure - * - * Structure that is returned by the getHardwareUserConfig API for the - * System User Configuration data - */ - -struct SystemUserConfig_s -{ - std::vector userconfig; //!< User Configuration Structure -}; -typedef struct SystemUserConfig_s SystemUserConfig; - - - -/** @brief Process Cpu User Structure - * - */ - -struct ProcessCpuUser_s -{ - std::string ProcessName; //!< Process Name - uint16_t CpuUsage; //!< Process Cpu Usage % -}; -typedef struct ProcessCpuUser_s ProcessCpuUser; - -/** @brief TOP Process Cpu User Structure - * - * Structure that is returned by the getTopProcessCpuUsers API - */ - -struct TopProcessCpuUsers_s -{ - std::string ModuleName; //!< Module Name - uint16_t numberTopUsers; //!< Number of TOP Users - std::vector processcpuuser; //!< TOP Users -}; -typedef struct TopProcessCpuUsers_s TopProcessCpuUsers; - -/** @brief System TOP Process Cpu User Structure - * - * Structure that is returned by the getTopProcessCpuUsers API - */ - -struct SystemTopProcessCpuUsers_s -{ - std::vector topprocesscpuusers; //!< TOP Process Cpu User Structure -}; -typedef struct SystemTopProcessCpuUsers_s SystemTopProcessCpuUsers; - -/** @brief Module Cpu Structure - * - */ - -struct ModuleCpu_s -{ - std::string ModuleName; //!< Module Name - uint16_t CpuUsage; //!< Module Cpu Usage % -}; -typedef struct ModuleCpu_s ModuleCpu; - -/** @brief System Module Cpu Structure - * - * Structure that is returned by the getTopProcessCpuUsers API - */ - -struct SystemCpu_s -{ - std::vector modulecpu; //!< Module Cpu -}; -typedef struct SystemCpu_s SystemCpu; - - -/** @brief Process Memory User Structure - * - */ - -struct ProcessMemoryUser_s -{ - std::string ProcessName; //!< Process Name - uint32_t MemoryUsed; //!< Process Memory Used - uint16_t MemoryUsage; //!< Process Memory Usage % -}; -typedef struct ProcessMemoryUser_s ProcessMemoryUser; - -/** @brief TOP Process Memory User Structure - * - * Structure that is returned by the getTopProcessMemoryUsers API - */ - -struct TopProcessMemoryUsers_s -{ - std::string ModuleName; //!< Module Name - uint16_t numberTopUsers; //!< Number of TOP Users - std::vector processmemoryuser; //!< TOP Users -}; -typedef struct TopProcessMemoryUsers_s TopProcessMemoryUsers; - -/** @brief System TOP Process Memory User Structure - * - * Structure that is returned by the getTopProcessMemoryUsers API - */ - -struct SystemTopProcessMemoryUsers_s -{ - std::vector topprocessmemoryusers; //!< TOP Process Memory User Structure -}; -typedef struct SystemTopProcessMemoryUsers_s SystemTopProcessMemoryUsers; - -/** @brief Module Memory Structure - * - */ - -struct ModuleMemory_s -{ - std::string ModuleName; //!< Module Name - uint32_t MemoryTotal; //!< Memory Total - uint32_t MemoryUsed; //!< Memory Used - uint32_t cache; //!< Cache Used - uint16_t MemoryUsage; //!< Memory Usage Percent - uint32_t SwapTotal; //!< Swap Total - uint32_t SwapUsed; //!< Swap Used - uint16_t SwapUsage; //!< Swap Usage Percent -}; -typedef struct ModuleMemory_s ModuleMemory; - -/** @brief System Module Cpu Structure - * - * Structure that is returned by the getTopProcessCpuUsers API - */ - -struct SystemMemory_s -{ - std::vector modulememory; //!< Module Memory -}; -typedef struct SystemMemory_s SystemMemory; - -/** @brief Disk Usage Structure - * - */ - -struct DiskUsage_s -{ - std::string DeviceName; //!< Device Name - uint64_t TotalBlocks; //!< Total Blocks - uint64_t UsedBlocks; //!< Used Blocks - uint16_t DiskUsage; //!< Disk Usage % -}; -typedef struct DiskUsage_s DiskUsage; - -/** @brief Module Disk Usage Structure - * - * Structure that is returned by the getTopProcessMemoryUsers API - */ - -struct ModuleDisk_s -{ - std::string ModuleName; //!< Module Name - std::vector diskusage; //!< Disk Usage -}; -typedef struct ModuleDisk_s ModuleDisk; - -/** @brief System Disk Usage Structure - * - * Structure that is returned by the getTopProcessMemoryUsers API - */ - -struct SystemDisk_s -{ - std::vector moduledisk; //!< Module Disk Usage -}; -typedef struct SystemDisk_s SystemDisk; - -/** @brief Active Sql Statement Structure - * - */ - -struct ActiveSqlStatement -{ - std::string sqlstatement; - unsigned starttime; - uint64_t sessionid; -}; -typedef std::vector ActiveSqlStatements; - -struct DataRedundancyStorageSetup -{ - int brickID; - std::string storageLocation; - std::string storageFilesytemType; -}; -typedef std::vector DataRedundancyStorage; - -struct DataRedundancySetup_s -{ - int pmID; - std::string pmHostname; - std::string pmIpAddr; - std::vector dbrootCopies; - DataRedundancyStorage storageLocations; -}; -typedef struct DataRedundancySetup_s DataRedundancySetup; - -// username / password for smbclient use -const std::string USERNAME = "oamuser"; -const std::string PASSWORD = "Calpont1"; - /** @brief System Storage Configuration Structure * * Structure that is returned by the getStorageConfig API @@ -1328,8 +311,6 @@ const std::string PASSWORD = "Calpont1"; typedef boost::tuple systemStorageInfo_t; -typedef std::vector dbrootList; - /** @brief OAM API I/F class * * Operations, Administration, and Maintenance C++ APIs. These APIS are utilized @@ -1350,20 +331,6 @@ public: */ EXPORT virtual ~Oam(); - /** @brief get System Software information - * - * get System Software information from the System software RPM. - * @param systemconfig Returned System Software Structure - */ - EXPORT void getSystemSoftware(SystemSoftware& systemsoftware); - - /** @brief get System Configuration information - * - * get System Configuration information from the system config file. - * @param systemconfig Returned System Configuration Structure - */ - EXPORT void getSystemConfig(SystemConfig& systemconfig); - /** @brief get System Module Configuration information * * get System Module Configuration information value from the system config file. @@ -1387,43 +354,6 @@ public: */ EXPORT void getSystemConfig(const std::string& moduleName, ModuleConfig& moduleconfig); - /** @brief get System Module Configuration information for local Module - * - * get System Module Name Configuration information for local Module from the system config file. - * @param Moduleconfig Returned System Configuration Structure - */ - EXPORT void getSystemConfig(ModuleConfig& moduleconfig); - - /** @brief get System Module Type Configuration information for local Module Type - * - * get System Module Name Configuration information for local Module from the system config file. - * @param moduletypeconfig Returned System Configuration Structure - */ - EXPORT void getSystemConfig(ModuleTypeConfig& moduletypeconfig); - - /** @brief get System Ext Device Name Configuration information - * - * get System Ext Device Name Configuration information for a System Ext from the system config file. - * @param name the Ext Device Name to get information - * @param extdeviceConfig Returned System Ext Device Configuration Structure - */ - EXPORT void getSystemConfig(const std::string& name, ExtDeviceConfig& extdeviceConfig); - - /** @brief get System Ext Device Configuration information - * - * get System Ext Device Name Configuration information for local System Ext from the system config file. - * @param extdeviceConfig Returned System Configuration Structure - */ - EXPORT void getSystemConfig(SystemExtDeviceConfig& systemextdeviceConfig); - - /** @brief set Ext Device Configuration information - * - * Set Ext Device Configuration information - * @param deviceName the Device Name to get information - * @param extdeviceConfig Ext Device Configuration Structure - */ - EXPORT void setSystemConfig(const std::string deviceName, ExtDeviceConfig extdeviceConfig); - /** @brief get System Configuration String Parameter * * get System Configuration String Parameter from the system config file. @@ -1440,258 +370,6 @@ public: */ EXPORT void getSystemConfig(const std::string& name, int& value); - /** @brief get Module Name for IP Address - * - * get Module Name for given IP address from the system config file. - * @param IpAddress the Patamater IP Address - * @param moduleName Returned Parameter Value - */ - EXPORT void getModuleNameByIPAddr(const std::string IpAddress, std::string& moduleName); - - /** @brief set System Configuration String Parameter - * - * set System Configuration String Parameter from the system config file. - * @param name the Parameter Name to set value - * @param value the Parameter Value to set - */ - EXPORT void setSystemConfig(const std::string name, const std::string value); - - /** @brief set System Configuration Integer Parameter - * - * set System Configuration Integer Parameter from the system config file. - * @param name the Parameter Name to set value - * @param value the Parameter Value to set - */ - EXPORT void setSystemConfig(const std::string name, const int value); - - /** @brief set System Module Type Configuration information for a Module - * - * set System Module Type Configuration information for a Module from the system config file. - * @param moduletype the Module Type to get information - * @param moduletypeconfig System Module Configuration Structure - */ - EXPORT void setSystemConfig(const std::string moduletype, ModuleTypeConfig moduletypeconfig); - - - /** @brief set System Module Name Configuration information for a Module - * - * Set System Module Name Configuration information for a Module from the system config file. - * @param moduleName the Module Name to get information - * @param moduleconfig System Module Configuration Structure - */ - EXPORT void setSystemConfig(const std::string module, ModuleConfig moduleconfig); - - - /** @brief add Module - * - * Add module to the system config file. - * @param DeviceNetworkConfig the Modules added - * @param password Host Root Password - */ - EXPORT void addModule(DeviceNetworkList devicenetworklist, const std::string password, const std::string mysqlpw, - bool storeHostnames); - - /** @brief remove Module - * - * Remove module from the system config file. - * @param DeviceNetworkConfig the Modules to be removed - */ - EXPORT void removeModule(DeviceNetworkList devicenetworklist); - - /** @brief reconfigure Module - * - * Add module to the system config file. - * @param DeviceNetworkConfig the Module Name to be reconfigured - */ - EXPORT void reconfigureModule(DeviceNetworkList devicenetworklist); - - /** @brief get System Status information - * - * get System Status information from the system status file. - * @param systemstatus Returned System Status Structure - */ - EXPORT void getSystemStatus(SystemStatus& systemstatus, bool systemStatusOnly = true); - - /** @brief set System Status information - * - * set System Status information in the system status file. - * @param state System Operational State - */ - EXPORT void setSystemStatus(const int state); - - /** @brief get Module Status information - * - * get Module Status information from the system status file. - * @param name Module Name - * @param state Returned Operational State - */ - EXPORT void getModuleStatus(const std::string name, int& state, bool& degraded); - - /** @brief set Module Status information - * - * set Module Status information in the system status file. - * @param name Module Name - * @param state Module Operational State - */ - EXPORT void setModuleStatus(const std::string name, const int state); - - /** @brief get Ext Device Status information - * - * get Ext Device Status information from the system status file. - * @param name Ext Device Name - * @param state Returned Operational State - */ - EXPORT void getExtDeviceStatus(const std::string name, int& state); - - /** @brief set Ext Device Status information - * - * set Ext Device Status information in the system status file. - * @param name Ext Device Name - * @param state System Ext Operational State - */ - EXPORT void setExtDeviceStatus(const std::string name, const int state); - - /** @brief get Dbroot Status information - * - * get DBroot Status information in the system status file. - * @param name DBroot Name - * @param state System Operational State - */ - EXPORT void getDbrootStatus(const std::string name, int& state); - - /** @brief set Dbroot Status information - * - * set DBroot Status information in the system status file. - * @param name DBroot Name - * @param state System Operational State - */ - EXPORT void setDbrootStatus(const std::string name, const int state); - - /** @brief get NIC Status information - * - * get NIC Status information. - * @param name NIC HostName - * @param state Returned Operational State - */ - EXPORT void getNICStatus(const std::string name, int& state); - - /** @brief set NIC Status information - * - * set NIC Status information. - * @param name NIC HostName - * @param state NIC Operational State - */ - EXPORT void setNICStatus(const std::string name, const int state); - - /** @brief get System Process Configuration information - * - * get System Configuration Process information from the Process config file. - * @param systemprocessconfig Returned System Process Configuration Structure - */ - EXPORT void getProcessConfig(SystemProcessConfig& systemprocessconfig); - - /** @brief get Process Configuration information - * - * get System Process information from the Process config file. - * @param process the Process Name to get value - * @param module the Module Name for the Process to get value - * @param processconfig Returned Process Configuration Structure - */ - EXPORT void getProcessConfig(const std::string process, const std::string module, ProcessConfig& processconfig); - - /** @brief get Process Configuration String Parameter - * - * get Process Configuration String Parameter from the Process config file. - * @param process the Process Name to get value - * @param module the Module Name for the Process to get value - * @param name the Parameter Name to get value - * @param value the Parameter Value to get - */ - - EXPORT void getProcessConfig(const std::string process, const std::string module, const std::string name, std::string& value); - - /** @brief get Process Configuration Integer Parameter - * - * get Process Configuration Integer Parameter from the Process config file. - * @param process the Process Name to get value - * @param module the Module Name for the Process to get value - * @param name the Parameter Name to get value - * @param value the Parameter Value to get - */ - EXPORT void getProcessConfig(const std::string process, const std::string module, const std::string name, int& value); - - /** @brief set Process Configuration String Parameter - * - * set Process Configuration String Parameter from the Process config file. - * @param process the Process Name to set value - * @param module the Module Name for the Process to set value - * @param name the Parameter Name to set value - * @param value the Parameter Value to set - */ - - EXPORT void setProcessConfig(const std::string process, const std::string module, const std::string name, const std::string value); - - /** @brief set Process Configuration Integer Parameter - * - * set Process Configuration Integer Parameter from the Process config file. - * @param process the Process Name to set value - * @param module the Module Name for the Process to set value - * @param name the Parameter Name to set value - * @param value the Parameter Value to set - */ - EXPORT void setProcessConfig(const std::string process, const std::string module, const std::string name, const int value); - - /** @brief get System Process Status information - * - * get System Process Status information from the Process status file. - * @param systemprocessconfig Returned System Process Status Structure - */ - EXPORT void getProcessStatus(SystemProcessStatus& systemprocessstatus, std::string port = "ProcStatusControl"); - - /** @brief get Process Status information - * - * get Process information from the Process Status file. - * @param process the Process Name to get value - * @param module the Module Name for the Process to get value - * @param processconfig Returned Process Status Structure - */ - EXPORT void getProcessStatus(const std::string process, const std::string module, ProcessStatus& processstatus); - - /** @brief set Process Status - * - * set Process Status - * @param process the Process Name to set value - * @param module the Module Name for the Process to set value - * @param state the Operational state - * @param PID the Process ID - */ - - EXPORT void setProcessStatus(const std::string process, const std::string module, const int state, pid_t PID); - - /** @brief Process Init Complete - * - * Process Init Complete - * - */ - - EXPORT void processInitComplete(std::string processName, int STATE = oam::ACTIVE); - - /** @brief Process Init Failure - * - * Process Init Failure - * - */ - - EXPORT void processInitFailure(); - - /** @brief get Local Process Status Data - * - * get Local PID, Name, and Status from Process Status file - * @return myProcessStatus_t structure, which contains the local process OAM - * Status Data - */ - EXPORT myProcessStatus_t getMyProcessStatus(pid_t processID = 0); - /** @brief get Local Module Configuration Data * * get Local Module Name, OAM Parent Flag, and Realtime Linux OS Flag from @@ -1701,545 +379,10 @@ public: */ EXPORT oamModuleInfo_t getModuleInfo(); - /** @brief get Alarm Configuration information - * - * get Alarm Configuration information from the alarm config file. - * @param alarmid the Alarm ID for the parameter value - * @param alarmconfig Returned Alarm Configuration Structure - */ - EXPORT void getAlarmConfig(const int alarmid, AlarmConfig& alarmconfig); - - /** @brief get Alarm Configuration String Parameter - * - * get Alarm Configuration String Parameter from the Alarm config file. - * @param alarmid the Alarm ID to get Alarm Configuration information - * @param name the Parameter Name for the parameter value - * @param value returned Parameter Value - */ - EXPORT void getAlarmConfig(const int alarmid, const std::string name, std::string& value); - - /** @brief get Alarm Configuration Integer Parameter - * - * get Alarm Configuration Integer Parameter from the Alarm config file. - * @param alarmid the Alarm ID to get the parameter value - * @param name the Parameter Name for the parameter value - * @param value returned Parameter Value - */ - EXPORT void getAlarmConfig(const int alarmid, const std::string name, int& value); - - /** @brief set Alarm Configuration String Parameter - * - * set Alarm Configuration String Parameter from the Alarm config file. - * @param alarmid the Alarm ID to set the parameter value - * @param name the Parameter Name to set - * @param value the Parameter Value to set - */ - EXPORT void setAlarmConfig(const int alarmid, const std::string name, const std::string value); - - /** @brief set Alarm Configuration Integer Parameter - * - * set Alarm Configuration Integer Parameter from the Alarm config file. - * @param alarmid the Alarm ID to set the parameter value - * @param name the Parameter Name to set - * @param value the Parameter Value to set - */ - EXPORT void setAlarmConfig(const int alarmid, const std::string name, const int value); - - /** @brief OAM Hardware Management Login - * - * Login into the system to utilizes the OAM APIs from a user application - * @param username the Login User Name - * @param password the Login Password - */ - EXPORT void login(const std::string username, const std::string password); - - /** @brief OAM Hardware Management Self Logout - * - * Logout from OAM Hardware Management system - */ - EXPORT void logout(); - - /** @brief OAM Hardware Management Logout - * - * Logout another user from OAM Hardware Management system - * @param username the Login User Name - * @param password the Login Password - */ - EXPORT void logout(const std::string username, const std::string password); - - /** @brief Add OAM Hardware Management User - * - * Add a new user to the OAM Hardware Management system - * @param username the new User Name - * @param password the new User Password - * @param authlevel the Authorization Level for the new user - */ - EXPORT void addHardwareUser(const std::string username, const std::string password, AUTH_LEVEL authlevel); - - /** @brief Change OAM Hardware Management User Password - * - * Change a current OAM Hardware Management User's password - * @param username the User Name - * @param oldpassword the old User Password - * @param newpassword the new User Password - */ - EXPORT void changeHardwareUserPassword(const std::string username, const std::string oldpassword, const std::string newpassword); - - /** @brief Delete OAM Hardware Management User - * - * Delete a current OAM Hardware Management User - * @param username the User Name - */ - EXPORT void deleteHardwareUser(const std::string username); - - /** @brief Get all OAM Hardware Management User Configuration - * - * Get OAM Hardware Management User Configuration for a single user - * @param systemuserconfig Returned System User Configuration Structure - */ - EXPORT void getHardwareUserConfig(SystemUserConfig& systemuserconfig); - - /** @brief Get OAM Hardware Management User Configuration - * - * Get all OAM Hardware Management User Configuration - * @param username the User Name - * @param userconfig Returned User Configuration Structure - */ - EXPORT void getHardwareUserConfig(const std::string username, UserConfig& userconfig); - - /** @brief Stop Module - * - * Stop's a Module within the Calpont Database Appliance - * @param name the Module Name to stop - * @param gracefulflag Graceful/Forceful flag - * @param ackflag Acknowledgment flag - */ -// EXPORT void stopModule(const std::string name, GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag); - - /** @brief Shutdown Module - * - * Shutdown's a Module within the Calpont Database Appliance - * @param name the Module Name to stop - * @param gracefulflag Graceful/Forceful flag - * @param ackflag Acknowledgment flag - */ -// EXPORT void shutdownModule(const std::string name, GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag); - - /** @brief Start Module - * - * Start's a stopped Module within the Calpont Database Appliance - * @param name the Module Name to stop - * @param ackflag Acknowledgment flag - */ -// EXPORT void startModule(const std::string name, ACK_FLAG ackflag); - - /** @brief Restart Module - * - * Restart's a Module within the Calpont Database Appliance - * @param name the Module Name to restart - * @param gracefulflag Graceful/Forceful flag - * @param ackflag Acknowledgment flag - */ -// EXPORT void restartModule(const std::string name, GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag); - - /** @brief Disable Module - * - * Disable a Module within the Calpont Database Appliance - * @param name the Module Name to disable - */ -// EXPORT void disableModule(const std::string name); - - /** @brief Enable Module - * - * Enable a Module within the Calpont Database Appliance - * @param name the Module Name to enable - */ -// EXPORT void enableModule(const std::string name); - - /** @brief Stop Module - * - * Stop's a Module within the Calpont Database Appliance - * @param DeviceNetworkConfig the Modules to be stopped - * @param gracefulflag Graceful/Forceful flag - * @param ackflag Acknowledgment flag - */ - EXPORT void stopModule(DeviceNetworkList devicenetworklist, GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag); - - /** @brief Shutdown Module - * - * Shutdown's a Module within the Calpont Database Appliance - * @param DeviceNetworkConfig the Modules to be shutdown - * @param gracefulflag Graceful/Forceful flag - * @param ackflag Acknowledgment flag - */ - EXPORT void shutdownModule(DeviceNetworkList devicenetworklist, GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag); - - /** @brief Start Module - * - * Start's a stopped Module within the Calpont Database Appliance - * @param DeviceNetworkConfig the Modules to be started - * @param ackflag Acknowledgment flag - */ - EXPORT void startModule(DeviceNetworkList devicenetworklist, ACK_FLAG ackflag); - - /** @brief Restart Module - * - * Restart's a Module within the Calpont Database Appliance - * @param DeviceNetworkConfig the Modules to be restarted - * @param gracefulflag Graceful/Forceful flag - * @param ackflag Acknowledgment flag - */ - EXPORT void restartModule(DeviceNetworkList devicenetworklist, GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag); - - /** @brief Disable Module - * - * Disable a Module within the Calpont Database Appliance - * @param DeviceNetworkConfig the Modules to be disabled - */ - EXPORT void disableModule(DeviceNetworkList devicenetworklist); - - /** @brief Enable Module - * - * Enable a Module within the Calpont Database Appliance - * @param DeviceNetworkConfig the Modules to be enabled - */ - EXPORT void enableModule(DeviceNetworkList devicenetworklist); - - /** @brief Stop System - * - * Stop's the Calpont Database Appliance System - * @param gracefulflag Graceful/Forceful flag - * @param ackflag Acknowledgment flag - */ - EXPORT void stopSystem(GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag); - - /** @brief Shutdown System - * - * Shutdown's the Calpont Database Appliance System - * @param gracefulflag Graceful/Forceful flag - * @param ackflag Acknowledgment flag - */ - EXPORT void shutdownSystem(GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag); - - /** @brief Suspend Database Writes - * - * Suspends writing to the database. This should be done before backup - * activities occur. - * @param gracefulflag Graceful/Forceful flag - * @param ackflag Acknowledgment flag - */ - EXPORT void SuspendWrites(GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag); - - /** @brief Start System - * - * Start's the stopped Calpont Database Appliance System - * @param ackflag Acknowledgment flag - */ - EXPORT void startSystem(ACK_FLAG ackflag); - - /** @brief Restart System - * - * Restart's the active/stopped Calpont Database Appliance System - * @param gracefulflag Graceful/Forceful flag - * @param ackflag Acknowledgment flag - */ - EXPORT int restartSystem(GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag); - - /** @brief Display a list of locked tables - */ - void DisplayLockedTables(std::vector& tableLocks, BRM::DBRM* pDBRM = NULL); - - /** @brief Get Active Alarms - * - * Get's the Active Alarm list for the Calpont Database Appliance - * @param activealarm Returned Active Alarm list Structure - */ - EXPORT void getActiveAlarms(alarmmanager::AlarmList& activealarm); - - /** @brief Get Historical Alarms - * - * Get's the Alarm list for the Calpont Database Appliance - * @param date date of alarms, "today" or date in YYYYMMDD - * @param activealarm Returned Alarm list Structure - */ - EXPORT void getAlarms(std::string date, alarmmanager::AlarmList& alarm); - - /** @brief check Active Alarm - * - * Check if alarm is in Active Alarm file - * @param alarmid the Alarm ID - * @param moduleName the Module Name - * @param deviceName the Alarm device Name - */ - EXPORT bool checkActiveAlarm(const int alarmid, const std::string moduleName, const std::string deviceName); - - /** @brief Read Active Alarms in HDFS - * - * Read the Active Alarm list from the HDFS image of activeAlarms - * @param activealarm Returned Active Alarm list Structure - */ - EXPORT int readHdfsActiveAlarms(alarmmanager::AlarmList& activealarm); - - /** @brief update Log - * - * Enable/Disable Logging with the system or on a specific Module at - * a specific level - * @param action enabled/disable - * @param deviceid the device which logging is being enabled/disable - * @param loglevel the level of logging being enabled/disable - */ - EXPORT void updateLog(const std::string action, const std::string deviceid, const std::string loglevel); - - /** @brief get Log data file location for today - * - * Get Log file location for specific Module at a specific level - * @param moduleName the Module where the log file is located - * @param loglevel the level of logging - * @param filelocation Returned: the location path of the log file - */ - EXPORT void getLogFile(const std::string moduleName, const std::string loglevel, std::string& filelocation); - - /** @brief get Log data file location - * - * Get Log file location for specific Module at a specific level - * @param moduleName the Module where the log file is located - * @param loglevel the level of logging - * @param date date of log file, either "today" or - * @param filelocation Returned: the location path of the log file - */ - EXPORT void getLogFile(const std::string moduleName, const std::string loglevel, const std::string date, std::string& filelocation); - - /** @brief get Log configuration data - * - * Get Log Config data, which is the File IDs in the Module syslog.conf file - * @param moduleName the Module where the log file is located - * @param fileIDs Returned: list of Log File IDs - */ - EXPORT void getLogConfig(SystemLogConfigData& configData ); - - /** @brief get current time in string format - * - * get current time in string format - */ - EXPORT std::string getCurrentTime(); - - /** @brief get free diskspace in bytes - * - */ - EXPORT double getFreeSpace(std::string path); - // Integer to ASCII convertor EXPORT std::string itoa(const int); - /** @brief Stop Process - * - * Stop's a process on a Module within the Calpont Database Appliance - * @param moduleName the Module Name - * @param processName the Process Name to stopped - * @param gracefulflag Graceful/Forceful flag - * @param ackflag Acknowledgment flag - */ - EXPORT void stopProcess(const std::string moduleName, const std::string processName, GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag); - - /** @brief Start Process - * - * Start's a process on a Module within the Calpont Database Appliance - * @param moduleName the Module Name - * @param processName the Process Name to started - * @param gracefulflag Graceful/Forceful flag - * @param ackflag Acknowledgment flag - */ - EXPORT void startProcess(const std::string moduleName, const std::string processName, GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag); - - /** @brief Restart Process - * - * Restart's a process on a Module within the Calpont Database Appliance - * @param moduleName the Module Name - * @param processName the Process Name to restarted - * @param gracefulflag Graceful/Forceful flag - * @param ackflag Acknowledgment flag - */ - EXPORT void restartProcess(const std::string moduleName, const std::string processName, GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag); - - /** @brief Stop Process Type - * - * Stop's processes within the Calpont Database Appliance - */ - EXPORT void stopProcessType(std::string type); - - /** @brief Start Process Type - * - * Start's processes within the Calpont Database Appliance - */ - EXPORT void startProcessType(std::string type); - - /** @brief Restart Process Type - * - * Restart's process within the Calpont Database Appliance - */ - EXPORT void restartProcessType(std::string type); - - /** @brief Reinit Process Type - * - * Reinit's process within the Calpont Database Appliance - */ - EXPORT void reinitProcessType(std::string type); - - /** @brief Get Local DBRM ID for Module - * - * @param moduleName the Module Name - */ - EXPORT int getLocalDBRMID(const std::string moduleName); - - /** @brief build empty set of System Tables - */ - EXPORT void buildSystemTables(); - - /** @brief local exception control function - * @param function Function throwing the exception - * @param returnStatus - * @param msg A message to be included - */ - EXPORT void exceptionControl(std::string function, int returnStatus, const char* extraMsg = NULL); - - /** @brief get IP Address from Hostname - */ - EXPORT std::string getIPAddress(std::string hostName); - - /** @brief get System TOP Process CPU Users - * - * get System TOP Process CPU Users - * @param topNumber Number of TOP processes to retrieve - * @param systemtopprocesscpuusers Returned System Top Process CPU Users Structure - */ - EXPORT void getTopProcessCpuUsers(int topNumber, SystemTopProcessCpuUsers& systemtopprocesscpuusers); - - /** @brief get Module TOP Process CPU Users - * - * get Module TOP Process CPU Users - * @param topNumber Number of TOP processes to retrieve - * @param topprocesscpuusers Returned Top Process CPU Users Structure - */ - EXPORT void getTopProcessCpuUsers(const std::string module, int topNumber, TopProcessCpuUsers& topprocesscpuusers); - - /** @brief get System CPU Usage - * - * get System CPU Usage - * @param systemcpu Returned System CPU Usage Structure - */ - EXPORT void getSystemCpuUsage(SystemCpu& systemcpu); - - /** @brief get Module CPU Usage - * - * get Module CPU Usage - * @param module Module Name - * @param modulecpu Returned Top Process CPU Users Structure - */ - EXPORT void getModuleCpuUsage(const std::string module, ModuleCpu& modulecpu); - - /** @brief get System TOP Process Memory Users - * - * get System TOP Process Memory Users - * @param topNumber Number of Memory processes to retrieve - * @param systemtopprocessmemoryusers Returned System Top Process Memory Users Structure - */ - EXPORT void getTopProcessMemoryUsers(int topNumber, SystemTopProcessMemoryUsers& systemtopprocessmemoryusers); - - /** @brief get Module TOP Process Memory Users - * - * get Module TOP Process Memory Users - * @param module Module Name - * @param topNumber Number of TOP processes to retrieve - * @param topprocessmemoryusers Returned Top Process Memory Users Structure - */ - EXPORT void getTopProcessMemoryUsers(const std::string module, int topNumber, TopProcessMemoryUsers& topprocessmemoryusers); - - /** @brief get System Memory Usage - * - * get System Memory Usage - * @param systemmemory Returned System memory Usage Structure - */ - EXPORT void getSystemMemoryUsage(SystemMemory& systemmemory); - - /** @brief get Module Memory Usage - * - * get Module Memory Usage - * @param module Module Name - * @param modulememory Returned Module Memory Usage Structure - */ - EXPORT void getModuleMemoryUsage(const std::string module, ModuleMemory& modulememory); - - /** @brief get System Disk Usage - * - * get System Disk Usage - * @param systemdisk Returned System Disk Usage Structure - */ - EXPORT void getSystemDiskUsage(SystemDisk& systemdisk); - - /** @brief get Module Disk Usage - * - * get Module Disk Usage - * @param module Module Name - * @param moduledisk Returned Module Disk Usage Structure - */ - EXPORT void getModuleDiskUsage(const std::string module, ModuleDisk& moduledisk); - - /** @brief get Active SQL Statements - * - * get Active SQL Statements - * @param activesqlstatements Returned Active Sql Statement Structure - */ - EXPORT void getActiveSQLStatements(ActiveSqlStatements& activesqlstatements); - - /** @brief Valid IP Address - * - * Validate IP Address format - */ - EXPORT bool isValidIP(const std::string ipAddress); - - /** @brief Increment IP Address - * - * Increment IP Address - */ - EXPORT std::string incrementIPAddress(const std::string ipAddress); - - /** - *@brief Check for a phrase in a log file and return status - */ - EXPORT bool checkLogStatus(std::string filename, std::string phase); - - /** - *@brief Fix RSA key - */ - EXPORT void fixRSAkey(std::string logFile); - - /** - *@brief Get PM with read-write mount - */ - EXPORT std::string getWritablePM(); - - /** - *@brief Get PM with read-write mount - */ - EXPORT std::string getHotStandbyPM(); - - /** - *@brief Get PM with read-write mount - */ - EXPORT void setHotStandbyPM(std::string moduleName); - - /** - *@brief Distribute Calpont Configure File - */ - EXPORT void distributeConfigFile(std::string name = "system", std::string file = "Columnstore.xml"); - - /** - *@brief Switch Parent OAM Module - * Return true if we need to wait for systme restart - */ - EXPORT bool switchParentOAMModule(std::string moduleName, GRACEFUL_FLAG gracefulflag); - /** *@brief Get Storage Config Data */ @@ -2255,193 +398,15 @@ public: */ EXPORT void getDbrootPmConfig(const int dbrootid, int& pmid); - EXPORT void getDbrootPmConfig(const int dbrootid, std::string& pmid); - /** *@brief Get System DBRoot Config data */ EXPORT void getSystemDbrootConfig(DBRootConfigList& dbrootconfiglist); - /** - *@brief Set PM - DBRoot Config data - */ - EXPORT void setPmDbrootConfig(const int pmid, DBRootConfigList& dbrootconfiglist); - - /** - *@brief Manual Move PM - DBRoot data - */ - EXPORT void manualMovePmDbroot(std::string residePM, std::string dbrootIDs, std::string toPM); - - /** - *@brief Auto Move PM - DBRoot data - */ - EXPORT bool autoMovePmDbroot(std::string residePM); - - /** - *@brief Auto Un-Move PM - DBRoot data - */ - EXPORT bool autoUnMovePmDbroot(std::string toPM); - - /** - *@brief add DBRoot - */ - EXPORT void addDbroot(const int dbrootNumber, DBRootConfigList& dbrootlist, std::string EBSsize = oam::UnassignedName); - - /** - *@brief add UM Disk - */ - EXPORT void addUMdisk(const int moduleID, std::string& volumeName, std::string& device, std::string EBSsize = oam::UnassignedName); - - /** - *@brief distribute Fstab Updates - */ - EXPORT void distributeFstabUpdates(std::string entry, std::string toPM = "system" ); - - /** - *@brief assign DBRoot - */ - EXPORT void assignDbroot(std::string toPM, DBRootConfigList& dbrootlist); - - /** - *@brief unassign DBRoot - */ - EXPORT void unassignDbroot(std::string residePM, DBRootConfigList& dbrootlist); - - /** - *@brief get unassigned DBRoot list - */ - EXPORT void getUnassignedDbroot(DBRootConfigList& dbrootlist); - - /** - *@brief remove DBRoot - */ - EXPORT void removeDbroot(DBRootConfigList& dbrootlist); - - /** - *@brief get AWS Device Name for DBRoot ID - */ - EXPORT storageID_t getAWSdeviceName( const int dbrootid); - - /** - *@brief set System DBRoot Count - */ - EXPORT void setSystemDBrootCount(); - - /** - *@brief set FilesPerColumnPartition based on value of old - * FilePerColumnPartition and old DbRootCount that is given - */ - EXPORT void setFilesPerColumnPartition( int oldDbRootCount ); - - /** @brief send Device Notification Msg - */ - EXPORT int sendDeviceNotification(std::string deviceName, NOTIFICATION_TYPE type, std::string payload = ""); - - /** @brief run DBHealth Check - */ - EXPORT void checkDBFunctional(bool action = true); - - /** @brief mysql-Calpont service command - */ - EXPORT void actionMysqlCalpont(MYSQLCALPONT_ACTION action); - /** @brief validate Module name */ EXPORT int validateModule(const std::string name); - /** @brief getEC2LocalInstance - */ - EXPORT std::string getEC2LocalInstance(std::string name = "dummy"); - - /** @brief getEC2LocalInstanceType - */ - EXPORT std::string getEC2LocalInstanceType(std::string name = "dummy"); - - /** @brief getEC2LocalInstanceSubnet - */ - EXPORT std::string getEC2LocalInstanceSubnet(std::string name = "dummy"); - - /** @brief launchEC2Instance - */ - EXPORT std::string launchEC2Instance(const std::string name = "dummy", const std::string IPAddress = oam::UnassignedName, const std::string type = oam::UnassignedName, const std::string group = oam::UnassignedName); - - /** @brief getEC2InstanceIpAddress - */ - EXPORT std::string getEC2InstanceIpAddress(std::string instanceName); - - /** @brief terminateEC2Instance - */ - EXPORT void terminateEC2Instance(std::string instanceName); - - /** @brief stopEC2Instance - */ - EXPORT void stopEC2Instance(std::string instanceName); - - /** @brief startEC2Instance - */ - EXPORT bool startEC2Instance(std::string instanceName); - - /** @brief assignElasticIP - */ - EXPORT bool assignElasticIP(std::string instanceName, std::string IpAddress); - - /** @brief deassignElasticIP - */ - EXPORT bool deassignElasticIP(std::string IpAddress); - - /** @brief createEC2Volume - */ - EXPORT std::string createEC2Volume(std::string size, std::string name = "dummy"); - - /** @brief getEC2VolumeStatus - */ - EXPORT std::string getEC2VolumeStatus(std::string volumeName); - - /** @brief attachEC2Volume - */ - EXPORT bool attachEC2Volume(std::string volumeName, std::string deviceName, std::string instanceName); - - /** @brief detachEC2Volume - */ - EXPORT bool detachEC2Volume(std::string volumeName); - - /** @brief deleteEC2Volume - */ - EXPORT bool deleteEC2Volume(std::string volumeName); - - /** @brief createEC2tag - */ - EXPORT bool createEC2tag(std::string resourceName, std::string tagName, std::string tagValue); - - /** - *@brief take action on Syslog process - */ - EXPORT void syslogAction( std::string action); - - /** - *@brief call dbrm control - */ - EXPORT void dbrmctl(std::string command); - - /** @brief Wait for system to close transactions - * - * When a Shutdown, stop, restart or suspend operation is - * requested but there are active transactions of some sort, - * We wait for all transactions to close before performing - * the action. - */ - EXPORT bool waitForSystem(PROC_MGT_MSG_REQUEST request, messageqcpp::IOSocket& ios, messageqcpp::ByteStream& stillWorkingMsg); - - void amazonReattach(std::string toPM, dbrootList dbrootConfigList, bool attach = false); - void mountDBRoot(dbrootList dbrootConfigList, bool mount = true); - void amazonDetach(dbrootList dbrootConfigList); - void amazonAttach(std::string toPM, dbrootList dbrootConfigList); - - /** - *@brief gluster control - */ - EXPORT int glusterctl(GLUSTER_COMMANDS command, std::string argument1, std::string& argument2, std::string& errmsg); - /** * @brief changeMyCnf * @@ -2450,127 +415,27 @@ public: **/ EXPORT bool changeMyCnf( std::string paramater, std::string value ); - /** - * @brief enableMySQLRep - * - * purpose: enable MySQL Replication on the system - * - **/ - EXPORT bool enableMySQLRep( std::string password ); - - /** - * @brief diableMySQLRep - * - * purpose: disable MySQL Replication on the system - * - **/ - EXPORT bool disableMySQLRep(); - - /** @brief check Gluster Log after a Gluster control call - */ - EXPORT int checkGlusterLog(std::string logFile, std::string& errmsg); - - /** @brief check and get mysql user password - */ - EXPORT std::string getMySQLPassword(); - - /** @brief update fstab with dbroot mounts - */ - EXPORT std::string updateFstab(std::string device, std::string dbrootID); - - /** - * @brief Write the message to the log - */ - void writeLog(const std::string logContent, const logging::LOG_TYPE logType = logging::LOG_TYPE_INFO); - - bool checkSystemRunning(); - - /** @brief wait for system to be active - */ - EXPORT void waitForActive(); - private: - int sendMsgToProcMgr3(messageqcpp::ByteStream::byte requestType, alarmmanager::AlarmList& alarmlist, const std::string date); - - /** @brief build and send request message to Process Manager - */ - int sendMsgToProcMgr(messageqcpp::ByteStream::byte requestType, const std::string name = "", - GRACEFUL_FLAG gracefulflag = FORCEFUL, ACK_FLAG ackflag = ACK_NO, - const std::string argument1 = "", const std::string argument2 = "", int timeout = 600); - - /** @brief build and send request message to Process Manager 2 - */ - int sendMsgToProcMgr2(messageqcpp::ByteStream::byte requestType, DeviceNetworkList devicenetworklist, - GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag, const std::string password = oam::UnassignedName, const std::string mysqlpw = oam::UnassignedName); - - /** @brief a slightly different version of sendMsgToProcMgr2, which is for addmodule only. - */ - int sendAddModuleToProcMgr(messageqcpp::ByteStream::byte requestType, DeviceNetworkList devicenetworklist, - GRACEFUL_FLAG gracefulflag, ACK_FLAG ackflag, bool storeHostnames, const std::string password = oam::UnassignedName, - const std::string mysqlpw = oam::UnassignedName); - - /** @brief build and send request message to Process Manager - * Check for status messages - */ - int sendMsgToProcMgrWithStatus(messageqcpp::ByteStream::byte requestType, const std::string name = "", - GRACEFUL_FLAG gracefulflag = GRACEFUL, ACK_FLAG ackflag = ACK_YES, - const std::string argument1 = "", const std::string argument2 = "", int timeout = 600); - - // check for Ack message from Process Manager - // int checkMsgFromProcMgr(messageqcpp::ByteStream::byte requestType, const std::string name); - /** @brief validate Process name */ int validateProcess(const std::string moduleName, std::string processName); - /** @brief send status updates to process monitor + /** @brief local exception control function + * @param function Function throwing the exception + * @param returnStatus + * @param msg A message to be included */ - void sendStatusUpdate(messageqcpp::ByteStream obs, messageqcpp::ByteStream::byte returnRequestType); + EXPORT void exceptionControl(std::string function, int returnStatus, const char* extraMsg = NULL); std::string tmpdir; std::string CalpontConfigFile; - std::string AlarmConfigFile; - std::string ProcessConfigFile; - static int UseHdfs; std::string userDir; }; // end of class } // end of namespace -namespace procheartbeat -{ - -class ProcHeartbeat -{ -public: - /** @brief ProcHeartbeat Class constructor - */ - ProcHeartbeat(); - - /** @brief ProcHeartbeat Class destructor - */ - virtual ~ProcHeartbeat(); - - /** @brief Register for Proc Heartbeat - * - */ - void registerHeartbeat(int ID = 1); - - /** @brief De-Register for Proc Heartbeat - * - * DeregisterHeartbeat - */ - void deregisterHeartbeat(int ID = 1); - - /** @brief Send Proc Heartbeat - * - */ - void sendHeartbeat(int ID = 1, oam::ACK_FLAG ackFlag = oam::ACK_NO); -}; -} - #undef EXPORT #endif diff --git a/oam/oamcpp/oamcache.cpp b/oam/oamcpp/oamcache.cpp index efb41ba5b..ba049ed5e 100644 --- a/oam/oamcpp/oamcache.cpp +++ b/oam/oamcpp/oamcache.cpp @@ -111,86 +111,9 @@ void OamCache::checkReload() // Restore for Windows when we support multiple PMs while (it != uniquePids.end()) { - // Disable legacy-OAM. -/* - if (getenv("SKIP_OAM_INIT") == NULL) - { - try - { - int state = oam::MAN_INIT; - bool degraded; - char num[80]; - int retry = 0; - - // MCOL-259 retry for 5 seconds if the PM is in some INIT mode. - while (( state == oam::BUSY_INIT - || state == oam::MAN_INIT - || state == oam::PID_UPDATE) - && retry < 5) - { - snprintf(num, 80, "%d", *it); - - try - { - oam.getModuleStatus(string("pm") + num, state, degraded); - } - catch (std::exception& e) - { - ostringstream os; - os << "OamCache::checkReload exception while getModuleStatus pm" << num << " " << e.what(); - oam.writeLog(os.str(), logging::LOG_TYPE_ERROR); - break; - } - catch (...) - { - ostringstream os; - os << "OamCache::checkReload exception while getModuleStatus pm" << num; - oam.writeLog(os.str(), logging::LOG_TYPE_ERROR); - break; - } - - if (state == oam::ACTIVE || state == oam::DEGRADED) - { - pmToConnectionMap[*it] = i++; - moduleIds.push_back(*it); - break; - } - - sleep(1); - ++retry; - //cout << "pm " << *it << " -> connection " << (i-1) << endl; - } - - if (state != oam::ACTIVE) - { - ostringstream os; - os << "OamCache::checkReload shows state for pm" << num << " as " << oamState[state]; - oam.writeLog(os.str(), logging::LOG_TYPE_ERROR); - } - } - catch (std::exception& e) - { - ostringstream os; - os << "OamCache::checkReload final exception while getModuleStatus " << e.what(); - oam.writeLog(os.str(), logging::LOG_TYPE_ERROR); - break; - } - catch (...) - { - ostringstream os; - os << "OamCache::checkReload final exception while getModuleStatus"; - oam.writeLog(os.str(), logging::LOG_TYPE_ERROR); - break; - } - } - else -*/ - { - pmToConnectionMap[*it] = i++; - moduleIds.push_back(*it); - } + pmToConnectionMap[*it] = i++; + moduleIds.push_back(*it); it++; - } #endif diff --git a/oam/oamcpp/tdriver.cpp b/oam/oamcpp/tdriver.cpp deleted file mode 100644 index d06cb2bd6..000000000 --- a/oam/oamcpp/tdriver.cpp +++ /dev/null @@ -1,703 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/*************************************************************************** - * dhill@srvengcm1.calpont.com - * - * Purpose: OAM C++ API tester - * - ***************************************************************************/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -using namespace std; - -#include -using namespace boost; - -#include - -#include "liboamcpp.h" -using namespace oam; - -using namespace alarmmanager; - -class getModuleInfoTest : public CppUnit::TestFixture -{ - - CPPUNIT_TEST_SUITE( getModuleInfoTest ); - - CPPUNIT_TEST( test1 ); - - CPPUNIT_TEST_SUITE_END(); - -private: - string Svalue; - int Ivalue; - bool Bvalue; - -public: - void setUp() - { - setenv("CALPONT_HOME", "/home/buildslave/Buildbot/nightly/export/etc/", 1); -// setenv("CALPONT_HOME", "/home/dhill/genii/export/etc/", 1); - } - - void tearDown() - { - } - - void test1() - { - - Oam oamapi; - Svalue = oamapi.getCurrentTime(); - cout << "Current time is " << Svalue; - CPPUNIT_ASSERT(!Svalue.empty()); - - Bvalue = oamapi.isValidIP("111.222.333.444"); - CPPUNIT_ASSERT(Bvalue == true); - - Bvalue = oamapi.isValidIP("111.222.333"); - CPPUNIT_ASSERT(Bvalue == false); - - Bvalue = oamapi.isValidIP("1.2.3.4"); - CPPUNIT_ASSERT(Bvalue == true); - - Bvalue = oamapi.isValidIP("1.2.3.4444"); - CPPUNIT_ASSERT(Bvalue == false); - - Bvalue = oamapi.isValidIP("1111.222.333.444"); - CPPUNIT_ASSERT(Bvalue == false); - - -// can test on deve machine -// oamModuleInfo_t t; - -// t = oamapi.getModuleInfo(); - -// Svalue = get<0>(t); -// CPPUNIT_ASSERT(Svalue == "dm1"); - -// Svalue = get<1>(t); -// CPPUNIT_ASSERT(Svalue == "dm1"); - -// Ivalue = get<2>(t); -// CPPUNIT_ASSERT(Ivalue == MASTER_YES); - }; -}; - -class getSystemConfigTest : public CppUnit::TestFixture -{ - - CPPUNIT_TEST_SUITE( getSystemConfigTest ); - - CPPUNIT_TEST( test1 ); - CPPUNIT_TEST( test2 ); -//CPPUNIT_TEST( test3 ); -//CPPUNIT_TEST( test4 ); -//CPPUNIT_TEST( test5 ); - CPPUNIT_TEST_EXCEPTION( test6, std::runtime_error ); -//CPPUNIT_TEST_EXCEPTION( test7, std::runtime_error ); - CPPUNIT_TEST( test8 ); - CPPUNIT_TEST_EXCEPTION( test9, std::runtime_error ); - CPPUNIT_TEST( test10 ); - - - CPPUNIT_TEST_SUITE_END(); - -private: - string Svalue; - int Ivalue; - -public: - void setUp() - { - setenv("CALPONT_HOME", "/home/buildslave/Buildbot/nightly/export/etc/", 1); -// setenv("CALPONT_HOME", "/home/dhill/genii/export/etc/", 1); - } - - void tearDown() - { - } - - void test1() - { - SystemConfig systemconfig; - - Oam oamapi; - oamapi.getSystemConfig(systemconfig); - - Ivalue = systemconfig.ModuleHeartbeatPeriod; - CPPUNIT_ASSERT(Ivalue != -1); - - Ivalue = systemconfig.ModuleHeartbeatCount; - CPPUNIT_ASSERT(Ivalue != -1); - -// Ivalue = systemconfig.ProcessHeartbeatPeriod; -// CPPUNIT_ASSERT(Ivalue != -2); - - Svalue = systemconfig.NMSIPAddr; - CPPUNIT_ASSERT(!Svalue.empty()); - }; - - void test2() - { - SystemModuleTypeConfig systemmoduletypeconfig; - - Oam oamapi; - oamapi.getSystemConfig(systemmoduletypeconfig); - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - // end of list - break; - - Svalue = systemmoduletypeconfig.moduletypeconfig[i].ModuleType; - CPPUNIT_ASSERT(!Svalue.empty()); - - Svalue = systemmoduletypeconfig.moduletypeconfig[i].ModuleDesc; - CPPUNIT_ASSERT(!Svalue.empty()); - - Ivalue = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - CPPUNIT_ASSERT(Ivalue != -1); - - Ivalue = systemmoduletypeconfig.moduletypeconfig[i].ModuleCPUCriticalThreshold; - CPPUNIT_ASSERT(Ivalue != -1); - - Ivalue = systemmoduletypeconfig.moduletypeconfig[i].ModuleCPUMajorThreshold; - CPPUNIT_ASSERT(Ivalue != -1); - - Ivalue = systemmoduletypeconfig.moduletypeconfig[i].ModuleCPUMinorThreshold; - CPPUNIT_ASSERT(Ivalue != -1); - - Ivalue = systemmoduletypeconfig.moduletypeconfig[i].ModuleCPUMinorClearThreshold; - CPPUNIT_ASSERT(Ivalue != -1); - } - }; - - /* void test3() { - ModuleConfig moduleconfig; - const string Modulename = "dm1"; - - Oam oamapi; - oamapi.getSystemConfig(Modulename, moduleconfig); - - Svalue = moduleconfig.ModuleName; - CPPUNIT_ASSERT(!Svalue.empty()); - - }; - */ - void test4() - { - ModuleConfig moduleconfig; - - Oam oamapi; - oamapi.getSystemConfig(moduleconfig); - - Svalue = moduleconfig.ModuleName; - CPPUNIT_ASSERT(!Svalue.empty()); - - }; - - void test5() - { - Oam oamapi; - oamapi.setSystemConfig("SystemVersion", "V2.0.2.3"); - - oamapi.getSystemConfig("SystemVersion", Svalue); - - CPPUNIT_ASSERT(Svalue == "V2.0.2.3"); - }; - - void test6() - { - Oam oamapi; - oamapi.getSystemConfig("SystemVersionBad", Svalue); - CPPUNIT_ASSERT(Svalue.size() == 0); - }; - - void test7() - { - Oam oamapi; - oamapi.setSystemConfig("SystemVersionBad", "V2.0.2.3"); - }; - - void test8() - { - Oam oamapi; - oamapi.setSystemConfig("ModuleHeartbeatPeriod", 5); - - oamapi.getSystemConfig("ModuleHeartbeatPeriod", Ivalue); - - CPPUNIT_ASSERT(Ivalue == 5); - }; - - void test9() - { - Oam oamapi; - oamapi.getSystemConfig("ModuleHeartbeatPeriodBad", Ivalue); - CPPUNIT_ASSERT(Ivalue == 0); - }; - - void test10() - { - Oam oamapi; - oamapi.setSystemConfig("ModuleCPUMajorThreshold1", 7500); - - oamapi.getSystemConfig("ModuleCPUMajorThreshold1", Ivalue); - - CPPUNIT_ASSERT(Ivalue == 7500); - }; - - -}; -/* -class getSystemStatusTest : public CppUnit::TestFixture { - -CPPUNIT_TEST_SUITE( getSystemStatusTest ); - -CPPUNIT_TEST( test1 ); -CPPUNIT_TEST( test2 ); -CPPUNIT_TEST( test3 ); -CPPUNIT_TEST( test4 ); - - -CPPUNIT_TEST_SUITE_END(); - -private: - Oam oamapi; - string Svalue; - int Ivalue; - -public: - void setUp() { - } - - void tearDown() { - } - - void test1() { - SystemStatus systemstatus; - - oamapi.getSystemStatus(systemstatus); - - Svalue = systemstatus.SystemOpState; - CPPUNIT_ASSERT(!Svalue.empty()); - - for( unsigned int i = 0 ; i < systemstatus.systemModulestatus.Modulestatus.size(); i++) - { - if( systemstatus.systemModulestatus.Modulestatus[i].Module.empty() ) - // end of list - break; - - Svalue = systemstatus.systemModulestatus.Modulestatus[i].Module; - CPPUNIT_ASSERT(!Svalue.empty()); - - Svalue = systemstatus.systemModulestatus.Modulestatus[i].ModuleOpState; - CPPUNIT_ASSERT(!Svalue.empty()); - } - }; - - void test2() { - oamapi.getModuleStatus("dm1", Svalue); - - CPPUNIT_ASSERT(!Svalue.empty()); - }; - - void test3() { - oamapi.setSystemStatus("ACTIVE"); - - SystemStatus systemstatus; - - oamapi.getSystemStatus(systemstatus); - - Svalue = systemstatus.SystemOpState; - - CPPUNIT_ASSERT(Svalue == "ACTIVE"); - oamapi.setSystemStatus("AUTO_OFFLINE"); - }; - - void test4() { - oamapi.setModuleStatus("dm1", "ACTIVE"); - - oamapi.getModuleStatus("dm1", Svalue); - - CPPUNIT_ASSERT(Svalue == "ACTIVE"); - oamapi.setModuleStatus("dm1", "AUTO_OFFLINE"); - }; - - -}; -*/ -class getProcessConfigTest : public CppUnit::TestFixture -{ - - CPPUNIT_TEST_SUITE( getProcessConfigTest ); - - CPPUNIT_TEST( test1 ); - CPPUNIT_TEST( test2 ); - CPPUNIT_TEST_EXCEPTION( test3, std::runtime_error ); - CPPUNIT_TEST( test4 ); - CPPUNIT_TEST_EXCEPTION( test5, std::runtime_error ); - - CPPUNIT_TEST_SUITE_END(); - -private: - string Svalue; - int Ivalue; - -public: - void setUp() - { - setenv("CALPONT_HOME", "/home/buildslave/Buildbot/nightly/export/etc/", 1); -// setenv("CALPONT_HOME", "/home/dhill/genii/export/etc/", 1); - } - - void tearDown() - { - } - - void test1() - { - SystemProcessConfig systemprocessconfig; - - Oam oamapi; - oamapi.getProcessConfig(systemprocessconfig); - - for ( unsigned int i = 0 ; i < systemprocessconfig.processconfig.size(); i++) - { - Svalue = systemprocessconfig.processconfig[i].ProcessName; - CPPUNIT_ASSERT(!Svalue.empty()); - - Svalue = systemprocessconfig.processconfig[i].ModuleType; - CPPUNIT_ASSERT(!Svalue.empty()); - - Svalue = systemprocessconfig.processconfig[i].ProcessLocation; - CPPUNIT_ASSERT(!Svalue.empty()); - - for ( int j = 0 ; j < oam::MAX_ARGUMENTS; j++) - { - if (systemprocessconfig.processconfig[i].ProcessArgs[j].empty()) - break; - - Svalue = systemprocessconfig.processconfig[i].ProcessArgs[j]; - CPPUNIT_ASSERT(!Svalue.empty()); - } - - Ivalue = systemprocessconfig.processconfig[i].BootLaunch; - CPPUNIT_ASSERT(Ivalue != -1); - - Ivalue = systemprocessconfig.processconfig[i].LaunchID; - CPPUNIT_ASSERT(Ivalue != -1); - - for ( int j = 0 ; j < MAX_DEPENDANCY; j++) - { - if (systemprocessconfig.processconfig[i].DepProcessName[j].empty()) - break; - - Svalue = systemprocessconfig.processconfig[i].DepProcessName[j]; - CPPUNIT_ASSERT(!Svalue.empty()); - Svalue = systemprocessconfig.processconfig[i].DepModuleName[j]; - CPPUNIT_ASSERT(!Svalue.empty()); - } - } - }; - - void test2() - { - ProcessConfig processconfig; - - Oam oamapi; - oamapi.getProcessConfig("ProcessManager", "dm1", processconfig); - - Svalue = processconfig.ProcessName; - CPPUNIT_ASSERT(!Svalue.empty()); - - Svalue = processconfig.ModuleType; - CPPUNIT_ASSERT(!Svalue.empty()); - - Svalue = processconfig.ProcessLocation; - CPPUNIT_ASSERT(!Svalue.empty()); - - for ( int j = 0 ; j < oam::MAX_ARGUMENTS; j++) - { - if (processconfig.ProcessArgs[j].empty()) - break; - - Svalue = processconfig.ProcessArgs[j]; - CPPUNIT_ASSERT(!Svalue.empty()); - } - - Ivalue = processconfig.BootLaunch; - CPPUNIT_ASSERT(Ivalue != -1); - - Ivalue = processconfig.LaunchID; - CPPUNIT_ASSERT(Ivalue != -1); - - for ( int j = 0 ; j < MAX_DEPENDANCY; j++) - { - if (processconfig.DepProcessName[j].empty()) - break; - - Svalue = processconfig.DepProcessName[j]; - CPPUNIT_ASSERT(!Svalue.empty()); - Svalue = processconfig.DepModuleName[j]; - CPPUNIT_ASSERT(!Svalue.empty()); - } - }; - - void test3() - { - ProcessConfig processconfig; - Oam oamapi; - oamapi.getProcessConfig("SNMPTrapDaemonBAD", "dm1", processconfig); - CPPUNIT_ASSERT(Svalue.size() == 0); - }; - - void test4() - { - Oam oamapi; - oamapi.setProcessConfig("ProcessManager", "dm1", "BootLaunch", 10); - - oamapi.getProcessConfig("ProcessManager", "dm1", "BootLaunch", Ivalue); - - CPPUNIT_ASSERT(Ivalue == 10); - }; - - void test5() - { - ProcessConfig processconfig; - Oam oamapi; - oamapi.getProcessConfig("ProcessManager", "dm1", "ModuleTypeBAD", Svalue); - CPPUNIT_ASSERT(Svalue.size() == 0); - }; - -}; -/* -class getProcessStatusTest : public CppUnit::TestFixture { - -CPPUNIT_TEST_SUITE( getProcessConfigTest ); - -CPPUNIT_TEST( test1 ); -CPPUNIT_TEST( test2 ); -CPPUNIT_TEST_EXCEPTION( test3, std::runtime_error ); -//CPPUNIT_TEST( test4 ); -CPPUNIT_TEST_EXCEPTION( test5, std::runtime_error ); - -CPPUNIT_TEST_SUITE_END(); - -private: - Oam oamapi; - string Svalue; - int Ivalue; - -public: - void setUp() { - } - - void tearDown() { - } - - void test1() { - SystemProcessStatus systemprocessstatus; - - oamapi.getProcessStatus(systemprocessstatus); - - for( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++) - { - Svalue = systemprocessstatus.processstatus[i].ProcessName; - CPPUNIT_ASSERT(!Svalue.empty()); - - Svalue = systemprocessstatus.processstatus[i].Module; - CPPUNIT_ASSERT(!Svalue.empty()); - - Ivalue = systemprocessstatus.processstatus[i].ProcessID; - CPPUNIT_ASSERT(Ivalue != -1); - - Svalue = systemprocessstatus.processstatus[i].StateChangeDate; - CPPUNIT_ASSERT(!Svalue.empty()); - - Svalue = systemprocessstatus.processstatus[i].ProcessOpState; - CPPUNIT_ASSERT(!Svalue.empty()); - - } - }; - - void test2() { - ProcessStatus processstatus; - - oamapi.getProcessStatus("ProcessManager", "dm1", processstatus); - - Svalue = processstatus.ProcessName; - CPPUNIT_ASSERT(!Svalue.empty()); - - Svalue = processstatus.Module; - CPPUNIT_ASSERT(!Svalue.empty()); - - Ivalue = processstatus.ProcessID; - CPPUNIT_ASSERT(Ivalue != -1); - - Svalue = processstatus.StateChangeDate; - CPPUNIT_ASSERT(!Svalue.empty()); - - Svalue = processstatus.ProcessOpState; - CPPUNIT_ASSERT(!Svalue.empty()); - - }; - - void test3() { - ProcessStatus processstatus; - oamapi.getProcessStatus("SNMPTrapDaemonBAD", "dm1", processstatus); - CPPUNIT_ASSERT(Svalue.size() == 0); - }; - - void test4() { - oamapi.setProcessStatus("ProcessManager", "dm1", "StateChangeDate", "1234567"); - - oamapi.getProcessStatus("ProcessManager", "dm1", "StateChangeDate", Svalue); - - CPPUNIT_ASSERT(Svalue == "1234567"); - }; - - void test5() { - oamapi.getProcessStatus("ProcessManager", "dm1", "StateChangeDateBAD", Svalue); - CPPUNIT_ASSERT(Svalue.size() == 0); - }; - - void test6() { - oamapi.setProcessStatus("ProcessManager", "dm1", "ProcessID", 10); - - oamapi.getProcessStatus("ProcessManager", "dm1", "ProcessID", Ivalue); - - CPPUNIT_ASSERT(Ivalue == 10); - }; - -}; -*/ -class getAlarmConfigTest : public CppUnit::TestFixture -{ - - CPPUNIT_TEST_SUITE( getAlarmConfigTest ); - - CPPUNIT_TEST( test1 ); - CPPUNIT_TEST( test2 ); - CPPUNIT_TEST_EXCEPTION( test3, std::runtime_error ); -//CPPUNIT_TEST( test4 ); - - CPPUNIT_TEST_SUITE_END(); - -private: - string Svalue; - int Ivalue; - -public: - void setUp() - { - setenv("CALPONT_HOME", "/home/buildslave/Buildbot/nightly/export/etc/", 1); -// setenv("CALPONT_HOME", "/home/dhill/genii/export/etc/", 1); - } - - void tearDown() - { - } - - void test1() - { - AlarmConfig alarmconfig; - Oam oamapi; - - for ( int alarmID = 1 ; alarmID < MAX_ALARM_ID; alarmID++) - { - oamapi.getAlarmConfig(alarmID, alarmconfig); - - Svalue = alarmconfig.BriefDesc; - CPPUNIT_ASSERT(!Svalue.empty()); - - Svalue = alarmconfig.DetailedDesc; - CPPUNIT_ASSERT(!Svalue.empty()); - - Svalue = alarmconfig.Severity; - CPPUNIT_ASSERT(!Svalue.empty()); - - Ivalue = alarmconfig.Threshold; - CPPUNIT_ASSERT(Ivalue != -1); - - Ivalue = alarmconfig.Occurrences; - CPPUNIT_ASSERT(Ivalue != -1); - - Svalue = alarmconfig.LastIssueTime; - CPPUNIT_ASSERT(!Svalue.empty()); - } - }; - - void test2() - { - Oam oamapi; - oamapi.setAlarmConfig(CPU_USAGE_MED, "Threshold", 20); - - oamapi.getAlarmConfig(CPU_USAGE_MED, "Threshold", Ivalue); - - CPPUNIT_ASSERT(Ivalue == 20); - }; - - void test3() - { - Oam oamapi; - oamapi.getAlarmConfig(CPU_USAGE_MED, "ThresholdBAD", Ivalue); - CPPUNIT_ASSERT(Ivalue == 0); - }; - - /* void test4() { - // test getActiveAlarm API - AlarmList activeAlarm; - #if 1 - Oam oamapi; - oamapi.getActiveAlarms (activeAlarm); - #endif - }; - */ -}; - -CPPUNIT_TEST_SUITE_REGISTRATION( getModuleInfoTest ); -CPPUNIT_TEST_SUITE_REGISTRATION( getSystemConfigTest ); -//CPPUNIT_TEST_SUITE_REGISTRATION( getSystemStatusTest ); -//CPPUNIT_TEST_SUITE_REGISTRATION( getProcessStatusTest ); -CPPUNIT_TEST_SUITE_REGISTRATION( getProcessConfigTest ); -CPPUNIT_TEST_SUITE_REGISTRATION( getAlarmConfigTest ); - -#include -#include - -int main( int argc, char** argv) -{ - CppUnit::TextUi::TestRunner runner; - CppUnit::TestFactoryRegistry& registry = CppUnit::TestFactoryRegistry::getRegistry(); - runner.addTest( registry.makeTest() ); - bool wasSuccessful = runner.run( "", false ); - return (wasSuccessful ? 0 : 1); -} - - diff --git a/oam/post/CMakeLists.txt b/oam/post/CMakeLists.txt deleted file mode 100644 index 917f97145..000000000 --- a/oam/post/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/mcstest-001.sh.in" "${CMAKE_CURRENT_SOURCE_DIR}/mcstest-001.sh" @ONLY) -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/mcstest-002.sh.in" "${CMAKE_CURRENT_SOURCE_DIR}/mcstest-002.sh" @ONLY) -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/mcstest-003.sh.in" "${CMAKE_CURRENT_SOURCE_DIR}/mcstest-003.sh" @ONLY) -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/mcstest-004.sh.in" "${CMAKE_CURRENT_SOURCE_DIR}/mcstest-004.sh" @ONLY) - -install(PROGRAMS columnstore_functions mcstest-001.sh mcstest-002.sh mcstest-003.sh mcstest-004.sh DESTINATION ${ENGINE_SUPPORTDIR} COMPONENT columnstore-engine) - diff --git a/oam/post/columnstore_functions b/oam/post/columnstore_functions deleted file mode 100755 index af8bd4232..000000000 --- a/oam/post/columnstore_functions +++ /dev/null @@ -1,42 +0,0 @@ -# -# $Id: functions 2937 2012-05-30 18:17:09Z rdempsey $ -# - -# Source function library. -if [ -f /etc/init.d/functions ]; then - . /etc/init.d/functions -fi - -module_type() -{ -( - sed -r 's/[0-9]+$//' /var/lib/columnstore/local/module -) 2>/dev/null -} - -firstboot() -{ - dbrmroot="`mcsGetConfig SystemConfig DBRMRoot`" - dbrmdir="`dirname $dbrmroot`" - test ! -f $dbrmdir/BRM_saves_current -} - -oidbitmapfile() -{ - oidfile="`mcsGetConfig OIDManager OIDBitmapFile`" - test ! -f $oidfile -} - -module_name() -{ -( - cat /var/lib/columnstore/local/module -) 2>/dev/null -} - -module_id() -{ -( - sed -r 's/[a-zA-Z]//g' /var/lib/columnstore/local/module -) 2>/dev/null -} diff --git a/oam/post/mcstest-001.sh.in b/oam/post/mcstest-001.sh.in deleted file mode 100755 index eafa30dd6..000000000 --- a/oam/post/mcstest-001.sh.in +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash -# -# $Id: test-001.sh 3704 2013-08-07 03:33:20Z bwilkinson $ - -USER=`whoami 2>/dev/null` - -# Source function library. -if [ -f /etc/init.d/functions ]; then - . /etc/init.d/functions -fi - -. @ENGINE_SUPPORTDIR@/functions - -scrname=`basename $0` -tname="check-syscat-oids" - -mt=`module_type` - -#These tests only for PM -if [ "$mt" != "pm" ]; then - exit 0 -fi - -#check for dbrm and data1, don't run if missing both -if firstboot; then - if [ -d /var/lib/columnstore/data1/000.dir ]; then - cplogger -c 50 $scrname "$tname" "missing dbrm data with existing 000.dir" - exit 1 - else - exit 0 - fi -else - #check for oidbitmap file - if oidbitmapfile; then - cplogger -c 50 $scrname "$tname" "missing oidbitmapfile with existing current file" - exit 1 - fi -fi - -#check for both current file and OIDBITMAP file - -#Make sure all syscat OIDs are present (N.B. only works for shared-everything) - -cplogger -i 48 $scrname "$tname" - -catoids= -catoids="$catoids 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010" -catoids="$catoids 2001 2004" -catoids="$catoids 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040" -catoids="$catoids 2061 2064 2067 2070 2073 2076" - -# TODO-this doesn't work with HDFS file system -#for oid in $catoids; do -# if [ ! -s `oid2file $oid` ]; then -# cplogger -c 50 $scrname "$tname" "could not find file for OID $oid" -# exit 1 -# fi -#done - -cplogger -i 52 $scrname "$tname" - -exit 0 - diff --git a/oam/post/mcstest-002.sh.in b/oam/post/mcstest-002.sh.in deleted file mode 100755 index 0b7ce57b7..000000000 --- a/oam/post/mcstest-002.sh.in +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash -# -# $Id: test-002.sh 2937 2012-05-30 18:17:09Z rdempsey $ - - -USER=`whoami 2>/dev/null` - -# Source function library. -if [ -f /etc/init.d/functions ]; then - . /etc/init.d/functions -fi - -. @ENGINE_SUPPORTDIR@/functions - -scrname=`basename $0` -tname="check-brm" - -#Don't run on first boot -if firstboot; then - exit 0 -fi - -#Make sure BRM is read-write - -cplogger -i 48 $scrname "$tname" - -#turn this test off for now...it doesn't if the DBRM isn't started, and these tests run too early -# we need a way to run some tests at different stages of system startup... -#dbrmctl status 2>&1 | egrep -qsi '^ok' -/bin/true -rc=$? - -if [ $rc -ne 0 ]; then - cplogger -c 50 $scrname "$tname" "the BRM is read only" - exit 1 -fi - -cplogger -i 52 $scrname "$tname" - -exit 0 - diff --git a/oam/post/mcstest-003.sh.in b/oam/post/mcstest-003.sh.in deleted file mode 100755 index 315cf3bb0..000000000 --- a/oam/post/mcstest-003.sh.in +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -# -# $Id: test-003.sh 2937 2012-05-30 18:17:09Z rdempsey $ - -USER=`whoami 2>/dev/null` - -# Source function library. -if [ -f /etc/init.d/functions ]; then - . /etc/init.d/functions -fi - -. @ENGINE_SUPPORTDIR@/functions - -scrname=`basename $0` -tname="check-oid-bitmap" - -#Don't run on first boot -if firstboot; then - exit 0 -fi - -#Make sure there is an oid bitmap file if there are any EM entries - -cplogger -i 48 $scrname "$tname" - -obmfile=$(getConfig OIDManager OIDBitmapFile) -emcnt=$(editem -o 2001 | wc -l) - -rc=1 -if [ -f $obmfile -o $emcnt -eq 0 ]; then - rc=0 -fi - -if [ $rc -ne 0 ]; then - cplogger -c 50 $scrname "$tname" "there is no OID bitmap file but there are Extent Map entires" - exit 1 -fi - -cplogger -i 52 $scrname "$tname" - -exit 0 - diff --git a/oam/post/mcstest-004.sh.in b/oam/post/mcstest-004.sh.in deleted file mode 100755 index 2680451df..000000000 --- a/oam/post/mcstest-004.sh.in +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# -# $Id: test-004.sh 1538 2009-07-22 18:57:04Z dhill $ - -# -# Validates that FilesPerColumnPartition setting is not set lower than existing extents. -# - -USER=`whoami 2>/dev/null` - -# Source function library. -if [ -f /etc/init.d/functions ]; then - . /etc/init.d/functions -fi - -. @ENGINE_SUPPORTDIR@/functions - -scrname=`basename $0` -tname="validate-partition-size" - -#Don't run on first boot -if firstboot; then - exit 0 -fi - -exit 0 - -cplogger -i 48 $scrname "$tname" - -# Get the FilesPerColumnPartition setting from Columnstore.xml. -filesPer=$(getConfig ExtentMap FilesPerColumnPartition) - -# Get the maximum segment number for all column files. -maxSeg=$(editem -i | awk -F '|' -v max=0 '{if($7>max)max=$7}END{print max+1}') - -# Error and out if the maximum existing segment number is higher than the FilesPerColumnPartition setting. -if [ $maxSeg -gt $filesPer ]; then - cplogger -c 50 $scrname "$tname" "One or more tables were populated with FilesPerColumnPartition higher than the current setting." - exit 1 -fi - -cplogger -i 52 $scrname "$tname" - -exit 0 - diff --git a/oamapps/CMakeLists.txt b/oamapps/CMakeLists.txt index 89ed60afc..e3ce84e9f 100644 --- a/oamapps/CMakeLists.txt +++ b/oamapps/CMakeLists.txt @@ -1,7 +1,2 @@ -add_subdirectory(alarmmanager) add_subdirectory(columnstoreDB) -add_subdirectory(postConfigure) -add_subdirectory(serverMonitor) -add_subdirectory(columnstoreSupport) - diff --git a/oamapps/alarmmanager/CMakeLists.txt b/oamapps/alarmmanager/CMakeLists.txt deleted file mode 100644 index e1696d5e4..000000000 --- a/oamapps/alarmmanager/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ - -include_directories( ${ENGINE_COMMON_INCLUDES} ) - - -########### next target ############### - -set(alarmmanager_LIB_SRCS alarmmanager.cpp alarm.cpp) - -add_library(alarmmanager SHARED ${alarmmanager_LIB_SRCS}) - -add_dependencies(alarmmanager loggingcpp) - -target_compile_options(alarmmanager PRIVATE -Wno-unused-result) - -install(TARGETS alarmmanager DESTINATION ${ENGINE_LIBDIR} COMPONENT columnstore-engine) - - diff --git a/oamapps/alarmmanager/alarm.cpp b/oamapps/alarmmanager/alarm.cpp deleted file mode 100644 index 572a7ab13..000000000 --- a/oamapps/alarmmanager/alarm.cpp +++ /dev/null @@ -1,229 +0,0 @@ -/* Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/****************************************************************************************** -* Author: Zhixuan Zhu -******************************************************************************************/ -#include - -#include "alarm.h" -#include "liboamcpp.h" - -using namespace std; -using namespace oam; - -namespace alarmmanager -{ - -Alarm::Alarm() -{ - // alarm receive time - Oam oam; - setTimestamp (oam.getCurrentTime()); - time_t cal; - time (&cal); - setTimestampSeconds (cal); -} - -Alarm::~Alarm() -{ -} - -void Alarm::setAlarmID (const uint16_t id) -{ - alarmID = id; -} - -void Alarm::setDesc (const string& d) -{ - desc = d; -} - -void Alarm::setComponentID (const string& id) -{ - componentID = id; -} - -void Alarm::setSeverity (const uint16_t s) -{ - severity = s; -} - -void Alarm::setState (const bool s) -{ - state = s; -} - -void Alarm::setCtnThreshold (const uint16_t ctn) -{ - ctnThreshold = ctn; -} - -void Alarm::setOccurrence (const uint16_t o) -{ - occurrence = o; -} - -void Alarm::setLastIssueTime (const uint32_t time) -{ - lastIssueTime = time; -} - -void Alarm::setPid (const uint16_t p) -{ - pid = p; -} - -void Alarm::setTid (const uint16_t t) -{ - tid = t; -} - -void Alarm::setTimestamp (const string& t) -{ - timestamp = t; -} - -void Alarm::setTimestampSeconds (const time_t& t) -{ - timestampseconds = t; -} - -void Alarm::setSname (const string& s) -{ - sname = s; -} - -void Alarm::setPname (const string& p) -{ - pname = p; -} - -istream& operator >>(istream& input, Alarm& alarm) -{ - char buf[100] = {0}; - alarm.setAlarmID (INVALID_ALARM_ID); - - while (!input.eof() && strcmp (buf, "") == 0) - { - input.getline (buf, 100); - } - - if (input.eof()) - return input; - - // Alarm ID - alarm.setAlarmID (atoi (buf)); - - // Severity - input.getline (buf, 100); - - if (strstr (buf, "CRITICAL") != 0) - alarm.setSeverity (CRITICAL); - else if (strstr (buf, "MAJOR") != 0) - alarm.setSeverity (MAJOR); - else if (strstr (buf, "MINOR") != 0) - alarm.setSeverity (MINOR); - else if (strstr (buf, "WARNING") != 0) - alarm.setSeverity (WARNING); - else if (strstr (buf, "INFORMATIONAL") != 0) - alarm.setSeverity (INFORMATIONAL); - else - alarm.setSeverity (NO_SEVERITY); - - // state - if (strstr (buf, "CLEARED") != 0) - alarm.setState (0); - else - alarm.setState (1); - - // Desc - input.getline (buf, 100); - alarm.setDesc (buf); - - // Timestamp - input.getline (buf, 100); - alarm.setTimestamp (buf); - - // Timestamp Seconds - input.getline (buf, 100); - Oam oam; - alarm.setTimestampSeconds (atoi(buf)); - - // Reporting server name - input.getline (buf, 100); - alarm.setSname (buf); - - // Reporting process name - input.getline (buf, 100); - alarm.setPname (buf); - - // fault device name - input.getline (buf, 100); - alarm.setComponentID (buf); - - input.ignore (100, '\n'); - return input; -} - -ostream& operator<< (ostream& output, const Alarm& alarm) -{ - output << alarm.getAlarmID() << endl; - - if (alarm.getState() == 0) - output << "CLEARED "; - - switch (alarm.getSeverity()) - { - case CRITICAL: - output << "CRITICAL ALARM" << endl; - break; - - case MAJOR: - output << "MAJOR ALARM" << endl; - break; - - case MINOR: - output << "MINOR ALARM" << endl; - break; - - case WARNING: - output << "WARNING ALARM" << endl; - break; - - case INFORMATIONAL: - output << "INFORMATIONAL ALARM" << endl; - break; - - case NO_SEVERITY: - output << "NO_SEVERITY ALARM" << endl; - break; - } - - output << alarm.getDesc() << endl; - output << alarm.getTimestamp() << endl; - output << alarm.getTimestampSeconds() << endl; - output << alarm.getSname() << endl; - output << alarm.getPname() << endl; - output << alarm.getComponentID() << endl; - output << endl; - - return output; -} - - -} //namespace alarmmanager diff --git a/oamapps/alarmmanager/alarm.h b/oamapps/alarmmanager/alarm.h deleted file mode 100644 index a82398657..000000000 --- a/oamapps/alarmmanager/alarm.h +++ /dev/null @@ -1,172 +0,0 @@ -/* Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/****************************************************************************************** -* $Id: alarm.h 781 2013-01-21 14:12:44Z rdempsey $ -* -* Author: Zhixuan Zhu -******************************************************************************************/ -/** - * @file - */ -#ifndef CAL_ALARM_H -#define CAL_ALARM_H - -#include -#include -#include "alarmglobal.h" - -namespace alarmmanager -{ - -/** @brief Alarm class interface - * - */ -class Alarm -{ -public: - /* - * @brief overloaded stream operator - */ - friend std::ostream& operator<< ( std::ostream&, const Alarm& ); - friend std::istream& operator>> ( std::istream&, Alarm& ); - - /* - * @brief default ctor - */ - Alarm(); - - /* - * @brief dtor - */ - virtual ~Alarm(); - - /* - * @brief access methods - */ - inline uint16_t getAlarmID() const - { - return alarmID; - } - void setAlarmID (const uint16_t); - - inline const std::string getDesc() const - { - return desc; - } - void setDesc (const std::string&); - - inline const std::string getComponentID() const - { - return componentID; - } - void setComponentID (const std::string&); - - inline uint16_t getSeverity() const - { - return severity; - } - void setSeverity (const uint16_t); - - inline bool getState () const - { - return state; - } - void setState (const bool); - - inline uint16_t getCtnThreshold() const - { - return ctnThreshold; - } - void setCtnThreshold (const uint16_t); - - inline uint16_t getOccurrence() const - { - return occurrence; - } - void setOccurrence (const uint16_t); - - inline const time_t& getReceiveTime () const - { - return receiveTime; - } - void setReceiveTime (const time_t); - - inline uint32_t getLastIssueTime() const - { - return lastIssueTime; - } - void setLastIssueTime (const uint32_t); - - inline uint16_t getPid () const - { - return pid; - } - void setPid (const uint16_t); - - inline uint16_t getTid () const - { - return tid; - } - void setTid (const uint16_t); - - inline const std::string getTimestamp () const - { - return timestamp; - } - void setTimestamp (const std::string&); - - inline time_t getTimestampSeconds () const - { - return timestampseconds; - } - void setTimestampSeconds (const time_t&); - - inline const std::string getSname () const - { - return sname; - } - void setSname (const std::string&); - - inline const std::string getPname () const - { - return pname; - } - void setPname (const std::string&); - - -private: - uint16_t alarmID; - std::string desc; - std::string componentID; - uint16_t severity; - bool state; // true: set; false: clear - uint16_t ctnThreshold; - uint16_t occurrence; - time_t receiveTime; - uint32_t lastIssueTime; - uint16_t pid; // report process id - uint16_t tid; // report thread id - std::string sname; // report server name - std::string pname; // report process name - std::string timestamp; // receive time in date/time format - time_t timestampseconds; // receive time in seconds format -}; - -} - -#endif diff --git a/oamapps/alarmmanager/alarmglobal.h b/oamapps/alarmmanager/alarmglobal.h deleted file mode 100644 index 554f71e5e..000000000 --- a/oamapps/alarmmanager/alarmglobal.h +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -#ifndef ALARMGLOBAL_H -#define ALARMGLOBAL_H - -#ifdef __linux__ -#include -#include -#endif -#include -#include - -namespace alarmmanager -{ - - -/** @brief constant define - * - */ -const int SET = 1; -const int CLEAR = 0; - -const std::string ACTIVE_ALARM_FILE = "/var/log/mariadb/columnstore/activeAlarms"; -const std::string ALARM_FILE = "/var/log/mariadb/columnstore/alarm.log"; -const std::string ALARM_ARCHIVE_FILE = "/var/log/mariadb/columnstore/archive"; - -const bool ALARM_DEBUG = false; -const uint16_t INVALID_ALARM_ID = 0; -} - -#endif diff --git a/oamapps/alarmmanager/alarmmanager.cpp b/oamapps/alarmmanager/alarmmanager.cpp deleted file mode 100644 index f07130c7b..000000000 --- a/oamapps/alarmmanager/alarmmanager.cpp +++ /dev/null @@ -1,726 +0,0 @@ -/* Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/****************************************************************************************** -* Author: Zhixuan Zhu -******************************************************************************************/ -#define ALARMMANAGER_DLLEXPORT -#include "alarmmanager.h" -#undef ALARMMANAGER_DLLEXPORT - -#include -#include -#include -#include -#include - -#include "alarmglobal.h" -#include "liboamcpp.h" -#include "installdir.h" -#include "messagequeue.h" - -using namespace std; -using namespace oam; -using namespace messageqcpp; -using namespace logging; - -const unsigned int CTN_INTERVAL = 30 * 60; - - -namespace alarmmanager -{ - -#ifdef __linux__ -inline pid_t gettid(void) -{ - return syscall(__NR_gettid); -} -#else -inline pid_t gettid(void) -{ - return getpid(); -} -#endif - -/***************************************************************************************** -* @brief Constructor -* -* purpose: -* -*****************************************************************************************/ - -ALARMManager::ALARMManager() -{ - Oam oam; - - // Get Parent OAM Module Name - try - { - oam.getSystemConfig("ParentOAMModuleName", ALARMManager::parentOAMModuleName); - } - catch (...) - { - //Log event - LoggingID lid(11); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Failed to read Parent OAM Module Name"); - msg.format(args); - ml.logErrorMessage(msg); - throw runtime_error ("Failed to read Parent OAM Module Name"); - } - -} - -/***************************************************************************************** -* @brief Destructor -* -* purpose: -* -*****************************************************************************************/ - -ALARMManager::~ALARMManager() -{ -} - -/***************************************************************************************** -* @brief rewriteActiveLog -* -* purpose: Update Active Alarm file, called to remove Cleared alarm -* -*****************************************************************************************/ -void rewriteActiveLog (const AlarmList& alarmList) -{ - if (ALARM_DEBUG) - { - LoggingID lid(11); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("rewriteAlarmLog Called"); - msg.format(args); - ml.logDebugMessage(msg); - } - - // delete the old file - unlink (ACTIVE_ALARM_FILE.c_str()); - - // create new file - int fd = open(ACTIVE_ALARM_FILE.c_str(), O_RDWR | O_CREAT, 0664); - - // Aquire an exclusive lock - if (flock(fd, LOCK_EX) == -1) - { - throw runtime_error ("Lock active alarm log file error"); - } - - ofstream activeAlarmFile (ACTIVE_ALARM_FILE.c_str()); - - AlarmList::const_iterator i; - - for (i = alarmList.begin(); i != alarmList.end(); ++i) - { - activeAlarmFile << i->second; - } - - activeAlarmFile.close(); - - // Release lock - if (flock(fd, LOCK_UN) == -1) - { - throw runtime_error ("Release lock active alarm log file error"); - } - - close(fd); -} - -/***************************************************************************************** -* @brief logAlarm -* -* purpose: Log Alarm in Active Alarm or Historical Alarm file -* -*****************************************************************************************/ -void logAlarm (const Alarm& calAlarm, const string& fileName) -{ - if (ALARM_DEBUG) - { - LoggingID lid(11); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("logAlarm Called"); - msg.format(args); - ml.logDebugMessage(msg); - } - - int fd = open(fileName.c_str(), O_RDWR | O_CREAT, 0664); - ofstream AlarmFile (fileName.c_str(), ios::app); - - // Aquire an exclusive lock - if (flock(fd, LOCK_EX) == -1) - { - throw runtime_error ("Lock file error: " + fileName); - } - - AlarmFile << calAlarm; - AlarmFile.close(); - - // Release lock - if (flock(fd, LOCK_UN) == -1) - { - throw runtime_error ("Release lock file error: " + fileName); - } - - close(fd); -} - -/***************************************************************************************** -* @brief processAlarm -* -* purpose: Process Alarm by updating Active Alarm and Historical Alarm files -* -*****************************************************************************************/ -void processAlarm(const Alarm& calAlarm) -{ - bool logActiveFlag = (calAlarm.getState() == CLEAR ? false : true); - bool logHistFlag = true; - - if (calAlarm.getState() == CLEAR ) - logHistFlag = false; - - // get active alarms - AlarmList alarmList; - ALARMManager sm; - sm.getActiveAlarm (alarmList); - - if (ALARM_DEBUG) - { - LoggingID lid(11); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("processAlarm Called"); - msg.format(args); - ml.logDebugMessage(msg); - } - - AlarmList::iterator i; - - for (i = alarmList.begin(); i != alarmList.end(); ++i) - { - // check if matching ID - if (calAlarm.getAlarmID() != (i->second).getAlarmID() ) - { - continue; - } - - // check if the same fault component on same server - if (calAlarm.getComponentID().compare((i->second).getComponentID()) == 0 && - calAlarm.getSname().compare((i->second).getSname()) == 0) - { - // for set alarm, don't log - if (calAlarm.getState() == SET ) - { - logActiveFlag = false; - logHistFlag = false; - break; - } - - // for clear alarm, remove the set by rewritting the file - else if (calAlarm.getState() == CLEAR ) - { - logActiveFlag = false; - logHistFlag = true; - //cout << "size before: " << alarmList.size(); - alarmList.erase (i); - - //cout << " size after: " << alarmList.size() << endl; - try - { - rewriteActiveLog (alarmList); - } - catch (runtime_error& e) - { - LoggingID lid(11); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("rewriteActiveLog error:"); - args.add(e.what()); - msg.format(args); - ml.logErrorMessage(msg); - } - - break; - } - } - } // end of for loop - - if (logActiveFlag) - { - try - { - logAlarm (calAlarm, ACTIVE_ALARM_FILE); - } - catch (runtime_error& e) - { - LoggingID lid(11); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("logAlarm error:"); - args.add(e.what()); - msg.format(args); - ml.logErrorMessage(msg); - } - } - - if (logHistFlag) - { - // log historical alarm - try - { - logAlarm (calAlarm, ALARM_FILE); - } - catch (runtime_error& e) - { - LoggingID lid(11); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("logAlarm error:"); - args.add(e.what()); - msg.format(args); - ml.logErrorMessage(msg); - } - } -} - -/***************************************************************************************** -* @brief configAlarm -* -* purpose: Get Config Data for Incoming alarm -* -*****************************************************************************************/ -void configAlarm (Alarm& calAlarm) -{ - int alarmID = calAlarm.getAlarmID(); - Oam oam; - AlarmConfig alarmConfig; - - if (ALARM_DEBUG) - { - LoggingID lid(11); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("configAlarm Called"); - msg.format(args); - ml.logDebugMessage(msg); - } - - try - { - oam.getAlarmConfig (alarmID, alarmConfig); - - calAlarm.setDesc (alarmConfig.BriefDesc); - calAlarm.setSeverity (alarmConfig.Severity); - calAlarm.setCtnThreshold (alarmConfig.Threshold); - calAlarm.setOccurrence (alarmConfig.Occurrences); - calAlarm.setLastIssueTime (alarmConfig.LastIssueTime); - - // check lastIssueTime to see if it's time to clear the counter - time_t now; - time (&now); - - if ((now - calAlarm.getLastIssueTime()) >= CTN_INTERVAL) - { - // reset counter and set lastIssueTime - oam.setAlarmConfig (alarmID, "LastIssueTime", now); - oam.setAlarmConfig (alarmID, "Occurrences", 1); - } - - else - { - // increment counter and check the ctnThreshold - calAlarm.setOccurrence (alarmConfig.Occurrences + 1); - oam.setAlarmConfig (alarmID, "Occurrences", calAlarm.getOccurrence()); - - // if counter over threshold and set alarm, stop processing. - if (calAlarm.getCtnThreshold() > 0 - && calAlarm.getOccurrence() >= calAlarm.getCtnThreshold() - && calAlarm.getState() == SET) - { - if (ALARM_DEBUG) - { - LoggingID lid(11); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("counter over threshold and set alarm, stop processing."); - args.add("threshold:"); - args.add(calAlarm.getCtnThreshold()); - args.add("occurances:"); - args.add(calAlarm.getOccurrence()); - msg.format(args); - ml.logDebugMessage(msg); - } - - return; - } - } - } - catch (runtime_error& e) - { - if (ALARM_DEBUG) - { - LoggingID lid(11); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("runtime error:"); - args.add(e.what()); - msg.format(args); - ml.logDebugMessage(msg); - } - - throw; - } - - // process alarm - processAlarm (calAlarm); -} - -/***************************************************************************************** -* @brief sendAlarmReport API -* -* purpose: Send Alarm Report -* -*****************************************************************************************/ -void ALARMManager::sendAlarmReport (const char* componentID, int alarmID, int state, - std::string repModuleName, std::string repProcessName) -{ - -#ifdef SKIP_ALARM - return; -#else - LoggingID lid(11); - MessageLog ml(lid); - Message msg; - Message::Args args; - - //Log receiving of Alarm report - if (ALARM_DEBUG) - { - args.add("sendAlarmReport: alarm #"); - args.add(alarmID); - args.add(", state: "); - args.add(state); - args.add(", component: "); - args.add(componentID); - msg.format(args); - ml.logDebugMessage(msg); - } - - Oam oam; - - // get current Module name - string ModuleName; - - if ( repModuleName.empty()) - { - oamModuleInfo_t st; - - try - { - st = oam.getModuleInfo(); - ModuleName = boost::get<0>(st); - } - catch (...) - { - ModuleName = "Unknown Reporting Module"; - } - } - else - ModuleName = repModuleName; - - // get pid, tid info - int pid = getpid(); - int tid = gettid(); - - // get reporting Process Name - string processName; - - if ( repProcessName.empty()) - { - // get current process name - myProcessStatus_t t; - - try - { - t = oam.getMyProcessStatus(); - processName = boost::get<1>(t); - } - catch (...) - { - processName = "Unknown-Reporting-Process"; - } - } - else - processName = repProcessName; - -ByteStream msg1; - - // setup message - msg1 << (ByteStream::byte) alarmID; - msg1 << (std::string) componentID; - msg1 << (ByteStream::byte) state; - msg1 << (std::string) ModuleName; - msg1 << (std::string) processName; - msg1 << (ByteStream::byte) pid; - msg1 << (ByteStream::byte) tid; - - try - { - //send the msg to Process Manager - MessageQueueClient procmgr("ProcMgr_Alarm"); - procmgr.write(msg1); - - // shutdown connection - procmgr.shutdown(); - } - catch (...) - {} - - return; -#endif //SKIP_ALARM -} - -/***************************************************************************************** -* @brief processAlarmReport API -* -* purpose: Process Alarm Report -* -*****************************************************************************************/ -void ALARMManager::processAlarmReport (Alarm& calAlarm) -{ - // Get alarm configuration - try - { - configAlarm (calAlarm); - } - catch (runtime_error& e) - { - LoggingID lid(11); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("configAlarm error:"); - args.add(e.what()); - msg.format(args); - ml.logErrorMessage(msg); - } - - return; - -} - -/***************************************************************************************** -* @brief getActiveAlarm API -* -* purpose: Get List of Active Alarm from activealarm file -* -*****************************************************************************************/ -void ALARMManager::getActiveAlarm(AlarmList& alarmList) const -{ - //add-on to fileName with mount name if on non Parent Module - Oam oam; - string fileName = ACTIVE_ALARM_FILE; - - int fd = open(fileName.c_str(), O_RDONLY); - - if (fd == -1) - { - // file may being deleted temporarily by trapHandler - sleep (1); - fd = open(fileName.c_str(), O_RDONLY); - - if (fd == -1) - { - // no active alarms, return - return; - } - } - - ifstream activeAlarm (fileName.c_str(), ios::in); - - // acquire read lock - if (flock(fd, LOCK_SH) == -1) - { - throw runtime_error ("Lock active alarm log file error"); - } - - Alarm alarm; - - while (!activeAlarm.eof()) - { - activeAlarm >> alarm; - - if (alarm.getAlarmID() != INVALID_ALARM_ID) - //don't sort -// alarmList.insert (AlarmList::value_type(alarm.getAlarmID(), alarm)); - alarmList.insert (AlarmList::value_type(INVALID_ALARM_ID, alarm)); - } - - activeAlarm.close(); - - // release lock - if (flock(fd, LOCK_UN) == -1) - { - throw runtime_error ("Release lock active alarm log file error"); - } - - close(fd); - - if (ALARM_DEBUG) - { - AlarmList :: iterator i; - - for (i = alarmList.begin(); i != alarmList.end(); ++i) - { - cout << i->second << endl; - } - } - - return; -} - -/***************************************************************************************** -* @brief getAlarm API -* -* purpose: Get List of Historical Alarms from alarm file -* -* date = MM/DD/YY format -* -*****************************************************************************************/ -void ALARMManager::getAlarm(std::string date, AlarmList& alarmList) const -{ - - string alarmFile = startup::StartUp::tmpDir() + "/alarms"; - - //make 1 alarm log file made up of archive and current alarm.log - string cmd = "touch " + alarmFile; - (void)system(cmd.c_str()); - - cmd = "ls " + ALARM_ARCHIVE_FILE + " | grep 'alarm.log' > " + alarmFile; - (void)system(cmd.c_str()); - - string fileName = startup::StartUp::tmpDir() + "/alarmlogfiles"; - - ifstream oldFile (fileName.c_str()); - - if (oldFile) - { - char line[200]; - string buf; - - while (oldFile.getline(line, 200)) - { - buf = line; - string cmd = "cat " + ALARM_ARCHIVE_FILE + "/" + buf + " >> " + alarmFile; - (void)system(cmd.c_str()); - } - - oldFile.close(); - unlink (fileName.c_str()); - } - - cmd = "cat " + ALARM_FILE + " >> " + alarmFile; - (void)system(cmd.c_str()); - - int fd = open(alarmFile.c_str(), O_RDONLY); - - if (fd == -1) - // doesn't exist yet, return - return; - - ifstream hisAlarm (alarmFile.c_str(), ios::in); - - // acquire read lock - if (flock(fd, LOCK_SH) == -1) - { - throw runtime_error ("Lock alarm log file error"); - } - - //get mm / dd / yy from incoming date - string mm = date.substr(0, 2); - string dd = date.substr(3, 2); - string yy = date.substr(6, 2); - - Alarm alarm; - - while (!hisAlarm.eof()) - { - hisAlarm >> alarm; - - if (alarm.getAlarmID() != INVALID_ALARM_ID) - { - time_t cal = alarm.getTimestampSeconds(); - struct tm tm; - localtime_r(&cal, &tm); - char tmp[3]; - strftime (tmp, 3, "%m", &tm); - string alarm_mm = tmp; - alarm_mm = alarm_mm.substr(0, 2); - strftime (tmp, 3, "%d", &tm); - string alarm_dd = tmp; - alarm_dd = alarm_dd.substr(0, 2); - strftime (tmp, 3, "%y", &tm); - string alarm_yy = tmp; - alarm_yy = alarm_yy.substr(0, 2); - - if ( mm == alarm_mm && dd == alarm_dd && yy == alarm_yy ) - //don't sort - // alarmList.insert (AlarmList::value_type(alarm.getAlarmID(), alarm)); - alarmList.insert (AlarmList::value_type(INVALID_ALARM_ID, alarm)); - } - } - - hisAlarm.close(); - unlink (alarmFile.c_str()); - - // release lock - if (flock(fd, LOCK_UN) == -1) - { - throw runtime_error ("Release lock active alarm log file error"); - } - - if (ALARM_DEBUG) - { - AlarmList :: iterator i; - - for (i = alarmList.begin(); i != alarmList.end(); ++i) - { - cout << i->second << endl; - } - } -} - -} //namespace alarmmanager -// vim:ts=4 sw=4: - diff --git a/oamapps/alarmmanager/alarmmanager.h b/oamapps/alarmmanager/alarmmanager.h deleted file mode 100644 index 0ad5a4832..000000000 --- a/oamapps/alarmmanager/alarmmanager.h +++ /dev/null @@ -1,125 +0,0 @@ -/* Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/****************************************************************************************** -* Author: Zhixuan Zhu -******************************************************************************************/ -/** - * @file - */ -#ifndef ALARM_MANAGER_H -#define ALARM_MANAGER_H - -#include -#include -#include "alarm.h" - -#if defined(_MSC_VER) && defined(xxxALARMMANAGER_DLLEXPORT) -#define EXPORT __declspec(dllexport) -#else -#define EXPORT -#endif - -namespace alarmmanager -{ - -/** @brief type define - * - */ -typedef std::multimap AlarmList; - - -/** @brief constant define - * - */ - -/** @brief ALARMManager class interface - * - */ -class ALARMManager -{ -public: - /** @brief default ctor - * - */ - EXPORT ALARMManager(); - - /** @brief dtor - * - */ - EXPORT virtual ~ALARMManager(); - - /** @brief send an alarm - * - * @param componentID the component, hardware or software ID - * @param alarmID the alarm ID - * @param state set or clear - * @param repModuleName reported Module Name (used by non-realtime process) - * @param repProcessName reporting Process Name (used by non-realtime process) - */ - EXPORT void sendAlarmReport (const char* componentID, - const int alarmID, - const int state, - std::string repModuleName = "", - std::string repProcessName = ""); - - - EXPORT void processAlarmReport (Alarm& calAlarm); - - /** @brief return active alarm list - * - * @param AlarmList the alarm map reference to store alarms - */ - EXPORT void getActiveAlarm (AlarmList& ) const; - - /** @brief return alarm list - * - * @param date date of alarms, "today" or date in YYYYMMDD - * @param AlarmList the alarm map reference to store alarms - */ - EXPORT void getAlarm (std::string date, AlarmList& ) const; - - /** @brief get NMS address for trapd - * - * @param addr the reference to store addr - */ - -private: - /** @brief copy ctor. keep private for now - * - */ - ALARMManager(const ALARMManager& rhs); - - /** @brief assign op. keep private for now - * - */ - ALARMManager& operator=(const ALARMManager& rhs); - - /** - * this is to avoid running create_trap_session more than once. - */ - static bool initFlag; - - std::string parentOAMModuleName; - -}; - -} - -#undef EXPORT - -#endif diff --git a/oamapps/alarmmanager/tdriver.cpp b/oamapps/alarmmanager/tdriver.cpp deleted file mode 100644 index 9020bcd36..000000000 --- a/oamapps/alarmmanager/tdriver.cpp +++ /dev/null @@ -1,124 +0,0 @@ -/* Copyright (C) 2016 MariaDB Corporation. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -#include -#include - -#include "alarmmanager.h" -#include "liboamcpp.h" - -using namespace alarmmanager; -using namespace oam; -using namespace messageqcpp; -using namespace std; - -class ALARMManagerTest : public CppUnit::TestFixture -{ - - CPPUNIT_TEST_SUITE( ALARMManagerTest ); - - CPPUNIT_TEST( test1 ); - CPPUNIT_TEST( test2 ); -//CPPUNIT_TEST( test3 ); // requires ProcMgr to be running - - CPPUNIT_TEST_SUITE_END(); - -private: - -public: - void setUp() - { - } - - void tearDown() - { - } - - void test1() - { - // set alarm - ALARMManager sm; - sm.sendAlarmReport("EC-DISK-1", 4, SET); - AlarmList activeAlarms; - sm.getActiveAlarm(activeAlarms); - - //clear alarm - sm.sendAlarmReport("EC-DISK-1", 4, CLEAR); - sm.getActiveAlarm(activeAlarms); - } - - void test2() - { - Alarm alarm; - alarm.setAlarmID (2); - alarm.setComponentID ("atlanta"); - alarm.setSeverity (MAJOR); - alarm.setState (1); - cout << alarm << endl; - string a; - uint32_t b; - b = alarm.getCtnThreshold(); - alarm.setCtnThreshold(b); - b = alarm.getOccurrence(); - alarm.setOccurrence(b); - a = alarm.getTimestamp(); - b = alarm.getLastIssueTime(); - alarm.setLastIssueTime(b); - a = alarm.getSname(); - alarm.setSname(a); - a = alarm.getPname(); - alarm.setPname(a); - b = alarm.getTid(); - alarm.setTid(b); - b = alarm.getPid(); - alarm.setPid(b); - } - - void test3() - { - ALARMManager sm; - string value; - sm.setSNMPConfig ("atlanta", SUB_AGENT, "DISK_CRITICAL", "2000000"); - sm.getSNMPConfig ("atlanta", SUB_AGENT, "DISK_CRITICAL", value); - cout << "DISK: " << value << endl; - sm.setSNMPConfig ("atlanta", SUB_AGENT, "MEM_MAJOR", "333333"); - sm.getSNMPConfig ("atlanta", SUB_AGENT, "MEM_MAJOR", value); - cout << "MEM " << value << endl; - sm.setSNMPConfig ("atlanta", SUB_AGENT, "SWAP_MINOR", "543216"); - sm.getSNMPConfig ("atlanta", SUB_AGENT, "SWAP_MINOR", value); - cout << "SWAP " << value << endl; - sm.setNMSAddr ("10.100.3.141"); - sm.getNMSAddr (value); - cout << "NMS address: " << value << endl; - } - -}; - -CPPUNIT_TEST_SUITE_REGISTRATION( ALARMManagerTest ); - -#include -#include - -int main( int argc, char** argv) -{ - CppUnit::TextUi::TestRunner runner; - CppUnit::TestFactoryRegistry& registry = CppUnit::TestFactoryRegistry::getRegistry(); - runner.addTest( registry.makeTest() ); - bool wasSuccessful = runner.run( "", false ); - return (wasSuccessful ? 0 : 1); -} - diff --git a/oamapps/columnstoreDB/columnstoreDB.cpp b/oamapps/columnstoreDB/columnstoreDB.cpp index d3b17f3ff..03a843385 100644 --- a/oamapps/columnstoreDB/columnstoreDB.cpp +++ b/oamapps/columnstoreDB/columnstoreDB.cpp @@ -45,6 +45,8 @@ using namespace std; using namespace oam; using namespace config; +#include "calpontsystemcatalog.h" +using namespace execplan; namespace { @@ -67,6 +69,213 @@ void usage(char* prog) } } +/****************************************************************************************** + * @brief DisplayLockedTables + * + * purpose: Show the details of all the locks in tableLocks + * Used when attempting to suspend or stop the + * database, but there are table locks. + * + ******************************************************************************************/ +void DisplayLockedTables(std::vector& tableLocks, BRM::DBRM* pDBRM) +{ + cout << "The following tables are locked:" << endl; + + // Initial widths of columns to display. We pass thru the table + // and see if we need to grow any of these. + unsigned int lockIDColumnWidth = 6; // "LockID" + unsigned int tableNameColumnWidth = 12; // "Name" + unsigned int ownerColumnWidth = 7; // "Process" + unsigned int pidColumnWidth = 3; // "PID" + unsigned int sessionIDColumnWidth = 7; // "Session" + unsigned int createTimeColumnWidth = 12; // "CreationTime" + unsigned int dbrootColumnWidth = 7; // "DBRoots" + unsigned int stateColumnWidth = 9; // "State" + + // Initialize System Catalog object used to get table name + boost::shared_ptr systemCatalogPtr = + execplan::CalpontSystemCatalog::makeCalpontSystemCatalog(0); + + std::string fullTblName; + const char* tableState; + + // Make preliminary pass through the table locks in order to determine our + // output column widths based on the data. Min column widths are based on + // the width of the column heading (except for the 'state' column). + uint64_t maxLockID = 0; + uint32_t maxPID = 0; + int32_t maxSessionID = 0; + int32_t minSessionID = 0; + std::vector createTimes; + std::vector pms; + char cTimeBuffer[64]; + + execplan::CalpontSystemCatalog::TableName tblName; + + for (unsigned idx = 0; idx < tableLocks.size(); idx++) + { + if (tableLocks[idx].id > maxLockID) + { + maxLockID = tableLocks[idx].id; + } + + try + { + tblName = systemCatalogPtr->tableName(tableLocks[idx].tableOID); + } + catch (...) + { + tblName.schema.clear(); + tblName.table.clear(); + } + + fullTblName = tblName.toString(); + + if (fullTblName.size() > tableNameColumnWidth) + { + tableNameColumnWidth = fullTblName.size(); + } + + if (tableLocks[idx].ownerName.length() > ownerColumnWidth) + { + ownerColumnWidth = tableLocks[idx].ownerName.length(); + } + + if (tableLocks[idx].ownerPID > maxPID) + { + maxPID = tableLocks[idx].ownerPID; + } + + if (tableLocks[idx].ownerSessionID > maxSessionID) + { + maxSessionID = tableLocks[idx].ownerSessionID; + } + + if (tableLocks[idx].ownerSessionID < minSessionID) + { + minSessionID = tableLocks[idx].ownerSessionID; + } + + // Creation Time. + // While we're at it, we save the time string off into a vector + // so we can display it later without recalcing it. + struct tm timeTM; + localtime_r(&tableLocks[idx].creationTime, &timeTM); + ctime_r(&tableLocks[idx].creationTime, cTimeBuffer); + strftime(cTimeBuffer, 64, "%F %r:", &timeTM); + cTimeBuffer[strlen(cTimeBuffer) - 1] = '\0'; // strip trailing '\n' + std::string cTimeStr( cTimeBuffer ); + + if (cTimeStr.length() > createTimeColumnWidth) + { + createTimeColumnWidth = cTimeStr.length(); + } + + createTimes.push_back(cTimeStr); + } + + tableNameColumnWidth += 1; + ownerColumnWidth += 1; + createTimeColumnWidth += 1; + + std::ostringstream idString; + idString << maxLockID; + + if (idString.str().length() > lockIDColumnWidth) + lockIDColumnWidth = idString.str().length(); + + lockIDColumnWidth += 1; + + std::ostringstream pidString; + pidString << maxPID; + + if (pidString.str().length() > pidColumnWidth) + pidColumnWidth = pidString.str().length(); + + pidColumnWidth += 1; + + const std::string sessionNoneStr("BulkLoad"); + std::ostringstream sessionString; + sessionString << maxSessionID; + + if (sessionString.str().length() > sessionIDColumnWidth) + sessionIDColumnWidth = sessionString.str().length(); + + if ((minSessionID < 0) && + (sessionNoneStr.length() > sessionIDColumnWidth)) + sessionIDColumnWidth = sessionNoneStr.length(); + + sessionIDColumnWidth += 1; + + // write the column headers before the first entry + cout.setf(ios::left, ios::adjustfield); + cout << setw(lockIDColumnWidth) << "LockID" << + setw(tableNameColumnWidth) << "Name" << + setw(ownerColumnWidth) << "Process" << + setw(pidColumnWidth) << "PID" << + setw(sessionIDColumnWidth) << "Session" << + setw(createTimeColumnWidth) << "CreationTime" << + setw(stateColumnWidth) << "State" << + setw(dbrootColumnWidth) << "DBRoots" << endl; + + for (unsigned idx = 0; idx < tableLocks.size(); idx++) + { + try + { + + tblName = systemCatalogPtr->tableName(tableLocks[idx].tableOID); + } + catch (...) + { + tblName.schema.clear(); + tblName.table.clear(); + } + + fullTblName = tblName.toString(); + cout << + setw(lockIDColumnWidth) << tableLocks[idx].id << + setw(tableNameColumnWidth) << fullTblName << + setw(ownerColumnWidth) << tableLocks[idx].ownerName << + setw(pidColumnWidth) << tableLocks[idx].ownerPID; + + // Log session ID, or "BulkLoad" if session is -1 + if (tableLocks[idx].ownerSessionID < 0) + cout << setw(sessionIDColumnWidth) << sessionNoneStr; + else + cout << setw(sessionIDColumnWidth) << + tableLocks[idx].ownerSessionID; + + // Creation Time + cout << setw(createTimeColumnWidth) << createTimes[idx]; + + // Processor State + if (pDBRM && !pDBRM->checkOwner(tableLocks[idx].id)) + { + tableState = "Abandoned"; + } + else + { + tableState = ((tableLocks[idx].state == BRM::LOADING) ? + "LOADING" : "CLEANUP"); + } + + cout << setw(stateColumnWidth) << tableState; + + // PM List + cout << setw(dbrootColumnWidth); + + for (unsigned k = 0; k < tableLocks[idx].dbrootList.size(); k++) + { + if (k > 0) + cout << ','; + + cout << tableLocks[idx].dbrootList[k]; + } + + cout << endl; + } // end of loop through table locks +} + int main(int argc, char** argv) { string command; @@ -104,7 +313,7 @@ int main(int argc, char** argv) if (!tableLocks.empty()) { - oam.DisplayLockedTables(tableLocks, &dbrm); + DisplayLockedTables(tableLocks, &dbrm); } else { diff --git a/oamapps/columnstoreSupport/columnstoreSupport.cpp b/oamapps/columnstoreSupport/columnstoreSupport.cpp index 8c07e0afd..4ef646b21 100644 --- a/oamapps/columnstoreSupport/columnstoreSupport.cpp +++ b/oamapps/columnstoreSupport/columnstoreSupport.cpp @@ -766,7 +766,7 @@ int main(int argc, char* argv[]) //check for mysql password set string pwprompt = " "; - if (oam.checkLogStatus(logFile, "ERROR 1045") ) + if (checkLogStatus(logFile, "ERROR 1045") ) { cout << "NOTE: MariaDB Columnstore root user password is set" << endl; @@ -823,7 +823,7 @@ int main(int argc, char* argv[]) string cmd = columnstoreMysql + pwprompt + " -e 'status' > " + logFile + " 2>&1"; system(cmd.c_str()); - if (oam.checkLogStatus(logFile, "ERROR 1045") ) + if (checkLogStatus(logFile, "ERROR 1045") ) { cout << "FAILED: Failed login using MariaDB Columnstore root user password '" << mysqlpw << "'" << endl; FAILED = true; diff --git a/oamapps/columnstoreSupport/mcsSupportUtil.cpp b/oamapps/columnstoreSupport/mcsSupportUtil.cpp index 57c29ca1d..376c0881b 100644 --- a/oamapps/columnstoreSupport/mcsSupportUtil.cpp +++ b/oamapps/columnstoreSupport/mcsSupportUtil.cpp @@ -20,7 +20,6 @@ using namespace std; using namespace oam; using namespace config; -using namespace alarmmanager; void getSystemNetworkConfig(FILE * pOutputFile) { @@ -116,7 +115,7 @@ void getSystemNetworkConfig(FILE * pOutputFile) { /* MCOL-1607. IPAddr may be a host name here b/c it is read straight from the config file. */ - string tmphost = oam.getIPAddress(pt1->IPAddr); + string tmphost = getIPAddress(pt1->IPAddr); string ipAddr; if (tmphost.empty()) ipAddr = pt1->IPAddr; @@ -304,7 +303,7 @@ void getStorageConfig(FILE * pOutputFile) } //get any unassigned DBRoots - DBRootConfigList undbrootlist; + /*DBRootConfigList undbrootlist; try { @@ -327,7 +326,7 @@ void getStorageConfig(FILE * pOutputFile) } fprintf(pOutputFile,"\n"); - } + }*/ fprintf(pOutputFile,"\n"); @@ -399,7 +398,7 @@ void getStorageConfig(FILE * pOutputFile) } // print un-assigned dbroots - DBRootConfigList::iterator pt1 = undbrootlist.begin(); + /*DBRootConfigList::iterator pt1 = undbrootlist.begin(); for ( ; pt1 != undbrootlist.end() ; pt1++) { @@ -422,7 +421,7 @@ void getStorageConfig(FILE * pOutputFile) } fprintf(pOutputFile,"Amazon EC2 Volume Name/Device Name/Amazon Device Name for DBRoot%u: %s, %s, %s",*pt1,volumeName.c_str(),deviceName.c_str(),amazondeviceName.c_str()); - } + }*/ } string DataRedundancyConfig; @@ -471,7 +470,7 @@ void getStorageConfig(FILE * pOutputFile) try { string errmsg; - oam.glusterctl(oam::GLUSTER_WHOHAS, oam.itoa(*pt), pmList, errmsg); + //oam.glusterctl(oam::GLUSTER_WHOHAS, oam.itoa(*pt), pmList, errmsg); } catch (...) {} @@ -505,14 +504,13 @@ void getStorageConfig(FILE * pOutputFile) void getStorageStatus(FILE * pOutputFile) { - SystemStatus systemstatus; Oam oam; fprintf(pOutputFile,"System External DBRoot Storage Statuses\n\n"); fprintf(pOutputFile,"Component Status Last Status Change\n"); fprintf(pOutputFile,"------------ -------------------------- ------------------------\n"); - try + /*try { oam.getSystemStatus(systemstatus, false); @@ -541,7 +539,7 @@ void getStorageStatus(FILE * pOutputFile) catch (exception& e) { cout << endl << "**** getSystemStatus Failed = " << e.what() << endl; - } + }*/ string DataRedundancyConfig; int DataRedundancyCopies; @@ -555,21 +553,71 @@ void getStorageStatus(FILE * pOutputFile) } catch (...) {} - if ( DataRedundancyConfig == "y" ) - { - string arg1 = ""; - string arg2 = ""; - string errmsg = ""; - int ret = oam.glusterctl(oam::GLUSTER_STATUS, arg1, arg2, errmsg); - - if ( ret == 0 ) - { - cout << arg2 << endl; - } - else - { - cerr << "FAILURE: Status check error: " + errmsg << endl; - } - } } + +/******************************************************************** + * + * checkLogStatus - Check for a phrase in a log file and return status + * + ********************************************************************/ +bool checkLogStatus(std::string fileName, std::string phrase ) +{ + ifstream file (fileName.c_str()); + + if (!file.is_open()) + { + return false; + } + + string buf; + + while (getline(file, buf)) + { + string::size_type pos = buf.find(phrase, 0); + + if (pos != string::npos) + //found phrase + return true; + } + + if (file.bad()) + { + return false; + } + + file.close(); + return false; +} + +/****************************************************************************************** + * @brief Get Network IP Address for Host Name + * + * purpose: Get Network IP Address for Host Name + * + ******************************************************************************************/ +string getIPAddress(string hostName) +{ + static uint32_t my_bind_addr; + struct hostent* ent; + string IPAddr = ""; + Oam oam; + + ent = gethostbyname(hostName.c_str()); + + if (ent != 0) + { + my_bind_addr = (uint32_t) ((in_addr*)ent->h_addr_list[0])->s_addr; + + uint8_t split[4]; + uint32_t ip = my_bind_addr; + split[0] = (ip & 0xff000000) >> 24; + split[1] = (ip & 0x00ff0000) >> 16; + split[2] = (ip & 0x0000ff00) >> 8; + split[3] = (ip & 0x000000ff); + + IPAddr = oam.itoa(split[3]) + "." + oam.itoa(split[2]) + "." + oam.itoa(split[1]) + "." + oam.itoa(split[0]); + } + + return IPAddr; +} diff --git a/oamapps/columnstoreSupport/mcsSupportUtil.h b/oamapps/columnstoreSupport/mcsSupportUtil.h index acfb3f696..9a8374068 100644 --- a/oamapps/columnstoreSupport/mcsSupportUtil.h +++ b/oamapps/columnstoreSupport/mcsSupportUtil.h @@ -26,12 +26,12 @@ #include #include "configcpp.h" #include "liboamcpp.h" -#include "alarmmanager.h" void getSystemNetworkConfig(FILE * pOutputFile); void getModuleTypeConfig(FILE * pOutputFile); void getStorageConfig(FILE * pOutputFile); void getStorageStatus(FILE * pOutputFile); - +bool checkLogStatus(std::string filename, std::string phase); +std::string getIPAddress(std::string hostName); #endif diff --git a/oamapps/hardwareMonitor/hardwareMonitor.cpp b/oamapps/hardwareMonitor/hardwareMonitor.cpp deleted file mode 100644 index aafeb57fd..000000000 --- a/oamapps/hardwareMonitor/hardwareMonitor.cpp +++ /dev/null @@ -1,412 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/*************************************************************************** - * $Id: hardwareMonitor.cpp 34 2006-09-29 21:13:54Z dhill $ - * - * Author: David Hill - ***************************************************************************/ - -#include "hardwareMonitor.h" - -using namespace std; -using namespace oam; -using namespace alarmmanager; -using namespace logging; - - -/************************************************************************************************************ -* @brief main function -* -* purpose: Get current hardware status and report alarms -* -* Parses file generated by the ipmitool -* -* pattern = what it is | value | units | status | value 1 | value 2 | value 3 | value 4 | value 5 | value 6 -* data(0) = what it is -* data(1) = value -* data(2) = units -* data(3) = status -* data(4)-data(9) = barrier values -* data(4) - low non-recoverable, i.e. fatal -* data(5) - low critical -* data(6) - low warning -* data(7) - high warning -* data(8) - high critical -* data(9) - high non-recoverable, i.e. fatal -* -************************************************************************************************************/ -int main (int argc, char** argv) -{ - string data[10]; - string SensorName; - float SensorValue; - string Units; - string SensorStatus; - float lowFatal; - float lowCritical; - float lowWarning; - float highWarning; - float highCritical; - float highFatal; - char* p; - - // check for IPMI_SUPPORT FLAG passed in - if (argc > 1) - IPMI_SUPPORT = atoi(argv[1]); - - // loop forever reading the hardware status - while (true) - { - if ( IPMI_SUPPORT == 0) - { - int returnCode = system("ipmitool sensor list > /tmp/harwareMonitor.txt"); - - if (returnCode) - { - // System error, Log this event - LoggingID lid; - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Error running ipmitool sensor list!!!"); - msg.format(args); - ml.logWarningMessage(msg); - sleep(300); - continue; - } - } - - // parse output file - - ifstream File ("/tmp/harwareMonitor.txt"); - - if (!File) - { - // System error, Log this event - LoggingID lid; - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Error opening /tmp/harwareMonitor.txt!!!"); - msg.format(args); - ml.logWarningMessage(msg); - sleep(300); - continue; - } - - char line[200]; - - while (File.getline(line, 200)) - { - // parse the line - int f = 0; - p = strtok(line, "|"); - - while (p) - { - data[f] = p; - data[f] = StripWhitespace(data[f]); - p = strtok (NULL, "|"); - f++; - } - - if ( f == 0 ) - // nothing on this line, skip - continue; - - SensorName = data[0]; - SensorValue = atof(data[1].c_str()); - Units = data[2]; - SensorStatus = data[3]; - lowFatal = atof(data[4].c_str()); - lowCritical = atof(data[5].c_str()); - lowWarning = atof(data[6].c_str()); - highWarning = atof(data[7].c_str()); - highCritical = atof(data[8].c_str()); - highFatal = atof(data[9].c_str()); - - // check status and issue apporiate alarm if needed - if ( (SensorStatus != "ok") && (SensorStatus != "nr") && (SensorStatus != "na") ) - { - // Status error, check for warning or critical levels - - if ( SensorValue >= highFatal ) - { - // issue critical alarm and send message to shutdown Server - sendAlarm(SensorName, HARDWARE_HIGH, SET, SensorValue); - sendMsgShutdownServer(); - } - else if ( (SensorValue < highFatal) && (SensorValue >= highCritical) ) - // issue major alarm - sendAlarm(SensorName, HARDWARE_MED, SET, SensorValue); - - else if ( (SensorValue < highCritical ) && (SensorValue >= highWarning) ) - // issue minor alarm - sendAlarm(SensorName, HARDWARE_LOW, SET, SensorValue); - - else if ( (SensorValue <= lowWarning) && (SensorValue > lowCritical) ) - // issue minor alarm - sendAlarm(SensorName, HARDWARE_LOW, SET, SensorValue); - - else if ( (SensorValue <= lowCritical) && (SensorValue > lowFatal) ) - // issue major alarm - sendAlarm(SensorName, HARDWARE_MED, SET, SensorValue); - - else if ( SensorValue <= lowFatal ) - { - // issue critical alarm and send message to shutdown Server - sendAlarm(SensorName, HARDWARE_HIGH, SET, SensorValue); - sendMsgShutdownServer(); - } - else - // check if there are any active alarms that needs to be cleared - checkAlarm(SensorName); - } - else - // check if there are any active alarms that needs to be cleared - checkAlarm(SensorName); - - } //end of parsing file while - - File.close(); - // sleep for 1 minute - sleep(60); - } //end of forever while loop -} - -/****************************************************************************************** -* @brief sendAlarm -* -* purpose: send a trap and log the process information -* -******************************************************************************************/ -void sendAlarm(string alarmItem, ALARMS alarmID, int action, float sensorValue) -{ - Oam oam; - - //Log this event - LoggingID lid; - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add(alarmItem); - args.add(", sensor value out-of-range: "); - args.add(sensorValue); - - // get current server name - string serverName; - oamServerInfo_t st; - - try - { - st = oam.getServerInfo(); - serverName = boost::get<0>(st); - } - catch (...) - { - serverName = "Unknown Server"; - } - - // check if there is an active alarm above the reporting theshold - // that needs to be cleared - checkAlarm(alarmItem, alarmID); - - // check if Alarm is already active, don't resend - if ( !( oam.checkActiveAlarm(alarmID, serverName, alarmItem)) ) - { - - ALARMManager alarmMgr; - // send alarm - alarmMgr.sendAlarmReport(alarmItem.c_str(), alarmID, action); - - args.add(", Alarm set: "); - args.add(alarmID); - } - - // output log - msg.format(args); - ml.logWarningMessage(msg); - - return; -} - -/****************************************************************************************** -* @brief checkAlarm -* -* purpose: check to see if an alarm(s) is set on device and clear if so -* -******************************************************************************************/ -void checkAlarm(string alarmItem, ALARMS alarmID) -{ - Oam oam; - - // get current server name - string serverName; - oamServerInfo_t st; - - try - { - st = oam.getServerInfo(); - serverName = boost::get<0>(st); - } - catch (...) - { - serverName = "Unknown Server"; - } - - switch (alarmID) - { - case ALARM_NONE: // clear all alarms set if any found - if ( oam.checkActiveAlarm(HARDWARE_HIGH, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, HARDWARE_HIGH); - - if ( oam.checkActiveAlarm(HARDWARE_MED, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, HARDWARE_MED); - - if ( oam.checkActiveAlarm(HARDWARE_LOW, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, HARDWARE_LOW); - - break; - - case HARDWARE_LOW: // clear high and medium alarms set if any found - if ( oam.checkActiveAlarm(HARDWARE_HIGH, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, HARDWARE_HIGH); - - if ( oam.checkActiveAlarm(HARDWARE_MED, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, HARDWARE_MED); - - break; - - case HARDWARE_MED: // clear high alarms set if any found - if ( oam.checkActiveAlarm(HARDWARE_HIGH, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, HARDWARE_HIGH); - - break; - - default: // none to clear - break; - } // end of switch - - return; -} - -/****************************************************************************************** -* @brief clearAlarm -* -* purpose: clear Alarm that was previously set -* -******************************************************************************************/ -void clearAlarm(string alarmItem, ALARMS alarmID) -{ - ALARMManager alarmMgr; - alarmMgr.sendAlarmReport(alarmItem.c_str(), alarmID, CLEAR); - - //Log this event - LoggingID lid; - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add(alarmItem); - args.add(" alarm #"); - args.add(alarmID); - args.add("cleared"); - msg.format(args); - ml.logWarningMessage(msg); -} -/****************************************************************************************** -* @brief sendMsgShutdownServer -* -* purpose: send a Message to Shutdown server -* -******************************************************************************************/ -void sendMsgShutdownServer() -{ - Oam oam; - - //Log this event - LoggingID lid; - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Fatal Hardware Alarm detected, Server being shutdown"); - msg.format(args); - ml.logCriticalMessage(msg); - - string serverName; - oamServerInfo_t st; - - try - { - st = oam.getServerInfo(); - serverName = boost::get<0>(st); - } - catch (...) - { - // o well, let's take out own action - if ( IPMI_SUPPORT == 0) - system("init 0"); - } - - try - { - oam.shutdownServer(serverName, FORCEFUL, ACK_NO); - } - catch (exception& e) - { - // o well, let's take out own action - if ( IPMI_SUPPORT == 0) - system("init 0"); - } -} - -/****************************************************************************************** -* @brief StripWhitespace -* -* purpose: strip off whitespaces from a string -* -******************************************************************************************/ -string StripWhitespace(string value) -{ - for (;;) - { - string::size_type pos = value.find (' ', 0); - - if (pos == string::npos) - // no more found - break; - - // strip leading - if (pos == 0) - { - value = value.substr (pos + 1, 10000); - } - else - { - // strip trailing - value = value.substr (0, pos); - } - } - - return value; -} diff --git a/oamapps/hardwareMonitor/hardwareMonitor.h b/oamapps/hardwareMonitor/hardwareMonitor.h deleted file mode 100644 index 2fc395d04..000000000 --- a/oamapps/hardwareMonitor/hardwareMonitor.h +++ /dev/null @@ -1,71 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/*************************************************************************** - * $Id: hardwareMonitor.h 34 2006-09-29 21:13:54Z dhill $ - * - * Author: David Hill - ***************************************************************************/ -/** - * @file - */ -#ifndef HARDWARE_MONITOR_H -#define HARDWARE_MONITOR_H - -#include -#include -#include -#include -#include -#include -#include - -#include "liboamcpp.h" -#include "messagelog.h" -#include "messageobj.h" -#include "loggingid.h" -#include "alarmmanager.h" - - -int IPMI_SUPPORT = 0; // 0 for supported - -/** - * @brief send alarm - */ -void sendAlarm(std::string alarmItem, oam::ALARMS alarmID, int action, float sensorValue); - -/** - * @brief check alarm - */ -void checkAlarm(std::string alarmItem, oam::ALARMS alarmID = oam::ALARM_NONE); - -/** - * @brief clear alarm - */ -void clearAlarm(std::string alarmItem, oam::ALARMS alarmID); - -/** - * @brief send msg to shutdown server - */ -void sendMsgShutdownServer(); - -/** - * @brief strip off whitespaces from a string - */ -std::string StripWhitespace(std::string value); - -#endif diff --git a/oamapps/mcsadmin/CMakeLists.txt b/oamapps/mcsadmin/CMakeLists.txt deleted file mode 100644 index 79e57eea1..000000000 --- a/oamapps/mcsadmin/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ - -include_directories( ${ENGINE_COMMON_INCLUDES} ) - - -########### next target ############### - -set(mcsadmin_SRCS mcsadmin.cpp) - -add_executable(mcsadmin ${mcsadmin_SRCS}) - -target_compile_options(mcsadmin PRIVATE -Wno-unused-result) - -target_link_libraries(mcsadmin ${ENGINE_LDFLAGS} ${ENGINE_READLINE_LIBRARY} ncurses ${MARIADB_CLIENT_LIBS} ${ENGINE_EXEC_LIBS} ${ENGINE_WRITE_LIBS}) - -install(TARGETS mcsadmin DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine) - diff --git a/oamapps/mcsadmin/mcsadmin.cpp b/oamapps/mcsadmin/mcsadmin.cpp deleted file mode 100644 index e1732eab2..000000000 --- a/oamapps/mcsadmin/mcsadmin.cpp +++ /dev/null @@ -1,10036 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - - -/****************************************************************************************** - * $Id: mcsadmin.cpp 3110 2013-06-20 18:09:12Z dhill $ - * - ******************************************************************************************/ - -#include -#include -extern int h_errno; - -#include "columnstoreversion.h" -#include "mcsadmin.h" -#include -#include -#include -#include - -#include "mcsconfig.h" -#include "sessionmanager.h" -#include "dbrm.h" -#include "messagequeue.h" -#include "we_messages.h" -#include "../../writeengine/redistribute/we_redistributedef.h" -#include "we_config.h" // for findObjectFile -#include "we_fileop.h" // for findObjectFile -namespace fs = boost::filesystem; - -using namespace alarmmanager; -using namespace std; -using namespace oam; -using namespace config; -using namespace messageqcpp; -using namespace redistribute; -using namespace execplan; - -#include "installdir.h" - -// Variables shared in both main and functions - -Config* fConfig = 0; -string Section; -int CmdID = 0; -string CmdList[cmdNum]; -int CmdListID[cmdNum]; -string cmdName; -const string SECTION_NAME = "Cmd"; -int serverInstallType; -string systemName; -string parentOAMModule; -string localModule; -bool rootUser = true; -string HOME = "/root"; -string SingleServerInstall; -string tmpDir; - -bool repeatStop; - -static void checkPromptThread(); -bool connectToDBRoot1PM(Oam& oam, boost::scoped_ptr& msgQueueClient); -bool SendToWES(Oam& oam, ByteStream bs); - -bool waitForActive() -{ - Oam oam; - - try - { - oam.waitForActive(); - return true; - } - catch (...) - {} - - return false; -} - -bool waitForStop() -{ - Oam oam; - SystemStatus systemstatus; - SystemProcessStatus systemprocessstatus; - - for (int i = 0 ; i < 1200 ; i ++) - { - sleep (3); - - try - { - oam.getSystemStatus(systemstatus); - - if (systemstatus.SystemOpState == MAN_OFFLINE) - { - return true; - } - - if (systemstatus.SystemOpState == FAILED) - { - return false; - } - - cout << "." << flush; - } - catch (...) - { - // At some point, we need to give up, ProcMgr just isn't going to respond. - if (i > 60) // 3 minutes - { - cout << "ProcMgr not responding while waiting for system to start"; - break; - } - } - } - - return false; -} - -//------------------------------------------------------------------------------ -// Signal handler to catch SIGTERM signal to terminate the process -//------------------------------------------------------------------------------ -void handleSigTerm(int i) -{ - std::cout << "Received SIGTERM to terminate MariaDB ColumnStore Console..." << std::endl; - -} - -//------------------------------------------------------------------------------ -// Signal handler to catch Control-C signal to terminate the process -//------------------------------------------------------------------------------ -void handleControlC(int i) -{ - std::cout << "Received Control-C to terminate the console..." << std::endl; - exit(0); -} - -//------------------------------------------------------------------------------ -// Initialize signal handling -//------------------------------------------------------------------------------ -void setupSignalHandlers() -{ -#ifdef _MSC_VER - //FIXME -#else - // Control-C signal to terminate a command - struct sigaction act; - memset(&act, 0, sizeof(act)); - act.sa_handler = handleControlC; - sigaction(SIGINT, &act, 0); - - // catch SIGTERM signal to terminate the program -// memset(&act, 0, sizeof(act)); -// act.sa_handler = handleSigTerm; -// sigaction(SIGTERM, &act, 0); -#endif -} - -int main(int argc, char* argv[]) -{ - - { - cout << "SKIP_OAM_INIT is set and legacy OAM is disabled by default" << endl; - sleep(2); - } - -#ifndef _MSC_VER - setuid(0); // set effective ID to root; ignore return status -#endif - setlocale(LC_ALL, ""); - setlocale(LC_NUMERIC, "C"); - - Oam oam; - char* pcommand = 0; - string arguments[ArgNum]; - - const char* p = getenv("HOME"); - - if (!p) - p = ""; - else - HOME = p; - - string ccHistoryFile = HOME + "/.cc_history"; - - tmpDir = startup::StartUp::tmpDir(); - - string cf = std::string(MCSSYSCONFDIR) + "/columnstore/" + ConsoleCmdsFile; - fConfig = Config::makeConfig(cf); - -// setupSignalHandlers(); - - // Get System Name - try - { - oam.getSystemConfig("SystemName", systemName); - } - catch (...) - { - cout << endl << "**** Failed : Failed to read systemName Name" << endl; - exit(-1); - } - - //get parentModule Name - parentOAMModule = getParentOAMModule(); - - // get Local Module Name and Single Server Install Indicator - oamModuleInfo_t st; - - try - { - st = oam.getModuleInfo(); - localModule = boost::get<0>(st); - serverInstallType = boost::get<5>(st); - } - catch (...) - { - cout << endl << "**** Failed : Failed to read Local Module Name" << endl; - exit(-1); - } - - try - { - oam.getSystemConfig("SingleServerInstall", SingleServerInstall); - } - catch (...) {} - - //check if root-user - int user; - user = getuid(); - - if (user != 0) - rootUser = false; - - // create/open command log file if not created - - logFile.open(DEFAULT_LOG_FILE.c_str(), ios::app); - - if (geteuid() == 0 && !logFile) - { - cerr << "UI Command log file cannot be opened" << endl; - } - - writeLog("Start of a command session!!!"); - - // get and sort command list for future help display - - for (int i = 0; i < cmdNum ; i++) - { - // get cmd name - - Section = SECTION_NAME + oam.itoa(i); - - cmdName = fConfig->getConfig(Section, "Name"); - - if (cmdName.empty()) - // no command skip - continue; - - CmdList[i] = cmdName; - CmdListID[i] = i; - - // sort - - for (int j = 0; j < i ; j++) - { - if ( CmdList[i] < CmdList[j] ) - { - cmdName = CmdList[i]; - CmdList[i] = CmdList[j]; - CmdList[j] = cmdName; - CmdID = CmdListID[i]; - CmdListID[i] = CmdListID[j]; - CmdListID[j] = CmdID; - } - } - } - - if ( localModule != parentOAMModule ) - { - // issue message informing user they aren't logged into Active OAm Parent - cout << endl; - cout << "WARNING: running on non Parent OAM Module, can't make configuration changes in this session." << endl; - cout << " Access Console from '" << parentOAMModule << "' if you need to make changes." << endl << endl; - } - - // check for arguments passed in as a request - - if (argc > 1) - { - int j = 0; - string command; - - for (; argc > 1; j++, argc--) - { - arguments[j] = argv[j + 1]; - command.append(arguments[j]); - command.append(" "); - } - - // add to history and UI command log file - read_history(ccHistoryFile.c_str()); - add_history (command.c_str()); - writeLog(command.c_str()); - write_history(ccHistoryFile.c_str()); - - checkRepeat(arguments, j); - } - else - { - cout << endl << "MariaDB ColumnStore Admin Console" << endl; - cout << " enter 'help' for list of commands" << endl; - cout << " enter 'exit' to exit the MariaDB ColumnStore Command Console" << endl; - cout << " use up/down arrows to recall commands" << endl << endl; - - // output current active alarm stats - printAlarmSummary(); - printCriticalAlarms(); - - //read readline history file - read_history(ccHistoryFile.c_str()); - - while (true) - { - //get parentModule Name - parentOAMModule = getParentOAMModule(); - - // flush agument list - for (int j = 0; j < ArgNum; j++) - { - arguments[j].clear(); - } - - // read input - pcommand = readline("mcsadmin> "); - - if (!pcommand) // user hit -D - pcommand = strdup("exit"); - - else if (!*pcommand) - { - // just an enter-key was entered, ignore and reprompt - continue; - } - - // add to history and UI command log file - add_history (pcommand); - writeLog(pcommand); - write_history(ccHistoryFile.c_str()); - - string command = pcommand; - - //check if a argument was entered as a set of char with quotes around them - int commandLoc = 0; - int numberArgs = 0; - bool validCMD = true; - - for (int i = 0; i < ArgNum; i++) - { - string::size_type pos = command.find(" ", commandLoc); - string::size_type pos1 = command.find("\"", commandLoc); - string::size_type pos3 = command.find("\'", commandLoc); - - if ( (pos == string::npos && pos1 == string::npos) || - (pos == string::npos && pos3 == string::npos) ) - { - //end of the command - string argtemp = command.substr(commandLoc, 80); - - if ( argtemp != "" ) - { - arguments[numberArgs] = argtemp; - numberArgs++; - } - - break; - } - - if (pos < pos1 && pos < pos3) - { - // hit ' ' first - string argtemp = command.substr(commandLoc, pos - commandLoc); - - if ( argtemp != "" ) - { - arguments[numberArgs] = argtemp; - numberArgs++; - } - - commandLoc = pos + 1; - } - else - { - if ( pos >= pos1 ) - { - //hit " first - string::size_type pos2 = command.find("\"", pos1 + 1); - - if (pos2 != string::npos) - { - arguments[numberArgs] = command.substr(pos1 + 1, pos2 - pos1 - 1); - numberArgs++; - commandLoc = pos2 + 1; - } - else - { - cout << "Invalid Command, mismatching use of quotes" << endl; - validCMD = false; - break; - } - } - else - { - //hit ' first - string::size_type pos2 = command.find("\'", pos3 + 1); - - if (pos2 != string::npos) - { - arguments[numberArgs] = command.substr(pos3 + 1, pos2 - pos3 - 1); - numberArgs++; - commandLoc = pos2 + 1; - } - else - { - cout << "Invalid Command, mismatching use of quotes" << endl; - validCMD = false; - break; - } - } - } - } - - if (validCMD) - checkRepeat(arguments, numberArgs); - - free (pcommand); - } - } -} - -void checkRepeat(string* arguments, int argNumber) -{ - Oam oam; - bool repeat = false; - int repeatCount = 5; - - for ( int i = 0; i < argNumber ; i++) - { - if ( arguments[i].find("-r") == 0) - { - // entered - if ( arguments[i] != "-r") - { - //strip report count off - repeatCount = atoi(arguments[i].substr(2, 10).c_str()); - - if ( repeatCount < 1 || repeatCount > 60 ) - { - cout << "Failed: incorrect repeat count entered, valid range is 1-60, set to default of 5" << endl; - repeatCount = 5; - } - } - - repeat = true; - arguments[i].clear(); - cout << "repeating the command '" << arguments[0] << "' every " << repeatCount << " seconds, enter CTRL-D to stop" << endl; - sleep(5); - break; - } - } - - bool threadCreate = false; - - if (repeat) - { - while (true) - { - system("clear"); - - if ( processCommand(arguments) ) - return; - else - { - if ( !threadCreate ) - { - threadCreate = true; - repeatStop = false; - pthread_t PromptThread; - pthread_create (&PromptThread, NULL, (void* (*)(void*)) &checkPromptThread, NULL); - } - - for ( int i = 0 ; i < repeatCount ; i ++ ) - { - if (repeatStop) - break; - - sleep(1); - } - - if (repeatStop) - break; - } - } - } - else - processCommand(arguments); -} - -int processCommand(string* arguments) -{ - Oam oam; - // Possible command line arguments - GRACEFUL_FLAG gracefulTemp = GRACEFUL; - ACK_FLAG ackTemp = ACK_YES; - CC_SUSPEND_ANSWER suspendAnswer = CANCEL; - bool bNeedsConfirm = true; - string password; - string cmd; - // get command info from Command config file - CmdID = -1; - - // put inputted command into lowercase - string inputCmd = arguments[0]; - transform (inputCmd.begin(), inputCmd.end(), inputCmd.begin(), to_lower()); - - for (int i = 0; i < cmdNum; i++) - { - // put table command into lowercase - string cmdName_LC = CmdList[i]; - transform (cmdName_LC.begin(), cmdName_LC.end(), cmdName_LC.begin(), to_lower()); - - if (cmdName_LC.find(inputCmd) == 0) - { - // command found, ECHO command - cout << cmdName_LC << " " << oam.getCurrentTime() << endl; - CmdID = CmdListID[i]; - break; - } - } - - if (CmdID == -1) - { - // get is command in the Support Command list - for (int i = 0;; i++) - { - if (supportCmds[i] == "") - // end of list - break; - - if (supportCmds[i].find(inputCmd) == 0) - { - // match found, go process it - cout << supportCmds[i] << " " << oam.getCurrentTime() << endl; - int status = ProcessSupportCommand(i, arguments); - - if ( status == -1 ) - // didn't process it for some reason - break; - - return 1; - } - } - - // command not valid - cout << arguments[0] << ": Unknown Command, type help for list of commands" << endl << endl; - return 1; - } - switch ( CmdID ) - { - case 0: // help - case 1: // ? - { - const string DESC_NAME = "Desc"; - string desc; - string descName; - const string ARG_NAME = "Arg"; - string arg; - string argName; - - string argument1_LC = arguments[1]; - transform (argument1_LC.begin(), argument1_LC.end(), argument1_LC.begin(), to_lower()); - - if (argument1_LC.find("-a") == 0 || argument1_LC == "") - { - // list commands and brief description (Desc1) - cout << endl << "List of commands:" << endl; - cout << "Note: the command must be the first entry entered on the command line" << endl << endl; - cout.setf(ios::left); - cout.width(34); - cout << "Command" << "Description" << endl; - cout.setf(ios::left); - cout.width(34); - cout << "------------------------------" << "--------------------------------------------------------" << endl; - - for (int i = 0; i < cmdNum ; i++) - { - // get cmd name - - Section = SECTION_NAME + oam.itoa(CmdListID[i]); - - cmdName = fConfig->getConfig(Section, "Name"); - - if (cmdName.empty() || cmdName == "AVAILABLE") - // no command skip - continue; - - cout.setf(ios::left); - cout.width(34); - cout << cmdName << fConfig->getConfig(Section, "Desc1") << endl; - } - - cout << endl << "For help on a command, enter 'help' followed by command name" << endl; - } - else - { - if (argument1_LC.find("-v") == 0) - { - // list of commands with their descriptions - cout << endl << "List of commands and descriptions:" << endl << endl; - - for (int k = 0 ; k < cmdNum ; k++) - { - Section = SECTION_NAME + oam.itoa(CmdListID[k]); - cmdName = fConfig->getConfig(Section, "Name"); - - if (cmdName.empty() || cmdName == "AVAILABLE") - //no command skip - continue; - - cout << "Command: " << cmdName << endl << endl; - int i = 2; - cout << " Description: " << fConfig->getConfig(Section, "Desc1") << endl; - - while (true) - { - desc = DESC_NAME + oam.itoa(i); - descName = fConfig->getConfig(Section, desc); - - if (descName.empty()) - //end of Desc list - break; - - cout << " " << descName << endl; - i++; - } - - i = 2; - cout << endl << " Arguments: " << fConfig->getConfig(Section, "Arg1") << endl; - - while (true) - { - arg = ARG_NAME + oam.itoa(i); - argName = fConfig->getConfig(Section, arg); - - if (argName.empty()) - //end of arg list - break; - - cout << " " << argName << endl; - i++; - } - - cout << endl; - } - } - else - { - // description for a single command - int j = 0; - - for (j = 0; j < cmdNum; j++) - { - // get cmd description - - Section = SECTION_NAME + oam.itoa(j); - - cmdName = fConfig->getConfig(Section, "Name"); - - string cmdName_LC = cmdName; - transform (cmdName_LC.begin(), cmdName_LC.end(), cmdName_LC.begin(), to_lower()); - - if (cmdName_LC == argument1_LC) - { - // command found, output description - cout << endl << " Command: " << cmdName << endl << endl; - int i = 2; - cout << " Description: " << fConfig->getConfig(Section, "Desc1") << endl; - - while (true) - { - desc = DESC_NAME + oam.itoa(i); - descName = fConfig->getConfig(Section, desc); - - if (descName.empty()) - //end of Desc list - break; - - cout << " " << descName << endl; - i++; - } - - i = 2; - cout << endl << " Arguments: " << fConfig->getConfig(Section, "Arg1") << endl; - - while (true) - { - arg = ARG_NAME + oam.itoa(i); - argName = fConfig->getConfig(Section, arg); - - if (argName.empty()) - //end of arg list - break; - - cout << " " << argName << endl; - i++; - } - - break; - } - } - - if (j == cmdNum) - { - // command not valid - cout << arguments[1] << ": Unknown Command, type help for list of commands" << endl << endl; - break; - } - } - } - - cout << endl; - } - break; - - case 2: // exit - case 3: // quit - { - // close the log file - writeLog("End of a command session!!!"); - logFile.close(); - cout << "Exiting the MariaDB ColumnStore Admin Console" << endl; - - exit (0); - } - break; - - case 4: // redistributeData - { - set removeDbroots; // set of dbroots we want to leave empty - vector srcDbroots; // all of the currently configured dbroots - vector destDbroots; // srcDbroots - removeDbroots - set::iterator dbiter; -#if _MSC_VER - if (_strnicmp(arguments[1].c_str(), "start", 5) == 0)) -#else - if (strncasecmp(arguments[1].c_str(), "start", 5) == 0) -#endif - { - // Get a list of all the configured dbroots in the xml file. - DBRootConfigList dbRootConfigList; - std::set configuredDBRoots; - oam.getSystemDbrootConfig(dbRootConfigList); - - for (DBRootConfigList::iterator i = dbRootConfigList.begin(); i != dbRootConfigList.end(); ++i) - configuredDBRoots.insert(*i); - - // The user may choose to redistribute in such a way as to - // leave certain dbroots empty, presumably for later removal. -#if _MSC_VER - if (_strnicmp(arguments[1].c_str(), "remove", 6) == 0)) -#else - if (strncasecmp(arguments[1].c_str(), "remove", 6) == 0) -#endif - { - int dbroot; - bool error = false; - - for (int i = 3; arguments[i] != ""; ++i) - { - dbroot = atoi(arguments[i].c_str()); - - if (dbroot == 1) - { - cout << "Not allowed to remove dbroot-1" << endl; - error = true; - } - else - { - if (configuredDBRoots.find(dbroot) == configuredDBRoots.end()) - { - ostringstream oss; - cout << "DBRoot-" << dbroot << " is not configured" << endl; - error = true; - } - else - { - removeDbroots.insert((uint32_t)dbroot); - } - } - } - - if (error) - { - cout << "Errors encountered. Abort" << endl; - break; - } - } - - // Create a list of source dbroots -- where the data currently resides. - for (dbiter = configuredDBRoots.begin(); dbiter != configuredDBRoots.end(); ++dbiter) - srcDbroots.push_back((uint32_t)*dbiter); - - // Create a list of destination dbroots -- where the data is to go. - for (dbiter = configuredDBRoots.begin(); dbiter != configuredDBRoots.end(); ++dbiter) - { - // Only use the dbroots not in the remove list - if (removeDbroots.find((uint32_t)*dbiter) == removeDbroots.end()) - { - destDbroots.push_back((uint32_t)*dbiter); - } - } - - // Print out what we're about to do - cout << "redistributeData START "; - - if (removeDbroots.size() > 0) - { - cout << " Removing dbroots:"; - set::iterator iter; - - for (iter = removeDbroots.begin(); iter != removeDbroots.end(); ++iter) - { - cout << " " << *iter; - } - } - - cout << endl; - cout << "Source dbroots:"; - vector::iterator iter; - - for (iter = srcDbroots.begin(); iter != srcDbroots.end(); ++iter) - cout << " " << *iter; - - cout << endl << "Destination dbroots:"; - - for (iter = destDbroots.begin(); iter != destDbroots.end(); ++iter) - cout << " " << *iter; - - cout << endl << endl; - - // Connect to PM for dbroot1 - ByteStream bs; - // message WES ID, sequence #, action id - uint32_t sequence = time(0); - bs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE; - - // Send the CLEAR message to WriteEngineServer (WES). Wipes out previous state. - RedistributeMsgHeader header(0, 0, sequence, RED_CNTL_CLEAR); - bs.append((const ByteStream::byte*) &header, sizeof(header)); - SendToWES(oam, bs); - - // Send the START message - bs.restart(); - sequence = time(0); - bs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE; - header.sequenceNum = sequence; - header.messageId = RED_CNTL_START; - bs.append((const ByteStream::byte*) &header, sizeof(header)); - uint32_t options = 0; - - if (removeDbroots.size() > 0) - { - options |= RED_OPTN_REMOVE; - } - - bs << options; - - // source db roots, - bs << (uint32_t) srcDbroots.size(); - - for (uint64_t i = 0; i < srcDbroots.size(); ++i) - bs << (uint32_t) srcDbroots[i]; - - // destination db roots, - bs << (uint32_t) destDbroots.size(); - - for (uint64_t i = 0; i < destDbroots.size(); ++i) - bs << (uint32_t) destDbroots[i]; - - SendToWES(oam, bs); - } -#if _MSC_VER - else if (_strnicmp(arguments[1].c_str(), "stop", 4) == 0)) -#else - else if (strncasecmp(arguments[1].c_str(), "stop", 4) == 0) -#endif - { - ByteStream bs; - // message WES ID, sequence #, action id - uint32_t sequence = time(0); - bs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE; - RedistributeMsgHeader header(0, 0, sequence, RED_CNTL_STOP); - bs.append((const ByteStream::byte*) &header, sizeof(header)); - SendToWES(oam, bs); - } -#if _MSC_VER - else if (_strnicmp(arguments[1].c_str(), "status", 6) == 0)) -#else - else if (strncasecmp(arguments[1].c_str(), "status", 6) == 0) -#endif - { - ByteStream bs; - // message WES ID, sequence #, action id - uint32_t sequence = time(0); - bs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE; - RedistributeMsgHeader header(0, 0, sequence, RED_CNTL_STATUS); - bs.append((const ByteStream::byte*) &header, sizeof(header)); - SendToWES(oam, bs); - } - else - { - cout << "redistributeData must have one of START, STOP or STATUS" << endl; - } - } - break; - - case 5: // findObjectFile - { - unsigned maxDBRoot = WriteEngine::Config::DBRootCount(); - - if (maxDBRoot < 1) - { - cout << endl << "findobjectfile fails because there are no dbroots defined for this server" << endl; - break;; - } - - if (arguments[1] == "") - { - cout << endl << "findobjectfile requires one of" << endl; - cout << "a) oid of column for which file name is to be retrieved" << endl; - cout << "b) schema, table and column for which the file name is to be retrieved" << endl; - cout << "c) oid of table for which the file name of each column is to be retrieved" << endl; - cout << "d) schema and table for which the file name of each column is to be retrieved" << endl; - break; - } - - char* endchar; - int oid = 0; - int tableOid = 0; // If a table report - int dictOid = 0; // If a dictionary oid was given - std::vector columnOids; - CalpontSystemCatalog::TableName tableName; - CalpontSystemCatalog::TableColName columnName; - boost::shared_ptr systemCatalogPtr = - execplan::CalpontSystemCatalog::makeCalpontSystemCatalog(0); - systemCatalogPtr->identity(execplan::CalpontSystemCatalog::FE); - - // Try to get a numeric oid from the argument - oid = strtol(arguments[1].c_str(), &endchar, 0); - - // test to see if not all numeric - if (endchar < & (*arguments[1].end())) // endchar from above will not point to the end if not numeric - { - oid = 0; - } - - if (oid == 0) // A table or column name was entered - { - // Need to convert the arguments to oid - columnName.schema = arguments[1]; - - if (arguments[2] == "") - { - cout << endl << "findobjectfile requires a table for schema " << arguments[1] << endl; - break; - } - - columnName.table = arguments[2]; - - if (arguments[3] == "") - { - // No column was given. Get the list of column oids for the table. - tableName.schema = arguments[1]; - tableName.table = arguments[2]; - - try - { - tableOid = systemCatalogPtr->lookupTableOID(tableName); - - if (tableOid) - { - CalpontSystemCatalog::RIDList rdlist = systemCatalogPtr->columnRIDs(tableName); - - for (unsigned int i = 0; i < rdlist.size(); ++i) - { - columnOids.push_back(rdlist[i].objnum); - } - } - else - { - cout << arguments[1] << "." << arguments[2] << " is not a columnstore table" << endl; - break; - } - } - catch ( runtime_error& e ) - { - cout << "error while trying to get the columns for " << tableName.schema << "." << tableName.table << ": " << e.what() << endl; - break; - } - catch (...) - { - cout << "error while trying to get the columns for " << tableName.schema << "." << tableName.table << endl; - break; - } - } - else // A column name was given - { - columnName.column = arguments[3]; - oid = systemCatalogPtr->lookupOID(columnName); - - if (oid < 1) - { - cout << arguments[1] << "." << arguments[2] << "." << arguments[3] << " is not a columnstore column" << endl; - break; - } - - columnOids.push_back(oid); - } - } - else // An oid was given - { - try - { - // Is oid a column? - columnName = systemCatalogPtr->colName(oid); - } - catch (...) { /* Ignore */ } - - if (columnName.schema.size() == 0 || columnName.table.size() == 0 || columnName.column.size() == 0) - { - // Not a column OID - // check to see if it's a dictionary oid. - try - { - columnName = systemCatalogPtr->dictColName(oid); - } - catch (...) { /* Ignore */ } - - if (columnName.schema.size() == 0 || columnName.table.size() == 0 || columnName.column.size() == 0) - { - // Not a dictionary oid - // Check to see if a table oid was given. If so, get the column oid list. - try - { - tableName = systemCatalogPtr->tableName(oid); - } - catch (...) { /* Ignore */ } - - if (tableName.schema.size() == 0 || tableName.table.size() == 0) - { - // Not a table or a column OID. - cout << "OID " << oid << " does not represent a table or column in columnstore" << endl; - break; - } - - tableOid = oid; - - try - { - CalpontSystemCatalog::RIDList rdlist = systemCatalogPtr->columnRIDs(tableName); - - for (unsigned int i = 0; i < rdlist.size(); ++i) - { - columnOids.push_back(rdlist[i].objnum); - } - } - catch (...) { /* Ignore */ } - } - else - { - // This is a dictionary oid - dictOid = oid; - columnOids.push_back(oid); - } - } - else - { - // This is a column oid - columnOids.push_back(oid); - } - } - - // Use writeengine code to get the filenames - WriteEngine::FileOp fileOp; - char fileName[WriteEngine::FILE_NAME_SIZE]; - memset(fileName, 0, WriteEngine::FILE_NAME_SIZE); - int rc; - - if (tableOid) - { - cout << "for table OID " << tableOid << " " - << tableName.schema << "." << tableName.table << ":" << endl; - } - - for (unsigned int i = 0; i < columnOids.size(); ++i) - { - oid = columnOids[i]; - - if (oid < 1000) - { - rc = fileOp.getVBFileName(oid, fileName); - } - else - { - rc = fileOp.oid2DirName(oid, fileName); - } - - if (oid == dictOid) - { - columnName = systemCatalogPtr->dictColName(oid); - cout << "dictionary OID " << oid << " "; - } - else - { - columnName = systemCatalogPtr->colName(oid); - cout << "column OID " << oid << " "; - } - - if (!tableOid) - { - cout << columnName.schema << "." << columnName.table << "."; - } - - cout << columnName.column << "\t"; - - if (strlen(fileName) > 0) - { - cout << fileName; - } - - if (rc == WriteEngine::NO_ERROR) - { - // Success. No more output. - cout << endl; - } - else if (rc == WriteEngine::ERR_FILE_NOT_EXIST) - { - if (strlen(fileName) == 0) - { - // We couldn't get a name - cout << "Error: Filename could not be determined" << endl; - } - else - { - // We got a name, but the file doesn't exist - cout << " (directory does not exist on this server)" << endl; - } - } - else - { - // Something broke - cerr << "WriteEngine::FileOp::oid2DirName() error. rc=" << rc << endl; - } - } - } - break; - - case 6: // getModuleTypeConfig - { - SystemModuleTypeConfig systemmoduletypeconfig; - ModuleTypeConfig moduletypeconfig; - ModuleConfig moduleconfig; - systemmoduletypeconfig.moduletypeconfig.clear(); - string returnValue; - string Argument; - - if (arguments[1] == "all" || arguments[1] == "") - { - - // get and all display Module config parameters - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - - cout << endl << "Module Type Configuration" << endl << endl; - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - // end of list - break; - - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount < 1 ) - continue; - - string moduletype = systemmoduletypeconfig.moduletypeconfig[i].ModuleType; - cout << "ModuleType '" << moduletype << "' Configuration information" << endl << endl; - - cout << "ModuleDesc = " << systemmoduletypeconfig.moduletypeconfig[i].ModuleDesc << endl; - cout << "RunType = " << systemmoduletypeconfig.moduletypeconfig[i].RunType << endl; - cout << "ModuleCount = " << moduleCount << endl; - - if ( moduleCount > 0 ) - { - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - string modulename = (*pt).DeviceName; - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++) - { - string ipAddr = (*pt1).IPAddr; - string servername = (*pt1).HostName; - cout << "ModuleHostName and ModuleIPAddr for NIC ID " + oam.itoa((*pt1).NicID) + " on module '" << modulename << "' = " << servername << " , " << ipAddr << endl; - } - } - } - - DeviceDBRootList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleDBRootList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleDBRootList.end() ; pt++) - { - if ( (*pt).dbrootConfigList.size() > 0 ) - { - cout << "DBRootIDs assigned to module 'pm" << (*pt).DeviceID << "' = "; - DBRootConfigList::iterator pt1 = (*pt).dbrootConfigList.begin(); - - for ( ; pt1 != (*pt).dbrootConfigList.end() ; ) - { - cout << *pt1; - pt1++; - - if (pt1 != (*pt).dbrootConfigList.end()) - cout << ", "; - } - } - - cout << endl; - } - - cout << "ModuleCPUCriticalThreshold % = " << systemmoduletypeconfig.moduletypeconfig[i].ModuleCPUCriticalThreshold << endl; - cout << "ModuleCPUMajorThreshold % = " << systemmoduletypeconfig.moduletypeconfig[i].ModuleCPUMajorThreshold << endl; - cout << "ModuleCPUMinorThreshold % = " << systemmoduletypeconfig.moduletypeconfig[i].ModuleCPUMinorThreshold << endl; - cout << "ModuleCPUMinorClearThreshold % = " << systemmoduletypeconfig.moduletypeconfig[i].ModuleCPUMinorClearThreshold << endl; - cout << "ModuleDiskCriticalThreshold % = " << systemmoduletypeconfig.moduletypeconfig[i].ModuleDiskCriticalThreshold << endl; - cout << "ModuleDiskMajorThreshold % = " << systemmoduletypeconfig.moduletypeconfig[i].ModuleDiskMajorThreshold << endl; - cout << "ModuleDiskMinorThreshold % = " << systemmoduletypeconfig.moduletypeconfig[i].ModuleDiskMinorThreshold << endl; - cout << "ModuleMemCriticalThreshold % = " << systemmoduletypeconfig.moduletypeconfig[i].ModuleMemCriticalThreshold << endl; - cout << "ModuleMemMajorThreshold % = " << systemmoduletypeconfig.moduletypeconfig[i].ModuleMemMajorThreshold << endl; - cout << "ModuleMemMinorThreshold % = " << systemmoduletypeconfig.moduletypeconfig[i].ModuleMemMinorThreshold << endl; - cout << "ModuleSwapCriticalThreshold % = " << systemmoduletypeconfig.moduletypeconfig[i].ModuleSwapCriticalThreshold << endl; - cout << "ModuleSwapMajorThreshold % = " << systemmoduletypeconfig.moduletypeconfig[i].ModuleSwapMajorThreshold << endl; - cout << "ModuleSwapMinorThreshold % = " << systemmoduletypeconfig.moduletypeconfig[i].ModuleSwapMinorThreshold << endl; - - DiskMonitorFileSystems::iterator pt2 = systemmoduletypeconfig.moduletypeconfig[i].FileSystems.begin(); - int id = 1; - - for ( ; pt2 != systemmoduletypeconfig.moduletypeconfig[i].FileSystems.end() ; pt2++) - { - string fs = *pt2; - cout << "ModuleDiskMonitorFileSystem#" << id << " = " << fs << endl; - ++id; - } - - cout << endl; - } - } - catch (exception& e) - { - cout << endl << "**** getModuleTypeConfig Failed = " << e.what() << endl; - } - } - else - { - // get a single module type config - if (arguments[2] == "") - { - try - { - oam.getSystemConfig(arguments[1], moduletypeconfig); - - cout << endl << "Module Type Configuration for " << arguments[1] << endl << endl; - - int moduleCount = moduletypeconfig.ModuleCount; - string moduletype = moduletypeconfig.ModuleType; - - cout << "ModuleDesc = " << moduletypeconfig.ModuleDesc << endl; - cout << "ModuleCount = " << moduleCount << endl; - cout << "RunType = " << moduletypeconfig.RunType << endl; - - if ( moduleCount > 0 ) - { - DeviceNetworkList::iterator pt = moduletypeconfig.ModuleNetworkList.begin(); - - for ( ; pt != moduletypeconfig.ModuleNetworkList.end() ; pt++) - { - string modulename = (*pt).DeviceName; - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++) - { - string ipAddr = (*pt1).IPAddr; - string servername = (*pt1).HostName; - cout << "ModuleHostName and ModuleIPAddr for NIC ID " + oam.itoa((*pt1).NicID) + " on module " << modulename << " = " << servername << " , " << ipAddr << endl; - } - } - } - - int dbrootCount = moduletypeconfig.ModuleDBRootList.size(); - - cout << "DBRootCount = " << dbrootCount << endl; - - if ( dbrootCount > 0 ) - { - DeviceDBRootList::iterator pt = moduletypeconfig.ModuleDBRootList.begin(); - - for ( ; pt != moduletypeconfig.ModuleDBRootList.end() ; pt++) - { - cout << "DBRoot IDs assigned to 'pm" + oam.itoa((*pt).DeviceID) + "' = "; - - DBRootConfigList::iterator pt1 = (*pt).dbrootConfigList.begin(); - - for ( ; pt1 != (*pt).dbrootConfigList.end() ; ) - { - cout << *pt1; - pt1++; - - if (pt1 != (*pt).dbrootConfigList.end()) - cout << ", "; - } - - cout << endl; - } - } - - cout << "ModuleCPUCriticalThreshold % = " << moduletypeconfig.ModuleCPUCriticalThreshold << endl; - cout << "ModuleCPUMajorThreshold % = " << moduletypeconfig.ModuleCPUMajorThreshold << endl; - cout << "ModuleCPUMinorThreshold % = " << moduletypeconfig.ModuleCPUMinorThreshold << endl; - cout << "ModuleCPUMinorClearThreshold % = " << moduletypeconfig.ModuleCPUMinorClearThreshold << endl; - cout << "ModuleDiskCriticalThreshold % = " << moduletypeconfig.ModuleDiskCriticalThreshold << endl; - cout << "ModuleDiskMajorThreshold % = " << moduletypeconfig.ModuleDiskMajorThreshold << endl; - cout << "ModuleDiskMinorThreshold % = " << moduletypeconfig.ModuleDiskMinorThreshold << endl; - cout << "ModuleMemCriticalThreshold % = " << moduletypeconfig.ModuleMemCriticalThreshold << endl; - cout << "ModuleMemMajorThreshold % = " << moduletypeconfig.ModuleMemMajorThreshold << endl; - cout << "ModuleMemMinorThreshold % = " << moduletypeconfig.ModuleMemMinorThreshold << endl; - cout << "ModuleSwapCriticalThreshold % = " << moduletypeconfig.ModuleSwapCriticalThreshold << endl; - cout << "ModuleSwapMajorThreshold % = " << moduletypeconfig.ModuleSwapMajorThreshold << endl; - cout << "ModuleSwapMinorThreshold % = " << moduletypeconfig.ModuleSwapMinorThreshold << endl; - - DiskMonitorFileSystems::iterator pt = moduletypeconfig.FileSystems.begin(); - int id = 1; - - for ( ; pt != moduletypeconfig.FileSystems.end() ; pt++) - { - string fs = *pt; - cout << "ModuleDiskMonitorFileSystem#" << id << " = " << fs << endl; - ++id; - } - - cout << endl; - } - catch (exception& e) - { - cout << endl << "**** getModuleTypeConfig Failed = " << e.what() << endl; - } - } - else - { - // get a parameter for a module type - try - { - oam.getSystemConfig(systemmoduletypeconfig); - } - catch (...) - {} - - unsigned int i = 0; - - for ( i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if (systemmoduletypeconfig.moduletypeconfig[i].ModuleType == arguments[1]) - { - string argument2 = arguments[2]; - string::size_type pos = arguments[2].rfind("#", 200); - - if (pos != string::npos) - { - string ID = arguments[2].substr(pos + 1, 5); - arguments[2] = arguments[2].substr(0, pos); - arguments[2] = arguments[2] + ID + "-"; - } - - Argument = arguments[2] + oam.itoa(i + 1); - - try - { - oam.getSystemConfig(Argument, returnValue); - cout << endl << " " << argument2 << " = " << returnValue << endl << endl; - break; - } - catch (exception& e) - { - cout << endl << "**** getModuleTypeConfig Failed = " << e.what() << endl; - break; - } - } - } - - if ( i == systemmoduletypeconfig.moduletypeconfig.size() ) - { - // module type not found - cout << endl << "**** getModuleTypeConfig Failed : Invalid Module Type" << endl; - break; - } - } - } - } - break; - - case 7: // setModuleTypeConfig - parameters: Module type, Parameter name and value - { - SystemModuleTypeConfig systemmoduletypeconfig; - ModuleTypeConfig moduletypeconfig; - string Argument; - - parentOAMModule = getParentOAMModule(); - - if ( localModule != parentOAMModule ) - { - // exit out since not on Parent OAM Module - cout << endl << "**** setModuleTypeConfig Failed : only should be run on the Parent OAM Module, which is '" << parentOAMModule << "'" << endl; - break; - } - - if (arguments[3] == "") - { - // need 3 arguments - cout << endl << "**** setModuleTypeConfig Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - if ( arguments[3] == "=" ) - { - cout << endl << "**** setModuleTypeConfig Failed : Invalid Value of '=', please re-enter" << endl; - break; - } - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - } - catch (...) - {} - - unsigned int i = 0; - - for ( i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if (systemmoduletypeconfig.moduletypeconfig[i].ModuleType == arguments[1]) - { - string argument2 = arguments[2]; - string::size_type pos = arguments[2].rfind("#", 200); - - if (pos != string::npos) - { - string ID = arguments[2].substr(pos + 1, 5); - arguments[2] = arguments[2].substr(0, pos); - arguments[2] = arguments[2] + ID + "-"; - } - - Argument = arguments[2] + oam.itoa(i + 1); - - try - { - oam.setSystemConfig(Argument, arguments[3]); - cout << endl << " Successfully set " << argument2 << " = " << arguments[3] << endl << endl; - break; - } - catch (exception& e) - { - cout << endl << "**** setModuleTypeConfig Failed = " << e.what() << endl; - break; - } - } - } - - if ( i == systemmoduletypeconfig.moduletypeconfig.size() ) - { - // module type not found - cout << endl << "**** setModuleTypeConfig Failed : Invalid Module Type" << endl; - break; - } - } - break; - - case 8: // getProcessConfig - { - SystemProcessConfig systemprocessconfig; - ProcessConfig processconfig; - string returnValue; - - if (arguments[1] == "all" || arguments[1] == "") - { - // get and all display Process config parameters - - try - { - oam.getProcessConfig(systemprocessconfig); - - cout << endl << "Process Configuration" << endl << endl; - - for ( unsigned int i = 0 ; i < systemprocessconfig.processconfig.size(); i++) - { - cout << "Process #" << i + 1 << " Configuration information" << endl; - - cout << "ProcessName = " << systemprocessconfig.processconfig[i].ProcessName << endl; - cout << "ModuleType = " << systemprocessconfig.processconfig[i].ModuleType << endl; - cout << "ProcessLocation = " << systemprocessconfig.processconfig[i].ProcessLocation << endl; - - for ( int j = 0 ; j < oam::MAX_ARGUMENTS; j++) - { - if (systemprocessconfig.processconfig[i].ProcessArgs[j].empty()) - break; - - cout << "ProcessArg" << j + 1 << " = " << systemprocessconfig.processconfig[i].ProcessArgs[j] << endl; - } - - cout << "BootLaunch = " << systemprocessconfig.processconfig[i].BootLaunch << endl; - cout << "LaunchID = " << systemprocessconfig.processconfig[i].LaunchID << endl; - - for ( int j = 0 ; j < MAX_DEPENDANCY; j++) - { - if (systemprocessconfig.processconfig[i].DepProcessName[j].empty()) - break; - - cout << "DepModuleName" << j + 1 << " = " << systemprocessconfig.processconfig[i].DepModuleName[j] << endl; - cout << "DepProcessName" << j + 1 << " = " << systemprocessconfig.processconfig[i].DepProcessName[j] << endl; - } - - // display Process Group variables, if they exist - - cout << "RunType = " << systemprocessconfig.processconfig[i].RunType << endl; - cout << "LogFile = " << systemprocessconfig.processconfig[i].LogFile << endl; - - cout << endl; - } - } - catch (exception& e) - { - cout << endl << "**** getProcessConfig Failed = " << e.what() << endl; - } - } - else - { - // get a single process info - parameters: module-name, process-name - if (arguments[2] == "") - { - cout << endl << "**** getProcessConfig Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - if (arguments[3] == "") - { - //**** Add API to get single process info - try - { - oam.getProcessConfig(arguments[1], arguments[2], processconfig); - - cout << endl << "Process Configuration for " << arguments[1] << " on module " << arguments[2] << endl << endl; - - cout << "ProcessName = " << processconfig.ProcessName << endl; - cout << "ModuleType = " << processconfig.ModuleType << endl; - cout << "ProcessLocation = " << processconfig.ProcessLocation << endl; - - for ( int j = 0 ; j < oam::MAX_ARGUMENTS; j++) - { - if (processconfig.ProcessArgs[j].empty()) - break; - - cout << "ProcessArg" << j + 1 << " = " << processconfig.ProcessArgs[j] << endl; - } - - cout << "BootLaunch = " << processconfig.BootLaunch << endl; - cout << "LaunchID = " << processconfig.LaunchID << endl; - - for ( int j = 0 ; j < MAX_DEPENDANCY; j++) - { - if (processconfig.DepProcessName[j].empty()) - break; - - cout << "DepProcessName" << j + 1 << " = " << processconfig.DepProcessName[j] << endl; - cout << "DepModuleName" << j + 1 << " = " << processconfig.DepModuleName[j] << endl; - } - - cout << "RunType = " << processconfig.RunType << endl; - cout << "LogFile = " << processconfig.LogFile << endl; - - cout << endl; - } - catch (exception& e) - { - cout << endl << "**** getProcessConfig Failed = " << e.what() << endl; - } - } - else - { - // get a parameter for a process - parameters: module-name, process-name, - // parameter-name - // get module ID from module name entered, then get parameter - try - { - oam.getProcessConfig(arguments[1], arguments[2], arguments[3], returnValue); - cout << endl << " " << arguments[3] << " = " << returnValue << endl << endl; - break; - } - catch (exception& e) - { - cout << endl << "**** getProcessConfig Failed = " << e.what() << endl; - break; - } - } - } - } - break; - - case 9: // setProcessConfig - parameters: Module name, Process Name, Parameter name and value - { - parentOAMModule = getParentOAMModule(); - - if ( localModule != parentOAMModule ) - { - // exit out since not on Parent OAM Module - cout << endl << "**** setProcessConfig Failed : only should be run on the Parent OAM Module, which is '" << parentOAMModule << "'" << endl; - break; - } - - if (arguments[4] == "") - { - cout << endl << "**** setProcessConfig Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - try - { - if ( arguments[4] == "=" ) - { - cout << endl << "**** setProcessConfig Failed : Invalid Value of '=', please re-enter" << endl; - break; - } - - oam.setProcessConfig(arguments[1], arguments[2], arguments[3], arguments[4]); - cout << endl << " Successfully set " << arguments[3] << " = " << arguments[4] << endl << endl; - break; - } - catch (exception& e) - { - cout << endl << "**** setProcessConfig Failed = " << e.what() << endl; - break; - } - } - break; - - case 10: // getAlarmConfig- parameters: all or AlarmID - { - AlarmConfig alarmconfig; - - if (arguments[1] == "all" || arguments[1] == "") - { - - // get and all display Alarm config parameters - - cout << endl << "Alarm Configuration" << endl << endl; - - for ( int alarmID = 1 ; alarmID < MAX_ALARM_ID; alarmID++) - { - try - { - oam.getAlarmConfig(alarmID, alarmconfig); - - cout << "Alarm ID #" << alarmID << " Configuration information" << endl; - - cout << "BriefDesc = " << alarmconfig.BriefDesc << endl; - cout << "DetailedDesc = " << alarmconfig.DetailedDesc << endl; - - // cout << "EmailAddr = " << alarmconfig.EmailAddr << endl; - // cout << "PagerNum = " << alarmconfig.PagerNum << endl; - - switch (alarmconfig.Severity) - { - case CRITICAL: - cout << "Severity = CRITICAL" << endl; - break; - - case MAJOR: - cout << "Severity = MAJOR" << endl; - break; - - case MINOR: - cout << "Severity = MINOR" << endl; - break; - - case WARNING: - cout << "Severity = WARNING" << endl; - break; - - default: - cout << "Severity = INFORMATIONAL" << endl; - break; - } - - cout << "Threshold = " << alarmconfig.Threshold << endl; - // cout << "Occurrences = " << alarmconfig.Occurrences << endl; - // cout << "LastIssueTime = " << alarmconfig.LastIssueTime << endl << endl; - cout << endl; - } - catch (exception& e) - { - cout << endl << "**** getAlarmConfig Failed = " << e.what() << endl; - break; - } - } - - break; - } - else - { - // get a single Alarm info - try - { - oam.getAlarmConfig(atoi(arguments[1].c_str()), alarmconfig); - - cout << endl << "Alarm ID #" << arguments[1] << " Configuration information" << endl; - - cout << "BriefDesc = " << alarmconfig.BriefDesc << endl; - cout << "DetailedDesc = " << alarmconfig.DetailedDesc << endl; - - // cout << "EmailAddr = " << alarmconfig.EmailAddr << endl; - // cout << "PagerNum = " << alarmconfig.PagerNum << endl; - - switch (alarmconfig.Severity) - { - case CRITICAL: - cout << "Severity = CRITICAL" << endl; - break; - - case MAJOR: - cout << "Severity = MAJOR" << endl; - break; - - case MINOR: - cout << "Severity = MINOR" << endl; - break; - - case WARNING: - cout << "Severity = WARNING" << endl; - break; - - default: - cout << "Severity = INFORMATIONAL" << endl; - break; - } - - cout << "Threshold = " << alarmconfig.Threshold << endl; - // cout << "Occurrences = " << alarmconfig.Occurrences << endl; - // cout << "LastIssueTime = " << alarmconfig.LastIssueTime << endl << endl; - cout << endl; - break; - } - catch (exception& e) - { - cout << endl << "**** getAlarmConfig Failed = " << e.what() << endl; - break; - } - } - } - break; - - case 11: // setAlarmConfig - parameters: AlarmID, Parameter name and value - { - parentOAMModule = getParentOAMModule(); - - if ( localModule != parentOAMModule ) - { - // exit out since not on Parent OAM Module - cout << endl << "**** setAlarmConfig Failed : only should be run on the Parent OAM Module, which is '" << parentOAMModule << "'" << endl; - break; - } - - if (arguments[3] == "") - { - // need 3 arguments - cout << endl << "**** setAlarmConfig Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - try - { - if ( arguments[3] == "=" ) - { - cout << endl << "**** setAlarmConfig Failed : Invalid Value of '=', please re-enter" << endl; - break; - } - - if ( arguments[2] == "Threshold" && arguments[3] != "0" && atoi(arguments[3].c_str()) == 0 ) - { - cout << endl << "**** setAlarmConfig Failed : New value must be a number" << endl; - break; - } - - oam.setAlarmConfig(atoi(arguments[1].c_str()), arguments[2], atoi(arguments[3].c_str())); - cout << endl << " Successfully set " << arguments[2] << " = " << arguments[3] << endl << endl; - break; - } - catch (exception& e) - { - cout << endl << "**** setAlarmConfig Failed = " << e.what() << endl; - break; - } - } - break; - - case 12: // getActiveAlarms - parameters: none - { - AlarmList alarmList; - - try - { - oam.getActiveAlarms(alarmList); - } - catch (...) - { - // need arguments - cout << endl << "**** getActiveAlarms Failed : Error in oam.getActiveAlarms" << endl; - break; - } - - cout << endl << "Active Alarm List:" << endl << endl; - - AlarmList :: iterator i; - - for (i = alarmList.begin(); i != alarmList.end(); ++i) - { - cout << "AlarmID = " << i->second.getAlarmID() << endl; - cout << "Brief Description = " << i->second.getDesc() << endl; - cout << "Alarm Severity = "; - - switch (i->second.getSeverity()) - { - case CRITICAL: - cout << "CRITICAL" << endl; - break; - - case MAJOR: - cout << "MAJOR" << endl; - break; - - case MINOR: - cout << "MINOR" << endl; - break; - - case WARNING: - cout << "WARNING" << endl; - break; - - case INFORMATIONAL: - cout << "INFORMATIONAL" << endl; - break; - } - - cout << "Time Issued = " << i->second.getTimestamp() << endl; - cout << "Reporting Module = " << i->second.getSname() << endl; - cout << "Reporting Process = " << i->second.getPname() << endl; - cout << "Reported Device = " << i->second.getComponentID() << endl << endl; - } - } - break; - - case 13: // getStorageConfig - { - try - { - systemStorageInfo_t t; - t = oam.getStorageConfig(); - - string cloud; - - try - { - oam.getSystemConfig("Cloud", cloud); - } - catch (...) {} - - string::size_type pos = cloud.find("amazon", 0); - - if (pos != string::npos) - cloud = "amazon"; - - cout << endl << "System Storage Configuration" << endl << endl; - - cout << "Performance Module (DBRoot) Storage Type = " << boost::get<0>(t) << endl; - - if ( cloud == "amazon" ) - cout << "User Module Storage Type = " << boost::get<3>(t) << endl; - - cout << "System Assigned DBRoot Count = " << boost::get<1>(t) << endl; - - DeviceDBRootList moduledbrootlist = boost::get<2>(t); - - typedef std::vector dbrootList; - dbrootList dbrootlist; - - DeviceDBRootList::iterator pt = moduledbrootlist.begin(); - - for ( ; pt != moduledbrootlist.end() ; pt++) - { - cout << "DBRoot IDs assigned to 'pm" + oam.itoa((*pt).DeviceID) + "' = "; - DBRootConfigList::iterator pt1 = (*pt).dbrootConfigList.begin(); - - for ( ; pt1 != (*pt).dbrootConfigList.end() ;) - { - cout << *pt1; - dbrootlist.push_back(*pt1); - pt1++; - - if (pt1 != (*pt).dbrootConfigList.end()) - cout << ", "; - } - - cout << endl; - } - - //get any unassigned DBRoots - DBRootConfigList undbrootlist; - - try - { - oam.getUnassignedDbroot(undbrootlist); - } - catch (...) {} - - if ( !undbrootlist.empty() ) - { - cout << endl << "DBRoot IDs unassigned = "; - DBRootConfigList::iterator pt1 = undbrootlist.begin(); - - for ( ; pt1 != undbrootlist.end() ;) - { - cout << *pt1; - pt1++; - - if (pt1 != undbrootlist.end()) - cout << ", "; - } - - cout << endl; - } - - cout << endl; - - // um volumes - if (cloud == "amazon" && boost::get<3>(t) == "external") - { - ModuleTypeConfig moduletypeconfig; - oam.getSystemConfig("um", moduletypeconfig); - - for ( int id = 1; id < moduletypeconfig.ModuleCount + 1 ; id++) - { - string volumeNameID = "UMVolumeName" + oam.itoa(id); - string volumeName = oam::UnassignedName; - string deviceNameID = "UMVolumeDeviceName" + oam.itoa(id); - string deviceName = oam::UnassignedName; - - try - { - oam.getSystemConfig( volumeNameID, volumeName); - oam.getSystemConfig( deviceNameID, deviceName); - } - catch (...) - {} - - cout << "Amazon EC2 Volume Name/Device Name for 'um" << id << "': " << volumeName << ", " << deviceName << endl; - } - } - - // pm volumes - if (cloud == "amazon" && boost::get<0>(t) == "external") - { - cout << endl; - - DBRootConfigList dbrootConfigList; - - try - { - oam.getSystemDbrootConfig(dbrootConfigList); - - DBRootConfigList::iterator pt = dbrootConfigList.begin(); - - for ( ; pt != dbrootConfigList.end() ; pt++) - { - string volumeNameID = "PMVolumeName" + oam.itoa(*pt); - string volumeName = oam::UnassignedName; - string deviceNameID = "PMVolumeDeviceName" + oam.itoa(*pt); - string deviceName = oam::UnassignedName; - string amazonDeviceNameID = "PMVolumeAmazonDeviceName" + oam.itoa(*pt); - string amazondeviceName = oam::UnassignedName; - - try - { - oam.getSystemConfig( volumeNameID, volumeName); - oam.getSystemConfig( deviceNameID, deviceName); - oam.getSystemConfig( amazonDeviceNameID, amazondeviceName); - } - catch (...) - { - continue; - } - - cout << "Amazon EC2 Volume Name/Device Name/Amazon Device Name for DBRoot" << oam.itoa(*pt) << ": " << volumeName << ", " << deviceName << ", " << amazondeviceName << endl; - } - } - catch (exception& e) - { - cout << endl << "**** getSystemDbrootConfig Failed : " << e.what() << endl; - } - - // print un-assigned dbroots - DBRootConfigList::iterator pt1 = undbrootlist.begin(); - - for ( ; pt1 != undbrootlist.end() ; pt1++) - { - string volumeNameID = "PMVolumeName" + oam.itoa(*pt1); - string volumeName = oam::UnassignedName; - string deviceNameID = "PMVolumeDeviceName" + oam.itoa(*pt1); - string deviceName = oam::UnassignedName; - string amazonDeviceNameID = "PMVolumeAmazonDeviceName" + oam.itoa(*pt1); - string amazondeviceName = oam::UnassignedName; - - try - { - oam.getSystemConfig( volumeNameID, volumeName); - oam.getSystemConfig( deviceNameID, deviceName); - oam.getSystemConfig( amazonDeviceNameID, amazondeviceName); - } - catch (...) - { - continue; - } - - cout << "Amazon EC2 Volume Name/Device Name/Amazon Device Name for DBRoot" << oam.itoa(*pt1) << ": " << volumeName << ", " << deviceName << ", " << amazondeviceName << endl; - } - } - - string DataRedundancyConfig; - int DataRedundancyCopies; - string DataRedundancyStorageType; - - try - { - oam.getSystemConfig("DataRedundancyConfig", DataRedundancyConfig); - oam.getSystemConfig("DataRedundancyCopies", DataRedundancyCopies); - oam.getSystemConfig("DataRedundancyStorageType", DataRedundancyStorageType); - } - catch (...) {} - - if ( DataRedundancyConfig == "y" ) - { - cout << endl << "Data Redundant Configuration" << endl << endl; - cout << "Copies Per DBroot = " << DataRedundancyCopies << endl; - //cout << "Storage Type = " << DataRedundancyStorageType << endl; - - oamModuleInfo_t st; - string moduleType; - - try - { - st = oam.getModuleInfo(); - moduleType = boost::get<1>(st); - } - catch (...) {} - - if ( moduleType != "pm") - break; - - try - { - DBRootConfigList dbrootConfigList; - oam.getSystemDbrootConfig(dbrootConfigList); - - DBRootConfigList::iterator pt = dbrootConfigList.begin(); - - for ( ; pt != dbrootConfigList.end() ; pt++) - { - cout << "DBRoot #" << oam.itoa(*pt) << " has copies on PMs = "; - - string pmList = ""; - - try - { - string errmsg; - oam.glusterctl(oam::GLUSTER_WHOHAS, oam.itoa(*pt), pmList, errmsg); - } - catch (...) - {} - - boost::char_separator sep(" "); - boost::tokenizer< boost::char_separator > tokens(pmList, sep); - - for ( boost::tokenizer< boost::char_separator >::iterator it = tokens.begin(); - it != tokens.end(); - ++it) - { - cout << *it << " "; - } - - cout << endl; - } - - cout << endl; - } - catch (exception& e) - { - cout << endl << "**** getSystemDbrootConfig Failed : " << e.what() << endl; - } - } - } - catch (exception& e) - { - cout << endl << "**** getStorageConfig Failed : " << e.what() << endl; - } - - cout << endl; - - break; - } - - case 14: // addDbroot parameters: dbroot-number - { - string DataRedundancyConfig = "n"; - - try - { - oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); - } - catch (...) - {} - - if (DataRedundancyConfig == "y") - { - cout << endl << "**** addDbroot Not Supported on Data Redundancy Configured System, use addModule command to expand your capacity" << endl; - break; - } - - if ( localModule != parentOAMModule ) - { - // exit out since not on active module - cout << endl << "**** addDbroot Failed : Can only run command on Active OAM Parent Module (" << parentOAMModule << ")." << endl; - break; - } - - string cloud; - bool amazon = false; - - try - { - oam.getSystemConfig("Cloud", cloud); - } - catch (...) {} - - string::size_type pos = cloud.find("amazon", 0); - - if (pos != string::npos) - amazon = true; - - if (arguments[1] == "") - { - // need atleast 1 arguments - cout << endl << "**** addDbroot Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - int dbrootNumber = atoi(arguments[1].c_str()); - - string DBRootStorageType; - - try - { - oam.getSystemConfig( "DBRootStorageType", DBRootStorageType); - } - catch (...) {} - - string EBSsize = oam::UnassignedName; - - if (amazon && DBRootStorageType == "external" ) - { - if ( arguments[2] != "") - EBSsize = arguments[2]; - else - { - cout << endl; - oam.getSystemConfig("PMVolumeSize", EBSsize); - - string prompt = "Enter EBS storage size in GB, current setting is " + EBSsize + " : "; - EBSsize = dataPrompt(prompt); - } - } - - //get dbroots ids for reside PM - try - { - DBRootConfigList dbrootlist; - oam.addDbroot(dbrootNumber, dbrootlist, EBSsize); - - cout << endl << " New DBRoot IDs added = "; - - DBRootConfigList::iterator pt = dbrootlist.begin(); - - for ( ; pt != dbrootlist.end() ;) - { - cout << oam.itoa(*pt); - pt++; - - if (pt != dbrootlist.end()) - cout << ", "; - } - - cout << endl; - } - catch (exception& e) - { - cout << endl << "**** addDbroot Failed: " << e.what() << endl; - break; - } - - cout << endl; - } - break; - - case 15: // removeDbroot parameters: dbroot-list - { - - string DataRedundancyConfig = "n"; - - try - { - oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); - } - catch (...) - {} - - if (DataRedundancyConfig == "y") - { - cout << endl << "**** removeDbroot Not Supported on Data Redundancy Configured System, use removeModule command to remove modules and dbroots" << endl; - break; - } - - if ( localModule != parentOAMModule ) - { - // exit out since not on active module - cout << endl << "**** removeDbroot Failed : Can only run command on Active OAM Parent Module (" << parentOAMModule << ")." << endl; - break; - } - - if (arguments[1] == "") - { - // need atleast 1 arguments - cout << endl << "**** removeDbroot Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - SystemStatus systemstatus; - - try - { - oam.getSystemStatus(systemstatus); - - if (systemstatus.SystemOpState != oam::ACTIVE ) - { - cout << endl << "**** removeDbroot Failed, System has to be in a ACTIVE state" << endl; - break; - } - } - catch (exception& e) - { - cout << endl << "**** removeDbroot Failed : " << e.what() << endl; - break; - } - catch (...) - { - cout << endl << "**** removeDbroot Failed, Failed return from getSystemStatus API" << endl; - break; - } - - systemStorageInfo_t t; - - try - { - t = oam.getStorageConfig(); - } - catch (...) {} - - string dbrootIDs = arguments[1]; - - DBRootConfigList dbrootlist; - - bool assign = false; - boost::char_separator sep(", "); - boost::tokenizer< boost::char_separator > tokens(dbrootIDs, sep); - - for ( boost::tokenizer< boost::char_separator >::iterator it = tokens.begin(); - it != tokens.end(); - ++it) - { - //check if dbroot is assigned to a pm - DeviceDBRootList moduledbrootlist = boost::get<2>(t); - - DeviceDBRootList::iterator pt = moduledbrootlist.begin(); - - for ( ; pt != moduledbrootlist.end() ; pt++) - { - DBRootConfigList::iterator pt1 = (*pt).dbrootConfigList.begin(); - - for ( ; pt1 != (*pt).dbrootConfigList.end() ; pt1++) - { - if ( atoi((*it).c_str()) == *pt1 ) - { - cout << endl << "**** removeDbroot Failed, dbroot " << *it << " is assigned to a module, unassign first before removing" << endl; - assign = true; - break; - } - } - } - - if (assign) - break; - - dbrootlist.push_back(atoi((*it).c_str())); - } - - if (assign) - break; - - cout << endl; - - try - { - oam.removeDbroot(dbrootlist); - - cout << endl << " Successful Removal of DBRoots " << endl << endl; - } - catch (exception& e) - { - cout << endl << "**** removeDbroot Failed: " << e.what() << endl; - break; - } - } - break; - - case 16: // stopSystem - parameters: graceful flag, Ack flag - { - BRM::DBRM dbrm; - bool bDBRMReady = dbrm.isDBRMReady(); - getFlags(arguments, gracefulTemp, ackTemp, suspendAnswer, bNeedsConfirm); - - if ( gracefulTemp == INSTALL ) - { - cout << endl << "Invalid Parameter, INSTALL option not supported. Please use shutdownSystem Command" << endl << endl; - break; - } - - cout << endl << "This command stops the processing of applications on all Modules within the MariaDB ColumnStore System" << endl; - - try - { - cout << endl << " Checking for active transactions" << endl; - - if (gracefulTemp != GRACEFUL || - !bDBRMReady || - dbrm.isReadWrite()) - { - suspendAnswer = FORCE; - } - - if (suspendAnswer == CANCEL) // We don't have an answer from the command line or some other state. - { - // If there are bulkloads, ddl or dml happening, Ask what to do. - bool bIsDbrmUp = true; - execplan::SessionManager sessionManager; - BRM::SIDTIDEntry blockingsid; - std::vector tableLocks = dbrm.getAllTableLocks(); - bool bActiveTransactions = false; - - if (!tableLocks.empty()) - { - oam.DisplayLockedTables(tableLocks, &dbrm); - bActiveTransactions = true; - } - - if (sessionManager.checkActiveTransaction(0, bIsDbrmUp, blockingsid)) - { - cout << endl << "There are active transactions being processed" << endl; - bActiveTransactions = true; - } - - if (bActiveTransactions) - { - suspendAnswer = AskSuspendQuestion(CmdID); - // if (suspendAnswer == FORCE) - // { - // if (confirmPrompt("Force may cause data problems and should only be used in extreme circumstances")) - // { - // break; - // } - // } - bNeedsConfirm = false; - } - else - { - suspendAnswer = FORCE; - } - } - - if (suspendAnswer == CANCEL) - { - // We're outa here. - break; - } - - if (bNeedsConfirm) - { - if (confirmPrompt("")) - break; - } - - switch (suspendAnswer) - { - case WAIT: - cout << endl << " Waiting for all transactions to complete" << flush; - dbrm.setSystemShutdownPending(true, false, false); - gracefulTemp = GRACEFUL_WAIT; // Causes procmgr to wait for all table locks to free and all transactions to finish before shutdown - break; - - case ROLLBACK: - cout << endl << " Rollback of all transactions" << flush; - dbrm.setSystemShutdownPending(true, true, false); - gracefulTemp = GRACEFUL_WAIT; // Causes procmgr to wait for all table locks to free and all transactions to finish before shutdown - break; - - case FORCE: - cout << endl << " System being stopped now..." << flush; - - if (bDBRMReady) - { - dbrm.setSystemShutdownPending(true, false, true); - } - - break; - - case CANCEL: - break; - } - - oam.stopSystem(gracefulTemp, ackTemp); - - if ( waitForStop() ) - cout << endl << " Successful stop of System " << endl << endl; - else - cout << endl << "**** stopSystem Failed : check log files" << endl; - - checkForDisabledModules(); - } - catch (exception& e) - { - string Failed = e.what(); - - if (Failed.find("Connection refused") != string::npos) - { - cout << endl << "**** stopSystem Failure : ProcessManager not Active" << endl; - cout << "Retry or Run 'shutdownSystem FORCEFUL' command" << endl << endl; - } - else - { - cout << endl << "**** stopSystem Failure : " << e.what() << endl; - cout << "Retry or Run 'shutdownSystem FORCEFUL' command" << endl << endl; - } - } - } - break; - - case 17: // shutdownSystem - parameters: graceful flag, Ack flag, suspendAnswer - { - BRM::DBRM dbrm; - bool bDBRMReady = dbrm.isDBRMReady(); - getFlags(arguments, gracefulTemp, ackTemp, suspendAnswer, bNeedsConfirm); - - cout << endl << "This command stops the processing of applications on all Modules within the MariaDB ColumnStore System" << endl; - - try - { - cout << endl << " Checking for active transactions" << endl; - - if (gracefulTemp != GRACEFUL || - !bDBRMReady || - dbrm.isReadWrite()) - { - suspendAnswer = FORCE; - } - - if (suspendAnswer == CANCEL) // We don't have an answer from the command line. - { - // If there are bulkloads, ddl or dml happening, Ask what to do. - bool bIsDbrmUp = true; - execplan::SessionManager sessionManager; - BRM::SIDTIDEntry blockingsid; - std::vector tableLocks = dbrm.getAllTableLocks(); - bool bActiveTransactions = false; - - if (!tableLocks.empty()) - { - oam.DisplayLockedTables(tableLocks, &dbrm); - bActiveTransactions = true; - } - - if (sessionManager.checkActiveTransaction(0, bIsDbrmUp, blockingsid)) - { - cout << endl << " There are active transactions being processed" << endl; - bActiveTransactions = true; - } - - if (bActiveTransactions) - { - suspendAnswer = AskSuspendQuestion(CmdID); - bNeedsConfirm = false; - } - else - { - suspendAnswer = FORCE; - } - } - - if (suspendAnswer == CANCEL) - { - // We're outa here. - break; - } - - if (bNeedsConfirm) - { - if (confirmPrompt("")) - break; - } - - switch (suspendAnswer) - { - case WAIT: - cout << endl << " Waiting for all transactions to complete" << flush; - dbrm.setSystemShutdownPending(true, false, false); - gracefulTemp = GRACEFUL_WAIT; // Causes procmgr to wait for all table locks to free and all transactions to finish before shutdown - break; - - case ROLLBACK: - cout << endl << " Rollback of all transactions" << flush; - dbrm.setSystemShutdownPending(true, true, false); - gracefulTemp = GRACEFUL_WAIT; // Causes procmgr to wait for all table locks to free and all transactions to finish before shutdown - break; - - case FORCE: - cout << endl << " Stopping System..." << flush; - - if (bDBRMReady) - { - dbrm.setSystemShutdownPending(true, false, true); - } - - break; - - case CANCEL: - break; - } - - // This won't return until the system is shutdown. It might take a while to finish what we're working on first. - - oam.stopSystem(gracefulTemp, ackTemp); - - if ( waitForStop() ) - cout << endl << " Successful stop of System " << endl; - else - cout << endl << "**** stopSystem Failed : check log files" << endl; - - cout << endl << " Shutting Down System..." << flush; - - oam.shutdownSystem(gracefulTemp, ackTemp); - - //hdfs / hadoop config - string DBRootStorageType; - - try - { - oam.getSystemConfig( "DBRootStorageType", DBRootStorageType); - } - catch (...) {} - - if ( DBRootStorageType == "hdfs") - { - string logFile = tmpDir + "/cc-stop.pdsh"; - - cmd = "pdsh -a 'columnstore stop' > " + logFile + " 2>&1"; - system(cmd.c_str()); - - if (oam.checkLogStatus(logFile, "exit") ) - { - cout << endl << "ERROR: Stopping MariaDB ColumnStore Service failure, check " << logFile << " exit..." << endl; - } - } - else - { - cmd = "columnstore stop > " + tmpDir + "/status.log"; - system(cmd.c_str()); - } - } - catch (exception& e) - { - string Failed = e.what(); - - if ( gracefulTemp == FORCEFUL ) - { - cmd = "columnstore stop > " + tmpDir + "/status.log"; - system(cmd.c_str()); - cout << endl << " Successful shutdown of System (stopped local columnstore service) " << endl << endl; - } - - if (Failed.find("Connection refused") != string::npos) - { - cout << endl << "**** shutdownSystem Error : ProcessManager not Active, stopping columnstore service" << endl; - cmd = "columnstore stop > " + tmpDir + "/status.log"; - system(cmd.c_str()); - cout << endl << " Successful stop of local columnstore service " << endl << endl; - } - else - { - cout << endl << "**** shutdownSystem Failure : " << e.what() << endl; - cout << " Retry running command using FORCEFUL option" << endl << endl; - } - - //hdfs / hadoop config - string DBRootStorageType; - - try - { - oam.getSystemConfig( "DBRootStorageType", DBRootStorageType); - } - catch (...) {} - - if ( DBRootStorageType == "hdfs") - { - string logFile = tmpDir + "cc-stop.pdsh"; - cmd = "pdsh -a 'columnstore stop' > " + logFile + " 2>&1"; - system(cmd.c_str()); - - if (oam.checkLogStatus(logFile, "exit") ) - { - cout << endl << "ERROR: Stopping MariaDB ColumnStore Service failure, check " + logFile + ". exit..." << endl; - break; - } - } - } - - //this is here because a customer likes doing a shutdownsystem then startsystem in a script - sleep(5); - } - break; - - case 18: // startSystem - parameters: Ack flag - { - // startSystem Command - - //don't start if a disable module has a dbroot assigned to it - if (!checkForDisabledModules()) - { - cout << endl << "Error: startSystem command can't be performed: disabled module has a dbroot assigned to it" << endl; - break; - } - - // if columnstore service is down, then start system by starting all of the columnstore services - // this would be used after a shutdownSystem command - // if columnstore service is up, send message to ProcMgr to start system (which starts all processes) - - if (!oam.checkSystemRunning()) - { - cout << endl << "startSystem command, 'columnstore' service is down, sending command to" << endl; - cout << "start the 'columnstore' service on all modules" << endl << endl; - - SystemModuleTypeConfig systemmoduletypeconfig; - ModuleTypeConfig moduletypeconfig; - ModuleConfig moduleconfig; - systemmoduletypeconfig.moduletypeconfig.clear(); - int systemModuleNumber = 0; - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - // end of list - break; - - systemModuleNumber = systemModuleNumber + systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - } - } - catch (exception& e) - { - cout << endl << "**** startSystem Failed = " << e.what() << endl; - break; - } - - if ( systemModuleNumber > 1 ) - { - if (arguments[1] != "") - password = arguments[1]; - else - password = "ssh"; - - // - // perform start of columnstore of other servers in the system - // - - DeviceNetworkList::iterator pt; - string modulename; - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - for (pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); - ++pt) - { - modulename = (*pt).DeviceName; - - if ( (*pt).DisableState == oam::MANDISABLEDSTATE || - (*pt).DisableState == oam::AUTODISABLEDSTATE ) - { - cout << " Module '" << modulename << "' is disabled and will not be started" << endl; - } - } - } - - cout << endl << " System being started, please wait..."; - cout.flush(); - bool FAILED = false; - - //hdfs / hadoop config - string DBRootStorageType; - - try - { - oam.getSystemConfig( "DBRootStorageType", DBRootStorageType); - } - catch (...) {} - - if ( DBRootStorageType == "hdfs") - { - string logFile = tmpDir + "/cc-restart.pdsh"; - cmd = "pdsh -a 'columnstore start' > " + logFile + " 2>&1"; - system(cmd.c_str()); - - if (oam.checkLogStatus(logFile, "exit") ) - { - cout << endl << "ERROR: Restart MariaDB ColumnStore Service failure, check " << logFile << ". exit..." << endl; - break; - } - } - else - { - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - // end of list - break; - - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0 ) - // skip if no modules - continue; - - for (pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); - ++pt) - { - modulename = (*pt).DeviceName; - - if ( (*pt).DisableState == oam::MANDISABLEDSTATE || - (*pt).DisableState == oam::AUTODISABLEDSTATE ) - { - continue; - } - - if ( modulename == localModule ) - { - cmd = "columnstore start > " + tmpDir + "/startSystem.log 2>&1"; - int rtnCode = system(cmd.c_str()); - - if (geteuid() == 0 && WEXITSTATUS(rtnCode) != 0) - { - cout << endl << "error with running 'columnstore start' on local module " << endl; - cout << endl << "**** startSystem Failed" << endl; - break; - } - - continue; - } - - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++) - { - //run remote command script - cmd = "remote_command.sh " + (*pt1).IPAddr + " " + password + " 'columnstore start' 0"; - int rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) < 0) - { - cout << endl << "error with running 'columnstore start' on module " + modulename << endl; - cout << endl << "**** startSystem Failed" << endl; - - // stop local columnstore service - cmd = "columnstore stop > " + tmpDir + "/stop.log 2>&1"; - system(cmd.c_str()); - - FAILED = true; - break; - } - else - { - if (rtnCode > 0) - { - cout << endl << "Invalid Password when running 'columnstore start' on module " + modulename << ", can retry by providing password as the second argument" << endl; - cout << endl << "**** startSystem Failed" << endl; - - // stop local columnstore service - cmd = "columnstore stop > " + tmpDir + "/stop.log 2>&1"; - system(cmd.c_str()); - - FAILED = true; - break; - } - } - } - - if (FAILED) - break; - } - } - - if (FAILED) - break; - } - - if (FAILED) - break; - } - else - { - //just kick off local server - cout << endl << " System being started, please wait..."; - cout.flush(); - cmd = "columnstore start > " + tmpDir + "/startSystem.log 2>&1"; - int rtnCode = system(cmd.c_str()); - - if (geteuid() == 0 && WEXITSTATUS(rtnCode) != 0) - { - cout << endl << "error with running 'columnstore restart' on local module " << endl; - cout << endl << "**** startSystem Failed" << endl; - break; - } - } - - if ( waitForActive() ) - cout << endl << " Successful start of System " << endl << endl; - else - cout << endl << "**** startSystem Failed : check log files" << endl; - } - else - { - getFlags(arguments, gracefulTemp, ackTemp, suspendAnswer, bNeedsConfirm); - - try - { - cout << endl << " System being started, please wait..."; - cout.flush(); - oam.startSystem(ackTemp); - - if ( waitForActive() ) - cout << endl << " Successful start of System " << endl << endl; - else - cout << endl << "**** startSystem Failed : check log files" << endl; - } - catch (exception& e) - { - cout << endl << "**** startSystem Failed : " << e.what() << endl; - string Failed = e.what(); - - if (Failed.find("Database Test Error") != string::npos) - cout << "Database Test Error occurred, check Alarm and Logs for addition Information" << endl; - } - } - } - break; - - case 19: // restartSystem - parameters: graceful flag, Ack flag - { - getFlags(arguments, gracefulTemp, ackTemp, suspendAnswer, bNeedsConfirm, &password); - - //don't start if a disable module has a dbroot assigned to it - if (!checkForDisabledModules()) - { - cout << endl << "Error: restartSystem command can't be performed: disabled module has a dbroot assigned to it" << endl; - break; - } - - // if columnstore service is down, then start system by starting all of the columnstore services - // this would be used after a shutdownSystem command - // if columnstore service is up, send message to ProcMgr to start system (which starts all processes) - - if (!oam.checkSystemRunning()) - { - if (bNeedsConfirm) - { - if (confirmPrompt("")) // returns true if user wants to quit. - break; - } - - cout << "restartSystem command, 'columnstore' service is down, sending command to" << endl; - cout << "start the 'columnstore' service on all modules" << endl << endl; - - SystemModuleTypeConfig systemmoduletypeconfig; - ModuleTypeConfig moduletypeconfig; - ModuleConfig moduleconfig; - systemmoduletypeconfig.moduletypeconfig.clear(); - int systemModuleNumber = 0; - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - // end of list - break; - - systemModuleNumber = systemModuleNumber + systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - } - } - catch (exception& e) - { - cout << endl << "**** restartSystem Failed = " << e.what() << endl; - break; - } - - if ( systemModuleNumber > 1 ) - { - if (password.empty()) - password = "ssh"; - - // - // perform start of columnstore of other servers in the system - // - - DeviceNetworkList::iterator pt; - string modulename; - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - for (pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); - ++pt) - { - modulename = (*pt).DeviceName; - - if ( (*pt).DisableState == oam::MANDISABLEDSTATE || - (*pt).DisableState == oam::AUTODISABLEDSTATE ) - { - cout << " Module '" << modulename << "' is disabled and will not be started" << endl; - } - } - } - - cout << endl << " System being started, please wait..."; - cout.flush(); - bool FAILED = false; - - //hdfs / hadoop config - string DBRootStorageType; - - try - { - oam.getSystemConfig( "DBRootStorageType", DBRootStorageType); - } - catch (...) {} - - if ( DBRootStorageType == "hdfs") - { - string logFile = tmpDir + "/cc-restart.pdsh"; - cmd = "pdsh -a 'columnstore restart' > " + logFile + " 2>&1"; - system(cmd.c_str()); - - if (oam.checkLogStatus(logFile, "exit") ) - { - cout << endl << "ERROR: Restart MariaDB ColumnStore Service failue, check " << logFile << ". exit..." << endl; - break; - } - } - else - { - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - // end of list - break; - - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0 ) - // skip if no modules - continue; - - for (pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); - ++pt) - { - modulename = (*pt).DeviceName; - - if ( (*pt).DisableState == oam::MANDISABLEDSTATE || - (*pt).DisableState == oam::AUTODISABLEDSTATE ) - { - continue; - } - - if ( modulename == localModule ) - continue; // do last - - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++) - { - //run remote command script - cmd = "remote_command.sh " + (*pt1).IPAddr + " " + password + " 'columnstore restart' 0"; - - int rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) < 0) - { - cout << endl << "error with running 'columnstore start' on module " + modulename << endl; - cout << endl << "**** restartSystem Failed" << endl; - - // stop local columnstore service - cmd = "columnstore stop > " + tmpDir + "/stop.log 2>&1"; - system(cmd.c_str()); - - FAILED = true; - break; - } - else - { - if (rtnCode > 0) - { - cout << endl << "Invalid Password when running 'columnstore start' on module " + modulename << ", can retry by providing password as the second argument" << endl; - cout << endl << "**** restartSystem Failed" << endl; - FAILED = true; - - // stop local columnstore service - cmd = "columnstore stop > " + tmpDir + "/stop.log 2>&1"; - system(cmd.c_str()); - - break; - } - } - } - - if (FAILED) - break; - } - - if (FAILED) - break; - - //RESTART LOCAL HOST - cmd = "columnstore restart > " + tmpDir + "/start.log 2>&1"; - int rtnCode = system(cmd.c_str()); - - if (geteuid() == 0 && WEXITSTATUS(rtnCode) != 0) - { - cout << endl << "error with running 'columnstore restart' on local module " << endl; - cout << endl << "**** restartSystem Failed" << endl; - break; - } - } - - if (FAILED) - break; - } - } - else - { - //just kick off local server - cout << " System being restarted, please wait..."; - cout.flush(); - cmd = "columnstore restart > " + tmpDir + "/start.log 2>&1"; - int rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) != 0) - { - cout << endl << "error with running 'columnstore start' on local module " << endl; - cout << endl << "**** restartSystem Failed" << endl; - break; - } - } - - if ( waitForActive() ) - cout << endl << " Successful restart of System " << endl << endl; - else - cout << endl << "**** restartSystem Failed : check log files" << endl; - } - else - { - BRM::DBRM dbrm; - bool bDBRMReady = dbrm.isDBRMReady(); - - try - { - if (gracefulTemp != GRACEFUL || - !bDBRMReady || - dbrm.isReadWrite()) - { - suspendAnswer = FORCE; - } - - if (suspendAnswer == CANCEL) // We don't have an answer from the command line. - { - // If there are bulkloads, ddl or dml happening, Ask what to do. - bool bIsDbrmUp = true; - execplan::SessionManager sessionManager; - BRM::SIDTIDEntry blockingsid; - std::vector tableLocks = dbrm.getAllTableLocks(); - bool bActiveTransactions = false; - - if (!tableLocks.empty()) - { - oam.DisplayLockedTables(tableLocks, &dbrm); - bActiveTransactions = true; - } - - if (sessionManager.checkActiveTransaction(0, bIsDbrmUp, blockingsid)) - { - cout << endl << "There are active transactions being processed" << endl; - bActiveTransactions = true; - } - - if (bActiveTransactions) - { - suspendAnswer = AskSuspendQuestion(CmdID); - // if (suspendAnswer == FORCE) - // { - // if (confirmPrompt("Force may cause data problems and should only be used in extreme circumstances")) - // { - // break; - // } - // } - bNeedsConfirm = false; - } - else - { - suspendAnswer = FORCE; - } - } - - if (suspendAnswer == CANCEL) - { - // We're outa here. - break; - } - - if (bNeedsConfirm) - { - if (confirmPrompt("")) - break; - } - - switch (suspendAnswer) - { - case WAIT: - cout << endl << " Waiting for all transactions to complete" << flush; - dbrm.setSystemShutdownPending(true, false, false); - gracefulTemp = GRACEFUL_WAIT; // Causes procmgr to wait for all table locks to free and all transactions to finish before shutdown - break; - - case ROLLBACK: - cout << endl << " Rollback of all transactions" << flush; - dbrm.setSystemShutdownPending(true, true, false); - gracefulTemp = GRACEFUL_WAIT; // Causes procmgr to wait for all table locks to free and all transactions to finish before shutdown - break; - - case FORCE: - cout << endl << " System being restarted now ..." << flush; - - if (bDBRMReady) - { - dbrm.setSystemShutdownPending(true, false, true); - } - - break; - - case CANCEL: - break; - } - - int returnStatus = oam.restartSystem(gracefulTemp, ackTemp); - - switch (returnStatus) - { - case API_SUCCESS: - if ( waitForActive() ) - cout << endl << " Successful restart of System " << endl << endl; - else - cout << endl << "**** restartSystem Failed : check log files" << endl; - - break; - - case API_CANCELLED: - cout << endl << " Restart of System canceled" << endl << endl; - break; - - default: - cout << endl << "**** restartSystem Failed : Check system logs" << endl; - break; - } - } - catch (exception& e) - { - cout << endl << "**** restartSystem Failed : " << e.what() << endl; - string Failed = e.what(); - - if (Failed.find("Database Test Error") != string::npos) - cout << "Database Test Error occurred, check Alarm and Logs for additional Information" << endl; - } - } - } - break; - - case 20: // getSystemStatus - parameters: NONE - { - try - { - printSystemStatus(); - } - catch (...) - { - break; - } - - } - break; - - case 21: // getProcessStatus - parameters: NONE - { - try - { - printProcessStatus(); - } - catch (...) - { - break; - } - } - break; - - case 22: // system - UNIX system command - { - if (arguments[1] == "") - { - // need arguments - cout << endl << "**** system Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - for (int j = 2; j < ArgNum; j++) - { - arguments[1].append(" "); - arguments[1].append(arguments[j]); - } - - system (arguments[1].c_str()); - } - break; - - case 23: // getAlarmHistory - { - if (arguments[1] == "") - { - // need arguments - cout << endl << "**** getAlarmHistory Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - if ( arguments[1].size() != 8 ) - { - cout << "date not in correct format, enter MM/DD/YY" << endl; - break; - } - - if ( !(arguments[1].substr(2, 1) == "/" && arguments[1].substr(5, 1) == "/") ) - { - cout << "date not in correct format, enter MM/DD/YY" << endl; - break; - } - - AlarmList alarmList; - - try - { - oam.getAlarms(arguments[1], alarmList); - } - catch (exception& e) - { - cout << endl << "**** getAlarms Failed = " << e.what() << endl; - break; - } - - cout << endl << "Historical Alarm List for " + arguments[1] + " :" << endl << endl; - - AlarmList :: iterator i; - int counter = 0; - - for (i = alarmList.begin(); i != alarmList.end(); ++i) - { - // SET = 1, CLEAR = 0 - if (i->second.getState() == true) - { - cout << "SET" << endl; - } - else - { - cout << "CLEAR" << endl; - } - - cout << "AlarmID = " << i->second.getAlarmID() << endl; - cout << "Brief Description = " << i->second.getDesc() << endl; - cout << "Alarm Severity = "; - - switch (i->second.getSeverity()) - { - case CRITICAL: - cout << "CRITICAL" << endl; - break; - - case MAJOR: - cout << "MAJOR" << endl; - break; - - case MINOR: - cout << "MINOR" << endl; - break; - - case WARNING: - cout << "WARNING" << endl; - break; - - case INFORMATIONAL: - cout << "INFORMATIONAL" << endl; - break; - } - - cout << "Time Issued = " << i->second.getTimestamp() << endl; - cout << "Reporting Module = " << i->second.getSname() << endl; - cout << "Reporting Process = " << i->second.getPname() << endl; - cout << "Reported Device = " << i->second.getComponentID() << endl << endl; - - counter++; - - if ( counter > 4 ) - { - // continue prompt - if (confirmPrompt("Displaying Alarm History")) - break; - - counter = 0; - } - } - } - break; - - case 24: // monitorAlarms - { - cout << endl << "Monitor for System Alarms" << endl; - cout << " Enter control-C to return to command line" << endl << endl; - - cmd = "tail -n 0 -f " + alarmmanager::ALARM_FILE; - system(cmd.c_str()); - } - break; - - case 25: // resetAlarm - { - if (arguments[1] == "") - { - // need 3 arguments - cout << endl << "**** resetAlarm Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - try - { - // check if requested alarm is Active - AlarmList alarmList; - Oam oam; - - try - { - oam.getActiveAlarms(alarmList); - } - catch (exception& e) - { - cout << endl << "**** getActiveAlarm Failed = " << e.what() << endl; - break; - } - - bool found = false; - AlarmList::iterator i; - - for (i = alarmList.begin(); i != alarmList.end(); ++i) - { - // check if matching ID - if ( arguments[1] != "ALL" ) - { - if (atoi(arguments[1].c_str()) != (i->second).getAlarmID() ) - continue; - - if ( arguments[2] != "ALL") - { - if (arguments[2].compare((i->second).getSname()) != 0) - continue; - - if ( arguments[3] != "ALL") - { - if (arguments[3].compare((i->second).getComponentID()) != 0 ) - continue; - } - } - } - - ALARMManager aManager; - aManager.sendAlarmReport((i->second).getComponentID().c_str(), - (i->second).getAlarmID(), - CLEAR, - (i->second).getSname(), - "mcsadmin"); - - cout << endl << " Alarm Successfully Reset: "; - cout << "ID = " << oam.itoa((i->second).getAlarmID()); - cout << " / Module = " << (i->second).getSname(); - cout << " / Device = " << (i->second).getComponentID() << endl; - found = true; - } - - // check is a SET alarm was found, if not return - if (!found) - { - cout << endl << "**** resetAlarm Failed : Requested Alarm is not Set" << endl; - break; - } - } - catch (exception& e) - { - cout << endl << "**** resetAlarm Failed = " << e.what() << endl; - break; - } - } - break; - - case 26: // enableLog - { - if (arguments[2] == "") - { - // need 2 arguments - cout << endl << "**** Failed : enableLog Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - // covert second argument (level) into lowercase - transform (arguments[2].begin(), arguments[2].end(), arguments[2].begin(), to_lower()); - - try - { - oam.updateLog(ENABLEDSTATE, arguments[1], arguments[2]); - cout << endl << " Successful Enabling of Logging " << endl << endl; - } - catch (exception& e) - { - cout << endl << "**** enableLog Failed : " << e.what() << endl; - } - } - break; - - case 27: // disableLog - { - if (arguments[2] == "") - { - // need 2 arguments - cout << endl << "**** disableLog Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - // covert second argument (level) into lowercase - transform (arguments[2].begin(), arguments[2].end(), arguments[2].begin(), to_lower()); - - try - { - oam.updateLog(MANDISABLEDSTATE, arguments[1], arguments[2]); - cout << endl << " Successful Disabling of Logging " << endl << endl; - } - catch (exception& e) - { - cout << endl << "**** disableLog Failed : " << e.what() << endl; - } - } - break; - - case 28: // switchParentOAMModule - { - BRM::DBRM dbrm; - bool bDBRMReady = dbrm.isDBRMReady(); - string module; - bool bUseHotStandby = true; - SystemStatus systemstatus; - Oam oam; - - //first check that the system is in a ACTIVE OR MAN_OFFLINE STATE - try - { - oam.getSystemStatus(systemstatus); - - if (systemstatus.SystemOpState == ACTIVE || - systemstatus.SystemOpState == MAN_OFFLINE) - { - module = ""; - } - else - { - cout << endl << "**** switchParentOAMModule Failed : System Status needs to be ACTIVE or MAN_OFFLINE" << endl; - break; - } - } - catch (...) - {} - - // First get the values for the standard arguments - getFlags(arguments, gracefulTemp, ackTemp, suspendAnswer, bNeedsConfirm); - - // Now check for arguments unique to this command. In this case, a valid - // module name. - for (int i = 1; i < ArgNum; i++) - { - if (arguments[i].size() > 0) - { - if (oam.validateModule(arguments[i]) == API_SUCCESS) - { - module = arguments[i]; - bUseHotStandby = false; - break; - } - } - } - - //check if there are more than 1 pm modules to start with - ModuleTypeConfig moduletypeconfig; - oam.getSystemConfig("pm", moduletypeconfig); - - if ( moduletypeconfig.ModuleCount < 2 ) - { - cout << endl << "**** switchParentOAMModule Failed : Command only support on systems with Multiple Performance Modules" << endl; -// break; - } - - string DBRootStorageType; - - try - { - oam.getSystemConfig("DBRootStorageType", DBRootStorageType); - } - catch (...) {} - - string DataRedundancyConfig = "n"; - - try - { - oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); - } - catch (...) - {} - - if (DBRootStorageType == "internal" && DataRedundancyConfig == "n") - { - cout << endl << "**** switchParentOAMModule Failed : DBRoot Storage type = internal/non-data-replication" << endl; - break; - } - - string ParentOAMModuleName; - - try - { - oam.getSystemConfig("ParentOAMModuleName", ParentOAMModuleName); - } - catch (...) {} - - if (bUseHotStandby) - { - oam.getSystemConfig("StandbyOAMModuleName", module); - - if ( module.empty() || module == oam::UnassignedName ) - { - cout << endl << "**** switchParentOAMModule Failed : There's no hot standby defined" << endl << " enter a Performance Module" << endl; - break; - } - - cout << endl << "Switching to the Hot-Standby Parent OAM Module '" << module << "'" << endl; - } - else - { - parentOAMModule = getParentOAMModule(); - - if ( module == parentOAMModule ) - { - cout << endl << "**** switchParentOAMModule Failed : " << module << " is already the Active Parent OAM Module" << endl; - break; - } - - cout << endl << "Switching to the Performance Module '" << module << "'" << endl; - } - - //check for gluster system is do-able - if (DataRedundancyConfig == "y") - { - // get to-module assigned DBRoots and see if current active PM - // has a copy - - DBRootConfigList toPMbrootConfigList; - - try - { - string moduleID = module.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE); - oam.getPmDbrootConfig(atoi(moduleID.c_str()), toPMbrootConfigList); - - bool match = false; - DBRootConfigList::iterator pt = toPMbrootConfigList.begin(); - - for ( ; pt != toPMbrootConfigList.end() ; pt++) - { - // check if ACTIVE PM has a copy of Dbroot - string pmList = ""; - - try - { - string errmsg; - int ret = oam.glusterctl(oam::GLUSTER_WHOHAS, oam.itoa(*pt), pmList, errmsg); - - if ( ret != 0 ) - { - cout << endl << "**** switchParentOAMModule Failed : " << module << " glusterctl error" << endl; - break; - } - } - catch (...) - { - cout << endl << "**** switchParentOAMModule Failed : " << module << " glusterctl error" << endl; - break; - } - - boost::char_separator sep(" "); - boost::tokenizer< boost::char_separator > tokens(pmList, sep); - - for ( boost::tokenizer< boost::char_separator >::iterator it1 = tokens.begin(); - it1 != tokens.end(); - ++it1) - { - string pmModule = "pm" + *it1; - - if ( pmModule == ParentOAMModuleName ) - { - match = true; - break; - } - } - } - - if (!match) - { - cout << endl << "**** switchParentOAMModule Failed : The Current Active PM doesn't have a copy of any DBROOTs that reside on the Siwtching PM " << endl; - break; - } - - //check if switching to PM has DBROOT 1 - string pmList = ""; - - try - { - string errmsg; - int ret = oam.glusterctl(oam::GLUSTER_WHOHAS, "1", pmList, errmsg); - - if ( ret != 0 ) - { - cout << endl << "**** switchParentOAMModule Failed : " << module << " glusterctl error" << endl; - break; - } - } - catch (...) - { - cout << endl << "**** switchParentOAMModule Failed : " << module << " glusterctl error" << endl; - break; - } - - match = false; - boost::char_separator sep(" "); - boost::tokenizer< boost::char_separator > tokens(pmList, sep); - - for ( boost::tokenizer< boost::char_separator >::iterator it1 = tokens.begin(); - it1 != tokens.end(); - ++it1) - { - string pmModule = "pm" + *it1; - - if ( pmModule == module ) - { - match = true; - break; - } - } - - if (!match) - { - cout << endl << "**** switchParentOAMModule Failed : The Switching to PM doesn't have a copy of the DBROOT #1" << endl; - break; - } - } - catch (exception& e) - { - cout << endl << "**** getPmDbrootConfig Failed for '" << module << "' : " << e.what() << endl; - break; - } - } - - - if (bNeedsConfirm) - { - // confirm request - if (confirmPrompt("This command switches the Active Parent OAM Module and should only be executed on an idle system.")) - break; - } - - string MySQLRep; - - try - { - oam.getSystemConfig("MySQLRep", MySQLRep); - } - catch (...) {} - - try - { - cout << endl << " Check for active transactions" << endl; - - if (!bDBRMReady || - dbrm.isReadWrite() != 0) - { - suspendAnswer = FORCE; - } - - if (suspendAnswer == CANCEL) // We don't have an answer from the command line. - { - // If there are bulkloads, ddl or dml happening, Ask what to do. - bool bIsDbrmUp = true; - execplan::SessionManager sessionManager; - BRM::SIDTIDEntry blockingsid; - std::vector tableLocks = dbrm.getAllTableLocks(); - bool bActiveTransactions = false; - - if (!tableLocks.empty()) - { - oam.DisplayLockedTables(tableLocks, &dbrm); - bActiveTransactions = true; - } - - if (sessionManager.checkActiveTransaction(0, bIsDbrmUp, blockingsid)) - { - cout << endl << "There are active transactions being processed" << endl; - bActiveTransactions = true; - } - - if (bActiveTransactions) - { - suspendAnswer = AskSuspendQuestion(CmdID); - // if (suspendAnswer == FORCE) - // { - // if (confirmPrompt("Force may cause data problems and should only be used in extreme circumstances")) - // { - // break; - // } - // } - } - else - { - suspendAnswer = FORCE; - } - } - - if (suspendAnswer == CANCEL) - { - // We're outa here. - break; - } - - switch (suspendAnswer) - { - case WAIT: - cout << endl << " Waiting for all transactions to complete" << flush; - dbrm.setSystemShutdownPending(true, false, false); - gracefulTemp = GRACEFUL_WAIT; // Causes procmgr to wait for all table locks to free and all transactions to finish before shutdown - break; - - case ROLLBACK: - cout << endl << " Rollback of all transactions" << flush; - dbrm.setSystemShutdownPending(true, true, false); - gracefulTemp = GRACEFUL_WAIT; // Causes procmgr to wait for all table locks to free and all transactions to finish before shutdown - break; - - case FORCE: - cout << endl << " Switch Active Parent OAM Module starting..." << endl; - - if (bDBRMReady) - { - dbrm.setSystemShutdownPending(true, false, true); - } - - break; - - case CANCEL: - break; - } - - if (oam.switchParentOAMModule(module, gracefulTemp)) - { - if (waitForActive()) - { - // give time for new ProcMgr to go active - sleep (10); - cout << endl << " Successful Switch Active Parent OAM Module" << endl << endl; - } - else - cout << endl << "**** Switch Active Parent OAM Module failed : check log files" << endl; - } - else - { - // give time for new ProcMgr to go active - sleep (10); - cout << endl << " Successful Switch Active Parent OAM Module" << endl << endl; - } - } - catch (exception& e) - { - cout << endl << "**** switchParentOAMModule Failed : " << e.what() << endl; - break; - } - } - break; - - case 29: // getStorageStatus - { - SystemStatus systemstatus; - Oam oam; - - cout << "System External DBRoot Storage Statuses" << endl << endl; - cout << "Component Status Last Status Change" << endl; - cout << "------------ -------------------------- ------------------------" << endl; - - try - { - oam.getSystemStatus(systemstatus, false); - - if ( systemstatus.systemdbrootstatus.dbrootstatus.size() == 0 ) - { - cout << " No External DBRoot Storage Configured" << endl; - break; - } - - for ( unsigned int i = 0 ; i < systemstatus.systemdbrootstatus.dbrootstatus.size(); i++) - { - if ( systemstatus.systemdbrootstatus.dbrootstatus[i].Name.empty() ) - // end of list - break; - - cout << "DBRoot #"; - cout.setf(ios::left); - cout.width(6); - cout << systemstatus.systemdbrootstatus.dbrootstatus[i].Name; - cout.width(29); - int state = systemstatus.systemdbrootstatus.dbrootstatus[i].OpState; - printState(state, " "); - cout.width(24); - string stime = systemstatus.systemdbrootstatus.dbrootstatus[i].StateChangeDate ; - stime = stime.substr (0, 24); - cout << stime << endl; - } - - cout << endl; - } - catch (exception& e) - { - cout << endl << "**** getSystemStatus Failed = " << e.what() << endl; - } - - string DataRedundancyConfig; - int DataRedundancyCopies; - string DataRedundancyStorageType; - - try - { - oam.getSystemConfig("DataRedundancyConfig", DataRedundancyConfig); - oam.getSystemConfig("DataRedundancyCopies", DataRedundancyCopies); - oam.getSystemConfig("DataRedundancyStorageType", DataRedundancyStorageType); - } - catch (...) {} - - if ( DataRedundancyConfig == "y" ) - { - string arg1 = ""; - string arg2 = ""; - string errmsg = ""; - int ret = oam.glusterctl(oam::GLUSTER_STATUS, arg1, arg2, errmsg); - - if ( ret == 0 ) - { - cout << arg2 << endl; - } - else - { - cerr << "FAILURE: Status check error: " + errmsg << endl; - } - } - } - break; - - case 30: // getLogConfig - { - try - { - SystemLogConfigData systemconfigdata; - LogConfigData logconfigdata; - - oam.getLogConfig(systemconfigdata); - - string configFileName; - oam.getSystemConfig("SystemLogConfigFile", configFileName); - - cout << endl << "MariaDB ColumnStore System Log Configuration Data" << endl << endl; - - cout << "System Logging Configuration File being used: " << configFileName << endl << endl; - - cout << "Module Configured Log Levels" << endl; - cout << "------ ---------------------------------------" << endl; - - SystemLogConfigData::iterator pt = systemconfigdata.begin(); - - for (; pt != systemconfigdata.end() ; pt++) - { - logconfigdata = *pt; - string module = logconfigdata.moduleName; - int data = logconfigdata.configData; - - if ( data < API_MAX ) - { - // failure API status returned - cout.setf(ios::left); - cout.width(10); - cout << logconfigdata.moduleName; - cout << "getLogConfig Failed - Error : " << data << endl; - } - else - { - cout.setf(ios::left); - cout.width(10); - cout << logconfigdata.moduleName; - - data = data - API_MAX; - - if ( data == 0 ) - // no level configured - cout << "None Configured" << endl; - else - { - if ( ((data & LEVEL_CRITICAL) ? 1 : 0) == 1 ) - cout << "Critical "; - - if ( ((data & LEVEL_ERROR) ? 1 : 0) == 1 ) - cout << "Error "; - - if ( ((data & LEVEL_WARNING) ? 1 : 0) == 1 ) - cout << "Warning "; - - if ( ((data & LEVEL_INFO) ? 1 : 0) == 1 ) - cout << "Info "; - - if ( ((data & LEVEL_DEBUG) ? 1 : 0) == 1 ) - cout << "Debug "; - - cout << endl; - } - } - } - } - catch (exception& e) - { - cout << endl << "**** getLogConfig Failed : " << e.what() << endl; - break; - } - - } - break; - - case 31: // movePmDbrootConfig parameters: pm-reside dbroot-list pm-to - { - if ( localModule != parentOAMModule ) - { - // exit out since not on active module - cout << endl << "**** movePmDbrootConfig Failed : Can only run command on Active OAM Parent Module (" << parentOAMModule << ")." << endl; - break; - } - - //check the system status / service status and only allow command when System is MAN_OFFLINE - if (oam.checkSystemRunning()) - { - SystemStatus systemstatus; - - try - { - oam.getSystemStatus(systemstatus); - - if (systemstatus.SystemOpState != oam::MAN_OFFLINE ) - { - cout << endl << "**** movePmDbrootConfig Failed, System has to be in a MAN_OFFLINE state, stop system first" << endl; - break; - } - } - catch (exception& e) - { - cout << endl << "**** movePmDbrootConfig Failed : " << e.what() << endl; - break; - } - catch (...) - { - cout << endl << "**** movePmDbrootConfig Failed, Failed return from getSystemStatus API" << endl; - break; - } - } - - if (arguments[3] == "") - { - // need arguments - cout << endl << "**** movePmDbrootConfig Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - string residePM = arguments[1]; - string dbrootIDs = arguments[2]; - string toPM = arguments[3]; - - string residePMID = residePM.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE);; - string toPMID = toPM.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE);; - - // check module status - try - { - bool degraded; - int opState; - oam.getModuleStatus(toPM, opState, degraded); - - if (opState == oam::AUTO_DISABLED || - opState == oam::MAN_DISABLED) - { - cout << "**** movePmDbrootConfig Failed: " << toPM << " is DISABLED." << endl; - cout << "Run alterSystem-EnableModule to enable module" << endl; - break; - } - - if (opState == oam::FAILED) - { - cout << "**** movePmDbrootConfig Failed: " << toPM << " is in a FAILED state." << endl; - break; - } - } - catch (exception& ex) - {} - - bool moveDBRoot1 = false; - bool found = false; - boost::char_separator sep(", "); - boost::tokenizer< boost::char_separator > tokens(dbrootIDs, sep); - - for ( boost::tokenizer< boost::char_separator >::iterator it = tokens.begin(); - it != tokens.end(); - ++it) - { - if (*it == "1" ) - { - moveDBRoot1 = true; - break; - } - - //if gluster, check if toPM is has a copy - string DataRedundancyConfig; - - try - { - oam.getSystemConfig("DataRedundancyConfig", DataRedundancyConfig); - } - catch (...) {} - - if ( DataRedundancyConfig == "y" ) - { - string pmList = ""; - - try - { - string errmsg; - oam.glusterctl(oam::GLUSTER_WHOHAS, *it, pmList, errmsg); - } - catch (...) - {} - - boost::char_separator sep(" "); - boost::tokenizer< boost::char_separator > tokens(pmList, sep); - - for ( boost::tokenizer< boost::char_separator >::iterator it1 = tokens.begin(); - it1 != tokens.end(); - ++it1) - { - if ( *it1 == toPMID ) - { - found = true; - break; - } - } - - if (!found) - { - cout << endl << "**** movePmDbrootConfig Failed : Data Redundancy Configured, DBRoot #" << *it << " doesn't have a copy on " << toPM << endl; - cout << "Run getStorageConfig to get copy information" << endl << endl; - break; - } - } - else - found = true; - } - - if (moveDBRoot1) - { - cout << endl << "**** movePmDbrootConfig Failed : Can't move dbroot #1" << endl << endl; - break; - } - - if (!found) - { - break; - } - - - if (residePM.find("pm") == string::npos ) - { - cout << endl << "**** movePmDbrootConfig Failed : Parmameter 1 is not a Performance Module name, enter 'help' for additional information" << endl; - break; - } - - if (toPM.find("pm") == string::npos ) - { - cout << endl << "**** movePmDbrootConfig Failed : Parmameter 3 is not a Performance Module name, enter 'help' for additional information" << endl; - break; - } - - if (residePM == toPM ) - { - cout << endl << "**** movePmDbrootConfig Failed : Reside and To Performance Modules are the same" << endl; - break; - } - - //get dbroots ids for reside PM - DBRootConfigList residedbrootConfigList; - - try - { - oam.getPmDbrootConfig(atoi(residePMID.c_str()), residedbrootConfigList); - - cout << endl << "DBRoot IDs currently assigned to '" + residePM + "' = "; - - DBRootConfigList::iterator pt = residedbrootConfigList.begin(); - - for ( ; pt != residedbrootConfigList.end() ;) - { - cout << oam.itoa(*pt); - pt++; - - if (pt != residedbrootConfigList.end()) - cout << ", "; - } - - cout << endl; - } - catch (exception& e) - { - cout << endl << "**** getPmDbrootConfig Failed for '" << residePM << "' : " << e.what() << endl; - break; - } - - //get dbroots ids for reside PM - DBRootConfigList todbrootConfigList; - - try - { - oam.getPmDbrootConfig(atoi(toPMID.c_str()), todbrootConfigList); - - cout << "DBRoot IDs currently assigned to '" + toPM + "' = "; - - DBRootConfigList::iterator pt = todbrootConfigList.begin(); - - for ( ; pt != todbrootConfigList.end() ;) - { - cout << oam.itoa(*pt); - pt++; - - if (pt != todbrootConfigList.end()) - cout << ", "; - } - - cout << endl; - } - catch (exception& e) - { - cout << endl << "**** getPmDbrootConfig Failed for '" << toPM << "' : " << e.what() << endl; - break; - } - - cout << endl << "DBroot IDs being moved, please wait..." << endl << endl; - - try - { - oam.manualMovePmDbroot(residePM, dbrootIDs, toPM); - } - catch (...) - { - cout << endl << "**** manualMovePmDbroot Failed : API Failure" << endl; - break; - } - - //get dbroots ids for reside PM - try - { - residedbrootConfigList.clear(); - oam.getPmDbrootConfig(atoi(residePMID.c_str()), residedbrootConfigList); - - cout << "DBRoot IDs newly assigned to '" + residePM + "' = "; - - DBRootConfigList::iterator pt = residedbrootConfigList.begin(); - - for ( ; pt != residedbrootConfigList.end() ;) - { - cout << oam.itoa(*pt); - pt++; - - if (pt != residedbrootConfigList.end()) - cout << ", "; - } - - cout << endl; - } - catch (exception& e) - { - cout << endl << "**** getPmDbrootConfig Failed for '" << toPM << "' : " << e.what() << endl; - break; - } - - try - { - todbrootConfigList.clear(); - oam.getPmDbrootConfig(atoi(toPMID.c_str()), todbrootConfigList); - - cout << "DBRoot IDs newly assigned to '" + toPM + "' = "; - - DBRootConfigList::iterator pt = todbrootConfigList.begin(); - - for ( ; pt != todbrootConfigList.end() ;) - { - cout << oam.itoa(*pt); - pt++; - - if (pt != todbrootConfigList.end()) - cout << ", "; - } - - cout << endl; - } - catch (exception& e) - { - cout << endl << "**** getPmDbrootConfig Failed for '" << toPM << "' : " << e.what() << endl; - break; - } - - } - break; - - case 32: // suspendDatabaseWrites - { - BRM::DBRM dbrm; - getFlags(arguments, gracefulTemp, ackTemp, suspendAnswer, bNeedsConfirm); - - cout << endl << "This command suspends the DDL/DML writes to the MariaDB ColumnStore Database" << endl; - - try - { - - if (!dbrm.isDBRMReady()) - { - cout << endl << " The Controller Node is not responding.\n The system can't be set into write suspend mode" << endl << flush; - break; - } - else if (dbrm.isReadWrite() != 0) - { - suspendAnswer = FORCE; - } - - // If there are bulkloads, ddl or dml happening, refuse the request - if (suspendAnswer == CANCEL) // We don't have an answer from the command line. - { - // If there are bulkloads, ddl or dml happening, Ask what to do. - bool bIsDbrmUp = true; - execplan::SessionManager sessionManager; - BRM::SIDTIDEntry blockingsid; - std::vector tableLocks = dbrm.getAllTableLocks(); - bool bActiveTransactions = false; - - if (!tableLocks.empty()) - { - oam.DisplayLockedTables(tableLocks, &dbrm); - bActiveTransactions = true; - } - - if (sessionManager.checkActiveTransaction(0, bIsDbrmUp, blockingsid)) - { - cout << endl << "There are active transactions being processed" << endl; - bActiveTransactions = true; - } - - if (bActiveTransactions) - { - suspendAnswer = AskSuspendQuestion(CmdID); - // if (suspendAnswer == FORCE) - // { - // if (confirmPrompt("Force may cause data problems and should only be used in extreme circumstances")) - // { - // break; - // } - // } - bNeedsConfirm = false; - } - else - { - suspendAnswer = FORCE; - } - } - - if (suspendAnswer == CANCEL) - { - // We're outa here. - break; - } - - if (bNeedsConfirm) - { - if (confirmPrompt("")) - break; - } - - switch (suspendAnswer) - { - case WAIT: - cout << endl << " Waiting for all transactions to complete" << flush; - dbrm.setSystemSuspendPending(true, false); - gracefulTemp = GRACEFUL_WAIT; // Causes procmgr to wait for all table locks to free and all transactions to finish before shutdown - break; - - case ROLLBACK: - cout << endl << " Rollback of all transactions" << flush; - dbrm.setSystemSuspendPending(true, true); - gracefulTemp = GRACEFUL_WAIT; // Causes procmgr to wait for all table locks to free and all transactions to finish before shutdown - break; - - case FORCE: - case CANCEL: - default: - gracefulTemp = FORCEFUL; - break; - } - - // stop writes to MariaDB ColumnStore Database - oam.SuspendWrites(gracefulTemp, ackTemp); - } - catch (exception& e) - { - cout << endl << "**** stopDatabaseWrites Failed: " << e.what() << endl; - } - catch (...) - { - cout << endl << "**** stopDatabaseWrites Failed" << endl; - break; - } - - break; - } - - case 33: // resumeDatabaseWrites - { - if ( arguments[1] != "y" ) - { - if (confirmPrompt("This command resumes the DDL/DML writes to the MariaDB ColumnStore Database")) - break; - } - - // resume writes to MariaDB ColumnStore Database - - try - { - BRM::DBRM dbrm; - - dbrm.setSystemSuspended(false); - - - - - - oam.setSystemStatus(ACTIVE); - cout << endl << "Resume MariaDB ColumnStore Database Writes Request successfully completed" << endl; - } - catch (exception& e) - { - cout << endl << "**** resumeDatabaseWrites Failed: " << e.what() << endl; - } - catch (...) - { - cout << endl << "**** resumeDatabaseWrites Failed" << endl; - break; - } - - break; - } - - case 34: // unassignDbrootPmConfig parameters: dbroot-list reside-pm - { - string DataRedundancyConfig = "n"; - - try - { - oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); - } - catch (...) - {} - - if (DataRedundancyConfig == "y") - { - cout << endl << "**** unassignDbrootPmConfig : command not supported on Data Redundancy configured system. " << endl; - break; - } - - if ( localModule != parentOAMModule ) - { - // exit out since not on active module - cout << endl << "**** unassignDbrootPmConfig Failed : Can only run command on Active OAM Parent Module (" << parentOAMModule << ")." << endl; - break; - } - - - if (arguments[2] == "") - { - // need atleast 2 arguments - cout << endl << "**** unassignDbrootPmConfig Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - string dbrootIDs = arguments[1]; - string residePM = arguments[2]; - - if (arguments[2].find("pm") == string::npos ) - { - cout << endl << "**** unassignDbrootPmConfig Failed : Parmameter 2 is not a Performance Module name, enter 'help' for additional information" << endl; - break; - } - - // check module status - try - { - bool degraded; - int opState; - oam.getModuleStatus(residePM, opState, degraded); - - if (opState != oam::MAN_OFFLINE) - { - cout << endl << "**** unassignDbrootPmConfig Failed, " + residePM + " has to be in a MAN_OFFLINE state" << endl; - break; - } - - } - catch (exception& ex) - {} - - DBRootConfigList dbrootlist; - - boost::char_separator sep(", "); - boost::tokenizer< boost::char_separator > tokens(dbrootIDs, sep); - - for ( boost::tokenizer< boost::char_separator >::iterator it = tokens.begin(); - it != tokens.end(); - ++it) - { - dbrootlist.push_back(atoi((*it).c_str())); - } - - cout << endl; - - //get dbroots ids for reside PM - try - { - oam.unassignDbroot(residePM, dbrootlist); - - cout << endl << " Successfully Unassigned DBRoots " << endl << endl; - - } - catch (exception& e) - { - cout << endl << "**** Failed Unassign of DBRoots: " << e.what() << endl; - break; - } - } - break; - - case 35: // assignDbrootPmConfig parameters: pm dbroot-list - { - string DataRedundancyConfig = "n"; - - try - { - oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); - } - catch (...) - {} - - if (DataRedundancyConfig == "y") - { - cout << endl << "**** assignDbrootPmConfig : command not supported on Data Redundancy configured system. " << endl; - break; - } - - if ( localModule != parentOAMModule ) - { - // exit out since not on active module - cout << endl << "**** assignDbrootPmConfig Failed : Can only run command on Active OAM Parent Module (" << parentOAMModule << ")." << endl; - break; - } - - //check the system status / service status and only allow command when System is MAN_OFFLINE - if (!oam.checkSystemRunning()) - { - cout << endl << "**** assignDbrootPmConfig Failed, System is down. Needs to be running" << endl; - break; - } - - if (arguments[2] == "") - { - // need atleast 2 arguments - cout << endl << "**** assignDbrootPmConfig Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - string dbrootIDs = arguments[1]; - string toPM = arguments[2]; - - if (arguments[2].find("pm") == string::npos ) - { - cout << endl << "**** assignDbrootPmConfig Failed : Parmameter 2 is not a Performance Module name, enter 'help' for additional information" << endl; - break; - } - - // check module status - try - { - bool degraded; - int opState; - oam.getModuleStatus(toPM, opState, degraded); - - if (opState == oam::AUTO_DISABLED || - opState == oam::MAN_DISABLED) - { - cout << "**** assignDbrootPmConfig Failed: " << toPM << " is DISABLED." << endl; - cout << "Run alterSystem-EnableModule to enable module" << endl; - break; - } - - if (!opState == oam::MAN_OFFLINE) - { - cout << "**** assignDbrootPmConfig Failed: " << toPM << " needs to be MAN_OFFLINE." << endl; - break; - } - } - catch (exception& ex) - {} - - DBRootConfigList dbrootlist; - - boost::char_separator sep(", "); - boost::tokenizer< boost::char_separator > tokens(dbrootIDs, sep); - - for ( boost::tokenizer< boost::char_separator >::iterator it = tokens.begin(); - it != tokens.end(); - ++it) - { - dbrootlist.push_back(atoi((*it).c_str())); - } - - cout << endl; - - //get dbroots ids for reside PM - try - { - oam.assignDbroot(toPM, dbrootlist); - - cout << endl << " Successfully Assigned DBRoots " << endl << endl; - - try - { - string DBRootStorageType; - oam.getSystemConfig("DBRootStorageType", DBRootStorageType); - - if (DBRootStorageType == "external" ) - { - string DataRedundancyConfig = "n"; - string cloud = oam::UnassignedName; - - try - { - oam.getSystemConfig("Cloud", cloud); - oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); - } - catch (...) - {} - - if ( DataRedundancyConfig == "n" && cloud == oam::UnassignedName) - cout << " REMINDER: Update the /etc/fstab on " << toPM << " to include these dbroot mounts" << endl << endl; - - break; - - } - } - catch (...) {} - - } - catch (exception& e) - { - cout << endl << "**** Failed Assign of DBRoots: " << e.what() << endl; - break; - } - } - break; - - case 36: // getAlarmSummary - { - printAlarmSummary(); - } - break; - - case 37: // getSystemInfo - { - try - { - printSystemStatus(); - } - catch (...) - { - break; - } - - try - { - printProcessStatus(); - } - catch (...) - { - break; - } - - printAlarmSummary(); - } - break; - - case 38: // getModuleConfig - { - SystemModuleTypeConfig systemmoduletypeconfig; - ModuleTypeConfig moduletypeconfig; - ModuleConfig moduleconfig; - systemmoduletypeconfig.moduletypeconfig.clear(); - string returnValue; - string Argument; - - if (arguments[1] == "all" || arguments[1] == "") - { - - // get and all display Module Name config parameters - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - - cout << endl << "Module Name Configuration" << endl; - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - // end of list - break; - - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0 ) - // skip if no modules - continue; - - string moduletype = systemmoduletypeconfig.moduletypeconfig[i].ModuleType; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - string modulename = (*pt).DeviceName; - string moduleID = modulename.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE); - cout << endl << "Module '" << modulename << "' Configuration information" << endl << endl; - - cout << "ModuleType = " << moduletype << endl; - cout << "ModuleDesc = " << systemmoduletypeconfig.moduletypeconfig[i].ModuleDesc << " #" << moduleID << endl; - - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++) - { - cout << "ModuleIPAdd NIC ID " + oam.itoa((*pt1).NicID) + " = " << (*pt1).IPAddr << endl; - cout << "ModuleHostName NIC ID " + oam.itoa((*pt1).NicID) + " = " << (*pt1).HostName << endl; - } - - DeviceDBRootList::iterator pt3 = systemmoduletypeconfig.moduletypeconfig[i].ModuleDBRootList.begin(); - - for ( ; pt3 != systemmoduletypeconfig.moduletypeconfig[i].ModuleDBRootList.end() ; pt3++) - { - if ( (*pt3).DeviceID == atoi(moduleID.c_str()) ) - { - cout << "DBRootIDs assigned = "; - DBRootConfigList::iterator pt2 = (*pt3).dbrootConfigList.begin(); - - for ( ; pt2 != (*pt3).dbrootConfigList.end() ;) - { - cout << oam.itoa(*pt2); - pt2++; - - if (pt2 != (*pt3).dbrootConfigList.end() ) - cout << ", "; - } - - cout << endl; - } - } - } - } - } - catch (exception& e) - { - cout << endl << "**** getModuleConfig Failed = " << e.what() << endl; - } - } - else - { - // get a single module name info - if (arguments[2] == "") - { - try - { - oam.getSystemConfig(arguments[1], moduleconfig); - - cout << endl << "Module Name Configuration for " << arguments[1] << endl << endl; - - cout << "ModuleType = " << moduleconfig.ModuleType << endl; - cout << "ModuleDesc = " << moduleconfig.ModuleDesc << endl; - HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); - - for ( ; pt1 != moduleconfig.hostConfigList.end() ; pt1++) - { - cout << "ModuleIPAdd NIC ID " + oam.itoa((*pt1).NicID) + " = " << (*pt1).IPAddr << endl; - cout << "ModuleHostName NIC ID " + oam.itoa((*pt1).NicID) + " = " << (*pt1).HostName << endl; - } - - if ( moduleconfig.ModuleType == "pm" ) - { - - cout << "DBRootIDs assigned = "; - - DBRootConfigList::iterator pt2 = moduleconfig.dbrootConfigList.begin(); - - for ( ; pt2 != moduleconfig.dbrootConfigList.end() ; ) - { - cout << oam.itoa(*pt2); - pt2++; - - if (pt2 != moduleconfig.dbrootConfigList.end()) - cout << ", "; - } - - cout << endl << endl; - } - } - catch (exception& e) - { - cout << endl << "**** getModuleConfig Failed = " << e.what() << endl; - } - } - else - { - // get a parameter for a module - // get module ID from module name entered, then get parameter - oam.getSystemConfig(systemmoduletypeconfig); - - cout << endl; - bool found = false; - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - string moduleType = arguments[1].substr(0, MAX_MODULE_TYPE_SIZE); - string moduleID = arguments[1].substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE); - - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0 ) - // skip if no modules - continue; - - if (systemmoduletypeconfig.moduletypeconfig[i].ModuleType == moduleType ) - { - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - if ( (*pt).DeviceName != arguments[1] ) - continue; - - found = true; - - if ( arguments[2] == "ModuleIPAdd" || arguments[2] == "ModuleHostName") - { - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++) - { - if ( arguments[2] == "ModuleIPAdd" ) - cout << "ModuleIPAdd NIC ID " + oam.itoa((*pt1).NicID) + " = " << (*pt1).IPAddr << endl; - else - cout << "ModuleHostName NIC ID " + oam.itoa((*pt1).NicID) + " = " << (*pt1).HostName << endl; - } - } - else - { - Argument = arguments[2] + oam.itoa(i + 1); - - try - { - oam.getSystemConfig(Argument, returnValue); - cout << endl << " " << arguments[2] << " = " << returnValue << endl << endl; - break; - } - catch (exception& e) - { - cout << endl << "**** getModuleConfig Failed = " << e.what() << endl; - break; - } - } - } - } - } - - if ( !found ) - { - // module name not found - cout << endl << "**** getModuleConfig Failed : Invalid Module Name" << endl; - break; - } - - cout << endl; - } - } - } - break; - - case 39: // getSystemDirectories - { - cout << endl << "System Installation and Temporary File Directories" << endl << endl; - - cout << "System Temporary File Directory = " << tmpDir << endl << endl; - } - break; - - case 40: - { - } - break; - - case 41: - { - } - break; - - case 42: - { - } - break; - - case 43: // assignElasticIPAddress - { - //get cloud configuration data - string cloud = oam::UnassignedName; - - try - { - oam.getSystemConfig("Cloud", cloud); - } - catch (...) {} - - if ( cloud == oam::UnassignedName ) - { - cout << endl << "**** assignElasticIPAddress Not Supported : For Amazon Systems only" << endl; - break; - } - - if (arguments[2] == "") - { - // need 2 arguments - cout << endl << "**** assignElasticIPAddress Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - parentOAMModule = getParentOAMModule(); - - if ( localModule != parentOAMModule ) - { - // exit out since not on Parent OAM Module - cout << endl << "**** assignElasticIPAddress Failed : only should be run on the Parent OAM Module, which is '" << parentOAMModule << "'" << endl; - break; - } - - string IPaddress = arguments[1]; - string moduleName = arguments[2]; - - if ( oam.validateModule(moduleName) != API_SUCCESS) - { - cout << endl << "**** assignElasticIPAddress Failed : Invalid Module name" << endl; - break; - } - - if ( moduleName == localModule ) - { - if ( arguments[3] != "y") - { - string warning = "Warning: Assigning Elastic IP Address to local module will lock up this terminal session."; - - // confirm request - if (confirmPrompt(warning)) - break; - } - } - - //check and add Elastic IP Address - int AmazonElasticIPCount = 0; - - try - { - oam.getSystemConfig("AmazonElasticIPCount", AmazonElasticIPCount); - } - catch (...) - { - AmazonElasticIPCount = 0; - } - - bool found = false; - int id = 1; - - for ( ; id < AmazonElasticIPCount + 1 ; id++ ) - { - string AmazonElasticModule = "AmazonElasticModule" + oam.itoa(id); - string ELmoduleName; - string AmazonElasticIPAddr = "AmazonElasticIPAddr" + oam.itoa(id); - string ELIPaddress; - - try - { - oam.getSystemConfig(AmazonElasticModule, ELmoduleName); - oam.getSystemConfig(AmazonElasticIPAddr, ELIPaddress); - } - catch (...) {} - - if ( ELmoduleName == moduleName && - ELIPaddress == IPaddress) - { - //assign again incase it got unconnected - //get instance id - string instanceName = oam::UnassignedName; - - try - { - ModuleConfig moduleconfig; - oam.getSystemConfig(moduleName, moduleconfig); - HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); - instanceName = (*pt1).HostName; - } - catch (...) - {} - - try - { - oam.assignElasticIP(instanceName, IPaddress); - cout << endl << " Successfully completed Assigning Elastic IP Address " << endl << endl; - } - catch (...) {} - - found = true; - break; - } - - if ( ELmoduleName == moduleName ) - { - cout << endl << "**** assignElasticIPAddress Failed : module already assigned IP Address " << ELIPaddress << endl; - found = true; - break; - } - - if ( ELIPaddress == IPaddress ) - { - cout << endl << "**** assignElasticIPAddress Failed : IP Address already assigned to module " << ELmoduleName << endl; - found = true; - break; - } - } - - if (found) - break; - - AmazonElasticIPCount++; - - //get instance id - string instanceName = oam::UnassignedName; - - try - { - ModuleConfig moduleconfig; - oam.getSystemConfig(moduleName, moduleconfig); - HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); - instanceName = (*pt1).HostName; - } - catch (...) - {} - - try - { - oam.assignElasticIP(instanceName, IPaddress); - } - catch (...) - { - cout << endl << "**** assignElasticIPAddress Failed : assignElasticIP API Error" << endl; - break; - } - - //add to configuration - string AmazonElasticModule = "AmazonElasticModule" + oam.itoa(id); - string AmazonElasticIPAddr = "AmazonElasticIPAddr" + oam.itoa(id); - - Config* sysConfig = Config::makeConfig(); - - try - { - sysConfig->setConfig("Installation", "AmazonElasticIPCount", oam.itoa(AmazonElasticIPCount)); - sysConfig->setConfig("Installation", AmazonElasticModule, moduleName); - sysConfig->setConfig("Installation", AmazonElasticIPAddr, IPaddress); - sysConfig->write(); - } - catch (...) - { - cout << "ERROR: Problem setting AmazonElasticModule in the MariaDB ColumnStore System Configuration file" << endl; - break; - } - - cout << endl << " Successfully completed Assigning Elastic IP Address " << endl << endl; - } - break; - - case 44: // unassignElasticIPAddress - { - //get cloud configuration data - string cloud = oam::UnassignedName; - - try - { - oam.getSystemConfig("Cloud", cloud); - } - catch (...) {} - - if ( cloud == oam::UnassignedName ) - { - cout << endl << "**** unassignElasticIPAddress Not Supported : For Amazon Systems only" << endl; - break; - } - - if (arguments[1] == "") - { - // need 2 arguments - cout << endl << "**** unassignElasticIPAddress Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - parentOAMModule = getParentOAMModule(); - - if ( localModule != parentOAMModule ) - { - // exit out since not on Parent OAM Module - cout << endl << "**** unassignElasticIPAddress Failed : only should be run on the Parent OAM Module, which is '" << parentOAMModule << "'" << endl; - break; - } - - string IPaddress = arguments[1]; - - //check and add Elastic IP Address - int AmazonElasticIPCount = 0; - - try - { - oam.getSystemConfig("AmazonElasticIPCount", AmazonElasticIPCount); - } - catch (...) - { - AmazonElasticIPCount = 0; - } - - bool found = false; - int id = 1; - - for ( ; id < AmazonElasticIPCount + 1 ; id++ ) - { - string AmazonElasticModule = "AmazonElasticModule" + oam.itoa(id); - string ELmoduleName; - string AmazonElasticIPAddr = "AmazonElasticIPAddr" + oam.itoa(id); - string ELIPaddress; - - try - { - oam.getSystemConfig(AmazonElasticIPAddr, ELmoduleName); - oam.getSystemConfig(AmazonElasticIPAddr, ELIPaddress); - } - catch (...) {} - - if ( ELIPaddress == IPaddress ) - { - found = true; - - try - { - oam.deassignElasticIP(IPaddress); - } - catch (...) - { - cout << endl << "**** deassignElasticIPAddress Failed : deassignElasticIP API Error"; - break; - } - - int oldAmazonElasticIPCount = AmazonElasticIPCount; - - Config* sysConfig = Config::makeConfig(); - - //move up any others - if ( oldAmazonElasticIPCount > id ) - { - for ( int newid = id + 1 ; newid < oldAmazonElasticIPCount + 1 ; newid++ ) - { - AmazonElasticModule = "AmazonElasticModule" + oam.itoa(newid); - AmazonElasticIPAddr = "AmazonElasticIPAddr" + oam.itoa(newid); - - try - { - oam.getSystemConfig(AmazonElasticModule, ELmoduleName); - oam.getSystemConfig(AmazonElasticIPAddr, ELIPaddress); - } - catch (...) {} - - AmazonElasticModule = "AmazonElasticModule" + oam.itoa(newid - 1); - AmazonElasticIPAddr = "AmazonElasticIPAddr" + oam.itoa(newid - 1); - - try - { - oam.setSystemConfig(AmazonElasticModule, ELmoduleName); - oam.setSystemConfig(AmazonElasticIPAddr, ELIPaddress); - } - catch (...) {} - } - } - - AmazonElasticModule = "AmazonElasticModule" + oam.itoa(oldAmazonElasticIPCount); - AmazonElasticIPAddr = "AmazonElasticIPAddr" + oam.itoa(oldAmazonElasticIPCount); - - //delete last entry and update count - AmazonElasticIPCount--; - - try - { - sysConfig->setConfig("Installation", "AmazonElasticIPCount", oam.itoa(AmazonElasticIPCount)); - sysConfig->delConfig("Installation", AmazonElasticModule); - sysConfig->delConfig("Installation", AmazonElasticIPAddr); - sysConfig->write(); - } - catch (...) - { - cout << "ERROR: Problem setting AmazonElasticModule in the MariaDB ColumnStore System Configuration file" << endl; - break; - } - } - } - - if (!found) - { - cout << endl << " Elastic IP Address " << IPaddress << " not assigned to a module" << endl << endl; - break; - } - - cout << endl << " Successfully completed Unassigning Elastic IP Address " << endl << endl; - - } - break; - - case 45: // getSystemNetworkConfig - { - // get and display Module Network Config - SystemModuleTypeConfig systemmoduletypeconfig; - systemmoduletypeconfig.moduletypeconfig.clear(); - - //check and add Elastic IP Address - int AmazonElasticIPCount = 0; - - try - { - oam.getSystemConfig("AmazonElasticIPCount", AmazonElasticIPCount); - } - catch (...) - { - AmazonElasticIPCount = 0; - } - - // get max length of a host name for header formatting - - int maxSize = 9; - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - // end of list - break; - - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - string moduletype = systemmoduletypeconfig.moduletypeconfig[i].ModuleType; - string moduletypedesc = systemmoduletypeconfig.moduletypeconfig[i].ModuleDesc; - - if ( moduleCount > 0 ) - { - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++) - { - if ( maxSize < (int) (*pt1).HostName.size() ) - maxSize = (*pt1).HostName.size(); - } - } - } - } - } - catch (exception& e) - { - cout << endl << "**** getSystemNetworkConfig Failed = " << e.what() << endl; - } - - cout << endl << "System Network Configuration" << endl << endl; - - cout.setf(ios::left); - cout.width(15); - cout << "Module Name"; - cout.width(30); - cout << "Module Description"; - cout.width(10); - cout << "NIC ID"; - cout.width(maxSize + 5); - cout << "Host Name"; - cout.width(20); - cout << "IP Address"; - cout.width(14); - - if ( AmazonElasticIPCount > 0 ) - { - cout.width(20); - cout << "Elastic IP Address"; - } - - cout << endl; - cout.width(15); - cout << "-----------"; - cout.width(30); - cout << "-------------------------"; - cout.width(10); - cout << "------"; - - for ( int i = 0 ; i < maxSize ; i++ ) - { - cout << "-"; - } - - cout << " "; - cout.width(20); - cout << "---------------"; - - if ( AmazonElasticIPCount > 0 ) - { - cout.width(20); - cout << "------------------"; - } - - cout << endl; - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - // end of list - break; - - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - string moduletype = systemmoduletypeconfig.moduletypeconfig[i].ModuleType; - string moduletypedesc = systemmoduletypeconfig.moduletypeconfig[i].ModuleDesc; - - if ( moduleCount > 0 ) - { - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - string modulename = (*pt).DeviceName; - string moduleID = modulename.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE); - string modulenamedesc = moduletypedesc + " #" + moduleID; - - cout.setf(ios::left); - cout.width(15); - cout << modulename; - cout.width(33); - cout << modulenamedesc; - - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++) - { - /* MCOL-1607. IPAddr may be a host name here b/c it is read straight - from the config file. */ - string tmphost = oam.getIPAddress(pt1->IPAddr); - string ipAddr; - if (tmphost.empty()) - ipAddr = pt1->IPAddr; - else - ipAddr = tmphost; - string hostname = (*pt1).HostName; - string nicID = oam.itoa((*pt1).NicID); - - if ( nicID != "1" ) - { - cout.width(48); - cout << " "; - } - - cout.width(7); - cout << nicID; - cout.width(maxSize + 5); - cout << hostname; - cout.width(20); - cout << ipAddr; - cout.width(14); - - if ( nicID == "1" && AmazonElasticIPCount > 0 ) - { - int id = 1; - - for ( ; id < AmazonElasticIPCount + 1 ; id++ ) - { - string AmazonElasticModule = "AmazonElasticModule" + oam.itoa(id); - string ELmoduleName; - string AmazonElasticIPAddr = "AmazonElasticIPAddr" + oam.itoa(id); - string ELIPaddress; - - try - { - oam.getSystemConfig(AmazonElasticModule, ELmoduleName); - oam.getSystemConfig(AmazonElasticIPAddr, ELIPaddress); - } - catch (...) {} - - if ( modulename == ELmoduleName ) - { - cout.width(20); - cout << ELIPaddress; - break; - } - } - } - - cout << endl; - } - } - } - } - } - catch (exception& e) - { - cout << endl << "**** getSystemNetworkConfig Failed = " << e.what() << endl; - } - - //get cloud configuration data - string cloud = oam::UnassignedName; - - try - { - oam.getSystemConfig("Cloud", cloud); - } - catch (...) {} - - if ( cloud == "amazon-ec2" || cloud == "amazon-vpc" ) - { - cout << endl << "Amazon Instance Configuration" << endl << endl; - - string PMInstanceType = oam::UnassignedName; - string UMInstanceType = oam::UnassignedName; - - try - { - oam.getSystemConfig("PMInstanceType", PMInstanceType); - oam.getSystemConfig("UMInstanceType", UMInstanceType); - - cout << "PMInstanceType = " << PMInstanceType << endl; - cout << "UMInstanceType = " << UMInstanceType << endl; - } - catch (...) {} - - } - - cout << endl; - - break; - } - - case 46: // enableReplication - { - if ( SingleServerInstall == "y" ) - { - // exit out since not on single-server install - cout << endl << "**** enableReplication Failed : not supported on a Single-Server type installs " << endl; - break; - } - - string MySQLRep; - - try - { - oam.getSystemConfig("MySQLRep", MySQLRep); - } - catch (...) {} - - if ( MySQLRep == "y" ) - { - string warning = "MariaDB ColumnStore Replication Feature is already enabled"; - - // confirm request - if (confirmPrompt(warning)) - break; - } - - string password; - - if ( arguments[1] == "") - { - cout << endl; - string prompt = "Enter the 'User' Password or 'ssh' if configured with ssh-keys"; - password = dataPrompt(prompt); - } - else - password = arguments[1]; - - if ( password == "") - password = oam::UnassignedName; - - //set flag - try - { - oam.setSystemConfig("MySQLRep", "y"); - sleep(2); - } - catch (...) {} - - try - { - oam.enableMySQLRep(password); - cout << endl << " Successful Enabling of MariaDB ColumnStore Replication " << endl << endl; - - //display Primary UM Module / Master Node - string PrimaryUMModuleName; - - try - { - oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName); - } - catch (...) {} - - cout << " MariaDB ColumnStore Replication Master Node is " << PrimaryUMModuleName << endl << endl; - } - catch (exception& e) - { - cout << endl << "**** enableRep Failed : " << e.what() << endl; - } - - break; - } - - case 47: // getSoftwareInfo - { - cout << endl; - - if ( rootUser) - { - string logFile = tmpDir + "/columnstore.log"; - string cmd = "rpm -qi mariadb-columnstore-platform > " + logFile + " 2>&1"; - int rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) == 0) - { - string cmd = "cat " + logFile; - system(cmd.c_str()); - } - else - { - string cmd = "dpkg -s mariadb-columnstore-platform > " + logFile + " 2>&1"; - rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) == 0) - { - string cmd = "cat " + logFile; - system(cmd.c_str()); - } - else - { - cout << "SoftwareVersion = " << columnstore_version << endl; - cout << "SoftwareRelease = " << columnstore_release << endl; - } - } - } - else - { - cout << "SoftwareVersion = " << columnstore_version << endl; - cout << "SoftwareRelease = " << columnstore_release << endl; - } - - cout << endl; - break; - } - - case 48: // addModule - parameters: Module type/Module Name, Number of Modules, Server Hostnames, - // Server root password optional - { - Config* sysConfig = Config::makeConfig(); - - if ( SingleServerInstall == "y" ) - { - // exit out since not on single-server install - cout << endl << "**** addModule Failed : not support on a Single-Server type installs " << endl; - break; - } - - parentOAMModule = getParentOAMModule(); - - if ( localModule != parentOAMModule ) - { - // exit out since not on Parent OAM Module - cout << endl << "**** addModule Failed : only should be run on the Parent OAM Module, which is '" << parentOAMModule << "'" << endl; - break; - } - - if (arguments[1] == "") - { - // need at least arguments - cout << endl << "**** addModule Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - switch ( serverInstallType ) - { - case (oam::INSTALL_COMBINE_DM_UM_PM): - { - if (arguments[1].find("um") != string::npos ) - { - cout << endl << "**** addModule Failed : User Module Types not supported on this Combined Server Installation" << endl; - return (0); - } - - break; - } - } - - string DataRedundancyConfig = "n"; - int DataRedundancyCopies; - string cloud = oam::UnassignedName; - int DataRedundancyNetworkType; - int DataRedundancyStorageType; - string AmazonVPCNextPrivateIP; - - try - { - oam.getSystemConfig("Cloud", cloud); - oam.getSystemConfig("AmazonVPCNextPrivateIP", AmazonVPCNextPrivateIP); - oam.getSystemConfig("DataRedundancyConfig", DataRedundancyConfig); - oam.getSystemConfig("DataRedundancyCopies", DataRedundancyCopies); - oam.getSystemConfig("DataRedundancyNetworkType", DataRedundancyNetworkType); - oam.getSystemConfig("DataRedundancyStorageType", DataRedundancyStorageType); - } - catch (...) {} - - ModuleTypeConfig moduletypeconfig; - DeviceNetworkConfig devicenetworkconfig; - DeviceNetworkList devicenetworklist; - DeviceNetworkList enabledevicenetworklist; - HostConfig hostconfig; - - bool storeHostnames = false; - string moduleType; - string moduleName; - int moduleCount; - string password = "ssh"; - typedef std::vector inputNames; - inputNames inputnames; - typedef std::vector umStorageNames; - umStorageNames umstoragenames; - int hostArg; - int dbrootPerPM = 0; - - //check if module type or module name was entered - if ( arguments[1].size() == 2 ) - { - //Module Type was entered - if (arguments[3] == "" && cloud == oam::UnassignedName) - { - // need at least arguments - cout << endl << "**** addModule Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - //Module Type was entered - moduleType = arguments[1]; - moduleCount = atoi(arguments[2].c_str()); - hostArg = 4; - - // MCOL-1607. Check whether we should store host names or IP addresses. - if (arguments[3] != "" && (arguments[3][0] == 'y' || arguments[3][0] == 'Y')) - storeHostnames = true; - - if (arguments[5] != "") - password = arguments[5]; - else - { - cout << endl; - string prompt = "Enter the 'User' Password or 'ssh' if configured with ssh-keys"; - password = dataPrompt(prompt); - } - - if (arguments[6] != "") - dbrootPerPM = atoi(arguments[6].c_str()); - } - else - { - //Module Name was entered - if (arguments[2] == "" && cloud == oam::UnassignedName) - { - // need at least arguments - cout << endl << "**** addModule Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - moduleName = arguments[1]; - moduleType = arguments[1].substr(0, MAX_MODULE_TYPE_SIZE); - moduleCount = 1; - hostArg = 3; - - // MCOL-1607. Check whether we should store host names or IP addresses. - if (arguments[2] != "" && (arguments[2][0] == 'y' || arguments[2][0] == 'Y')) - storeHostnames = true; - - if (arguments[4] != "") - password = arguments[4]; - else - { - cout << endl; - string prompt = "Enter the 'User' Password or 'ssh' if configured with ssh-keys"; - password = dataPrompt(prompt); - } - - if (arguments[5] != "") - dbrootPerPM = atoi(arguments[5].c_str()); - } - -//do we needed this check???? - if ( moduleCount < 1 || moduleCount > 10 ) - { - cout << endl << "**** addModule Failed : Failed to Add Module, invalid number-of-modules entered (1-10)" << endl; - break; - } - - if ( DataRedundancyConfig == "y" && moduleType == "pm" ) - { - if ( localModule != parentOAMModule ) - { - // exit out since not on active module - cout << endl << "**** addModule Failed : Can only run command on Active OAM Parent Module (" << parentOAMModule << ")." << endl; - break; - } - - if ( fmod((float) moduleCount, (float) DataRedundancyCopies) != 0 ) - { - cout << endl << "**** addModule Failed : Failed to Add Module, invalid number-of-modules: must be multiple of Data Redundancy Copies, which is " << DataRedundancyCopies << endl; - break; - } - } - - //check and parse input Hostname/VPC-IP Addresses - if (arguments[hostArg] != "") - { - boost::char_separator sep(", "); - boost::tokenizer< boost::char_separator > tokens(arguments[hostArg], sep); - - for ( boost::tokenizer< boost::char_separator >::iterator it = tokens.begin(); - it != tokens.end(); - ++it) - { - inputnames.push_back(*it); - } - } - - if ( inputnames.size() < (unsigned) moduleCount ) - { - if ( cloud == oam::UnassignedName ) - { - cout << endl << "**** addModule Failed : Failed to Add Module, number of hostnames is less than Module Count" << endl; - break; - } - else - { - if ( cloud == "amazon-ec2" ) - { - cout << endl << "Launching new Instance(s)" << endl; - - for ( int id = inputnames.size() ; id < moduleCount ; id++ ) - { - inputnames.push_back(oam::UnassignedName); - } - } - else - { - // amazon-vpc - if ( inputnames.size() == 0 ) - { - if ( AmazonVPCNextPrivateIP == oam::UnassignedName) - { - cout << endl << "**** addModule Failed : Failed to Add Module, enter VPC Private IP Address" << endl; - break; - } - else - { - if ( AmazonVPCNextPrivateIP == "autoassign") - { - for ( int id = inputnames.size() ; id < moduleCount ; id++ ) - { - inputnames.push_back("autoassign"); - } - } - else - { - for ( int id = inputnames.size() ; id < moduleCount ; id++ ) - { - inputnames.push_back(AmazonVPCNextPrivateIP); - - try - { - AmazonVPCNextPrivateIP = oam.incrementIPAddress(AmazonVPCNextPrivateIP); - } - catch (...) - { - cout << endl << "ERROR: incrementIPAddress API error, check logs" << endl; - exit(1); - } - } - } - } - } - } - } - } - - //get configured moduleNames - try - { - oam.getSystemConfig(moduleType, moduletypeconfig); - } - catch (...) - { - cout << endl << "**** addModule Failed : Failed to Add Module, getSystemConfig API Failed" << endl; - break; - } - - //get module names already in-use and Number of NIC IDs for module - typedef std::vector moduleNameList; - moduleNameList modulenamelist; - int nicNumber = 1; - - DeviceNetworkList::iterator pt = moduletypeconfig.ModuleNetworkList.begin(); - - for ( ; pt != moduletypeconfig.ModuleNetworkList.end() ; pt++) - { - modulenamelist.push_back((*pt).DeviceName); - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++) - { - if ( (*pt1).HostName != oam::UnassignedName ) - { - if ( nicNumber < (*pt1).NicID ) - nicNumber = (*pt1).NicID; - } - } - } - - if ( ((unsigned) nicNumber * moduleCount) != inputnames.size() && cloud == oam::UnassignedName ) - { - cout << endl << "**** addModule Failed : Failed to Add Module, invalid number of hostNames entered. Enter " + oam.itoa(nicNumber * moduleCount) + " hostname(s), which is the number of NICs times the number of modules" << endl; - break; - } - - int moduleID = 1; - inputNames::const_iterator listPT1 = inputnames.begin(); - - for ( int i = 0 ; i < moduleCount ; i++ ) - { - string dataDupIPaddr = "ModuleIPAddr" + oam.itoa(moduleID) + "-1-3"; - string dataDupHostName = "ModuleHostName" + oam.itoa(moduleID) + "-1-3"; - //validate or determine module name - moduleNameList::const_iterator listPT = modulenamelist.begin(); - - for ( ; listPT != modulenamelist.end() ; listPT++) - { - if ( !moduleName.empty() ) - { - //add by moduleName, validate that Entered module name doesn't exist - if ( moduleName == (*listPT) ) - { - cout << endl << "**** addModule Failed : Module Name already exist" << endl; - return 1; - } - } - else - { - //add by moduleType, get available module name - string newModuleName = moduleType + oam.itoa(moduleID); - - if ( newModuleName == (*listPT) ) - moduleID++; - else - { - moduleName = newModuleName; - moduleID++; - break; - } - } - } - - if ( moduleName.empty() ) - { - moduleName = moduleType + oam.itoa(moduleID); - moduleID++; - } - - // store module name - devicenetworkconfig.DeviceName = moduleName; - enabledevicenetworklist.push_back(devicenetworkconfig); - - for ( int j = 0 ; j < nicNumber ; j ++ ) - { - //get/check Server Hostnames IP address - string hostName; - string IPAddress; - - // MCOL-1607. Store hostnames in the config file if they entered one */ - if (storeHostnames) - { - // special case - if (cloud == "amazon-vpc" && *listPT1 == "autoassign") - { - hostName = oam::UnassignedName; - IPAddress = *listPT1; - } - else if (oam.isValidIP(*listPT1)) // they entered an IP addr - { - hostName = oam::UnassignedName; - IPAddress = *listPT1; - } - else // they entered a hostname - IPAddress = hostName = *listPT1; - } - else if ( cloud == "amazon-ec2") - { - hostName = *listPT1; - - if ( hostName != oam::UnassignedName ) - { - IPAddress = oam.getEC2InstanceIpAddress(hostName); - - if (IPAddress == "stopped" || IPAddress == "terminated") - { - cout << "ERROR: Instance " + hostName + " not running, please start and retry" << endl << endl; - return 1; - } - } - else - IPAddress = oam::UnassignedName; - } - else - { - if ( cloud == "amazon-vpc") - { - if ( *listPT1 != "autoassign" ) - { - if ( oam.isValidIP(*listPT1) ) - { - //ip address entered - hostName = oam::UnassignedName; - IPAddress = *listPT1; - } - else - { - //instance id entered - hostName = *listPT1; - IPAddress = oam.getEC2InstanceIpAddress(hostName); - - if (IPAddress == "stopped" || IPAddress == "terminated") - { - cout << "ERROR: Instance " + hostName + " not running, please start and retry" << endl << endl; - return 1; - } - } - } - else - { - hostName = oam::UnassignedName; - IPAddress = "autoassign"; - } - } - else - { - // non-amazon - hostName = *listPT1; - IPAddress = oam.getIPAddress(hostName); - if ( IPAddress.empty() ) - { - // prompt for IP Address - string prompt = "IP Address of " + hostName + " not found, enter IP Address or enter 'abort'"; - IPAddress = dataPrompt(prompt); - - if ( IPAddress == "abort" || !oam.isValidIP(IPAddress) ) - return 1; - } - } - } - - if ( DataRedundancyConfig == "y") - { - string errmsg1; - string errmsg2; - int ret = oam.glusterctl(oam::GLUSTER_PEERPROBE, IPAddress, password, errmsg2); - - if ( ret != 0 ) - { - return 1; - } - } - - hostconfig.IPAddr = IPAddress; - hostconfig.HostName = hostName; - hostconfig.NicID = j + 1; - devicenetworkconfig.hostConfigList.push_back(hostconfig); - listPT1++; - } - - devicenetworklist.push_back(devicenetworkconfig); - devicenetworkconfig.hostConfigList.clear(); - moduleName.clear(); - - if ( DataRedundancyConfig == "y" && DataRedundancyNetworkType == 2 && moduleType == "pm") - { - string DataRedundancyIPAddress = sysConfig->getConfig("DataRedundancyConfig", dataDupIPaddr); - string DataRedundancyHostname = sysConfig->getConfig("DataRedundancyConfig", dataDupHostName); - - if (DataRedundancyIPAddress.empty() || DataRedundancyHostname.empty()) - { - string prompt = "DataRedundancy is configured for dedicated network, enter a hostname"; - DataRedundancyHostname = dataPrompt(prompt); - if (storeHostnames) - DataRedundancyIPAddress = DataRedundancyHostname; - else - { - DataRedundancyIPAddress = oam.getIPAddress(DataRedundancyHostname); - - if ( DataRedundancyIPAddress.empty() ) - { - // prompt for IP Address - string prompt = "IP Address of " + DataRedundancyHostname + " not found, enter IP Address"; - DataRedundancyIPAddress = dataPrompt(prompt); - - if (!oam.isValidIP(DataRedundancyIPAddress)) - return 1; - } - } - sysConfig->setConfig("DataRedundancyConfig", dataDupHostName, DataRedundancyHostname); - sysConfig->setConfig("DataRedundancyConfig", dataDupIPaddr, DataRedundancyIPAddress); - } - } - } - - DBRootConfigList dbrootlist; - int dbrootNumber = -1; - typedef std::vector storageDeviceList; - storageDeviceList storagedevicelist; - string deviceType; - - if ( DataRedundancyConfig == "y" && moduleType == "pm") - { - cout << endl << "Data Redundancy storage will be expanded when module(s) are added." << endl; - - if ( dbrootPerPM == 0) - { - cout << endl; - // prompt for number of DBRoot - string prompt = "Number of DBRoots Per Performance Module you want to add"; - dbrootPerPM = atoi(dataPrompt(prompt).c_str()); - } - else - cout << endl << "Number of DBRoots Per Performance Module to be added is " << oam.itoa(dbrootPerPM) << endl; - - dbrootNumber = dbrootPerPM * moduleCount; - - if ( DataRedundancyStorageType == 2 ) - { - cout << endl << "Data Redundancy Storage Type is configured for 'storage'" << endl; - - cout << "You will need " << oam.itoa(dbrootNumber * DataRedundancyCopies); - cout << " total storage locations and " << oam.itoa(dbrootPerPM * DataRedundancyCopies) << " storage locations per PM. You will now " << endl; - cout << "be asked to enter the device names for the storage locations. You will enter " << endl; - cout << "them for each PM, on one line, separated by spaces (" << oam.itoa(dbrootPerPM * DataRedundancyCopies) << " names on each line)." << endl; - - DeviceNetworkList::iterator pt = devicenetworklist.begin(); - string firstPM = (*pt).DeviceName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE); - - for ( ; pt != devicenetworklist.end() ; pt++) - { - cout << endl; - string prompt = "Storage Device Names for " + (*pt).DeviceName; - string devices = dataPrompt(prompt); - storagedevicelist.push_back(devices); - } - - cout << endl; - string prompt = "Filesystem type for these storage locations (ext2,ext3,xfs,etc)"; - deviceType = dataPrompt(prompt); - } - - } - - string mysqlpassword = oam::UnassignedName; - - try - { - cout << endl << "Adding Modules "; - DeviceNetworkList::iterator pt = devicenetworklist.begin(); - string firstPM = (*pt).DeviceName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE); - - for ( ; pt != devicenetworklist.end() ; pt++) - { - cout << (*pt).DeviceName << ", "; - } - - cout << "please wait..." << endl; - - oam.addModule(devicenetworklist, password, mysqlpassword, storeHostnames); - - cout << "Add Module(s) successfully completed" << endl; - - if ( DataRedundancyConfig == "y" && moduleType == "pm" ) - { - - { - //send messages to update fstab to new modules, if needed - DeviceNetworkList::iterator pt2 = devicenetworklist.begin(); - storageDeviceList::iterator pt3 = storagedevicelist.begin(); - - for ( ; pt2 != devicenetworklist.end() ; pt2++, pt3++) - { - HostConfigList::iterator hostConfigIter = (*pt2).hostConfigList.begin(); - string moduleName = (*pt2).DeviceName; - int brickID = 1; - - if ( DataRedundancyStorageType == 2 ) - { - string devices = *pt3; - boost::char_separator sep(" "); - boost::tokenizer< boost::char_separator > tokens(devices, sep); - - for ( boost::tokenizer< boost::char_separator >::iterator it = tokens.begin(); - it != tokens.end(); - ++it) - { - string deviceName = *it; - string entry = deviceName + " /var/lib/columnstore/gluster/brick" + oam.itoa(brickID) + " " + deviceType + " defaults 1 2"; - //send update pm - oam.distributeFstabUpdates(entry, moduleName); - } - } - - string command = "remote_command.sh " + (*hostConfigIter).IPAddr + " " + password + " 'mkdir -p /var/lib/columnstore/gluster/brick" + oam.itoa(brickID) + "'"; - system(command.c_str()); - brickID++; - } - } - - //enable modules - try - { - cout << endl << "Enabling Modules " << endl; - oam.enableModule(enabledevicenetworklist); - cout << "Successful Enable of Modules " << endl; - } - catch (exception& e) - { - cout << endl << "**** enableModule Failed : " << e.what() << endl; - break; - } - - cout << endl << "Adding DBRoots" << endl; - - //add dbroots - string firstDBroot; - - try - { - oam.addDbroot(dbrootNumber, dbrootlist); - - cout << "New DBRoot IDs added = "; - DBRootConfigList::iterator pt1 = dbrootlist.begin(); - firstDBroot = oam.itoa(*pt1); - - for ( ; pt1 != dbrootlist.end() ;) - { - cout << oam.itoa(*pt1); - pt1++; - - if (pt1 != dbrootlist.end()) - cout << ", "; - } - - cout << endl; - - } - catch (exception& e) - { - cout << endl << "**** addDbroot Failed: " << e.what() << endl; - break; - } - - cout << endl << "Assigning DBRoots" << endl << endl; - - DeviceNetworkList::iterator pt = devicenetworklist.begin(); - DBRootConfigList::iterator pt1 = dbrootlist.begin(); - - for ( ; pt != devicenetworklist.end() ; pt++) - { - string moduleName = (*pt).DeviceName; - - DBRootConfigList dbrootlist; - - for ( int dbrootNum = 0; dbrootNum < dbrootPerPM ; dbrootNum++) - { - dbrootlist.push_back(*pt1); - pt1++; - } - - //assign dbroots to pm - try - { - oam.assignDbroot(moduleName, dbrootlist); - - cout << endl << "Successfully Assigned DBRoots " << endl; - - } - catch (exception& e) - { - cout << endl << "**** Failed Assign of DBRoots: " << e.what() << endl; - break; - } - } - - cout << endl << "Run Data Redundancy Setup for DBRoots" << endl; - - try - { - int ret = oam.glusterctl(oam::GLUSTER_ADD, firstPM, firstDBroot, password); - - if ( ret != 0 ) - { - cout << endl << "**** Failed Data Redundancy Add of DBRoots, " << endl; - break; - } - - cout << endl << "Successfully Completed Data Redundancy Add DBRoots " << endl; - - } - catch (...) - { - cout << endl << "**** glusterctl GLUSTER_ADD Failed" << endl; - break; - } - - cout << endl << "addModule Command Successfully completed: Run startSystem command to Activate newly added Performance Modules" << endl << endl; - } - else - { - cout << "addModule Command Successfully completed: Modules are Disabled, run alterSystem-enableModule command to enable them" << endl << endl; - } - - try - { - oam.setSystemConfig("AmazonVPCNextPrivateIP", AmazonVPCNextPrivateIP); - } - catch (...) {} - - } - catch (exception& e) - { - cout << endl << "**** addModule Failed: " << e.what() << endl; - } - catch (...) - { - cout << endl << "**** addModule Failed : Failed to Add Module" << endl; - } - - break; - } - - case 49: // removeModule - parameters: Module name/type, number-of-modules - { - string DataRedundancyConfig = "n"; - int DataRedundancyCopies; - - try - { - oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); - } - catch (...) - {} - - if (DataRedundancyConfig == "y") - { - try - { - oam.getSystemConfig( "DataRedundancyCopies", DataRedundancyCopies); - } - catch (...) - {} - } - - if ( SingleServerInstall == "y" ) - { - // exit out since not on single-server install - cout << endl << "**** removeModule Failed : not support on a Single-Server type installs " << endl; - break; - } - - if (arguments[1] == "") - { - // need atleast 1 arguments - cout << endl << "**** removeModule Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - parentOAMModule = getParentOAMModule(); - - if ( arguments[1] == parentOAMModule ) - { - // exit out since you can't manually remove OAM Parent Module - cout << endl << "**** removeModule Failed : can't manually remove the Active OAM Parent Module." << endl; - break; - } - - if ( localModule != parentOAMModule ) - { - // exit out since not on Parent OAM Module - cout << endl << "**** removeModule Failed : only should be run on the Parent OAM Module, which is '" << parentOAMModule << "'" << endl; - break; - } - - switch ( serverInstallType ) - { - case (oam::INSTALL_COMBINE_DM_UM_PM): - { - if (arguments[1].find("um") != string::npos ) - { - cout << endl << "**** removeModule Failed : User Modules not supported on the Combined Server Installation" << endl; - return 0; - } - } - } - - ModuleTypeConfig moduletypeconfig; - DeviceNetworkConfig devicenetworkconfig; - DeviceNetworkList devicenetworklist; - bool quit = false; - - string moduleType; - - //check if module type or module name was entered - if ( arguments[1].size() == 2 ) - { - //Module Type was entered - - if ( arguments[3] != "y") - { - cout << endl << "!!!!! DESTRUCTIVE COMMAND !!!!!" << endl; - string warning = "This command does a remove a module from the MariaDB ColumnStore System"; - - // confirm request - if (confirmPrompt(warning)) - break; - } - - int moduleCount = atoi(arguments[2].c_str()); - - if ( moduleCount < 1 || moduleCount > 10 ) - { - cout << endl << "**** removeModule Failed : Failed to Remove Module, invalid number-of-modules entered (1-10)" << endl; - break; - } - - if ( DataRedundancyConfig == "y" ) - { - cout << endl << "**** removeModule Failed : Data Redundancy requires you to specify modules to remove in groups." << endl; - break; - } - - cout << endl; - - moduleType = arguments[1]; - - //store moduleNames - try - { - oam.getSystemConfig(moduleType, moduletypeconfig); - } - catch (...) - { - cout << endl << "**** removeModule Failed : Failed to Remove Module, getSystemConfig API Failed" << endl; - break; - } - - int currentModuleCount = moduletypeconfig.ModuleCount; - - if ( moduleCount > currentModuleCount ) - { - cout << endl << "**** removeModule Failed : Failed to Remove Module, mount count entered to larger than configured" << endl; - break; - } - - if ( moduleCount == currentModuleCount ) - { - if ( moduleType == "pm" ) - { - cout << endl << "**** removeModule Failed : Failed to Remove Module, you can't remove last Director Module" << endl; - break; - } - } - - //get module names in-use - typedef std::vector moduleNameList; - moduleNameList modulenamelist; - - DeviceNetworkList::iterator pt = moduletypeconfig.ModuleNetworkList.begin(); - - for ( ; pt != moduletypeconfig.ModuleNetworkList.end() ; pt++) - { - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - if ( (*pt1).HostName != oam::UnassignedName ) - modulenamelist.push_back((*pt).DeviceName); - } - - moduleNameList::reverse_iterator pt1 = modulenamelist.rbegin(); - - for ( int i = 0 ; i < moduleCount ; i++) - { - devicenetworkconfig.DeviceName = *pt1; - pt1++; - devicenetworklist.push_back(devicenetworkconfig); - } - } - else - { - //Module Name was entered - - if ( arguments[2] != "y") - { - cout << endl << "!!!!! DESTRUCTIVE COMMAND !!!!!" << endl; - string warning = "This command removes module(s) from the MariaDB ColumnStore System"; - - // confirm request - if (confirmPrompt(warning)) - break; - } - - cout << endl; - - //parse module names - boost::char_separator sep(", "); - boost::tokenizer< boost::char_separator > tokens(arguments[1], sep); - - for ( boost::tokenizer< boost::char_separator >::iterator it = tokens.begin(); - it != tokens.end(); - ++it) - { - devicenetworkconfig.DeviceName = *it; - devicenetworklist.push_back(devicenetworkconfig); - - moduleType = (*it).substr(0, MAX_MODULE_TYPE_SIZE); - - try - { - oam.getSystemConfig(moduleType, moduletypeconfig); - } - catch (...) - { - cout << endl << "**** removeModule Failed : Failed to Remove Module, getSystemConfig API Failed" << endl; - quit = true; - break; - } - - int currentModuleCount = moduletypeconfig.ModuleCount; - - if ( moduleType == "pm" && currentModuleCount == 1) - { - cout << endl << "**** removeModule Failed : Failed to Remove Module, you can't remove last Performance Module" << endl; - quit = true; - break; - } - - if ( moduleType == "um" && currentModuleCount == 1) - { - cout << endl << "**** removeModule Failed : Failed to Remove Module, you can't remove last User Module" << endl; - quit = true; - break; - } - } - } - - if ( DataRedundancyConfig == "y" && devicenetworklist.size() != (size_t)DataRedundancyCopies) - { - cout << endl << "**** removeModule Failed : Data Redundancy requires you to remove modules in groups equal to number of copies" << endl; - quit = true; - } - - if (quit) - break; - - DeviceNetworkList::iterator pt = devicenetworklist.begin(); - DeviceNetworkList::iterator endpt = devicenetworklist.end(); - - // check for module status and if any dbroots still assigned - for ( ; pt != endpt ; pt++) - { - // check module status - try - { - bool degraded; - int opState; - oam.getModuleStatus((*pt).DeviceName, opState, degraded); - - if (opState == oam::MAN_OFFLINE || - opState == oam::MAN_DISABLED || - opState == oam::FAILED) - { - - } - else - { - cout << "**** removeModule Failed : " << (*pt).DeviceName << " is not MAN_OFFLINE, DISABLED, or FAILED state."; - quit = true; - cout << endl; - break; - } - } - catch (exception& ex) - {} - - // check dbrootlist should be empty on non data redundancy setups and remove dbroots if dataredundancy removal check passes - if ( moduleType == "pm" ) - { - // check for dbroots assigned - DBRootConfigList dbrootConfigList; - string moduleID = (*pt).DeviceName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE); - - try - { - oam.getPmDbrootConfig(atoi(moduleID.c_str()), dbrootConfigList); - } - catch (...) - {} - - if ( !dbrootConfigList.empty() && DataRedundancyConfig == "n") - { - cout << "**** removeModule Failed : " << (*pt).DeviceName << " has dbroots still assigned. Please run movePmDbrootConfig or unassignDbrootPmConfig."; - quit = true; - cout << endl; - break; - } - else if (DataRedundancyConfig == "y" && !dbrootConfigList.empty()) - { - bool PMlistError = true; - cout << "Removing DBRoot(s)" << endl; - DBRootConfigList::iterator dbrootListPt = dbrootConfigList.begin(); - - for ( ; dbrootListPt != dbrootConfigList.end() ; dbrootListPt++) - { - // check if ACTIVE PM has a copy of Dbroot - string pmList = ""; - - try - { - string errmsg; - int ret = oam.glusterctl(oam::GLUSTER_WHOHAS, oam.itoa(*dbrootListPt), pmList, errmsg); - - if ( ret != 0 ) - { - cout << endl << "**** removeModule Failed : " << (*pt).DeviceName << " glusterctl error" << endl; - break; - } - } - catch (...) - { - cout << endl << "**** removeModule Failed : " << (*pt).DeviceName << " glusterctl error" << endl; - break; - } - - boost::char_separator sep(" "); - boost::tokenizer< boost::char_separator > tokens(pmList, sep); - - for ( boost::tokenizer< boost::char_separator >::iterator it1 = tokens.begin(); - it1 != tokens.end(); - ++it1) - { - PMlistError = true; - DeviceNetworkList::iterator deviceNetListStartPt = devicenetworklist.begin(); - string pmWithThisdbrootCopy = (*it1); - - // walk the list of PMs that have copies of this dbroot - // and be sure they are in the list of nodes to be removed - for ( ; deviceNetListStartPt != endpt ; deviceNetListStartPt++) - { - string thisModuleID = (*deviceNetListStartPt).DeviceName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE); - - //cout << "pmWithThisDBRoot: " << pmWithThisdbrootCopy << " thisModuleID: " << thisModuleID << endl; - if (pmWithThisdbrootCopy == thisModuleID) - { - PMlistError = false; - } - } - - if (PMlistError) - { - cout << "**** removeModule Failed : Attempting to remove PMs: " << arguments[1] << " -- DBRoot" << oam.itoa(*dbrootListPt) << " has copies on PMs " << pmList << endl; - quit = true; - } - } - } - - if (!quit) - { - try - { - if (!dbrootConfigList.empty()) - { - oam.removeDbroot(dbrootConfigList); - } - - cout << endl << " Successful Removal of DBRoots " << endl << endl; - } - catch (exception& e) - { - cout << endl << "**** removeModule : Removal of DBRoots Failed: " << e.what() << endl; - quit = true; - } - } - } - } - } - - if (quit) - { - cout << endl; - break; - } - - try - { - cout << endl << "Removing Module(s) "; - DeviceNetworkList::iterator pt = devicenetworklist.begin(); - - for ( ; pt != devicenetworklist.end() ; pt++) - { - cout << (*pt).DeviceName << ", "; - } - - cout << "please wait..." << endl; - - oam.removeModule(devicenetworklist); - cout << endl << "Remove Module successfully completed" << endl << endl; - } - catch (exception& e) - { - cout << endl << "Failed to Remove Module: " << e.what() << endl << endl; - } - catch (...) - { - cout << endl << "**** removeModule Failed : Failed to Remove Module" << endl << endl; - break; - } - - break; - } - - case 50: // getModuleHostNames - { - SystemModuleTypeConfig systemmoduletypeconfig; - ModuleTypeConfig moduletypeconfig; - ModuleConfig moduleconfig; - systemmoduletypeconfig.moduletypeconfig.clear(); - string returnValue; - string Argument; - - // get and all display Module HostNames (NIC 1) - // No other data will be displayed, only the hostnames. - // This feature is designed for use by other processes. - // It was specifically installed for the sqoop import feature (version 4.5) - // If arguments[1] == PM, display only PMs, UM, display only UMs, else all. - try - { - oam.getSystemConfig(systemmoduletypeconfig); - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - // end of list - break; - - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0 ) - // skip if no modules - continue; - - string moduletype = systemmoduletypeconfig.moduletypeconfig[i].ModuleType; - - if (arguments[1] == "pm" && moduletype != "pm") - continue; - - if (arguments[1] == "um" && moduletype != "um") - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++) - { - // Only print for NIC 1 - if ((*pt1).NicID == 1) - { - // We need the name with domain and everything. - if ((*pt1).HostName == "localhost") - { - char hostName[128] = {0}; - gethostname(hostName, 128); - cout << hostName << endl; - } - else - { - struct hostent* hentName = gethostbyname((*pt1).HostName.c_str()); - - if (hentName) - { - cout << hentName->h_name << endl; - } - else - { - cout << (*pt1).HostName.c_str() << endl; - } - } - } - } - } - } - } - catch (exception& e) - { - cout << endl << "**** getModuleHostNames Failed = " << e.what() << endl; - } - - break; - } - - case 51: // disableReplication - { - if ( SingleServerInstall == "y" ) - { - // exit out since not on single-server install - cout << endl << "**** disableReplication Failed : not supported on a Single-Server type installs " << endl; - break; - } - - string MySQLRep; - - try - { - oam.getSystemConfig("MySQLRep", MySQLRep); - } - catch (...) {} - - if ( MySQLRep == "n" ) - { - string warning = "MariaDB ColumnStore Replication Feature is already disable"; - - // confirm request - if (confirmPrompt(warning)) - break; - } - - //set flag - try - { - oam.setSystemConfig("MySQLRep", "n"); - sleep(2); - } - catch (...) {} - - try - { - oam.disableMySQLRep(); - cout << endl << " Successful Disable of MariaDB ColumnStore Replication " << endl; - } - catch (exception& e) - { - cout << endl << "**** disableRep Failed : " << e.what() << endl; - } - - cout << endl; - - break; - } - - case 52: // getModuleCpuUsers - { - if (arguments[1] == "") - { - // need 1 arguments - cout << endl << "**** getModuleCpuUsers Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - int topNumber = 5; - - if (arguments[2] != "") - { - topNumber = atoi(arguments[2].c_str()); - - if ( topNumber < 1 || topNumber > 10 ) - { - cout << endl << "**** getModuleCpuUsers Failed : Invalid top Number entered" << endl; - break; - } - } - - TopProcessCpuUsers topprocesscpuusers; - - try - { - oam.getTopProcessCpuUsers(arguments[1], topNumber, topprocesscpuusers); - - printModuleCpuUsers(topprocesscpuusers); - - } - catch (exception& e) - { - cout << endl << "Failed to get Top CPU Users: " << e.what() << endl << endl; - } - catch (...) - { - cout << endl << "**** getModuleCpuUsers Failed : Failed to get Top CPU Users" << endl << endl; - break; - } - - break; - } - - case 53: // getSystemCpuUsers - { - int topNumber = 5; - - if (arguments[1] != "") - { - topNumber = atoi(arguments[1].c_str()); - - if ( topNumber < 1 || topNumber > 10 ) - { - cout << endl << "**** getSystemCpuUsers Failed : Invalid top Number entered" << endl; - break; - } - } - - cout << endl << "System Process Top CPU Users per Module" << endl << endl; - - SystemTopProcessCpuUsers systemtopprocesscpuusers; - TopProcessCpuUsers topprocesscpuusers; - - try - { - oam.getTopProcessCpuUsers(topNumber, systemtopprocesscpuusers); - - for ( unsigned int i = 0 ; i < systemtopprocesscpuusers.topprocesscpuusers.size(); i++) - { - printModuleCpuUsers(systemtopprocesscpuusers.topprocesscpuusers[i]); - } - - } - catch (exception& e) - { - cout << endl << "Failed to get Top CPU Users: " << e.what() << endl << endl; - } - catch (...) - { - cout << endl << "**** getSystemCpuUsers Failed : Failed to get Top CPU Users" << endl << endl; - break; - } - - break; - } - - case 54: // getModuleCpu - { - if (arguments[1] == "") - { - // need 1 arguments - cout << endl << "**** getModuleCpu Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - ModuleCpu modulecpu; - - try - { - oam.getModuleCpuUsage(arguments[1], modulecpu); - - printModuleCpu(modulecpu); - - } - catch (exception& e) - { - cout << endl << "Failed to get CPU Usage: " << e.what() << endl << endl; - } - catch (...) - { - cout << endl << "**** getModuleCpu Failed : Failed to get Module CPU Usage" << endl << endl; - break; - } - - break; - } - - case 55: // getSystemCpu - { - cout << endl << "System CPU Usage per Module" << endl << endl; - - SystemCpu systemcpu; - - try - { - oam.getSystemCpuUsage(systemcpu); - - for ( unsigned int i = 0 ; i < systemcpu.modulecpu.size(); i++) - { - printModuleCpu(systemcpu.modulecpu[i]); - } - - } - catch (exception& e) - { - cout << endl << "Failed to get CPU Usage: " << e.what() << endl << endl; - } - catch (...) - { - cout << endl << "**** getSystemCpu Failed : Failed to get CPU Usage" << endl << endl; - break; - } - - break; - } - - case 56: // getModuleMemoryUsers - { - if (arguments[1] == "") - { - // need 1 arguments - cout << endl << "**** getModuleMemoryUsers Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - int topNumber = 5; - - if (arguments[2] != "") - { - topNumber = atoi(arguments[2].c_str()); - - if ( topNumber < 1 || topNumber > 10 ) - { - cout << endl << "**** getModuleMemoryUsers Failed : Invalid top Number entered" << endl; - break; - } - } - - TopProcessMemoryUsers topprocessmemoryusers; - - try - { - oam.getTopProcessMemoryUsers(arguments[1], topNumber, topprocessmemoryusers); - - printModuleMemoryUsers(topprocessmemoryusers); - - } - catch (exception& e) - { - cout << endl << "Failed to get Top Memory Users: " << e.what() << endl << endl; - } - catch (...) - { - cout << endl << "**** getModuleMemoryUsers Failed : Failed to get Top Memory Users" << endl << endl; - break; - } - - break; - } - - case 57: // getSystemMemoryUsers - { - int topNumber = 5; - - if (arguments[1] != "") - { - topNumber = atoi(arguments[1].c_str()); - - if ( topNumber < 1 || topNumber > 10 ) - { - cout << endl << "**** getSystemMemoryUsers Failed : Invalid top Number entered" << endl; - break; - } - } - - cout << endl << "System Process Top Memory Users per Module" << endl << endl; - - SystemTopProcessMemoryUsers systemtopprocessmemoryusers; - TopProcessMemoryUsers topprocessmemoryusers; - - try - { - oam.getTopProcessMemoryUsers(topNumber, systemtopprocessmemoryusers); - - for ( unsigned int i = 0 ; i < systemtopprocessmemoryusers.topprocessmemoryusers.size(); i++) - { - printModuleMemoryUsers(systemtopprocessmemoryusers.topprocessmemoryusers[i]); - } - - } - catch (exception& e) - { - cout << endl << "Failed to get Top CPU Users: " << e.what() << endl << endl; - } - catch (...) - { - cout << endl << "**** getSystemMemoryUsers Failed : Failed to get Top CPU Users" << endl << endl; - break; - } - - break; - } - - case 58: // getModuleMemory - { - if (arguments[1] == "") - { - // need 1 arguments - cout << endl << "**** getModuleMemory Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - ModuleMemory modulememory; - - try - { - oam.getModuleMemoryUsage(arguments[1], modulememory); - - cout << endl << "Module Memory Usage (in K bytes)" << endl << endl; - - cout.setf(ios::left); - cout.width(8); - cout << "Module"; - cout.width(11); - cout << "Mem Total"; - cout.width(9); - cout << "Mem Used"; - cout.width(9); - cout << "cache"; - cout.width(12); - cout << "Mem Usage %"; - cout.width(11); - cout << "Swap Total"; - cout.width(10); - cout << "Swap Used"; - cout.width(13); - cout << "Swap Usage %"; - cout << endl; - - cout.setf(ios::left); - cout.width(8); - cout << "------"; - cout.width(11); - cout << "---------"; - cout.width(9); - cout << "-------"; - cout.width(9); - cout << "-------"; - cout.width(12); - cout << "----------"; - cout.width(11); - cout << "----------"; - cout.width(10); - cout << "---------"; - cout.width(13); - cout << "-----------"; - cout << endl; - - printModuleMemory(modulememory); - } - catch (exception& e) - { - cout << endl << "Failed to get Memory Usage: " << e.what() << endl << endl; - } - catch (...) - { - cout << endl << "**** getModuleMemory Failed : Failed to get Module Memory Usage" << endl << endl; - break; - } - - break; - } - - case 59: // getSystemMemory - { - cout << endl << "System Memory Usage per Module (in K bytes)" << endl << endl; - - cout.setf(ios::left); - cout.width(8); - cout << "Module"; - cout.width(11); - cout << "Mem Total"; - cout.width(10); - cout << "Mem Used"; - cout.width(9); - cout << "Cache"; - cout.width(13); - cout << "Mem Usage %"; - cout.width(12); - cout << "Swap Total"; - cout.width(11); - cout << "Swap Used"; - cout.width(14); - cout << "Swap Usage %"; - cout << endl; - - cout.setf(ios::left); - cout.width(8); - cout << "------"; - cout.width(11); - cout << "---------"; - cout.width(10); - cout << "--------"; - cout.width(9); - cout << "-------"; - cout.width(13); - cout << "-----------"; - cout.width(12); - cout << "----------"; - cout.width(11); - cout << "---------"; - cout.width(14); - cout << "------------"; - cout << endl; - - SystemMemory systemmemory; - - try - { - oam.getSystemMemoryUsage(systemmemory); - - for ( unsigned int i = 0 ; i < systemmemory.modulememory.size(); i++) - { - printModuleMemory(systemmemory.modulememory[i]); - } - - } - catch (exception& e) - { - cout << endl << "Failed to get Memory Usage: " << e.what() << endl << endl; - } - catch (...) - { - cout << endl << "**** getSystemCpu Failed : Failed to get Memory Usage" << endl << endl; - break; - } - - break; - } - - case 60: // getModuleDisk - { - if (arguments[1] == "") - { - // need 1 arguments - cout << endl << "**** getModuleDisk Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - ModuleDisk moduledisk; - - try - { - oam.getModuleDiskUsage(arguments[1], moduledisk); - - printModuleDisk(moduledisk); - - } - catch (exception& e) - { - cout << endl << "Failed to get Disk Usage: " << e.what() << endl << endl; - } - catch (...) - { - cout << endl << "**** getModuleDisk Failed : Failed to get Module Disk Usage" << endl << endl; - break; - } - - break; - } - - case 61: // getSystemDisk - { - cout << endl << "System Disk Usage per Module" << endl << endl; - - SystemDisk systemdisk; - - try - { - oam.getSystemDiskUsage(systemdisk); - - for ( unsigned int i = 0 ; i < systemdisk.moduledisk.size(); i++) - { - printModuleDisk(systemdisk.moduledisk[i]); - } - - } - catch (exception& e) - { - cout << endl << "Failed to get Memory Usage: " << e.what() << endl << endl; - } - catch (...) - { - cout << endl << "**** getSystemCpu Failed : Failed to get Memory Usage" << endl << endl; - break; - } - - break; - } - - case 62: // getModuleResources - { - if (arguments[1] == "") - { - // need 1 arguments - cout << endl << "**** getModuleResources Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - int topNumber = 5; - - TopProcessCpuUsers topprocesscpuusers; - - try - { - oam.getTopProcessCpuUsers(arguments[1], topNumber, topprocesscpuusers); - } - catch (exception& e) - { - cout << endl << "Failed to get Top CPU Users: " << e.what() << endl << endl; - break; - } - catch (...) - { - cout << endl << "**** getModuleCpuUsers Failed : Failed to get Top CPU Users" << endl << endl; - break; - } - - ModuleCpu modulecpu; - - try - { - oam.getModuleCpuUsage(arguments[1], modulecpu); - } - catch (exception& e) - { - cout << endl << "Failed to get CPU Usage: " << e.what() << endl << endl; - break; - } - catch (...) - { - cout << endl << "**** getModuleCpu Failed : Failed to get Module CPU Usage" << endl << endl; - break; - } - - TopProcessMemoryUsers topprocessmemoryusers; - - try - { - oam.getTopProcessMemoryUsers(arguments[1], topNumber, topprocessmemoryusers); - } - catch (exception& e) - { - cout << endl << "Failed to get Top Memory Users: " << e.what() << endl << endl; - break; - } - catch (...) - { - cout << endl << "**** getModuleMemoryUsers Failed : Failed to get Top Memory Users" << endl << endl; - break; - } - - ModuleMemory modulememory; - - try - { - oam.getModuleMemoryUsage(arguments[1], modulememory); - } - catch (exception& e) - { - cout << endl << "Failed to get Memory Usage: " << e.what() << endl << endl; - break; - } - catch (...) - { - cout << endl << "**** getModuleMemory Failed : Failed to get Module Memory Usage" << endl << endl; - break; - } - - ModuleDisk moduledisk; - - try - { - oam.getModuleDiskUsage(arguments[1], moduledisk); - } - catch (exception& e) - { - cout << endl << "Failed to get Disk Usage: " << e.what() << endl << endl; - break; - } - catch (...) - { - cout << endl << "**** getModuleDisk Failed : Failed to get Module Disk Usage" << endl << endl; - break; - } - - printModuleResources(topprocesscpuusers, modulecpu, topprocessmemoryusers, modulememory, moduledisk); - - break; - } - - case 63: // getSystemResources - { - cout << endl << "System Resource Usage per Module" << endl << endl; - - int topNumber = 5; - - SystemModuleTypeConfig systemmoduletypeconfig; - ModuleTypeConfig moduletypeconfig; - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - // end of list - continue; - - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0 ) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - string modulename = (*pt).DeviceName; - - if ( modulename == "unknown" ) - continue; - - TopProcessCpuUsers topprocesscpuusers; - - try - { - oam.getTopProcessCpuUsers(modulename, topNumber, topprocesscpuusers); - } - catch (exception& e) - { - cout << endl << "Failed to get Top CPU Users: " << e.what() << endl << endl; - break; - } - catch (...) - { - cout << endl << "**** getModuleCpuUsers Failed : Failed to get Top CPU Users" << endl << endl; - break; - } - - ModuleCpu modulecpu; - - try - { - oam.getModuleCpuUsage(modulename, modulecpu); - } - catch (exception& e) - { - cout << endl << "Failed to get CPU Usage: " << e.what() << endl << endl; - break; - } - catch (...) - { - cout << endl << "**** getModuleCpu Failed : Failed to get Module CPU Usage" << endl << endl; - break; - } - - TopProcessMemoryUsers topprocessmemoryusers; - - try - { - oam.getTopProcessMemoryUsers(modulename, topNumber, topprocessmemoryusers); - } - catch (exception& e) - { - cout << endl << "Failed to get Top Memory Users: " << e.what() << endl << endl; - break; - } - catch (...) - { - cout << endl << "**** getModuleMemoryUsers Failed : Failed to get Top Memory Users" << endl << endl; - break; - } - - ModuleMemory modulememory; - - try - { - oam.getModuleMemoryUsage(modulename, modulememory); - } - catch (exception& e) - { - cout << endl << "Failed to get Memory Usage: " << e.what() << endl << endl; - break; - } - catch (...) - { - cout << endl << "**** getModuleMemory Failed : Failed to get Module Memory Usage" << endl << endl; - break; - } - - ModuleDisk moduledisk; - - try - { - oam.getModuleDiskUsage(modulename, moduledisk); - } - catch (exception& e) - { - cout << endl << "Failed to get Disk Usage: " << e.what() << endl << endl; - break; - } - catch (...) - { - cout << endl << "**** getModuleDisk Failed : Failed to get Module Disk Usage" << endl << endl; - break; - } - - printModuleResources(topprocesscpuusers, modulecpu, topprocessmemoryusers, modulememory, moduledisk); - } - } - } - catch (exception& e) - { - cout << endl << "**** getSystemResources Failed : " << e.what() << endl; - } - - break; - } - - case 64: // getActiveSQLStatements - { - cout << endl << "Get List of Active SQL Statements" << endl; - cout << "=================================" << endl << endl; - - ActiveSqlStatements activesqlstatements; - - try - { - oam.getActiveSQLStatements(activesqlstatements); - - if ( activesqlstatements.size() == 0 ) - { - cout << "No Active SQL Statements at this time" << endl << endl; - break; - } - - cout << "Start Time Time (hh:mm:ss) Session ID SQL Statement" << endl; - cout << "---------------- ---------------- -------------------- ------------------------------------------------------------" << endl; - - for ( unsigned int i = 0 ; i < activesqlstatements.size(); i++) - { - struct tm tmStartTime; - char timeBuf[36]; - time_t startTime = activesqlstatements[i].starttime; - localtime_r(&startTime, &tmStartTime); - (void)strftime(timeBuf, 36, "%b %d %H:%M:%S", &tmStartTime); - - cout.setf(ios::left); - cout.width(21); - cout << timeBuf; - - //get current time in Epoch - time_t cal; - time (&cal); - - int runTime = cal - activesqlstatements[i].starttime; - int runHours = runTime / 3600; - int runMinutes = (runTime - (runHours * 3600)) / 60; - int runSeconds = runTime - (runHours * 3600) - (runMinutes * 60); - - cout.width(15); - string hours = oam.itoa(runHours); - string minutes = oam.itoa(runMinutes); - string seconds = oam.itoa(runSeconds); - - string run; - - if ( hours.size() == 1 ) - run = "0" + hours + ":"; - else - run = hours + ":"; - - if ( minutes.size() == 1 ) - run = run + "0" + minutes + ":"; - else - run = run + minutes + ":"; - - if ( seconds.size() == 1 ) - run = run + "0" + seconds; - else - run = run + seconds; - - cout << run; - - cout.width(23); - cout << activesqlstatements[i].sessionid; - - string SQLStatement = activesqlstatements[i].sqlstatement; - int pos = 0; - - for ( ;; ) - { - string printSQL = SQLStatement.substr(pos, 60); - pos = pos + 60; - cout << printSQL << endl; - - if ( printSQL.size() < 60 ) - break; - - cout.width(59); - cout << " "; - } - - cout << endl; - } - - } - catch (exception& e) - { - cout << endl << "Failed to get List of Active SQL Statements: " << e.what() << endl << endl; - } - catch (...) - { - cout << endl << "**** getActiveSQLStatements Failed : Failed to get List of Active SQL Statements" << endl << endl; - break; - } - - break; - } - - case 65: // alterSystem-disableModule - { - - string DataRedundancyConfig = "n"; - - try - { - oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); - } - catch (...) - {} - - if ( SingleServerInstall == "y" ) - { - // exit out since not on single-server install - cout << endl << "**** alterSystem-disableModule Failed : not support on a Single-Server type installs " << endl; - break; - } - - parentOAMModule = getParentOAMModule(); - - if ( localModule != parentOAMModule ) - { - //exit out since not on Parent OAM Module - cout << endl << "**** alterSystem-disableModule Failed : only should be run on the Parent OAM Module, which is '" << parentOAMModule << "'" << endl; - break; - } - - if (arguments[1] == "") - { - // need arguments - cout << endl << "**** alterSystem-disableModule Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - parentOAMModule = getParentOAMModule(); - - if ( arguments[1] == parentOAMModule ) - { - // exit out since you can't manually remove OAM Parent Module - cout << endl << "**** alterSystem-disableModule Failed : can't manually disable the Active OAM Parent Module." << endl; - break; - } - - string moduleType = arguments[1].substr(0, MAX_MODULE_TYPE_SIZE); - - gracefulTemp = INSTALL; - - //display Primary UM Module - string PrimaryUMModuleName; - - try - { - oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName); - } - catch (...) {} - - bool primUM = false; - - if ( PrimaryUMModuleName == arguments[1] ) - { - cout << endl << "This command stops the processing of applications on the Primary User Module, which is where DDL/DML are performed"; - - if (confirmPrompt("If there is another module that can be changed to a new Primary User Module, this will be done")) - break; - - primUM = true; - } - else - { - // confirm request - if ( arguments[2] != "y" ) - { - if (confirmPrompt("This command stops the processing of applications on a Module within the MariaDB ColumnStore System")) - break; - } - } - - //parse module names - DeviceNetworkConfig devicenetworkconfig; - DeviceNetworkList devicenetworklist; - - boost::char_separator sep(", "); - boost::tokenizer< boost::char_separator > tokens(arguments[1], sep); - - for ( boost::tokenizer< boost::char_separator >::iterator it = tokens.begin(); - it != tokens.end(); - ++it) - { - devicenetworkconfig.DeviceName = *it; - devicenetworklist.push_back(devicenetworkconfig); - } - - DeviceNetworkList::iterator pt = devicenetworklist.begin(); - DeviceNetworkList::iterator endpt = devicenetworklist.end(); - - bool quit = false; - - // check for module status and if any dbroots still assigned - if ( moduleType == "pm" ) - { - for ( ; pt != endpt ; pt++) - { - // check for dbroots assigned - DBRootConfigList dbrootConfigList; - string moduleID = (*pt).DeviceName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE); - - try - { - oam.getPmDbrootConfig(atoi(moduleID.c_str()), dbrootConfigList); - } - catch (...) - {} - - if ( !dbrootConfigList.empty() && DataRedundancyConfig == "n") - { - cout << endl << "**** alterSystem-disableModule Failed : " << (*pt).DeviceName << " has dbroots still assigned and will not be disabled. Please run movePmDbrootConfig or unassignDbrootPmConfig."; - quit = true; - cout << endl; - break; - } - else if (!dbrootConfigList.empty() && DataRedundancyConfig == "y") - { - //check if dbroot requested to be removed is empty and dboot #1 is requested to be removed - DBRootConfigList::iterator pt = dbrootConfigList.begin(); - - for ( ; pt != dbrootConfigList.end() ; pt++) - { - int dbrootID = *pt; - - //check if dbroot is empty - bool isEmpty = false; - string errMsg; - - try - { - BRM::DBRM dbrm; - - if ( dbrm.isDBRootEmpty(dbrootID, isEmpty, errMsg) != 0) - { - cout << endl << "**** alterSystem-disableModule Failed : Data Redundancy detected DBRoots must be empty to be disabled. Remove data from DBRoot #" << oam.itoa(dbrootID) << " to continue." << endl; - cout << "ERROR: isDBRootEmpty API error, dbroot #" << oam.itoa(dbrootID) << " :" << errMsg << endl; - quit = true; - } - } - catch (...) - {} - } - } - } - - if (quit) - { - cout << endl; - break; - } - } - - if ( devicenetworklist.empty() ) - { - cout << endl << "quiting, no modules to remove." << endl << endl; - break; - } - - // stop module - try - { - cout << endl << " Stopping Modules" << endl; - oam.stopModule(devicenetworklist, gracefulTemp, ackTemp); - cout << " Successful stop of Modules " << endl; - } - catch (exception& e) - { - - string Failed = e.what(); - - if (Failed.find("Disabled") != string::npos) - cout << endl << " Successful stop of Modules " << endl; - else - { - cout << endl << "**** stopModule Failed : " << e.what() << endl; - break; - } - } - - // disable module - try - { - cout << endl << " Disabling Modules" << endl; - oam.disableModule(devicenetworklist); - cout << " Successful disable of Modules " << endl; - - //display Primary UM Module - string PrimaryUMModuleName; - - try - { - oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName); - } - catch (...) {} - - if ( primUM && - PrimaryUMModuleName != arguments[1] ) - cout << endl << " New Primary User Module = " << PrimaryUMModuleName << endl; - - } - catch (exception& e) - { - cout << endl << "**** disableModule Failed : " << e.what() << endl; - break; - } - - cout << endl; - break; - } - - case 66: // alterSystem-enableModule - { - if ( SingleServerInstall == "y" ) - { - // exit out since not on single-server install - cout << endl << "**** alterSystem-enableModule Failed : not support on a Single-Server type installs " << endl; - break; - } - - parentOAMModule = getParentOAMModule(); - - if ( localModule != parentOAMModule ) - { - //exit out since not on Parent OAM Module - cout << endl << "**** alterSystem-enableModule Failed : only should be run on the Parent OAM Module, which is '" << parentOAMModule << "'" << endl; - break; - } - - if (arguments[1] == "") - { - // need arguments - cout << endl << "**** alterSystem-enableModule Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - string moduleType = arguments[1].substr(0, MAX_MODULE_TYPE_SIZE); - - ACK_FLAG ackTemp = ACK_YES; - - // confirm request - if ( arguments[2] != "y" ) - { - if (confirmPrompt("This command starts the processing of applications on a Module within the MariaDB ColumnStore System")) - break; - } - - //parse module names - DeviceNetworkConfig devicenetworkconfig; - DeviceNetworkList devicenetworklist; - boost::char_separator sep(", "); - boost::tokenizer< boost::char_separator > tokens(arguments[1], sep); - - for ( boost::tokenizer< boost::char_separator >::iterator it = tokens.begin(); - it != tokens.end(); - ++it) - { - devicenetworkconfig.DeviceName = *it; - devicenetworklist.push_back(devicenetworkconfig); - } - - //get the system status, enable modules and startmodules if system is ACTIVE - SystemStatus systemstatus; - - try - { - oam.getSystemStatus(systemstatus); - - // enable module - try - { - cout << endl << " Enabling Modules " << endl; - oam.enableModule(devicenetworklist); - cout << " Successful enable of Modules " << endl; - } - catch (exception& e) - { - cout << endl << "**** enableModule Failed : " << e.what() << endl; - break; - } - - if ( moduleType == "pm" ) - { - cout << endl << " Performance Module(s) Enabled, run movePmDbrootConfig or assignDbrootPmConfig to assign dbroots, if needed" << endl << endl; - break; - } - else - { - if (systemstatus.SystemOpState == oam::ACTIVE ) - { - try - { - cout << endl << " Restarting System " << endl; - gracefulTemp = oam::FORCEFUL; - int returnStatus = oam.restartSystem(gracefulTemp, ackTemp); - switch (returnStatus) - { - case API_SUCCESS: - if ( waitForActive() ) - cout << endl << " Successful restart of System " << endl << endl; - else - cout << endl << "**** restartSystem Failed : check log files" << endl; - break; - case API_CANCELLED: - cout << endl << " Restart of System canceled" << endl << endl; - break; - default: - cout << endl << "**** restartSystem Failed : Check system logs" << endl; - break; - } - } - catch (exception& e) - { - cout << endl << "**** restartSystem Failed : " << e.what() << endl; - break; - } - } - else - cout << endl << " System not Active, run 'startSystem' to start system if needed" << endl; - } - } - catch (exception& e) - { - cout << endl << "**** alterSystem-enableModule Failed : " << e.what() << endl; - break; - } - catch (...) - { - cout << endl << "**** alterSystem-enableModule Failed, Failed return from getSystemStatus API" << endl; - break; - } - - cout << endl; - - break; - } - - case 67: // stopModule parameters moduleID - { - - if (arguments[1] == "") - { - // need arguments - cout << endl << "**** stopModule Failed : Missing a required Parameter, enter 'help' for additional information" << endl; - break; - } - - string moduleType = arguments[1].substr(0, MAX_MODULE_TYPE_SIZE); - - //gracefulTemp = INSTALL; - - // confirm request - if ( arguments[2] != "y" ) - { - if (confirmPrompt("This command stops the processing of applications on a Module within the MariaDB ColumnStore System")) - break; - } - - //parse module names - DeviceNetworkConfig devicenetworkconfig; - DeviceNetworkList devicenetworklist; - - boost::char_separator sep(", "); - boost::tokenizer< boost::char_separator > tokens(arguments[1], sep); - - for ( boost::tokenizer< boost::char_separator >::iterator it = tokens.begin(); - it != tokens.end(); - ++it) - { - devicenetworkconfig.DeviceName = *it; - devicenetworklist.push_back(devicenetworkconfig); - } - - if ( devicenetworklist.empty() ) - { - cout << endl << "No modules to stop." << endl << endl; - break; - } - - // stop module - try - { - cout << endl << " Stopping Module(s)" << endl; - oam.stopModule(devicenetworklist, gracefulTemp, ackTemp); - cout << " Successful stop of Module(s) " << endl; - } - catch (exception& e) - { - - string Failed = e.what(); - - if (Failed.find("Disabled") != string::npos) - cout << endl << " Successful stop of Module(s) " << endl; - else - { - cout << endl << "**** stopModule Failed : " << e.what() << endl; - break; - } - } - - break; - } - - - default: - { - cout << arguments[0] << ": Unknown Command, type help for list of commands" << endl << endl; - return 1; - } - } - - return 0; -} - -/****************************************************************************************** - * @brief ProcessSupportCommand - * - * purpose: Process Support commands - * - ******************************************************************************************/ -int ProcessSupportCommand(int CommandID, std::string arguments[]) -{ - Oam oam; - GRACEFUL_FLAG gracefulTemp = GRACEFUL; - ACK_FLAG ackTemp = ACK_YES; - CC_SUSPEND_ANSWER suspendAnswer = WAIT; - bool bNeedsConfirm = true; - string cmd; - - switch ( CommandID ) - { - case 0: // helpsupport - { - // display commands in the Support Command list - cout << endl << "List of Support commands" << endl << endl; - - for (int i = 1;; i++) - { - if (supportCmds[i] == "") - // end of list - break; - - cout << " " << supportCmds[i] << endl; - } - - cout << endl; - } - break; - - case 1: // stopprocess - parameters: Process-name, Module-name, Graceful flag, Ack flag - { - if (arguments[2] == "") - { - // need arguments - cout << endl << "**** stopprocess Failed : Missing a required Parameter, Enter Process and Module names" << endl; - break; - } - - // don't allow stopping of Process-Monitor - if ( arguments[1] == "ProcessMonitor" ) - { - cout << "ProcessMonitor is controlled by 'init' and can not be stopped" << endl; - break; - } - else - { - // give warning for Process-Monitor - if ( arguments[1] == "ProcessManager" ) - { - if (confirmPrompt("ProcessManager is the Interface for the Console and should only be removed as part of a MariaDB ColumnStore Package installation")) - break; - } - else - { - if ( arguments[3] != "y" ) - { - getFlags(arguments, gracefulTemp, ackTemp, suspendAnswer, bNeedsConfirm); - - // confirm request - if (confirmPrompt("This command stops the processing of an application on a Module within the MariaDB ColumnStore System")) - break; - } - } - } - - try - { - oam.stopProcess(arguments[2], arguments[1], gracefulTemp, ackTemp); - cout << endl << " Successful stop of Process " << arguments[1] << endl << endl; - } - catch (exception& e) - { - cout << endl << "**** stopprocess Failed : " << e.what() << endl; - } - } - break; - - case 2: // startprocess - parameters: Process-name, Module-name, Graceful flag, Ack flag - { - if (arguments[2] == "") - { - // need arguments - cout << endl << "**** startprocess Failed : Missing a required Parameter, Enter Process and Module names" << endl; - break; - } - - getFlags(arguments, gracefulTemp, ackTemp, suspendAnswer, bNeedsConfirm); - - try - { - oam.startProcess(arguments[2], arguments[1], gracefulTemp, ackTemp); - cout << endl << " Successful start of Process " << arguments[1] << endl << endl; - } - catch (exception& e) - { - cout << endl << "**** startprocess Failed : " << e.what() << endl; - } - } - break; - - case 3: // restartprocess - parameters: Process-name, Module-name, Graceful flag, Ack flag - { - if (arguments[2] == "") - { - // need arguments - cout << endl << "**** restartprocess Failed : Missing a required Parameter, Enter Process and Module names" << endl; - break; - } - - getFlags(arguments, gracefulTemp, ackTemp, suspendAnswer, bNeedsConfirm); - - if (arguments[3] != "y") - { - // confirm request - if (confirmPrompt("This command restarts the processing of an application on a Module within the MariaDB ColumnStore System")) - break; - } - - - try - { - oam.restartProcess(arguments[2], arguments[1], gracefulTemp, ackTemp); - cout << endl << " Successful restart of Process " << arguments[1] << endl << endl; - } - catch (exception& e) - { - cout << endl << "**** restartprocess Failed : " << e.what() << endl; - } - } - break; - - case 4: // killpid - { - if (arguments[1] == "" || arguments[2] != "") - { - // need arguments - cout << endl << "**** killpid Failed : Invalid or Missing Parameter, Enter local Process-ID" << endl; - break; - } - - pid_t PID = atoi(arguments[1].c_str()); - - if ( PID <= 0 ) - { - cout << endl << "**** killpid Failed : Invalid Process-ID Entered" << endl; - break; - } - - int status = kill( PID, SIGTERM); - - if ( status != API_SUCCESS) - cout << endl << " Failure in kill of Process-ID " << arguments[1] << ", Failed: " << errno << endl << endl; - else - cout << endl << " Successful kill of Process-ID " << arguments[1] << endl << endl; - } - break; - - case 5: // rebootsystem - parameters: password - { - if ( !rootUser) - { - cout << endl << "**** rebootsystem Failed : command not available when running as non-root user" << endl; - break; - } - - parentOAMModule = getParentOAMModule(); - - if ( localModule != parentOAMModule ) - { - // exit out since not on Parent OAM Module - cout << endl << "**** rebootsystem Failed : only should be run on the Parent OAM Module, which is '" << parentOAMModule << "'" << endl; - break; - } - - if (arguments[1] == "" || arguments[1] == "y") - { - // need arguments - cout << endl << "**** rebootsystem Failed : Invalid or Missing Parameter, Provide root-password" << endl; - break; - } - - string password = arguments[1]; - - if ( arguments[2] != "y") - { - cout << endl << "!!!!! DESTRUCTIVE COMMAND !!!!!" << endl; - string warning = "This command stops the Processing of applications and reboots all modules within the MariaDB ColumnStore System"; - - // confirm request - if (confirmPrompt(warning)) - break; - } - - cout << endl << " Stop System being performed, please wait..." << endl; - - try - { - cout << endl << " System being stopped, please wait... " << endl; - oam.stopSystem(GRACEFUL, ACK_YES); - - if ( waitForStop() ) - cout << endl << " Successful stop of System " << endl << endl; - else - cout << endl << "**** stopSystem Failed : check log files" << endl; - } - catch (exception& e) - { - cout << endl << "**** stopSystem Failed : " << e.what() << endl; - string warning = "stopSystem command failed,"; - - // confirm request - if (confirmPrompt(warning)) - break; - } - - SystemModuleTypeConfig systemmoduletypeconfig; - systemmoduletypeconfig.moduletypeconfig.clear(); - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - - bool FAILED = false; - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - // end of list - break; - - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - string moduletype = systemmoduletypeconfig.moduletypeconfig[i].ModuleType; - - if ( moduleCount > 0 ) - { - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - string modulename = (*pt).DeviceName; - - if (modulename == parentOAMModule ) - { - //do me last - continue; - } - - //skip modules in MAN_DISABLED state - try - { - int opState; - bool degraded; - oam.getModuleStatus(modulename, opState, degraded); - - if (opState == oam::MAN_DISABLED ) - //skip - continue; - } - catch (exception& ex) - {} - - //run remote command script - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - cmd = "remote_command.sh " + (*pt1).IPAddr + " " + password + " reboot " ; - int rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) != 0) - { - cout << "Failed with running remote_command.sh" << endl; - FAILED = true; - } - else - cout << endl << " Successful reboot request of Module " << modulename << endl; - } - } - } - - if ( FAILED ) - break; - - //reboot local module - int rtnCode = system("reboot"); - - if (WEXITSTATUS(rtnCode) != 0) - cout << "Failed rebooting local module" << endl; - else - { - cout << endl << " Successful reboot request of local Module" << endl; - // close the log file - writeLog("End of a command session!!!"); - logFile.close(); - cout << endl << "Exiting the MariaDB ColumnStore Command Console" << endl; - exit (0); - } - } - catch (...) - { - cout << endl << "**** rebootsystem Failed : Failed on getSystemConfig API" << endl; - break; - } - } - break; - - case 6: // rebootnode - parameters: module-name password - { - if ( !rootUser) - { - cout << endl << "**** rebootnode Failed : command not available when running as non-root user" << endl; - break; - } - - if (arguments[1] == "" || arguments[2] == "") - { - // need arguments - cout << endl << "**** rebootnode Failed : Invalid or Missing Parameter, Enter module-name and root-password" << endl; - break; - } - - string inputModuleName = arguments[1]; - string password = arguments[2]; - - if ( arguments[3] != "y") - { - cout << endl << "!!!!! DESTRUCTIVE COMMAND !!!!!" << endl; - string warning = "This command reboots a node within the MariaDB ColumnStore System"; - - // confirm request - if (confirmPrompt(warning)) - break; - } - - SystemModuleTypeConfig systemmoduletypeconfig; - systemmoduletypeconfig.moduletypeconfig.clear(); - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - unsigned int i = 0; - - for ( ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - { - // end of list - break; - } - - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - string moduletype = systemmoduletypeconfig.moduletypeconfig[i].ModuleType; - - if ( moduleCount > 0 ) - { - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - string modulename = (*pt).DeviceName; - - if (inputModuleName == modulename ) - { - if (inputModuleName == localModule ) - { - //reboot local module - int rtnCode = system("reboot"); - - if (WEXITSTATUS(rtnCode) != 0) - cout << "Failed rebooting local node" << endl; - else - { - cout << endl << " Successful reboot request of Node " << modulename << endl; - // close the log file - writeLog("End of a command session!!!"); - logFile.close(); - cout << endl << "Exiting the MariaDB ColumnStore Command Console" << endl; - exit (0); - } - } - else - { - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - string ipAddr = (*pt1).IPAddr; - //run remote command script - cmd = "remote_command.sh " + ipAddr + " " + password + " reboot " ; - int rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) != 0) - cout << "Failed with running remote_command.sh" << endl; - else - cout << endl << " Successful reboot request of Node " << modulename << endl; - - return (0); - } - } - } - } - } - } - catch (...) - { - cout << endl << "**** rebootnode Failed : Failed on getSystemConfig API" << endl; - break; - } - } - break; - - case 7: // stopdbrmprocess - { - if ( arguments[1] != "y" ) - { - // confirm request - if (confirmPrompt("This command stops the dbrm processes within the MariaDB ColumnStore System")) - break; - } - - try - { - oam.stopProcessType("DBRM"); - cout << endl << " Successful stop of DBRM Processes" << endl << endl; - } - catch (exception& e) - { - cout << endl << "**** stopdbrmprocess Failed : " << e.what() << endl; - } - } - break; - - case 8: // startdbrmprocess - { - try - { - oam.startProcessType("DBRM"); - cout << endl << " Successful Start of DBRM Processes" << endl << endl; - } - catch (exception& e) - { - cout << endl << "**** startdbrmprocess Failed : " << e.what() << endl; - } - } - break; - - case 9: // restartdbrmprocess - { - if ( arguments[1] != "y" ) - { - // confirm request - if (confirmPrompt("This command restarts the dbrm processes within the MariaDB ColumnStore System")) - break; - } - - try - { - oam.restartProcessType("DBRM"); - cout << endl << " Successful Restart of DBRM Processes" << endl << endl; - } - catch (exception& e) - { - cout << endl << "**** restartdbrmprocess Failed : " << e.what() << endl; - } - } - break; - - case 10: // setsystemstartupstate - { - Config* sysConfig = Config::makeConfig(); - - parentOAMModule = getParentOAMModule(); - - if ( localModule != parentOAMModule ) - { - // exit out since not on Parent OAM Module - cout << endl << "**** setsystemstartupstate Failed : only should be run on the Parent OAM Module, which is '" << parentOAMModule << "'" << endl; - break; - } - - string systemStartupOffline; - - try - { - systemStartupOffline = sysConfig->getConfig("Installation", "SystemStartupOffline"); - cout << "SystemStartupOffline currently set to '" + systemStartupOffline + "'" << endl; - } - catch (...) - { - cout << "ERROR: Problem getting systemStartupOffline from the MariaDB ColumnStore System Configuration file" << endl; - return 1; - } - - while (true) - { - char* pcommand = 0; - string prompt; - string temp = "cancel"; - prompt = "Set system startup state to offline: (y,n,cancel) [cancel]: "; - pcommand = readline(prompt.c_str()); - - if (pcommand) - { - if (strlen(pcommand) > 0) temp = pcommand; - - free(pcommand); - pcommand = 0; - } - - if ( temp == "cancel" ) - return 0; - - if ( temp == "n" || temp == "y") - { - systemStartupOffline = temp; - break; - } - - cout << "Invalid Option, please re-enter" << endl; - } - - try - { - sysConfig->setConfig("Installation", "SystemStartupOffline", systemStartupOffline); - sysConfig->write(); - } - catch (...) - { - cout << "ERROR: Problem setting systemStartupOffline in the MariaDB ColumnStore System Configuration file" << endl; - exit(-1); - } - - cout << endl << " Successful setting of systemStartupOffline to '" << systemStartupOffline << "'" << endl << endl; - } - break; - - case 11: // stopPrimProcs - { - if ( arguments[1] != "y" ) - { - // confirm request - if (confirmPrompt("This command stops the PrimProc processes within the MariaDB ColumnStore System")) - break; - } - - try - { - oam.stopProcessType("PrimProc"); - cout << endl << " Successful stop of PrimProc Processes" << endl << endl; - } - catch (exception& e) - { - cout << endl << "**** stopPrimProcs Failed : " << e.what() << endl; - } - } - break; - - case 12: // startPrimProcs - { - try - { - oam.startProcessType("PrimProc"); - cout << endl << " Successful Start of PrimProc Processes" << endl << endl; - } - catch (exception& e) - { - cout << endl << "**** startPrimProcs Failed : " << e.what() << endl; - } - } - break; - - case 13: // restartPrimProcs - { - if ( arguments[1] != "y" ) - { - // confirm request - if (confirmPrompt("This command restarts the PrimProc processes within the MariaDB ColumnStore System")) - break; - } - - try - { - oam.restartProcessType("PrimProc"); - cout << endl << " Successful Restart of PrimProc Processes" << endl << endl; - } - catch (exception& e) - { - cout << endl << "**** restartPrimProcs Failed : " << e.what() << endl; - } - } - break; - - case 14: // stopExeMgrs - { - if ( arguments[1] != "y" ) - { - // confirm request - if (confirmPrompt("This command stops the ExeMgr processes within the MariaDB ColumnStore System")) - break; - } - - try - { - oam.stopProcessType("ExeMgr"); - cout << endl << " Successful stop of ExeMgr Processes" << endl << endl; - } - catch (exception& e) - { - cout << endl << "**** stopExeMgrs Failed : " << e.what() << endl; - } - } - break; - - case 15: // startExeMgrs - { - try - { - oam.startProcessType("ExeMgr"); - cout << endl << " Successful Start of ExeMgr Processes" << endl << endl; - } - catch (exception& e) - { - cout << endl << "**** startExeMgrs Failed : " << e.what() << endl; - } - } - break; - - case 16: // restartExeMgrs - { - if ( arguments[1] != "y" ) - { - // confirm request - if (confirmPrompt("This command restarts the ExeMgr processes within the MariaDB ColumnStore System")) - break; - } - - try - { - oam.restartProcessType("ExeMgr"); - cout << endl << " Successful Restart of ExeMgr Processes" << endl << endl; - } - catch (exception& e) - { - cout << endl << "**** restartExeMgrs Failed : " << e.what() << endl; - } - } - break; - - case 17: // getProcessStatusStandby - parameters: NONE - { - printProcessStatus("ProcStatusControlStandby"); - } - break; - - case 18: // distributeconfigfile - parameters: option, moduleName - { - string name = "system"; - - if ( arguments[1] != "" ) - name = arguments[1]; - - try - { - oam.distributeConfigFile(name); - cout << endl << " Successful Distribution of MariaDB ColumnStore Config File" << endl << endl; - } - catch (exception& e) - { - cout << endl << "**** Distribution of MariaDB ColumnStore Config File Failed : " << e.what() << endl; - } - } - break; - - case 19: // getPmDbrootConfig - paramaters: pm id - { - string pmID; - - if (arguments[1] == "") - { - cout << endl; - string prompt = "Enter the Performance Module ID"; - pmID = dataPrompt(prompt); - } - else - pmID = arguments[1]; - - try - { - DBRootConfigList dbrootConfigList; - oam.getPmDbrootConfig(atoi(pmID.c_str()), dbrootConfigList); - - cout << "DBRoot IDs assigned to 'pm" + pmID + "' = "; - - DBRootConfigList::iterator pt = dbrootConfigList.begin(); - - for ( ; pt != dbrootConfigList.end() ;) - { - cout << oam.itoa(*pt); - pt++; - - if (pt != dbrootConfigList.end()) - cout << ", "; - } - - cout << endl; - } - catch (exception& e) - { - cout << endl << "**** getPmDbrootConfig Failed : " << e.what() << endl; - } - } - break; - - case 20: // getDbrootPmConfig - parameters dbroot id - { - string dbrootID; - - if (arguments[1] == "") - { - cout << endl; - string prompt = "Enter the DBRoot ID"; - dbrootID = dataPrompt(prompt); - } - else - dbrootID = arguments[1]; - - try - { - int pmID; - oam.getDbrootPmConfig(atoi(dbrootID.c_str()), pmID); - - cout << endl << " DBRoot ID " << dbrootID << " is assigned to 'pm" << pmID << "'" << endl; - } - catch (exception& e) - { - cout << endl << "**** getDbrootPmConfig Failed : " << e.what() << endl; - } - } - break; - - case 21: // getSystemDbrootConfig - { - cout << endl << "System DBroot Configuration" << endl << endl; - - try - { - DBRootConfigList dbrootConfigList; - oam.getSystemDbrootConfig(dbrootConfigList); - - cout << "System DBRoot IDs = "; - DBRootConfigList::iterator pt = dbrootConfigList.begin(); - - for ( ; pt != dbrootConfigList.end() ;) - { - cout << oam.itoa(*pt); - pt++; - - if (pt != dbrootConfigList.end()) - cout << ", "; - } - - cout << endl; - } - catch (exception& e) - { - cout << endl << "**** getSystemDbrootConfig Failed : " << e.what() << endl; - } - } - break; - - case 22: // checkDBFunctional - { - try - { - oam.checkDBFunctional(false); - cout << endl << " checkDBFunctional Successful" << endl << endl; - } - catch (exception& e) - { - cout << endl << "**** checkDBFunctional Failed : " << e.what() << endl; - cout << endl << " can check UM " + tmpDir + "/dbfunctional.log for possible additional information" << endl << endl; - } - catch (...) - { - cout << endl << " checkDBFunctional Failed: check UM " + tmpDir + "/dbfunctional.log" << endl << endl; - } - } - break; - - case 23: // getsystemreadflags - { - - cout << " Print the DB System Flags. 1 = set and ready, 0 = clear and not ready" << endl; - - BRM::DBRM dbrm; - - cout << endl; - - try - { - cout << "getSystemQueryReady = " << dbrm.getSystemQueryReady() << endl; - } - catch (...) - {} - - try - { - cout << "getSystemReady = " << dbrm.getSystemReady() << endl; - } - catch (...) - {} - - try - { - cout << "getSystemSuspended = " << dbrm.getSystemSuspended() << endl; - } - catch (...) - {} - } - break; - - case 24: // setsystemqueryready - { - BRM::DBRM dbrm; - - string set = "0"; - - if (arguments[1] == "") - { - cout << endl; - string prompt = "Enter 1 for set and 0 for clear"; - set = dataPrompt(prompt); - } - else - set = arguments[1]; - - bool flag = true; - - if ( set == "0" ) - flag = false; - - cout << endl; - - try - { - cout << "getSystemQueryReady = " << dbrm.getSystemQueryReady() << endl; - } - catch (...) - {} - - cout << endl; - - try - { - dbrm.setSystemQueryReady(flag); - cout << "setSystemQueryReady = " << flag << endl; - } - catch (...) - {} - - cout << endl; - - try - { - cout << "getSystemQueryReady = " << dbrm.getSystemQueryReady() << endl; - } - catch (...) - {} - } - - default: // shouldn't get here, but... - return 1; - - } // end of switch - - return 0; -} - -/****************************************************************************************** - * @brief getFlags - * - * purpose: get and convert Graceful and Ack flags - * - ******************************************************************************************/ -void getFlags(const string* arguments, GRACEFUL_FLAG& gracefulTemp, ACK_FLAG& ackTemp, oam::CC_SUSPEND_ANSWER& suspendAnswer, bool& bNeedsConfirm, string* password) -{ - gracefulTemp = GRACEFUL; // default - ackTemp = ACK_YES; // default - suspendAnswer = CANCEL; - bNeedsConfirm = true; - - for ( int i = 1; i < ArgNum; i++) - { - if (strcasecmp(arguments[i].c_str(), "Y") == 0) - bNeedsConfirm = false; - else if (strcasecmp(arguments[i].c_str(), "N") == 0) - bNeedsConfirm = true; - else if (strcasecmp(arguments[i].c_str(), "GRACEFUL") == 0) - gracefulTemp = oam::GRACEFUL; - else if (strcasecmp(arguments[i].c_str(), "FORCEFUL") == 0) - gracefulTemp = FORCEFUL; - else if (strcasecmp(arguments[i].c_str(), "INSTALL") == 0) - gracefulTemp = INSTALL; - else if (strcasecmp(arguments[i].c_str(), "ACK_YES") == 0 || strcasecmp(arguments[i].c_str(), "YES_ACK") == 0) - ackTemp = ACK_YES; - else if (strcasecmp(arguments[i].c_str(), "ACK_NO") == 0 || strcasecmp(arguments[i].c_str(), "NO_ACK") == 0) - ackTemp = ACK_NO; - else if (strcasecmp(arguments[i].c_str(), "WAIT") == 0) - suspendAnswer = WAIT; - else if (strcasecmp(arguments[i].c_str(), "ROLLBACK") == 0) - suspendAnswer = ROLLBACK; - else if (strcasecmp(arguments[i].c_str(), "FORCE") == 0) - suspendAnswer = FORCE; - else if (password && arguments[i].length() > 0) - *password = arguments[i]; - } -} - - -/****************************************************************************************** - * @brief confirmPrompt - * - * purpose: Confirmation prompt - * - ******************************************************************************************/ -int confirmPrompt(std::string warningCommand) -{ - char* pcommand = 0; - char* p; - string argument = "n"; - - while (true) - { - // read input - if (warningCommand.size() > 0) - { - cout << endl << warningCommand << endl; - } - - pcommand = readline(" Do you want to proceed: (y or n) [n]: "); - - if (pcommand && *pcommand) - { - p = strtok(pcommand, " "); - argument = p; - free(pcommand); - pcommand = 0; - } - - if (pcommand) - { - free(pcommand); - pcommand = 0; - } - - // covert argument into lowercase - transform (argument.begin(), argument.end(), argument.begin(), to_lower()); - - if ( argument == "y") - return 0; - else if ( argument == "n") - return 1; - } -} - -/****************************************************************************************** - * @brief dataPrompt - * - * purpose: Prompt for additional data - * - ******************************************************************************************/ -std::string dataPrompt(std::string promptCommand) -{ - char data[CmdSize]; - char* pdata = data; - char* pd; - string argument; - - while (true) - { - // read input - cout << promptCommand << endl; - pdata = readline(" Please enter: "); - - if (!pdata) // user hit -D - pdata = strdup("exit"); - - else if (!*pdata) - // just an enter-key was entered, ignore and reprompt - continue; - - pd = pdata; - argument = pd; - - return argument; - } -} - - -/****************************************************************************************** - * @brief writeLog for command - * - * purpose: write command to the log file - * - ******************************************************************************************/ -void writeLog(string command) -{ - Oam oam; - - //filter off password on reboot commands - - logFile << oam.getCurrentTime() << ": " << command << endl; - logFile.flush(); - return; -} - -/****************************************************************************************** - * @brief printAlarmSummary - * - * purpose: get active alarms and produce a summary - * - ******************************************************************************************/ -void printAlarmSummary() -{ - AlarmList alarmList; - Oam oam; - - try - { - oam.getActiveAlarms(alarmList); - } - catch (...) - { - return; - } - - int critical = 0, major = 0, minor = 0, warning = 0, info = 0; - AlarmList :: iterator i; - - for (i = alarmList.begin(); i != alarmList.end(); ++i) - { - switch (i->second.getSeverity()) - { - case CRITICAL: - ++critical; - break; - - case MAJOR: - ++major; - break; - - case MINOR: - ++minor; - break; - - case WARNING: - ++warning; - break; - - case INFORMATIONAL: - ++info; - break; - } - } - - cout << endl << "Active Alarm Counts: "; - cout << "Critical = " << critical; - cout << ", Major = " << major; - cout << ", Minor = " << minor; - cout << ", Warning = " << warning; - cout << ", Info = " << info; - cout << endl; -} - -/****************************************************************************************** - * @brief printCriticalAlarms - * - * purpose: get active Critical alarms - * - ******************************************************************************************/ -void printCriticalAlarms() -{ - AlarmList alarmList; - Oam oam; - - try - { - oam.getActiveAlarms(alarmList); - } - catch (...) - { - return; - } - - cout << endl << "Critical Active Alarms:" << endl << endl; - - AlarmList :: iterator i; - - for (i = alarmList.begin(); i != alarmList.end(); ++i) - { - switch (i->second.getSeverity()) - { - case CRITICAL: - cout << "AlarmID = " << i->second.getAlarmID() << endl; - cout << "Brief Description = " << i->second.getDesc() << endl; - cout << "Alarm Severity = "; - cout << "CRITICAL" << endl; - cout << "Time Issued = " << i->second.getTimestamp() << endl; - cout << "Reporting Module = " << i->second.getSname() << endl; - cout << "Reporting Process = " << i->second.getPname() << endl; - cout << "Reported Device = " << i->second.getComponentID() << endl << endl; - break; - - case MAJOR: - case MINOR: - case WARNING: - case INFORMATIONAL: - break; - } - } -} - -/****************************************************************************************** - * @brief printSystemStatus - * - * purpose: get and Display System and Module Statuses - * - ******************************************************************************************/ -void printSystemStatus() -{ - SystemStatus systemstatus; - Oam oam; - BRM::DBRM dbrm(true); - - cout << endl << "System " << systemName << endl << endl; - cout << "System and Module statuses" << endl << endl; - cout << "Component Status Last Status Change" << endl; - cout << "------------ -------------------------- ------------------------" << endl; - - try - { - oam.getSystemStatus(systemstatus, false); - cout << "System "; - cout.setf(ios::left); - cout.width(29); - int state = systemstatus.SystemOpState; - string extraInfo = " "; - bool bRollback = false; - bool bForce = false; - - if (dbrm.isDBRMReady()) - { - if (dbrm.getSystemSuspended() > 0) - { - extraInfo = " WRITE SUSPENDED"; - } - else if (dbrm.getSystemSuspendPending(bRollback) > 0) - { - extraInfo = " WRITE SUSPEND PENDING"; - } - else if (dbrm.getSystemShutdownPending(bRollback, bForce) > 0) - { - extraInfo = " SHUTDOWN PENDING"; - } - } - - printState(state, extraInfo); - cout.width(24); - string stime = systemstatus.StateChangeDate; - stime = stime.substr (0, 24); - cout << stime << endl << endl; - - for ( unsigned int i = 0 ; i < systemstatus.systemmodulestatus.modulestatus.size(); i++) - { - if ( systemstatus.systemmodulestatus.modulestatus[i].Module.empty() ) - // end of list - break; - - cout << "Module "; - cout.setf(ios::left); - cout.width(7); - cout << systemstatus.systemmodulestatus.modulestatus[i].Module; - cout.width(29); - state = systemstatus.systemmodulestatus.modulestatus[i].ModuleOpState; - - // get NIC functional state (degraded or not) - bool degraded = false; - - try - { - int state; - oam.getModuleStatus(systemstatus.systemmodulestatus.modulestatus[i].Module, state, degraded); - } - catch (...) - {} - - string nicFun = " "; - - if (degraded) - nicFun = "/" + DEGRADEDSTATE; - - printState(state, nicFun); - - cout.width(24); - string stime = systemstatus.systemmodulestatus.modulestatus[i].StateChangeDate ; - stime = stime.substr (0, 24); - cout << stime << endl; - } - - cout << endl; - - if ( systemstatus.systemmodulestatus.modulestatus.size() > 1) - { - // get and display Parent OAM Module - cout << "Active Parent OAM Performance Module is '" << getParentOAMModule() << "'" << endl; - - //display Primary UM Module - string PrimaryUMModuleName; - - try - { - oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName); - } - catch (...) {} - - if ( serverInstallType != oam::INSTALL_COMBINE_DM_UM_PM ) - { - ModuleTypeConfig moduletypeconfig; - - try - { - oam.getSystemConfig("um", moduletypeconfig); - } - catch (...) - {} - - if ( moduletypeconfig.ModuleCount > 1 ) - { - if ( PrimaryUMModuleName != oam::UnassignedName ) - cout << "Primary Front-End MariaDB ColumnStore Module is '" << PrimaryUMModuleName << "'" << endl; - } - } - else - { - if ( PrimaryUMModuleName != oam::UnassignedName ) - cout << "Primary Front-End MariaDB ColumnStore Module is '" << PrimaryUMModuleName << "'" << endl; - } - } - - //display local Query / PMwithUM feature, if enabled - string PMwithUM; - - try - { - oam.getSystemConfig("PMwithUM", PMwithUM); - } - catch (...) {} - - if ( PMwithUM == "y" ) - cout << "Local Query Feature is enabled" << endl; - - //display MySQL replication feature, if enabled - string MySQLRep; - - try - { - oam.getSystemConfig("MySQLRep", MySQLRep); - } - catch (...) {} - - if ( MySQLRep == "y" ) - cout << "MariaDB ColumnStore Replication Feature is enabled" << endl; - - } - catch (exception& e) - { - cout << endl << "**** printSystemStatus Failed = " << e.what() << endl; - throw runtime_error(""); - } -} - -/****************************************************************************************** - * @brief printProcessStatus - * - * purpose: get and Display Process Statuses - * - ******************************************************************************************/ -void printProcessStatus(std::string port) -{ - SystemProcessStatus systemprocessstatus; - ProcessStatus processstatus; - ModuleTypeConfig moduletypeconfig; - Oam oam; - BRM::DBRM dbrm(true); - - int state; - string extraInfo = " "; - bool bRollback = false; - bool bForce = false; - bool bSuspend = false; - - if (dbrm.isDBRMReady()) - { - if (dbrm.getSystemSuspended() > 0) - { - bSuspend = true; - extraInfo = "WRITE_SUSPEND"; - } - else if (dbrm.getSystemSuspendPending(bRollback) > 0) - { - bSuspend = true; - - if (bRollback) - { - extraInfo = "ROLLBACK"; - } - else - { - extraInfo = "SUSPEND_PENDING"; - } - } - else if (dbrm.getSystemShutdownPending(bRollback, bForce) > 0) - { - bSuspend = true; - - if (bRollback) - { - extraInfo = "ROLLBACK"; - } - else - { - extraInfo = "SHUTDOWN_PENDING"; - } - } - } - - cout << endl << "MariaDB ColumnStore Process statuses" << endl << endl; - cout << "Process Module Status Last Status Change Process ID" << endl; - cout << "------------------ ------ --------------- ------------------------ ----------" << endl; - - try - { - oam.getProcessStatus(systemprocessstatus, port); - - string prevModule = systemprocessstatus.processstatus[0].Module; - - for ( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++) - { - if ( prevModule != systemprocessstatus.processstatus[i].Module) - cout << endl; //added a space line between different modules - - cout.setf(ios::left); - cout.width(20); - cout << systemprocessstatus.processstatus[i].ProcessName; - cout.width(10); - cout << systemprocessstatus.processstatus[i].Module; - cout.width(18); - state = systemprocessstatus.processstatus[i].ProcessOpState; - - // For these processes, if state is ACTIVE and we're in write - // suspend, then we want to display the extra data instead of state. - // Otherwise, we ignore extra data and display state. - if (state == ACTIVE && bSuspend && - ( systemprocessstatus.processstatus[i].ProcessName == "DMLProc" - || systemprocessstatus.processstatus[i].ProcessName == "DDLProc" - || systemprocessstatus.processstatus[i].ProcessName == "WriteEngineServer")) - { - printState(LEAVE_BLANK, extraInfo); - } - else - { - state = systemprocessstatus.processstatus[i].ProcessOpState; - printState(state, " "); - } - - cout.width(24); - string stime = systemprocessstatus.processstatus[i].StateChangeDate ; - stime = stime.substr (0, 24); - cout << stime; - - if ( state == COLD_STANDBY ) - { - cout << endl; - continue; - } - else - { - cout.setf(ios::right); - cout.width(12); - - if ( systemprocessstatus.processstatus[i].ProcessID != 0 ) - cout << systemprocessstatus.processstatus[i].ProcessID << endl; - else - cout << endl; - } - - cout.unsetf(ios::right); - - prevModule = systemprocessstatus.processstatus[i].Module; - - } - } - catch (exception& e) - { - cout << endl << "**** printProcessStatus Failed = " << e.what() << endl; - throw runtime_error(""); - } -} - -/****************************************************************************************** - * @brief printModuleCpuUsers - * - * purpose: get and Display Module TOP CPU users - * - ******************************************************************************************/ -void printModuleCpuUsers(TopProcessCpuUsers topprocesscpuusers) -{ - cout << "Module '" + topprocesscpuusers.ModuleName + "' Top CPU Users" << endl << endl; - cout << "Process CPU Usage %" << endl; - cout << "----------------- -----------" << endl; - - for ( unsigned int i = 0 ; i < topprocesscpuusers.processcpuuser.size(); i++) - { - cout.setf(ios::left); - cout.width(25); - cout << topprocesscpuusers.processcpuuser[i].ProcessName; - cout.width(10); - cout << topprocesscpuusers.processcpuuser[i].CpuUsage << endl; - } - - cout << endl; -} - -/****************************************************************************************** - * @brief printModuleCpu - * - * purpose: get and Display Module CPU Usage - * - ******************************************************************************************/ -void printModuleCpu(ModuleCpu modulecpu) -{ - Oam oam; - - cout << endl << "Module '" + modulecpu.ModuleName + "' CPU Usage % = " + oam.itoa(modulecpu.CpuUsage) << endl; -} - -/****************************************************************************************** - * @brief printModuleMemoryUsers - * - * purpose: get and Display Module TOP Memory users - * - ******************************************************************************************/ -void printModuleMemoryUsers(TopProcessMemoryUsers topprocessmemoryusers) -{ - cout << "Module '" + topprocessmemoryusers.ModuleName + "' Top Memory Users (in bytes)" << endl << endl; - cout << "Process Memory Used Memory Usage %" << endl; - cout << "----------------- ----------- --------------" << endl; - - for ( unsigned int i = 0 ; i < topprocessmemoryusers.processmemoryuser.size(); i++) - { - cout.setf(ios::left); - cout.width(20); - cout << topprocessmemoryusers.processmemoryuser[i].ProcessName; - cout.width(19); - cout << topprocessmemoryusers.processmemoryuser[i].MemoryUsed; - cout.width(3); - cout << topprocessmemoryusers.processmemoryuser[i].MemoryUsage << endl; - } - - cout << endl; -} - -/****************************************************************************************** - * @brief printModuleMemory - * - * purpose: get and Display Module Memory Usage - * - ******************************************************************************************/ -void printModuleMemory(ModuleMemory modulememory) -{ - Oam oam; - cout.setf(ios::left); - cout.width(8); - cout << modulememory.ModuleName; - cout.width(11); - cout << oam.itoa(modulememory.MemoryTotal); - cout.width(10); - cout << oam.itoa(modulememory.MemoryUsed); - cout.width(13); - cout << oam.itoa(modulememory.cache); - cout.width(9); - cout << oam.itoa(modulememory.MemoryUsage); - cout.width(12); - cout << oam.itoa(modulememory.SwapTotal); - cout.width(16); - cout << oam.itoa(modulememory.SwapUsed); - cout.width(7); - cout << oam.itoa(modulememory.SwapUsage); - cout << endl; -} - -/****************************************************************************************** - * @brief printModuleDisk - * - * purpose: get and Display Module disk usage - * - ******************************************************************************************/ -void printModuleDisk(ModuleDisk moduledisk) -{ - Oam oam; - - cout << "Module '" + moduledisk.ModuleName + "' Disk Usage (in 1K blocks)" << endl << endl; - cout << "Mount Point Total Blocks Used Blocks Usage %" << endl; - cout << "----------------------------- ------------ ------------ -------" << endl; - - string etcdir = std::string(MCSSYSCONFDIR) + "/columnstore"; - - for ( unsigned int i = 0 ; i < moduledisk.diskusage.size(); i++) - { - //skip mounts to other server disk - if ( moduledisk.diskusage[i].DeviceName.find("/mnt", 0) == string::npos && - moduledisk.diskusage[i].DeviceName.find(etcdir, 0) == string::npos ) - { - cout.setf(ios::left); - cout.width(31); - - if (moduledisk.diskusage[i].DeviceName.length() > 29) - { - cout << "..." + moduledisk.diskusage[i].DeviceName.substr(moduledisk.diskusage[i].DeviceName.length() - 26); - } - else - { - cout << moduledisk.diskusage[i].DeviceName; - } - - cout.width(14); - cout << moduledisk.diskusage[i].TotalBlocks; - cout.width(17); - cout << moduledisk.diskusage[i].UsedBlocks; - cout.width(2); - cout << moduledisk.diskusage[i].DiskUsage << endl; - } - } - - cout << endl; -} - -/****************************************************************************************** - * @brief printModuleResources - * - * purpose: get and Display Module resource usage - * - ******************************************************************************************/ -void printModuleResources(TopProcessCpuUsers topprocesscpuusers, ModuleCpu modulecpu, TopProcessMemoryUsers topprocessmemoryusers, ModuleMemory modulememory, ModuleDisk moduledisk) -{ - Oam oam; - string etcdir = std::string(MCSSYSCONFDIR) + "/columnstore"; - - cout << endl << "Module '" + topprocesscpuusers.ModuleName + "' Resource Usage" << endl << endl; - - cout << "CPU: " + oam.itoa(modulecpu.CpuUsage) << "% Usage" << endl; - - cout << "Mem: " << oam.itoa(modulememory.MemoryTotal) << "k total, " << oam.itoa(modulememory.MemoryUsed); - cout << "k used, " << oam.itoa(modulememory.cache) << "k cache, " << oam.itoa(modulememory.MemoryUsage) << "% Usage" << endl; - cout << "Swap: " << oam.itoa(modulememory.SwapTotal) << " k total, " << oam.itoa(modulememory.SwapUsed); - cout << "k used, " << oam.itoa(modulememory.SwapUsage) << "% Usage" << endl; - - cout << "Top CPU Process Users: "; - - for ( unsigned int i = 0 ; i < topprocesscpuusers.processcpuuser.size(); i++) - { - cout << topprocesscpuusers.processcpuuser[i].ProcessName << " "; - cout << topprocesscpuusers.processcpuuser[i].CpuUsage; - - if ( i + 1 != topprocesscpuusers.processcpuuser.size() ) - cout << "%, "; - else - cout << "%"; - } - - cout << endl; - - cout << "Top Memory Process Users: "; - - for ( unsigned int i = 0 ; i < topprocessmemoryusers.processmemoryuser.size(); i++) - { - cout << topprocessmemoryusers.processmemoryuser[i].ProcessName << " "; - cout << topprocessmemoryusers.processmemoryuser[i].MemoryUsage; - - if ( i + 1 != topprocessmemoryusers.processmemoryuser.size() ) - cout << "%, "; - else - cout << "%"; - } - - cout << endl; - - cout << "Disk Usage: "; - - for ( unsigned int i = 0 ; i < moduledisk.diskusage.size(); i++) - { - //skip mounts to other server disk - if ( moduledisk.diskusage[i].DeviceName.find("/mnt", 0) == string::npos && - moduledisk.diskusage[i].DeviceName.find(etcdir, 0) == string::npos ) - { - cout << moduledisk.diskusage[i].DeviceName << " "; - cout << moduledisk.diskusage[i].DiskUsage; - - if ( i + 1 != moduledisk.diskusage.size() ) - cout << "%, "; - else - cout << "%"; - } - } - - cout << endl << endl; -} - -/****************************************************************************************** - * @brief printModuleResources - * - * purpose: get and Display Module resource usage - * - ******************************************************************************************/ -void printState(int state, std::string addInfo) -{ - switch (state) - { - case MAN_OFFLINE: - cout << MANOFFLINE + addInfo; - break; - - case AUTO_OFFLINE: - cout << AUTOOFFLINE + addInfo; - break; - - case MAN_INIT: - cout << MANINIT + addInfo; - break; - - case AUTO_INIT: - cout << AUTOINIT + addInfo; - break; - - case ACTIVE: - cout << ACTIVESTATE + addInfo; - break; - - case LEAVE_BLANK: - cout << addInfo; - break; - - case STANDBY: - cout << STANDBYSTATE + addInfo; - break; - - case FAILED: - cout << FAILEDSTATE + addInfo; - break; - - case UP: - cout << UPSTATE + addInfo; - break; - - case DOWN: - cout << DOWNSTATE + addInfo; - break; - - case COLD_STANDBY: - cout << COLDSTANDBYSTATE + addInfo; - break; - - case INITIAL: - cout << INITIALSTATE + addInfo; - break; - - case MAN_DISABLED: - cout << MANDISABLEDSTATE + addInfo; - break; - - case AUTO_DISABLED: - cout << AUTODISABLEDSTATE + addInfo; - break; - - case STANDBY_INIT: - cout << STANDBYINIT + addInfo; - break; - - case BUSY_INIT: - cout << BUSYINIT + addInfo; - break; - - case DEGRADED: - cout << DEGRADEDSTATE + addInfo; - break; - - default: - cout << INITIALSTATE + addInfo; - break; - } -} - -/****************************************************************************************** - * @brief checkPromptThread - * - * purpose: check for exit out of repeat command - * - ******************************************************************************************/ -static void checkPromptThread() -{ - char* pcommand = 0; - - while (true) - { - // check input - pcommand = readline(""); - - if (!pcommand) // user hit -D - { - repeatStop = true; - break; - } - - free(pcommand); - pcommand = 0; - } - - pthread_exit(0); - return; -} - -/****************************************************************************************** - * @brief getParentOAMModule - * - * purpose: get Parent OAm Module name - * - ******************************************************************************************/ -std::string getParentOAMModule() -{ - Oam oam; - - // Get Parent OAM module Name - try - { - string parentOAMModule; - oam.getSystemConfig("ParentOAMModuleName", parentOAMModule); - return parentOAMModule; - } - catch (...) - { - cout << endl << "**** Failed : Failed to read Parent OAM Module Name" << endl; - exit(-1); - } -} - -/****************************************************************************************** - * @brief checkForDisabledModules - * - * purpose: Chcek and report any modules in a disabled state - * - ******************************************************************************************/ -bool checkForDisabledModules() -{ - - SystemModuleTypeConfig systemmoduletypeconfig; - Oam oam; - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - } - catch (...) - { - return false; - } - - bool found = false; - bool dbroot = false; - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - string moduleType = systemmoduletypeconfig.moduletypeconfig[i].ModuleType; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - string moduleName = (*pt).DeviceName; - - // report DISABLED modules - try - { - int opState; - bool degraded; - oam.getModuleStatus(moduleName, opState, degraded); - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - { - if (!found) - { - cout << " NOTE: These module(s) are DISABLED: "; - found = true; - } - - cout << moduleName << " "; - - if ( moduleType == "um" ) - continue; - - //check if module has any dbroots assigned to it - string PMID = moduleName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE);; - DBRootConfigList dbrootConfigList; - - try - { - oam.getPmDbrootConfig(atoi(PMID.c_str()), dbrootConfigList); - - if ( dbrootConfigList.size() != 0 ) - dbroot = true; - } - catch (exception& e) - {} - } - } - catch (...) - {} - } - } - - if (found) - cout << endl << endl; - - if (dbroot) - return false; - - return true; -} - -/** @brief Ask the user for cancel/wait/rollback/force - * - * When a Shutdown, stop, restart or suspend operation is - * requested but there are active transactions of some sort, - * we ask the user what to do. - */ -CC_SUSPEND_ANSWER AskSuspendQuestion(int CmdID) -{ - char* szAnswer = 0; - char* p; - string argument = "cancel"; - - const char* szCommand = "Unknown"; - - switch (CmdID) - { - case 16: - szCommand = "stop"; - break; - - case 17: - szCommand = "shutdown"; - break; - - case 19: - szCommand = "restart"; - break; - - case 28: - szCommand = "switch parent oam"; - break; - - case 32: - szCommand = "suspend"; - break; - - default: - return CANCEL; - break; - } - - cout << "Your options are:" << endl - << " Cancel -- Cancel the " << szCommand << " request" << endl - << " Wait -- Wait for write operations to end and then " << szCommand << endl; - -// << " Rollback -- Rollback all transactions and then " << szCommand << endl; - if (CmdID != 28 && CmdID != 32) - { - cout << " Force -- Force a " << szCommand << endl; - } - - while (true) - { - argument = "cancel"; - // read input - szAnswer = readline("What would you like to do: [Cancel]: "); - - if (szAnswer && *szAnswer) - { - p = strtok(szAnswer, " "); - argument = p; - free(szAnswer); - szAnswer = 0; - } - - // In case they just hit return. - if (szAnswer) - { - free(szAnswer); - szAnswer = 0; - } - - // convert argument into lowercase - transform(argument.begin(), argument.end(), argument.begin(), to_lower()); - - if ( argument == "cancel") - { - return CANCEL; - } - else if ( argument == "wait") - { - return WAIT; - } -// else if( argument == "rollback") -// { -// return ROLLBACK; -// } - else if ( argument == "force" && (CmdID == 16 || CmdID == 17 || CmdID == 19)) - { - return FORCE; - } - else - { - cout << argument << " is an invalid response" << endl; - } - } -} - -// Make a connection to the PM that uses DBRoot1. Used in redistribute -// return true if successful, false if fail. -bool connectToDBRoot1PM(Oam& oam, boost::scoped_ptr& msgQueueClient) -{ - int pmId = 0; - ModuleTypeConfig moduletypeconfig; - - try - { - oam.getDbrootPmConfig(1, pmId); - oam.getSystemConfig("pm", moduletypeconfig); - } - catch (const std::exception& ex) - { - cerr << "Caught exception when getting DBRoot1" << ex.what() << endl; - return false; - } - catch (...) - { - cerr << "Caught exception when getting DBRoot1 -- unknown" << endl; - return false; - } - - // Find the PM that has dbroot1, then make connection to its WES. - ostringstream oss; - oss << "pm" << pmId << "_WriteEngineServer"; - - try - { - msgQueueClient.reset(new MessageQueueClient(oss.str())); - } - catch (const std::exception& ex) - { - cerr << "Caught exception when connecting to " << oss.str() << " : " << ex.what() << endl; - return false; - } - catch (...) - { - cerr << "Caught exception when connecting to " << oss.str() << " : unknown" << endl; - } - - return true; -} - -bool SendToWES(Oam& oam, ByteStream bs) -{ - boost::scoped_ptr msgQueueClient; - - if (!connectToDBRoot1PM(oam, msgQueueClient)) - return false; - - uint32_t status = RED_STATE_UNDEF; - msgQueueClient->write(bs); - - SBS sbs; - sbs = msgQueueClient->read(); - - if (sbs->length() == 0) - { - cerr << "WriteEngineServer returned an empty stream. Might be a network error" << endl; - } - else if (sbs->length() < 5) - { - cerr << "WriteEngineServer returned too few bytes. Refistribute status is unknown" << endl; - } - else - { - ByteStream::byte wesMsgId; - *sbs >> wesMsgId; - *sbs >> status; - - string msg; - *sbs >> msg; - cout << "WriteEngineServer returned status " << status << ": " << msg << endl; - } - - return true; -} -// vim:ts=4 sw=4: diff --git a/oamapps/mcsadmin/mcsadmin.h b/oamapps/mcsadmin/mcsadmin.h deleted file mode 100644 index afd4e4362..000000000 --- a/oamapps/mcsadmin/mcsadmin.h +++ /dev/null @@ -1,135 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/****************************************************************************************** - * $Id: MCSADMIN.h 3071 2013-04-04 18:45:53Z rdempsey $ - * - ******************************************************************************************/ -/** - * @file - */ -#ifndef MCSADMIN_H -#define MCSADMIN_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "liboamcpp.h" -#include "configcpp.h" -#include "alarmmanager.h" -#include "alarmglobal.h" -#include "calpontsystemcatalog.h" -#include "brmtypes.h" - - -const int CmdSize = 80; -const int ArgNum = 10; -const int DescNumMax = 10; -const int cmdNum = 68; - -const std::string DEFAULT_LOG_FILE = "/var/log/mariadb/columnstore/mcsadmin.log"; -std::ofstream logFile; - -/** - * write the command to the log file - */ -void writeLog(std::string command); - -/** @brief location of the Process Configuration file - */ -const std::string ConsoleCmdsFile = "ConsoleCmds.xml"; - -void getFlags(const std::string* arguments, oam::GRACEFUL_FLAG& gracefulTemp, oam::ACK_FLAG& ackTemp, oam::CC_SUSPEND_ANSWER& suspendAnswer, bool& bNeedsConfirm, std::string* password = NULL); -int confirmPrompt(std::string warningCommand); -std::string dataPrompt(std::string promptCommand); -int processCommand(std::string*); -int ProcessSupportCommand(int CommandID, std::string arguments[]); -void printAlarmSummary(); -void printCriticalAlarms(); -void checkRepeat(std::string*, int); -void printSystemStatus(); -void printProcessStatus(std::string port = "ProcStatusControl"); -void printModuleCpuUsers(oam::TopProcessCpuUsers topprocesscpuusers); -void printModuleCpu(oam::ModuleCpu modulecpu); -void printModuleMemoryUsers(oam::TopProcessMemoryUsers topprocessmemoryusers); -void printModuleMemory(oam::ModuleMemory modulememory); -void printModuleDisk(oam::ModuleDisk moduledisk); -void printModuleResources(oam::TopProcessCpuUsers topprocesscpuusers, oam::ModuleCpu modulecpu, oam::TopProcessMemoryUsers topprocessmemoryusers, oam::ModuleMemory modulememory, oam::ModuleDisk moduledisk); -void printState(int state, std::string addInfo); -std::string getParentOAMModule(); -bool checkForDisabledModules(); -oam::CC_SUSPEND_ANSWER AskSuspendQuestion(int CmdID); - - - -class to_lower -{ -public: - char operator() (char c) const // notice the return type - { - return tolower(c); - } -}; - -/** @brief Hidden Support commands in lower-case -*/ -const std::string supportCmds[] = { "helpsupport", - "stopprocess", - "startprocess", - "restartprocess", - "killpid", - "rebootsystem", - "rebootnode", - "stopdbrmprocess", - "startdbrmprocess", - "restartdbrmprocess", - "setsystemstartupstate", - "stopprimprocs", - "startprimprocs", - "restartprimprocs", - "stopexemgrs", - "startexemgrs", - "restartexemgrs", - "getprocessstatusstandby", - "distributeconfigfile", - "getpmdbrootconfig", - "getdbrootpmconfig", - "getsystemdbrootconfig", - "checkdbfunctional", - "getsystemreadflags", - "setsystemqueryready", - "" - }; - - -#endif diff --git a/oamapps/postConfigure/CMakeLists.txt b/oamapps/postConfigure/CMakeLists.txt deleted file mode 100644 index f5649c809..000000000 --- a/oamapps/postConfigure/CMakeLists.txt +++ /dev/null @@ -1,69 +0,0 @@ - -include_directories( ${ENGINE_COMMON_INCLUDES} ) - - -########### next target ############### - -set(postConfigure_SRCS postConfigure.cpp helpers.cpp) - -add_executable(postConfigure ${postConfigure_SRCS}) - -target_compile_options(postConfigure PRIVATE -Wno-unused-result) - -target_link_libraries(postConfigure ${ENGINE_LDFLAGS} ${ENGINE_READLINE_LIBRARY} ncurses ${MARIADB_CLIENT_LIBS} ${ENGINE_EXEC_LIBS}) - -install(TARGETS postConfigure DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine) - - -########### next target ############### - -set(installer_SRCS installer.cpp helpers.cpp) - -add_executable(columnstore_installer ${installer_SRCS}) - -target_compile_options(columnstore_installer PRIVATE -Wno-unused-result) - -target_link_libraries(columnstore_installer ${ENGINE_LDFLAGS} ${ENGINE_READLINE_LIBRARY} ncurses ${MARIADB_CLIENT_LIBS} ${ENGINE_EXEC_LIBS}) - -install(TARGETS columnstore_installer DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine) - - -########### next target ############### - -set(getMySQLpw_SRCS getMySQLpw.cpp) - -add_executable(getMySQLpw ${getMySQLpw_SRCS}) - -target_link_libraries(getMySQLpw ${ENGINE_LDFLAGS} ${ENGINE_READLINE_LIBRARY} ncurses ${MARIADB_CLIENT_LIBS} ${ENGINE_EXEC_LIBS}) - -install(TARGETS getMySQLpw DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine) - - -########### next target ############### - -#set(amazonInstaller_SRCS amazonInstaller.cpp helpers.cpp) - -#add_executable(amazonInstaller ${amazonInstaller_SRCS}) - -#target_link_libraries(amazonInstaller ${ENGINE_LDFLAGS} ${ENGINE_READLINE_LIBRARY} ncurses ${SNMP_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_EXEC_LIBS}) - -#install(TARGETS amazonInstaller DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-platform) - - -########### next target ############### - -set(mycnfUpgrade_SRCS mycnfUpgrade.cpp) - -add_executable(mycnfUpgrade ${mycnfUpgrade_SRCS}) - -target_compile_options(mycnfUpgrade PRIVATE -Wno-unused-result) - -target_link_libraries(mycnfUpgrade ${ENGINE_LDFLAGS} ${ENGINE_READLINE_LIBRARY} ncurses ${MARIADB_CLIENT_LIBS} ${ENGINE_EXEC_LIBS}) - -install(TARGETS mycnfUpgrade DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine) - - -########### next target ############### - -install(PROGRAMS quick_installer_single_server.sh quick_installer_multi_server.sh - DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine) diff --git a/oamapps/postConfigure/Doxyfile b/oamapps/postConfigure/Doxyfile deleted file mode 100644 index ddb570a59..000000000 --- a/oamapps/postConfigure/Doxyfile +++ /dev/null @@ -1,275 +0,0 @@ -# Doxyfile 1.4.1-KDevelop - -#--------------------------------------------------------------------------- -# Project related configuration options -#--------------------------------------------------------------------------- -PROJECT_NAME = postConfigure.kdevelop -PROJECT_NUMBER = $VERSION$ -OUTPUT_DIRECTORY = -CREATE_SUBDIRS = NO -OUTPUT_LANGUAGE = English -USE_WINDOWS_ENCODING = NO -BRIEF_MEMBER_DESC = YES -REPEAT_BRIEF = YES -ABBREVIATE_BRIEF = "The $name class" \ - "The $name widget" \ - "The $name file" \ - is \ - provides \ - specifies \ - contains \ - represents \ - a \ - an \ - the -ALWAYS_DETAILED_SEC = NO -INLINE_INHERITED_MEMB = NO -FULL_PATH_NAMES = YES -STRIP_FROM_PATH = /home/dhill/InfiniDB_MariaDB/oamapps/mcsadmin/ -STRIP_FROM_INC_PATH = -SHORT_NAMES = NO -JAVADOC_AUTOBRIEF = NO -MULTILINE_CPP_IS_BRIEF = NO -DETAILS_AT_TOP = NO -INHERIT_DOCS = YES -DISTRIBUTE_GROUP_DOC = NO -TAB_SIZE = 8 -ALIASES = -OPTIMIZE_OUTPUT_FOR_C = NO -OPTIMIZE_OUTPUT_JAVA = NO -SUBGROUPING = YES -#--------------------------------------------------------------------------- -# Build related configuration options -#--------------------------------------------------------------------------- -EXTRACT_ALL = NO -EXTRACT_PRIVATE = NO -EXTRACT_STATIC = NO -EXTRACT_LOCAL_CLASSES = YES -EXTRACT_LOCAL_METHODS = NO -HIDE_UNDOC_MEMBERS = NO -HIDE_UNDOC_CLASSES = NO -HIDE_FRIEND_COMPOUNDS = NO -HIDE_IN_BODY_DOCS = NO -INTERNAL_DOCS = NO -CASE_SENSE_NAMES = YES -HIDE_SCOPE_NAMES = NO -SHOW_INCLUDE_FILES = YES -INLINE_INFO = YES -SORT_MEMBER_DOCS = YES -SORT_BRIEF_DOCS = NO -SORT_BY_SCOPE_NAME = NO -GENERATE_TODOLIST = YES -GENERATE_TESTLIST = YES -GENERATE_BUGLIST = YES -GENERATE_DEPRECATEDLIST= YES -ENABLED_SECTIONS = -MAX_INITIALIZER_LINES = 30 -SHOW_USED_FILES = YES -SHOW_DIRECTORIES = YES -FILE_VERSION_FILTER = -#--------------------------------------------------------------------------- -# configuration options related to warning and progress messages -#--------------------------------------------------------------------------- -QUIET = NO -WARNINGS = YES -WARN_IF_UNDOCUMENTED = YES -WARN_IF_DOC_ERROR = YES -WARN_NO_PARAMDOC = NO -WARN_FORMAT = "$file:$line: $text" -WARN_LOGFILE = -#--------------------------------------------------------------------------- -# configuration options related to the input files -#--------------------------------------------------------------------------- -INPUT = /home/dhill/InfiniDB_MariaDB/oamapps/postConfigure -FILE_PATTERNS = *.c \ - *.cc \ - *.cxx \ - *.cpp \ - *.c++ \ - *.java \ - *.ii \ - *.ixx \ - *.ipp \ - *.i++ \ - *.inl \ - *.h \ - *.hh \ - *.hxx \ - *.hpp \ - *.h++ \ - *.idl \ - *.odl \ - *.cs \ - *.php \ - *.php3 \ - *.inc \ - *.m \ - *.mm \ - *.dox \ - *.C \ - *.CC \ - *.C++ \ - *.II \ - *.I++ \ - *.H \ - *.HH \ - *.H++ \ - *.CS \ - *.PHP \ - *.PHP3 \ - *.M \ - *.MM \ - *.C \ - *.H \ - *.tlh \ - *.diff \ - *.patch \ - *.moc \ - *.xpm \ - *.dox -RECURSIVE = yes -EXCLUDE = -EXCLUDE_SYMLINKS = NO -EXCLUDE_PATTERNS = -EXAMPLE_PATH = -EXAMPLE_PATTERNS = * -EXAMPLE_RECURSIVE = NO -IMAGE_PATH = -INPUT_FILTER = -FILTER_PATTERNS = -FILTER_SOURCE_FILES = NO -#--------------------------------------------------------------------------- -# configuration options related to source browsing -#--------------------------------------------------------------------------- -SOURCE_BROWSER = NO -INLINE_SOURCES = NO -STRIP_CODE_COMMENTS = YES -REFERENCED_BY_RELATION = YES -REFERENCES_RELATION = YES -VERBATIM_HEADERS = YES -#--------------------------------------------------------------------------- -# configuration options related to the alphabetical class index -#--------------------------------------------------------------------------- -ALPHABETICAL_INDEX = NO -COLS_IN_ALPHA_INDEX = 5 -IGNORE_PREFIX = -#--------------------------------------------------------------------------- -# configuration options related to the HTML output -#--------------------------------------------------------------------------- -GENERATE_HTML = YES -HTML_OUTPUT = html -HTML_FILE_EXTENSION = .html -HTML_HEADER = -HTML_FOOTER = -HTML_STYLESHEET = -HTML_ALIGN_MEMBERS = YES -GENERATE_HTMLHELP = NO -CHM_FILE = -HHC_LOCATION = -GENERATE_CHI = NO -BINARY_TOC = NO -TOC_EXPAND = NO -DISABLE_INDEX = NO -ENUM_VALUES_PER_LINE = 4 -GENERATE_TREEVIEW = NO -TREEVIEW_WIDTH = 250 -#--------------------------------------------------------------------------- -# configuration options related to the LaTeX output -#--------------------------------------------------------------------------- -GENERATE_LATEX = YES -LATEX_OUTPUT = latex -LATEX_CMD_NAME = latex -MAKEINDEX_CMD_NAME = makeindex -COMPACT_LATEX = NO -PAPER_TYPE = a4wide -EXTRA_PACKAGES = -LATEX_HEADER = -PDF_HYPERLINKS = NO -USE_PDFLATEX = NO -LATEX_BATCHMODE = NO -LATEX_HIDE_INDICES = NO -#--------------------------------------------------------------------------- -# configuration options related to the RTF output -#--------------------------------------------------------------------------- -GENERATE_RTF = NO -RTF_OUTPUT = rtf -COMPACT_RTF = NO -RTF_HYPERLINKS = NO -RTF_STYLESHEET_FILE = -RTF_EXTENSIONS_FILE = -#--------------------------------------------------------------------------- -# configuration options related to the man page output -#--------------------------------------------------------------------------- -GENERATE_MAN = NO -MAN_OUTPUT = man -MAN_EXTENSION = .3 -MAN_LINKS = NO -#--------------------------------------------------------------------------- -# configuration options related to the XML output -#--------------------------------------------------------------------------- -GENERATE_XML = yes -XML_OUTPUT = xml -XML_SCHEMA = -XML_DTD = -XML_PROGRAMLISTING = YES -#--------------------------------------------------------------------------- -# configuration options for the AutoGen Definitions output -#--------------------------------------------------------------------------- -GENERATE_AUTOGEN_DEF = NO -#--------------------------------------------------------------------------- -# configuration options related to the Perl module output -#--------------------------------------------------------------------------- -GENERATE_PERLMOD = NO -PERLMOD_LATEX = NO -PERLMOD_PRETTY = YES -PERLMOD_MAKEVAR_PREFIX = -#--------------------------------------------------------------------------- -# Configuration options related to the preprocessor -#--------------------------------------------------------------------------- -ENABLE_PREPROCESSING = YES -MACRO_EXPANSION = NO -EXPAND_ONLY_PREDEF = NO -SEARCH_INCLUDES = YES -INCLUDE_PATH = -INCLUDE_FILE_PATTERNS = -PREDEFINED = -EXPAND_AS_DEFINED = -SKIP_FUNCTION_MACROS = YES -#--------------------------------------------------------------------------- -# Configuration::additions related to external references -#--------------------------------------------------------------------------- -TAGFILES = -GENERATE_TAGFILE = postConfigure.tag -ALLEXTERNALS = NO -EXTERNAL_GROUPS = YES -PERL_PATH = /usr/bin/perl -#--------------------------------------------------------------------------- -# Configuration options related to the dot tool -#--------------------------------------------------------------------------- -CLASS_DIAGRAMS = YES -HIDE_UNDOC_RELATIONS = YES -HAVE_DOT = NO -CLASS_GRAPH = YES -COLLABORATION_GRAPH = YES -GROUP_GRAPHS = YES -UML_LOOK = NO -TEMPLATE_RELATIONS = NO -INCLUDE_GRAPH = YES -INCLUDED_BY_GRAPH = YES -CALL_GRAPH = NO -GRAPHICAL_HIERARCHY = YES -DIRECTORY_GRAPH = YES -DOT_IMAGE_FORMAT = png -DOT_PATH = -DOTFILE_DIRS = -MAX_DOT_GRAPH_WIDTH = 1024 -MAX_DOT_GRAPH_HEIGHT = 1024 -MAX_DOT_GRAPH_DEPTH = 1000 -DOT_TRANSPARENT = NO -DOT_MULTI_TARGETS = NO -GENERATE_LEGEND = YES -DOT_CLEANUP = YES -#--------------------------------------------------------------------------- -# Configuration::additions related to the search engine -#--------------------------------------------------------------------------- -SEARCHENGINE = NO diff --git a/oamapps/postConfigure/getMySQLpw.cpp b/oamapps/postConfigure/getMySQLpw.cpp deleted file mode 100644 index 815d1e7e1..000000000 --- a/oamapps/postConfigure/getMySQLpw.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - Copyright (C) 2017 MariaDB - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/****************************************************************************************** -* $Id: getMySQLpw.cpp 64 2006-10-12 22:21:51Z dhill $ -* -* -* -******************************************************************************************/ -/** - * @file - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "liboamcpp.h" -#include "installdir.h" - -using namespace std; -using namespace oam; - -int main(int argc, char* argv[]) -{ - Oam oam; - - cout << oam::UnassignedName << endl; - - exit (0); - - string USER = "root"; - char* p = getenv("USER"); - - if (p && *p) - USER = p; - - string HOME = "/root"; - p = getenv("HOME"); - - if (p && *p) - HOME = p; - - string fileName = HOME + "/.my.cnf"; - - ifstream file (fileName.c_str()); - - if (!file) - { - cout << oam::UnassignedName << endl; - exit (1); - } - - char line[400]; - string buf; - - while (file.getline(line, 400)) - { - buf = line; - - string::size_type pos = buf.find("root", 0); - - if (pos != string::npos) - { - file.getline(line, 400); - buf = line; - - pos = buf.find("password", 0); - - if (pos != string::npos) - { - string::size_type pos1 = buf.find("=", pos); - - if (pos1 != string::npos) - { - //password found - - string password = buf.substr(pos1 + 2, 80); - - cout << password << endl; - exit (0); - } - } - } - } - - file.close(); - - cout << oam::UnassignedName << endl; - - exit (1); - -} -// vim:ts=4 sw=4: - diff --git a/oamapps/postConfigure/helpers.cpp b/oamapps/postConfigure/helpers.cpp deleted file mode 100644 index ee769f224..000000000 --- a/oamapps/postConfigure/helpers.cpp +++ /dev/null @@ -1,844 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - Copyright (C) 2016 MariaDB Corporation - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "mcsconfig.h" -#include "configcpp.h" -using namespace config; - -using namespace std; - -#include "liboamcpp.h" -using namespace oam; - -#include "helpers.h" - -using namespace installer; - -#include "installdir.h" - -string pwprompt = " "; - -string masterLogFile = oam::UnassignedName; -string masterLogPos = oam::UnassignedName; -string prompt; - -const char* pcommand = 0; - -bool noPrompting = false; - -namespace installer -{ - -const char* callReadline(string prompt) -{ - if ( !noPrompting ) - { - const char* ret = readline(prompt.c_str()); - - if ( ret == 0 ) - exit(1); - - return ret; - } - else - { - cout << prompt << endl; - return ""; - } -} - -void callFree(const char* ) -{ - if ( !noPrompting ) - free((void*)pcommand); - - pcommand = 0; -} - - -bool waitForActive() -{ - Oam oam; - - try - { - oam.waitForActive(); - return true; - } - catch (...) - {} - - return false; -} - - -void dbrmDirCheck() -{ - - const string fname = std::string(MCSSYSCONFDIR) + "/columnstore/Columnstore.xml.rpmsave"; - ifstream oldFile (fname.c_str()); - - if (!oldFile) return; - - string SystemSection = "SystemConfig"; - Config* sysConfig = Config::makeConfig(); - Config* sysConfigPrev = Config::makeConfig(fname); - - string dbrmroot = ""; - string dbrmrootPrev = ""; - - try - { - dbrmroot = sysConfig->getConfig(SystemSection, "DBRMRoot"); - dbrmrootPrev = sysConfigPrev->getConfig(SystemSection, "DBRMRoot"); - } - catch (const std::exception& exc) - { - std::cerr << exc.what() << std::endl; - } - - if ( dbrmrootPrev.empty() ) - return; - - if ( dbrmroot == dbrmrootPrev ) - return; - - string dbrmrootDir = ""; - string dbrmrootPrevDir = ""; - - string::size_type pos = dbrmroot.find("/BRM_saves", 0); - - if (pos != string::npos) - //get directory path - dbrmrootDir = dbrmroot.substr(0, pos); - else - { - return; - } - - pos = dbrmrootPrev.find("/BRM_saves", 0); - - if (pos != string::npos) - //get directory path - dbrmrootPrevDir = dbrmrootPrev.substr(0, pos); - else - { - return; - } - - // return if prev directory doesn't exist - ifstream File (dbrmrootPrevDir.c_str()); - - if (!File) - return; - - string dbrmrootCurrent = dbrmroot + "_current"; - string dbrmrootCurrentPrev = dbrmrootPrev + "_current"; - - // return if prev current file doesn't exist - ifstream File1 (dbrmrootCurrentPrev.c_str()); - - if (!File1) - return; - - string cmd; - // check if current file does't exist - // if not, copy prev files to current directory - ifstream File2 (dbrmrootCurrent.c_str()); - - if (!File2) - { - cout << endl << "===== DBRM Data File Directory Check =====" << endl << endl; - cmd = "/bin/cp -rpf " + dbrmrootPrevDir + "/* " + dbrmrootDir + "/."; - system(cmd.c_str()); - - //update the current file hardcoded path - ifstream oldFile (dbrmrootCurrent.c_str()); - - if (oldFile) - { - char line[200]; - oldFile.getline(line, 200); - string dbrmFile = line; - - string::size_type pos = dbrmFile.find("/BRM_saves", 0); - - if (pos != string::npos) - dbrmFile = dbrmrootDir + dbrmFile.substr(pos, 80); - - unlink (dbrmrootCurrent.c_str()); - ofstream newFile (dbrmrootCurrent.c_str()); - - string cmd = "echo " + dbrmFile + " > " + dbrmrootCurrent; - system(cmd.c_str()); - - newFile.close(); - } - - cmd = "mv -f " + dbrmrootPrevDir + " " + dbrmrootPrevDir + ".old"; - system(cmd.c_str()); - cout << endl << "DBRM data files were copied from dbrm directory" << endl; - cout << dbrmrootPrevDir << " to current directory of " << dbrmrootDir << "." << endl; - cout << "The old dbrm directory was renamed to " << dbrmrootPrevDir << ".old ." << endl; - } - else - { - string start = "y"; - cout << endl << "===== DBRM Data File Directory Check =====" << endl << endl; - cout << endl << "DBRM data files were found in " << dbrmrootPrevDir << endl; - cout << "and in the new location " << dbrmrootDir << "." << endl << endl; - cout << "Make sure that the correct set of files are in the new location." << endl; - cout << "Then rename the directory " << dbrmrootPrevDir << " to " << dbrmrootPrevDir << ".old" << endl; - cout << "If the files were copied from " << dbrmrootPrevDir << " to " << dbrmrootDir << endl; - cout << "you will need to edit the file BRM_saves_current to contain the current path of" << endl; - cout << dbrmrootDir << endl << endl; - cout << "Please reference the MariaDB Columnstore Installation Guide on Upgrade Installs for" << endl; - cout << "addition information, if needed." << endl << endl; - - while (true) - { - string answer = "n"; - prompt = "Enter 'y' when you are ready to continue > "; - pcommand = callReadline(prompt.c_str()); - - if (pcommand) - { - if (strlen(pcommand) > 0) answer = pcommand; - - callFree(pcommand); - pcommand = 0; - } - - if ( answer == "y" ) - break; - else - cout << "Invalid Entry, please enter 'y' for yes" << endl; - } - } - - cmd = "chmod 755 -R /var/lib/columnstore/data1/systemFiles/dbrm > /dev/null 2>&1"; - system(cmd.c_str()); - - return; -} - -void mysqlSetup() -{ - Oam oam; - string cmd; - string tmpDir = startup::StartUp::tmpDir(); - string mysqlpw = oam.getMySQLPassword(); - string passwordOption = ""; - - if ( mysqlpw != oam::UnassignedName ) - passwordOption = " --password=" + mysqlpw; - - cmd = "post-mysqld-install " + passwordOption + " --tmpdir=" + tmpDir + " > " + tmpDir + "/post-mysqld-install.log 2>&1"; - int rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) != 0) - { - cout << "Error running post-mysqld-install, check " << tmpDir << "/post-mysqld-install.log" << endl; - cout << "Exiting..." << endl; - exit (1); - } - else - cout << "post-mysqld-install Successfully Completed" << endl; - - int user; - bool rootUser = true; - user = getuid(); - - if (user != 0) - rootUser = false; - - string HOME = "/root"; - - if (!rootUser) - { - char* p = getenv("HOME"); - - if (p && *p) - HOME = p; - } - - cmd = "post-mysql-install --tmpdir=" + tmpDir + " > " + tmpDir + "/post-mysql-install.log"; - rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) == 2) - { - cout << "Error running post-mysql-install, password is needed. check " + HOME + "/.my.cnf " << endl; - cout << "Exiting..." << endl; - exit (1); - } - else if (WEXITSTATUS(rtnCode) == 1) - { - cout << "Error running post-mysql-install, " + tmpDir + "/post-mysql-install.log" << endl; - cout << "Exiting..." << endl; - exit (1); - } - else - cout << "post-mysql-install Successfully Completed" << endl; - - return; -} - -/****************************************************************************************** -* @brief sendReplicationRequest -* -* purpose: send Upgrade Request Msg to all ACTIVE UMs -* -* -******************************************************************************************/ -int sendReplicationRequest(int IserverTypeInstall, std::string password, bool pmwithum) -{ - Oam oam; - - SystemModuleTypeConfig systemmoduletypeconfig; - - string tmpDir = startup::StartUp::tmpDir(); - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - } - catch (const std::exception& exc) - { - std::cerr << exc.what() << std::endl; - } - - //get Primary (Master) UM - string masterModule = oam::UnassignedName; - - try - { - oam.getSystemConfig("PrimaryUMModuleName", masterModule); - } - catch (...) - { - masterModule = oam::UnassignedName; - } - - if ( masterModule == oam::UnassignedName ) - { - // use default setting - masterModule = "um1"; - - if ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) - masterModule = "pm1"; - } - - int returnStatus = oam::API_SUCCESS; - - bool masterDone = false; - - for ( unsigned int i = 0; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - string moduleType = systemmoduletypeconfig.moduletypeconfig[i].ModuleType; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); ) - { - // we want to do master first - if ( ( (*pt).DeviceName == masterModule && !masterDone ) || - ( (*pt).DeviceName != masterModule && masterDone ) ) - { - int opState = oam::ACTIVE; - bool degraded; - - try - { - oam.getModuleStatus((*pt).DeviceName, opState, degraded); - - if (opState == oam::ACTIVE || - opState == oam::DEGRADED) - { - if ( (*pt).DeviceName == masterModule ) - { - // set for Master MySQL DB distrubution to slaves - messageqcpp::ByteStream msg1; - messageqcpp::ByteStream::byte requestID = oam::MASTERDIST; - msg1 << requestID; - msg1 << password; - msg1 << "all"; // dist to all slave modules - - returnStatus = sendMsgProcMon( (*pt).DeviceName, msg1, requestID, 600 ); - - if ( returnStatus != API_SUCCESS) - { - cout << endl << "ERROR: Error return in running the MariaDB ColumnStore Master DB Distribute, check " + tmpDir + "/master-dist*.logs on " << masterModule << endl; - return returnStatus; - } - - // set for master repl request - messageqcpp::ByteStream msg; - requestID = oam::MASTERREP; - msg << requestID; - - returnStatus = sendMsgProcMon( (*pt).DeviceName, msg, requestID, 30 ); - - if ( returnStatus != API_SUCCESS) - { - cout << endl << "ERROR: Error return in running the MariaDB ColumnStore Master replication, check " + tmpDir + "/master-rep*.logs on " << masterModule << endl; - return returnStatus; - } - - masterDone = true; - pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - } - else - { - // set for slave repl request - // don't do PMs unless PMwithUM flag is set - string moduleType = (*pt).DeviceName.substr(0, MAX_MODULE_TYPE_SIZE); - - if ( ( moduleType == "pm" && !pmwithum ) && - ( IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM ) ) - { - pt++; - continue; - } - - messageqcpp::ByteStream msg; - messageqcpp::ByteStream::byte requestID = oam::SLAVEREP; - msg << requestID; - - if ( masterLogFile == oam::UnassignedName || - masterLogPos == oam::UnassignedName ) - return API_FAILURE; - - msg << masterLogFile; - msg << masterLogPos; - - returnStatus = sendMsgProcMon( (*pt).DeviceName, msg, requestID, 30 ); - - if ( returnStatus != API_SUCCESS) - { - cout << endl << "ERROR: Error return in running the MariaDB ColumnStore Slave replication, check " + tmpDir + "/slave-rep*.logs on " << (*pt).DeviceName << endl; - return returnStatus; - } - - pt++; - } - } - else - { - pt++; - } - } - catch (const std::exception& exc) - { - std::cerr << exc.what() << std::endl; - } - } - else - pt++; - } - } - - return returnStatus; -} - - -/****************************************************************************************** -* @brief sendMsgProcMon -* -* purpose: Sends a Msg to ProcMon -* -******************************************************************************************/ -int sendMsgProcMon( std::string module, messageqcpp::ByteStream msg, int requestID, int timeout ) -{ - string msgPort = module + "_ProcessMonitor"; - int returnStatus = API_FAILURE; - Oam oam; - - // do a ping test to determine a quick failure - Config* sysConfig = Config::makeConfig(); - - string IPAddr = sysConfig->getConfig(msgPort, "IPAddr"); - - if ( IPAddr == oam::UnassignedIpAddr ) - { - return returnStatus; - } - - string cmdLine = "ping "; - string cmdOption = " -w 1 >> /dev/null"; - string cmd = cmdLine + IPAddr + cmdOption; - - if ( system(cmd.c_str()) != 0) - { - //ping failure - return returnStatus; - } - - try - { - messageqcpp::MessageQueueClient mqRequest(msgPort); - mqRequest.write(msg); - - if ( timeout > 0 ) - { - // wait for response - messageqcpp::ByteStream::byte returnACK; - messageqcpp::ByteStream::byte returnRequestID; - messageqcpp::ByteStream::byte requestStatus; - messageqcpp::ByteStream receivedMSG; - - struct timespec ts = { timeout, 0 }; - - // get current time in seconds - time_t startTimeSec; - time (&startTimeSec); - - while (true) - { - try - { - receivedMSG = mqRequest.read(&ts); - } - catch (const std::exception& exc) - { - std::cerr << exc.what() << std::endl; - return returnStatus; - } - - if (receivedMSG.length() > 0) - { - receivedMSG >> returnACK; - receivedMSG >> returnRequestID; - receivedMSG >> requestStatus; - - if ( requestID == oam::MASTERREP ) - { - receivedMSG >> masterLogFile; - receivedMSG >> masterLogPos; - } - - if ( returnACK == oam::ACK && returnRequestID == requestID) - { - // ACK for this request - returnStatus = requestStatus; - break; - } - } - else - { - //api timeout occurred, check if retry should be done - // get current time in seconds - time_t endTimeSec; - time (&endTimeSec); - - if ( timeout <= (endTimeSec - startTimeSec) ) - { - break; - } - } - } - } - else - returnStatus = oam::API_SUCCESS; - - mqRequest.shutdown(); - } - catch (const std::exception& exc) - { - std::cerr << exc.what() << std::endl; - } - - return returnStatus; -} - -void checkFilesPerPartion(int DBRootCount, Config* sysConfig) -{ - // check 'files per parition' with number of dbroots - // 'files per parition' need to be a multiple of dbroots - // update if no database already exist - // issue warning if database exist - - Oam oam; - string SystemSection = "SystemConfig"; - - string dbRoot = "/var/lib/columnstore/data1"; - - try - { - dbRoot = sysConfig->getConfig(SystemSection, "DBRoot1"); - } - catch (const std::exception& exc) - { - std::cerr << exc.what() << std::endl; - } - - dbRoot = dbRoot + "/000.dir"; - - float FilesPerColumnPartition = 4; - - try - { - string tmp = sysConfig->getConfig("ExtentMap", "FilesPerColumnPartition"); - FilesPerColumnPartition = atoi(tmp.c_str()); - } - catch (const std::exception& exc) - { - std::cerr << exc.what() << std::endl; - } - - if ( fmod(FilesPerColumnPartition, (float) DBRootCount) != 0 ) - { - ifstream oldFile (dbRoot.c_str()); - - if (!oldFile) - { - //set FilesPerColumnPartition == DBRootCount - sysConfig->setConfig("ExtentMap", "FilesPerColumnPartition", oam.itoa(DBRootCount)); - - cout << endl << "***************************************************************************" << endl; - cout << "NOTE: Mismatch between FilesPerColumnPartition (" + oam.itoa((int)FilesPerColumnPartition) + ") and number of DBRoots (" + oam.itoa(DBRootCount) + ")" << endl; - cout << " Setting FilesPerColumnPartition = number of DBRoots" << endl; - cout << "***************************************************************************" << endl; - } - else - { - cout << endl << "***************************************************************************" << endl; - cout << "WARNING: Mismatch between FilesPerColumnPartition (" + oam.itoa((int)FilesPerColumnPartition) + ") and number of DBRoots (" + oam.itoa(DBRootCount) + ")" << endl; - cout << " Database already exist, going forward could corrupt the database" << endl; - cout << " Please Contact Customer Support" << endl; - cout << "***************************************************************************" << endl; - exit (1); - } - } - - return; -} - -void checkMysqlPort(std::string& mysqlPort) -{ - string tmpDir = startup::StartUp::tmpDir(); - - while (true) - { - string cmd = "netstat -na | grep -e :" + mysqlPort + "[[:space:]] | grep LISTEN > " + tmpDir + "/mysqlport"; - - system(cmd.c_str()); - string fileName = tmpDir + "/mysqlport"; - ifstream oldFile (fileName.c_str()); - - if (oldFile) - { - oldFile.seekg(0, std::ios::end); - int size = oldFile.tellg(); - - if (size != 0) - { - cout << endl << "The MariaDB ColumnStore port of '" + mysqlPort + "' is already in-use" << endl; - cout << "Please stop the process that is using port '" + mysqlPort + "'" << endl; - exit(1); - } - else - break; - } - else - break; - } - -} - -void checkSystemMySQLPort(std::string& mysqlPort, Config* sysConfig, std::string USER, std::string password, ChildModuleList childmodulelist, int IserverTypeInstall, bool pmwithum) -{ - Oam oam; - - bool inUse = false; - - string tmpDir = startup::StartUp::tmpDir(); - - while (true) - { - string localnetstat = "netstat -na | grep -e :" + mysqlPort + "[[:space:]] | grep LISTEN > " + tmpDir + "/mysqlport"; - string remotenetstat = "netstat -na | grep -e :" + mysqlPort + "[[:space:]] | grep LISTEN"; - - //first check local mysql, if needed - if ( ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) || - ( ( IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM ) && pmwithum ) ) - { - system(localnetstat.c_str()); - string fileName = tmpDir + "/mysqlport"; - ifstream oldFile (fileName.c_str()); - - if (oldFile) - { - oldFile.seekg(0, std::ios::end); - int size = oldFile.tellg(); - - if ( size != 0 ) - { - if ( noPrompting ) - { - cout << endl << "The MariaDB ColumnStore port of '" + mysqlPort + "' is already in-use" << endl; - cout << "Either use the command line argument of 'port' to enter a different number" << endl; - cout << "or stop the process that is using port '" + mysqlPort + "'" << endl; - cout << "For No-prompt install, exiting" << endl; - exit(1); - } - else - inUse = true; - } - } - } - - // if not inuse by local, go check remote servers - string inUseServer = ""; - - if ( !inUse ) - { - ChildModuleList::iterator list1 = childmodulelist.begin(); - - for (; list1 != childmodulelist.end() ; list1++) - { - string remoteModuleName = (*list1).moduleName; - string remoteModuleIP = (*list1).moduleIP; - string remoteHostName = (*list1).hostName; - string remoteModuleType = remoteModuleName.substr(0, MAX_MODULE_TYPE_SIZE); - - if ( remoteModuleType == "um" || - (remoteModuleType == "pm" && IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM) || - (remoteModuleType == "pm" && pmwithum) ) - { - - string cmd = "remote_command_verify.sh " + remoteModuleIP + " " + " " + USER + " " + password + " '" + remotenetstat + "' tcp error 5 0 > /dev/null 2>&1"; - int rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) == 0) - { - if ( noPrompting ) - { - cout << endl << "The MariaDB ColumnStore port of '" + mysqlPort + "' is already in-use on " << remoteModuleName << endl; - cout << "Either use the command line argument of 'port' to enter a different number" << endl; - cout << "or stop the process that is using port '" + mysqlPort + "'" << endl; - cout << "For No-prompt install, exiting" << endl; - cout << "exiting..." << endl; - exit(1); - } - else - { - inUseServer = remoteModuleName; - inUse = true; - break; - } - } - } - } - } - - if ( inUse ) - { - cout << endl << "The MariaDB ColumnStore port of '" + mysqlPort + "' is already in-use on " << inUseServer << endl; - cout << "Either enter a different port to use" << endl; - cout << "or stop the process that is using port '" + mysqlPort + "' and enter '" + mysqlPort + "' to continue" << endl; - - while (true) - { - prompt = "Enter a port number > "; - pcommand = callReadline(prompt.c_str()); - - if (pcommand) - { - if (strlen(pcommand) > 0) mysqlPort = pcommand; - - callFree(pcommand); - pcommand = 0; - } - - if ( atoi(mysqlPort.c_str()) < 1000 || atoi(mysqlPort.c_str()) > 9999) - { - cout << " ERROR: Invalid MariaDB ColumnStore Port ID supplied, must be between 1000-9999" << endl; - } - else - break; - } - - inUse = false; - } - else - { - cout << endl; - - try - { - sysConfig->setConfig("Installation", "MySQLPort", mysqlPort); - } - catch (const std::exception& exc) - { - std::cerr << exc.what() << std::endl; - } - - if ( !writeConfig(sysConfig) ) - { - cout << "ERROR: Failed trying to update MariaDB Columnstore System Configuration file" << endl; - exit(1); - } - - break; - } - } - - // set mysql password - oam.changeMyCnf( "port", mysqlPort ); - - return; -} - -/* - * writeConfig - */ -bool writeConfig( Config* sysConfig ) -{ - for ( int i = 0 ; i < 3 ; i++ ) - { - try - { - sysConfig->write(); - return true; - } - catch (const std::exception& exc) - { - std::cerr << exc.what() << std::endl; - } - } - - return false; -} - -} - - diff --git a/oamapps/postConfigure/helpers.h b/oamapps/postConfigure/helpers.h deleted file mode 100644 index 6823486a9..000000000 --- a/oamapps/postConfigure/helpers.h +++ /dev/null @@ -1,52 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -#ifndef HELPERS_H__ -#define HELPERS_H__ - -#include "liboamcpp.h" - - -namespace installer -{ - -typedef struct Child_Module_struct -{ - std::string moduleName; - std::string moduleIP; - std::string hostName; -} ChildModule; - -typedef std::vector ChildModuleList; - -extern bool waitForActive(); -extern void dbrmDirCheck(); -extern void mysqlSetup(); -extern int sendMsgProcMon( std::string module, messageqcpp::ByteStream msg, int requestID, int timeout ); -extern int sendReplicationRequest(int IserverTypeInstall, std::string password, bool pmwithum); -extern void checkFilesPerPartion(int DBRootCount, Config* sysConfig); -extern void checkMysqlPort(string& mysqlPort); -extern bool writeConfig(Config* sysConfig); -extern void checkSystemMySQLPort(std::string& mysqlPort, Config* sysConfig, std::string USER, std::string password, ChildModuleList childmodulelist, int IserverTypeInstall, bool pmwithum); -extern const char* callReadline(string prompt); -extern void callFree(const char* ); - -} - -#endif - diff --git a/oamapps/postConfigure/installer.cpp b/oamapps/postConfigure/installer.cpp deleted file mode 100644 index 46b260aa6..000000000 --- a/oamapps/postConfigure/installer.cpp +++ /dev/null @@ -1,785 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/****************************************************************************************** -* $Id: installer.cpp 64 2006-10-12 22:21:51Z dhill $ -* -* -* List of files that will be updated during system install time on each server -* /etc/rc.local -* -******************************************************************************************/ -/** - * @file - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "stdio.h" -#include "ctype.h" -#include -#include - -#include "mcsconfig.h" -#include "liboamcpp.h" -#include "configcpp.h" - -using namespace std; -using namespace oam; -using namespace config; - -#include "helpers.h" -using namespace installer; - -#include "installdir.h" - -typedef struct Module_struct -{ - std::string moduleName; - std::string moduleIP; - std::string hostName; -} Module; - -typedef std::vector ModuleList; - -bool setOSFiles(string parentOAMModuleName, int serverTypeInstall); -bool makeBASH(); -bool makeModuleFile(string moduleName, string parentOAMModuleName); -bool updateProcessConfig(int serverTypeInstall); -bool makeRClocal(string moduleName, int IserverTypeInstall); -bool uncommentCalpontXml( string entry); - -extern string pwprompt; - -string mysqlpw = " "; - -bool rootUser = true; -string USER = "root"; - -int main(int argc, char* argv[]) -{ - string cmd; - Oam oam; - string parentOAMModuleIPAddr; - string parentOAMModuleHostName; - ModuleList childmodulelist; - ModuleList directormodulelist; - ModuleList usermodulelist; - ModuleList performancemodulelist; - Module childmodule; - - string prompt; - string nodeps = "-h"; - string installer_debug = "0"; - string packageType = "rpm"; - - Config* sysConfig = Config::makeConfig(); - string SystemSection = "SystemConfig"; - string InstallSection = "Installation"; - - string DBRootStorageType; - - if (argc < 12) - { - cerr << "installer: ERROR: not enough arguments" << endl; - exit(1); - } - - string calpont_rpm1 = argv[1]; - string calpont_rpm2 = argv[2]; - string calpont_rpm3 = argv[3]; - string mysql_rpm = argv[4]; - string mysqld_rpm = argv[5]; - string installType = argv[6]; - string password = argv[7]; - string reuseConfig = argv[8]; - nodeps = argv[9]; - mysqlpw = argv[10]; - installer_debug = argv[11]; - - string numBlocksPctParam = ""; - string totalUmMemoryParam = ""; - if (argc >= 14) { - if (string(argv[12]) != "-") { - numBlocksPctParam = argv[12]; - } - if (string(argv[13]) != "-") { - totalUmMemoryParam = argv[13]; - } - } - - ofstream file("/dev/null"); - - //save cout stream buffer - streambuf* strm_buffer = cout.rdbuf(); - - // redirect cout to /dev/null - cout.rdbuf(file.rdbuf()); - - cout << calpont_rpm1 << endl; - cout << calpont_rpm2 << endl; - cout << calpont_rpm3 << endl; - cout << mysql_rpm << endl; - cout << mysqld_rpm << endl; - cout << installType << endl; - cout << password << endl; - cout << reuseConfig << endl; - cout << nodeps << endl; - cout << mysqlpw << endl; - cout << installer_debug << endl; - if (!numBlocksPctParam.empty()) { - cout << numBlocksPctParam << endl; - } - if (!totalUmMemoryParam.empty()) { - cout << totalUmMemoryParam << endl; - } - - // restore cout stream buffer - cout.rdbuf (strm_buffer); - - if ( mysqlpw == "dummymysqlpw" ) - mysqlpw = " "; - - pwprompt = "--password=" + mysqlpw; - - //check if root-user - int user; - user = getuid(); - - if (user != 0) - rootUser = false; - - char* p = getenv("USER"); - - if (p && *p) - USER = p; - - string tmpDir = startup::StartUp::tmpDir(); - - // setup to start on reboot, for non-root amazon installs - if ( !rootUser ) - { - system("sed -i -e 's/#runuser/runuser/g' /etc/rc.d/rc.local >/dev/null 2>&1"); - } - - //copy Columnstore.xml.rpmsave if upgrade option is selected - if ( installType == "upgrade" ) - { - cmd = "/bin/cp -f " + std::string(MCSSYSCONFDIR) + "/columnstore/Columnstore.xml " + std::string(MCSSYSCONFDIR) + "/columnstore/Columnstore.xml.new 2>&1"; - system(cmd.c_str()); - cmd = "/bin/cp -f " + std::string(MCSSYSCONFDIR) + "/columnstore/Columnstore.xml.rpmsave " + std::string(MCSSYSCONFDIR) + "/columnstore/Columnstore.xml 2>&1"; - system(cmd.c_str()); - } - - string serverTypeInstall; - int IserverTypeInstall; - - try - { - serverTypeInstall = sysConfig->getConfig(InstallSection, "ServerTypeInstall"); - } - catch (...) - { - cout << "ERROR: Problem getting ServerTypeInstall from the MariaDB ColumnStore System Configuration file" << endl; - exit(1); - } - - IserverTypeInstall = atoi(serverTypeInstall.c_str()); - - //get PPackage Type - try - { - packageType = sysConfig->getConfig(InstallSection, "EEPackageType"); - } - catch (...) - { - cout << "ERROR: Problem getting EEPackageType" << endl; - exit(1); - } - - //get Parent OAM Module Name and setup of it's Custom OS files - string parentOAMModuleName; - - try - { - parentOAMModuleName = sysConfig->getConfig(SystemSection, "ParentOAMModuleName"); - - if ( parentOAMModuleName == oam::UnassignedName ) - { - cout << "ERROR: Parent OAM Module Name is unassigned" << endl; - exit(1); - } - } - catch (...) - { - cout << "ERROR: Problem getting Parent OAM Module Name" << endl; - exit(1); - } - - if ( installType == "initial" ) - { - //cleanup/create /local/etc directories - cmd = "rm -rf /var/lib/columnstore/local/etc > /dev/null 2>&1"; - system(cmd.c_str()); - cmd = "mkdir /var/lib/columnstore/local/etc > /dev/null 2>&1"; - system(cmd.c_str()); - - //create associated /local/etc directory for parentOAMModuleName - cmd = "mkdir /var/lib/columnstore/local/etc/" + parentOAMModuleName + " > /dev/null 2>&1"; - system(cmd.c_str()); - } - - //Get list of configured system modules - SystemModuleTypeConfig sysModuleTypeConfig; - - try - { - oam.getSystemConfig(sysModuleTypeConfig); - } - catch (...) - { - cout << "ERROR: Problem reading the MariaDB ColumnStore System Configuration file" << endl; - exit(1); - } - - string ModuleSection = "SystemModuleConfig"; - - //get OAM Parent Module IP addresses and Host Name - for ( unsigned int i = 0 ; i < sysModuleTypeConfig.moduletypeconfig.size(); i++) - { - DeviceNetworkList::iterator listPT = sysModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; listPT != sysModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; listPT++) - { - HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin(); - - if ( (*listPT).DeviceName == parentOAMModuleName ) - { - parentOAMModuleIPAddr = (*pt1).IPAddr; - parentOAMModuleHostName = (*pt1).HostName; - break; - } - } - } - - for ( unsigned int i = 0 ; i < sysModuleTypeConfig.moduletypeconfig.size(); i++) - { - string moduleType = sysModuleTypeConfig.moduletypeconfig[i].ModuleType; - int moduleCount = sysModuleTypeConfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0 ) - //no modules equipped for this Module Type, skip - continue; - - //get IP addresses and Host Names - DeviceNetworkList::iterator listPT = sysModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; listPT != sysModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; listPT++) - { - string moduleName = (*listPT).DeviceName; - HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin(); - string moduleIPAddr = (*pt1).IPAddr; - string moduleHostName = (*pt1).HostName; - - //save module info - childmodule.moduleName = moduleName; - childmodule.moduleIP = moduleIPAddr; - childmodule.hostName = moduleHostName; - - if ( moduleName != parentOAMModuleName) - { - childmodulelist.push_back(childmodule); - } - - if ( moduleType == "dm") - directormodulelist.push_back(childmodule); - - if ( moduleType == "um") - usermodulelist.push_back(childmodule); - - if ( moduleType == "pm") - performancemodulelist.push_back(childmodule); - - if ( installType == "initial" ) - { - //create associated /local/etc directory for module - cmd = "mkdir /var/lib/columnstore/local/etc/" + moduleName + " > /dev/null 2>&1"; - system(cmd.c_str()); - } - } - } //end of i for loop - - if ( installType != "uninstall" ) - { - //setup rc.local file in local parent module - if ( !makeRClocal(parentOAMModuleName, IserverTypeInstall) ) - cout << "makeRClocal error" << endl; - - //create associated /local/etc directory for module - // generate module - ModuleList::iterator list1 = directormodulelist.begin(); - - for (; list1 != directormodulelist.end() ; list1++) - { - cmd = "mkdir /var/lib/columnstore/local/etc/" + (*list1).moduleName + " > /dev/null 2>&1"; - system(cmd.c_str()); - - //make module file in /local/etc/"modulename" - if ( !makeModuleFile((*list1).moduleName, parentOAMModuleName) ) - cout << "makeModuleFile error" << endl; - - //setup rc.local file in module tmp dir - if ( !makeRClocal((*list1).moduleName, IserverTypeInstall) ) - cout << "makeRClocal error" << endl; - } - - list1 = usermodulelist.begin(); - - for (; list1 != usermodulelist.end() ; list1++) - { - cmd = "mkdir /var/lib/columnstore/local/etc/" + (*list1).moduleName + " > /dev/null 2>&1"; - system(cmd.c_str()); - - //make module file in /local/etc/"modulename" - if ( !makeModuleFile((*list1).moduleName, parentOAMModuleName) ) - cout << "makeModuleFile error" << endl; - - //setup rc.local file in module tmp dir - if ( !makeRClocal((*list1).moduleName, IserverTypeInstall) ) - cout << "makeRClocal error" << endl; - } - - list1 = performancemodulelist.begin(); - - for (; list1 != performancemodulelist.end() ; list1++) - { - cmd = "mkdir /var/lib/columnstore/local/etc/" + (*list1).moduleName + " > /dev/null 2>&1"; - system(cmd.c_str()); - - //make module file in /local/etc/"modulename" - if ( !makeModuleFile((*list1).moduleName, parentOAMModuleName) ) - cout << "makeModuleFile error" << endl; - - //setup rc.local file in module tmp dir - if ( !makeRClocal((*list1).moduleName, IserverTypeInstall) ) - cout << "makeRClocal error" << endl; - } - } - - if ( installType == "initial" ) - { - //setup local OS Files - if ( !setOSFiles(parentOAMModuleName, IserverTypeInstall) ) - { - cout << "ERROR: setOSFiles error" << endl; - cout << " IMPORTANT: Once issue has been resolved, rerun postConfigure" << endl << endl; - exit(1); - } - - cmd = "chmod 755 -R /var/lib/columnstore/data1/systemFiles/dbrm > /dev/null 2>&1"; - system(cmd.c_str()); - } - - // - // perform single-server install from postConfigure - // - - //run the mysql / mysqld setup scripts - cout << endl << "Running the MariaDB ColumnStore setup scripts" << endl << endl; - - // call the mysql setup scripts - mysqlSetup(); - sleep(5); - exit(0); -} - - -/* - * Setup OS Files by appending the Calpont versions - */ - -// /etc OS Files to be updated -string files[] = -{ - " " -}; - -bool setOSFiles(string parentOAMModuleName, int serverTypeInstall) -{ - string cmd; - bool allfound = true; - - //update /etc files - for ( int i = 0;; ++i) - { - if ( files[i] == " ") - //end of list - break; - - string fileName = "/etc/" + files[i]; - - //make a backup copy before changing - cmd = "rm -f " + fileName + ".columnstoreSave"; - system(cmd.c_str()); - - cmd = "cp " + fileName + " " + fileName + ".columnstoreSave > /dev/null 2>&1"; - system(cmd.c_str()); - - cmd = "cat /var/lib/columnstore/local/etc/" + parentOAMModuleName + "/" + files[i] + ".calpont >> " + fileName; - - if (geteuid() == 0) system(cmd.c_str()); - - cmd = "rm -f /var/lib/columnstore/local/ " + files[i] + "*.calpont > /dev/null 2>&1"; - system(cmd.c_str()); - - cmd = "cp /var/lib/columnstore/local/etc/" + parentOAMModuleName + "/" + files[i] + ".calpont /var/lib/columnstore/local/. > /dev/null 2>&1"; - system(cmd.c_str()); - } - - return allfound; -} - -/* - * Create a module file - */ -bool makeModuleFile(string moduleName, string parentOAMModuleName) -{ - string cmd; - string fileName; - - if ( moduleName == parentOAMModuleName ) - fileName = "/var/lib/columnstore/local/module"; - else - fileName = "/var/lib/columnstore/local/etc/" + moduleName + "/module"; - - unlink (fileName.c_str()); - ofstream newFile (fileName.c_str()); - - cmd = "echo " + moduleName + " > " + fileName; - system(cmd.c_str()); - - newFile.close(); - - return true; -} - -/* - * Update ProcessConfig.xml file for a single server configuration - * Change the 'um' and 'pm' to 'dm' - */ -bool updateProcessConfig(int serverTypeInstall) -{ - vector oldModule; - string newModule; - string cmd; - - switch ( serverTypeInstall ) - { - case (oam::INSTALL_COMBINE_DM_UM_PM): - { - newModule = ">pm"; - oldModule.push_back(">um"); - oldModule.push_back(">pm"); - break; - } - - case (oam::INSTALL_COMBINE_DM_UM): - { - newModule = ">um"; - oldModule.push_back(">dm"); - break; - } - - case (oam::INSTALL_COMBINE_PM_UM): - { - newModule = ">pm"; - oldModule.push_back(">um"); - break; - } - } - - string fileName = std::string(MCSSYSCONFDIR) + "/columnstore/ProcessConfig.xml"; - - //Save a copy of the original version - cmd = "/bin/cp -f " + fileName + " " + fileName + ".columnstoreSave > /dev/null 2>&1"; - system(cmd.c_str()); - - ifstream oldFile (fileName.c_str()); - - if (!oldFile) return false; - - vector lines; - char line[200]; - string buf; - string newLine; - string newLine1; - - while (oldFile.getline(line, 200)) - { - buf = line; - newLine = line; - - std::vector::iterator pt1 = oldModule.begin(); - - for ( ; pt1 != oldModule.end() ; pt1++) - { - int start = 0; - - while (true) - { - string::size_type pos = buf.find(*pt1, start); - - if (pos != string::npos) - { - newLine = buf.substr(0, pos); - newLine.append(newModule); - - newLine1 = buf.substr(pos + 3, 200); - newLine.append(newLine1); - start = pos + 3; - } - else - { - buf = newLine; - start = 0; - break; - } - } - } - - //output to temp file - lines.push_back(buf); - } - - oldFile.close(); - unlink (fileName.c_str()); - ofstream newFile (fileName.c_str()); - - //create new file - int fd = open(fileName.c_str(), O_RDWR | O_CREAT, 0666); - - copy(lines.begin(), lines.end(), ostream_iterator(newFile, "\n")); - newFile.close(); - - close(fd); - return true; -} - - -/* - * Make makeRClocal to set mount scheduler - */ -bool makeRClocal(string moduleName, int IserverTypeInstall) -{ - - return true; - - string moduleType = moduleName.substr(0, MAX_MODULE_TYPE_SIZE); - - vector lines; - - string mount1; - string mount2 - ; - - switch ( IserverTypeInstall ) - { - case (oam::INSTALL_NORMAL): // normal - { - if ( moduleType == "um" ) - mount1 = "/mnt\\/tmp/"; - else if ( moduleType == "pm" ) - mount1 = "/mariadb/columnstore\\/data/"; - else - return true; - - break; - } - - case (oam::INSTALL_COMBINE_DM_UM_PM): // combined #1 - dm/um/pm - { - if ( moduleType == "pm" ) - { - mount1 = "/mnt\\/tmp/"; - mount2 = "/mariadb/columnstore\\/data/"; - } - else - return true; - - break; - } - - case (oam::INSTALL_COMBINE_DM_UM): // combined #2 dm/um on a same server - { - if ( moduleType == "um" ) - mount1 = "/mnt\\/tmp/"; - else if ( moduleType == "pm" ) - mount1 = "/mariadb/columnstore\\/data/"; - else - return true; - - break; - } - - case (oam::INSTALL_COMBINE_PM_UM): // combined #3 um/pm on a same server - { - if ( moduleType == "pm" ) - { - mount1 = "/mnt\\/tmp/"; - mount2 = "/mariadb/columnstore\\/data/"; - } - else - return true; - - break; - } - } - - if ( !mount1.empty() ) - { - string line1 = "for scsi_dev in `mount | awk '" + mount1 + " {print $1}' | awk -F/ '{print $3}' | sed 's/[0-9]*$//'`; do"; - string line2 = " echo deadline > /sys/block/$scsi_dev/queue/scheduler"; - string line3 = "done"; - - lines.push_back(line1); - lines.push_back(line2); - lines.push_back(line3); - } - else - { - if ( !mount2.empty() ) - { - string line1 = "for scsi_dev in `mount | awk '" + mount2 + " {print $1}' | awk -F/ '{print $3}' | sed 's/[0-9]*$//'`; do"; - string line2 = " echo deadline > /sys/block/$scsi_dev/queue/scheduler"; - string line3 = "done"; - - lines.push_back(line1); - lines.push_back(line2); - lines.push_back(line3); - } - } - - if ( lines.begin() == lines.end()) - return true; - - string RCfileName = "/etc/rc.d/rc.local"; - std::ofstream file; - - file.open(RCfileName.c_str(), std::ios::out | std::ios::app); - - if (file.fail()) - { - RCfileName = "/etc/rc.local"; - - file.open(RCfileName.c_str(), std::ios::out | std::ios::app); - - if (file.fail()) - return true; - } - - file.exceptions(file.exceptions() | std::ios::failbit | std::ifstream::badbit); - - copy(lines.begin(), lines.end(), ostream_iterator(file, "\n")); - - file.close(); - - return true; -} - - -/* - * Uncomment entry in Columnstore.xml - */ -bool uncommentCalpontXml( string entry) -{ - string fileName = std::string(MCSSYSCONFDIR) + "/columnstore/Columnstore.xml"; - - ifstream oldFile (fileName.c_str()); - - if (!oldFile) return true; - - vector lines; - char line[200]; - string buf; - string newLine; - - string firstComment = ""; - - while (oldFile.getline(line, 200)) - { - buf = line; - - string::size_type pos = buf.find(entry, 0); - - if (pos != string::npos) - { - pos = buf.find(firstComment, 0); - - if (pos == string::npos) - { - return true; - } - else - { - buf = buf.substr(pos + 4, 80); - - pos = buf.find(lastComment, 0); - - if (pos == string::npos) - { - return true; - } - else - { - buf = buf.substr(0, pos); - } - } - } - - //output to temp file - lines.push_back(buf); - } - - oldFile.close(); - unlink (fileName.c_str()); - ofstream newFile (fileName.c_str()); - - //create new file - int fd = open(fileName.c_str(), O_RDWR | O_CREAT, 0666); - - copy(lines.begin(), lines.end(), ostream_iterator(newFile, "\n")); - newFile.close(); - - close(fd); - return true; -} - -// vim:ts=4 sw=4: diff --git a/oamapps/postConfigure/mycnfUpgrade.cpp b/oamapps/postConfigure/mycnfUpgrade.cpp deleted file mode 100644 index e35a36d57..000000000 --- a/oamapps/postConfigure/mycnfUpgrade.cpp +++ /dev/null @@ -1,287 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/****************************************************************************************** -* $Id: mycnfUpgrade.cpp 64 2006-10-12 22:21:51Z dhill $ -* -* -* -******************************************************************************************/ -/** - * @file - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "liboamcpp.h" -#include "installdir.h" -#include "mcsconfig.h" - -using namespace std; -using namespace oam; - - -/* MCOL-1844. On an upgrade, the user may have customized options in their old - * myCnf-include-args.text file. Merge it with the packaged version, and then process as we - * have before. - */ -string rtrim(const string &in) { - string::const_reverse_iterator rbegin = in.rbegin(); - while (rbegin != in.rend() && isspace(*rbegin)) - ++rbegin; - return string(in.begin(), rbegin.base()); -} - -void mergeMycnfIncludeArgs() -{ - string userArgsFilename = std::string(MCSSUPPORTDIR) + "/myCnf-include-args.text.rpmsave"; - string packagedArgsFilename = std::string(MCSSUPPORTDIR) + "/myCnf-include-args.text"; - ifstream userArgs(userArgsFilename.c_str()); - fstream packagedArgs(packagedArgsFilename.c_str(), ios::in); - - if (!userArgs || !packagedArgs) - return; - - // de-dup the args and comments in both files - set argMerger; - set comments; - string line; - while (getline(packagedArgs, line)) { - line = rtrim(line); - if (line[0] == '#') - comments.insert(line); - else if (line.size() > 0) - argMerger.insert(line); - } - while (getline(userArgs, line)) { - line = rtrim(line); - if (line[0] == '#') - comments.insert(line); - else if (line.size() > 0) - argMerger.insert(line); - } - userArgs.close(); - packagedArgs.close(); - - // write the merged version, comments first. They'll get ordered - // alphabetically but, meh. - packagedArgs.open(packagedArgsFilename.c_str(), ios::out | ios::trunc); - for (set::iterator it = comments.begin(); it != comments.end(); it++) - packagedArgs << *it << endl; - for (set::iterator it = argMerger.begin(); it != argMerger.end(); it++) - packagedArgs << *it << endl; - packagedArgs.close(); -} - -int main(int argc, char* argv[]) -{ - Oam oam; - - //check for port argument - string mysqlPort; - - if (argc > 1) - { - mysqlPort = argv[1]; - - // set mysql password - oam.changeMyCnf( "port", mysqlPort ); - exit (0); - } - - //my.cnf file - string mycnfFile = std::string(MCSMYCNFDIR) + "/columnstore.cnf"; - ifstream mycnffile (mycnfFile.c_str()); - - if (!mycnffile) - { - cerr << "mycnfUpgrade - columnstore.cnf file not found: " << mycnfFile << endl; - exit (1); - } - - //my.cnf.rpmsave file - string mycnfsaveFile = std::string(MCSMYCNFDIR) + "/columnstore.cnf.rpmsave"; - ifstream mycnfsavefile (mycnfsaveFile.c_str()); - - if (!mycnfsavefile) - { - cerr << "mycnfUpgrade - columnstore.cnf.rpmsave file not found: " << mycnfsaveFile << endl; - exit (1); - } - - // MCOL-1844. The user may have added options to their myCnf-include-args file. Merge - // myCnf-include-args.text with myCnf-include-args.text.rpmsave, save in myCnf-include-args.text - mergeMycnfIncludeArgs(); - - //include arguments file - string includeFile = std::string(MCSSUPPORTDIR) + "/myCnf-include-args.text"; - ifstream includefile (includeFile.c_str()); - - if (!includefile) - { - cerr << "mycnfUpgrade - columnstore.cnf include argument file not found: " << includeFile << endl; - exit (1); - } - - //exclude arguments file - string excludeFile = std::string(MCSSUPPORTDIR) + "/myCnf-exclude-args.text"; - ifstream excludefile (excludeFile.c_str()); - - if (!excludefile) - { - cerr << "mycnfUpgrade - columnstore.cnf exclude argument file not found: " << endl; - exit (1); - } - - //go though include list - char line[200]; - string includeArg; - - while (includefile.getline(line, 200)) - { - includeArg = line; - - boost::regex icludeArgRegEx("^#*\\s*" + includeArg + "\\s*="); - //see if in columnstore.cnf.rpmsave - ifstream mycnfsavefile (mycnfsaveFile.c_str()); - char line[200]; - string oldbuf; - - while (mycnfsavefile.getline(line, 200)) - { - oldbuf = line; - - if ( boost::regex_search(oldbuf.begin(), oldbuf.end(), icludeArgRegEx) ) - { - //found in columnstore.cnf.rpmsave, check if this is commented out - if ( line[0] != '#' ) - { - // no, check in columnstore.cnf and replace if exist or add if it doesn't - - ifstream mycnffile (mycnfFile.c_str()); - vector lines; - char line1[200]; - string newbuf; - bool updated = false; - - while (mycnffile.getline(line1, 200)) - { - newbuf = line1; - - if ( boost::regex_search(newbuf.begin(), newbuf.end(), icludeArgRegEx) ) - { - newbuf = oldbuf; - cout << "Updated argument: " << includeArg << endl; - updated = true; - } - - //output to temp file - lines.push_back(newbuf); - } - - //write out a new columnstore.cnf - mycnffile.close(); - unlink (mycnfFile.c_str()); - ofstream newFile (mycnfFile.c_str()); - - //create new file - int fd = open(mycnfFile.c_str(), O_RDWR|O_CREAT, 0644); - - copy(lines.begin(), lines.end(), ostream_iterator(newFile, "\n")); - newFile.close(); - - close(fd); - - if (!updated) - { - //not found, so add - ifstream mycnffile (mycnfFile.c_str()); - vector lines; - char line1[200]; - string newbuf; - - while (mycnffile.getline(line1, 200)) - { - newbuf = line1; - boost::regex mysqldSectionRegEx("\\[mysqld\\]"); - - if ( boost::regex_search(newbuf.begin(), newbuf.end(), mysqldSectionRegEx) ) - { - lines.push_back(newbuf); - newbuf = oldbuf; - cout << "Added argument: " << includeArg << endl; - } - - //output to temp file - lines.push_back(newbuf); - } - - //write out a new columnstore.cnf - mycnffile.close(); - unlink (mycnfFile.c_str()); - ofstream newFile (mycnfFile.c_str()); - - //create new file - int fd = open(mycnfFile.c_str(), O_RDWR | O_CREAT, 0666); - - copy(lines.begin(), lines.end(), ostream_iterator(newFile, "\n")); - newFile.close(); - - close(fd); - break; - } - } - } - } - } - - string USER = "mysql"; - - char* p = getenv("USER"); - - if (p && *p) - USER = p; - - string cmd = "chown " + USER + ":" + USER + " " + mycnfFile; - system(cmd.c_str()); - - exit (0); -} -// vim:ts=4 sw=4: - diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp deleted file mode 100644 index 2779c6b7a..000000000 --- a/oamapps/postConfigure/postConfigure.cpp +++ /dev/null @@ -1,154 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/****************************************************************************************** -* $Id: postConfigure.cpp 64 2006-10-12 22:21:51Z dhill $ -* -* -* List of files being updated by post-install configure: -* mariadb/columnstore/etc/Columnstore.xml -* mariadb/columnstore/etc/ProcessConfig.xml -* /etc/rc.d/rc.local -* -******************************************************************************************/ -/** - * @file - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include /* for strncpy */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "mcsconfig.h" -#include "columnstoreversion.h" -#include "liboamcpp.h" -#include "configcpp.h" - -using namespace std; -using namespace config; - -#include "helpers.h" -using namespace installer; - -#include "installdir.h" - -bool updateBash(string homeDir); - -int main(int argc, char* argv[]) -{ - string cmd; - string numBlocksPctParam = ""; - string totalUmMemoryParam = ""; - string homeDir = "/root"; - bool rootUser = true; - - //check if root-user - int user; - user = getuid(); - - if (user != 0) - { - rootUser = false; - } - - if (!rootUser) - { - char* p = getenv("HOME"); - - if (p && *p) - homeDir = p; - } - - //get current time and date - time_t now; - now = time(NULL); - struct tm tm; - localtime_r(&now, &tm); - char timestamp[200]; - strftime (timestamp, 200, "%m:%d:%y-%H:%M:%S", &tm); - string currentDate = timestamp; - string postConfigureLog = "/var/log/columnstore-postconfigure-" + currentDate; - - // perform single server install - cout << endl << "Performing the Single Server Install." << endl << endl; - - if (!rootUser) - { - if (!updateBash(homeDir)) - cout << "updateBash error" << endl; - } - - //check if dbrm data resides in older directory path and inform user if it does - dbrmDirCheck(); - - if (numBlocksPctParam.empty()) { - numBlocksPctParam = "-"; - } - if (totalUmMemoryParam.empty()) { - totalUmMemoryParam = "-"; - } - - cmd = "columnstore_installer dummy.rpm dummy.rpm dummy.rpm dummy.rpm dummy.rpm initial dummy n --nodeps ' ' 1 " + numBlocksPctParam + " " + totalUmMemoryParam; - system(cmd.c_str()); - exit(0); -} - -bool updateBash(string homeDir) -{ - string fileName = homeDir + "/.bashrc"; - ifstream newFile (fileName.c_str()); - newFile.close(); - - return true; -} -// vim:ts=4 sw=4: diff --git a/oamapps/postConfigure/quick_installer_amazon.sh b/oamapps/postConfigure/quick_installer_amazon.sh deleted file mode 100755 index d0ad2bcef..000000000 --- a/oamapps/postConfigure/quick_installer_amazon.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/bash -# -# $Id: quick_installer_amazon.sh 3705 2018-07-07 19:47:20Z dhill $ -# -# Poddst- Quick Installer for Amazon MariaDB Columnstore - -pmCount="" -umCount="" -systemName="" - -for arg in "$@"; do - if [ `expr -- "$arg" : '--pm-count='` -eq 11 ]; then - pmCount="`echo $arg | awk -F= '{print $2}'`" - elif [ `expr -- "$arg" : '--um-count='` -eq 11 ]; then - umCount="`echo $arg | awk -F= '{print $2}'`" - elif [ `expr -- "$arg" : '--system-name='` -eq 14 ]; then - systemName="`echo $arg | awk -F= '{print $2}'`" - systemName="-sn "$systemName - elif [ `expr -- "$arg" : '--help'` -eq 6 ]; then - echo "Usage ./quick_installer_amazon.sh [OPTION]" - echo "" - echo "Quick Installer for an Amazon MariaDB ColumnStore Install" - echo "This requires to be run on a MariaDB ColumnStore AMI" - echo "" - echo "Performace Module (pm) number is required" - echo "User Module (um) number is option" - echo "When only pm counts provided, system is combined setup" - echo "When both pm/um counts provided, system is separate setup" - echo - echo "--pm-count=x Number of pm instances to create" - echo "--um-count=x Number of um instances to create, optional" - echo "--system-name=nnnn System Name, optional" - echo "" - exit 1 - else - echo "./quick_installer_amazon.sh: unknown argument: $arg, enter --help for help" 1>&2 - exit 1 - fi -done - -if [[ $pmCount = "" ]]; then - echo "" - echo "Performace Module (pm) count is required, exiting" - exit 1 -else - if [[ $umCount = "" ]]; then - echo "" - echo "NOTE: Performing a Multi-Server Combined install with um/pm running on some server" - echo"" - else - echo "" - echo "NOTE: Performing a Multi-Server separate install with um and pm running on separate servers" - echo"" - fi -fi - -echo "" -echo "${bold}Run post-install script${normal}" -echo "" -columnstore-post-install -echo "${bold}Run postConfigure script${normal}" -echo "" -if [[ $umCount = "" ]]; then - postConfigure -qa -pm-count $pmCount $systemName -d -else - postConfigure -qa -pm-count $pmCount -um-count $umCount $systemName -d -fi diff --git a/oamapps/postConfigure/quick_installer_multi_server.sh b/oamapps/postConfigure/quick_installer_multi_server.sh deleted file mode 100755 index cf2a2533b..000000000 --- a/oamapps/postConfigure/quick_installer_multi_server.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/bash -# -# $Id: quick_installer_multi_server.sh 3705 2018-07-07 19:47:20Z dhill $ -# -# Poddst- Quick Installer for Multi Server MariaDB Columnstore - -pmIpAddrs="" -umIpAddrs="" -systemName="" - -for arg in "$@"; do - if [ `expr -- "$arg" : '--pm-ip-addresses='` -eq 18 ]; then - pmIpAddrs="`echo $arg | awk -F= '{print $2}'`" - elif [ `expr -- "$arg" : '--um-ip-addresses='` -eq 18 ]; then - umIpAddrs="`echo $arg | awk -F= '{print $2}'`" - elif [ `expr -- "$arg" : '--system-name='` -eq 14 ]; then - systemName="`echo $arg | awk -F= '{print $2}'`" - systemName="-sn "$systemName - elif [ `expr -- "$arg" : '--help'` -eq 6 ]; then - echo "Usage ./quick_installer_multi_server.sh [OPTION]" - echo "" - echo "Quick Installer for a Multi Server MariaDB ColumnStore Install" - echo "" - echo "Defaults to non-distrubuted install, meaning MariaDB Columnstore" - echo "needs to be preinstalled on all nodes in the system" - echo "" - echo "Performace Module (pm) IP addresses are required" - echo "User Module (um) IP addresses are option" - echo "When only pm IP addresses provided, system is combined setup" - echo "When both pm/um IP addresses provided, system is separate setup" - echo - echo "--pm-ip-addresses=xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx" - echo "--um-ip-addresses=xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx, optional" - echo "--system-name=nnnn System Name, optional" - echo "" - exit 1 - else - echo "quick_installer_multi_server.sh: unknown argument: $arg, enter --help for help" 1>&2 - exit 1 - fi -done - -if [[ $pmIpAddrs = "" ]]; then - echo "" - echo "Performace Module (pm) IP addresses required, exiting" - exit 1 -else - if [[ $umIpAddrs = "" ]]; then - echo "" - echo "NOTE: Performing a Multi-Server Combined install with um/pm running on some server" - echo"" - else - echo "" - echo "NOTE: Performing a Multi-Server separate install with um and pm running on separate servers" - echo"" - fi -fi - -echo "" -echo "${bold}Run post-install script${normal}" -echo "" -columnstore-post-install -echo "${bold}Run postConfigure script${normal}" -echo "" -if [[ $umIpAddrs = "" ]]; then - postConfigure -qm -pm-ip-addrs $pmIpAddrs $systemName -else - postConfigure -qm -pm-ip-addrs $pmIpAddrs -um-ip-addrs $umIpAddrs $systemName -fi diff --git a/oamapps/postConfigure/quick_installer_single_server.sh b/oamapps/postConfigure/quick_installer_single_server.sh deleted file mode 100755 index 7d6a230e0..000000000 --- a/oamapps/postConfigure/quick_installer_single_server.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -# -# $Id: quick_installer_single_server.sh 3705 2018-07-07 19:47:20Z dhill $ -# -# Poddst- Quick Installer for Single Server MariaDB Columnstore - -for arg in "$@"; do - if [ `expr -- "$arg" : '--help'` -eq 6 ]; then - echo "Usage ./quick_installer_single_server.sh" - echo "" - echo "Quick Installer for a Single Server MariaDB ColumnStore Install" - echo "" - exit 1 - else - echo "quick_installer_single_server.sh: ignoring unknown argument: $arg" 1>&2 - fi -done - - -echo "" -echo "Run post-install script" -echo "" -columnstore-post-install -echo "Run postConfigure script" -echo "" -postConfigure -qs diff --git a/oamapps/resourceMonitor/hardwareMonitor.cpp b/oamapps/resourceMonitor/hardwareMonitor.cpp deleted file mode 100644 index 9ec322b2e..000000000 --- a/oamapps/resourceMonitor/hardwareMonitor.cpp +++ /dev/null @@ -1,213 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/*************************************************************************** - * $Id: hardwareMonitor.cpp 34 2006-09-29 21:13:54Z dhill $ - * - * Author: David Hill - ***************************************************************************/ -#include -#include -#include -#include -#include -#include -#include - -#include "liboamcpp.h" -#include "messagelog.h" -#include "messageobj.h" -#include "loggingid.h" -#include "alarmmanager.h" - -#include "installdir.h" - -using namespace std; -using namespace oam; -using namespace alarmmanager; -using namespace logging; - -/** - * @brief send alarm - */ -void sendAlarm(string alarmItem, ALARMS alarmID, int action); - - -/************************************************************************************************************ -* @brief main function -* -* purpose: Get current hardware status and report alarms -* -* Parses file generated by the ipmitool -* -* pattern = what it is | value | units | status | value 1 | value 2 | value 3 | value 4 | value 5 | value 6 -* data(0) = what it is -* data(1) = value -* data(2) = units -* data(3) = status -* data(4)-data(9) = barrier values -* data(4) - low non-recoverable, i.e. fatal -* data(5) - low critical -* data(6) - low warning -* data(7) - high warning -* data(8) - high critical -* data(9) - high non-recoverable, i.e. fatal -* -************************************************************************************************************/ -int main (int argc, char** argv) -{ - string data[10]; - string SensorName; - string SensorValue; - string Units; - string SensorStatus; - string lowFatal; - string lowCritical; - string lowWarning; - string highWarning; - string highCritical; - string highFatal; - char* p; - - string tmpDir = startup::StartUp::tmpDir(); - - // loop forever reading the hardware status - while (TRUE) - { - string logFile = tmpDir + "/harwareMonitor.txt"; - string cmd = "ipmitool sensor list > " + logFile; - int returnCode = system(cmd.c_str()); - - if (returnCode) - { - // System error - cout << "Error running ipmitool sensor list!!!" << endl; - exit(-1); - } - - // parse output file - - ifstream File (logFile); - - if (!File) - { - // System error - cout << "Error opening " << logFile << endl; - exit(-1); - } - - char line[200]; - - while (File.getline(line, 200)) - { - // parse the line - int f = 0; - p = strtok(line, "|"); - - while (p) - { - data[f] = p; - p = strtok (NULL, "|"); - f++; - } - - if ( f == 0 ) - // nothing on this line, skip - continue; - - SensorName = data[0]; - SensorValue = data[1]; - Units = data[2]; - SensorStatus = data[3]; - lowFatal = data[4]; - lowCritical = data[5]; - lowWarning = data[6]; - highWarning = data[7]; - highCritical = data[8]; - highFatal = data[9]; - - // check status and issue apporiate alarm if needed - if ( (SensorStatus != "ok") && (SensorStatus != "nr") ) - { - // Status error, check for warning or critical levels - - if ( SensorValue >= highFatal ) - // issue critical alarm - sendAlarm(SensorName, HARDWARE_HIGH, SET); - - else if ( (SensorValue < highFatal) && (SensorValue >= highCritical) ) - // issue major alarm - sendAlarm(SensorName, HARDWARE_MED, SET); - - else if ( (SensorValue < highCritical ) && (SensorValue >= highWarning) ) - // issue minor alarm - sendAlarm(SensorName, HARDWARE_LOW, SET); - - else if ( (SensorValue <= lowWarning) && (SensorValue > lowCritical) ) - // issue minor alarm - sendAlarm(SensorName, HARDWARE_LOW, SET); - - else if ( (SensorValue <= lowCritical) && (SensorValue > lowFatal) ) - // issue major alarm - sendAlarm(SensorName, HARDWARE_MED, SET); - - else if ( SensorValue <= lowFatal ) - // issue critical alarm - sendAlarm(SensorName, HARDWARE_HIGH, SET); - } - - } //end of parsing file while - - File.close(); - sleep(5); - } //end of forever while loop -} - -/****************************************************************************************** -* @brief sendAlarm -* -* purpose: send a trap and log the process information -* -******************************************************************************************/ -void sendAlarm(string alarmItem, ALARMS alarmID, int action) -{ - Oam oam; - ALARMManager alarmMgr; - // send alarm - alarmMgr.sendAlarmReport(alarmItem.c_str(), alarmID, action); - - //Log this event - LoggingID lid; - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Alarm action against "); - args.add(alarmItem); - - if ( action == SET ) - { - args.add("Action type: SET"); - } - else - { - args.add("Action type: CLEAR"); - } - - ml.logDebugMessage(msg); - - return; -} diff --git a/oamapps/resourceMonitor/resourceMonitor.cpp b/oamapps/resourceMonitor/resourceMonitor.cpp deleted file mode 100644 index 054e86c4c..000000000 --- a/oamapps/resourceMonitor/resourceMonitor.cpp +++ /dev/null @@ -1,348 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/*************************************************************************** - * $Id: resourceMonitor.cpp 3072 2013-04-04 19:04:45Z rdempsey $ - * - * Author: Zhixuan Zhu - ***************************************************************************/ -#include -#include -#include -#include -#include -#include -#include -#include "liboamcpp.h" - -using namespace std; -using namespace oam; - -/** - * constants define - */ -enum HOST_INFO -{ - USER = 0, - SYSTEM, - NICE, - IDLE -}; - -const string FE_MOUNT_DIR = "/var/log/mariadb/columnstore/"; // FE mount dir -const int MONITOR_FREQ = 15; // monitor frequency in sec -const int LOG_FREQ = 900; // log frequency in sec -const int DEBUG = false; - -/** - * global variables - */ -static char cpu[5]; -static unsigned int usage[LOG_FREQ / MONITOR_FREQ]; -static int usageCount = 0; - -/** - * @brief get cpu usage for cpuNo - */ -unsigned int* getUsage(char* cpuNo); - -/** - * @brief get cpu usage diffence over 3 seconds - */ -unsigned int* diffUsage(); - -/** - * @brief calculate overall cpu usage - */ -unsigned int calcCPUusage(unsigned int* times); - -/** - * @brief log cpu usage to active log file - */ -void logCPUactive (unsigned int); - -/** - * @brief log cpu peak and average to stat file - */ -void logCPUstat (); - - -/***************************************************************************************** -* @brief main function -* -* purpose: Get current CPU usage and return current state -* -*****************************************************************************************/ -int main (int argc, char** argv) -{ - unsigned int* times; - unsigned int cpuUsage = 0; - // set defaults - unsigned int cpuCritical = 9000, - cpuMajor = 8000, - cpuMinor = 7000, - cpuMinorClear = 6000; - - strcpy (cpu, "cpu"); - - // get specific CPU info - times = diffUsage(); - - // no cpus available on the system - if (times == NULL) - { - cout << "No cpus on the system" << endl; - return 0; - } - - cpuUsage = calcCPUusage(times); - logCPUactive(cpuUsage); - cout << "cpuUsage: " << cpuUsage << endl; - - // Get CPU usage water mark from server configuration and compare - ServerConfig serverConfig; - Oam oam; - - try - { - oam.getSystemConfig (serverConfig); - cpuCritical = serverConfig.ServerCPUCriticalThreshold; - cpuMajor = serverConfig.ServerCPUMajorThreshold; - cpuMinor = serverConfig.ServerCPUMinorThreshold; - cpuMinorClear = serverConfig.ServerCPUMinorClearThreshold; - } - catch (runtime_error e) - { - throw e; - } - - cout << "critical water: " << serverConfig.ServerCPUCriticalThreshold << endl; - - if (cpuUsage >= cpuCritical) - { - cout << "return critical: " << CRITICAL << endl; - return CRITICAL; - } - else if (cpuUsage >= cpuMajor) - { - cout << "return major: " << MAJOR << endl; - return MAJOR; - } - else if (cpuUsage >= cpuMinor) - { - cout << "return Minor: " << MINOR << endl; - return MINOR; - } - else if (cpuUsage >= cpuMinorClear) - { - cout << "return MinorClear: " << WARNING << endl; - return WARNING; - } - else - { - cout << "return Below MinorClear: " << NO_SEVERITY << endl; - return NO_SEVERITY; - } -} - -/***************************************************************************************** -* @brief diffUsage -* -* purpose: Compare usage different for changes -* -*****************************************************************************************/ -unsigned int* diffUsage() -{ - static unsigned int times1[4]; - unsigned int* times; - int i; - - times = getUsage(cpu); - - if (times == NULL) - return NULL; - - memcpy(times1, getUsage(cpu), sizeof(unsigned int) * 4); - sleep(3); - times = getUsage(cpu); - - for (i = 0; i < 4; i++) - times1[i] = times[i] - times1[i]; - - return times1; -} - -/***************************************************************************************** -* @brief diffUsage -* -* purpose: Compare usage different for changes -* -*****************************************************************************************/ -unsigned int* getUsage(char* cpuNo) -{ - static unsigned int times[4]; - char tmp[5]; - char str[80]; - FILE* file; - - file = fopen("/proc/stat", "r"); - - while (fgets(str, 80, file)) - { - // search for cpuNo - if ((strstr (str, cpuNo) != NULL)) - { - sscanf(str, "%s %u %u %u %u", tmp, - ×[0], ×[1], ×[2], ×[3]); - fclose(file); - return times; - } - } - - fclose(file); - return NULL; -} - -/***************************************************************************************** -* @brief calcCPUusage -* -* purpose: Calculate CPU usage -* -*****************************************************************************************/ -unsigned int calcCPUusage (unsigned int* times) -{ - unsigned int total = 0; - - for (int i = 0; i < 4; i++) - total += times[i]; - - double load = (double)times[IDLE] * 100.0 / (double)total; - return (int)((100.0 - load)); -} - -/***************************************************************************************** -* @brief logCPUactive -* -* purpose: Log Peak and Average CPU usage -* -*****************************************************************************************/ -void logCPUactive (unsigned int cpuUsage) -{ - // determin the active log file name - string usageLogFileName = FE_MOUNT_DIR; - usageLogFileName = usageLogFileName + cpu + ".log"; - - if (DEBUG) - cout << usageLogFileName << endl; - - fstream usageLogFile; - usageLogFile.open (usageLogFileName.c_str(), ios::in | ios::out); - - if (usageLogFile.fail()) - { - ofstream file (usageLogFileName.c_str()); - file.close(); - usageLogFile.open(usageLogFileName.c_str(), ios::in | ios::out); - - if (!usageLogFile) cout << "--error" << endl; - } - - // get the counter - usageLogFile.seekg(0, ios::beg); - usageLogFile.read (reinterpret_cast(&usageCount), sizeof (int)); - - if (usageLogFile.eof()) usageLogFile.clear(); - - // new iteration - if (usageCount == 0) - { - usageLogFile.seekp(0, ios::beg); - usageLogFile.write (reinterpret_cast(&usageCount), sizeof (int)); - } - - usageCount ++; - - // append new usage data to the end - usageLogFile.seekp (0, ios::end); - usageLogFile.write (reinterpret_cast(&cpuUsage), sizeof (int)); - - if (DEBUG) - cout << "usage: " << usageCount << endl; - - // calculate peak and average if it's time to log usage data - if (usageCount >= LOG_FREQ / MONITOR_FREQ) - { - usageLogFile.seekg (4, ios::beg); - usageLogFile.read ((char*)usage, sizeof(unsigned int) * LOG_FREQ / MONITOR_FREQ); - - if (usageLogFile.eof()) usageLogFile.clear(); - - if (DEBUG) - { - for (int i = 0; i < usageCount; i++) - { - cout << usage [i] << endl; - } - } - - logCPUstat(); - - // delete the file - usageLogFile.close(); - unlink (usageLogFileName.c_str()); - } - - // else, update usageCount - else - { - usageLogFile.seekp(0, ios::beg); - usageLogFile.write (reinterpret_cast(&usageCount), sizeof (int)); - usageLogFile.close(); - } -} - -/***************************************************************************************** -* @brief logCPUstat -* -* purpose: Log CPU stat using system API -* -*****************************************************************************************/ -void logCPUstat () -{ - unsigned int max = 0; - unsigned int sum = 0; - float average = 0.0; - - for (int i = 0; i < usageCount; i++) - { - if (usage[i] > max) - max = usage[i]; - - sum += usage[i]; - } - - average = sum / usageCount; - - // Call system log api to store stats. - // for now, write on local for testing purpose. - string statFileName = FE_MOUNT_DIR; - statFileName = statFileName + cpu + "stat.log"; - ofstream file (statFileName.c_str(), ios::app); - file << max << " " << average << endl; - file.close(); -} diff --git a/oamapps/resourceMonitor/tdriver.cpp b/oamapps/resourceMonitor/tdriver.cpp deleted file mode 100644 index 74fe90280..000000000 --- a/oamapps/resourceMonitor/tdriver.cpp +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/* $Id: tdriver.cpp 3072 2013-04-04 19:04:45Z rdempsey $ */ - -#include -#include - -class ResourceMonitorTest : public CppUnit::TestFixture -{ - - CPPUNIT_TEST_SUITE( ResourceMonitorTest ); - - CPPUNIT_TEST( test1 ); - - CPPUNIT_TEST_SUITE_END(); - -private: - -public: - void setUp() - { - } - - void tearDown() - { - } - - void test1() - { - //system ("./resourceMonitor 0"); - - } - -}; - -CPPUNIT_TEST_SUITE_REGISTRATION( ResourceMonitorTest ); - -#include -#include - -int main( int argc, char** argv) -{ - CppUnit::TextUi::TestRunner runner; - CppUnit::TestFactoryRegistry& registry = CppUnit::TestFactoryRegistry::getRegistry(); - runner.addTest( registry.makeTest() ); - bool wasSuccessful = runner.run( "", false ); - return (wasSuccessful ? 0 : 1); -} - diff --git a/oamapps/serverMonitor/CMakeLists.txt b/oamapps/serverMonitor/CMakeLists.txt deleted file mode 100644 index 6382859ce..000000000 --- a/oamapps/serverMonitor/CMakeLists.txt +++ /dev/null @@ -1,26 +0,0 @@ - -include_directories( ${ENGINE_COMMON_INCLUDES} ) - - -########### next target ############### - -set(ServerMonitor_SRCS - main.cpp - serverMonitor.cpp - cpuMonitor.cpp - diskMonitor.cpp - memoryMonitor.cpp - procmonMonitor.cpp - msgProcessor.cpp - dbhealthMonitor.cpp - UMAutoSync.cpp - ../../utils/common/crashtrace.cpp) - -add_executable(ServerMonitor ${ServerMonitor_SRCS}) - -target_compile_options(ServerMonitor PRIVATE -Wno-unused-result) - -target_link_libraries(ServerMonitor ${ENGINE_LDFLAGS} ${MARIADB_CLIENT_LIBS} ${ENGINE_EXEC_LIBS}) - -install(TARGETS ServerMonitor DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine) - diff --git a/oamapps/serverMonitor/UMAutoSync.cpp b/oamapps/serverMonitor/UMAutoSync.cpp deleted file mode 100644 index 751993158..000000000 --- a/oamapps/serverMonitor/UMAutoSync.cpp +++ /dev/null @@ -1,270 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - * Copyright (C) 2016 MariaDB Corporation. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/*************************************************************************** - * $Id: UMAutoSync.cpp 34 2006-09-29 21:13:54Z dhill $ - * - * Author: Zhixuan Zhu - ***************************************************************************/ - -#include "serverMonitor.h" -#include "installdir.h" - -using namespace std; -using namespace oam; -using namespace alarmmanager; -using namespace logging; -using namespace servermonitor; -using namespace config; - -typedef struct UMmodule_struct -{ - std::string moduleName; - std::string IPAddr; -} UMmodule; - -typedef std::vector UMmoduleList; - -void rsync(std::string moduleName, std::string IPAddr, std::string rootPassword); - - -/***************************************************************************************** -* @brief UMAutoSync Thread -* -* purpose: check db health -* -*****************************************************************************************/ -void UMAutoSync() -{ - ServerMonitor serverMonitor; - Oam oam; - UMmoduleList ummodulelist; - - // sync run time in minutes - int UMSyncTime = 10; - - try - { - oam.getSystemConfig( "UMSyncTime", UMSyncTime); - } - catch (...) - { - UMSyncTime = 10; - } - - if ( UMSyncTime < 1 ) - UMSyncTime = 10; - - //get root password - string rootPassword = oam::UnassignedName; // default to 'n' - - try - { - oam.getSystemConfig( "rpw", rootPassword); - } - catch (...) - { - rootPassword = oam::UnassignedName; - } - - //if assigned, exit thread - if (rootPassword == oam::UnassignedName) - pthread_exit((void*) NULL); - - oamModuleInfo_t t; - - //get local module info - string localModuleName; - string localModuleType; - - try - { - t = oam.getModuleInfo(); - localModuleName = boost::get<0>(t); - localModuleType = boost::get<1>(t); - } - catch (...) {} - - // loop forever - while (true) - { -// LoggingID lid(SERVER_MONITOR_LOG_ID); -// MessageLog ml(lid); -// Message msg; -// Message::Args args; -// args.add("rsync thread running"); -// msg.format(args); -// ml.logDebugMessage(msg); - - // - // find non-disabled modules to sync up - // - try - { - SystemStatus systemstatus; - oam.getSystemStatus(systemstatus); - ummodulelist.clear(); - - for ( unsigned int i = 0 ; i < systemstatus.systemmodulestatus.modulestatus.size(); i++) - { - if ( systemstatus.systemmodulestatus.modulestatus[i].Module.empty() ) - // end of list - break; - - string moduleName = systemstatus.systemmodulestatus.modulestatus[i].Module; - string moduleType = moduleName.substr(0, MAX_MODULE_TYPE_SIZE); - - if ( moduleType == localModuleType && moduleName != localModuleName ) - { - int state = systemstatus.systemmodulestatus.modulestatus[i].ModuleOpState; - - if ( state == oam::MAN_DISABLED || state == oam::AUTO_DISABLED ) - continue; - else //get module IP Address - { - ModuleConfig moduleconfig; - oam.getSystemConfig(moduleName, moduleconfig); - HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); - - UMmodule ummodule; - - ummodule.IPAddr = (*pt1).IPAddr; - ummodule.moduleName = moduleName; - - ummodulelist.push_back(ummodule); - } - } - } - } - catch (...) {} - - //update all ums - UMmoduleList::iterator list1 = ummodulelist.begin(); - - for (; list1 != ummodulelist.end() ; list1++) - { - //call rsync function - rsync((*list1).moduleName, (*list1).IPAddr, rootPassword); - } - - // - // go into check for um module update module, rsync to new modules - // - for ( int time = 0 ; time < UMSyncTime ; time++ ) - { - try - { - SystemStatus systemstatus; - oam.getSystemStatus(systemstatus); - - for ( unsigned int i = 0 ; i < systemstatus.systemmodulestatus.modulestatus.size(); i++) - { - if ( systemstatus.systemmodulestatus.modulestatus[i].Module.empty() ) - // end of list - break; - - string moduleName = systemstatus.systemmodulestatus.modulestatus[i].Module; - string moduleType = moduleName.substr(0, MAX_MODULE_TYPE_SIZE); - - if ( moduleType == localModuleType && moduleName != localModuleName ) - { - int state = systemstatus.systemmodulestatus.modulestatus[i].ModuleOpState; - - if ( state == oam::MAN_DISABLED || state == oam::AUTO_DISABLED ) - continue; - else //check if in current sync list - { - UMmoduleList::iterator list1 = ummodulelist.begin(); - bool found = false; - - for (; list1 != ummodulelist.end() ; list1++) - { - if ( moduleName == (*list1).moduleName) - { - found = true; - break; - } - } - - if ( !found) - { - //get module IP Address - ModuleConfig moduleconfig; - oam.getSystemConfig(moduleName, moduleconfig); - HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); - - //call rsync function - rsync(moduleName, (*pt1).IPAddr, rootPassword); - - UMmodule ummodule; - - ummodule.IPAddr = (*pt1).IPAddr; - ummodule.moduleName = moduleName; - - ummodulelist.push_back(ummodule); - } - } - } - } - } - catch (...) {} - - sleep(60); - } - - } // end of while loop -} - -/* -* rsync script function -*/ - -void rsync(std::string moduleName, std::string IPAddr, std::string rootPassword) -{ - - string cmd = "rsync.sh " + IPAddr + " " + rootPassword + " 1 > /tmp/rsync_" + moduleName + ".log"; - int ret = system(cmd.c_str()); - - if ( WEXITSTATUS(ret) == 0 ) - { - /* LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Successfully rsync to module: "); - args.add(moduleName); - msg.format(args); - ml.logDebugMessage(msg); - */ - } - else - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Failure rsync to module: "); - args.add(moduleName); - msg.format(args); - ml.logDebugMessage(msg); - - } - - return; -} - diff --git a/oamapps/serverMonitor/cpuMonitor.cpp b/oamapps/serverMonitor/cpuMonitor.cpp deleted file mode 100644 index 4d7bc02d5..000000000 --- a/oamapps/serverMonitor/cpuMonitor.cpp +++ /dev/null @@ -1,593 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/*************************************************************************** - * $Id: cpuMonitor.cpp 34 2006-09-29 21:13:54Z dhill $ - * - * Author: Zhixuan Zhu - ***************************************************************************/ - -#include "serverMonitor.h" - -using namespace std; -using namespace oam; -using namespace alarmmanager; -using namespace logging; -using namespace servermonitor; -//using namespace procheartbeat; - -float currentCpuUsage; -ProcessCPUList pcl; - -pthread_mutex_t CPU_LOCK; - -/** - * constants define - */ - -const std::string FE_MOUNT_DIR = "/var/log/mariadb/columnstore/"; // FE mount dir -const int MONITOR_FREQ = 5; // monitor frequency in sec -const int LOG_FREQ = 900; // log frequency in sec -const int RESOURCE_DEBUG = false; -static unsigned int usage[LOG_FREQ / MONITOR_FREQ]; -static int usageCount = 0; - -extern string tmpDir; - -/***************************************************************************************** -* @brief cpuMonitor Thread -* -* purpose: Get current CPU usage, average over 5 readings and report alarms -* -*****************************************************************************************/ -void cpuMonitor() -{ - ServerMonitor serverMonitor; - - // register for Heartbeat monitoring - /* try { - ProcHeartbeat procheartbeat; - procheartbeat.registerHeartbeat(CPU_HEARTBEAT_ID); - } - catch (exception& ex) - { - string error = ex.what(); - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("EXCEPTION ERROR on registerHeartbeat: "); - args.add(error); - msg.format(args); - ml.logErrorMessage(msg); - } - catch(...) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("EXCEPTION ERROR on sendHeartbeat: Caught unknown exception!"); - msg.format(args); - ml.logErrorMessage(msg); - } - */ - const int periodCount = 5; - float cpuPeriod[periodCount]; - int periodCounter = 0; - float averageCpuUsage = 0; - currentCpuUsage = 0; - - // set defaults - unsigned int cpuCritical = 0, - cpuMajor = 0, - cpuMinor = 0, - cpuMinorClear = 0; - - // initial cpu Period table - for (int i = 0; i < periodCount; i++) - { - cpuPeriod[i] = 0; - } - - while (true) - { - // Get CPU usage water mark from server configuration and compare - ModuleTypeConfig moduleTypeConfig; - Oam oam; - - try - { - oam.getSystemConfig(moduleTypeConfig); - cpuCritical = moduleTypeConfig.ModuleCPUCriticalThreshold; - cpuMajor = moduleTypeConfig.ModuleCPUMajorThreshold; - cpuMinor = moduleTypeConfig.ModuleCPUMinorThreshold; - cpuMinorClear = moduleTypeConfig.ModuleCPUMinorClearThreshold; - } - catch (...) - { - sleep(5); - continue; - } - - if (RESOURCE_DEBUG) - cout << "critical water: " << moduleTypeConfig.ModuleCPUCriticalThreshold << endl; - - pthread_mutex_lock(&CPU_LOCK); - // - // get Process and System CPU usage - // - serverMonitor.getCPUdata(); - - // store and get average - cpuPeriod[periodCounter] = currentCpuUsage; - averageCpuUsage = 0; - - for (int i = 0; i < periodCount; i++) - { - averageCpuUsage += cpuPeriod[i]; - } - - averageCpuUsage = averageCpuUsage / periodCount; - -// serverMonitor.logCPUactive(averageCpuUsage); - if (CPU_DEBUG) - { - cout << "Current CPU Usage: " << currentCpuUsage << endl; - cout << "Average CPU Usage: " << averageCpuUsage << endl; - } - - if (averageCpuUsage >= cpuCritical && cpuCritical > 0 ) - { - serverMonitor.sendResourceAlarm("CPU", CPU_USAGE_HIGH, SET, (int) averageCpuUsage); - } - else if (averageCpuUsage >= cpuMajor && cpuMajor > 0 ) - serverMonitor.sendResourceAlarm("CPU", CPU_USAGE_MED, SET, (int) averageCpuUsage); - else if (averageCpuUsage >= cpuMinor && cpuMinor > 0 ) - serverMonitor.sendResourceAlarm("CPU", CPU_USAGE_LOW, SET, (int) averageCpuUsage); - else if (averageCpuUsage >= cpuMinorClear && cpuMinorClear > 0 ) - { - serverMonitor.checkCPUAlarm("CPU", CPU_USAGE_LOW); - //Log this event - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Current CPU usage = "); - args.add((int) currentCpuUsage); - args.add(", Average CPU usage = "); - args.add((int) averageCpuUsage); - msg.format(args); - ml.logInfoMessage(msg); - } - else - serverMonitor.checkCPUAlarm("CPU"); - - // - // check CPU usage by process - // - ProcessCPUList::iterator p = pcl.begin(); - - while (p != pcl.end()) - { - string processName = (*p).processName; - double cpuUsage = (*p).usedPercent; - p++; - - if (CPU_DEBUG) - { - cout << "Process Name : " << processName << endl; - cout << "CPU Usage: " << cpuUsage << endl; - } - - // check if a Calpont Process, if so alarm is over thresholds - // if not, just log if over thresholds - if (cpuUsage >= cpuCritical && cpuCritical > 0) - { - /* try { - t = oam.getMyProcessStatus(processID); - processName = boost::get<1>(t); - - serverMonitor.sendResourceAlarm(processName, CPU_USAGE_HIGH, SET, (int) cpuUsage); - } - catch (...) { - */ LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Process"); - args.add(processName); - args.add(" above Critical CPU threshold with a percentage of "); - args.add((int) cpuUsage); - msg.format(args); - ml.logInfoMessage(msg); -// } - } - else if (cpuUsage >= cpuMajor && cpuMajor > 0) - { - /* try { - t = oam.getMyProcessStatus(processID); - processName = boost::get<1>(t); - - serverMonitor.sendResourceAlarm(processName, CPU_USAGE_MED, SET, (int) cpuUsage); - } - catch (...) { - */ LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Process"); - args.add(processName); - args.add(" above Major CPU threshold with a percentage of "); - args.add((int) cpuUsage); - msg.format(args); - ml.logInfoMessage(msg); -// } - } - else if (cpuUsage >= cpuMinor && cpuMinor > 0) - { - /* try { - t = oam.getMyProcessStatus(processID); - processName = boost::get<1>(t); - - serverMonitor.sendResourceAlarm(processName, CPU_USAGE_LOW, SET, (int) cpuUsage); - } - catch (...) { - */ LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Process"); - args.add(processName); - args.add(" above Minor CPU threshold with a percentage of "); - args.add((int) cpuUsage); - msg.format(args); - ml.logInfoMessage(msg); -// } - } - - /* else if (cpuUsage >= cpuMinorClear) { - try { - t = oam.getMyProcessStatus(processID); - processName = boost::get<1>(t); - - serverMonitor.checkCPUAlarm(processName, CPU_USAGE_LOW); - } - catch (...) {} - } - else - serverMonitor.checkCPUAlarm(processName); - */ - } - - // send heartbeat message - /* try { - ProcHeartbeat procheartbeat; - procheartbeat.sendHeartbeat(CPU_HEARTBEAT_ID); - - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Sent Heartbeat Msg"); - msg.format(args); - ml.logInfoMessage(msg); - } - catch (exception& ex) - { - string error = ex.what(); - if ( error.find("Disabled") == string::npos ) { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("EXCEPTION ERROR on sendHeartbeat: "); - args.add(error); - msg.format(args); - ml.logErrorMessage(msg); - } - } - catch(...) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("EXCEPTION ERROR on sendHeartbeat: Caught unknown exception!"); - msg.format(args); - ml.logErrorMessage(msg); - } - */ - - pthread_mutex_unlock(&CPU_LOCK); - - // sleep, 10 minutes - sleep(MONITOR_PERIOD * 10); - - ++periodCounter; - - if ( periodCounter >= periodCount ) - periodCounter = 0; - - } // end of while loop -} - -/****************************************************************************************** -* @brief checkCPUAlarm -* -* purpose: check to see if an alarm(s) is set on CPU and clear if so -* -******************************************************************************************/ -void ServerMonitor::checkCPUAlarm(string alarmItem, ALARMS alarmID) -{ - Oam oam; - ServerMonitor serverMonitor; - - // get current server name - string serverName; - oamModuleInfo_t st; - - try - { - st = oam.getModuleInfo(); - serverName = boost::get<0>(st); - } - catch (...) - { - serverName = "Unknown Server"; - } - - switch (alarmID) - { - case ALARM_NONE: // clear all alarms set if any found - if ( serverMonitor.checkActiveAlarm(CPU_USAGE_HIGH, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, CPU_USAGE_HIGH); - - if ( serverMonitor.checkActiveAlarm(CPU_USAGE_MED, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, CPU_USAGE_MED); - - if ( serverMonitor.checkActiveAlarm(CPU_USAGE_LOW, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, CPU_USAGE_LOW); - - break; - - case CPU_USAGE_LOW: // clear high and medium alarms set if any found - if ( serverMonitor.checkActiveAlarm(CPU_USAGE_HIGH, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, CPU_USAGE_HIGH); - - if ( serverMonitor.checkActiveAlarm(CPU_USAGE_MED, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, CPU_USAGE_MED); - - break; - - case CPU_USAGE_MED: // clear high alarms set if any found - if ( serverMonitor.checkActiveAlarm(CPU_USAGE_HIGH, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, CPU_USAGE_HIGH); - - break; - - default: // none to clear - break; - } // end of switch - - return; -} - -/***************************************************************************************** -* @brief logCPUactive -* -* purpose: Log Peak and Average CPU usage -* -*****************************************************************************************/ -void ServerMonitor::logCPUactive (unsigned int cpuUsage) -{ - ServerMonitor serverMonitor; - - // determin the active log file name - string usageLogFileName = FE_MOUNT_DIR; - usageLogFileName = usageLogFileName + "cpu.log"; - - if (RESOURCE_DEBUG) - cout << usageLogFileName << endl; - - fstream usageLogFile; - usageLogFile.open (usageLogFileName.c_str(), ios::in | ios::out); - - if (usageLogFile.fail()) - { - ofstream file (usageLogFileName.c_str()); - file.close(); - usageLogFile.open(usageLogFileName.c_str(), ios::in | ios::out); - - if (!usageLogFile) cout << "--error" << endl; - } - - // get the counter - usageLogFile.seekg(0, ios::beg); - usageLogFile.read (reinterpret_cast(&usageCount), sizeof (int)); - - if (usageLogFile.eof()) usageLogFile.clear(); - - // new iteration - if (usageCount == 0) - { - usageLogFile.seekp(0, ios::beg); - usageLogFile.write (reinterpret_cast(&usageCount), sizeof (int)); - } - - usageCount ++; - - // append new usage data to the end - usageLogFile.seekp (0, ios::end); - usageLogFile.write (reinterpret_cast(&cpuUsage), sizeof (int)); - - if (RESOURCE_DEBUG) - cout << "usage: " << usageCount << endl; - - // calculate peak and average if it's time to log usage data - if (usageCount >= LOG_FREQ / MONITOR_FREQ) - { - usageLogFile.seekg (4, ios::beg); - usageLogFile.read ((char*)usage, sizeof(unsigned int) * LOG_FREQ / MONITOR_FREQ); - - if (usageLogFile.eof()) usageLogFile.clear(); - - if (RESOURCE_DEBUG) - { - for (int i = 0; i < usageCount; i++) - { - cout << usage [i] << endl; - } - } - - serverMonitor.logCPUstat(usageCount); - - // delete the file - usageLogFile.close(); - unlink (usageLogFileName.c_str()); - } - - // else, update usageCount - else - { - usageLogFile.seekp(0, ios::beg); - usageLogFile.write (reinterpret_cast(&usageCount), sizeof (int)); - usageLogFile.close(); - } -} - -/***************************************************************************************** -* @brief logCPUstat -* -* purpose: Log CPU stat using system API -* -*****************************************************************************************/ -void ServerMonitor::logCPUstat (int usageCount) -{ - unsigned int max = 0; - unsigned int sum = 0; - float average = 0.0; - - for (int i = 0; i < usageCount; i++) - { - if (usage[i] > max) - max = usage[i]; - - sum += usage[i]; - } - - if ( usageCount == 0 ) - average = 0; - else - average = sum / usageCount; - - // Call system log api to store stats. - // for now, write on local for testing purpose. - string statFileName = FE_MOUNT_DIR; - statFileName = statFileName + "cpustat.log"; - ofstream file (statFileName.c_str(), ios::app); - file << max << " " << average << endl; - file.close(); -} - -/***************************************************************************************** -* @brief logCPUstat -* -* purpose: Log CPU stat using system API -* -*****************************************************************************************/ -void ServerMonitor::getCPUdata() -{ - pcl.clear(); - - string tmpProcessCpu = tmpDir + "/processCpu"; - - string cmd = "top -b -n1 | head -12 | awk '{print $9,$12}' | tail -5 > " + tmpProcessCpu; - system(cmd.c_str()); - - ifstream oldFile1 (tmpProcessCpu.c_str()); - - // read top 5 users - int i = 0; - char line[400]; - - while (oldFile1.getline(line, 400)) - { - string buf = line; - string::size_type pos = buf.find (' ', 0); - - if (pos != string::npos) - { - processCPU pc; - pc.processName = buf.substr(pos + 1, 80); - pc.usedPercent = atol(buf.substr(0, pos).c_str()); - pcl.push_back(pc); - i++; - } - } - - oldFile1.close(); - - // - // get and check Total CPU usage - // - - - string tmpsystemCpu = tmpDir + "/processCpu"; - - cmd = "top -b -n 6 -d 1 | grep '%Cpu' | awk '{print $8}' > " + tmpsystemCpu; - system(cmd.c_str()); - - ifstream oldFile (tmpsystemCpu.c_str()); - - float systemIdle = 0; - // skip first line in file, and average the next 5 entries which contains idle times - oldFile.getline(line, 400); - int count = 0; - - while (oldFile.getline(line, 400)) - { - string buf = line; - // Questionable replacement - string::size_type pos = buf.find("id,", 0); - if (pos == string::npos) - { - systemIdle = systemIdle + atol(buf.substr(0, pos - 1).c_str()); - count++; - } - else - { - systemIdle = systemIdle + 100; - count++; - } - } - - oldFile.close(); - - if ( count == 0 ) - currentCpuUsage = 0; - else - currentCpuUsage = 100 - (systemIdle / count); -} - diff --git a/oamapps/serverMonitor/dbhealthMonitor.cpp b/oamapps/serverMonitor/dbhealthMonitor.cpp deleted file mode 100644 index a1c1c12ba..000000000 --- a/oamapps/serverMonitor/dbhealthMonitor.cpp +++ /dev/null @@ -1,263 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/*************************************************************************** - * $Id: dbhealthMonitor.cpp 34 2006-09-29 21:13:54Z dhill $ - * - * Author: Zhixuan Zhu - ***************************************************************************/ - -#include "serverMonitor.h" -#include "installdir.h" - -using namespace std; -using namespace oam; -using namespace alarmmanager; -using namespace logging; -using namespace servermonitor; -using namespace config; - - -/***************************************************************************************** -* @brief dbhealthMonitor Thread -* -* purpose: check db health -* -*****************************************************************************************/ -void dbhealthMonitor() -{ - ServerMonitor serverMonitor; - Oam oam; - - oamModuleInfo_t t; - - //get local module info - string localModuleName; - int serverInstallType = 2; - string OAMParentModuleName; - - try - { - t = oam.getModuleInfo(); - OAMParentModuleName = boost::get<3>(t); - } - catch (...) {} - - //Wait until DMLProc is Active, don't want to run if in rollback mode - while (true) - { - try - { - ProcessStatus procstat; - oam.getProcessStatus("DMLProc", OAMParentModuleName, procstat); - - if ( procstat.ProcessOpState == oam::ACTIVE) - break; - } - catch (...) - {} - - sleep(10); - } - - bool setlog = false; - bool clearlog = false; - - while (true) - { - try - { - t = oam.getModuleInfo(); - localModuleName = boost::get<0>(t); - OAMParentModuleName = boost::get<3>(t); - serverInstallType = boost::get<5>(t); - } - catch (...) {} - - string DBFunctionalMonitorFlag; - - try - { - oam.getSystemConfig( "DBFunctionalMonitorFlag", DBFunctionalMonitorFlag); - } - catch (...) {} - - // run on um1 or active pm - if ( localModuleName == "um1" || - ( localModuleName == OAMParentModuleName && - serverInstallType == oam::INSTALL_COMBINE_DM_UM_PM ) ) - { - - if (DBFunctionalMonitorFlag == "y" ) - { - if (!setlog ) - { - try - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("DBFunctionalMonitorFlag set: Running dbfunctional tester"); - msg.format(args); - ml.logDebugMessage(msg); - } - catch (...) - {} - - setlog = true; - } - - serverMonitor.healthCheck(); - } - else - { - if (!clearlog ) - { - try - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("DBFunctionalMonitorFlag not-set: Not Running dbfunctional tester"); - msg.format(args); - ml.logDebugMessage(msg); - } - catch (...) - {} - - clearlog = true; - } - } - } - - // sleep - sleep(MONITOR_PERIOD); - - } // end of while loop -} - -pthread_mutex_t FUNCTION_LOCK; - -int ServerMonitor::healthCheck(bool action) -{ - Oam oam; - - pthread_mutex_lock(&FUNCTION_LOCK); - - //get local module name - string localModuleName; - oamModuleInfo_t t; - - try - { - t = oam.getModuleInfo(); - localModuleName = boost::get<0>(t); - } - catch (...) {} - - //get action - string DBHealthMonitorAction; - oam.getSystemConfig( "DBHealthMonitorAction", DBHealthMonitorAction); - - GRACEFUL_FLAG gracefulTemp = GRACEFUL; - ACK_FLAG ackTemp = ACK_YES; - - //run Health script - string cmd = "dbhealth.sh > /var/log/mariadb/columnstore/dbfunctional.log1 2>&1"; - system(cmd.c_str()); - - if (!oam.checkLogStatus("/var/log/mariadb/columnstore/dbfunctional.log1", "OK")) - { - if (oam.checkLogStatus("/var/log/mariadb/columnstore/dbfunctional.log1", "ERROR 1045") ) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("dbhealth.sh: Missing Password error"); - msg.format(args); - ml.logDebugMessage(msg); - } - else - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("DB Functional check failed"); - msg.format(args); - ml.logCriticalMessage(msg); - - if (action) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Send Notification for DB Functional check failed and perform OAM Command"); - args.add( DBHealthMonitorAction); - msg.format(args); - ml.logDebugMessage(msg); - - oam.sendDeviceNotification(localModuleName, DB_HEALTH_CHECK_FAILED); - - if ( DBHealthMonitorAction == "stopSystem") - { - try - { - oam.stopSystem(gracefulTemp, ackTemp); - } - catch (...) - { - } - } - else if ( DBHealthMonitorAction == "restartSystem") - { - try - { - oam.restartSystem(gracefulTemp, ackTemp); - } - catch (...) - { - } - } - else if ( DBHealthMonitorAction == "shutdownSystem") - { - try - { - oam.shutdownSystem(gracefulTemp, ackTemp); - } - catch (...) - { - } - } - } - } - - pthread_mutex_unlock(&FUNCTION_LOCK); - - return API_FAILURE; - } - - pthread_mutex_unlock(&FUNCTION_LOCK); - return API_SUCCESS; -} - - diff --git a/oamapps/serverMonitor/diskMonitor.cpp b/oamapps/serverMonitor/diskMonitor.cpp deleted file mode 100644 index 7ac86b114..000000000 --- a/oamapps/serverMonitor/diskMonitor.cpp +++ /dev/null @@ -1,756 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/*************************************************************************** - * $Id: diskMonitor.cpp 34 2006-09-29 21:13:54Z dhill $ - * - * Author: Zhixuan Zhu - ***************************************************************************/ - -#include "serverMonitor.h" -#include "installdir.h" - -using namespace std; -using namespace oam; -using namespace alarmmanager; -using namespace logging; -using namespace servermonitor; -using namespace config; -//using namespace procheartbeat; - -SystemDiskList sdl; - -typedef struct DBrootData_struct -{ - std::string dbrootDir; - bool downFlag; -} -DBrootData; - -typedef std::vector DBrootList; - - -/***************************************************************************************** -* @brief diskMonitor Thread -* -* purpose: Get current Local and External disk usage and report alarms -* -*****************************************************************************************/ -void diskMonitor() -{ - ServerMonitor serverMonitor; - Oam oam; - SystemConfig systemConfig; - ModuleTypeConfig moduleTypeConfig; - typedef std::vector LocalFileSystems; - LocalFileSystems lfs; - struct statvfs buf; - - // set defaults - int localDiskCritical = 90, - localDiskMajor = 80, - localDiskMinor = 70, - ExternalDiskCritical = 90, - ExternalDiskMajor = 80, - ExternalDiskMinor = 70; - - // get module types - string moduleType; - int moduleID = -1; - string moduleName; - oamModuleInfo_t t; - - try - { - t = oam.getModuleInfo(); - moduleType = boost::get<1>(t); - moduleID = boost::get<2>(t); - moduleName = boost::get<0>(t); - } - catch (exception& e) {} - - bool Externalflag = false; - - string cloud = oam::UnassignedName; - - try - { - oam.getSystemConfig( "Cloud", cloud); - } - catch (...) - { - cloud = oam::UnassignedName; - } - - //get Gluster Config setting - string DataRedundancyConfig = "n"; - - try - { - oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); - } - catch (...) - { - DataRedundancyConfig = "n"; - } - - int diskSpaceCheck = 0; - - while (true) - { - //check for external disk - DBrootList dbrootList; - - if (moduleType == "pm") - { - systemStorageInfo_t t; - t = oam.getStorageConfig(); - - if ( boost::get<0>(t) == "external") - Externalflag = true; - - // get dbroot list and storage type from config file - DBRootConfigList dbrootConfigList; - oam.getPmDbrootConfig(moduleID, dbrootConfigList); - - DBRootConfigList::iterator pt = dbrootConfigList.begin(); - - for ( ; pt != dbrootConfigList.end() ; pt++) - { - int dbrootID = *pt; - - string dbroot = "DBRoot" + oam.itoa(dbrootID); - - string dbootdir; - - try - { - oam.getSystemConfig(dbroot, dbootdir); - } - catch (...) {} - - if ( dbootdir.empty() || dbootdir == "" ) - continue; - - DBrootData dbrootData; - dbrootData.dbrootDir = dbootdir; - dbrootData.downFlag = false; - - dbrootList.push_back(dbrootData); - } - } - - SystemStatus systemstatus; - - try - { - oam.getSystemStatus(systemstatus); - } - catch (exception& ex) - {} - - if (systemstatus.SystemOpState != oam::ACTIVE ) - { - sleep(5); - continue; - } - - // Get Local/External Disk Mount points to monitor and associated thresholds - - try - { - oam.getSystemConfig (moduleTypeConfig); - localDiskCritical = moduleTypeConfig.ModuleDiskCriticalThreshold; - localDiskMajor = moduleTypeConfig.ModuleDiskMajorThreshold; - localDiskMinor = moduleTypeConfig.ModuleDiskMinorThreshold; - - DiskMonitorFileSystems::iterator p = moduleTypeConfig.FileSystems.begin(); - - for ( ; p != moduleTypeConfig.FileSystems.end() ; p++) - { - string fs = *p; - lfs.push_back(fs); - - if (DISK_DEBUG) - { - //Log this event - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Local Config File System to monitor ="); - args.add(fs); - msg.format(args); - ml.logDebugMessage(msg); - } - } - - } - catch (...) - { - sleep(5); - continue; - } - - // get External info - try - { - oam.getSystemConfig(systemConfig); - - } - catch (...) - { - sleep(5); - continue; - } - - if (Externalflag) - { - // get External info - try - { - ExternalDiskCritical = systemConfig.ExternalCriticalThreshold; - ExternalDiskMajor = systemConfig.ExternalMajorThreshold; - ExternalDiskMinor = systemConfig.ExternalMinorThreshold; - - } - catch (...) - { - sleep(5); - continue; - } - } - - if ( diskSpaceCheck == 0 ) - { - //check for local file systems - LocalFileSystems::iterator p = lfs.begin(); - - while (p != lfs.end()) - { - string deviceName = *p; - ++p; - string fileName; - - // check local - if ( deviceName == "/") - { - fileName = deviceName + "/var/lib/columnstore/"; - } - else - { - fileName = deviceName + "/000.dir"; - } - - uint64_t totalBlocks; - uint64_t usedBlocks; - - if (!statvfs(fileName.c_str(), &buf)) - { - - uint64_t blksize, blocks, freeblks, free; - - blksize = buf.f_bsize; - blocks = buf.f_blocks; - freeblks = buf.f_bavail; - - totalBlocks = blocks * blksize; - free = freeblks * blksize; - usedBlocks = totalBlocks - free; - } - else - continue; - - int64_t diskUsage = 0; - - if ( totalBlocks == 0 ) - { - diskUsage = 0; - - //Log this event - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Total Disk Usage is set to 0"); - msg.format(args); - ml.logWarningMessage(msg); - } - else - diskUsage = (usedBlocks / (totalBlocks / 100)) + 1; - - SMSystemDisk sd; - sd.deviceName = deviceName; - sd.usedPercent = diskUsage; - sd.totalBlocks = totalBlocks; - sd.usedBlocks = usedBlocks; - sdl.push_back(sd); - - if (DISK_DEBUG) - cout << "Disk Usage for " << deviceName << " is " << diskUsage << endl; - - if (diskUsage >= localDiskCritical && localDiskCritical > 0 ) - { - //adjust if over 100% - if ( diskUsage > 100 ) - diskUsage = 100; - - if ( serverMonitor.sendResourceAlarm(deviceName, DISK_USAGE_HIGH, SET, (int) diskUsage) ) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Local Disk above Critical Disk threshold with a percentage of "); - args.add((int) diskUsage); - msg.format(args); - ml.logInfoMessage(msg); - } - } - else if (diskUsage >= localDiskMajor && localDiskMajor > 0 ) - { - if (serverMonitor.sendResourceAlarm(deviceName, DISK_USAGE_MED, SET, (int) diskUsage)) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Local Disk above Major Disk threshold with a percentage of "); - args.add((int) diskUsage); - msg.format(args); - ml.logInfoMessage(msg); - } - } - else if (diskUsage >= localDiskMinor && localDiskMinor > 0 ) - { - if ( serverMonitor.sendResourceAlarm(deviceName, DISK_USAGE_LOW, SET, (int) diskUsage)) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Local Disk above Minor Disk threshold with a percentage of "); - args.add((int) diskUsage); - msg.format(args); - ml.logInfoMessage(msg); - } - } - else - serverMonitor.checkDiskAlarm(deviceName); - } - - //check for external file systems/devices - if (Externalflag || - (!Externalflag && DataRedundancyConfig == "y" && moduleType == "pm") ) - { - try - { - DBRootConfigList dbrootConfigList; - oam.getPmDbrootConfig(moduleID, dbrootConfigList); - - DBRootConfigList::iterator pt = dbrootConfigList.begin(); - - for ( ; pt != dbrootConfigList.end() ; pt++) - { - int dbroot = *pt; - string deviceName = systemConfig.DBRoot[dbroot - 1]; - string fileName = deviceName + "/000.dir"; - - if (DISK_DEBUG) - { - //Log this event - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("DBRoots monitoring"); - args.add(dbroot); - args.add(" ,file system =" ); - args.add(fileName); - msg.format(args); - ml.logDebugMessage(msg); - } - - uint64_t totalBlocks; - uint64_t usedBlocks; - - if (!statvfs(fileName.c_str(), &buf)) - { - - uint64_t blksize, blocks, freeblks, free; - - blksize = buf.f_bsize; - blocks = buf.f_blocks; - freeblks = buf.f_bavail; - - totalBlocks = blocks * blksize; - free = freeblks * blksize; - usedBlocks = totalBlocks - free; - } - else - { - SMSystemDisk sd; - sd.deviceName = deviceName; - sd.usedPercent = 0; - sd.totalBlocks = 0; - sd.usedBlocks = 0; - sdl.push_back(sd); - continue; - } - - int diskUsage = 0; - - if ( totalBlocks == 0 ) - { - diskUsage = 0; - - //Log this event - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Total Disk Usage is set to 0"); - msg.format(args); - ml.logWarningMessage(msg); - } - else - diskUsage = (usedBlocks / (totalBlocks / 100)) + 1; - - SMSystemDisk sd; - sd.deviceName = deviceName; - sd.usedPercent = diskUsage; - sd.totalBlocks = totalBlocks; - sd.usedBlocks = usedBlocks; - sdl.push_back(sd); - - if (DISK_DEBUG) - cout << "Disk Usage for " << deviceName << " is " << diskUsage << endl; - - if (diskUsage >= ExternalDiskCritical && ExternalDiskCritical > 0 ) - { - //adjust if over 100% - if ( diskUsage > 100 ) - diskUsage = 100; - - if ( serverMonitor.sendResourceAlarm(deviceName, DISK_USAGE_HIGH, SET, diskUsage)) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Disk usage for"); - args.add(deviceName); - args.add(" above Critical Disk threshold with a percentage of "); - args.add((int) diskUsage); - msg.format(args); - ml.logInfoMessage(msg); - } - } - else if (diskUsage >= ExternalDiskMajor && ExternalDiskMajor > 0 ) - { - if ( serverMonitor.sendResourceAlarm(deviceName, DISK_USAGE_MED, SET, diskUsage)) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Disk usage for"); - args.add(deviceName); - args.add(" above Major Disk threshold with a percentage of "); - args.add((int) diskUsage); - msg.format(args); - ml.logInfoMessage(msg); - } - } - else if (diskUsage >= ExternalDiskMinor && ExternalDiskMinor > 0 ) - { - if ( serverMonitor.sendResourceAlarm(deviceName, DISK_USAGE_LOW, SET, diskUsage)) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Disk usage for"); - args.add(deviceName); - args.add(" above Minor Disk threshold with a percentage of "); - args.add((int) diskUsage); - msg.format(args); - ml.logInfoMessage(msg); - } - } - else - serverMonitor.checkDiskAlarm(deviceName); - } - } - catch (exception& e) - { - cout << endl << "**** getPmDbrootConfig Failed : " << e.what() << endl; - } - } - } - - //check OAM dbroot test flag to validate dbroot exist if on pm - if ( moduleName.find("pm") != string::npos ) - { - //check OAM dbroot test flag to validate dbroot exist - if ( dbrootList.size() != 0 ) - { - DBrootList::iterator p = dbrootList.begin(); - - while ( p != dbrootList.end() ) - { - //get dbroot directory - string dbrootDir = (*p).dbrootDir; - string dbrootName; - string dbrootID; - - //get dbroot name - string::size_type pos = dbrootDir.rfind("/", 80); - - if (pos != string::npos) - dbrootName = dbrootDir.substr(pos + 1, 80); - - //get ID - dbrootID = dbrootName.substr(4, 80); - - string fileName = dbrootDir + "/OAMdbrootCheck"; - - // retry in case we hit the remount window - for ( int retry = 0 ; ; retry++ ) - { - bool fail = false; - //first test, check if OAMdbrootCheck exists - ifstream file (fileName.c_str()); - - if (!file) - fail = true; - else - fail = false; - - if (fail) - { - //double check system status before reporting any error BUG 5078 - SystemStatus systemstatus; - - try - { - oam.getSystemStatus(systemstatus); - } - catch (exception& ex) - {} - - if (systemstatus.SystemOpState != oam::ACTIVE ) - { - break; - } - - if ( retry < 10 ) - { - sleep(3); - continue; - } - else - { - if ( !(*p).downFlag ) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("dbroot monitoring: Lost access to "); - args.add(dbrootDir); - msg.format(args); - ml.logWarningMessage(msg); - - oam.sendDeviceNotification(dbrootName, DBROOT_DOWN, moduleName); - (*p).downFlag = true; - - try - { - oam.setDbrootStatus(dbrootID, oam::AUTO_OFFLINE); - } - catch (exception& ex) - {} - - break; - } - } - } - else - { - if ( (*p).downFlag ) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("dbroot monitoring: Access back to "); - args.add(dbrootDir); - msg.format(args); - ml.logInfoMessage(msg); - - oam.sendDeviceNotification(dbrootName, DBROOT_UP, moduleName); - (*p).downFlag = false; - - try - { - oam.setDbrootStatus(dbrootID, oam::ACTIVE); - } - catch (exception& ex) - {} - } - - file.close(); - break; - } - } - - p++; - } - } - } - - //do Gluster status check, if configured - if ( DataRedundancyConfig == "y") - { - bool pass = true; - string errmsg = "unknown"; - - try - { - string arg1 = ""; - string arg2 = ""; - int ret = oam.glusterctl(oam::GLUSTER_STATUS, arg1, arg2, errmsg); - - if ( ret != 0 ) - { - cerr << "FAILURE: Status check error: " + errmsg << endl; - pass = false; - } - } - catch (exception& e) - { - cerr << endl << "**** glusterctl API exception: " << e.what() << endl; - cerr << "FAILURE: Status check error" << endl; - pass = false; - } - catch (...) - { - cerr << endl << "**** glusterctl API exception: UNKNOWN" << endl; - cerr << "FAILURE: Status check error" << endl; - pass = false; - } - - if ( !pass ) - { - // issue log and alarm - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Gluster Status check failure error msg: "); - args.add(errmsg); - msg.format(args); - ml.logWarningMessage(msg); - serverMonitor.sendResourceAlarm(errmsg, GLUSTER_DISK_FAILURE, SET, 0); - } - } - - // sleep 30 seconds - sleep(MONITOR_PERIOD / 3); - - //check disk space every 10 minutes - diskSpaceCheck++; - - if ( diskSpaceCheck >= 20 ) - { - diskSpaceCheck = 0; - - lfs.clear(); - sdl.clear(); - } - - } // end of while loop -} - -/****************************************************************************************** -* @brief checkDiskAlarm -* -* purpose: check to see if an alarm(s) is set on Disk and clear if so -* -******************************************************************************************/ -void ServerMonitor::checkDiskAlarm(string alarmItem, ALARMS alarmID) -{ - Oam oam; - ServerMonitor serverMonitor; - - // get current server name - string serverName; - oamModuleInfo_t st; - - try - { - st = oam.getModuleInfo(); - serverName = boost::get<0>(st); - } - catch (...) - { - serverName = "Unknown Server"; - } - - switch (alarmID) - { - case ALARM_NONE: // clear all alarms set if any found - if ( serverMonitor.checkActiveAlarm(DISK_USAGE_HIGH, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, DISK_USAGE_HIGH); - - if ( serverMonitor.checkActiveAlarm(DISK_USAGE_MED, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, DISK_USAGE_MED); - - if ( serverMonitor.checkActiveAlarm(DISK_USAGE_LOW, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, DISK_USAGE_LOW); - - break; - - case DISK_USAGE_LOW: // clear high and medium alarms set if any found - if ( serverMonitor.checkActiveAlarm(DISK_USAGE_HIGH, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, DISK_USAGE_HIGH); - - if ( serverMonitor.checkActiveAlarm(DISK_USAGE_MED, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, DISK_USAGE_MED); - - break; - - case DISK_USAGE_MED: // clear high alarms set if any found - if ( serverMonitor.checkActiveAlarm(DISK_USAGE_HIGH, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, DISK_USAGE_HIGH); - - break; - - default: // none to clear - break; - } // end of switch - - return; -} diff --git a/oamapps/serverMonitor/hardwareMonitor.cpp b/oamapps/serverMonitor/hardwareMonitor.cpp deleted file mode 100644 index 8e6a19dc7..000000000 --- a/oamapps/serverMonitor/hardwareMonitor.cpp +++ /dev/null @@ -1,269 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - * Copyright (C) 2016 MariaDB Corporation. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/*************************************************************************** - * $Id: hardwareMonitor.cpp 34 2006-09-29 21:13:54Z dhill $ - * - * Author: David Hill - ***************************************************************************/ - -#include "serverMonitor.h" - -using namespace std; -using namespace oam; -using namespace alarmmanager; -using namespace logging; -using namespace servermonitor; -//using namespace procheartbeat; - -extern string tmpDir; - - -/************************************************************************************************************ -* @brief hardwareMonitor function -* -* purpose: Monitor Hardware and report problems -* -* Parses file generated by the ipmitool -* -* pattern = what it is | value | units | status | value 1 | value 2 | value 3 | value 4 | value 5 | value 6 -* data(0) = what it is -* data(1) = value -* data(2) = units -* data(3) = status -* data(4)-data(9) = barrier values -* data(4) - low non-recoverable, i.e. fatal -* data(5) - low critical -* data(6) - low warning -* data(7) - high warning -* data(8) - high critical -* data(9) - high non-recoverable, i.e. fatal -* -************************************************************************************************************/ - -void hardwareMonitor(int IPMI_SUPPORT) -{ - ServerMonitor serverMonitor; - string data[10]; - string SensorName; - float SensorValue; - string Units; - string SensorStatus; - float lowFatal; - float lowCritical; - float lowWarning; - float highWarning; - float highCritical; - float highFatal; - char* p; - - if ( IPMI_SUPPORT == 0) - { - string tmpharwareMonitor = tmpDir + "/harwareMonitor.txt"; - - string cmd = "ipmitool sensor list > > " + tmpharwareMonitor; - int returnCode = system(cmd.c_str()); - - if (returnCode) - { - // System error, Log this event - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Error running ipmitool sensor list!!!"); - msg.format(args); - ml.logWarningMessage(msg); - - while (TRUE) - sleep(10000); - } - } - else - { - while (TRUE) - sleep(10000); - } - - // register for Heartbeat monitoring - /* try { - ProcHeartbeat procheartbeat; - procheartbeat.registerHeartbeat(HW_HEARTBEAT_ID); - } - catch (exception& ex) - { - string error = ex.what(); - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("EXCEPTION ERROR on registerHeartbeat: "); - args.add(error); - msg.format(args); - ml.logErrorMessage(msg); - } - catch(...) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("EXCEPTION ERROR on sendHeartbeat: Caught unknown exception!"); - msg.format(args); - ml.logErrorMessage(msg); - } - */ - // loop forever reading the hardware status - while (TRUE) - { - // parse output file - - ifstream File (tmpharwareMonitor); - - if (!File) - { - // System error, Log this event - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Error opening harwareMonitor.txt!!!"); - msg.format(args); - ml.logWarningMessage(msg); - sleep(300); - continue; - } - - char line[200]; - - while (File.getline(line, 200)) - { - // parse the line - int f = 0; - p = strtok(line, "|"); - - while (p) - { - data[f] = p; - data[f] = serverMonitor.StripWhitespace(data[f]); - p = strtok (NULL, "|"); - f++; - } - - if ( f == 0 ) - // nothing on this line, skip - continue; - - SensorName = data[0]; - SensorValue = atof(data[1].c_str()); - Units = data[2]; - SensorStatus = data[3]; - lowFatal = atof(data[4].c_str()); - lowCritical = atof(data[5].c_str()); - lowWarning = atof(data[6].c_str()); - highWarning = atof(data[7].c_str()); - highCritical = atof(data[8].c_str()); - highFatal = atof(data[9].c_str()); - - // check status and issue apporiate alarm if needed - if ( (SensorStatus != "ok") && (SensorStatus != "nr") && (SensorStatus != "na") ) - { - // Status error, check for warning or critical levels - - if ( SensorValue >= highFatal ) - { - // issue critical alarm and send message to shutdown Server - serverMonitor.sendAlarm(SensorName, HARDWARE_HIGH, SET, SensorValue); - serverMonitor.sendMsgShutdownServer(); - } - else if ( (SensorValue < highFatal) && (SensorValue >= highCritical) ) - // issue major alarm - serverMonitor.sendAlarm(SensorName, HARDWARE_MED, SET, SensorValue); - - else if ( (SensorValue < highCritical ) && (SensorValue >= highWarning) ) - // issue minor alarm - serverMonitor.sendAlarm(SensorName, HARDWARE_LOW, SET, SensorValue); - - else if ( (SensorValue <= lowWarning) && (SensorValue > lowCritical) ) - // issue minor alarm - serverMonitor.sendAlarm(SensorName, HARDWARE_LOW, SET, SensorValue); - - else if ( (SensorValue <= lowCritical) && (SensorValue > lowFatal) ) - // issue major alarm - serverMonitor.sendAlarm(SensorName, HARDWARE_MED, SET, SensorValue); - - else if ( SensorValue <= lowFatal ) - { - // issue critical alarm and send message to shutdown Server - serverMonitor.sendAlarm(SensorName, HARDWARE_HIGH, SET, SensorValue); - serverMonitor.sendMsgShutdownServer(); - } - else - // check if there are any active alarms that needs to be cleared - serverMonitor.checkAlarm(SensorName); - } - else - // check if there are any active alarms that needs to be cleared - serverMonitor.checkAlarm(SensorName); - - } //end of parsing file while - - File.close(); - - // send heartbeat message - /* try { - ProcHeartbeat procheartbeat; - procheartbeat.sendHeartbeat(HW_HEARTBEAT_ID); - - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Sent Heartbeat Msg"); - msg.format(args); - ml.logDebugMessage(msg); - } - catch (exception& ex) - { - string error = ex.what(); - if ( error.find("Disabled") == string::npos ) { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("EXCEPTION ERROR on sendHeartbeat: "); - args.add(error); - msg.format(args); - ml.logErrorMessage(msg); - } - } - catch(...) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("EXCEPTION ERROR on sendHeartbeat: Caught unknown exception!"); - msg.format(args); - ml.logErrorMessage(msg); - } - */ - // sleep - sleep(MONITOR_PERIOD); - } //end of forever while loop -} diff --git a/oamapps/serverMonitor/main.cpp b/oamapps/serverMonitor/main.cpp deleted file mode 100644 index 73698a4c5..000000000 --- a/oamapps/serverMonitor/main.cpp +++ /dev/null @@ -1,364 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -#include "IDBPolicy.h" -#include "serverMonitor.h" - -#include "crashtrace.h" -#include "installdir.h" - -using namespace std; -using namespace servermonitor; -using namespace oam; -using namespace logging; - -extern int swapFlag; - -string tmpDir; - -/***************************************************************************** -* @brief main -* -* purpose: Launch Resource Monitor threads and call Hardware Monitor function -* -* -******************************************************************************/ - -int main (int argc, char** argv) -{ - ServerMonitor serverMonitor; - Oam oam; - - tmpDir = startup::StartUp::tmpDir(); - - struct sigaction ign; - - memset(&ign, 0, sizeof(ign)); - ign.sa_handler = fatalHandler; - sigaction(SIGSEGV, &ign, 0); - sigaction(SIGABRT, &ign, 0); - sigaction(SIGFPE, &ign, 0); - - //Launch Memory Monitor Thread and check if swap is in critical condition - pthread_t memoryMonitorThread; - pthread_create (&memoryMonitorThread, NULL, (void* (*)(void*)) &memoryMonitor, NULL); - - - // initialize IDBPolicy while waiting swap flag being set. - idbdatafile::IDBPolicy::configIDBPolicy(); - - // wait until swap flag is set. - while ( swapFlag == 0 ) - { - sleep(1); - } - - if ( swapFlag == 1 ) - { - try - { - oam.processInitFailure(); - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("processInitFailure Called"); - msg.format(args); - ml.logInfoMessage(msg); - sleep(5); - exit(1); - } - catch (exception& ex) - { - string error = ex.what(); - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("EXCEPTION ERROR on processInitComplete: "); - args.add(error); - msg.format(args); - ml.logErrorMessage(msg); - sleep(5); - exit(1); - } - catch (...) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("EXCEPTION ERROR on processInitComplete: Caught unknown exception!"); - msg.format(args); - ml.logErrorMessage(msg); - sleep(5); - exit(1); - } - } - else - { - try - { - oam.processInitComplete("ServerMonitor"); - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("processInitComplete Successfully Called"); - msg.format(args); - ml.logInfoMessage(msg); - } - catch (exception& ex) - { - string error = ex.what(); - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("EXCEPTION ERROR on processInitComplete: "); - args.add(error); - msg.format(args); - ml.logErrorMessage(msg); - } - catch (...) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("EXCEPTION ERROR on processInitComplete: Caught unknown exception!"); - msg.format(args); - ml.logErrorMessage(msg); - } - } - - //Ignore SIGPIPE signals - signal(SIGPIPE, SIG_IGN); - - //Ignore SIGHUP signals - signal(SIGHUP, SIG_IGN); - - //get auto rsync setting - string umAutoSync = "n"; // default to 'n' - - try - { - oam.getSystemConfig( "UMAutoSync", umAutoSync); - } - catch (...) - { - umAutoSync = "n"; - } - - oamModuleInfo_t t; - - //get local module info - string localModuleName; - string localModuleType; - int serverInstallType = 2; - string OAMParentModuleName; - - try - { - t = oam.getModuleInfo(); - localModuleName = boost::get<0>(t); - localModuleType = boost::get<1>(t); - serverInstallType = boost::get<5>(t); - } - catch (...) {} - - string SingleServerInstall = "n"; // default to 'n' - - try - { - oam.getSystemConfig( "SingleServerInstall", SingleServerInstall); - } - catch (...) - { - SingleServerInstall = "n"; - } - - //Launch Rsync Thread, if needed - // run on first non-disabled user-module - // if combo um/pm configured a non single-server, then that is a pm - // if separate um / pm, then that is a um - bool launchUMAutoSync = false; - SystemStatus systemstatus; - - if (umAutoSync == "y" ) - { - if ( serverInstallType == oam::INSTALL_COMBINE_DM_UM_PM ) - { - if ( SingleServerInstall != "y" ) - { - //get first non-disabled pm - try - { - oam.getSystemStatus(systemstatus, false); - - for ( unsigned int i = 0 ; i < systemstatus.systemmodulestatus.modulestatus.size(); i++) - { - if ( systemstatus.systemmodulestatus.modulestatus[i].Module.empty() ) - // end of list - break; - - string moduleName = systemstatus.systemmodulestatus.modulestatus[i].Module; - string moduleType = moduleName.substr(0, MAX_MODULE_TYPE_SIZE); - - if ( moduleType == "pm" ) - { - int state = systemstatus.systemmodulestatus.modulestatus[i].ModuleOpState; - - if ( state == oam::MAN_DISABLED || state == oam::AUTO_DISABLED ) - continue; - else - { - //module is enabled, runs if this is pm1 and only pm1, so it will not run - //if pm1 is down for an extented period of time - if ( moduleName == "pm1" ) - { - if (localModuleName == "pm1" ) - launchUMAutoSync = true; - - break; - } - } - } - } - } - catch (...) {} - } - } - else - { - //get first non-disabled um - if ( localModuleType == "um" ) - { - try - { - oam.getSystemStatus(systemstatus, false); - - for ( unsigned int i = 0 ; i < systemstatus.systemmodulestatus.modulestatus.size(); i++) - { - if ( systemstatus.systemmodulestatus.modulestatus[i].Module.empty() ) - // end of list - break; - - string moduleName = systemstatus.systemmodulestatus.modulestatus[i].Module; - string moduleType = moduleName.substr(0, MAX_MODULE_TYPE_SIZE); - - if ( moduleType == "um" ) - { - int state = systemstatus.systemmodulestatus.modulestatus[i].ModuleOpState; - - if ( state == oam::MAN_DISABLED || state == oam::AUTO_DISABLED ) - continue; - else - { - //module is enabled, runs if this is um1 and only um1, so it will not run - //if um1 is down for an extented period of time - if ( moduleName == "um1" ) - { - if (localModuleName == "um1" ) - launchUMAutoSync = true; - - break; - } - } - } - } - } - catch (...) {} - } - } - } - - //wait until system is active before launching monitoring threads - while (true) - { - SystemStatus systemstatus; - - try - { - oam.getSystemStatus(systemstatus); - } - catch (exception& ex) - {} - - if (systemstatus.SystemOpState == oam::ACTIVE ) - { - - if (launchUMAutoSync) - { - //Launch UM Auto Sync Thread - pthread_t rsyncThread; - pthread_create (&rsyncThread, NULL, (void* (*)(void*)) &UMAutoSync, NULL); - } - - //Launch CPU Monitor Thread - pthread_t cpuMonitorThread; - pthread_create (&cpuMonitorThread, NULL, (void* (*)(void*)) &cpuMonitor, NULL); - - //Launch Disk Monitor Thread if it's not a storagemanager cluster - config::Config *_config = config::Config::makeConfig(); - string storageType = _config->getConfig("Installation", "DBRootStorageType"); - if (storageType != "storagemanager") - { - pthread_t diskMonitorThread; - pthread_create (&diskMonitorThread, NULL, (void* (*)(void*)) &diskMonitor, NULL); - } - - //Launch DB Health Check Thread -// pthread_t dbhealthMonitorThread; -// pthread_create (&dbhealthMonitorThread, NULL, (void*(*)(void*)) &dbhealthMonitor, NULL); - - //Call msg process request function - msgProcessor(); - - break; - } - - sleep(5); - } - - return 0; -} - -// common functions - -// -// Check Active alarms -// -// Use a mutex to limit the number of calls -// - -pthread_mutex_t THREAD_LOCK; - - -bool ServerMonitor::checkActiveAlarm(const int alarmid, const std::string moduleName, const std::string deviceName) -{ - Oam oam; - - pthread_mutex_lock(&THREAD_LOCK); - - bool status = oam.checkActiveAlarm(alarmid, moduleName, deviceName); - - pthread_mutex_unlock(&THREAD_LOCK); - - return status; -} - diff --git a/oamapps/serverMonitor/memoryMonitor.cpp b/oamapps/serverMonitor/memoryMonitor.cpp deleted file mode 100644 index d96217d56..000000000 --- a/oamapps/serverMonitor/memoryMonitor.cpp +++ /dev/null @@ -1,573 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/*************************************************************************** - * $Id: memoryMonitor.cpp 34 2006-09-29 21:13:54Z dhill $ - * - * Author: Zhixuan Zhu - ***************************************************************************/ - -#include "cgroupconfigurator.h" -#include "serverMonitor.h" - -using namespace std; -using namespace oam; -using namespace alarmmanager; -using namespace logging; -using namespace servermonitor; -//using namespace procheartbeat; - -ProcessMemoryList pml; -int swapFlag = 0; - -uint64_t totalMem; - -pthread_mutex_t MEMORY_LOCK; - -extern string tmpDir; - -/***************************************************************************************** -* @brief memoryMonitor Thread -* -* purpose: Get current Memory and Swap usage, report alarms -* -*****************************************************************************************/ -void memoryMonitor() -{ - ServerMonitor serverMonitor; - - int swapUsagePercent = 0; - - // set defaults - int memoryCritical = 90, - memoryMajor = 0, - memoryMinor = 0, - swapCritical = 90, - swapMajor = 80, - swapMinor = 70; - - int day = 0; - - //set monitoring period to 60 seconds - int monitorPeriod = MONITOR_PERIOD; - utils::CGroupConfigurator cg; - - while (true) - { - // Get MEMORY usage water mark from server configuration and compare - ModuleTypeConfig moduleTypeConfig; - Oam oam; - - try - { - oam.getSystemConfig (moduleTypeConfig); - memoryCritical = moduleTypeConfig.ModuleMemCriticalThreshold; - memoryMajor = moduleTypeConfig.ModuleMemMajorThreshold; - memoryMinor = moduleTypeConfig.ModuleMemMinorThreshold; - swapCritical = moduleTypeConfig.ModuleSwapCriticalThreshold; - swapMajor = moduleTypeConfig.ModuleSwapMajorThreshold; - swapMinor = moduleTypeConfig.ModuleSwapMinorThreshold; - } - catch (...) - { - sleep(5); - continue; - } - - //get memory stats - totalMem = cg.getTotalMemory(); - uint64_t freeMem = cg.getFreeMemory(); - uint64_t usedMem = totalMem - freeMem; - - //get swap stats - uint64_t totalSwap = cg.getTotalSwapSpace(); - uint64_t usedSwap = cg.getSwapInUse(); - - if ( totalSwap == 0 ) - { - swapUsagePercent = 0; - swapFlag = 2; - - //get current day, log warning only once a day - time_t now; - now = time(NULL); - struct tm tm; - localtime_r(&now, &tm); - - if ( day != tm.tm_mday) - { - day = tm.tm_mday; - - //Log this event - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Total Swap space is set to 0"); - msg.format(args); - ml.logWarningMessage(msg); - } - } - else - swapUsagePercent = usedSwap / (totalSwap / 100); - - int memoryUsagePercent; - - if ( totalMem == 0 ) - { - memoryUsagePercent = 0; - - //Log this event - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Total Memory space is set to 0"); - msg.format(args); - ml.logWarningMessage(msg); - } - else - memoryUsagePercent = (usedMem / (totalMem / 100)) + 1; - - /*LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("memoryUsagePercent "); - args.add((uint64_t) memoryUsagePercent); - args.add("usedMem "); - args.add((uint64_t) usedMem); - args.add("totalMem "); - args.add((uint64_t) totalMem); - msg.format(args); - ml.logInfoMessage(msg); - */ - //first time called, log - //adjust if over 100% - if ( swapUsagePercent < 0 ) - swapUsagePercent = 0; - - if ( swapUsagePercent > 100 ) - swapUsagePercent = 100; - - if ( memoryUsagePercent < 0 ) - memoryUsagePercent = 0; - - if ( memoryUsagePercent > 100 ) - memoryUsagePercent = 100; - - // check for Memory alarms - if (memoryUsagePercent >= memoryCritical && memoryCritical > 0 ) - { - if ( monitorPeriod == MONITOR_PERIOD ) - { - - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Local Memory above Critical Memory threshold with a percentage of "); - args.add((int) memoryUsagePercent); - args.add(" ; Swap "); - args.add((int) swapUsagePercent); - msg.format(args); - ml.logInfoMessage(msg); - serverMonitor.sendResourceAlarm("Local-Memory", MEMORY_USAGE_HIGH, SET, memoryUsagePercent); - - pthread_mutex_lock(&MEMORY_LOCK); - serverMonitor.outputProcMemory(true); - pthread_mutex_unlock(&MEMORY_LOCK); - } - - // change to 1 second for quick swap space monitoring - monitorPeriod = 1; - } - else if (memoryUsagePercent >= memoryMajor && memoryMajor > 0 ) - { - monitorPeriod = MONITOR_PERIOD; - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Local Memory above Major Memory threshold with a percentage of "); - args.add((int) memoryUsagePercent); - msg.format(args); - ml.logInfoMessage(msg); - serverMonitor.sendResourceAlarm("Local-Memory", MEMORY_USAGE_MED, SET, memoryUsagePercent); - } - else if (memoryUsagePercent >= memoryMinor && memoryMinor > 0 ) - { - monitorPeriod = MONITOR_PERIOD; - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Local Memory above Minor Memory threshold with a percentage of "); - args.add((int) memoryUsagePercent); - msg.format(args); - ml.logInfoMessage(msg); - serverMonitor.sendResourceAlarm("Local-Memory", MEMORY_USAGE_LOW, SET, memoryUsagePercent); - } - else - { - monitorPeriod = MONITOR_PERIOD; - serverMonitor.checkMemoryAlarm("Local-Memory"); - } - - // check for Swap alarms - if (swapUsagePercent >= swapCritical && swapCritical > 0 ) - { - //adjust if over 100% - if ( swapUsagePercent > 100 ) - swapUsagePercent = 100; - - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Swap above Critical Memory threshold with a percentage of "); - args.add((int) swapUsagePercent); - msg.format(args); - ml.logInfoMessage(msg); - serverMonitor.sendResourceAlarm("Swap", SWAP_USAGE_HIGH, SET, swapUsagePercent); - serverMonitor.checkSwapAction(); - } - else if (swapUsagePercent >= swapMajor && swapMajor > 0 ) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Swap above Major Memory threshold with a percentage of "); - args.add((int) swapUsagePercent); - msg.format(args); - ml.logInfoMessage(msg); - serverMonitor.sendResourceAlarm("Swap", SWAP_USAGE_MED, SET, swapUsagePercent); - serverMonitor.checkSwapAction(); - } - else if (swapUsagePercent >= swapMinor && swapMinor > 0 ) - { - swapFlag = 2; - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Swap above Minor Memory threshold with a percentage of "); - args.add((int) swapUsagePercent); - msg.format(args); - ml.logInfoMessage(msg); - serverMonitor.sendResourceAlarm("Swap", SWAP_USAGE_LOW, SET, swapUsagePercent); - } - else - { - swapFlag = 2; - serverMonitor.checkSwapAlarm("Swap"); - } - - // sleep, 1 minute - sleep(monitorPeriod); - - } // end of while loop -} - -/****************************************************************************************** -* @brief checkMemoryAlarm -* -* purpose: check to see if an alarm(s) is set on MEMORY and clear if so -* -******************************************************************************************/ -void ServerMonitor::checkMemoryAlarm(string alarmItem, ALARMS alarmID) -{ - Oam oam; - ServerMonitor serverMonitor; - - // get current server name - string serverName; - oamModuleInfo_t st; - - try - { - st = oam.getModuleInfo(); - serverName = boost::get<0>(st); - } - catch (...) - { - serverName = "Unknown Server"; - } - - switch (alarmID) - { - case ALARM_NONE: // clear all alarms set if any found - if ( serverMonitor.checkActiveAlarm(MEMORY_USAGE_HIGH, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, MEMORY_USAGE_HIGH); - - if ( serverMonitor.checkActiveAlarm(MEMORY_USAGE_MED, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, MEMORY_USAGE_MED); - - if ( serverMonitor.checkActiveAlarm(MEMORY_USAGE_LOW, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, MEMORY_USAGE_LOW); - - break; - - case MEMORY_USAGE_LOW: // clear high and medium alarms set if any found - if ( serverMonitor.checkActiveAlarm(MEMORY_USAGE_HIGH, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, MEMORY_USAGE_HIGH); - - if ( serverMonitor.checkActiveAlarm(MEMORY_USAGE_MED, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, MEMORY_USAGE_MED); - - break; - - case MEMORY_USAGE_MED: // clear high alarms set if any found - if ( serverMonitor.checkActiveAlarm(MEMORY_USAGE_HIGH, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, MEMORY_USAGE_HIGH); - - break; - - default: // none to clear - break; - } // end of switch - - return; -} - -/****************************************************************************************** -* @brief checkSwapAlarm -* -* purpose: check to see if an alarm(s) is set on SWAP and clear if so -* -******************************************************************************************/ -void ServerMonitor::checkSwapAlarm(string alarmItem, ALARMS alarmID) -{ - Oam oam; - ServerMonitor serverMonitor; - - // get current server name - string serverName; - oamModuleInfo_t st; - - try - { - st = oam.getModuleInfo(); - serverName = boost::get<0>(st); - } - catch (...) - { - serverName = "Unknown Server"; - } - - switch (alarmID) - { - case ALARM_NONE: // clear all alarms set if any found - if ( serverMonitor.checkActiveAlarm(SWAP_USAGE_HIGH, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, SWAP_USAGE_HIGH); - - if ( serverMonitor.checkActiveAlarm(SWAP_USAGE_MED, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, SWAP_USAGE_MED); - - if ( serverMonitor.checkActiveAlarm(SWAP_USAGE_LOW, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, SWAP_USAGE_LOW); - - break; - - case SWAP_USAGE_LOW: // clear high and medium alarms set if any found - if ( serverMonitor.checkActiveAlarm(SWAP_USAGE_HIGH, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, SWAP_USAGE_HIGH); - - if ( serverMonitor.checkActiveAlarm(SWAP_USAGE_MED, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, SWAP_USAGE_MED); - - break; - - case SWAP_USAGE_MED: // clear high alarms set if any found - if ( serverMonitor.checkActiveAlarm(SWAP_USAGE_HIGH, serverName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, SWAP_USAGE_HIGH); - - break; - - default: // none to clear - break; - } // end of switch - - return; -} - -/****************************************************************************************** -* @brief checkSwapAction -* -* purpose: check if any system action needs tyo be taken -* -******************************************************************************************/ -void ServerMonitor::checkSwapAction() -{ - Oam oam; - - if ( swapFlag == 0 ) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Swap Space usage over Major threashold, startSystem failure"); - msg.format(args); - ml.logCriticalMessage(msg); - - swapFlag = 1; - sleep(5); - return; - } - - string swapAction = "restartSystem"; - - try - { - oam.getSystemConfig ("SwapAction", swapAction); - } - catch (...) - {} - - if (swapAction == "none") - return; - - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Swap Space usage over Major threashold, perform OAM command "); - args.add( swapAction); - msg.format(args); - ml.logCriticalMessage(msg); - - GRACEFUL_FLAG gracefulTemp = GRACEFUL; - ACK_FLAG ackTemp = ACK_YES; - - if ( swapAction == "stopSystem") - { - try - { - oam.stopSystem(gracefulTemp, ackTemp); - } - catch (exception& e) - { - } - } - else if ( swapAction == "restartSystem") - { - try - { - oam.restartSystem(gracefulTemp, ackTemp); - } - catch (exception& e) - { - } - } -} - -/****************************************************************************************** -* @brief outputProcMemory -* -* purpose: output Top memory users -* -******************************************************************************************/ -void ServerMonitor::outputProcMemory(bool log) -{ - // - // get top 5 Memory users by process - // - - string tmpprocessMem = tmpDir + "/processMem"; - - string cmd = "ps -e -orss=1,args= | sort -b -k1,1n |tail -n 5 | awk '{print $1,$2}' > " + tmpprocessMem; - system(cmd.c_str()); - - ifstream oldFile (tmpprocessMem.c_str()); - - string process; - long long memory; - int memoryUsage; - pml.clear(); - - char line[400]; - - while (oldFile.getline(line, 400)) - { - string buf = line; - string::size_type pos = buf.find (' ', 0); - - if (pos != string::npos) - { - memory = atol(buf.substr(0, pos - 1).c_str()); - memoryUsage = (memory * 1024 * 1000 / totalMem) + 1 ; - process = buf.substr(pos + 1, 80); - - //cleanup process name - pos = process.rfind ('/'); - - if (pos != string::npos) - process = process.substr(pos + 1, 80); - else - { - pos = process.find ('[', 0); - - if (pos != string::npos) - process = process.substr(pos + 1, 80); - - pos = process.find (']', 0); - - if (pos != string::npos) - process = process.substr(0, pos); - } - - processMemory pm; - pm.processName = process; - pm.usedBlocks = memory; - pm.usedPercent = memoryUsage; - pml.push_back(pm); - - if (log) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Memory Usage for Process: "); - args.add(process); - args.add(" : Memory Used "); - args.add((int) memory); - args.add(" : % Used "); - args.add(memoryUsage); - msg.format(args); - ml.logInfoMessage(msg); - } - } - } - - oldFile.close(); -} diff --git a/oamapps/serverMonitor/msgProcessor.cpp b/oamapps/serverMonitor/msgProcessor.cpp deleted file mode 100644 index fe330547e..000000000 --- a/oamapps/serverMonitor/msgProcessor.cpp +++ /dev/null @@ -1,693 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/*************************************************************************** - * $Id: msgProcessor.cpp 34 2006-09-29 21:13:54Z dhill $ - * - * Author: David Hill - ***************************************************************************/ - -#include "serverMonitor.h" - -using namespace std; -using namespace oam; -using namespace messageqcpp; -using namespace logging; -using namespace servermonitor; -using namespace config; - -extern float currentCpuUsage; -extern unsigned long totalMem; -extern SystemDiskList sdl; -extern ProcessCPUList pcl; -extern ProcessMemoryList pml; -extern pthread_mutex_t CPU_LOCK; -extern pthread_mutex_t MEMORY_LOCK; - -extern string tmpDir; - -/** - * constants define - */ - -struct PendingSQLStatement -{ - string sqlStatement; - time_t startTime; -}; - - -/***************************************************************************************** -* @brief msgProcessor Thread -* -* purpose: Process incoming message request -* -*****************************************************************************************/ -void msgProcessor() -{ - ServerMonitor serverMonitor; - Oam oam; - - ByteStream msg; - IOSocket fIos; - - string name[5]; - double usage[5]; - - bool DBhealthActive = false; - - // get current server name - string moduleName; - oamModuleInfo_t st; - - try - { - st = oam.getModuleInfo(); - moduleName = boost::get<0>(st); - } - catch (...) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Failed to get Module Name"); - msg.format(args); - ml.logErrorMessage(msg); - moduleName = "Unknown Server"; - } - - string msgPort = moduleName + "_ServerMonitor"; - - //read and cleanup port before trying to use - try - { - Config* sysConfig = Config::makeConfig(); - string port = sysConfig->getConfig(msgPort, "Port"); - string cmd = "fuser -k " + port + "/tcp >/dev/null 2>&1"; - //int user; - //user = getuid(); - - system(cmd.c_str()); - } - catch (...) - { - } - - for (;;) - { - try - { - MessageQueueServer mqs(msgPort); - - for (;;) - { - try - { - fIos = mqs.accept(); - msg = fIos.read(); - - if (msg.length() > 0) - { - ByteStream::byte requestType; - msg >> requestType; - - switch (requestType) - { - case GET_PROC_CPU_USAGE: - { - ByteStream::byte top_users; - ByteStream ackmsg; - - msg >> top_users; - - int count = 0; - - // - // get Process and System CPU usage - // - pthread_mutex_lock(&CPU_LOCK); - serverMonitor.getCPUdata(); - - ProcessCPUList::iterator p = pcl.begin(); - - while (p != pcl.end()) - { - double cpuUsage = (*p).usedPercent; - - if ( cpuUsage != 0 ) - { - usage[count] = cpuUsage; - name[count] = (*p).processName; - count++; - } - - p++; - } - - // if all processes are idle, puch the first one for display - if ( count == 0 ) - { - p = pcl.begin(); - usage[count] = (*p).usedPercent; - name[count] = (*p).processName; - count++; - } - - pthread_mutex_unlock(&CPU_LOCK); - - if ( count < top_users ) - ackmsg << (ByteStream::byte) count; - else - ackmsg << (ByteStream::byte) top_users; - - // output top requested processes - for (int i = 0 ; i < count ; i++) - { - ackmsg << name[i]; - ackmsg << (ByteStream::quadbyte) usage[i]; - - if ( i == top_users ) - break; - } - - fIos.write(ackmsg); - - ackmsg.reset(); - } - break; - - case GET_MODULE_CPU_USAGE: - { - ByteStream ackmsg; - // - // get Process and System CPU usage - // - pthread_mutex_lock(&CPU_LOCK); - serverMonitor.getCPUdata(); - - ackmsg << (ByteStream::byte) currentCpuUsage; - - pthread_mutex_unlock(&CPU_LOCK); - - fIos.write(ackmsg); - - ackmsg.reset(); - } - break; - - case GET_PROC_MEMORY_USAGE: - { - ByteStream ackmsg; - ByteStream::byte top_users; - - msg >> top_users; - - // - // get top Memory users by process - // - - pthread_mutex_lock(&MEMORY_LOCK); - serverMonitor.outputProcMemory(false); - - ackmsg << (ByteStream::byte) pml.size(); - - ProcessMemoryList::iterator p = pml.end(); - - while (p != pml.begin()) - { - p--; - ackmsg << (*p).processName; - ackmsg << (ByteStream::quadbyte) (*p).usedBlocks; - ackmsg << (ByteStream::byte) (*p).usedPercent; - } - - pthread_mutex_unlock(&MEMORY_LOCK); - - fIos.write(ackmsg); - - ackmsg.reset(); - } - break; - - case GET_MODULE_MEMORY_USAGE: - { - // - // get Module Memory/Swap usage - // - - ByteStream ackmsg; - - // get cache MEMORY stats - string tmpcached = tmpDir + "/cached"; - - string cmd = "cat /proc/meminfo | grep Cached -m 1 | awk '{print $2}' > " + tmpcached; - system(cmd.c_str()); - - ifstream oldFile (tmpcached.c_str()); - - string strCache; - long long cache; - - char line[400]; - - while (oldFile.getline(line, 400)) - { - strCache = line; - break; - } - - oldFile.close(); - - if (strCache.empty() ) - cache = 0; - else - cache = atol(strCache.c_str()) * 1024; - - struct sysinfo myinfo; - sysinfo(&myinfo); - - //get memory stats - unsigned long mem_total = myinfo.totalram ; - unsigned long freeMem = myinfo.freeram ; - - // adjust for cache, which is available memory - unsigned long mem_used = mem_total - freeMem - cache; - - //get swap stats - unsigned long swap_total = myinfo.totalswap ; - unsigned long freeswap = myinfo.freeswap ; - unsigned long swap_used = swap_total - freeswap ; - - unsigned int memoryUsagePercent; - memoryUsagePercent = (mem_used / (mem_total / 100)); - - unsigned int swapUsagePercent; - - if ( swap_total == 0 ) - swapUsagePercent = 0; - else - swapUsagePercent = (swap_used / (swap_total / 100)); - - ackmsg << (ByteStream::quadbyte) (mem_total / 1024); - ackmsg << (ByteStream::quadbyte) (mem_used / 1024); - ackmsg << (ByteStream::quadbyte) (cache / 1024); - ackmsg << (ByteStream::byte) memoryUsagePercent; - ackmsg << (ByteStream::quadbyte) (swap_total / 1024); - ackmsg << (ByteStream::quadbyte) (swap_used / 1024); - ackmsg << (ByteStream::byte) swapUsagePercent; - - fIos.write(ackmsg); - - ackmsg.reset(); - } - break; - - case GET_MODULE_DISK_USAGE: - { - // - // get Module Disk usage - // - - ByteStream ackmsg; - - ackmsg << (ByteStream::byte) sdl.size(); - - SystemDiskList::iterator p = sdl.begin(); - - while (p != sdl.end()) - { - ackmsg << (*p).deviceName; - //ackmsg << (ByteStream::quadbyte) ((*p).totalBlocks / 1024) ; - //ackmsg << (ByteStream::quadbyte) ((*p).usedBlocks / 1024); - //ackmsg << (ByteStream::byte) (*p).usedPercent; - ackmsg << (uint64_t) ((*p).totalBlocks / 1024) ; - ackmsg << (uint64_t) ((*p).usedBlocks / 1024); - ackmsg << (uint8_t) (*p).usedPercent; - p++; - } - - fIos.write(ackmsg); - - ackmsg.reset(); - } - break; - - case GET_ACTIVE_SQL_QUERY: - { - // - // get Active SQL Query - // determined from UM debug.log file - // - map pendingSQLStatements; - map::iterator pendingIter; - - ByteStream ackmsg; - char line[15100]; - char* pos; - uint64_t currentSessionID; - const char* szStartSql = "Start SQL statement"; - const char* szEndSql = "End SQL statement"; - - time_t rawtime; - struct tm tmStartTime; - time_t moduleStartTime = 0; - time_t queryStartTime = 0; - - string fileName = "/var/log/mariadb/columnstore/debug.log"; - - try - { - // Get ServerMonitor start time. We don't report any SQL that started before then. - Oam oam; - ProcessStatus procstat; - oam.getProcessStatus("ExeMgr", moduleName, procstat); - - if (strptime((procstat.StateChangeDate).c_str(), "%a %b %d %H:%M:%S %Y", &tmStartTime) != NULL) - { - tmStartTime.tm_isdst = -1; - moduleStartTime = mktime(&tmStartTime); - } - - cout << "UM start time " << moduleStartTime << endl; - // Open the Calpont debug.log file - ifstream file (fileName.c_str()); - - if (!file) - { - try - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("File open error: "); - args.add(fileName); - msg.format(args); - ml.logErrorMessage(msg); - } - catch (...) - {} - - ackmsg << (ByteStream::byte) oam::API_FILE_OPEN_ERROR; - fIos.write(ackmsg); - break; - } - - ackmsg << (ByteStream::byte) oam::API_SUCCESS; - - // Read the file. Filter out anything we don't care about. Store - // each SQL Start statement. When a SQL End statement is found, remove the - // corresponding SQL statement from the collection. - while (file.good()) - { - file.getline(line, 15100, '\n'); - pos = strstr(line, szStartSql); - - if (pos) - { - //filter out System Catalog inqueries - if (strstr(pos + 21, "/FE") || strstr(pos + 21, "/EC")) - { - continue; - } - - // Filter any query that started before the ServerMonitor - if (strptime(line, "%b %d %H:%M:%S", &tmStartTime) != NULL) - { - // The date in the debug.log file doesn't have a year. - // Assume the start time is no more than a year ago. - struct tm tmNow; - tmStartTime.tm_isdst = -1; - time ( &rawtime ); - localtime_r ( &rawtime, &tmNow ); - - // Allow for New year turnover - if (tmStartTime.tm_mon > tmNow.tm_mon) - { - tmStartTime.tm_year = tmNow.tm_year - 1; - } - else - { - tmStartTime.tm_year = tmNow.tm_year; - } - - queryStartTime = mktime(&tmStartTime); - - // Ignore if the query started before this process - if (queryStartTime < moduleStartTime) - { - continue; - } - } - else - { - continue; - } - - // Find the sessionid - char* pos1; - char* pos2; - pos1 = strchr(line, '|'); - - if (!pos1) - { - continue; - } - - pos2 = strchr(pos1 + 1, '|'); - - if (!pos2) - { - continue; - } - - currentSessionID = strtoll(pos1 + 1, NULL, 0); - - // Check the map for this sessionid. If found, we have two pending - // SQL statements from the same session, which is theoretically - // impossible. Throw the first one away for now. Error handling? - if ((pendingIter = pendingSQLStatements.find(currentSessionID)) != pendingSQLStatements.end()) - { - pendingSQLStatements.erase(pendingIter); - } - - PendingSQLStatement pendingSQLStatement; - pendingSQLStatement.sqlStatement = pos + 21; - pendingSQLStatement.startTime = queryStartTime; - pair sqlPair; - sqlPair.first = currentSessionID; - sqlPair.second = pendingSQLStatement; - pendingSQLStatements.insert(sqlPair); - } - else - { - pos = strstr(line, szEndSql); - - if (pos) - { - // Find the sessionid - char* pos1; - char* pos2; - pos1 = strchr(line, '|'); - - if (!pos1) - { - continue; - } - - pos2 = strchr(pos1 + 1, '|'); - - if (!pos2) - { - continue; - } - - currentSessionID = strtoll(pos1 + 1, NULL, 0); - - // Check the map for this sessionid. If found, this is a completed SQL statement - // remove it from our collection - if ((pendingIter = pendingSQLStatements.find(currentSessionID)) != pendingSQLStatements.end()) - { - pendingSQLStatements.erase(pendingIter); - } - } - } - } - - file.close(); - - // Send the number of pending statements - ackmsg << (ByteStream::byte) pendingSQLStatements.size(); - - // Send the pending statements we discovered. - for (pendingIter = pendingSQLStatements.begin(); - pendingIter != pendingSQLStatements.end(); - ++pendingIter) - { - ackmsg << (*pendingIter).second.sqlStatement; - ackmsg << (unsigned)(*pendingIter).second.startTime; - ackmsg << (*pendingIter).first; - } - } - catch (...) - { - } - - fIos.write(ackmsg); - ackmsg.reset(); - } - break; - - case RUN_DBHEALTH_CHECK: - { - ByteStream::byte action; - msg >> action; - - ByteStream ackmsg; - - ackmsg << (ByteStream::byte) RUN_DBHEALTH_CHECK; - - try - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("RUN_DBHEALTH_CHECK called"); - msg.format(args); - ml.logDebugMessage(msg); - } - catch (...) - {} - - if ( DBhealthActive ) - { - try - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("RUN_DBHEALTH_CHECK already Active, exiting"); - msg.format(args); - ml.logDebugMessage(msg); - } - catch (...) - {} - - ackmsg << (ByteStream::byte) oam::API_ALREADY_IN_PROGRESS; - fIos.write(ackmsg); - ackmsg.reset(); - - break; - } - - DBhealthActive = true; - - int ret = serverMonitor.healthCheck(action); - - if ( ret == API_SUCCESS ) - { - try - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("RUN_DBHEALTH_CHECK passed"); - msg.format(args); - ml.logDebugMessage(msg); - } - catch (...) - {} - - ackmsg << (ByteStream::byte) oam::API_SUCCESS; - fIos.write(ackmsg); - ackmsg.reset(); - } - else - { - try - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("RUN_DBHEALTH_CHECK failed, check dbhealthTest.log"); - msg.format(args); - ml.logDebugMessage(msg); - } - catch (...) - {} - - ackmsg << (ByteStream::byte) oam::API_FAILURE; - fIos.write(ackmsg); - ackmsg.reset(); - } - - DBhealthActive = false; - break; - } - - default: - break; - - } // end of switch - } - - try - { - fIos.close(); - } - catch (...) - {} - } - catch (...) - {} - } // end of for loop - } - catch (exception& ex) - { - string error = ex.what(); - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("EXCEPTION ERROR on ServerMonitor: "); - args.add(error); - msg.format(args); - ml.logErrorMessage(msg); - // takes 2 - 4 minites to free sockets, sleep and retry - sleep(10); - } - catch (...) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("EXCEPTION ERROR on ServerMonitor"); - msg.format(args); - ml.logErrorMessage(msg); - // takes 2 - 4 minites to free sockets, sleep and retry - sleep(10); - } - } -} - diff --git a/oamapps/serverMonitor/procmonMonitor.cpp b/oamapps/serverMonitor/procmonMonitor.cpp deleted file mode 100644 index aead97cf1..000000000 --- a/oamapps/serverMonitor/procmonMonitor.cpp +++ /dev/null @@ -1,193 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/*************************************************************************** - * $Id: procmonMonitor.cpp 34 2006-09-29 21:13:54Z dhill $ - * - * Author: David Hill - ***************************************************************************/ - -#include "serverMonitor.h" - -using namespace std; -using namespace oam; -using namespace alarmmanager; -using namespace logging; -using namespace servermonitor; -using namespace messageqcpp; - -/************************************************************************************************************ -* @brief procmonMonitor function -* -* purpose: Monitor Local Process Monitor (like a local heartbeat check) abd reset when it's not responding -* -* -************************************************************************************************************/ - -void procmonMonitor() -{ - ServerMonitor serverMonitor; - Oam oam; - - //wait before monitoring is started - sleep(60); - - // get current server name - string moduleName; - oamModuleInfo_t st; - - try - { - st = oam.getModuleInfo(); - moduleName = boost::get<0>(st); - } - catch (...) - { - // Critical error, Log this event and exit - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Failed to read local module Info"); - msg.format(args); - ml.logCriticalMessage(msg); - exit(-1); - } - - string msgPort = moduleName + "_ProcessMonitor"; - - int heartbeatCount = 0; - - // loop forever monitoring Local Process Monitor - while (true) - { - - ByteStream msg; - ByteStream::byte requestID = LOCALHEARTBEAT; - - msg << requestID; - - try - { - MessageQueueClient mqRequest(msgPort); - mqRequest.write(msg); - - // wait 10 seconds for response - ByteStream::byte returnACK; - ByteStream::byte returnRequestID; - ByteStream::byte requestStatus; - ByteStream receivedMSG; - - struct timespec ts = { 10, 0 }; - - try - { - receivedMSG = mqRequest.read(&ts); - - if (receivedMSG.length() > 0) - { - receivedMSG >> returnACK; - receivedMSG >> returnRequestID; - receivedMSG >> requestStatus; - - if ( returnACK == oam::ACK && returnRequestID == requestID) - { - // ACK for this request - heartbeatCount = 0; - } - } - else - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("procmonMonitor: ProcMon Msg timeout!!!"); - msg.format(args); - ml.logWarningMessage(msg); - - heartbeatCount++; - - if ( heartbeatCount > 2 ) - { - //Process Monitor not responding, restart it - system("pkill ProcMon"); - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("procmonMonitor: Restarting ProcMon"); - msg.format(args); - ml.logWarningMessage(msg); - - sleep(60); - heartbeatCount = 0; - } - } - - mqRequest.shutdown(); - - } - catch (SocketClosed& ex) - { - string error = ex.what(); - - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("procmonMonitor: EXCEPTION ERROR on mqRequest.read: " + error); - msg.format(args); - ml.logErrorMessage(msg); - } - catch (...) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("procmonMonitor: EXCEPTION ERROR on mqRequest.read: Caught unknown exception"); - msg.format(args); - ml.logErrorMessage(msg); - } - } - catch (exception& ex) - { - string error = ex.what(); - - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("procmonMonitor: EXCEPTION ERROR on MessageQueueClient.read: " + error); - msg.format(args); - ml.logErrorMessage(msg); - } - catch (...) - { - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("procmonMonitor: EXCEPTION ERROR on MessageQueueClient: Caught unknown exception"); - msg.format(args); - ml.logErrorMessage(msg); - } - - sleep(60); - } //while loop -} diff --git a/oamapps/serverMonitor/serverMonitor.cpp b/oamapps/serverMonitor/serverMonitor.cpp deleted file mode 100644 index 33224d56f..000000000 --- a/oamapps/serverMonitor/serverMonitor.cpp +++ /dev/null @@ -1,369 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - * Copyright (C) 2016 MariaDB Corporation. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/*************************************************************************** - * $Id: serverMonitor.cpp 34 2006-09-29 21:13:54Z dhill $ - * - * Author: David Hill - ***************************************************************************/ - - -#include "serverMonitor.h" -#include "installdir.h" - -using namespace std; -using namespace oam; -using namespace alarmmanager; -using namespace logging; -using namespace servermonitor; - -namespace servermonitor -{ - - -/****************************************************************************************** -* @brief ServerMonitor Constructor -* -* purpose: ServerMonitor Constructor -* -******************************************************************************************/ -ServerMonitor::ServerMonitor() -{ -} - -/****************************************************************************************** -* @brief ServerMonitor Destructor -* -* purpose: ServerMonitor Destructor -* -******************************************************************************************/ -ServerMonitor::~ServerMonitor() -{ -} - -/****************************************************************************************** -* @brief sendAlarm -* -* purpose: send a trap and log the process information -* -******************************************************************************************/ -void ServerMonitor::sendAlarm(string alarmItem, ALARMS alarmID, int action, float sensorValue) -{ - ServerMonitor serverMonitor; - Oam oam; - - //Log this event - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add(alarmItem); - args.add(", sensor value out-of-range: "); - args.add(sensorValue); - - // get current server name - string moduleName; - oamModuleInfo_t st; - - try - { - st = oam.getModuleInfo(); - moduleName = boost::get<0>(st); - } - catch (...) - { - moduleName = "Unknown Server"; - } - - // check if there is an active alarm above the reporting theshold - // that needs to be cleared - serverMonitor.checkAlarm(alarmItem, alarmID); - - // check if Alarm is already active, don't resend - if ( !( oam.checkActiveAlarm(alarmID, moduleName, alarmItem)) ) - { - - ALARMManager alarmMgr; - // send alarm - alarmMgr.sendAlarmReport(alarmItem.c_str(), alarmID, action); - - args.add(", Alarm set: "); - args.add(alarmID); - } - - // output log - msg.format(args); - ml.logWarningMessage(msg); - - return; -} - -/****************************************************************************************** -* @brief checkAlarm -* -* purpose: check to see if an alarm(s) is set on device and clear if so -* -******************************************************************************************/ -void ServerMonitor::checkAlarm(string alarmItem, ALARMS alarmID) -{ - Oam oam; - - // get current server name - string moduleName; - oamModuleInfo_t st; - - try - { - st = oam.getModuleInfo(); - moduleName = boost::get<0>(st); - } - catch (...) - { - moduleName = "Unknown Server"; - } - - switch (alarmID) - { - case ALARM_NONE: // clear all alarms set if any found - if ( oam.checkActiveAlarm(HARDWARE_HIGH, moduleName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, HARDWARE_HIGH); - - if ( oam.checkActiveAlarm(HARDWARE_MED, moduleName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, HARDWARE_MED); - - if ( oam.checkActiveAlarm(HARDWARE_LOW, moduleName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, HARDWARE_LOW); - - break; - - case HARDWARE_LOW: // clear high and medium alarms set if any found - if ( oam.checkActiveAlarm(HARDWARE_HIGH, moduleName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, HARDWARE_HIGH); - - if ( oam.checkActiveAlarm(HARDWARE_MED, moduleName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, HARDWARE_MED); - - break; - - case HARDWARE_MED: // clear high alarms set if any found - if ( oam.checkActiveAlarm(HARDWARE_HIGH, moduleName, alarmItem) ) - // alarm set, clear it - clearAlarm(alarmItem, HARDWARE_HIGH); - - break; - - default: // none to clear - break; - } // end of switch - - return; -} - -/****************************************************************************************** -* @brief clearAlarm -* -* purpose: clear Alarm that was previously set -* -******************************************************************************************/ -void ServerMonitor::clearAlarm(string alarmItem, ALARMS alarmID) -{ - ALARMManager alarmMgr; - alarmMgr.sendAlarmReport(alarmItem.c_str(), alarmID, CLEAR); - - //Log this event - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add(alarmItem); - args.add(" alarm #"); - args.add(alarmID); - args.add("cleared"); - msg.format(args); - ml.logWarningMessage(msg); -} - -/****************************************************************************************** -* @brief sendMsgShutdownServer -* -* purpose: send a Message to Shutdown server -* -******************************************************************************************/ -/*void ServerMonitor::sendMsgShutdownServer() -{ - Oam oam; - - //Log this event - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("serverMonitor: Fatal Hardware Alarm detected, Server being shutdown"); - msg.format(args); - ml.logCriticalMessage(msg); - - string moduleName; - oamModuleInfo_t st; - try { - st = oam.getModuleInfo(); - moduleName = boost::get<0>(st); - } - catch (...) { - // o well, let's take out own action - system("init 0"); - } - - try - { - oam.shutdownModule(moduleName, FORCEFUL, ACK_NO); - } - catch (exception& e) - { - // o well, let's take out own action - system("init 0"); - } -} -*/ -/****************************************************************************************** -* @brief StripWhitespace -* -* purpose: strip off whitespaces from a string -* -******************************************************************************************/ -string ServerMonitor::StripWhitespace(string value) -{ - for (;;) - { - string::size_type pos = value.find (' ', 0); - - if (pos == string::npos) - // no more found - break; - - // strip leading - if (pos == 0) - { - value = value.substr (pos + 1, 10000); - } - else - { - // strip trailing - value = value.substr (0, pos); - } - } - - return value; -} - - -/****************************************************************************************** -* @brief sendResourceAlarm -* -* purpose: send a trap and log the process information -* -******************************************************************************************/ -bool ServerMonitor::sendResourceAlarm(string alarmItem, ALARMS alarmID, int action, int usage) -{ - ServerMonitor serverMonitor; - Oam oam; - - //Log this event - LoggingID lid(SERVER_MONITOR_LOG_ID); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add(alarmItem); - args.add(" usage at percentage of "); - args.add(usage); - - // get current module name - string moduleName; - oamModuleInfo_t st; - - try - { - st = oam.getModuleInfo(); - moduleName = boost::get<0>(st); - } - catch (...) - { - moduleName = "Unknown Server"; - } - - // check if there is an active alarm above the reporting theshold - // that needs to be cleared - - if (alarmItem == "CPU") - serverMonitor.checkCPUAlarm(alarmItem, alarmID); - else if (alarmItem == "Local Disk" || alarmItem == "External") - serverMonitor.checkDiskAlarm(alarmItem, alarmID); - else if (alarmItem == "Local Memory") - serverMonitor.checkMemoryAlarm(alarmItem, alarmID); - else if (alarmItem == "Local Swap") - serverMonitor.checkSwapAlarm(alarmItem, alarmID); - - // don't issue an alarm on thge dbroots is already issued by this or another server - if ( alarmItem.find("/var/lib/columnstore/data") == 0 ) - { - // check if Alarm is already active from any module, don't resend - if ( !( oam.checkActiveAlarm(alarmID, "*", alarmItem)) ) - { - - ALARMManager alarmMgr; - // send alarm - alarmMgr.sendAlarmReport(alarmItem.c_str(), alarmID, action); - - args.add(", Alarm set: "); - args.add(alarmID); - msg.format(args); - ml.logInfoMessage(msg); - return true; - } - else - return false; - } - else - { - // check if Alarm is already active from this module, don't resend - if ( !( oam.checkActiveAlarm(alarmID, moduleName, alarmItem)) ) - { - - ALARMManager alarmMgr; - // send alarm - alarmMgr.sendAlarmReport(alarmItem.c_str(), alarmID, action); - - args.add(", Alarm set: "); - args.add(alarmID); - msg.format(args); - ml.logInfoMessage(msg); - return true; - } - else - return false; - } - - return true; -} - - -} // end of namespace diff --git a/oamapps/serverMonitor/serverMonitor.h b/oamapps/serverMonitor/serverMonitor.h deleted file mode 100644 index d1e7c5a37..000000000 --- a/oamapps/serverMonitor/serverMonitor.h +++ /dev/null @@ -1,250 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - * Copyright (C) 2016 MariaDB Corporation. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/*************************************************************************** - * $Id: serverMonitor.h 34 2006-09-29 21:13:54Z dhill $ - * - * Author: David Hill - ***************************************************************************/ -/** - * @file - */ -#ifndef SERVER_MONITOR_H -#define SERVER_MONITOR_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "liboamcpp.h" -#include "messagelog.h" -#include "messageobj.h" -#include "loggingid.h" -#include "alarmmanager.h" -#include "socketclosed.h" -#include "shmkeys.h" -#include "alarmglobal.h" -# - - -#define CPU_DEBUG 0 // 0 for supported -#define DISK_DEBUG 0 // 0 for supported - -#define CPU_HEARTBEAT_ID 1 -#define MEMORY_HEARTBEAT_ID 2 -#define HW_HEARTBEAT_ID 3 - -#define MONITOR_PERIOD 60 // 60 seconds - -typedef struct -{ - std::string processName; - double usedPercent; -} processCPU; - -typedef std::list ProcessCPUList; - -typedef struct -{ - std::string processName; - long long usedBlocks; - double usedPercent; -} processMemory; - -typedef std::list ProcessMemoryList; - -typedef struct -{ - std::string deviceName; - uint64_t totalBlocks; - uint64_t usedBlocks; - uint64_t usedPercent; -} SMSystemDisk; - -typedef std::list SystemDiskList; - - -/** -* @brief Local Process-Monitor Monitor Thread -*/ -void procmonMonitor(); - -/** -* @brief UM Auto Sync Thread -*/ -void UMAutoSync(); - -/** -* @brief CPU Monitor Thread -*/ -void cpuMonitor(); - -/** -* @brief Disk Monitor Thread -*/ -void diskMonitor(); - -/** -* @brief Disk Monitor Thread -*/ -void memoryMonitor(); - -/** -* @brief Hardware Monitor -*/ -void hardwareMonitor(int IPMI_SUPPORT); - -/** -* @brief Message Processor Thread -*/ -void msgProcessor(); - -/** -* @brief Disk Monitor Thread -*/ -void diskTest(); - -/** -* @brief DB Health Monitor Thread -*/ -void dbhealthMonitor(); - -namespace servermonitor -{ - -// Log ID -#define SERVER_MONITOR_LOG_ID 9 - - -class ServerMonitor -{ -public: - /** - * @brief Constructor - */ - ServerMonitor(); - - /** - * @brief Default Destructor - */ - ~ServerMonitor(); - - /** - * @brief send alarm - */ - void sendAlarm(std::string alarmItem, oam::ALARMS alarmID, int action, float sensorValue); - - /** - * @brief check alarm - */ - void checkAlarm(std::string alarmItem, oam::ALARMS alarmID = oam::ALARM_NONE); - - /** - * @brief clear alarm - */ - void clearAlarm(std::string alarmItem, oam::ALARMS alarmID); - - /** - * @brief send msg to shutdown server - */ - void sendMsgShutdownServer(); - - /** - * @brief strip off whitespaces from a string - */ - std::string StripWhitespace(std::string value); - - /** - * @brief log cpu usage to active log file - */ - void logCPUactive (unsigned int); - - /** - * @brief log cpu peak and average to stat file - */ - void logCPUstat (int usageCount); - - /** - * @brief send alarm - */ - bool sendResourceAlarm(std::string alarmItem, oam::ALARMS alarmID, int action, int usage); - - /** - * @brief check CPU alarm - */ - void checkCPUAlarm(std::string alarmItem, oam::ALARMS alarmID = oam::ALARM_NONE); - - /** - * @brief check Disk alarm - */ - void checkDiskAlarm(std::string alarmItem, oam::ALARMS alarmID = oam::ALARM_NONE); - - /** - * @brief check Memory alarm - */ - void checkMemoryAlarm(std::string alarmItem, oam::ALARMS alarmID = oam::ALARM_NONE); - - /** - * @brief check Swap alarm - */ - void checkSwapAlarm(std::string alarmItem, oam::ALARMS alarmID = oam::ALARM_NONE); - - /** - * @brief check Swap action - */ - void checkSwapAction(); - - /** - * @brief output Proc Memory - */ - void outputProcMemory(bool); - - /** - * @brief get CPU Data - */ - void getCPUdata(); - - /** - * @brief db health check - */ - int healthCheck(bool action = true); - - /** - * @brief Check Active Alarm - */ - bool checkActiveAlarm(const int alarmid, const std::string moduleName, const std::string deviceName); - - -}; // end of class - -} // end of namespace - -#endif diff --git a/primitives/primproc/primitiveserver.cpp b/primitives/primproc/primitiveserver.cpp index d0d07c341..704692385 100644 --- a/primitives/primproc/primitiveserver.cpp +++ b/primitives/primproc/primitiveserver.cpp @@ -2507,16 +2507,6 @@ void PrimitiveServer::start(Service *service) fServerpool.invoke(ServerThread(oss.str(), this)); } - { - Oam oam; - - try - { - oam.processInitComplete("PrimProc"); - } - catch (...) {} - } - service->NotifyServiceStarted(); fServerpool.wait(); diff --git a/primitives/primproc/primproc.cpp b/primitives/primproc/primproc.cpp index 2e8e08c1c..94cfa60f6 100644 --- a/primitives/primproc/primproc.cpp +++ b/primitives/primproc/primproc.cpp @@ -411,14 +411,6 @@ int ServicePrimProc::Child() mlp->logMessage(errMsg); cerr << errMsg << endl; - try - { - oam.processInitFailure(); - } - catch (...) - { - } - NotifyServiceInitializationFailed(); return 2; } diff --git a/procmgr/CMakeLists.txt b/procmgr/CMakeLists.txt deleted file mode 100644 index 707f4b8ac..000000000 --- a/procmgr/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ - -include_directories( ${ENGINE_COMMON_INCLUDES} ) - - -########### next target ############### - -set(ProcMgr_SRCS main.cpp processmanager.cpp ../utils/common/crashtrace.cpp) - -add_executable(ProcMgr ${ProcMgr_SRCS}) - -target_compile_options(ProcMgr PRIVATE -Wno-unused-result) - -target_link_libraries(ProcMgr ${ENGINE_LDFLAGS} cacheutils ${NETSNMP_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_EXEC_LIBS}) - -install(TARGETS ProcMgr DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine) - diff --git a/procmgr/main.cpp b/procmgr/main.cpp deleted file mode 100644 index 0f44716a2..000000000 --- a/procmgr/main.cpp +++ /dev/null @@ -1,3170 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/***************************************************************************************** -* $Id: main.cpp 2203 2013-07-08 16:50:51Z bpaul $ -* -*****************************************************************************************/ - - - -#include - -#include - -#include "columnstoreversion.h" -#include "processmanager.h" -#include "installdir.h" -#include "IDBPolicy.h" -#include "utils_utf8.h" - -#include "crashtrace.h" - -using namespace std; -using namespace logging; -using namespace messageqcpp; -using namespace processmanager; -using namespace oam; -using namespace alarmmanager; -using namespace threadpool; -//using namespace procheartbeat; -using namespace config; - -bool runStandby = false; -bool MsgThreadActive = false; -bool runCold = false; -string systemName = "system"; -string iface_name; -string cloud; -bool amazon = false; -string PMInstanceType; -string UMInstanceType; -string AmazonPMFailover = "y"; -string DataRedundancyConfig = "n"; -bool rootUser = true; -string USER = "root"; -bool HDFS = false; -string localHostName; -string PMwithUM = "n"; -string MySQLRep = "n"; -string tmpLogDir; - - -// pushing the ACTIVE_ALARMS_FILE to all nodes every 10 seconds. -const int ACTIVE_ALARMS_PUSHING_INTERVAL = 10; - -typedef map moduleList; -moduleList moduleInfoList; - -extern HeartBeatProcList hbproclist; -extern pthread_mutex_t THREAD_LOCK; -extern bool startsystemthreadStop; -extern string gdownActiveOAMModule; -extern int startsystemthreadStatus; -extern vector downModuleList; -extern bool startFailOver; -extern bool gOAMParentModuleFlag; - -static void* messageThread(Configuration* config); -static void* alarmMessageThread(Configuration* config); -static void sigUser1Handler(int sig); -static void startMgrProcessThread(); -static void hdfsActiveAlarmsPushingThread(); -//static void pingDeviceThread(); -//static void heartbeatProcessThread(); -//static void heartbeatMsgThread(); - -/***************************************************************************************** -* @brief main -* -* purpose: request launching of Mgr controlled processes and wait for incoming messages -* -*****************************************************************************************/ -int main(int argc, char** argv) -{ -#ifndef _MSC_VER - setuid(0); // set effective ID to root; ignore return status -#endif - // Set locale language - setlocale(LC_ALL, ""); - setlocale(LC_NUMERIC, "C"); - - idbdatafile::IDBPolicy::configIDBPolicy(); - - // This is unset due to the way we start it - program_invocation_short_name = const_cast("ProcMgr"); - - struct sigaction ign; - memset(&ign, 0, sizeof(ign)); - ign.sa_handler = fatalHandler; - sigaction(SIGSEGV, &ign, 0); - sigaction(SIGABRT, &ign, 0); - sigaction(SIGFPE, &ign, 0); - - Oam oam; - - //check if root-user - int user; - user = getuid(); - - if (user != 0) - rootUser = false; - - char* p = getenv("USER"); - - if (p && *p) - USER = p; - - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - ALARMManager aManager; - - log.writeLog(__LINE__, " "); - log.writeLog(__LINE__, "**********Process Manager Started**********"); - - //Ignore SIGPIPE signals - signal(SIGPIPE, SIG_IGN); - - //Ignore SIGHUP signals - signal(SIGHUP, SIG_IGN); - - //create SIGUSR1 handler to get configuration updates - signal(SIGUSR1, sigUser1Handler); - - // Get System Name - try - { - oam.getSystemConfig("SystemName", systemName); - } - catch (...) - {} - - //get cloud setting - try - { - oam.getSystemConfig( "Cloud", cloud); - } - catch (...) {} - - //get amazon parameters - if ( cloud == "amazon-ec2" || cloud == "amazon-vpc" ) - { - oam.getSystemConfig("PMInstanceType", PMInstanceType); - oam.getSystemConfig("UMInstanceType", UMInstanceType); - oam.getSystemConfig("AmazonPMFailover", AmazonPMFailover); - - amazon = true; - } - - //get gluster config - try - { - oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); - } - catch (...) - { - DataRedundancyConfig = "n"; - } - - //hdfs / hadoop config - string DBRootStorageType; - - try - { - oam.getSystemConfig( "DBRootStorageType", DBRootStorageType); - } - catch (...) {} - - if ( DBRootStorageType == "hdfs" ) - HDFS = true; - - log.writeLog(__LINE__, "Main: DBRootStorageType = " + DBRootStorageType, LOG_TYPE_DEBUG); - - //PMwithUM config - try - { - oam.getSystemConfig( "PMwithUM", PMwithUM); - } - catch (...) - { - PMwithUM = "n"; - } - - try - { - oam.getSystemConfig("MySQLRep", MySQLRep); - } - catch (...) - { - MySQLRep = "n"; - } - - // get system uptime and alarm if this is a restart after module outage - if ( gOAMParentModuleFlag ) - { - log.writeLog(__LINE__, "Running Active"); - log.writeLog(__LINE__, "Running Active", LOG_TYPE_DEBUG); - } - else - { - log.writeLog(__LINE__, "Running Standby"); - log.writeLog(__LINE__, "Running Standby", LOG_TYPE_DEBUG); - runStandby = true; - } - - //get local module main IP address - ModuleConfig moduleconfig; - oam.getSystemConfig(config.moduleName(), moduleconfig); - HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); - string localIPaddr = (*pt1).IPAddr; - localHostName = (*pt1).HostName; - - struct ifaddrs* addrs, *iap; - struct sockaddr_in* sa; - char buf[32]; - - getifaddrs(&addrs); - - for (iap = addrs; iap != NULL; iap = iap->ifa_next) - { - - if (iap->ifa_addr && (iap->ifa_flags & IFF_UP) && iap->ifa_addr->sa_family == AF_INET) - { - sa = (struct sockaddr_in*)(iap->ifa_addr); - inet_ntop(iap->ifa_addr->sa_family, (void*) & (sa->sin_addr), buf, sizeof(buf)); - - if (!strcmp(localIPaddr.c_str(), buf)) - { - iface_name = iap->ifa_name; - break; - } - } - } - - freeifaddrs(addrs); - log.writeLog(__LINE__, "Main Ethernet Port = " + iface_name, LOG_TYPE_DEBUG); - - //get tmp log directory - tmpLogDir = startup::StartUp::tmpDir(); - - // - //start a thread to ping all system modules - // - if (runStandby) - { - //running standby after startup - try - { - oam.processInitComplete("ProcessManager", oam::STANDBY); - log.writeLog(__LINE__, "processInitComplete Successfully Called", LOG_TYPE_DEBUG); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on processInitComplete: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on processInitComplete: Caught unknown exception!", LOG_TYPE_ERROR); - } - - // create message thread - pthread_t MessageThread; - int ret = pthread_create (&MessageThread, NULL, (void* (*)(void*)) &messageThread, &config); - - if ( ret != 0 ) - log.writeLog(__LINE__, "pthread_create failed, return code = " + oam.itoa(ret), LOG_TYPE_ERROR); - - // create alarm message thread - pthread_t AlarmMessageThread; - ret = pthread_create (&AlarmMessageThread, NULL, (void* (*)(void*)) &alarmMessageThread, &config); - - if ( ret != 0 ) - log.writeLog(__LINE__, "pthread_create failed, return code = " + oam.itoa(ret), LOG_TYPE_ERROR); - - //monitor OAM Parent Module for failover - while (true) - { - if ( processManager.OAMParentModuleChange() == oam::API_SUCCESS ) - break; - - log.writeLog(__LINE__, "OAMParentModuleChange failure", LOG_TYPE_WARNING); - // GO TRY AGAIN - } - - pthread_t srvThread; - int status = pthread_create (&srvThread, NULL, (void* (*)(void*)) &pingDeviceThread, NULL); - - if ( status != 0 ) - log.writeLog(__LINE__, "pingDeviceThread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR); - } - else - { - //running active after startup - //Update DBRM section of Columnstore.xml - processManager.updateWorkerNodeconfig(); -// processManager.distributeConfigFile("system"); - - pthread_t srvThread; - int status = pthread_create (&srvThread, NULL, (void* (*)(void*)) &pingDeviceThread, NULL); - - if ( status != 0 ) - log.writeLog(__LINE__, "pingDeviceThread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR); - - // if HDFS, create a thread to push an image of activeAlarms to HDFS filesystem - if (HDFS) - { - pthread_t hdfsAlarmThread; - int status = pthread_create(&hdfsAlarmThread, NULL, (void* (*)(void*)) &hdfsActiveAlarmsPushingThread, NULL); - - if ( status != 0 ) - log.writeLog(__LINE__, "hdfsActiveAlarmsPushingThread pthread_create failed, return code = " + oam.itoa(status), LOG_TYPE_ERROR); - } - - sleep(5); - - SystemStatus systemstatus; - - try - { - oam.getSystemStatus(systemstatus); - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - - if (systemstatus.SystemOpState != oam::MAN_OFFLINE && - systemstatus.SystemOpState != oam::ACTIVE) - { - pthread_t mgrProcThread; - int status = pthread_create (&mgrProcThread, NULL, (void* (*)(void*)) &startMgrProcessThread, NULL); - - if ( status != 0 ) - log.writeLog(__LINE__, "startMgrProcessThread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR); - } - - try - { - oam.processInitComplete("ProcessManager"); - log.writeLog(__LINE__, "processInitComplete Successfully Called", LOG_TYPE_DEBUG); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on processInitComplete: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on processInitComplete: Caught unknown exception!", LOG_TYPE_ERROR); - } - - //make sure ProcMgr IP Address is configured correctly - try - { - Config* sysConfig = Config::makeConfig(); - - // get Standby IP address - ModuleConfig moduleconfig; - oam.getSystemConfig(config.moduleName(), moduleconfig); - HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); - string IPaddr = (*pt1).IPAddr; - - sysConfig->setConfig("ProcMgr", "IPAddr", IPaddr); - sysConfig->setConfig("ProcMgr_Alarm", "IPAddr", IPaddr); - - log.writeLog(__LINE__, "set ProcMgr IPaddr to " + IPaddr, LOG_TYPE_DEBUG); - - //update Calpont Config table - try - { - sysConfig->write(); - } - catch (...) - { - log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR); - } - } - catch (...) - { - log.writeLog(__LINE__, "ERROR: makeConfig failed", LOG_TYPE_ERROR); - } - - - // TODO: This is called before MessageThread is created. - // Doesn't break anything but can be removed as it's done after MessageThread creation. - try - { - oam.distributeConfigFile(); - } - catch (...) - {} - - // create message thread - pthread_t MessageThread; - int ret = pthread_create (&MessageThread, NULL, (void* (*)(void*)) &messageThread, &config); - - if ( ret != 0 ) - log.writeLog(__LINE__, "pthread_create failed, return code = " + oam.itoa(ret), LOG_TYPE_ERROR); - - // create alarm message thread - pthread_t AlarmMessageThread; - ret = pthread_create (&AlarmMessageThread, NULL, (void* (*)(void*)) &alarmMessageThread, &config); - - if ( ret != 0 ) - log.writeLog(__LINE__, "pthread_create failed, return code = " + oam.itoa(ret), LOG_TYPE_ERROR); - } - - // - //start a thread to process heartbeat checks - // -// pthread_t heartThread; -// pthread_create (&heartThread, NULL, (void*(*)(void*)) &heartbeatProcessThread, NULL); - - // - //start a thread to read heartbeat messages - // -// pthread_t heartMsgThread; -// pthread_create (&heartMsgThread, NULL, (void*(*)(void*)) &heartbeatMsgThread, NULL); - - // suspend forever - while (true) - { - sleep(1000); - } -} - -/****************************************************************************************** -* @brief messageThread -* -* purpose: Read incoming messages -* -******************************************************************************************/ -static void* messageThread(Configuration* config) -{ - ProcessLog log; - assert(config); - ProcessManager processManager(*config, log); - Oam oam; - - //check for running active, then launch - while (true) - { - if ( !runStandby) - break; - - sleep (1); - } - - //read and cleanup port before trying to use - try - { - Config* sysConfig = Config::makeConfig(); - string port = sysConfig->getConfig("ProcMgr", "Port"); - string cmd = "fuser -k " + port + "/tcp >/dev/null 2>&1"; - - system(cmd.c_str()); - } - catch (...) - { - } - - log.writeLog(__LINE__, "Message Thread started ..", LOG_TYPE_DEBUG); - - // - //waiting for request - // - IOSocket fIos; - - for (;;) - { - try - { - MessageQueueServer procmgr("ProcMgr"); - MsgThreadActive = true; - for (;;) - { - try - { - fIos = procmgr.accept(); - - pthread_t messagethread; - int status = pthread_create (&messagethread, NULL, (void* (*)(void*)) &processMSG, &fIos); - - if ( status != 0 ) - log.writeLog(__LINE__, "messagethread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR); - } - catch (...) - {} - - } - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueServer for ProcMgr:" + error, LOG_TYPE_ERROR); - - // takes 2 - 4 minites to free sockets, sleep and retry - sleep(60); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueServer for ProcMgr: Caught unknown exception!", LOG_TYPE_ERROR); - - // takes 2 - 4 minites to free sockets, sleep and retry - sleep(60); - } - } - return NULL; -} - -/****************************************************************************************** -* @brief alarmMesssageThread -* -* purpose: Read incoming alarm messages -* -******************************************************************************************/ -static void* alarmMessageThread(Configuration* config) -{ - ProcessLog log; - assert(config); - ProcessManager processManager(*config, log); - Oam oam; - - ByteStream msg; - - //check for running active, then launch - while (true) - { - if ( !runStandby) - break; - - sleep (1); - } - - log.writeLog(__LINE__, "Alarm Message Thread started ..", LOG_TYPE_DEBUG); - - //read and cleanup port before trying to use - try - { - Config* sysConfig = Config::makeConfig(); - string port = sysConfig->getConfig("ProcMgr_Alarm", "Port"); - string cmd = "fuser -k " + port + "/tcp >/dev/null 2>&1"; - - system(cmd.c_str()); - } - catch (...) - { - } - - // - //waiting for request - // - IOSocket fIos; - - for (;;) - { - try - { - MessageQueueServer procmgr("ProcMgr_Alarm"); - - for (;;) - { - try - { - fIos = procmgr.accept(); - - try - { - msg = fIos.read(); - - if (msg.length() <= 0) - { - fIos.close(); - continue; - } - - //log.writeLog(__LINE__, "MSG RECEIVED: Process Alarm Message"); - - ByteStream::byte alarmID; - std::string componentID; - ByteStream::byte state; - std::string ModuleName; - std::string processName; - ByteStream::byte pid; - ByteStream::byte tid; - - msg >> alarmID; - msg >> componentID; - msg >> state; - msg >> ModuleName; - msg >> processName; - msg >> pid; - msg >> tid; - - Alarm calAlarm; - - calAlarm.setAlarmID (alarmID); - calAlarm.setComponentID (componentID); - calAlarm.setState (state); - calAlarm.setSname (ModuleName); - calAlarm.setPname (processName); - calAlarm.setPid (pid); - calAlarm.setTid (tid); - - ALARMManager aManager; - aManager.processAlarmReport(calAlarm); - - fIos.close(); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on read for ProcMgr_Alarm:" + error, LOG_TYPE_ERROR); - fIos.close(); - continue; - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on read for ProcMgr_Alarm: Caught unknown exception!", LOG_TYPE_ERROR); - fIos.close(); - continue; - } - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on accept for ProcMgr_Alarm:" + error, LOG_TYPE_ERROR); - continue; - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on accept for ProcMgr_Alarm: Caught unknown exception!", LOG_TYPE_ERROR); - continue; - } - } - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueServer for ProcMgr_Alarm:" + error, LOG_TYPE_ERROR); - - sleep(1); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueServer for ProcMgr_Alarm: Caught unknown exception!", LOG_TYPE_ERROR); - - sleep(1); - } - } - return NULL; -} - -/****************************************************************************************** -* @brief sigUser1Handler -* -* purpose: Handler SIGUSER1 signal and initial failover -* -******************************************************************************************/ -static void sigUser1Handler(int sig) -{ - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - Oam oam; - log.writeLog(__LINE__, "SIGUSER1 received, set startFailOver = true", LOG_TYPE_DEBUG); - - startFailOver = true; -} - -/***************************************************************************************** -* @brief Start Mgr Process by module Thread -* -* purpose: Send Messages to Module Process Monitors to start Processes -* -*****************************************************************************************/ -static void startMgrProcessThread() -{ - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - Oam oam; - SystemModuleTypeConfig systemmoduletypeconfig; - ModuleTypeConfig PMSmoduletypeconfig; - ALARMManager aManager; - - int waitTime = 180; - - log.writeLog(__LINE__, "startMgrProcessThread launched", LOG_TYPE_DEBUG); - - //get calpont software version and release - string localSoftwareInfo = columnstore_version + columnstore_release; - //get systemStartupOffline - string systemStartupOffline = "n"; - - try - { - Config* sysConfig = Config::makeConfig(); - - systemStartupOffline = sysConfig->getConfig("Installation", "SystemStartupOffline"); - } - catch (...) - { - log.writeLog(__LINE__, "ERROR: Problem getting systemStartupOffline from the Calpont System Configuration file", LOG_TYPE_ERROR); - systemStartupOffline = "n"; - } - - if ( systemStartupOffline == "y" ) - log.writeLog(__LINE__, "SystemStartupOffline set to 'y', Not starting up Calpont Database Processes", LOG_TYPE_INFO); - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - //Send out a start service just to make sure Columnstore is runing on remote nodes - //note this only works for systems with ssh-keys - /* for( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - if( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - //skip OAM Parent module - if ( (*pt).DeviceName == config.moduleName() ) - continue; - - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - for( ; pt1 != (*pt).hostConfigList.end() ; pt1++) - { - //run remote command script - string cmd = startup::StartUp::installDir() + "/bin/remote_command.sh " + (*pt1).IPAddr + " ssh '" + startup::StartUp::installDir() + "/bin/columnstore restart' 0"; - system(cmd.c_str()); - } - } - } - */ - //distribute system and process config files - processManager.distributeConfigFile("system"); - processManager.distributeConfigFile("system", "ProcessConfig.xml"); - - //send out moduleName to remote nodes, this will be used to startup new installed nodes - { - int status = API_SUCCESS; - int k = 0; - - for ( ; k < waitTime ; k++ ) - { - if ( startsystemthreadStop ) - { - processManager.setSystemState(oam::MAN_OFFLINE); - - // exit thread - log.writeLog(__LINE__, "startMgrProcessThread Exit with a stop system flag", LOG_TYPE_DEBUG); - pthread_exit(0); - } - - status = API_SUCCESS; - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - string moduleName = (*pt).DeviceName; - - //skip OAM Parent module - if ( (*pt).DeviceName == config.moduleName() ) - continue; - - if ( (*pt).DisableState == oam::MANDISABLEDSTATE || - (*pt).DisableState == oam::AUTODISABLEDSTATE ) - continue; - - int ret = processManager.configureModule(moduleName); - - if ( ret != API_SUCCESS ) - status = ret; - } - } - - //get out of loop if all modules updated - if ( status == API_SUCCESS ) - break; - - //retry after sleeping for a bit - sleep(1); - } - - if ( k == waitTime || status == API_FAILURE) - { - // system didn't successfull restart - processManager.setSystemState(oam::FAILED); - // exit thread - log.writeLog(__LINE__, "startMgrProcessThread Exit with a failure, not all ProcMons running", LOG_TYPE_CRITICAL); - log.writeLog(__LINE__, "startMgrProcessThread Exit - failure", LOG_TYPE_DEBUG); - pthread_exit(0); - } - } - - //wait until all modules are up after a system reboot - int i = 0; - - for ( ; i < waitTime ; i++ ) - { - if ( startsystemthreadStop ) - { - processManager.setSystemState(oam::MAN_OFFLINE); - - // exit thread - log.writeLog(__LINE__, "startMgrProcessThread Exit with a stop system flag", LOG_TYPE_DEBUG); - pthread_exit(0); - } - - int status = API_SUCCESS; - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType == "pm" ) - PMSmoduletypeconfig = systemmoduletypeconfig.moduletypeconfig[i]; - - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - string moduleName = (*pt).DeviceName; - - // Is Module UP - try - { - bool degraded; - int opState = oam::ACTIVE; - oam.getModuleStatus(moduleName, opState, degraded); - - if ( opState == oam::MAN_DISABLED ) - //mark all processes running on module man-offline except ProcMon - processManager.setProcessStates(moduleName, oam::MAN_OFFLINE); - - if ( opState == oam::AUTO_DISABLED) - //mark all processes running on module auto-offline - processManager.setProcessStates(moduleName, oam::AUTO_OFFLINE); - - if (opState == oam::INITIAL || - opState == oam::DOWN) - { - //a module is not up - status = API_MINOR_FAILURE; - break; - } - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - } - - if ( status == API_MINOR_FAILURE) - { - sleep(1); - break; - } - } - - if ( status == API_SUCCESS) - //all modules are up - break; - } - - if ( i == waitTime ) - { - // system didn't successfull restart - processManager.setSystemState(oam::FAILED); - - // exit thread - log.writeLog(__LINE__, "startMgrProcessThread Exit with a failure, not all modules are UP", LOG_TYPE_CRITICAL); - pthread_exit(0); - } - - //configure the PMS settings - processManager.updatePMSconfig(); - - if (HDFS) - //distribute config file - processManager.distributeConfigFile("system"); - - //now wait until all procmons are ACTIVE and validate rpms on each module - int status = API_SUCCESS; - int k = 0; - - for ( ; k < waitTime ; k++ ) - { - if ( startsystemthreadStop ) - { - processManager.setSystemState(oam::MAN_OFFLINE); - - // exit thread - log.writeLog(__LINE__, "startMgrProcessThread Exit with a stop system flag", LOG_TYPE_DEBUG); - pthread_exit(0); - } - - status = API_SUCCESS; - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - string moduleName = (*pt).DeviceName; - - if ( (*pt).DisableState == oam::MANDISABLEDSTATE || - (*pt).DisableState == oam::AUTODISABLEDSTATE ) - continue; - - int moduleOpState = oam::ACTIVE; - - // check module state - try - { - bool degraded; - oam.getModuleStatus(moduleName, moduleOpState, degraded); - - // if up, set to MAN_INIT - if ( HDFS && - (moduleOpState == oam::UP) ) - { - processManager.setModuleState(moduleName, oam::MAN_INIT); - } - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - // Is Module's ProcMon ACTIVE and module status has been updated - int opState = oam::ACTIVE; - - try - { - ProcessStatus procstat; - oam.getProcessStatus("ProcessMonitor", moduleName, procstat); - opState = procstat.ProcessOpState; - - if (opState != oam::ACTIVE) - { - //skip if Not ACTIVE - log.writeLog(__LINE__, "Module ProcMon not active yet: " + moduleName, LOG_TYPE_DEBUG); - status = API_MINOR_FAILURE; - continue; - } - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - status = API_MINOR_FAILURE; - continue; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - status = API_MINOR_FAILURE; - continue; - } - - //skip OAM Parent module - if ( moduleName == config.moduleName() ) - continue; - - //ProcMon ACTIVE, validate the software release and version of that module - ByteStream msg; - ByteStream::byte requestID = GETSOFTWAREINFO; - msg << requestID; - - string moduleSoftwareInfo = processManager.sendMsgProcMon1( moduleName, msg, requestID ); - - if ( moduleSoftwareInfo == "FAILED" ) - continue; - - if ( localSoftwareInfo != moduleSoftwareInfo ) - { - // module not running on same Calpont Software build as this local Director - // alarm and fail the module - log.writeLog(__LINE__, "Software Version mismatch : " + moduleName + "/" + localSoftwareInfo + "/" + moduleSoftwareInfo, LOG_TYPE_CRITICAL); - - aManager.sendAlarmReport(moduleName.c_str(), INVALID_SW_VERSION, SET); - processManager.setModuleState(moduleName, oam::FAILED); - status = API_FAILURE; - break; - } - } - } - - //get out of loop if all modules ACTTVE or MAN_OFFLINE - if ( status == API_SUCCESS ) - { - if ( systemStartupOffline == "y" ) - { - processManager.setSystemState(oam::MAN_OFFLINE); - log.writeLog(__LINE__, "SystemStartupOffline set to 'y', Not starting up Calpont Database Processes", LOG_TYPE_DEBUG); - } - - break; - } - else - { - //get out of loop if start module failed - if ( status == API_FAILURE ) - break; - - //retry after sleeping for a bit - sleep(1); - } - } - - if ( k == waitTime || status == API_FAILURE) - { - // system didn't successfull restart - processManager.setSystemState(oam::FAILED); - // exit thread - log.writeLog(__LINE__, "startMgrProcessThread Exit with a failure, not all ProcMons ACTIVE", LOG_TYPE_CRITICAL); - log.writeLog(__LINE__, "startMgrProcessThread Exit - failure", LOG_TYPE_DEBUG); - pthread_exit(0); - } - else - { - //distribute config file -// processManager.distributeConfigFile("system"); - - if ( systemStartupOffline == "n" && status == API_SUCCESS ) - { - oam::DeviceNetworkList devicenetworklist; - pthread_t startsystemthread; - int status = pthread_create (&startsystemthread, NULL, (void* (*)(void*)) &startSystemThread, &devicenetworklist); - - if ( status != 0 ) - { - log.writeLog(__LINE__, "STARTSYSTEMS: pthread_create failed, return status = " + oam.itoa(status)); - status = API_FAILURE; - } - - if (status == 0) - { - pthread_join(startsystemthread, NULL); - status = startsystemthreadStatus; - } - - if ( status != API_SUCCESS ) - { - // system didn't successfull restart - processManager.setSystemState(oam::FAILED); - log.writeLog(__LINE__, "startMgrProcessThread Exit with a failure, error returned from startSystemThread", LOG_TYPE_CRITICAL); - } - else - //distribute config file - processManager.distributeConfigFile("system"); - } - } - - // exit thread - log.writeLog(__LINE__, "startMgrProcessThread Exit", LOG_TYPE_DEBUG); - pthread_exit(0); -} - - -/***************************************************************************************** -* @brief pingDeviceThread -* -* purpose: perform ping testing on the devices within the system -* -*****************************************************************************************/ -void pingDeviceThread() -{ - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - Oam oam; - ModuleTypeConfig moduletypeconfig; - ALARMManager aManager; - BRM::DBRM dbrm; - - log.writeLog(__LINE__, "pingDeviceThread launched", LOG_TYPE_DEBUG); - - string cmdLine = "ping "; - string cmdOption = " -c 1 -w 5 >> /dev/null"; - string cmd; - string deviceIP; - - // - // Get Module Info - // - SystemModuleTypeConfig systemModuleTypeConfig; - - try - { - oam.getSystemConfig(systemModuleTypeConfig); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - //Build the initial list, clear module state - - for ( unsigned int i = 0 ; i < systemModuleTypeConfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemModuleTypeConfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0 ) - // skip of no modules configured - continue; - - DeviceNetworkList::iterator pt = systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - moduleInfoList.insert(moduleList::value_type((*pt).DeviceName, 0)); - } - } - - typedef map nicList; - nicList nicInfoList; - - //Build the initial list, clear NIC state - - for ( unsigned int i = 0 ; i < systemModuleTypeConfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemModuleTypeConfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0 ) - // skip of no modules configured - continue; - - DeviceNetworkList::iterator pt = systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++ ) - { - nicInfoList.insert(moduleList::value_type((*pt1).HostName, 0)); - } - } - } - - // - // Get ext device info - // - SystemExtDeviceConfig systemextdeviceconfig; - - try - { - oam.getSystemConfig(systemextdeviceconfig); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - typedef map extDeviceList; - extDeviceList extDeviceInfoList; - - //Build the initial list, clear ext device state - - for ( unsigned int i = 0 ; i < systemextdeviceconfig.Count; i++) - { - string name = systemextdeviceconfig.extdeviceconfig[i].Name; - extDeviceInfoList.insert(extDeviceList::value_type(name, 0)); - } - - //storage config - string DBRootStorageType; - - try - { - oam.getSystemConfig( "DBRootStorageType", DBRootStorageType); - } - catch (...) {} - - log.writeLog(__LINE__, "pingDeviceThread: DBRootStorageType = " + DBRootStorageType, LOG_TYPE_DEBUG); - - int rtnCode = 0; - Configuration configData; - SystemStatus systemstatus; - - bool enableModuleMonitor = true; - - bool LANOUTAGEACTIVE = false; - bool HOTSTANDBYACTIVE = false; - bool downActiveOAMModule = false; - - // monitor module and external device loop - - while (true) - { - //don't peform module test if system is MAN_OFFLINE or not getting status's - while (true) - { - SystemStatus systemstatus; - - try - { - oam.getSystemStatus(systemstatus); - - if (systemstatus.SystemOpState == oam::MAN_OFFLINE ) - sleep(5); - else - break; - } - catch (...) - { - sleep(5); - } - } - - // Module Heartbeat period and failure count - int ModuleHeartbeatPeriod; - int ModuleHeartbeatCount; - - try - { - oam.getSystemConfig("ModuleHeartbeatPeriod", ModuleHeartbeatPeriod); - oam.getSystemConfig("ModuleHeartbeatCount", ModuleHeartbeatCount); - ModuleHeartbeatPeriod = ModuleHeartbeatPeriod * 10; - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - sleep(5); - continue; - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - sleep(5); - continue; - } - - // skip testing if Heartbeat is disable - if ( ModuleHeartbeatPeriod <= 0 ) - { - if ( enableModuleMonitor ) - log.writeLog(__LINE__, "ModuleHeartbeatPeriod set to disabled", LOG_TYPE_DEBUG); - - enableModuleMonitor = false; - } - else - { - if ( !enableModuleMonitor && moduleInfoList.size() > 1 ) - log.writeLog(__LINE__, "ModuleHeartbeatPeriod set to enabled", LOG_TYPE_DEBUG); - - enableModuleMonitor = true; - } - - //single server system - if ( moduleInfoList.size() <= 1) - enableModuleMonitor = false; - - // - // ping NIC - // - - // read each time to catch updates - pthread_mutex_lock(&THREAD_LOCK); - systemModuleTypeConfig.moduletypeconfig.clear(); - - try - { - oam.getSystemConfig(systemModuleTypeConfig); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - sleep(5); - continue; - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - sleep(5); - continue; - } - - pthread_mutex_unlock(&THREAD_LOCK); - - bool LANOUTAGESUPPORT = true; - bool LOCALNICDOWN = false; - - if (enableModuleMonitor) - { - //test main local Ethernet interface status - for ( int count = 0 ; ; count ++) - { - int sockfd; - struct ifreq ifr; - - sockfd = socket(AF_INET, SOCK_DGRAM, 0); - - if (sockfd == -1) - { - log.writeLog(__LINE__, "Could not get socket to check", LOG_TYPE_ERROR); - close(sockfd); - break; - } - - /* get interface name */ - strncpy(ifr.ifr_name, iface_name.c_str(), IFNAMSIZ); - - /* Read interface flags */ - if (ioctl(sockfd, SIOCGIFFLAGS, &ifr) < 0) - { - // not supported - close(sockfd); - break; - } - - if (ifr.ifr_flags & IFF_UP) - { - // ethernet port is up, continue on - close(sockfd); - break; - } - else - { - // ethernet port is down - log.writeLog(__LINE__, "NIC #1 is DOWN", LOG_TYPE_WARNING); - - if ( count >= ModuleHeartbeatCount ) - { - LOCALNICDOWN = true; - close(sockfd); - break; - } - else - sleep(5); - } - - close(sockfd); - } - } - - // if the NIC is down, go directly to LAN outage processing - if ( !LOCALNICDOWN ) - { - for ( unsigned int i = 0 ; i < systemModuleTypeConfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemModuleTypeConfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - string moduleName = (*pt).DeviceName; - string ipAddr; - string hostName; - int moduleState = oam::INITIAL; - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++ ) - { - ipAddr = (*pt1).IPAddr; - hostName = (*pt1).HostName; - - if (enableModuleMonitor) - { - // perform ping test - cmd = cmdLine + ipAddr + cmdOption; - rtnCode = system(cmd.c_str()); - rtnCode = WEXITSTATUS(rtnCode); - } - else - rtnCode = 0; - - int currentNICState = oam::UP; - - try - { - oam.getNICStatus(hostName, currentNICState); - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getNICStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getNICStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - - switch (rtnCode) - { - case 0: - - //NIC Ack ping - if ( currentNICState != oam::UP ) - { - processManager.setNICState(hostName, oam::UP); - - if ( ModuleHeartbeatPeriod > 0 ) - //Clear an alarm - aManager.sendAlarmReport(hostName.c_str(), NIC_DOWN_AUTO, CLEAR); - } - - //set LAN Outage indicator to false since a module is responding - if ( moduleState == oam::INITIAL) - if ( moduleName != config.moduleName()) - LANOUTAGESUPPORT = false; - - //set Module State - if ( moduleState == oam::INITIAL || moduleState == oam::UP) - moduleState = oam::UP; - - break; - - default: - - //NIC failed to respond to ping - if ( currentNICState != oam::DOWN ) - { - log.writeLog(__LINE__, "NIC failed to respond to ping: " + hostName, LOG_TYPE_WARNING); - processManager.setNICState(hostName, oam::DOWN); - - if ( ModuleHeartbeatPeriod > 0 ) - //Issue an alarm - aManager.sendAlarmReport(hostName.c_str(), NIC_DOWN_AUTO, SET); - } - - //set Module State - if ( moduleState == oam::INITIAL || moduleState == oam::DOWN) - moduleState = oam::DOWN; - else - // NIC 1 is up and NIC 2 is down - moduleState = oam::DEGRADED; - - break; - } - } - - // if disable, default module state to up - if (!enableModuleMonitor) - moduleState = oam::UP; - - // moduleState coming out of the NIC monitoring loop - // UP - ALL NICs passed ping test - // DEGRADED - NIC 1 passed, NIC 2 failed ping test - // DOWN - NIC 1 or ALL NICs failed ping test - - int opState = oam::ACTIVE; - - try - { - bool degraded; - oam.getModuleStatus(moduleName, opState, degraded); - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - // skip module check if not inuse or in FAILED state - if (opState == oam::MAN_OFFLINE || - opState == oam::MAN_DISABLED || - opState == oam::FAILED) - continue; - - //fast track a restart of a downed failover modules - if ( gdownActiveOAMModule == moduleName ) - { - moduleInfoList[moduleName] = ModuleHeartbeatCount - 1; - gdownActiveOAMModule.clear(); - moduleState = oam::DOWN; - downActiveOAMModule = true; - } - - vector::iterator pt2 = downModuleList.begin(); - - for ( ; pt2 != downModuleList.end() ; pt2++) - { - if ( *pt2 == moduleName ) - { - moduleInfoList[moduleName] = ModuleHeartbeatCount - 1; - moduleState = oam::DOWN; - downModuleList.erase(pt2); - break; - } - } - - switch (moduleState) - { - case oam::DEGRADED: - // do nothing for now - break; - - case oam::UP: - -// comment out, only come up when both nic are up, if not the pms list will not have the second nic in there -// case oam::DEGRADED: - if (opState == oam::DOWN || opState == oam::INITIAL - || opState == oam::AUTO_DISABLED) - { - //Set the module state to up - processManager.setModuleState(moduleName, moduleState); - } - - if ( moduleName == config.OAMStandbyName() ) - HOTSTANDBYACTIVE = true; - - // if LAN OUTAGE ACTIVE, skip module checks - if (LANOUTAGEACTIVE) - break; - - try - { - oam.getSystemConfig("MySQLRep", MySQLRep); - } - catch (...) - { - MySQLRep = "n"; - } - - if (moduleInfoList[moduleName] >= ModuleHeartbeatCount || - opState == oam::DOWN || opState == oam::AUTO_DISABLED) - { - log.writeLog(__LINE__, "*** Module alive, bring it back online: " + moduleName, LOG_TYPE_DEBUG); - - string PrimaryUMModuleName = config.moduleName(); - - try - { - oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName); - } - catch (...) {} - - bool busy = false; - - for ( int retry = 0 ; retry < 20 ; retry++ ) - { - busy = false; - ProcessStatus DMLprocessstatus; - - try - { - oam.getProcessStatus("DMLProc", PrimaryUMModuleName, DMLprocessstatus); - - if ( DMLprocessstatus.ProcessOpState == oam::BUSY_INIT) - { - log.writeLog(__LINE__, "DMLProc in BUSY_INIT, skip bringing module online " + moduleName, LOG_TYPE_DEBUG); - busy = true; - sleep(5); - } - else - break; - } - catch (...) - { - sleep(5); - } - } - - if (busy) - break; - - processManager.reinitProcessType("cpimport"); - - // halt the dbrm - oam.dbrmctl("halt"); - log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG); - - //set query system state not ready - processManager.setQuerySystemState(false); - - processManager.setSystemState(oam::BUSY_INIT); - - aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, CLEAR); - - //send notification - oam.sendDeviceNotification(config.moduleName(), MODULE_UP); - - int status; - DBRootConfigList dbrootConfigList; - - // if shared pm, move dbroots back to pm - if ( ( moduleName.find("pm") == 0 && !amazon && ( DBRootStorageType != "internal") ) || - ( moduleName.find("pm") == 0 && amazon && downActiveOAMModule ) || - ( moduleName.find("pm") == 0 && amazon && AmazonPMFailover == "y") ) - { - - //restart to get the versionbuffer files closed so it can be unmounted - processManager.restartProcessType("WriteEngineServer", moduleName); - - //set module to enable state - processManager.enableModule(moduleName, oam::AUTO_OFFLINE, true); - - downActiveOAMModule = false; - int retry; - - for ( retry = 0 ; retry < 5 ; retry++ ) - { - try - { - log.writeLog(__LINE__, "Call autoUnMovePmDbroot", LOG_TYPE_DEBUG); - oam.autoUnMovePmDbroot(moduleName); - - //check if any dbroots got assigned back to this module - // they could not be moved if there were busy on other pms - try - { - int moduleID = atoi(moduleName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE).c_str()); - oam.getPmDbrootConfig(moduleID, dbrootConfigList); - - if ( dbrootConfigList.size() == 0 ) - { - // no dbroots, fail module - log.writeLog(__LINE__, "autoUnMovePmDbroot left no dbroots mounted, failing module restart: " + moduleName, LOG_TYPE_WARNING); - - //Issue an alarm - aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET); - - //set module to disable state - processManager.disableModule(moduleName, true); - - //call dbrm control - oam.dbrmctl("reload"); - log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); - - // resume the dbrm - oam.dbrmctl("resume"); - log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - - //clear count - moduleInfoList[moduleName] = 0; - - processManager.setSystemState(oam::ACTIVE); - - //set query system state ready - processManager.setQuerySystemState(true); - - // waiting until dml are ACTIVE - // disableModule is going to trigger DMLProc to restart wait for it - int retry = 0; - while (retry < 30) - { - ProcessStatus DMLprocessstatus; - - try - { - oam.getProcessStatus("DMLProc", config.moduleName(), DMLprocessstatus); - } - catch (exception& ex) - {} - catch (...) - {} - - if (DMLprocessstatus.ProcessOpState == oam::BUSY_INIT) - log.writeLog(__LINE__, "Waiting for DMLProc to finish rollback", LOG_TYPE_DEBUG); - - if (DMLprocessstatus.ProcessOpState == oam::ACTIVE) - break; - - if (DMLprocessstatus.ProcessOpState == oam::FAILED) - break; - - // wait some more - sleep(2); - ++retry; - } - - goto break_case; - } - } - catch (...) - {} - - log.writeLog(__LINE__, "autoUnMovePmDbroot success", LOG_TYPE_DEBUG); - - //distribute config file - processManager.distributeConfigFile("system"); - - break; - } - catch (...) - { - sleep(5); - } - } - - if ( retry == 5 ) - { - log.writeLog(__LINE__, "autoUnMovePmDbroot: Failed. Fail Module", LOG_TYPE_WARNING); - log.writeLog(__LINE__, "System DBRM READ ONLY - Verify dbroot mounts.", LOG_TYPE_WARNING); - //Issue an alarm - aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET); - - //set module to disable state - processManager.disableModule(moduleName, true); - - // Need to do something here to verify data mounts before resuming - // Best to assume if we reach this you need to put into readonly and verify all dbroots are mounted - - //call dbrm control - oam.dbrmctl("readonly"); - log.writeLog(__LINE__, "'dbrmctl readonly' done", LOG_TYPE_DEBUG); - - //clear count - moduleInfoList[moduleName] = 0; - - processManager.setSystemState(oam::DEGRADED); - - //set query system state ready - processManager.setQuerySystemState(true); - - break; - } - } - else - //set module to enable state - processManager.enableModule(moduleName, oam::AUTO_OFFLINE, true); - - //restart module processes - int retry = 0; - - int ModuleProcMonWaitCount = 12; - - try - { - oam.getSystemConfig("ModuleProcMonWaitCount", ModuleProcMonWaitCount); - } - catch (...) - { - ModuleProcMonWaitCount = 12; - } - - for ( ; retry < ModuleProcMonWaitCount ; retry ++ ) - { - // first, wait until module's ProcMon is ACTIVE - int opState = oam::ACTIVE; - - try - { - ProcessStatus procstat; - oam.getProcessStatus("ProcessMonitor", moduleName, procstat); - opState = procstat.ProcessOpState; - - if (opState != oam::ACTIVE) - { - log.writeLog(__LINE__, "Waiting for Module ProcMon to go ACTIVE: " + moduleName, LOG_TYPE_DEBUG); - sleep(5); - continue; - } - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - sleep(5); - continue; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - sleep(5); - continue; - } - - //check and assign Elastic IP Address - int AmazonElasticIPCount = 0; - - try - { - oam.getSystemConfig("AmazonElasticIPCount", AmazonElasticIPCount); - } - catch (...) - { - AmazonElasticIPCount = 0; - } - - for ( int id = 1 ; id < AmazonElasticIPCount + 1 ; id++ ) - { - string AmazonElasticModule = "AmazonElasticModule" + oam.itoa(id); - string ELmoduleName; - - try - { - oam.getSystemConfig(AmazonElasticModule, ELmoduleName); - } - catch (...) {} - - if ( ELmoduleName == moduleName ) - { - //match found assign Elastic IP Address - string AmazonElasticIPAddr = "AmazonElasticIPAddr" + oam.itoa(id); - string ELIPaddress; - - try - { - oam.getSystemConfig(AmazonElasticIPAddr, ELIPaddress); - } - catch (...) {} - - try - { - oam.assignElasticIP(hostName, ELIPaddress); - log.writeLog(__LINE__, "Set Elastic IP Address: " + hostName + "/" + ELIPaddress, LOG_TYPE_DEBUG); - } - catch (...) - { - log.writeLog(__LINE__, "Failed to Set Elastic IP Address: " + hostName + "/" + ELIPaddress, LOG_TYPE_ERROR); - } - - break; - } - } - - // next, stopmodule to start up clean - status = processManager.stopModule(moduleName, oam::FORCEFUL, false); - - if ( status == oam::API_SUCCESS ) - { - string newStandbyModule = processManager.getStandbyModule(); - - if ( !newStandbyModule.empty() && newStandbyModule != "NONE") - { - processManager.setStandbyModule(newStandbyModule); - } - else - { - if ( newStandbyModule == "NONE") - if ( moduleName.substr(0, MAX_MODULE_TYPE_SIZE) == "pm" ) - processManager.setStandbyModule(moduleName); - } - - if ((moduleName.find("pm") == 0) && (dbrootConfigList.size() > 0)) - { - DBRootConfigList::iterator pt = dbrootConfigList.begin(); - - // StorageManager: Need to do this for storagemanager as well? What's it doing? - if (( DBRootStorageType == "DataRedundancy") && (*pt == 1)) - { - log.writeLog(__LINE__, "stopModule, " + config.moduleName(), LOG_TYPE_DEBUG); - processManager.stopModule(config.moduleName(), oam::FORCEFUL, false); - processManager.switchParentOAMModule(moduleName); - processManager.stopProcess(config.moduleName(), "ProcessManager", oam::FORCEFUL, true); - break; - } - } - } - else - { - //stop failed, retry - log.writeLog(__LINE__, "stopModule, failed will retry: " + moduleName, LOG_TYPE_DEBUG); - sleep(5); - continue; - } - - // next, startmodule - status = processManager.startModule(moduleName, oam::FORCEFUL, oam::AUTO_OFFLINE); - - if ( status == oam::API_SUCCESS ) - break; - - log.writeLog(__LINE__, "startModule, failed will retry: " + moduleName, LOG_TYPE_DEBUG); - - //sleep and retry all over again - sleep (5); - } // end of the retry loop - - if ( retry < ModuleProcMonWaitCount ) - { - // module successfully started - - //call dbrm control, need to resume before start so the getdbrmfiles halt doesn't hang - oam.dbrmctl("reload"); - log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); - - // resume the dbrm - oam.dbrmctl("resume"); - log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - - //set recycle process - processManager.recycleProcess(moduleName); - - //distribute config file - processManager.distributeConfigFile("system"); - sleep(1); - - string moduleType = moduleName.substr(0, MAX_MODULE_TYPE_SIZE); - - if ( MySQLRep == "y" ) - { - if ( moduleType == "um" || - ( moduleType == "pm" && config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM ) || - ( moduleType == "pm" && PMwithUM == "y") ) - { - - //setup MySQL Replication for started modules - - log.writeLog(__LINE__, "Setup MySQL Replication for module recovering from outage on " + moduleName, LOG_TYPE_DEBUG); - DeviceNetworkList devicenetworklist; - DeviceNetworkConfig devicenetworkconfig; - devicenetworkconfig.DeviceName = moduleName; - devicenetworklist.push_back(devicenetworkconfig); - processManager.setMySQLReplication(devicenetworklist, oam::UnassignedName, true); - } - } - - //set query system state ready - processManager.setQuerySystemState(true); - - processManager.setSystemState(oam::ACTIVE); - //clear count - moduleInfoList[moduleName] = 0; - } - else - { - // module failed to restart, place back in disabled state - //Log failure, issue alarm, set moduleOpState - Configuration config; - - //Issue an alarm - aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET); - - // if pm, move dbroots back to pm - if ( ( moduleName.find("pm") == 0 && !amazon && ( DBRootStorageType != "internal") ) || - ( moduleName.find("pm") == 0 && amazon && downActiveOAMModule ) || - ( moduleName.find("pm") == 0 && amazon && AmazonPMFailover == "y") ) - { - //move dbroots to other modules - try - { - log.writeLog(__LINE__, "Call autoMovePmDbroot", LOG_TYPE_DEBUG); - oam.autoMovePmDbroot(moduleName); - log.writeLog(__LINE__, "autoMovePmDbroot success", LOG_TYPE_DEBUG); - //distribute config file - processManager.distributeConfigFile("system"); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: " + error, LOG_TYPE_DEBUG); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: Caught unknown exception!", LOG_TYPE_ERROR); - } - } - - //set module to disable state - processManager.disableModule(moduleName, true); - - //call dbrm control - oam.dbrmctl("reload"); - log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); - - // resume the dbrm - oam.dbrmctl("resume"); - log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - - log.writeLog(__LINE__, "Module failed to auto start: " + moduleName, LOG_TYPE_CRITICAL); - - - if ( amazon ) - processManager.setSystemState(oam::FAILED); - else - processManager.setSystemState(oam::ACTIVE); - - //set query system state ready - processManager.setQuerySystemState(true); - - // waiting until dml are ACTIVE - // disableModule is going to trigger DMLProc to restart wait for it - int retry = 0; - while (retry < 30) - { - ProcessStatus DMLprocessstatus; - - try - { - oam.getProcessStatus("DMLProc", config.moduleName(), DMLprocessstatus); - } - catch (exception& ex) - {} - catch (...) - {} - - if (DMLprocessstatus.ProcessOpState == oam::BUSY_INIT) - log.writeLog(__LINE__, "Waiting for DMLProc to finish rollback", LOG_TYPE_DEBUG); - - if (DMLprocessstatus.ProcessOpState == oam::ACTIVE) - break; - - if (DMLprocessstatus.ProcessOpState == oam::FAILED) - break; - - // wait some more - sleep(2); - ++retry; - } - //clear count - moduleInfoList[moduleName] = 0; - } - } - - break; - - case oam::DOWN: - - // if initial state, skip - if (opState == oam::INITIAL) - break; - - // if disabled and not amazon, skip - if ( (opState == oam::AUTO_DISABLED) && !amazon) - break; - - // if disabled, amazon,and NOT terminated skip - if ( (opState == oam::AUTO_DISABLED) && amazon) - { - // return values = 'ip address' for running or rebooting, stopped or terminated - string currentIPAddr = oam.getEC2InstanceIpAddress(hostName); - - if ( currentIPAddr != "terminated") - break; - } - - log.writeLog(__LINE__, "module failed to respond to pings: " + moduleName, LOG_TYPE_WARNING); - - //bump module ping failure counter - moduleInfoList[moduleName]++; - - if ( moduleName == config.OAMStandbyName() ) - HOTSTANDBYACTIVE = false; - - if (moduleInfoList[moduleName] == ModuleHeartbeatCount) - { - // if LAN OUTAGE ACTIVE,skip module checks - if (LANOUTAGEACTIVE) - break; - - //check if down module is PrimaryUMModuleName - bool downPrimaryUM = false; - string PrimaryUMModuleName; - - try - { - oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName); - } - catch (...) {} - - if ( PrimaryUMModuleName == moduleName ) - downPrimaryUM = true; - - // if disabled, skip - if (opState != oam::AUTO_DISABLED ) - { - //Log failure, issue alarm, set moduleOpState - Configuration config; - log.writeLog(__LINE__, "*** module is down: " + moduleName, LOG_TYPE_CRITICAL); - - processManager.reinitProcessType("cpimport"); - - // halt the dbrm - oam.dbrmctl("halt"); - log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG); - - //set query system state not ready - processManager.setQuerySystemState(false); - - processManager.setSystemState(oam::BUSY_INIT); - - // call for a reload in case cpimport was running and - // some cleanup is needed on dbrmcontroller thats active before continuing - oam.dbrmctl("reload"); - log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); - - - //send notification - oam.sendDeviceNotification(moduleName, MODULE_DOWN); - - //Issue an alarm - aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET); - - //mark all processes running on module auto-offline - processManager.setProcessStates(moduleName, oam::AUTO_OFFLINE); - - //set module to disable state - processManager.disableModule(moduleName, false); - - - - // if pm, move dbroots to other pms - if ( ( moduleName.find("pm") == 0 && !amazon && ( DBRootStorageType != "internal") ) || - ( moduleName.find("pm") == 0 && amazon && downActiveOAMModule ) || - ( moduleName.find("pm") == 0 && amazon && AmazonPMFailover == "y") ) - { - string error; - - try - { - log.writeLog(__LINE__, "Call autoMovePmDbroot", LOG_TYPE_DEBUG); - oam.autoMovePmDbroot(moduleName); - log.writeLog(__LINE__, "autoMovePmDbroot success", LOG_TYPE_DEBUG); - //distribute config file - processManager.distributeConfigFile("system"); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: " + error, LOG_TYPE_DEBUG); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: Caught unknown exception!", LOG_TYPE_ERROR); - } - - if ( error == oam.itoa(oam::API_DETACH_FAILURE) ) - { - processManager.setModuleState(moduleName, oam::AUTO_DISABLED); - - //call dbrm control - oam.dbrmctl("reload"); - log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); - // resume the dbrm - oam.dbrmctl("resume"); - log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - - //set query system state ready - processManager.setQuerySystemState(true); - - // waiting until dml are ACTIVE - // disableModule is going to trigger DMLProc to restart wait for it - int retry = 0; - while (retry < 30) - { - ProcessStatus DMLprocessstatus; - - try - { - oam.getProcessStatus("DMLProc", config.moduleName(), DMLprocessstatus); - } - catch (exception& ex) - {} - catch (...) - {} - - if (DMLprocessstatus.ProcessOpState == oam::BUSY_INIT) - log.writeLog(__LINE__, "Waiting for DMLProc to finish rollback", LOG_TYPE_DEBUG); - - if (DMLprocessstatus.ProcessOpState == oam::ACTIVE) - break; - - if (DMLprocessstatus.ProcessOpState == oam::FAILED) - break; - - // wait some more - sleep(2); - ++retry; - } - - break; - } - } - } - - // if Cloud Instance - // state = terminate, remove/addmodule to launch new instance - if ( amazon ) - { - if ( moduleName.find("um") == 0 ) - { - //call dbrm control - oam.dbrmctl("reload"); - log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); - // resume the dbrm - oam.dbrmctl("resume"); - log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - } - - // return values = 'ip address' for running or rebooting, stopped or terminated - string currentIPAddr = oam.getEC2InstanceIpAddress(hostName); - - if ( currentIPAddr == "terminated") - { - //check if down module was Standby OAM, if so find another one - if ( moduleName == config.OAMStandbyName() ) - { - - //set down module ProcessManager to AOS - processManager.setProcessState(moduleName, "ProcessManager", oam::AUTO_OFFLINE, 0); - - //get another standby OAM module - string newStandbyModule = processManager.getStandbyModule(); - - //send message to start new Standby Process-Manager, if needed - if ( !newStandbyModule.empty() && newStandbyModule != "NONE") - { - processManager.setStandbyModule(newStandbyModule); - } - else - { - Config* sysConfig = Config::makeConfig(); - - // clear Standby OAM Module - sysConfig->setConfig("SystemConfig", "StandbyOAMModuleName", oam::UnassignedName); - sysConfig->setConfig("ProcStatusControlStandby", "IPAddr", oam::UnassignedIpAddr); - - //update Calpont Config table - try - { - sysConfig->write(); - } - catch (...) - { - log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR); - } - } - } - - // remove/addmodule - log.writeLog(__LINE__, "Instance terminated, re-launching: " + hostName, LOG_TYPE_DEBUG); - - // if pm, get assigned dbroots and deattach EBS - DBRootConfigList dbrootConfigList; - int moduleID = atoi(moduleName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE).c_str()); - - if ( moduleName.find("pm") == 0 ) - { - //get dbroots ids for to PM - try - { - oam.getPmDbrootConfig(moduleID, dbrootConfigList); - } - catch (exception& e) - { - log.writeLog(__LINE__, "ERROR: getPmDbrootConfig error: " + moduleName, LOG_TYPE_DEBUG); - } - } - - DeviceNetworkList devicenetworklist; - DeviceNetworkConfig devicenetworkconfig; - HostConfig hostconfig; - - devicenetworkconfig.DeviceName = moduleName; - - if (cloud == "amazon-vpc") - hostconfig.IPAddr = ipAddr; - else - hostconfig.IPAddr = oam::UnassignedName; - - hostconfig.HostName = oam::UnassignedName; - hostconfig.NicID = 1; - devicenetworkconfig.hostConfigList.push_back(hostconfig); - - devicenetworklist.push_back(devicenetworkconfig); - - bool pass = true; - - for ( int addRetry = 0 ; addRetry < 5 ; addRetry++ ) - { - //remove module - int ret = processManager.removeModule(devicenetworklist, false); - - if ( ret != oam::API_SUCCESS ) - { - log.writeLog(__LINE__, "Instance failed to remove, retry: " + moduleName, LOG_TYPE_DEBUG); - } - else - { - pass = true; - log.writeLog(__LINE__, "Instance removed, module: " + moduleName, LOG_TYPE_DEBUG); - } - - // add module - ret = processManager.addModule(devicenetworklist, "ssh", false); - - if ( ret != oam::API_SUCCESS ) - { - log.writeLog(__LINE__, "Instance failed to add, retry: " + moduleName, LOG_TYPE_CRITICAL); - pass = false; - } - else - { - pass = true; - log.writeLog(__LINE__, "New Instance Launched for " + moduleName, LOG_TYPE_DEBUG); - - // if pm, config and attach EBS - if ( moduleName.find("pm") == 0 && !dbrootConfigList.empty() ) - { - try - { - oam.setPmDbrootConfig(moduleID, dbrootConfigList); - - std::vector dbrootList; - DBRootConfigList::iterator pt1 = dbrootConfigList.begin(); - - for ( ; pt1 != dbrootConfigList.end() ; pt1++) - { - dbrootList.push_back(oam.itoa(*pt1)); - } - - //attach EBS - try - { - oam.amazonReattach(moduleName, dbrootList, true); - pass = true; - break; - } - catch (exception& e) - { - log.writeLog(__LINE__, "ERROR: amazonReattach error on " + moduleName, LOG_TYPE_ERROR); - pass = false; - } - } - catch (exception& e) - { - log.writeLog(__LINE__, "ERROR: setPmDbrootConfig error on " + moduleName, LOG_TYPE_ERROR); - pass = false; - } - } - else - { - pass = true; - break; - } - } - - if (pass) - break; - } - - if (pass) - //Set the module state so it will be brought back up - processManager.setModuleState(moduleName, oam::AUTO_DISABLED); - else - { - //new instance failed to get added - //remove and try auto moving dbroots to other pms - processManager.removeModule(devicenetworklist, false); - - // if pm, move dbroots to other pms - if ( ( moduleName.find("pm") == 0 && !amazon && ( DBRootStorageType != "internal") ) || - ( moduleName.find("pm") == 0 && amazon && downActiveOAMModule ) || - ( moduleName.find("pm") == 0 && amazon && AmazonPMFailover == "y") ) - { - try - { - log.writeLog(__LINE__, "Call autoMovePmDbroot", LOG_TYPE_DEBUG); - oam.autoMovePmDbroot(moduleName); - log.writeLog(__LINE__, "autoMovePmDbroot success", LOG_TYPE_DEBUG); - //distribute config file - processManager.distributeConfigFile("system"); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: " + error, LOG_TYPE_DEBUG); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: Caught unknown exception!", LOG_TYPE_ERROR); - } - } - - //set reinit process - processManager.reinitProcesses(); - - //set query system state ready - processManager.setQuerySystemState(true); - - sleep(2); - processManager.setSystemState(oam::ACTIVE); - } - } - - if ( ( moduleName.find("pm") == 0 ) && - ( opState != oam::AUTO_DISABLED ) ) - - { - //call dbrm control - oam.dbrmctl("reload"); - log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); - // resume the dbrm - oam.dbrmctl("resume"); - log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - - //set query system state ready - processManager.setQuerySystemState(true); - } - } - else - { - // non-amazon - //call dbrm control - oam.dbrmctl("reload"); - log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); - // resume the dbrm - oam.dbrmctl("resume"); - log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - - //set recycle process - // waiting until dml are ACTIVE - // disableModule is going to trigger DMLProc to restart wait for it - int retry = 0; - while (retry < 30) - { - ProcessStatus DMLprocessstatus; - - try - { - oam.getProcessStatus("DMLProc", config.moduleName(), DMLprocessstatus); - } - catch (exception& ex) - {} - catch (...) - {} - - if (DMLprocessstatus.ProcessOpState == oam::BUSY_INIT) - log.writeLog(__LINE__, "Waiting for DMLProc to finish rollback", LOG_TYPE_DEBUG); - - if (DMLprocessstatus.ProcessOpState == oam::ACTIVE) - break; - - if (DMLprocessstatus.ProcessOpState == oam::FAILED) - break; - - // wait some more - sleep(2); - ++retry; - } - // restart DMLProc again to retrigger rollback with all dbroots connected - processManager.restartProcessType("DMLProc"); - //set query system state ready - processManager.setQuerySystemState(true); - } - - //check if down module was Standby OAM, if so find another one - if ( moduleName == config.OAMStandbyName() ) - { - - //set down module ProcessManager to AOS - processManager.setProcessState(moduleName, "ProcessManager", oam::AUTO_OFFLINE, 0); - - //get another standby OAM module - string newStandbyModule = processManager.getStandbyModule(); - - //send message to start new Standby Process-Manager, if needed - if ( !newStandbyModule.empty() && newStandbyModule != "NONE") - { - processManager.setStandbyModule(newStandbyModule); - } - else - { - Config* sysConfig = Config::makeConfig(); - - // clear Standby OAM Module - sysConfig->setConfig("SystemConfig", "StandbyOAMModuleName", oam::UnassignedName); - sysConfig->setConfig("ProcStatusControlStandby", "IPAddr", oam::UnassignedIpAddr); - - //update Calpont Config table - try - { - sysConfig->write(); - } - catch (...) - { - log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR); - } - } - } - - // reset up mysql rep slaves is master changed - if ( downPrimaryUM && - ( MySQLRep == "y" ) ) - { - //setup MySQL Replication for started modules - log.writeLog(__LINE__, "Setup MySQL Replication for module outage on " + moduleName, LOG_TYPE_DEBUG); - DeviceNetworkList devicenetworklist; - processManager.setMySQLReplication(devicenetworklist); - } - - // if disabled and amazon, break out - if ( (opState == oam::AUTO_DISABLED ) && amazon ) - break; - - //start SIMPLEX runtype processes on a SIMPLEX runtype module - string moduletype = moduleName.substr(0, MAX_MODULE_TYPE_SIZE); - - try - { - oam.getSystemConfig(moduletype, moduletypeconfig); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - if ( moduletypeconfig.RunType == SIMPLEX ) - { - DeviceNetworkList::iterator pt = moduletypeconfig.ModuleNetworkList.begin(); - - for ( ; pt != moduletypeconfig.ModuleNetworkList.end() ; pt++) - { - string launchModuleName = (*pt).DeviceName; - string launchModuletype = launchModuleName.substr(0, MAX_MODULE_TYPE_SIZE); - - if ( moduletype != launchModuletype ) - continue; - - //skip if active pm module (local module) - if ( launchModuleName == config.moduleName() ) - continue; - - if ( moduleName != launchModuleName ) - { - //check if module is active before starting any SIMPLEX STANDBY apps - try - { - int launchopState = oam::ACTIVE; - bool degraded; - oam.getModuleStatus(launchModuleName, launchopState, degraded); - - if (launchopState != oam::ACTIVE && launchopState != oam::STANDBY ) - { - continue; - } - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on : " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - int status; - log.writeLog(__LINE__, "Starting up STANDBY process on module " + launchModuleName, LOG_TYPE_DEBUG); - - for ( int j = 0 ; j < 20 ; j ++ ) - { - status = processManager.startModule(launchModuleName, oam::FORCEFUL, oam::AUTO_OFFLINE); - - if ( status == API_SUCCESS) - break; - } - - log.writeLog(__LINE__, "pingDeviceThread: ACK received from '" + launchModuleName + "' Process-Monitor, return status = " + oam.itoa(status), LOG_TYPE_DEBUG); - } - } - } - } - - break; - } - } - } //end of for loop - } - break_case: - - // check and take action if LAN outage is flagged - if (LANOUTAGESUPPORT && !LANOUTAGEACTIVE && LOCALNICDOWN) - { - log.writeLog(__LINE__, "LAN Failure detected", LOG_TYPE_CRITICAL); - - oam.sendDeviceNotification(config.moduleName(), START_PM_MASTER_DOWN); - - LANOUTAGEACTIVE = true; - - log.writeLog(__LINE__, "Kill any cpimport running", LOG_TYPE_INFO); - system("pkill -9 cpimport"); - - //request stop of local module - int status = processManager.stopModule(config.moduleName(), oam::FORCEFUL, false); - - if ( status != oam::API_SUCCESS ) - log.writeLog(__LINE__, "stopmodule failed", LOG_TYPE_ERROR); - - //stop snmptrap daemon process - processManager.stopProcess(config.moduleName(), "SNMPTrapDaemon", oam::FORCEFUL, false); - } - else - { - if ( LANOUTAGEACTIVE && HOTSTANDBYACTIVE && !LOCALNICDOWN) - { -// pthread_mutex_unlock(&THREAD_LOCK); - LANOUTAGEACTIVE = false; - - log.writeLog(__LINE__, "LAN Failure recovery"); - - //check if this module still is active according to last know hot standby module - ByteStream msg; - ByteStream::byte requestID = GETPARENTOAMMODULE; - msg << requestID; - - string parentOAMModule = processManager.sendMsgProcMon1( config.OAMStandbyName(), msg, requestID ); - - if ( parentOAMModule == config.moduleName() || - parentOAMModule == "FAILED" ) - { - - //srestart to these guys incase they marked any PrimProcs offline - processManager.restartProcessType("ExeMgr"); - processManager.reinitProcessType("DDLProc"); - processManager.reinitProcessType("DMLProc"); - } - else - { - //send message to local Process Monitor to run coldStandby - ByteStream msg; - ByteStream::byte requestID = OAMPARENTCOLD; - - msg << requestID; - - int returnStatus = processManager.sendMsgProcMon( config.moduleName(), msg, requestID ); - log.writeLog(__LINE__, "sent OAM Parent Cold message to local Process-Monitor, status: " + oam.itoa(returnStatus), LOG_TYPE_DEBUG); - - //request stop of local module - int status = processManager.stopModule(config.moduleName(), oam::INSTALL, false); - - if ( status != oam::API_SUCCESS ) - log.writeLog(__LINE__, "stopmodule failed", LOG_TYPE_ERROR); - } - } - } - - // - // ping ext devices - // - - // read each time to catch updates - systemextdeviceconfig.extdeviceconfig.clear(); - - try - { - oam.getSystemConfig(systemextdeviceconfig); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - for ( unsigned int i = 0 ; i < systemextdeviceconfig.Count ; i++ ) - { - string extDeviceName = systemextdeviceconfig.extdeviceconfig[i].Name; - string ipAddr = systemextdeviceconfig.extdeviceconfig[i].IPAddr; - - int opState = oam::ACTIVE; - - try - { - oam.getExtDeviceStatus(extDeviceName, opState); - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getExtDeviceStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getExtDeviceStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - - cmd = cmdLine + ipAddr + cmdOption; - rtnCode = system(cmd.c_str()); - - switch (WEXITSTATUS(rtnCode)) - { - case 0: - - //Switch Ack ping, Check whether alarm have been issued - if (extDeviceInfoList[extDeviceName] >= ModuleHeartbeatCount) - { - aManager.sendAlarmReport(extDeviceName.c_str(), EXT_DEVICE_DOWN_AUTO, CLEAR); - - } - - extDeviceInfoList[extDeviceName] = 0; - - if (opState != oam::ACTIVE) - { - //Set the switch state to active - processManager.setExtdeviceState(extDeviceName, oam::ACTIVE); - } - - break; - - default: - //extDevice failed to respond to ping - log.writeLog(__LINE__, "extDevice failed to respond to ping: " + extDeviceName, LOG_TYPE_WARNING); - extDeviceInfoList[extDeviceName]++; - - if (extDeviceInfoList[extDeviceName] == ModuleHeartbeatCount) - { - //Log failure, issue alarm, set extDeviceOpState - log.writeLog(__LINE__, "extDevice is down: " + extDeviceName, LOG_TYPE_CRITICAL); - - processManager.setExtdeviceState(extDeviceName, oam::AUTO_OFFLINE); - - //Issue an alarm - aManager.sendAlarmReport(extDeviceName.c_str(), EXT_DEVICE_DOWN_AUTO, SET); - } - - break; - } - } //end of for loop - - // double check to make sure the system status is ACTIVE if all module status's are ACTIVE - try - { - if (dbrm.isDBRMReady()) - { - int systemReady = dbrm.getSystemReady(); // -1 == fail, 0 == not ready, 1 == ready - - if (systemReady > 0) - { - bool updateActive = true; - - for ( unsigned int i = 0 ; i < systemModuleTypeConfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemModuleTypeConfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - string moduleName = (*pt).DeviceName; - - int opState = oam::ACTIVE; - - try - { - bool degraded; - oam.getModuleStatus(moduleName, opState, degraded); - - if (opState == oam::ACTIVE || - opState == oam::DEGRADED || - opState == oam::MAN_DISABLED || - opState == oam::AUTO_DISABLED ) - continue; - - updateActive = false; - } - catch (exception& ex) - { - // string error = ex.what(); - // log.writeLog(__LINE__, "EXCEPTION ERROR on : " + error, LOG_TYPE_ERROR); - } - catch (...) - { - // log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - } - } - - if (updateActive) - { -// log.writeLog(__LINE__, "Modules are ACTIVE, check system state ", LOG_TYPE_DEBUG); - - string PrimaryUMModuleName; - - try - { - oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName); - } - catch (...) {} - -// log.writeLog(__LINE__, "PrimaryUMModuleName = " + PrimaryUMModuleName, LOG_TYPE_DEBUG); - - ProcessStatus DMLprocessstatus; - - try - { - oam.getProcessStatus("DMLProc", PrimaryUMModuleName, DMLprocessstatus); - } - catch (exception& ex) - { - // string error = ex.what(); - // log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - // log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - -// log.writeLog(__LINE__, "DMLPROC STATUS = " + oamState[DMLprocessstatus.ProcessOpState], LOG_TYPE_DEBUG); - - if (DMLprocessstatus.ProcessOpState == oam::ACTIVE) - { - - //set the system status if a change has occurred - SystemStatus systemstatus; - - try - { - oam.getSystemStatus(systemstatus); - } - catch (exception& ex) - { - // string error = ex.what(); - // log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - // log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - - if ( systemstatus.SystemOpState != oam::ACTIVE ) - { - processManager.setSystemState(oam::ACTIVE); - } - } - - if (DMLprocessstatus.ProcessOpState == oam::BUSY_INIT) - { - - //set the system status if a change has occurred - SystemStatus systemstatus; - - try - { - oam.getSystemStatus(systemstatus); - } - catch (exception& ex) - { - // string error = ex.what(); - // log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - // log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - - if ( systemstatus.SystemOpState != oam::BUSY_INIT ) - { - processManager.setSystemState(oam::BUSY_INIT); - } - } - } - } - } - } - catch (...) - { - } - - //go sleep for a bit - int sleepTime = ModuleHeartbeatPeriod / 10; - - if (!enableModuleMonitor && systemextdeviceconfig.Count == 0) - sleep(60); - else - sleep(sleepTime); - } - - return; -} - -/****************************************************************************************** -* @brief hdfsActiveAlarmsPushingThread -* -* purpose: Push an image of ActiveAlarms to HDFS for non-OAMParentModule to view. -* -******************************************************************************************/ -static void hdfsActiveAlarmsPushingThread() -{ - boost::filesystem::path filePath(ACTIVE_ALARM_FILE); - boost::filesystem::path dirPath = filePath.parent_path(); - string dirName = boost::filesystem::canonical(dirPath).string(); - - if (boost::filesystem::exists("/etc/pdsh/machines")) - { - string cpCmd = "pdcp -a -x " + localHostName + " " + ACTIVE_ALARM_FILE + " " + dirName + - " > /dev/null 2>&1"; - string rmCmd = "pdsh -a -x " + localHostName + " rm -f " + ACTIVE_ALARM_FILE + - " > /dev/null 2>&1"; - - while (1) - { - if (boost::filesystem::exists(filePath)) - system(cpCmd.c_str()); - else - system(rmCmd.c_str()); - - sleep(ACTIVE_ALARMS_PUSHING_INTERVAL); - } - } - - return; -} - - -/***************************************************************************************** -* @brief Processor Heartbeat Msg Thread -* -* purpose: Read Heartbeat Messages from other Processes -* -*****************************************************************************************/ -/* -static void heartbeatMsgThread() -{ - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - - // - //waiting for request - // - ByteStream receivedMSG; - IOSocket fIos; - - for (;;) - { - try - { - MessageQueueServer procmgr("ProcHeartbeatControl"); - for (;;) - { - try - { - fIos = procmgr.accept(); - receivedMSG = fIos.read(); - - if (receivedMSG.length() > 0) { - processManager.processMSG(fIos, receivedMSG); - } - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on ProcHeartbeatControl.accept: " + error, LOG_TYPE_ERROR); - } - catch(...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on ProcHeartbeatControl.accept: Caught unknown exception!", LOG_TYPE_ERROR); - } - - fIos.close(); - } - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueServer for ProcMgr:" + error, LOG_TYPE_ERROR); - // takes 2 - 4 minites to free sockets, sleep and retry - sleep(60); - } - catch(...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueServer for ProcHeartbeatControl: Caught unknown exception!", LOG_TYPE_ERROR); - // takes 2 - 4 minites to free sockets, sleep and retry - sleep(60); - } - } - -} -*/ - -/***************************************************************************************** -* @brief Processor Heartbeat Thread -* -* purpose: Check Heartbeat Messages from other Processes -* -*****************************************************************************************/ -/* -static void heartbeatProcessThread() -{ - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - Oam oam; - ALARMManager aManager; - - int processHeartbeatPeriod=60; //default value to 60 seconds - - log.writeLog(__LINE__, "Thread Launched: Process Heartbeat!!!"); - - while (true) - { - // - // check and report on register process not sending heartbeats - // - - // get process heartbeat period - try { - oam.getSystemConfig("ProcessHeartbeatPeriod", processHeartbeatPeriod); - processHeartbeatPeriod = processHeartbeatPeriod * 60; - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch(...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - Oam oam; - log.writeLog(__LINE__, "Process Heartbeat check started, Heartbeat period is " + oam.itoa(processHeartbeatPeriod), LOG_TYPE_DEBUG); - - sleep(processHeartbeatPeriod); - - HeartBeatProcList::iterator list = hbproclist.begin(); - for( ; list != hbproclist.end() ; list++) - { - string moduleName = (*list).ModuleName; - string processName = (*list).ProcessName; - int id = (*list).ID; - - // get Process state and only check if ACTIVE - ProcessStatus procstat; - try{ - oam.getProcessStatus(processName, moduleName, procstat); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - procstat.ProcessOpState = oam::MAN_OFFLINE; - } - catch(...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - procstat.ProcessOpState = oam::MAN_OFFLINE; - } - - if ( procstat.ProcessOpState == oam::ACTIVE ) { - // skip testing if Heartbeat is disable - if( processHeartbeatPeriod != -1 ) { -//log.writeLog(__LINE__, "Heartbeat: Process being monitored: " + moduleName + " / " + processName + " / " + oam.itoa(id), LOG_TYPE_DEBUG); - if ( !(*list).receiveFlag ) { - // got a missing heartbeat, request a restart on the process - log.writeLog(__LINE__, "heartbeatProcessThread: Failure from process " + moduleName + " / " + processName+ " / " + oam.itoa(id), LOG_TYPE_WARNING); - - oam.restartProcess(moduleName, processName, FORCEFUL, ACK_NO); - (*list).receiveFlag = true; - // reset all other entries for this process - HeartBeatProcList::iterator list1 = hbproclist.begin(); - for( ; list1 != hbproclist.end() ; list1++) - { - string moduleName1 = (*list1).ModuleName; - string processName1 = (*list1).ProcessName; - if ( moduleName == moduleName1 && processName == processName1 ) - (*list1).receiveFlag = true; - } - } - else - // reset receive heartbeat indication flag - (*list).receiveFlag = false; - } - else - // heartbeat is disabled - (*list).receiveFlag=true; - } - else - { // registered process not active, remove from list - hbproclist.erase(list); - log.writeLog(__LINE__, "Removing OOS Process from Heartbeat Monitor list: " + moduleName + " / " + processName+ " / " + oam.itoa(id)); - break; - } - } - } // end of while forever loop -} -*/ -// vim:ts=4 sw=4: diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp deleted file mode 100644 index e0ce3d789..000000000 --- a/procmgr/processmanager.cpp +++ /dev/null @@ -1,11262 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/****************************************************************************************** -* $Id: processmanager.cpp 2216 2013-08-13 14:34:10Z dhill $ -* -******************************************************************************************/ - -//#define NDEBUG -#include - -#include "columnstoreversion.h" -#include "mcsconfig.h" -#include "processmanager.h" -#include "installdir.h" -#include "dbrm.h" -#include "cacheutils.h" -#include "ddlcleanuputil.h" -#include "IDBFileSystem.h" -#include "IDBDataFile.h" -#include "IDBPolicy.h" -#include - -using namespace cacheutils; - -using namespace std; -using namespace processmanager; -using namespace messageqcpp; -using namespace oam; -using namespace logging; -using namespace alarmmanager; -using namespace config; -using namespace idbdatafile; - -pthread_mutex_t STATUS_LOCK; -pthread_mutex_t THREAD_LOCK; - -extern string cloud; -extern bool amazon; -extern bool runStandby; -extern bool MsgThreadActive; -extern string iface_name; -extern string PMInstanceType; -extern string UMInstanceType; -extern string DataRedundancyConfig; -extern bool rootUser; -extern string USER; -extern bool HDFS; -extern string localHostName; -extern string PMwithUM; -extern string AmazonPMFailover; -extern string tmpLogDir; - -typedef map moduleList; -extern moduleList moduleInfoList; - -bool gOAMParentModuleFlag; - -oam::DeviceNetworkList startdevicenetworklist; - -int upgradethreadStatus = oam::API_SUCCESS; -int startsystemthreadStatus = oam::API_SUCCESS; -int stopsystemthreadStatus = oam::API_SUCCESS; -int startmodulethreadStatus = oam::API_SUCCESS; -bool startsystemthreadStop = false; -bool startsystemthreadRunning = false; -string gdownActiveOAMModule; -vector downModuleList; -bool startFailOver = false; - -string masterLogFile = oam::UnassignedName; -string masterLogPos = oam::UnassignedName; - - -HeartBeatProcList hbproclist; - -namespace processmanager -{ - - -/****************************************************************************************** -* @brief Configuration Constructor -* -* purpose: Configuration Constructor -* -******************************************************************************************/ -Configuration::Configuration() -{ - Oam oam; - oamModuleInfo_t t; - - try - { - t = oam.getModuleInfo(); - flocalModuleName = boost::get<0>(t); - flocalModuleType = boost::get<1>(t); - flocalModuleID = boost::get<2>(t); - fOAMParentModuleName = boost::get<3>(t); - fOAMParentModuleFlag = boost::get<4>(t); - fserverInstallType = boost::get<5>(t); - fOAMStandbyModuleName = boost::get<6>(t); - fOAMStandbyModuleFlag = boost::get<7>(t); - - gOAMParentModuleFlag = boost::get<4>(t); - } - catch (exception& e) - { - cout << endl << "ProcMgr Construct Error = " << e.what() << endl; - exit(-1); - } - -} - -/****************************************************************************************** -* @brief Configuration Destructor# -* -* purpose: Configuration -* -******************************************************************************************/ -Configuration::~Configuration() -{ -} - -/****************************************************************************************** -* @brief getstateInfo -* -* purpose: Return the module opstate tag -* -******************************************************************************************/ -string Configuration::getstateInfo(string moduleName) -{ - return stateInfoList[moduleName]; -} - - -/****************************************************************************************** -* @brief ProcessLog Constructor -* -* purpose: ProcessLog Constructorname -* -******************************************************************************************/ -ProcessLog::ProcessLog() -{ -} - -/****************************************************************************************** -* @brief ProcessLog Destructor -* -* purpose: ProcessLog Destructor -* -******************************************************************************************/ -ProcessLog::~ProcessLog() -{ -} - -/****************************************************************************************** -* @brief writeLog -* -* purpose: Write the message to the log -* -******************************************************************************************/ -void ProcessLog::writeLog(const int lineNumber, const string logContent, const LOG_TYPE logType) -{ - LoggingID lid(17); - MessageLog ml(lid); - Message msg; - Message::Args args; - - if (logType == LOG_TYPE_ERROR) - { - args.add("line:"); - args.add(lineNumber); - } - - args.add(logContent); - - msg.format(args); - - switch (logType) - { - case LOG_TYPE_DEBUG: - try - { - ml.logDebugMessage(msg); - } - catch (...) {} - - break; - - case LOG_TYPE_INFO: - try - { - ml.logInfoMessage(msg); - } - catch (...) {} - - break; - - case LOG_TYPE_WARNING: - try - { - ml.logWarningMessage(msg); - } - catch (...) {} - - break; - - case LOG_TYPE_ERROR: - try - { - ml.logErrorMessage(msg); - } - catch (...) {} - - break; - - case LOG_TYPE_CRITICAL: - try - { - ml.logCriticalMessage(msg); - } - catch (...) {} - - break; - } - - return; -} - -/****************************************************************************************** -* @brief writeLog -* -* purpose: Write the message to the log -* -******************************************************************************************/ -void ProcessLog::writeLog(const int lineNumber, const int logContent, const LOG_TYPE logType) -{ - LoggingID lid(17); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add(logContent); - msg.format(args); - - switch (logType) - { - case LOG_TYPE_DEBUG: - ml.logDebugMessage(msg); - break; - - case LOG_TYPE_INFO: - ml.logInfoMessage(msg); - break; - - case LOG_TYPE_WARNING: - ml.logWarningMessage(msg); - break; - - case LOG_TYPE_ERROR: - args.add("line:"); - args.add(lineNumber); - ml.logErrorMessage(msg); - break; - - case LOG_TYPE_CRITICAL: - ml.logCriticalMessage(msg); - break; - } - - return; -} - -/****************************************************************************************** -* @brief setSysLogData -* -* purpose: Write the message to the log -* -******************************************************************************************/ -void ProcessLog::setSysLogData() -{ - return; -} - -/****************************************************************************************** -* @brief getSysLogData -* -* purpose: return the sysLogData -* -******************************************************************************************/ -string ProcessLog::getSysLogData() -{ - string i; - return i; -} - -/****************************************************************************************** -* @brief writeSystemLog -* -* purpose: log process status change into system log -* -******************************************************************************************/ -void ProcessLog::writeSystemLog() -{ -} - -/****************************************************************************************** -* @brief ProcessManager Constructor -* -* purpose: ProcessManager Constructor -* -******************************************************************************************/ -ProcessManager::ProcessManager(Configuration& aconfig, ProcessLog& alog): config(aconfig), log(alog) -{ -} - -/****************************************************************************************** -* @brief ProcessManager Destructor -* -* purpose: ProcessManager Destructor -* -******************************************************************************************/ -ProcessManager::~ProcessManager() -{ -} - -/****************************************************************************************** -* @brief processMSG -* -* purpose: Process the received message -* -******************************************************************************************/ -//void ProcessManager::processMSG( messageqcpp::IOSocket fIos, messageqcpp::ByteStream msg) -void* processMSG(messageqcpp::IOSocket* cfIos) -{ - messageqcpp::IOSocket fIos = *cfIos; - - pthread_t ThreadId; - ThreadId = pthread_self(); - - ByteStream msg; - - try - { - msg = fIos.read(); - } - catch (...) - { - pthread_detach (ThreadId); - pthread_exit(0); - } - - if (msg.length() <= 0) - { - fIos.close(); - pthread_detach (ThreadId); - pthread_exit(0); - } - - ByteStream::byte msgType; - msg >> msgType; - - Oam oam; - ProcessLog log; -// log.writeLog(__LINE__, "** processMSG msg type: " + oam.itoa(msgType), LOG_TYPE_DEBUG); - - Configuration config; - ProcessManager processManager(config, log); - - ByteStream::byte actionType; - string target; - ByteStream::byte graceful; - ByteStream::byte ackIndicator = 0; - ByteStream::byte manualFlag; - ByteStream ackMsg; - ByteStream::byte status = 0; - - ALARMManager aManager; - SystemModuleTypeConfig systemmoduletypeconfig; - SystemProcessConfig systemprocessconfig; - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - switch (msgType) - { - case REQUEST: - msg >> actionType; - msg >> target; - msg >> graceful; - msg >> ackIndicator; - msg >> manualFlag; - - switch (actionType) - { - case STOPMODULE: - { - uint16_t count, hostConfigCount; - string value; - oam::DeviceNetworkConfig devicenetworkconfig; - oam::DeviceNetworkList devicenetworklist; - - //get module count to remove - msg >> count; - - if ( count > 0 ) - { - - for (int i = 0; i < count; i++) - { - msg >> value; - devicenetworkconfig.DeviceName = value; - msg >> value; - devicenetworkconfig.UserTempDeviceName = value; - msg >> value; - devicenetworkconfig.DisableState = value; - devicenetworklist.push_back(devicenetworkconfig); - msg >> hostConfigCount; - } - - string password; - - msg >> password; - - DeviceNetworkList::iterator listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - string moduleName = (*listPT).DeviceName; - - log.writeLog(__LINE__, "MSG RECEIVED: Stop Module request on " + moduleName ); - - string moduletype = moduleName.substr(0, MAX_MODULE_TYPE_SIZE); - status = API_SUCCESS; - - int opState = oam::ACTIVE; - bool degraded; - - try - { - oam.getModuleStatus(moduleName, opState, degraded); - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - { - status = API_DISABLED; - log.writeLog(__LINE__, "Stop Module requested Ignored on a Disabled " + moduleName); - } - else - { - status = processManager.stopModule(moduleName, graceful, manualFlag); - log.writeLog(__LINE__, "Stop Module Completed on " + moduleName, LOG_TYPE_INFO); - - Configuration config; - - if ( moduleName == config.OAMStandbyName() ) - { - string newStandbyModule = processManager.getStandbyModule(); - - if ( !newStandbyModule.empty() && newStandbyModule != "NONE") - processManager.setStandbyModule(newStandbyModule); - else - { - Config* sysConfig = Config::makeConfig(); - - // clear Standby OAM Module - sysConfig->setConfig("SystemConfig", "StandbyOAMModuleName", oam::UnassignedName); - sysConfig->setConfig("ProcStatusControlStandby", "IPAddr", oam::UnassignedIpAddr); - - //update Calpont Config table - try - { - sysConfig->write(); - } - catch (...) - { - log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR); - } - } - } - } - } - } - else - { - status = oam::API_INVALID_PARAMETER; - log.writeLog(__LINE__, "STOPMODULE: Module Count invalid = " + oam.itoa(count)); - } - - log.writeLog(__LINE__, "STOPMODULE: ACK received from Process-Monitor, return status = " + oam.itoa(status)); - - if (ackIndicator) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "STOPMODULE: ACK back to sender"); - } - - break; - } - - case SHUTDOWNMODULE: - { - uint16_t count, hostConfigCount; - string value; - oam::DeviceNetworkConfig devicenetworkconfig; - oam::DeviceNetworkList devicenetworklist; - - //get module count to remove - msg >> count; - - if ( count > 0 ) - { - - for (int i = 0; i < count; i++) - { - msg >> value; - devicenetworkconfig.DeviceName = value; - msg >> value; - devicenetworkconfig.UserTempDeviceName = value; - msg >> value; - devicenetworkconfig.DisableState = value; - devicenetworklist.push_back(devicenetworkconfig); - msg >> hostConfigCount; - } - - string password; - - msg >> password; - - DeviceNetworkList::iterator listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - string moduleName = (*listPT).DeviceName; - - log.writeLog(__LINE__, "MSG RECEIVED: Shutdown Module request on " + moduleName ); - - status = API_SUCCESS; - - log.writeLog(__LINE__, "Shutdown Module Requested on " + moduleName, LOG_TYPE_INFO); - processManager.shutdownModule(moduleName, graceful, manualFlag, 0); - - //check for SIMPLEX Processes on mate might need to be started - processManager.checkSimplexModule(moduleName); - - Configuration config; - - if ( moduleName == config.OAMStandbyName() ) - { - string newStandbyModule = processManager.getStandbyModule(); - - if ( !newStandbyModule.empty() && newStandbyModule != "NONE") - processManager.setStandbyModule(newStandbyModule); - } - } - } - else - { - status = oam::API_INVALID_PARAMETER; - log.writeLog(__LINE__, "SHUTDOWNMODULE: Module Count invalid = " + oam.itoa(count)); - } - - if (ackIndicator) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "SHUTDOWNMODULE: ACK back to sender, return status = " + oam.itoa(status)); - } - - break; - } - - case STARTMODULE: - { - log.writeLog(__LINE__, "MSG RECEIVED: Start Module request" ); - - startsystemthreadStop = false; - - uint16_t count, hostConfigCount; - string value; - oam::DeviceNetworkConfig devicenetworkconfig; - startdevicenetworklist.clear(); - - //get module count to remove - msg >> count; - - if ( count > 0 ) - { - - string module = oam::UnassignedName; - for (int i = 0; i < count; i++) - { - msg >> value; - devicenetworkconfig.DeviceName = value; - module = value; - msg >> value; - devicenetworkconfig.UserTempDeviceName = value; - msg >> value; - devicenetworkconfig.DisableState = value; - startdevicenetworklist.push_back(devicenetworkconfig); - msg >> hostConfigCount; - } - - string password; - - msg >> password; - - pthread_t startsystemthread; - status = pthread_create (&startsystemthread, NULL, (void* (*)(void*)) &startSystemThread, &startdevicenetworklist); - - if ( status != 0 ) - { - log.writeLog(__LINE__, "STARTMODULE: pthread_create failed, return status = " + oam.itoa(status)); - status = API_FAILURE; - } - - if (status == 0 && ackIndicator) - { - pthread_join(startsystemthread, NULL); - status = startsystemthreadStatus; - } - - if ( status == API_SUCCESS) - { - processManager.setSystemState(oam::BUSY_INIT); - - //set query system state not ready - processManager.setQuerySystemState(false); - - //set recycle process - processManager.recycleProcess(target, true); - - //distribute config file - processManager.distributeConfigFile("system"); - - //set query system state ready - processManager.setQuerySystemState(true); - - processManager.setSystemState(oam::ACTIVE); - } - } - else - { - status = oam::API_INVALID_PARAMETER; - log.writeLog(__LINE__, "STARTMODULE: Module Count invalid = " + oam.itoa(count)); - } - - log.writeLog(__LINE__, "STARTMODULE: ACK received from Process-Monitor, return status = " + oam.itoa(status)); - - if (ackIndicator) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "STARTMODULE: ACK back to sender"); - } - - break; - } - - case RESTARTMODULE: - { - uint16_t count, hostConfigCount; - string value; - oam::DeviceNetworkConfig devicenetworkconfig; - startdevicenetworklist.clear(); - - startsystemthreadStop = false; - - //get module count to remove - msg >> count; - - if ( count > 0 ) - { - - for (int i = 0; i < count; i++) - { - msg >> value; - devicenetworkconfig.DeviceName = value; - msg >> value; - devicenetworkconfig.UserTempDeviceName = value; - msg >> value; - devicenetworkconfig.DisableState = value; - startdevicenetworklist.push_back(devicenetworkconfig); - msg >> hostConfigCount; - } - - string password; - - msg >> password; - - DeviceNetworkList::iterator listPT = startdevicenetworklist.begin(); - - for ( ; listPT != startdevicenetworklist.end() ; listPT++) - { - string moduleName = (*listPT).DeviceName; - - log.writeLog(__LINE__, "MSG RECEIVED: Restart Module request on " + moduleName ); - status = API_SUCCESS; - - int opState = oam::ACTIVE; - bool degraded; - - try - { - oam.getModuleStatus(moduleName, opState, degraded); - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - if (opState != oam::MAN_DISABLED) - { - - status = processManager.stopModule(moduleName, graceful, manualFlag); - - log.writeLog(__LINE__, "Stop Module Completed on " + moduleName, LOG_TYPE_INFO); - - Configuration config; - - if ( moduleName == config.OAMStandbyName() ) - { - string newStandbyModule = processManager.getStandbyModule(); - - if ( !newStandbyModule.empty() && newStandbyModule != "NONE") - processManager.setStandbyModule(newStandbyModule); - } - } - else - { - status = API_DISABLED; - log.writeLog(__LINE__, "Stop Module requested Ignored on a Disabled " + moduleName); - } - } - - pthread_t startsystemthread; - status = pthread_create (&startsystemthread, NULL, (void* (*)(void*)) &startSystemThread, &startdevicenetworklist); - - if ( status != 0 ) - { - log.writeLog(__LINE__, "RESTARTMODULE: pthread_create failed, return status = " + oam.itoa(status)); - status = API_FAILURE; - } - - if (status == 0 && ackIndicator) - { - pthread_join(startsystemthread, NULL); - status = startsystemthreadStatus; - } - - if ( status == API_SUCCESS) - { - //distribute config file - processManager.distributeConfigFile("system"); - - processManager.restartProcessType("ExeMgr"); - } - } - else - { - status = oam::API_INVALID_PARAMETER; - log.writeLog(__LINE__, "RESTARTMODULE: Module Count invalid = " + oam.itoa(count)); - } - - log.writeLog(__LINE__, "RESTARTMODULE: ACK received from Process-Monitor, return status = " + oam.itoa(status)); - - if (ackIndicator) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << (ByteStream::byte) status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "RESTARTMODULE: ACK back to sender"); - } - - break; - } - - case DISABLEMODULE: - { - uint16_t count, hostConfigCount; - string value; - oam::DeviceNetworkConfig devicenetworkconfig; - oam::DeviceNetworkList devicenetworklist; - - //get module count to remove - msg >> count; - - if ( count > 0 ) - { - - for (int i = 0; i < count; i++) - { - msg >> value; - devicenetworkconfig.DeviceName = value; - msg >> value; - devicenetworkconfig.UserTempDeviceName = value; - msg >> value; - devicenetworkconfig.DisableState = value; - devicenetworklist.push_back(devicenetworkconfig); - msg >> hostConfigCount; - } - - string password; - - msg >> password; - - DeviceNetworkList::iterator listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - string moduleName = (*listPT).DeviceName; - - log.writeLog(__LINE__, "MSG RECEIVED: Disable Module request on " + moduleName ); - - // check module status, Disable module - int opState = oam::ACTIVE; - bool degraded; - - try - { - oam.getModuleStatus(moduleName, opState, degraded); - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - //don't allow disble of current Parent OAM Module - if ( moduleName == config.moduleName() ) - { - log.writeLog(__LINE__, "ERROR: can't disable Parent OAM module", LOG_TYPE_ERROR); - status = API_INVALID_PARAMETER; - break; - } - - if (opState == oam::MAN_OFFLINE || opState == oam::MAN_DISABLED - || opState == oam::AUTO_DISABLED || opState == oam::AUTO_OFFLINE) - { - - processManager.setSystemState(oam::BUSY_INIT); - - //set query system state not ready - processManager.setQuerySystemState(false); - - status = processManager.disableModule(moduleName, true); - log.writeLog(__LINE__, "Disable Module Completed on " + moduleName, LOG_TYPE_INFO); - - //check for SIMPLEX Processes on mate might need to be started - processManager.checkSimplexModule(moduleName); - - processManager.setSystemState(oam::ACTIVE); - - //set query system state ready - processManager.setQuerySystemState(true); - } - else - { - log.writeLog(__LINE__, "ERROR: module not stopped, state = " + oam.itoa(opState), LOG_TYPE_ERROR); - status = API_FAILURE; - break; - } - } - } - else - { - status = oam::API_INVALID_PARAMETER; - log.writeLog(__LINE__, "DISABLEMODULE: Module Count invalid = " + oam.itoa(count)); - } - - log.writeLog(__LINE__, "DISABLEMODULE: ACK received from Process-Monitor, return status = " + oam.itoa(status)); - - if (ackIndicator) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "DISABLEMODULE: ACK back to sender"); - } - - break; - } - - case ENABLEMODULE: - { - uint16_t count, hostConfigCount; - string value; - oam::DeviceNetworkConfig devicenetworkconfig; - oam::DeviceNetworkList devicenetworklist; - - //get module count to remove - msg >> count; - - if ( count > 0 ) - { - - for (int i = 0; i < count; i++) - { - msg >> value; - devicenetworkconfig.DeviceName = value; - msg >> value; - devicenetworkconfig.UserTempDeviceName = value; - msg >> value; - devicenetworkconfig.DisableState = value; - devicenetworklist.push_back(devicenetworkconfig); - msg >> hostConfigCount; - } - - string password; - - msg >> password; - - DeviceNetworkList::iterator listPT = devicenetworklist.begin(); - - // do stopmodule then enable - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - string moduleName = (*listPT).DeviceName; - - log.writeLog(__LINE__, "MSG RECEIVED: Enable Module request on " + moduleName ); - - int opState = oam::ACTIVE; - bool degraded; - - try - { - oam.getModuleStatus(moduleName, opState, degraded); - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - if (opState == oam::MAN_DISABLED) - { - processManager.stopModule(moduleName, graceful, manualFlag); - log.writeLog(__LINE__, "stop Module Completed on " + moduleName, LOG_TYPE_INFO); - - status = processManager.enableModule(moduleName, oam::MAN_OFFLINE); - log.writeLog(__LINE__, "Enable Module Completed on " + moduleName, LOG_TYPE_INFO); - } - else - { - log.writeLog(__LINE__, "ERROR: module name not Disabled", LOG_TYPE_ERROR); - status = API_INVALID_STATE; - break; - } - } - } - else - { - status = oam::API_INVALID_PARAMETER; - log.writeLog(__LINE__, "ENABLEMODULE: Module Count invalid = " + oam.itoa(count)); - } - - log.writeLog(__LINE__, "ENABLEMODULE: ACK received from Process-Monitor, return status = " + oam.itoa(status)); - - if (ackIndicator) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "ENABLEMODULE: ACK back to sender"); - } - - break; - } - - case STOPSYSTEM: - { - log.writeLog(__LINE__, "MSG RECEIVED: Stop System request..." ); - - // GRACEFUL_WAIT means that we are shutting down, but waiting for - // all transactions to finish or rollback as commanded. This is only set if - // there are, in fact, transactions active (or cpimport). - if (graceful == GRACEFUL_WAIT) - { - ByteStream stillWorkingMsg; - stillWorkingMsg << (ByteStream::byte) oam::ACK; - stillWorkingMsg << actionType; - stillWorkingMsg << target; - stillWorkingMsg << (ByteStream::byte) API_STILL_WORKING; - - // This wait can take a while. We wait for table locks to release and open transactions to commit. - if (oam.waitForSystem(STOPSYSTEM, fIos, stillWorkingMsg)) - { - graceful = GRACEFUL; // ProcMonitor doesn't know GRACEFUL_WAIT. - // Send an ack back to say we're done waiting and are now shutting down. - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) API_TRANSACTIONS_COMPLETE; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "STOPSYSTEM: ACK transactions complete back to sender, return status = " + oam.itoa(API_TRANSACTIONS_COMPLETE)); - } - else - { - // We've been cancelled. - if (ackIndicator) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) API_CANCELLED; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "STOPSYSTEM: ACK back to sender (canceled)"); - break; - } - } - } - - //set the flag to have any startsystemthreads to exit out before stop is done - startsystemthreadStop = true; - - if ( startsystemthreadRunning ) - sleep(5); - - //stop by process type first, if system is ACTIVE - SystemStatus systemstatus; - - try - { - oam.getSystemStatus(systemstatus); - } - catch (...) - {} - - //set system status - processManager.setSystemState(oam::MAN_INIT); - - if (HDFS) - { - oam::DeviceNetworkList devicenetworklist; - pthread_t stopsystemthread; - status = pthread_create (&stopsystemthread, NULL, (void* (*)(void*)) &stopSystemThread, &devicenetworklist); - - if ( status != 0 ) - { - log.writeLog(__LINE__, "STOPSYSTEMS: pthread_create failed, return status = " + oam.itoa(status)); - status = API_FAILURE; - } - - if (status == 0 && ackIndicator) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "STOPSYSTEM: ACK back to sender"); - } - - break; - } - - //call to update module status and send notification message - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - int opState = oam::ACTIVE; - bool degraded; - - try - { - oam.getModuleStatus((*pt).DeviceName, opState, degraded); - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - continue; - - processManager.stopModule((*pt).DeviceName, STATUS_UPDATE, manualFlag, 0); - } - } - - //set query system state not ready - processManager.setQuerySystemState(false); - - if (systemstatus.SystemOpState == ACTIVE && graceful == oam::GRACEFUL) - processManager.stopProcessTypes(manualFlag); - - //stop all of processes.. - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - //skip OAM Parent module, do at the end - if ( (*pt).DeviceName == config.moduleName() ) - continue; - - int opState = oam::ACTIVE; - bool degraded; - - try - { - oam.getModuleStatus((*pt).DeviceName, opState, degraded); - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - continue; - - log.writeLog(__LINE__, "STOPSYSTEM: Request Stop Module on " + (*pt).DeviceName ); - -// int retStatus = processManager.stopModule((*pt).DeviceName, graceful, manualFlag, 0); - processManager.stopModule((*pt).DeviceName, graceful, manualFlag, 0); - -// log.writeLog(__LINE__, "STOPSYSTEM: ACK received from Process-Monitor, return status = " + oam.itoa(status)); -// if (retStatus != API_SUCCESS) -// status = retStatus; - } - } - - //wait until all child modules are offline or A FAILURE HAS OCCURRED - bool failure = false; - bool stopped = true; - - for ( int retry = 0 ; retry < 30 ; retry++ ) - { - sleep(1); - stopped = true; - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - string moduleName = (*pt).DeviceName; - - //skip OAM Parent module, do at the end - if ( moduleName == config.moduleName() ) - continue; - - int opState = oam::ACTIVE; - - try - { - bool degraded; - oam.getModuleStatus(moduleName, opState, degraded); - - if (opState == oam::FAILED) - { - failure = true; - log.writeLog(__LINE__, "STOPSYSTEM: Failed, failure on module " + moduleName, LOG_TYPE_ERROR); - break; - } - - if (opState == oam::MAN_OFFLINE || - opState == oam::MAN_DISABLED || - opState == oam::AUTO_DISABLED ) - continue; - - stopped = false; - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on : " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - } - - if ( failure ) - break; - } - - if ( failure) - break; - - if ( stopped ) - break; - } - - if ( failure ) - { - processManager.setSystemState(oam::FAILED); - } - else - { - if ( !stopped) - { - //timeout waiting for system to stop, error out - log.writeLog(__LINE__, "STOPSYSTEM: Failed, timeout waiting for module to stop", LOG_TYPE_ERROR); - processManager.setSystemState(oam::FAILED); - } - else - { - /* XXXPAT: saveBRM requires StorageManager being up at the time. - A couple options. 1) start/stop SM around saveBRM(). Will work but it means SM would go - down-up-down for this single operation. 2) add a special path to stopModule() - to NOT stop SM in the first call, then after saveBRM(), stop SM. - - Neither option is great. The least invasive is option 1, so going with that - for now. - */ - - //now stop local module - processManager.stopModule(config.moduleName(), graceful, manualFlag ); - - //run save brm script - string storageType = Config::makeConfig()->getConfig("Installation", "DBRootStorageType"); - if (storageType == "storagemanager") - processManager.startProcess(config.moduleName(), "StorageManager", FORCEFUL); - - processManager.saveBRM(false); - - if (storageType == "storagemanager") - processManager.stopProcess(config.moduleName(), "StorageManager", GRACEFUL, false); - - log.writeLog(__LINE__, "Stop System Completed Success", LOG_TYPE_INFO); - - processManager.setSystemState(oam::MAN_OFFLINE); - - //clearout auto move dbroots files - string cmd = "rm -f /var/lib/columnstore/local/moveDbrootTransactionLog"; - system(cmd.c_str()); - cmd = "touch /var/lib/columnstore/local/moveDbrootTransactionLog"; - system(cmd.c_str()); - } - } - - if (ackIndicator) - { - ackMsg.reset(); - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) API_SUCCESS; - fIos.write(ackMsg); - - log.writeLog(__LINE__, "STOPSYSTEM: ACK back to sender"); - } - - //set query system state ready - processManager.setQuerySystemState(true); - - startsystemthreadStop = false; - - break; - } - - case SHUTDOWNSYSTEM: - { - log.writeLog(__LINE__, "MSG RECEIVED: Shutdown System request..." ); - - // GRACEFUL_WAIT means that we are shutting down, but waiting for - // all transactions to finish or rollback as commanded. This is only set if - // there are, in fact, transactions active (or cpimport). - - //int retStatus = oam::API_SUCCESS; - - if (HDFS) - { - if (ackIndicator) - { - ackMsg.reset(); - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "SHUTDOWNSYSTEM: ACK back to sender, return status = " + oam.itoa(API_SUCCESS)); - } - - Config* sysConfig = Config::makeConfig(); - - // clear Standby OAM Module - sysConfig->setConfig("SystemConfig", "StandbyOAMModuleName", oam::UnassignedName); - sysConfig->setConfig("ProcStatusControlStandby", "IPAddr", oam::UnassignedIpAddr); - - //update Calpont Config table - try - { - sysConfig->write(); - } - catch (...) - { - log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR); - } - - string cmd = "pdsh -a -x " + localHostName + " 'columnstore stop' > /dev/null 2>&1"; - system(cmd.c_str()); - - break; - } - else - { - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - //do local module last - if ( (*pt).DeviceName == config.moduleName() ) - { - continue; - } - - int opState = oam::ACTIVE; - bool degraded; - - try - { - oam.getModuleStatus((*pt).DeviceName, opState, degraded); - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - continue; - - processManager.shutdownModule((*pt).DeviceName, graceful, manualFlag, 0); - } - } - } - - if (ackIndicator) - { - ackMsg.reset(); - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "SHUTDOWNSYSTEM: ACK back to sender, return status = " + oam.itoa(API_SUCCESS)); - } - - Config* sysConfig = Config::makeConfig(); - - // clear Standby OAM Module - sysConfig->setConfig("SystemConfig", "StandbyOAMModuleName", oam::UnassignedName); - sysConfig->setConfig("ProcStatusControlStandby", "IPAddr", oam::UnassignedIpAddr); - - //update Calpont Config table - try - { - sysConfig->write(); - } - catch (...) - { - log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR); - } - - //clearout auto move dbroots files - string cmd = "rm -f /var/lib/columnstore/local/moveDbrootTransactionLog"; - system(cmd.c_str()); - cmd = "touch /var/lib/columnstore/local/moveDbrootTransactionLog"; - system(cmd.c_str()); - - //clear shared memory - cmd = "clearShm > /dev/null 2>&1"; - int rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) != 1) - log.writeLog(__LINE__, "Successfully ran DBRM clearShm", LOG_TYPE_DEBUG); - else - log.writeLog(__LINE__, "Error running DBRM clearShm", LOG_TYPE_ERROR); - - // now do local module - processManager.shutdownModule(config.moduleName(), graceful, manualFlag); - - break; - } - - case STARTSYSTEM: - { - log.writeLog(__LINE__, "MSG RECEIVED: Start System request...ackIndicator=" + oam.itoa(ackIndicator)); - - startsystemthreadStop = false; - - // get system status and don't process if already in-progress - try - { - SystemStatus systemstatus; - oam.getSystemStatus(systemstatus); - - if (systemstatus.SystemOpState == MAN_INIT) - { - log.writeLog(__LINE__, "STARTSYSTEM: Start already in-progess"); - - if (ackIndicator) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) API_ALREADY_IN_PROGRESS; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "STARTSYSTEM: ACK back to sender"); - } - - break; - } - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - - oam::DeviceNetworkList devicenetworklist; - pthread_t startsystemthread; - status = pthread_create (&startsystemthread, NULL, (void* (*)(void*)) &startSystemThread, &devicenetworklist); - - if ( status != 0 ) - { - log.writeLog(__LINE__, "STARTSYSTEMS: pthread_create failed, return status = " + oam.itoa(status)); - status = API_FAILURE; - } - - if (status == 0 && ackIndicator) - { - pthread_join(startsystemthread, NULL); - status = stopsystemthreadStatus; - - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "STARTSYSTEM: ACK back to sender"); - } - - log.writeLog(__LINE__, "STARTSYSTEM: Start System Request Completed with status = " + oam.itoa(status)); - - break; - } - - case RESTARTSYSTEM: - { - log.writeLog(__LINE__, "MSG RECEIVED: Restart System request..." ); - - startsystemthreadStop = false; - - // GRACEFUL_WAIT means that we are shutting down, but waiting for - // all transactions to finish or rollback as commanded. This is only set if - // there are, in fact, transactions active (or cpimport). - if (graceful == GRACEFUL_WAIT) - { - ByteStream stillWorkingMsg; - stillWorkingMsg << (ByteStream::byte) oam::ACK; - stillWorkingMsg << actionType; - stillWorkingMsg << target; - stillWorkingMsg << (ByteStream::byte) API_STILL_WORKING; - - // This wait can take a while. We wait for table locks to release and open transactions to commit. - if (oam.waitForSystem(RESTARTSYSTEM, fIos, stillWorkingMsg)) - { - graceful = GRACEFUL; // ProcMonitor doesn't know GRACEFUL_WAIT. - // Send an ack back to say we're done waiting and are now shutting down. - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) API_TRANSACTIONS_COMPLETE; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "RESTARTSYSTEM: ACK transactions complete back to sender, return status = " + oam.itoa(API_TRANSACTIONS_COMPLETE)); - } - else - { - // We've been cancelled. - if (ackIndicator) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) API_CANCELLED; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "RESTARTSYSTEM: ACK back to sender (canceled)"); - break; - } - } - } - - //set the flag to have any startsystemthreads to exit out before stop is done - startsystemthreadStop = true; - - if ( startsystemthreadRunning ) - sleep(5); - - //get system status - SystemStatus systemstatus; - - try - { - oam.getSystemStatus(systemstatus); - } - catch (...) - {} - - //set system status - processManager.setSystemState(oam::MAN_OFFLINE); - - //call to update module status and send notification message - //stop all of processes.. - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - int opState = oam::ACTIVE; - bool degraded; - - try - { - oam.getModuleStatus((*pt).DeviceName, opState, degraded); - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - continue; - - processManager.stopModule((*pt).DeviceName, STATUS_UPDATE, manualFlag); - } - } - - //stop by process type first, if system is ACTIVE - if (systemstatus.SystemOpState == ACTIVE) - processManager.stopProcessTypes(manualFlag); - - status = API_SUCCESS; - - // stop modules - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - //skip OAM Parent module, do at the end - if ( (*pt).DeviceName == config.moduleName() ) - continue; - - int opState = oam::ACTIVE; - bool degraded; - - try - { - oam.getModuleStatus((*pt).DeviceName, opState, degraded); - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - continue; - - log.writeLog(__LINE__, "RESTARTSYSTEM: Request Stop Module on " + (*pt).DeviceName ); - - int retStatus = processManager.stopModule((*pt).DeviceName, graceful, manualFlag); - - log.writeLog(__LINE__, "RESTARTSYSTEM: ACK received from Process-Monitor, return status = " + oam.itoa(status)); - - if (retStatus != API_SUCCESS) - status = retStatus; - } - } - - //now stop local module - processManager.stopModule(config.moduleName(), graceful, manualFlag ); - - //run save.brm script - string storageType = Config::makeConfig()->getConfig("Installation", "DBRootStorageType"); - if (storageType == "storagemanager") - processManager.startProcess(config.moduleName(), "StorageManager", FORCEFUL); - - processManager.saveBRM(false); - - if (storageType == "storagemanager") - processManager.stopProcess(config.moduleName(), "StorageManager", GRACEFUL, false); - - - log.writeLog(__LINE__, "RESTARTSYSTEM: ACK received from Process-Monitor for stopModule requests, return status = " + oam.itoa(status)); - - startsystemthreadStop = false; - - if (status == API_SUCCESS ) - { - //distribute config file - processManager.distributeConfigFile("system"); - - oam::DeviceNetworkList devicenetworklist; - pthread_t startsystemthread; - status = pthread_create (&startsystemthread, NULL, (void*(*)(void*)) &startSystemThread, &devicenetworklist); - - if ( status != 0 ) - { - log.writeLog(__LINE__, "STARTMODULE: pthread_create failed, return status = " + oam.itoa(status)); - status = API_FAILURE; - } - - if (status == 0 && ackIndicator) - { - pthread_join(startsystemthread, NULL); - status = startsystemthreadStatus; - } - - // setup MySQL Replication after FORCE restart command - if ( (status == API_SUCCESS) && - (graceful == oam::FORCEFUL) ) - { - log.writeLog(__LINE__, "Setup MySQL Replication for restartSystem FORCE", LOG_TYPE_DEBUG); - oam::DeviceNetworkList devicenetworklist; - processManager.setMySQLReplication(devicenetworklist, oam::UnassignedName, true); - } - - log.writeLog(__LINE__, "RESTARTSYSTEM: Start System Request Completed", LOG_TYPE_INFO); - } - - if (ackIndicator) - { - ackMsg.reset(); - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "RESTARTSYSTEM: ACK back to sender"); - } - - log.writeLog(__LINE__, "Restart System Completed, status = " + oam.itoa(status), LOG_TYPE_INFO); - - break; - } - - case STOPPROCESS: - { - log.writeLog(__LINE__, "MSG RECEIVED: Stop Process request on " + target ); - string moduleName; - - msg >> moduleName; - status = API_SUCCESS; - - status = processManager.stopProcess(moduleName, target, graceful, manualFlag); - - log.writeLog(__LINE__, "STOPPROCESS: ACK received from Process-Monitor, return status = " + oam.itoa(status)); - log.writeLog(__LINE__, "Stop Process Completed on " + moduleName + " / " + target, LOG_TYPE_INFO ); - - if (ackIndicator) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "STOPPROCESS: ACK back to sender"); - } - - break; - } - - case STARTPROCESS: - { - log.writeLog(__LINE__, "MSG RECEIVED: Start Process request on " + target); - string moduleName; - - msg >> moduleName; - - status = processManager.startProcess(moduleName, target, graceful); - - log.writeLog(__LINE__, "STARTPROCESS: ACK received from Process-Monitor, return status = " + oam.itoa(status)); - log.writeLog(__LINE__, "Start Process Completed on " + moduleName + " / " + target, LOG_TYPE_INFO ); - - // if a PrimProc was restarted, restart ACTIVE ExeMgr(s) and DDL/DMLProc -#if 0 // A RESTARTPROCESS message is about to arrive, so this is redundant. - - if ( target.find("PrimProc") == 0) - { - - //distribute config file - processManager.distributeConfigFile("system"); - - processManager.reinitProcessType("WriteEngineServer"); - processManager.restartProcessType("ExeMgr"); - processManager.reinitProcessType("DDLProc"); - processManager.reinitProcessType("DMLProc"); - } - - // if a WriteEngineServer was restarted, restart DDL/DMLProc - if ( target.find("WriteEngineServer") == 0) - { - - processManager.reinitProcessType("DDLProc"); - processManager.reinitProcessType("DMLProc"); - } - -#endif - - // if DDL or DMLProc, change IP Address - if ( target.find("DDLProc") == 0 || - target.find("DMLProc") == 0 ) - { - - processManager.setPMProcIPs(moduleName, target); - } - - if (ackIndicator) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "STARTPROCESS: ACK back to sender"); - } - - break; - } - - case RESTARTPROCESS: - { - log.writeLog(__LINE__, "MSG RECEIVED: Restart Process request on " + target ); - string moduleName; - - msg >> moduleName; - - status = processManager.restartProcess(moduleName, target, graceful, manualFlag); - - // if a PrimProc was restarted, restart ACTIVE ExeMgr(s) - if ( target.find("PrimProc") == 0) - { - - //distribute config file - processManager.distributeConfigFile("system"); - - processManager.reinitProcessType("WriteEngineServer"); - processManager.restartProcessType("ExeMgr"); - processManager.reinitProcessType("DDLProc"); - processManager.reinitProcessType("DMLProc"); - } - - // if a WriteEngineServer was restarted, restart DDL/DMLProc - if ( target.find("WriteEngineServer") == 0) - { - - processManager.reinitProcessType("DDLProc"); - processManager.reinitProcessType("DMLProc"); - } - - log.writeLog(__LINE__, "RESTARTPROCESS: ACK received from Process-Monitor, return status = " + oam.itoa(status)); - log.writeLog(__LINE__, "Restart Process Completed on " + moduleName + " / " + target, LOG_TYPE_INFO ); - - if (ackIndicator) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "RESTARTPROCESS: ACK back to sender"); - } - - break; - } - - case UPDATELOG: - { - string action; - string level; - - msg >> action; - msg >> level; - - log.writeLog(__LINE__, "MSG RECEIVED: " + action + " logging on " + target + " for level " + level ); - - status = API_SUCCESS; - - if ( target == "system" ) - { - // send logging message to all modules - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - int retStatus = processManager.updateLog(action, (*pt).DeviceName, level); - - if ( retStatus != API_SUCCESS) - status = retStatus; - } - } - } - else - { - // for a specific module - // validate module name - bool found = false; - - for ( unsigned int i = 0; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - if ((*pt).DeviceName == target) - { - status = processManager.updateLog(action, target, level); - found = true; - break; - } - } - } - - if ( found == false ) - { - log.writeLog(__LINE__, "ERROR: Invalid module name: " + target, LOG_TYPE_ERROR); - status = API_INVALID_PARAMETER; - } - } - - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "UPDATELOG: ACK back to sender, return status = " + oam.itoa(status)); - - break; - } - - case GETCONFIGLOG: - { - log.writeLog(__LINE__, "MSG RECEIVED: Get Log Configuation" ); - - status = API_SUCCESS; - - // validate module name and make request - bool found = false; - - for ( unsigned int i = 0; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - if ((*pt).DeviceName == target) - { - status = processManager.getConfigLog(target); - found = true; - break; - } - } - } - - if ( found == false ) - { - log.writeLog(__LINE__, "ERROR: Invalid module name: " + target, LOG_TYPE_ERROR); - status = API_INVALID_PARAMETER; - } - - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "GETCONFIGLOG: ACK back to sender, return status = " + oam.itoa(status)); - - break; - } - - case REINITPROCESS: - { - log.writeLog(__LINE__, "MSG RECEIVED: Re-Init Process request..." ); - string moduleName; - - msg >> moduleName; - - //distribute config file - processManager.distributeConfigFile(moduleName); - - status = processManager.reinitProcess(moduleName, target); - - log.writeLog(__LINE__, "REINITPROCESS: ACK received from Process-Monitor, return status = " + oam.itoa(status)); - - if (ackIndicator) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "REINITPROCESS: ACK back to sender"); - } - - break; - } - - case UPDATECONFIG: - { - log.writeLog(__LINE__, "MSG RECEIVED: Update Process Configuation" ); - - status = API_SUCCESS; - - //distribute update of process config file - processManager.distributeConfigFile("system", "ProcessConfig.xml"); - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for (; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - int retStatus = processManager.updateConfig((*pt).DeviceName); - - if (retStatus != API_SUCCESS) - status = retStatus; - } - } - - log.writeLog(__LINE__, "UPDATECONFIG: ACK back to sender, return status = " + oam.itoa(status)); - break; - } - - case BUILDSYSTEMTABLES: - { - log.writeLog(__LINE__, "MSG RECEIVED: Send Build System Table request to " + target); - - status = processManager.buildSystemTables(target); - - log.writeLog(__LINE__, "BUILDSYSTEMTABLES: ACK received from Process-Monitor, return status = " + oam.itoa(status)); - - if (ackIndicator) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "BUILDSYSTEMTABLES: ACK back to sender"); - } - - break; - } - - case ADDMODULE: - { - log.writeLog(__LINE__, "MSG RECEIVED: Add Module request"); - - string value; - uint16_t count, ivalue, nicCount; - uint8_t tmp8; - oam::DeviceNetworkConfig devicenetworkconfig; - oam::DeviceNetworkList devicenetworklist; - oam::HostConfig hostconfig; - bool storeHostnames; - - msg >> tmp8; - storeHostnames = (tmp8 != 0); - - //get module count to add - msg >> count; - - if ( count > 0 ) - { - - for (int i = 0; i < count; i++) - { - msg >> value; - devicenetworkconfig.DeviceName = value; - msg >> value; - devicenetworkconfig.UserTempDeviceName = value; - msg >> value; - devicenetworkconfig.DisableState = value; - - msg >> nicCount; - - for (int j = 0 ; j < nicCount ; j ++ ) - { - msg >> value; - hostconfig.IPAddr = value; - msg >> value; - hostconfig.HostName = value; - msg >> ivalue; - hostconfig.NicID = ivalue; - devicenetworkconfig.hostConfigList.push_back(hostconfig); - } - - devicenetworklist.push_back(devicenetworkconfig); - devicenetworkconfig.hostConfigList.clear(); - } - - string password; - msg >> password; - - status = processManager.addModule(devicenetworklist, password, storeHostnames); - - log.writeLog(__LINE__, "ADDMODULE: ACK received from Process-Monitor, return status = " + oam.itoa(status)); - } - else - { - status = oam::API_INVALID_PARAMETER; - log.writeLog(__LINE__, "ADDMODULE: Module Count invalid = " + oam.itoa(count)); - } - - if (ackIndicator) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "ADDMODULE: ACK back to sender"); - } - - break; - } - - case REMOVEMODULE: - { - log.writeLog(__LINE__, "MSG RECEIVED: Remove Module request"); - - uint16_t count, hostConfigCount; - string value; - oam::DeviceNetworkConfig devicenetworkconfig; - oam::DeviceNetworkList devicenetworklist; - - //get module count to remove - msg >> count; - - if ( count > 0 ) - { - - for (int i = 0; i < count; i++) - { - msg >> value; - devicenetworkconfig.DeviceName = value; - msg >> value; - devicenetworkconfig.UserTempDeviceName = value; - msg >> value; - devicenetworkconfig.DisableState = value; - devicenetworklist.push_back(devicenetworkconfig); - msg >> hostConfigCount; - } - - string password; - - msg >> password; - - status = processManager.removeModule(devicenetworklist); - - log.writeLog(__LINE__, "REMOVEMODULE: ACK received from Process-Monitor, return status = " + oam.itoa(status)); - log.writeLog(__LINE__, "Remove Module Completed", LOG_TYPE_INFO); - } - else - { - status = oam::API_INVALID_PARAMETER; - log.writeLog(__LINE__, "REMOVEMODULE: Module Count invalid = " + oam.itoa(count)); - } - - if (ackIndicator) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "REMOVEMODULE: ACK back to sender"); - } - - break; - } - - case RECONFIGUREMODULE: - { - log.writeLog(__LINE__, "MSG RECEIVED: Reconfigure Module request"); - - string value; - uint16_t count, ivalue, nicCount; - oam::DeviceNetworkConfig devicenetworkconfig; - oam::DeviceNetworkList devicenetworklist; - oam::HostConfig hostconfig; - - //get module count - msg >> count; - - if ( count > 0 ) - { - - for (int i = 0; i < count; i++) - { - msg >> value; - devicenetworkconfig.DeviceName = value; - msg >> value; - devicenetworkconfig.UserTempDeviceName = value; - msg >> value; - devicenetworkconfig.DisableState = value; - - msg >> nicCount; - - for (int j = 0 ; j < nicCount ; j ++ ) - { - msg >> value; - hostconfig.IPAddr = value; - msg >> value; - hostconfig.HostName = value; - msg >> ivalue; - hostconfig.NicID = ivalue; - devicenetworkconfig.hostConfigList.push_back(hostconfig); - } - - devicenetworklist.push_back(devicenetworkconfig); - devicenetworkconfig.hostConfigList.clear(); - } - - string password; - - msg >> password; - - status = processManager.reconfigureModule(devicenetworklist); - - log.writeLog(__LINE__, "RECONFIGUREMODULE: ACK received from Process-Monitor, return status = " + oam.itoa(status)); - } - else - { - status = oam::API_INVALID_PARAMETER; - log.writeLog(__LINE__, "RECONFIGUREMODULE: Module Count invalid = " + oam.itoa(count)); - } - - if (ackIndicator) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "RECONFIGUREMODULE: ACK back to sender"); - } - - break; - } - - case STOPPROCESSTYPE: - { - log.writeLog(__LINE__, "MSG RECEIVED: Stop Process Type request: " + target); - - if ( target == "DBRM" ) - { - processManager.stopProcessType("DBRMControllerNode"); - processManager.stopProcessType("DBRMWorkerNode"); - } - else - processManager.stopProcessType(target); - - log.writeLog(__LINE__, "Stop Process Type Completed", LOG_TYPE_INFO ); - break; - } - - case STARTPROCESSTYPE: - { - log.writeLog(__LINE__, "MSG RECEIVED: Start Process Type request: " + target); - - if ( target == "DBRM" ) - { - processManager.startProcessType("DBRMControllerNode"); - processManager.startProcessType("DBRMWorkerNode"); - } - else - processManager.startProcessType(target); - - // if a PrimProc was restarted, restart ACTIVE ExeMgr(s) and DDL/DMLProc - if ( target == "PrimProc" ) - { - - //distribute config file - processManager.distributeConfigFile("system"); - - processManager.reinitProcessType("WriteEngineServer"); - processManager.restartProcessType("ExeMgr"); - processManager.reinitProcessType("DDLProc"); - processManager.reinitProcessType("DMLProc"); - } - - // if a WriteEngineServer was restarted, restart DDL/DMLProc - if ( target.find("WriteEngineServer") == 0) - { - - processManager.reinitProcessType("DDLProc"); - processManager.reinitProcessType("DMLProc"); - } - - log.writeLog(__LINE__, "Start Process Type Completed", LOG_TYPE_INFO ); - break; - } - - case RESTARTPROCESSTYPE: - { - log.writeLog(__LINE__, "MSG RECEIVED: Restart Process Type request: " + target); - - if ( target == "DBRM" ) - { - processManager.restartProcessType("DBRMControllerNode"); - processManager.restartProcessType("DBRMWorkerNode"); - } - else - { - processManager.restartProcessType(target); - - // if a PrimProc was restarted, restart ACTIVE ExeMgr(s) and DDL/DMLProc - if ( target == "PrimProc" ) - { - - //distribute config file - processManager.distributeConfigFile("system"); - - processManager.reinitProcessType("WriteEngineServer"); - processManager.restartProcessType("ExeMgr"); - processManager.reinitProcessType("DDLProc"); - processManager.reinitProcessType("DMLProc"); - } - } - - // if a WriteEngineServer was restarted, restart DDL/DMLProc - if ( target.find("WriteEngineServer") == 0) - { - - processManager.reinitProcessType("DDLProc"); - processManager.reinitProcessType("DMLProc"); - } - - log.writeLog(__LINE__, "Restart Process Type Completed", LOG_TYPE_INFO ); - break; - } - - case REINITPROCESSTYPE: - { - log.writeLog(__LINE__, "MSG RECEIVED: Reinit Process Type request: " + target); - - status = processManager.reinitProcessType(target); - - if (ackIndicator) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - } - - log.writeLog(__LINE__, "Reinit Process Type Completed, return status = " + oam.itoa(status)); - break; - } - - case DISTRIBUTECONFIG: - { - string file; - - msg >> file; - - log.writeLog(__LINE__, "MSG RECEIVED: Distribute Config File " + target + "/" + file); - - processManager.distributeConfigFile(target, file); - - if (ackIndicator) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) oam::API_SUCCESS; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - } - - log.writeLog(__LINE__, "Distribute Config File Completed " + target + "/" + file); - break; - } - - case SWITCHOAMPARENT: - { - log.writeLog(__LINE__, "MSG RECEIVED: Switch OAM Parent to : " + target); - - // GRACEFUL_WAIT means that we are shutting down, but waiting for - // all transactions to finish or rollback as commanded. This is only set if - // there are, in fact, transactions active (or cpimport). - - if (graceful == GRACEFUL_WAIT) - { - ByteStream stillWorkingMsg; - stillWorkingMsg << (ByteStream::byte) oam::ACK; - stillWorkingMsg << actionType; - stillWorkingMsg << target; - stillWorkingMsg << (ByteStream::byte) API_STILL_WORKING; - - // This wait can take a while. We wait for table locks to release and open transactions to commit. - if (oam.waitForSystem(RESTARTSYSTEM, fIos, stillWorkingMsg)) - { - graceful = GRACEFUL; // ProcMonitor doesn't know GRACEFUL_WAIT. - // Send an ack back to say we're done waiting and are now shutting down. - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) API_TRANSACTIONS_COMPLETE; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "SWITCHOAMPARENT: ACK transactions complete back to sender, return status = " + oam.itoa(API_TRANSACTIONS_COMPLETE)); - } - else - { - // We've been cancelled. - if (ackIndicator) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) API_CANCELLED; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "SWITCHOAMPARENT: ACK back to sender (canceled)"); - break; - } - } - } - - status = processManager.switchParentOAMModule(target); - - log.writeLog(__LINE__, "Switch OAM Parent Completed", LOG_TYPE_INFO ); - - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - // stop myself - processManager.stopProcess(config.moduleName(), "ProcessManager", oam::FORCEFUL, true); - - break; - } - - case UNMOUNT: - { - log.writeLog(__LINE__, "MSG RECEIVED: Unmount dbroot : " + target); - - status = processManager.unmountDBRoot(target); - - log.writeLog(__LINE__, "UnMount Completed status: " + oam.itoa(status) ); - - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - break; - } - - case MOUNT: - { - log.writeLog(__LINE__, "MSG RECEIVED: mount dbroot : " + target); - - status = processManager.mountDBRoot(target); - - log.writeLog(__LINE__, "Mount Completed status: " + oam.itoa(status) ); - - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - break; - } - - case SUSPENDWRITES: - { - ByteStream::byte ackResponse = API_FAILURE; - log.writeLog(__LINE__, "MSG RECEIVED: suspend database writes"); - - string storageType = Config::makeConfig()->getConfig("Installation", "DBRootStorageType"); - - // GRACEFUL_WAIT means that we are Suspending writes, but waiting for all - // transactions to finish or rollback as commanded. This is only set if there - // are, in fact, transactions active (or cpimport). - if (graceful == GRACEFUL_WAIT) - { - ByteStream stillWorkingMsg; - stillWorkingMsg << (ByteStream::byte) oam::ACK; - stillWorkingMsg << actionType; - stillWorkingMsg << target; - stillWorkingMsg << (ByteStream::byte) API_STILL_WORKING; - - // This wait can take a while. We wait for table locks to release and open transactions to commit. - if (oam.waitForSystem(SUSPENDWRITES, fIos, stillWorkingMsg)) - { - graceful = GRACEFUL; // ProcMonitor doesn't know GRACEFUL_WAIT. - // Send an ack back to say we're done waiting and are now shutting down. - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) API_TRANSACTIONS_COMPLETE; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "SUSPENDWRITES: ACK transactions complete back to sender, return status = " + oam.itoa(API_TRANSACTIONS_COMPLETE)); - } - else - { - // We've been cancelled. - if (ackIndicator) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) API_CANCELLED; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "SUSPENDWRITES: ACK back to sender (canceled)"); - break; - } - } - } - - BRM::DBRM dbrm; - dbrm.setSystemSuspended(true); - // Wait for everything to settle down - sleep(5); - // Save the BRM. This command presages a system backup. Best to have a current BRM on disk - string logdir("/var/log/mariadb/columnstore"); - - if (access(logdir.c_str(), W_OK) != 0) logdir = tmpLogDir; - - string cmd = "save_brm > " + logdir + "/save_brm.log1 2>&1"; - int rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) == 0) - { - ackResponse = API_SUCCESS; - } - else - { - ackResponse = API_FAILURE_DB_ERROR; - dbrm.setSystemSuspended(false); - } - - if (storageType == "storagemanager") - { - //sync fs on all pm nodes if up - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType != "pm" ) - continue; - - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - int opState = oam::ACTIVE; - bool degraded; - - try - { - oam.getModuleStatus((*pt).DeviceName, opState, degraded); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - continue; - - int returnStatus = processManager.syncFsAll( (*pt).DeviceName ); - - if (returnStatus != API_SUCCESS) - { - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) API_FAILURE; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "SUSPENDWRITES: API_FAILURE filestemSync() on module " + (*pt).DeviceName,LOG_TYPE_ERROR); - break; - } - } - } - } - - ackMsg.reset(); - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << ackResponse; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "SUSPENDWRITES: ACK back to sender" + oam.itoa(ackResponse)); - break; - } - - case FSTABUPDATE: - { - log.writeLog(__LINE__, "MSG RECEIVED: Distribute Fstab update" ); - - //get fstab entry - string entry; - msg >> entry; - - status = API_SUCCESS; - - if ( target == "system" ) - { - //send out to all pms except local module - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType != "pm" ) - continue; - - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for (; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - if ( (*pt).DeviceName == config.moduleName() ) - continue; - - int retStatus = processManager.updateFstab((*pt).DeviceName, entry); - - if (retStatus != API_SUCCESS) - status = retStatus; - } - } - } - else - { - int retStatus = processManager.updateFstab(target, entry); - - if (retStatus != API_SUCCESS) - status = retStatus; - } - - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - log.writeLog(__LINE__, "FSTABUPDATE: ACK back to sender, return status = " + oam.itoa(status)); - - break; - } - - case ENABLEMYSQLREP: - { - log.writeLog(__LINE__, "MSG RECEIVED: Enable MySQL Replication"); - - // target = root password - oam::DeviceNetworkList devicenetworklist; - status = processManager.setMySQLReplication(devicenetworklist, oam::UnassignedName, true, target); - - log.writeLog(__LINE__, "Enable MySQL Replication status: " + oam.itoa(status) ); - - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - break; - } - - case DISABLEMYSQLREP: - { - log.writeLog(__LINE__, "MSG RECEIVED: Disable MySQL Replication"); - - // target = root password - oam::DeviceNetworkList devicenetworklist; - status = processManager.setMySQLReplication(devicenetworklist, oam::UnassignedName, false, target, false); - - log.writeLog(__LINE__, "Disable MySQL Replication status: " + oam.itoa(status) ); - - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - break; - } - - case GLUSTERASSIGN: - { - string dbroot; - msg >> dbroot; - - log.writeLog(__LINE__, "MSG RECEIVED: Gluster Assign DBRoot: " + dbroot); - - status = processManager.glusterAssign(target, dbroot); - - log.writeLog(__LINE__, "Gluster Assign DBRoot status: " + oam.itoa(status) ); - - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - break; - } - - case GLUSTERUNASSIGN: - { - string dbroot; - msg >> dbroot; - - log.writeLog(__LINE__, "MSG RECEIVED: Gluster Unassign DBRoot: " + dbroot); - - status = processManager.glusterUnassign(target, dbroot); - - log.writeLog(__LINE__, "Gluster Unassign DBRoot status: " + oam.itoa(status) ); - - ackMsg << (ByteStream::byte) oam::ACK; - ackMsg << actionType; - ackMsg << target; - ackMsg << (ByteStream::byte) status; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - - break; - } - - default: - log.writeLog(__LINE__, "MSG RECEIVED: Invalid type" ); - break; - } - - break; - - case HEARTBEAT_REGISTER: - { - string moduleName; - string processName; - ByteStream::byte id; - - msg >> moduleName; - msg >> processName; - msg >> id; - - HeartBeatProc hbproc; - hbproc.ModuleName = moduleName; - hbproc.ProcessName = processName; - hbproc.ID = id; - hbproc.receiveFlag = true; - - HeartBeatProcList::iterator list = hbproclist.begin(); - - for ( ; list != hbproclist.end() ; list++) - { - if ( (*list).ModuleName == moduleName - && (*list).ProcessName == processName - && (*list).ID == id) - { - // already in the list - break; - } - } - - if ( list == hbproclist.end() ) - { - // add to list - hbproclist.push_front(hbproc); - log.writeLog(__LINE__, "Adding Process to Heartbeat Monitor list: " + moduleName + " / " + processName + " / " + oam.itoa(id)); - } - } - break; - - case HEARTBEAT_DEREGISTER: - { - string moduleName; - string processName; - ByteStream::byte id; - - msg >> moduleName; - msg >> processName; - msg >> id; - - HeartBeatProcList::iterator list = hbproclist.begin(); - - for ( ; list != hbproclist.end() ; list++) - { - if ( (*list).ModuleName == moduleName - && (*list).ProcessName == processName - && (*list).ID == id) - { - hbproclist.erase(list); - log.writeLog(__LINE__, "Removing Process from Heartbeat Monitor list: " + moduleName + " / " + processName + " / " + oam.itoa(id)); - break; - } - } - } - break; - - case HEARTBEAT_SEND: - { - string moduleName; - string processName; - string timeStamp; - ByteStream::byte id; - ByteStream::byte ackFlag; - - msg >> moduleName; - msg >> processName; - msg >> timeStamp; - msg >> id; - msg >> ackFlag; - - if ( ackFlag == oam::ACK_YES ) - { - // send back an ack msg - ackMsg << (ByteStream::byte) HEARTBEAT_SEND; - - try - { - fIos.write(ackMsg); - } - catch (...) {} - -//log.writeLog(__LINE__, "Heartbeat Ack message sent", LOG_TYPE_DEBUG); - } - - HeartBeatProcList::iterator list = hbproclist.begin(); - - for ( ; list != hbproclist.end() ; list++) - { - if ( (*list).ModuleName == moduleName - && (*list).ProcessName == processName - && (*list).ID == id) - { - (*list).receiveFlag = true; -//log.writeLog(__LINE__, "Heartbeat Received: " + moduleName + " / " + processName + " / " + oam.itoa(id) + ", timestamp: " + timeStamp, LOG_TYPE_DEBUG); - break; - } - } - - if ( list == hbproclist.end() ) - { - // not found, add to list - HeartBeatProc hbproc; - hbproc.ModuleName = moduleName; - hbproc.ProcessName = processName; - hbproc.ID = id; - hbproc.receiveFlag = true; - hbproclist.push_front(hbproc); - log.writeLog(__LINE__, "Adding Process to Heartbeat Monitor list: " + moduleName + " / " + processName + " / " + oam.itoa(id)); - } - } - break; - - case PROCESSRESTART: - { - string moduleName; - string processName; - ByteStream::byte manual; - - msg >> moduleName; - msg >> processName; - msg >> manual; - - log.writeLog(__LINE__, "MSG RECEIVED: Process Restarted on " + moduleName + "/" + processName); - - //set query system states not ready - processManager.setQuerySystemState(false); - - processManager.setSystemState(oam::BUSY_INIT); - - processManager.reinitProcessType("cpimport"); - - //request reinit after Process is active - for ( int i = 0; i < 10 ; i++ ) { - try { - ProcessStatus procstat; - oam.getProcessStatus(processName, moduleName, procstat); - - if (procstat.ProcessOpState == oam::COLD_STANDBY) - break; - - if ( (procstat.ProcessOpState == oam::ACTIVE) || - (procstat.ProcessOpState == oam::STANDBY) ) { - // if a PrimProc was restarted, reinit ACTIVE ExeMgr(s) and DDL/DMLProc - if ( processName == "PrimProc") - { - - //distribute config file - processManager.distributeConfigFile("system"); - - processManager.reinitProcessType("WriteEngineServer"); - processManager.restartProcessType("ExeMgr"); - processManager.reinitProcessType("DDLProc"); - processManager.reinitProcessType("DMLProc"); - } - - // if a WriteEngineServer was restarted, restart DDL/DMLProc - if ( processName == "WriteEngineServer") - { - - processManager.reinitProcessType("DDLProc"); - processManager.reinitProcessType("DMLProc"); - } - - // if a ControllerNode was restarted, restart DMLProc - if ( processName == "DBRMControllerNode") - { - // sleep(5); - // processManager.reinitProcessType("DBRMWorkerNode"); - // Wait for DBRMControllerNode to go active - ProcessStatus procstat; - uint16_t state = AUTO_OFFLINE; - - while (state == oam::MAN_OFFLINE - || state == oam::AUTO_OFFLINE - || state == oam::MAN_INIT - || state == oam::AUTO_INIT) - { - oam.getProcessStatus("DBRMControllerNode", config.OAMParentName(), procstat); - state = procstat.ProcessOpState; - - if ( procstat.ProcessOpState == oam::ACTIVE) - break; - - sleep(1); - } - - processManager.restartProcessType("DDLProc"); - processManager.restartProcessType("DMLProc"); - sleep(1); - - string DMLmodule = config.OAMParentName(); - - if ( config.ServerInstallType() != oam::INSTALL_COMBINE_DM_UM_PM ) - { - string PrimaryUMModuleName; - - try - { - oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName); - } - catch (...) {} - - if ( !PrimaryUMModuleName.empty() ) - DMLmodule = PrimaryUMModuleName; - } - - // Wait for DMLProc to be ACTIVE - BRM::DBRM dbrm; - state = AUTO_OFFLINE; - - while (state == oam::MAN_OFFLINE - || state == oam::AUTO_OFFLINE - || state == oam::MAN_INIT - || state == oam::AUTO_INIT - || state == oam::ROLLBACK_INIT) - { - oam.getProcessStatus("DMLProc", DMLmodule, procstat); - state = procstat.ProcessOpState; - - if ( procstat.ProcessOpState == oam::ACTIVE) - break; - - sleep(1); - } - processManager.setQuerySystemState(true); - - } - - // if a DDLProc was restarted, restart DMLProc - if ( processName == "DDLProc") - { - processManager.reinitProcessType("DMLProc"); - //set query system states ready - processManager.setQuerySystemState(true); - - processManager.setSystemState(oam::ACTIVE); - } - - //only run on auto process restart - if (manual == 0 ) - { - //get dbhealth flag - string DBHealthMonitorFlag = "n"; - string DBFunctionalMonitorFlag; - - try - { - oam.getSystemConfig( "DBHealthMonitorFlag", DBHealthMonitorFlag); - } - catch (...) - { - DBHealthMonitorFlag = "n"; - } - - //check the db health - if (DBHealthMonitorFlag == "y" ) - { - log.writeLog(__LINE__, "Call the check DB Health API", LOG_TYPE_DEBUG); - - try - { - oam.checkDBFunctional(); - log.writeLog(__LINE__, "check DB Health passed", LOG_TYPE_DEBUG); - } - catch (...) - { - log.writeLog(__LINE__, "check DB Health FAILED", LOG_TYPE_ERROR); - } - } - } - - break; - } - - sleep(1); - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - break; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - break; - } - } - - //set query system states ready - processManager.setQuerySystemState(true); - - processManager.setSystemState(oam::ACTIVE); - - log.writeLog(__LINE__, "MSG RECEIVED: Process Restarted Completed"); - } - break; - - case GETDBRMDATA: - { - log.writeLog(__LINE__, "MSG RECEIVED: Get DBRM Data Files"); - - string moduleName; - - msg >> moduleName; - - int ret = processManager.getDBRMData(fIos, moduleName); - - if ( ret == oam::API_SUCCESS ) - log.writeLog(__LINE__, "Get DBRM Data Files Completed"); - else - log.writeLog(__LINE__, "Get DBRM Data Files Failed"); - } - break; - - case GETALARMDATA: - { - log.writeLog(__LINE__, "MSG RECEIVED: Get Alarm Data Files"); - - string date; - - msg >> date; - - processManager.getAlarmData(fIos, GETALARMDATA, date); - - log.writeLog(__LINE__, "Get Alarm Data Files Completed"); - } - break; - - case GETACTIVEALARMDATA: - { -// log.writeLog(__LINE__, "MSG RECEIVED: Get Active Alarm Data Files"); - - //pull off, but don't need - string date; - - msg >> date; - - processManager.getAlarmData(fIos, GETACTIVEALARMDATA, ""); - -// log.writeLog(__LINE__, "Get Active Alarm Data Files Completed"); - } - break; - - default: - break; - } - - sleep(5); - fIos.close(); - pthread_detach (ThreadId); - pthread_exit(0); - return NULL; -} - -/****************************************************************************************** -* @brief getAlarmData -* -* purpose: get DBRM Data and send to requester -* -******************************************************************************************/ -int ProcessManager::getAlarmData(messageqcpp::IOSocket fIos, int type, std::string date) -{ - ByteStream msg; - Oam oam; - int returnStatus = oam::API_SUCCESS; - - AlarmList alarmList; - - if ( type == GETALARMDATA ) - { - try - { - ALARMManager sm; - sm.getAlarm(date, alarmList); - } - catch (...) - { - msg << (ByteStream::byte) oam::ACK; - msg << (ByteStream::byte) type; - msg << (ByteStream::byte) oam::API_FAILURE; - - try - { - fIos.write(msg); - } - catch (...) {} - - return oam::API_FAILURE; - } - } - else - { - try - { - ALARMManager sm; - sm.getActiveAlarm(alarmList); - } - catch (...) - { - msg << (ByteStream::byte) oam::ACK; - msg << (ByteStream::byte) type; - msg << (ByteStream::byte) oam::API_FAILURE; - - try - { - fIos.write(msg); - } - catch (...) {} - - return oam::API_FAILURE; - } - } - - msg << (ByteStream::byte) oam::ACK; - msg << (ByteStream::byte) type; - msg << (ByteStream::byte) oam::API_SUCCESS; - - //number of alarms - msg << (ByteStream::byte) alarmList.size(); -//log.writeLog(__LINE__, oam.itoa(alarmList.size()), LOG_TYPE_ERROR ); - - AlarmList :: iterator i; - - for (i = alarmList.begin(); i != alarmList.end(); ++i) - { - msg << (ByteStream::doublebyte) i->second.getAlarmID(); -//log.writeLog(__LINE__, oam.itoa(i->second.getAlarmID()), LOG_TYPE_ERROR ); - msg << i->second.getDesc(); - msg << (ByteStream::doublebyte) i->second.getSeverity(); - msg << i->second.getTimestamp(); - msg << i->second.getSname(); - msg << i->second.getPname(); - msg << i->second.getComponentID(); - } - - try - { - fIos.write(msg); - } - catch (...) {} - - return returnStatus; -} - -/****************************************************************************************** -* @brief buildRequestMessage -* -* purpose: Build a request message -* -******************************************************************************************/ -ByteStream ProcessManager::buildRequestMessage(ByteStream::byte requestID, - ByteStream::byte actionIndicator, string processName, bool manualFlag) -{ - ByteStream msg; - ByteStream::byte messageType = REQUEST; - - msg << messageType; - msg << requestID; - msg << actionIndicator; - - if (processName != "" ) - msg << processName; - - msg << (ByteStream::byte) manualFlag; - - return msg; -} - -/****************************************************************************************** -* @brief startModule -* -* purpose: Start all processes on the specified module -* -******************************************************************************************/ -int ProcessManager::startModule(string target, messageqcpp::ByteStream::byte actionIndicator, uint16_t startType, bool systemStart) -{ - ByteStream msg; - ByteStream::byte requestID = STARTALL; - string processName = ""; - Oam oam; - - if ( startType == oam::MAN_OFFLINE ) - setModuleState(target, oam::MAN_INIT); - else - setModuleState(target, oam::AUTO_INIT); - - msg = buildRequestMessage(requestID, actionIndicator, processName); - - int returnStatus = sendMsgProcMon( target, msg, requestID ); - - if ( returnStatus == API_SUCCESS) - { - setModuleState(target, oam::ACTIVE); - - //clear alarm, log the event - log.writeLog(__LINE__, target + " module is started by request.", LOG_TYPE_DEBUG); - - //clear an alarm - ALARMManager aManager; - aManager.sendAlarmReport(target.c_str(), MODULE_DOWN_MANUAL, CLEAR); - aManager.sendAlarmReport(target.c_str(), MODULE_DOWN_AUTO, CLEAR); - } - else - { - if ( returnStatus == oam::API_FAILURE || returnStatus == API_FAILURE_DB_ERROR) - setModuleState(target, oam::FAILED); - else if ( !systemStart ) - setModuleState(target, oam::FAILED); - - //log the event - log.writeLog(__LINE__, target + " module failed to start!!", LOG_TYPE_DEBUG); - } - - return returnStatus; -} - -/****************************************************************************************** -* @brief stopModule -* -* purpose: Stop all processes on the specified module -* -******************************************************************************************/ -int ProcessManager::stopModule(string target, ByteStream::byte actionIndicator, bool manualFlag, int timeout) -{ - Configuration config; - ProcessManager processManager(config, log); - ByteStream msg; - ByteStream::byte requestID = STOPALL; - string processName = ""; - - msg = buildRequestMessage(requestID, actionIndicator, processName, manualFlag); - - string msgPort = target; - msgPort = msgPort + "_ProcessMonitor"; - - int returnStatus = API_FAILURE; - - if ( actionIndicator == INSTALL && target == config.OAMParentName() ) - { - // Process Manager will be taken down, do your updates now - log.writeLog(__LINE__, target + " module is stopped by request.", LOG_TYPE_DEBUG); - - if ( manualFlag ) - { - setModuleState(target, oam::MAN_OFFLINE); - - //Issue an alarm - ALARMManager aManager; - aManager.sendAlarmReport(target.c_str(), MODULE_DOWN_MANUAL, SET); - } - else - { - setModuleState(target, oam::AUTO_OFFLINE); - - //Issue an alarm - ALARMManager aManager; - aManager.sendAlarmReport(target.c_str(), MODULE_DOWN_AUTO, SET); - } - } - else - { - log.writeLog(__LINE__, target + " module is stopped by request.", LOG_TYPE_DEBUG); - - if ( manualFlag ) - { - setModuleState(target, oam::MAN_INIT); - } - else - { - setModuleState(target, oam::AUTO_INIT); - } - } - - returnStatus = sendMsgProcMon( target, msg, requestID, timeout ); - - if ( actionIndicator != STATUS_UPDATE ) - { - if ( returnStatus == API_SUCCESS) - { - //Issue an alarm, log the event - log.writeLog(__LINE__, target + " module is successfully stopped.", LOG_TYPE_DEBUG); - - if ( manualFlag ) - { -// setModuleState(target, oam::MAN_OFFLINE); - - //Issue an alarm - ALARMManager aManager; - aManager.sendAlarmReport(target.c_str(), MODULE_DOWN_MANUAL, SET); - } - else - { -// setModuleState(target, oam::AUTO_OFFLINE); - - //Issue an alarm - ALARMManager aManager; - aManager.sendAlarmReport(target.c_str(), MODULE_DOWN_AUTO, SET); - } - } - else - { -// if ( manualFlag ) { -// setModuleState(target, oam::FAILED); -// } - - //log the event - log.writeLog(__LINE__, target + " module failed to stop!!", LOG_TYPE_WARNING); - } - } - - return returnStatus; -} - -/****************************************************************************************** -* @brief shutdownModule -* -* purpose: power off the specified module, -* -******************************************************************************************/ -int ProcessManager::shutdownModule(string target, ByteStream::byte actionIndicator, bool manualFlag, int timeout) -{ - ByteStream msg; - ByteStream::byte requestID = SHUTDOWNMODULE; - string processName = ""; - - msg = buildRequestMessage(requestID, actionIndicator, processName, manualFlag); - - int returnStatus = sendMsgProcMon( target, msg, requestID, timeout ); - - if ( returnStatus == API_SUCCESS) - { - //Issue an alarm, log the event - log.writeLog(__LINE__, target + " module is shutdown by request.", LOG_TYPE_DEBUG); - - if ( manualFlag ) - { - setModuleState(target, oam::MAN_OFFLINE); - - //mark all processes running on module man-offline - setProcessStates(target, oam::MAN_OFFLINE); - - //Issue an alarm - ALARMManager aManager; - aManager.sendAlarmReport(target.c_str(), MODULE_DOWN_MANUAL, SET); - } - else - { - setModuleState(target, oam::AUTO_OFFLINE); - - //mark all processes running on module auto-offline - setProcessStates(target, oam::AUTO_OFFLINE); - - //Issue an alarm - ALARMManager aManager; - aManager.sendAlarmReport(target.c_str(), MODULE_DOWN_AUTO, SET); - } - } - else - { - setModuleState(target, oam::FAILED); - - //log the event - log.writeLog(__LINE__, target + " module failed to shutdown!!", LOG_TYPE_WARNING); - } - - return returnStatus; -} - -/****************************************************************************************** -* @brief disableModule -* -* purpose: Set the Disable State on a specified module -* -******************************************************************************************/ -int ProcessManager::disableModule(string target, bool manualFlag) -{ - Oam oam; - ProcessManager processManager(config, log); - ModuleConfig moduleconfig; - - log.writeLog(__LINE__, "disableModule request for " + target, LOG_TYPE_DEBUG); - - string moduleType = target.substr(0, MAX_MODULE_TYPE_SIZE); - - pthread_mutex_lock(&THREAD_LOCK); - - int newState; - string SnewState; - - if ( manualFlag ) - { - newState = oam::MAN_DISABLED; - SnewState = oam::MANDISABLEDSTATE; - } - else - { - newState = oam::AUTO_DISABLED; - SnewState = oam::AUTODISABLEDSTATE; - } - - // skip of module already in current DISABLED state or in MAN_DISABLED state - try - { - int opState = oam::ACTIVE; - bool degraded; - oam.getModuleStatus(target, opState, degraded); - - if (opState == newState || opState == oam::MAN_DISABLED) - { - pthread_mutex_unlock(&THREAD_LOCK); - return API_SUCCESS; - } - - // if current state is AUTO_DISABLED and new state is MAN_DISABLED - // update state to MAN_DISABLED - - if (opState == oam::AUTO_DISABLED && newState == oam::MAN_DISABLED) - { - //removemodule to get proess in MAN_OFFLINE - stopModule(target, REMOVE, true); - - try - { - oam.getSystemConfig(target, moduleconfig); - - moduleconfig.DisableState = oam::MANDISABLEDSTATE; - - try - { - oam.setSystemConfig(target, moduleconfig); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on setSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on setSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - pthread_mutex_unlock(&THREAD_LOCK); - - setModuleState(target, oam::MAN_DISABLED); - - return API_SUCCESS; - } - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + target + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + target + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - pthread_mutex_unlock(&THREAD_LOCK); - - setModuleState(target, newState); - - //set Columnstore.xml enable state - setEnableState( target, SnewState); - - //sleep a bit to give time for the state change to apply - sleep(1); - - //update PMS area if PM was disabled - if ( moduleType == "pm" ) - { - if ( updatePMSconfig() != API_SUCCESS ) - return API_FAILURE; - } - - //Update DBRM section of Columnstore.xml - if ( updateWorkerNodeconfig() != API_SUCCESS ) - { - return API_FAILURE; - } - - //distribute config file - distributeConfigFile("system"); - - processManager.reinitProcesses(); - - log.writeLog(__LINE__, "disableModule successfully complete for " + target, LOG_TYPE_DEBUG); - - return API_SUCCESS; -} - - -void ProcessManager::reinitProcesses(std::string skipModule) -{ - Oam oam; - - log.writeLog(__LINE__, "reinitProcesses... ", LOG_TYPE_DEBUG); - - reinitProcessType("DBRMWorkerNode"); - reinitProcessType("WriteEngineServer"); - restartProcessType("ExeMgr",skipModule); - sleep(1); - restartProcessType("DDLProc",skipModule); - sleep(1); - restartProcessType("DMLProc",skipModule); - sleep(3); - - log.writeLog(__LINE__, "reinitProcesses complete", LOG_TYPE_DEBUG); -} - -/****************************************************************************************** -* @brief recycleProcess -* -* purpose: recyle process, done after disable/enable module -* -******************************************************************************************/ -void ProcessManager::recycleProcess(string module, bool enableModule) -{ - Oam oam; - ModuleConfig moduleconfig; - - log.writeLog(__LINE__, "recycleProcess request after module status update: " + module, LOG_TYPE_DEBUG); - - string moduleType = module.substr(0, MAX_MODULE_TYPE_SIZE); - - string PrimaryUMModuleName; - - try - { - oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName); - } - catch (...) {} - - stopProcessType("WriteEngineServer"); - - stopProcessType("ExeMgr"); - - stopProcessType("PrimProc"); - - stopProcessType("DBRMControllerNode"); - stopProcessType("DBRMWorkerNode"); - - stopProcessType("DDLProc"); - stopProcessType("DMLProc"); - - stopProcessType("mysqld"); - -// restartProcessType("mysqld"); - - startProcessType("DBRMControllerNode"); - startProcessType("DBRMWorkerNode"); - - startProcessType("PrimProc"); - sleep(5); - - startProcessType("WriteEngineServer"); - sleep(3); - - startProcessType("ExeMgr"); - - startProcessType("DDLProc"); - sleep(1); - - startProcessType("DMLProc"); - - startProcessType("mysqld"); - - return; -} - -/****************************************************************************************** -* @brief enableModule -* -* purpose: Clear the Disable State on a specified module -* -******************************************************************************************/ -int ProcessManager::enableModule(string target, int state, bool failover) -{ - Oam oam; - ModuleConfig moduleconfig; - - log.writeLog(__LINE__, "enableModule request for " + target, LOG_TYPE_DEBUG); - - string moduleType = target.substr(0, MAX_MODULE_TYPE_SIZE); - - if (setEnableState( target, oam::ENABLEDSTATE) != API_SUCCESS ) - return API_FAILURE; - - setModuleState(target, state); - - //sleep a bit to give time for the state change to apply - sleep(5); - - //update PMS area if PM was disabled - if ( moduleType == "pm" ) - { - if ( updatePMSconfig() != API_SUCCESS ) - return API_FAILURE; - - log.writeLog(__LINE__, "enableModule - Updated PM server Count", LOG_TYPE_DEBUG); - } - - //Update DBRM section of Columnstore.xml - if ( updateWorkerNodeconfig() != API_SUCCESS ) - return API_FAILURE; - - //distribute config file - distributeConfigFile("system"); - - //check if new module should be hot-standby - string newStandbyModule = getStandbyModule(); - - if ( newStandbyModule == target) - setStandbyModule(newStandbyModule); - - log.writeLog(__LINE__, "enableModule request for " + target + " completed", LOG_TYPE_DEBUG); - - return API_SUCCESS; -} - - -/****************************************************************************************** -* @brief startMgrProcesses -* -* purpose: start all Mgr Controlled processes for a module -* -******************************************************************************************/ -void ProcessManager::startMgrProcesses(std::string moduleName) -{ - Oam oam; - SystemProcessConfig systemprocessconfig; - vector::iterator itor; - - ByteStream msg; - string modulePortName = moduleName + "_ProcessMonitor"; - - try - { - oam.getProcessConfig(systemprocessconfig); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - string moduleType = moduleName.substr(0, MAX_MODULE_TYPE_SIZE); - - while (true) - { - bool status = true; - - for (itor = systemprocessconfig.processconfig.begin(); - itor != systemprocessconfig.processconfig.end(); ++itor) - { - status = true; - - if ((*itor).BootLaunch == MGR_LAUNCH) - { - if ((*itor).ModuleType == moduleType - || (*itor).ModuleType == "ChildExtOAMModule" - || ( (*itor).ModuleType == "ChildOAMModule") - || ((*itor).ModuleType == "ParentOAMModule" && moduleName == config.OAMParentName()) ) - { - int state = oam::ACTIVE; - - try - { - ProcessStatus procstat; - oam.getProcessStatus((*itor).ProcessName, moduleName, procstat); - state = procstat.ProcessOpState; - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - continue; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - continue; - } - - if ( state == oam::INITIAL ) - { - - msg = buildRequestMessage(START, FORCEFUL, (*itor).ProcessName); - - log.writeLog(__LINE__, "Request Start of Process/Module: " + (*itor).ProcessName + " / " + moduleName, LOG_TYPE_DEBUG); - - try - { - MessageQueueClient mqRequest(modulePortName); - mqRequest.write(msg); - mqRequest.shutdown(); -// sleep(2); - status = false; - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueClient: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueClient: Caught unknown exception!", LOG_TYPE_ERROR); - } - } - } - } - } //end of for loop - - if (status) - return; - } //end of while -} - -/****************************************************************************************** -* @brief stopProcess -* -* purpose: Stop a Process on the specified module -* -******************************************************************************************/ -int ProcessManager::stopProcess(string moduleName, string processName, - messageqcpp::ByteStream::byte actionIndicator, bool manualFlag, int timeout) -{ - ByteStream msg; - ByteStream::byte requestID = STOP; - - msg = buildRequestMessage(requestID, actionIndicator, processName, manualFlag); - - int returnStatus = sendMsgProcMon( moduleName, msg, requestID, timeout ); - - if ( returnStatus == API_SUCCESS) - //log the event - log.writeLog(__LINE__, processName + " process is stopped by request.", LOG_TYPE_DEBUG); - else - //log the event - log.writeLog(__LINE__, processName + " process failed to stop!!", LOG_TYPE_WARNING); - - return returnStatus; -} - -/****************************************************************************************** -* @brief startProcess -* -* purpose: Start a Process on the specified module -* -******************************************************************************************/ -int ProcessManager::startProcess(string moduleName, string processName, - messageqcpp::ByteStream::byte actionIndicator) -{ - Oam oam; - - if ( actionIndicator != oam::STATUS_UPDATE ) - { - //skip if module is DISABLED - int opState; - bool degraded; - - try - { - oam.getModuleStatus(moduleName, opState, degraded); - } - catch (...) - {} - - //check if disabled - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - return API_SUCCESS; - } - - ByteStream msg; - ByteStream::byte requestID = START; - - msg = buildRequestMessage(requestID, actionIndicator, processName); - - int returnStatus = sendMsgProcMon( moduleName, msg, requestID ); - - if ( returnStatus == API_SUCCESS) - //log the event - log.writeLog(__LINE__, moduleName + "/" + processName + " process is started by request.", LOG_TYPE_DEBUG); - else - //log the event - log.writeLog(__LINE__, moduleName + "/" + processName + " process failed to start!!", LOG_TYPE_WARNING); - - return returnStatus; -} - -/****************************************************************************************** -* @brief restartProcess -* -* purpose: Restart a Process on the specified module -* -******************************************************************************************/ -int ProcessManager::restartProcess(string moduleName, string processName, - messageqcpp::ByteStream::byte actionIndicator, bool manualFlag) -{ - Oam oam; - - //skip if module is DISABLED - int opState; - bool degraded; - - try - { - oam.getModuleStatus(moduleName, opState, degraded); - } - catch (...) - {} - - //check if disabled - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - return API_SUCCESS; - - ByteStream msg; - ByteStream::byte requestID = RESTART; - - msg = buildRequestMessage(requestID, actionIndicator, processName, manualFlag); - - int returnStatus; - - // need retry due to the depend process checks - for ( int retry = 0 ; retry < 5 ; retry++) - { - returnStatus = sendMsgProcMon( moduleName, msg, requestID ); - - if ( returnStatus == API_SUCCESS) - { - log.writeLog(__LINE__, processName + " process is restarted by request.", LOG_TYPE_DEBUG); - return returnStatus; - } - else - log.writeLog(__LINE__, processName + " process failed to restart, will retry!!", LOG_TYPE_WARNING); - - sleep(2); - } - - return returnStatus; -} - -/****************************************************************************************** -* @brief reinitProcess -* -* purpose: Reinit a Process on the specified module -* -******************************************************************************************/ -int ProcessManager::reinitProcess(string moduleName, string processName) -{ - Oam oam; - - //skip if module is DISABLED - int opState; - bool degraded; - - try - { - oam.getModuleStatus(moduleName, opState, degraded); - } - catch (...) - {} - - //check if disabled - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - return API_SUCCESS; - - ByteStream msg; - ByteStream::byte requestID = PROCREINITPROCESS; - ByteStream::byte actionIndicator = FORCEFUL; - - msg = buildRequestMessage(requestID, actionIndicator, processName); - - int returnStatus = sendMsgProcMon( moduleName, msg, requestID, 0 ); - - if ( returnStatus == API_SUCCESS) - //log the event - log.writeLog(__LINE__, processName + " process is reinited by request.", LOG_TYPE_DEBUG); - else - //log the event - log.writeLog(__LINE__, processName + " process failed to reinit!!", LOG_TYPE_WARNING); - - return returnStatus; -} - -/****************************************************************************************** -* @brief setSystemState -* -* purpose: set System State and process required alarms -* -******************************************************************************************/ -void ProcessManager::setSystemState(uint16_t state) -{ - ProcessLog log; - Oam oam; - ALARMManager aManager; - Configuration config; - ProcessManager processManager(config, log); - - log.writeLog(__LINE__, "Set System State = " + oamState[state], LOG_TYPE_DEBUG); - - pthread_mutex_lock(&STATUS_LOCK); - - try - { - oam.setSystemStatus(state); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueClient: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueClient: Caught unknown exception!", LOG_TYPE_ERROR); - } - - // Process Alarms - string system = "System"; - if( state == oam::ACTIVE ) { - //set query system states ready - processManager.setQuerySystemState(true); - - //clear alarms if set - aManager.sendAlarmReport(system.c_str(), SYSTEM_DOWN_AUTO, CLEAR); - aManager.sendAlarmReport(system.c_str(), SYSTEM_DOWN_MANUAL, CLEAR); - } - else - { - if ( state == oam::MAN_OFFLINE ) - aManager.sendAlarmReport(system.c_str(), SYSTEM_DOWN_MANUAL, SET); - else if ( state == oam::AUTO_OFFLINE ) - aManager.sendAlarmReport(system.c_str(), SYSTEM_DOWN_AUTO, SET); - - aManager.sendAlarmReport(system.c_str(), CONN_FAILURE, CLEAR); - } - - pthread_mutex_unlock(&STATUS_LOCK); - return; -} - -/****************************************************************************************** -* @brief setModuleState -* -* purpose: set Module State of a specific module -* -******************************************************************************************/ -void ProcessManager::setModuleState(string moduleName, uint16_t state) -{ - ProcessLog log; - Oam oam; - log.writeLog(__LINE__, "Set Module " + moduleName + " State = " + oam.itoa(state), LOG_TYPE_DEBUG); - - pthread_mutex_lock(&STATUS_LOCK); - - try - { - oam.setModuleStatus(moduleName, state); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on setModuleStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on setModuleStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - - pthread_mutex_unlock(&STATUS_LOCK); - return; -} - -/****************************************************************************************** -* @brief setExtdeviceState -* -* purpose: set Switch State of a specific switch -* -******************************************************************************************/ -void ProcessManager::setExtdeviceState(string extDeviceName, uint16_t state) -{ - ProcessLog log; - Oam oam; - log.writeLog(__LINE__, "Set Ext Device " + extDeviceName + " State = " + oam.itoa(state), LOG_TYPE_DEBUG); - - pthread_mutex_lock(&STATUS_LOCK); - - try - { - oam.setExtDeviceStatus(extDeviceName, state); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on setExtDeviceStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on setExtDeviceStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - - pthread_mutex_unlock(&STATUS_LOCK); - return; -} - -/****************************************************************************************** -* @brief setNICState -* -* purpose: set NIC State of a specific storage -* -******************************************************************************************/ -void ProcessManager::setNICState(string hostName, uint16_t state) -{ - ProcessLog log; - Oam oam; - log.writeLog(__LINE__, "Set NIC " + hostName + " State = " + oam.itoa(state), LOG_TYPE_DEBUG); - - pthread_mutex_lock(&STATUS_LOCK); - - try - { - oam.setNICStatus(hostName, state); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on setNICStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on setNICStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - - pthread_mutex_unlock(&STATUS_LOCK); - return; -} - - -/****************************************************************************************** -* @brief setProcessState -* -* purpose: set Process State of a specific Process -* -******************************************************************************************/ -int ProcessManager::setProcessState(string moduleName, string processName, uint16_t state, pid_t PID) -{ - ProcessLog log; - Oam oam; - log.writeLog(__LINE__, "StatusUpdate of Process " + processName + " State = " + oam.itoa(state), LOG_TYPE_DEBUG); - - try - { - oam.setProcessStatus(processName, moduleName, state, PID); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on setProcessStatus: " + error, LOG_TYPE_ERROR); - return oam::API_FAILURE; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on setProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - return oam::API_FAILURE; - } - - return oam::API_SUCCESS; -} - -/****************************************************************************************** -* @brief setProcessStates -* -* purpose: set all processes running on a module to requested state -* -******************************************************************************************/ -void ProcessManager::setProcessStates(std::string moduleName, uint16_t state, std::string processNameSkip ) -{ - ProcessLog log; - Oam oam; - log.writeLog(__LINE__, "Set All NON-MAN_OFFLINE Process for module " + moduleName + " = " + oam.itoa(state), LOG_TYPE_DEBUG); - - SystemProcessConfig systemprocessconfig; - vector::iterator itor; - - //PMwithUM config - string PMwithUM = "n"; - - try - { - oam.getSystemConfig( "PMwithUM", PMwithUM); - } - catch (...) - { - PMwithUM = "n"; - } - - string moduleType = moduleName.substr(0, MAX_MODULE_TYPE_SIZE); - - try - { - oam.getProcessConfig(systemprocessconfig); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - string moduleTypeSet = moduleName.substr(0, MAX_MODULE_TYPE_SIZE); - - for (itor = systemprocessconfig.processconfig.begin(); - itor != systemprocessconfig.processconfig.end(); ++itor) - { - if ( (*itor).ModuleType == moduleType - || (*itor).ModuleType == "ChildExtOAMModule" - || ( (*itor).ModuleType == "ChildOAMModule" ) - || ((*itor).ModuleType == "ParentOAMModule") ) - { - if ( (*itor).ProcessName == processNameSkip ) - continue; - - ProcessStatus processstatus; - - try - { - oam.getProcessStatus((*itor).ProcessName, moduleName, processstatus); - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - - if (processstatus.ProcessOpState != oam::MAN_OFFLINE) - { - setProcessState(moduleName, (*itor).ProcessName, state, 0); - - if ( (*itor).ProcessName == "ExeMgr" || state == oam::AUTO_OFFLINE ) - setProcessState(moduleName, "mysqld", state, 0); - } - } - else - { - //for for umwithpm apps, which is ExeMgr now - if ( moduleTypeSet == "pm" && PMwithUM == "y" ) - { - ProcessStatus processstatus; - - try - { - oam.getProcessStatus("ExeMgr", moduleName, processstatus); - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - - if (processstatus.ProcessOpState != oam::MAN_OFFLINE) - { - setProcessState(moduleName, "ExeMgr", state, 0); - - if ( state == oam::AUTO_OFFLINE ) - setProcessState(moduleName, "mysqld", state, 0); - } - } - } - } -} - -/****************************************************************************************** -* @brief updateLog -* -* purpose: updatelog on a specific module -* -******************************************************************************************/ -int ProcessManager::updateLog(std::string action, std::string moduleName, std::string level) -{ - ByteStream msg; - ByteStream::byte requestID = PROCUPDATELOG; - - msg << requestID; - msg << action; - msg << level; - - int returnStatus = sendMsgProcMon( moduleName, msg, requestID, 30 ); - - if ( returnStatus == API_SUCCESS) - { - //log the success event - log.writeLog(__LINE__, moduleName + " updateLog by request.", LOG_TYPE_DEBUG); - } - else - { - //log the error event - log.writeLog(__LINE__, moduleName + " updateLog failed!!", LOG_TYPE_WARNING); - } - - return returnStatus; -} - -/****************************************************************************************** -* @brief getConfigLog -* -* purpose: get Log Configation on a specific module -* -******************************************************************************************/ -int ProcessManager::getConfigLog(std::string moduleName) -{ - ByteStream msg; - ByteStream::byte requestID = PROCGETCONFIGLOG; - - msg << requestID; - - int returnStatus = sendMsgProcMon( moduleName, msg, requestID, 30 ); - - return returnStatus; -} - -/****************************************************************************************** -* @brief updateConfig -* -* purpose: Send Msg to Process-Monitor to re-read updated Configation data -* -******************************************************************************************/ -int ProcessManager::updateConfig(std::string moduleName) -{ - ByteStream msg; - ByteStream::byte requestID = PROCUPDATECONFIG; - - msg << requestID; - - int returnStatus = sendMsgProcMon( moduleName, msg, requestID, 30 ); - - return returnStatus; -} - -/****************************************************************************************** -* @brief buildSystemTables -* -* purpose: Send a Message to 'pm1' to check and build System Table -* -******************************************************************************************/ -int ProcessManager::buildSystemTables(string target) -{ - ByteStream msg; - ByteStream::byte requestID = PROCBUILDSYSTEMTABLES; - - msg << requestID; - - int returnStatus = sendMsgProcMon( target, msg, requestID ); - - return returnStatus; -} - -/****************************************************************************************** -* @brief updateFstab -* -* purpose: send Fstab Update to a specific module -* -******************************************************************************************/ -int ProcessManager::updateFstab(std::string moduleName, std::string entry) -{ - ByteStream msg; - ByteStream::byte requestID = PROCFSTABUPDATE; - - msg << requestID; - msg << entry; - - int returnStatus = sendMsgProcMon( moduleName, msg, requestID, 30 ); - - return returnStatus; -} - - -/****************************************************************************************** -* @brief stopProcessType -* -* purpose: Stops a type of process within the system -* -******************************************************************************************/ -int ProcessManager::stopProcessType( std::string processName, bool manualFlag ) -{ - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - Oam oam; - SystemProcessStatus systemprocessstatus; - ProcessStatus processstatus; - - log.writeLog(__LINE__, "stopProcessType: Stop all " + processName, LOG_TYPE_DEBUG); - - try - { - oam.getProcessStatus(systemprocessstatus); - - for ( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++) - { - if ( systemprocessstatus.processstatus[i].ProcessName == processName) - { - //skip if in a COLD_STANDBY state -// if ( systemprocessstatus.processstatus[i].ProcessOpState == oam::COLD_STANDBY ) - if ( systemprocessstatus.processstatus[i].ProcessOpState != oam::ACTIVE ) - continue; - - // found one, request restart of it - processManager.stopProcess(systemprocessstatus.processstatus[i].Module, - processName, - GRACEFUL, - manualFlag, 0); -// log.writeLog(__LINE__, "stopProcessType: Start ACK received from Process-Monitor, return status = " + oam.itoa(retStatus), LOG_TYPE_DEBUG); - } - } - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - return API_FAILURE; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - return API_FAILURE; - } - - return API_SUCCESS; - -} - -/****************************************************************************************** -* @brief startProcessType -* -* purpose: Starts a type of process within the system -* -******************************************************************************************/ -int ProcessManager::startProcessType( std::string processName ) -{ - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - Oam oam; - SystemProcessStatus systemprocessstatus; - ProcessStatus processstatus; - - log.writeLog(__LINE__, "StartProcessType: Start all " + processName, LOG_TYPE_DEBUG); - - try - { - oam.getProcessStatus(systemprocessstatus); - - for ( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++) - { - if ( systemprocessstatus.processstatus[i].ProcessName == processName) - { - // found one, request restart of it - int retStatus = processManager.startProcess(systemprocessstatus.processstatus[i].Module, - processName, - FORCEFUL); - log.writeLog(__LINE__, "StartProcessType: Start ACK received from Process-Monitor, return status = " + oam.itoa(retStatus), LOG_TYPE_DEBUG); - } - } - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - return API_FAILURE; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - return API_FAILURE; - } - - return API_SUCCESS; -} - -/****************************************************************************************** -* @brief restartProcessType -* -* purpose: Restarts ACTIVE type of process within the system -* -******************************************************************************************/ -int ProcessManager::restartProcessType( std::string processName, std::string skipModule, bool manualFlag ) -{ - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - Oam oam; - SystemProcessStatus systemprocessstatus; - ProcessStatus processstatus; - int retStatus = API_SUCCESS; - - log.writeLog(__LINE__, "restartProcessType: Restart all " + processName, LOG_TYPE_DEBUG); - - //PMwithUM config - string PMwithUM = "n"; - - try - { - oam.getSystemConfig( "PMwithUM", PMwithUM); - } - catch (...) - { - PMwithUM = "n"; - } - - // If mysqld is the processName, then send to modules were ExeMgr is running - try - { - oam.getProcessStatus(systemprocessstatus); - - for ( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++) - { - //check for skipModule - if ( systemprocessstatus.processstatus[i].Module == skipModule ) - continue; - - if ( processName == "mysqld" ) - { - if ( systemprocessstatus.processstatus[i].ProcessName == "ExeMgr") - { - ProcessStatus procstat; - oam.getProcessStatus("mysqld", systemprocessstatus.processstatus[i].Module, procstat); - int state = procstat.ProcessOpState; - - if ( state == ACTIVE ) - { - retStatus = processManager.restartProcess(systemprocessstatus.processstatus[i].Module, - processName, - FORCEFUL, - true); - log.writeLog(__LINE__, "restartProcessType: Start ACK received from Process-Monitor, return status = " + oam.itoa(retStatus), LOG_TYPE_DEBUG); - } - } - } - else - { - if ( systemprocessstatus.processstatus[i].ProcessName == processName ) - { - //skip if in a BUSY_INIT state -// if ( systemprocessstatus.processstatus[i].ProcessOpState == oam::BUSY_INIT || -// systemprocessstatus.processstatus[i].ProcessOpState == oam::MAN_OFFLINE || -// systemprocessstatus.processstatus[i].ProcessOpState == oam::AUTO_OFFLINE || -// systemprocessstatus.processstatus[i].ProcessOpState == oam::AUTO_INIT || -// systemprocessstatus.processstatus[i].ProcessOpState == oam::MAN_INIT || -// ( systemprocessstatus.processstatus[i].ProcessOpState == oam::COLD_STANDBY && !manualFlag ) ) -// continue; - - if ( systemprocessstatus.processstatus[i].ProcessOpState != oam::ACTIVE ) - continue; - - if ( (processName.find("DDLProc") == 0 || processName.find("DMLProc") == 0) ) - { - string procModuleType = systemprocessstatus.processstatus[i].Module.substr(0, MAX_MODULE_TYPE_SIZE); - - if ( procModuleType == "pm" && PMwithUM == "y" ) - continue; - - try - { - oam.setSystemConfig("PrimaryUMModuleName", systemprocessstatus.processstatus[i].Module); - - processManager.setPMProcIPs(systemprocessstatus.processstatus[i].Module); - - //distribute config file - processManager.distributeConfigFile("system"); - sleep(1); - } - catch (...) {} - } - - // found one, request restart of it - retStatus = processManager.restartProcess(systemprocessstatus.processstatus[i].Module, - processName, - FORCEFUL, - true); - log.writeLog(__LINE__, "restartProcessType: Start ACK received from Process-Monitor, return status = " + oam.itoa(retStatus), LOG_TYPE_DEBUG); - - // if DDL or DMLProc, change IP Address - if ( retStatus == oam::API_SUCCESS ) - { -// sleep(5); - ProcessStatus procstat; - oam.getProcessStatus(processName, systemprocessstatus.processstatus[i].Module, procstat); - - if ( (processName.find("DDLProc") == 0 || processName.find("DMLProc") == 0) ) - { - processManager.setPMProcIPs(systemprocessstatus.processstatus[i].Module, processName); - break; - } - } - } - } - } - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - return API_FAILURE; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - return API_FAILURE; - } - - return retStatus; -} - -/****************************************************************************************** -* @brief reinitProcessType -* -* purpose: Reinit ACTIVE type of process within the system -* -******************************************************************************************/ -int ProcessManager::reinitProcessType( std::string processName ) -{ - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - Oam oam; - SystemProcessStatus systemprocessstatus; - ProcessStatus processstatus; - int retStatus = API_SUCCESS; - - log.writeLog(__LINE__, "reinitProcessType: ReInit all " + processName, LOG_TYPE_DEBUG); - - try - { - oam.getProcessStatus(systemprocessstatus); - - // re-init cpimport on all nodes - if ( processName == "cpimport" ) - { - for ( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++) - { - if ( systemprocessstatus.processstatus[i].ProcessName == "ServerMonitor" ) - { - // found one, request reinit of it - log.writeLog(__LINE__, "reinitProcessType: cpimport" + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG); - retStatus = processManager.reinitProcess(systemprocessstatus.processstatus[i].Module, - "cpimport"); - log.writeLog(__LINE__, "reinitProcessType: ACK received from Process-Monitor, return status = " + oam.itoa(retStatus), LOG_TYPE_DEBUG); - } - } - } - else - { - for ( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++) - { - if ( systemprocessstatus.processstatus[i].ProcessName == processName && - systemprocessstatus.processstatus[i].ProcessOpState == oam::ACTIVE ) - { - // found one, request reinit of it - retStatus = processManager.reinitProcess(systemprocessstatus.processstatus[i].Module, - processName); - log.writeLog(__LINE__, "reinitProcessType: ACK received from Process-Monitor, return status = " + oam.itoa(retStatus), LOG_TYPE_DEBUG); - } - } - } - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - return API_FAILURE; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - return API_FAILURE; - } - - return retStatus; -} - -/****************************************************************************************** -* @brief addModule -* -* purpose: Add Module to system configuration -* -******************************************************************************************/ -int ProcessManager::addModule(oam::DeviceNetworkList devicenetworklist, std::string password, bool storeHostnames, - bool manualFlag) -{ - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - - SystemModuleTypeConfig systemmoduletypeconfig; - ModuleTypeConfig moduletypeconfig; - ModuleTypeConfig setmoduletypeconfig; - DeviceNetworkConfig devicenetworkconfig; - Oam oam; - string Section; - - pthread_mutex_lock(&THREAD_LOCK); - - int AddModuleCount = devicenetworklist.size(); - DeviceNetworkList::iterator listPT = devicenetworklist.begin(); - string moduleType = (*listPT).DeviceName.substr(0, MAX_MODULE_TYPE_SIZE); - - // - //Check hostname and IP Address for availibility - // - try - { - oam.getSystemConfig(systemmoduletypeconfig); - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - // end of list - break; - - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - string moduletype = systemmoduletypeconfig.moduletypeconfig[i].ModuleType; - - if ( moduleCount > 0 ) - { - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++) - { - string hostname = (*pt1).HostName; - - if ( hostname == oam::UnassignedName ) - continue; - - string ipAddr = (*pt1).IPAddr; - - listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin(); - string newHostName = (*pt1).HostName; - string newIPAddr = (*pt1).IPAddr; - - if ( newIPAddr == ipAddr || newHostName == hostname ) - { - log.writeLog(__LINE__, "addModule - ERROR: hostName or IP address already in-use: " + newIPAddr + "/" + newHostName, LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_INVALID_PARAMETER; - } - } - } - } - } - } - } - catch (exception& e) - { - log.writeLog(__LINE__, "addModule - ERROR: getSystemConfig", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - string calpontPackage; - - string systemID; - string packageType = "rpm"; - - try - { - oam.getSystemConfig("EEPackageType", packageType); - } - catch (...) - { - log.writeLog(__LINE__, "addModule - ERROR: get EEPackageType", LOG_TYPE_ERROR); - } - - // - // check for RPM package - // - - SystemSoftware systemsoftware; - - try - { - oam.getSystemSoftware(systemsoftware); - } - catch (exception& e) - { - log.writeLog(__LINE__, "addModule - ERROR: getSystemSoftware", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - string homedir = "/root"; - - if (!rootUser) - { - char* p = getenv("HOME"); - - if (p && *p) - homedir = p; - } - - //clear out the known_host file, sometimes causes a failure on amazon during addModule - if ( amazon ) - { - string cmd = "unlink " + homedir + ".ssh/know_hosts > /dev/null 2>&1"; - system(cmd.c_str()); - } - - if ( packageType == "rpm") - calpontPackage = homedir + "/mariadb-columnstore*" + columnstore_version + "-" + columnstore_release + "*.rpm"; - else if ( packageType == "deb") - calpontPackage = homedir + "/mariadb-columnstore*" + columnstore_version + "-" + columnstore_release + "*.deb"; - else - calpontPackage = homedir + "/mariadb-columnstore*" + columnstore_version + "-" + columnstore_release + "*.bin.tar.gz"; - - // - //Get System Configuration file - // - - try - { - oam.getSystemConfig(moduleType, moduletypeconfig); - } - catch (...) - { - log.writeLog(__LINE__, "addModule - ERROR: getSystemConfig", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - setmoduletypeconfig = moduletypeconfig; - - // update Module Type Count - int oldModuleCount = moduletypeconfig.ModuleCount; - int newModuleCount = oldModuleCount + AddModuleCount; - setmoduletypeconfig.ModuleCount = newModuleCount; - - //add new IP Addresses and Hostnames - listPT = devicenetworklist.begin(); - HostConfig hostconfig; - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - string moduleName = (*listPT).DeviceName; - devicenetworkconfig.DeviceName = (*listPT).DeviceName; - devicenetworkconfig.DisableState = oam::MANDISABLEDSTATE; - - HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin(); - - for ( ; pt1 != (*listPT).hostConfigList.end() ; pt1++) - { - string hostName = (*pt1).HostName; - string IPAddr = (*pt1).IPAddr; - - //if cloud and unassigned, launch a new Instance - if ( ( cloud == "amazon-ec2" && hostName == oam::UnassignedName ) || - ( cloud == "amazon-vpc" && hostName == oam::UnassignedName ) ) - { - string UMinstanceType; - string UMSecurityGroup; - - if ( moduleType == "um") - { - try - { - oam.getSystemConfig("UMInstanceType", UMinstanceType); - oam.getSystemConfig("UMSecurityGroup", UMSecurityGroup); - } - catch (...) {} - } - - log.writeLog(__LINE__, "addModule - Launching a new Instance for: " + moduleName, LOG_TYPE_DEBUG); - - if ( moduleType == "um" ) - hostName = oam.launchEC2Instance(moduleName, IPAddr, UMinstanceType, UMSecurityGroup); - else - hostName = oam.launchEC2Instance(moduleName, IPAddr); - - if ( hostName == "failed" ) - { - log.writeLog(__LINE__, "addModule - Launch New Instance Failure", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - // add instance tag - string systemName; - string AmazonAutoTagging; - { - try - { - oam.getSystemConfig("SystemName", systemName); - oam.getSystemConfig("AmazonAutoTagging", AmazonAutoTagging); - } - catch (...) {} - } - - if ( AmazonAutoTagging == "y" ) - { - string tagValue = systemName + "-" + moduleName; - oam.createEC2tag( hostName, "Name", tagValue ); - } - - //wait until login is success until continuing or fail if can't login - log.writeLog(__LINE__, "addModule - Successfully Launch of new Instance, perform login test: " + moduleName, LOG_TYPE_DEBUG); - int retry = 0; - - for ( ; retry < 30 ; retry++) - { - IPAddr = oam.getEC2InstanceIpAddress(hostName); - - if (IPAddr == "terminated") - { - log.writeLog(__LINE__, "addModule - Failed to log in to Instance, it was terminated: " + hostName, LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - if (IPAddr == "stopped") - { - sleep(5); - continue; - } - - string loginTmp = tmpLogDir + "/login_test.log"; - string cmd = "remote_command.sh " + IPAddr + " " + password + " 'ls' 1 > " + loginTmp; - system(cmd.c_str()); - - if (!oam.checkLogStatus(loginTmp, "README")) { - //check for RSA KEY ISSUE and fix - if (oam.checkLogStatus(loginTmp, "Host key verification failed")) - { - log.writeLog(__LINE__, "addModule - login failed, Host key verification failed, try fixing: " + moduleName, LOG_TYPE_DEBUG); - cmd = "rm -f " + homedir + "/.ssh/known_hosts"; - system(cmd.c_str()); - } - - log.writeLog(__LINE__, "addModule - login failed, retry login test: " + moduleName, LOG_TYPE_DEBUG); - sleep(5); - continue; - } - - // logged in - break; - } - - if ( retry >= 30 ) - { - log.writeLog(__LINE__, "addModule - Failed to log in to Instance: " + hostName, LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - log.writeLog(__LINE__, "addModule - Successful loggin: " + hostName, LOG_TYPE_DEBUG); - - log.writeLog(__LINE__, "addModule - Launched new Instance: " + hostName + "/" + IPAddr, LOG_TYPE_DEBUG); - - (*pt1).HostName = hostName; - (*pt1).IPAddr = IPAddr; - - //check if any volumes need to be attached - if ( moduleType == "um" ) - { - string UMStorageType = "internal"; - { - try - { - oam.getSystemConfig("UMStorageType", UMStorageType); - } - catch (...) {} - } - - if ( UMStorageType == "external" ) - { - //check if volume already assigned or need to create a new one - int moduleID = atoi((*listPT).DeviceName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE).c_str()); - - string volumeNameID = "UMVolumeName" + oam.itoa(moduleID); - string volumeName = oam::UnassignedName; - string deviceNameID = "UMVolumeDeviceName" + oam.itoa(moduleID); - string deviceName = oam::UnassignedName; - - try - { - oam.getSystemConfig( volumeNameID, volumeName); - oam.getSystemConfig( deviceNameID, deviceName); - } - catch (...) - {} - - if ( volumeName.empty() || volumeName == oam::UnassignedName ) - { - // need to create a new one - string device; - - try - { - - oam.addUMdisk(moduleID, volumeName, device); - } - catch (...) - { - log.writeLog(__LINE__, "addModule: volume create failed for um: " + moduleName, LOG_TYPE_CRITICAL); - pthread_mutex_unlock(&THREAD_LOCK); - } - - //attach to UM - log.writeLog(__LINE__, "addModule - attach new Volume to " + moduleName, LOG_TYPE_DEBUG); - - if (!oam.attachEC2Volume(volumeName, device, hostName)) - { - log.writeLog(__LINE__, "addModule: volume failed to attach to um: " + moduleName, LOG_TYPE_CRITICAL); - pthread_mutex_unlock(&THREAD_LOCK); - } - - try - { - Config* sysConfig = Config::makeConfig(); - - sysConfig->setConfig("Installation", volumeNameID, volumeName); - sysConfig->setConfig("Installation", deviceNameID, device); - - sysConfig->write(); - } - catch (...) - {} - - log.writeLog(__LINE__, "addModule - create/attach new volume: " + volumeName + "/" + device, LOG_TYPE_DEBUG); - - } - else - { - // one exist, detach and reattach it - - oam.detachEC2Volume( volumeName ); - - if (!oam.attachEC2Volume(volumeName, deviceName, hostName)) - { - log.writeLog(__LINE__, "addModule: volume failed to attached: " + volumeName, LOG_TYPE_CRITICAL); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - log.writeLog(__LINE__, "addModule - attach existing volume: " + volumeName + "/" + deviceName, LOG_TYPE_DEBUG); - } - } - } - } - - hostconfig.HostName = hostName; - if (storeHostnames) - hostconfig.IPAddr = hostName; - else - hostconfig.IPAddr = IPAddr; - hostconfig.NicID = (*pt1).NicID; - devicenetworkconfig.hostConfigList.push_back(hostconfig); - } - - setmoduletypeconfig.ModuleNetworkList.push_back(devicenetworkconfig); - } - - Config* sysConfig = Config::makeConfig(); - - //Add additional Process Ports - // all nodes: ProcessMonitor, ServerMonitor - // dm: NONE - // um: ExeMgr - // pm: NONE - - listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - Section = (*listPT).DeviceName + "_ProcessMonitor"; - - HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin(); - sysConfig->setConfig(Section, "IPAddr", (*pt1).IPAddr); - sysConfig->setConfig(Section, "Port", "8800"); - - Section = (*listPT).DeviceName + "_ServerMonitor"; - sysConfig->setConfig(Section, "IPAddr", (*pt1).IPAddr); - sysConfig->setConfig(Section, "Port", "8622"); - } - - if ( moduleType == "um" || - ( moduleType == "pm" && config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM ) || - ( moduleType == "pm" && PMwithUM == "y") ) - { - - listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - int moduleID = atoi((*listPT).DeviceName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE).c_str()); - int exemgrID = moduleID; - - if ( PMwithUM == "y" ) - { - // then go check for next available ID - exemgrID = 0; - - for ( int id = 2 ; ; id++ ) - { - string Section = "ExeMgr" + oam.itoa(id); - string moduleName; - - try - { - Config* sysConfig = Config::makeConfig(); - moduleName = sysConfig->getConfig(Section, "Module"); - } - catch (...) {} - - if ( moduleName.empty() ) - { - exemgrID = id; - break; - } - } - } - - Section = "ExeMgr" + oam.itoa(exemgrID); - HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin(); - sysConfig->setConfig(Section, "IPAddr", (*pt1).IPAddr); - sysConfig->setConfig(Section, "Port", "8601"); - sysConfig->setConfig(Section, "Module", (*listPT).DeviceName); - } - } - - if ( moduleType == "pm" ) - { - listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - Section = (*listPT).DeviceName + "_WriteEngineServer"; - - HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin(); - sysConfig->setConfig(Section, "IPAddr", (*pt1).IPAddr); - sysConfig->setConfig(Section, "Port", "8630"); - } - } - - log.writeLog(__LINE__, "addModule - Updated Process Ports", LOG_TYPE_DEBUG); - - string parentOAMModuleHostName; - string parentOAMModuleIPAddr; - - //setup dbroot entries - if (moduleType == "pm" && manualFlag) - { - const string MODULE_DBROOTID = "ModuleDBRootID"; - const string MODULE_DBROOT_COUNT = "ModuleDBRootCount"; - - listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - string moduleID = (*listPT).DeviceName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE); - - string ModuleDBRootCount = MODULE_DBROOT_COUNT + moduleID + "-3"; - sysConfig->setConfig("SystemModuleConfig", ModuleDBRootCount, "0"); - - string ModuleDBrootID = MODULE_DBROOTID + moduleID + "-1-3"; - sysConfig->setConfig("SystemModuleConfig", ModuleDBrootID, oam::UnassignedName); - } - } - - //update Calpont Config table - try - { - sysConfig->write(); - } - catch (...) - { - log.writeLog(__LINE__, "addModule - ERROR: sysConfig->write", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - //write Columnstore.xml Module section - try - { - oam.setSystemConfig(moduleType, setmoduletypeconfig); - log.writeLog(__LINE__, "addModule - Updated Module Section of Config file", LOG_TYPE_DEBUG); - } - catch (...) - { - log.writeLog(__LINE__, "addModule - ERROR: setSystemConfig", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - pthread_mutex_unlock(&THREAD_LOCK); - - //check if any added modules are Active OAM - bool activeOAM = false; - listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - if ( (*listPT).DeviceName == config.OAMParentName() ) - { - activeOAM = true; - break; - } - } - - // - //send message to Process Monitor to add module/processes to shared memory - // - if ( !activeOAM ) - { - try - { - ByteStream obs; - - obs << (ByteStream::byte) ADD_MODULE; - obs << (ByteStream::byte) AddModuleCount; - - listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - obs << (*listPT).DeviceName; - } - - //pass NIC Hostnames - vector nicHostNames; - - listPT = devicenetworklist.begin(); - HostConfig hostconfig; - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin(); - - for ( ; pt1 != (*listPT).hostConfigList.end() ; pt1++) - { - nicHostNames.push_back((*pt1).HostName); - } - } - - obs << (ByteStream::byte) nicHostNames.size(); - - vector::iterator pt2 = nicHostNames.begin(); - - for ( ; pt2 != nicHostNames.end() ; pt2++) - { - obs << *pt2; - } - - sendStatusUpdate(obs, ADD_MODULE); - log.writeLog(__LINE__, "addModule - Updated Shared Memory", LOG_TYPE_DEBUG); - } - catch (...) - { - log.writeLog(__LINE__, "addModule - ERROR: sendStatusUpdate error", LOG_TYPE_ERROR); - return API_FAILURE; - } - } - - //distribute config file - distributeConfigFile("system"); - - string cmd = "rm -f " + homedir + "/.ssh/known_hosts > /dev/null 2>&1"; - system(cmd.c_str()); - - listPT = devicenetworklist.begin(); - - //distribute config file - distributeConfigFile("system"); - distributeConfigFile("system", "ProcessConfig.xml"); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - string remoteModuleName = (*listPT).DeviceName; - string remoteModuleType = remoteModuleName.substr(0, MAX_MODULE_TYPE_SIZE); - HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin(); - string remoteModuleIP = (*pt1).IPAddr; - string remoteHostName = (*pt1).HostName; - - string dir = "/var/lib/columnstore/local/etc" + remoteModuleName; - - cmd = "mkdir " + dir + " > /dev/null 2>&1"; - system(cmd.c_str()); - - if ( remoteModuleType == "um" ) - { - cmd = "cp /var/lib/columnstore/local/etc/um1/* " + dir + "/."; - system(cmd.c_str()); - } - else if ( remoteModuleType == "pm" ) - { - cmd = "cp /var/lib/columnstore/local/etc/pm1/* " + dir + "/."; - system(cmd.c_str()); - } - - log.writeLog(__LINE__, "addModule - created directory and custom OS files for " + remoteModuleName, LOG_TYPE_DEBUG); - - //create module file - if ( !createModuleFile(remoteModuleName) ) - { - log.writeLog(__LINE__, "addModule - ERROR: createModuleFile failed", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - log.writeLog(__LINE__, "addModule - create module file for " + remoteModuleName, LOG_TYPE_DEBUG); - - if ( remoteModuleType == "pm" ) - { - //setup Standby OAM Parent, if needed - if ( config.OAMStandbyName() == oam::UnassignedName ) - setStandbyModule(remoteModuleName, false); - } - - string logFile = tmpLogDir + "/" + remoteModuleName + "_mcs_module_installer.log"; - log.writeLog(__LINE__, "addModule - mcs_module_installer run for " + remoteModuleName, LOG_TYPE_DEBUG); - cmd = "mcs_module_installer.sh " + remoteModuleName + " " + remoteModuleIP + " " + password + " 1 >" + logFile; - log.writeLog(__LINE__, "addModule cmd: " + cmd, LOG_TYPE_DEBUG); - int rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) != 0) - { - log.writeLog(__LINE__, "addModule - ERROR: " + logFile + " failed, retry", LOG_TYPE_DEBUG); - - DeviceNetworkList devicenetworklistR; - DeviceNetworkConfig devicenetworkconfigR; - HostConfig hostconfig; - - devicenetworkconfigR.DeviceName = remoteModuleName; - - hostconfig.IPAddr = oam::UnassignedName; - - hostconfig.HostName = oam::UnassignedName; - hostconfig.NicID = 1; - devicenetworkconfigR.hostConfigList.push_back(hostconfig); - - devicenetworklistR.push_back(devicenetworkconfigR); - - processManager.removeModule(devicenetworklistR, false); - - log.writeLog(__LINE__, "addModule - Remove Module Completed", LOG_TYPE_DEBUG); - - pthread_mutex_unlock(&THREAD_LOCK); - cmd = "/bin/cp -f " + logFile + " " + logFile + "failed"; - system(cmd.c_str()); - processManager.setModuleState(remoteModuleName, oam::FAILED); - return API_FAILURE; - } - if (manualFlag) - //set new module to disable state if manual add - disableModule(remoteModuleName, true); - - // add to monitor list - moduleInfoList.insert(moduleList::value_type(remoteModuleName, 0)); - - processManager.configureModule(remoteModuleName); - } - - //delay to give time for ProcMon to start after the config is sent and procmon restarts - log.writeLog(__LINE__, "addModule - sleep 60 - give ProcMon time to CONFIGURE and restart", LOG_TYPE_DEBUG); - sleep(60); - - //start mysqld on the new modules so mysql replication can be setup - listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - processManager.startProcess((*listPT).DeviceName, "mysqld", oam::STATUS_UPDATE); - } - - log.writeLog(__LINE__, "Setup MySQL Replication for new Modules being Added", LOG_TYPE_DEBUG); - processManager.setMySQLReplication(devicenetworklist, oam::UnassignedName, true, password, true, true ); - - //stop mysqld - listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - processManager.stopProcess((*listPT).DeviceName, "mysqld", oam::FORCEFUL, true ); - } - - return API_SUCCESS; -} - -/****************************************************************************************** -* @brief removeModule -* -* purpose: Remove Module to system configuration -* -******************************************************************************************/ -int ProcessManager::removeModule(oam::DeviceNetworkList devicenetworklist, bool manualFlag) -{ - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - - ModuleTypeConfig moduletypeconfig; - ModuleTypeConfig setmoduletypeconfig; - Oam oam; - string Section; - - pthread_mutex_lock(&THREAD_LOCK); - - //get module count being removed - int RemoveModuleCount = devicenetworklist.size(); - DeviceNetworkList::iterator listPT = devicenetworklist.begin(); - - // - //Get System Configuration - // - listPT = devicenetworklist.begin(); - string moduleType = (*listPT).DeviceName.substr(0, MAX_MODULE_TYPE_SIZE); - - try - { - oam.getSystemConfig(moduleType, moduletypeconfig); - } - catch (...) - { - log.writeLog(__LINE__, "removeModule - ERROR: getSystemConfig", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - setmoduletypeconfig = moduletypeconfig; - - // get current Module Type Count and validate request - int oldModuleCount = moduletypeconfig.ModuleCount; - - if ( oldModuleCount < RemoveModuleCount ) - { - log.writeLog(__LINE__, "removeModule - ERROR: remove count is larger than ModuleType count", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_INVALID_PARAMETER; - } - - //validate the module list to be removed - listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - int returnStatus = oam.validateModule((*listPT).DeviceName); - - if (returnStatus != API_SUCCESS) - { - log.writeLog(__LINE__, "removeModule - ERROR: invalid module: " + (*listPT).DeviceName, LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_INVALID_PARAMETER; - } - } - - if (manualFlag) - { - //stopModules being removed with the REMOVE option, which will stop process - listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - string moduleName = (*listPT).DeviceName; - log.writeLog(__LINE__, "removeModule - stopping module: " + moduleName, LOG_TYPE_DEBUG); - - //don't allow remove of Active PM Module - if ( moduleName == config.OAMParentName() ) - { - log.writeLog(__LINE__, "removeModule - ERROR: can't remove current module (Active Parent OAM) ", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_INVALID_PARAMETER; - } - - int status; - status = stopModule(moduleName, REMOVE, true); - - if (status == API_SUCCESS) - { - log.writeLog(__LINE__, "removeModule - stopModule Successfully " + moduleName, LOG_TYPE_DEBUG); - //check for SIMPLEX Processes on mate might need to be started - pthread_mutex_unlock(&THREAD_LOCK); - checkSimplexModule(moduleName); - pthread_mutex_lock(&THREAD_LOCK); - } - else - log.writeLog(__LINE__, "removeModule - stopModule " + moduleName, LOG_TYPE_ERROR); - } - } - - int newModuleCount = oldModuleCount - RemoveModuleCount; - setmoduletypeconfig.ModuleCount = newModuleCount; - - string systemName; - string AmazonAutoTagging; - { - try - { - oam.getSystemConfig("SystemName", systemName); - oam.getSystemConfig("AmazonAutoTagging", AmazonAutoTagging); - } - catch (...) {} - } - - //Clear out Module IP and Hostnames - listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - string moduleName = (*listPT).DeviceName; - log.writeLog(__LINE__, "removeModule - removing module: " + moduleName, LOG_TYPE_DEBUG); - - //don't allow remove of Active PM Module - if ( moduleName == config.OAMParentName() ) - { - log.writeLog(__LINE__, "removeModule - ERROR: can't remove current module (Active Parent OAM) ", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_INVALID_PARAMETER; - } - - DeviceNetworkList::iterator pt = setmoduletypeconfig.ModuleNetworkList.begin(); - - for ( ; pt != setmoduletypeconfig.ModuleNetworkList.end() ; pt++) - { - if ( moduleName == (*pt).DeviceName ) - { - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++ ) - { - //if cloud, delete instance - if (amazon) - { - log.writeLog(__LINE__, "removeModule - terminate instance: " + (*pt1).HostName, LOG_TYPE_DEBUG); - oam.terminateEC2Instance( (*pt1).HostName ); - - // update instance tag - if ( AmazonAutoTagging == "y" ) - { - string tagValue = systemName + "-" + moduleName + "-terminated"; - oam.createEC2tag( (*pt1).HostName, "Name", tagValue ); - } - - //check if any volumes need to be deleted - if ( moduleType == "um" ) - { - string UMStorageType = "internal"; - { - try - { - oam.getSystemConfig("UMStorageType", UMStorageType); - } - catch (...) {} - } - - if ( UMStorageType == "external" ) - { - //check if volume already assigned or need to create a new one - int moduleID = atoi(moduleName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE).c_str()); - - string volumeNameID = "UMVolumeName" + oam.itoa(moduleID); - string volumeName = oam::UnassignedName; - string deviceNameID = "UMVolumeDeviceName" + oam.itoa(moduleID); - string deviceName = oam::UnassignedName; - - try - { - oam.getSystemConfig( volumeNameID, volumeName); - oam.getSystemConfig( deviceNameID, deviceName); - } - catch (...) - {} - - if ( !volumeName.empty() || volumeName != oam::UnassignedName ) - { - log.writeLog(__LINE__, "removeModule - detach / remove volume: " + volumeName + "/" + deviceName, LOG_TYPE_DEBUG); - oam.detachEC2Volume( volumeName ); - - oam.deleteEC2Volume( volumeName ); - - try - { - Config* sysConfig = Config::makeConfig(); - - sysConfig->setConfig("Installation", volumeNameID, oam::UnassignedName); - sysConfig->setConfig("Installation", deviceNameID, oam::UnassignedName); - - sysConfig->write(); - } - catch (...) - {} - } - } - } - } - - clearNICAlarms((*pt1).HostName); - (*pt1).IPAddr = oam::UnassignedIpAddr; - (*pt1).HostName = oam::UnassignedName; - } - - break; - } - } - } - - //Remove Process Ports - // all nodes: ProcessMonitor, ServerMonitor - // dm: NONE - // um: ExeMgr - // pm: NONE - - Config* sysConfig = Config::makeConfig(); - - listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - Section = (*listPT).DeviceName + "_ProcessMonitor"; - sysConfig->setConfig(Section, "IPAddr", oam::UnassignedName); - - Section = (*listPT).DeviceName + "_ServerMonitor"; - sysConfig->setConfig(Section, "IPAddr", oam::UnassignedName); - } - - if ( moduleType == "um" || - ( moduleType == "pm" && config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM ) || - ( moduleType == "um" && config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM ) || - ( moduleType == "pm" && config.ServerInstallType() == oam::INSTALL_COMBINE_PM_UM ) || - ( moduleType == "pm" && PMwithUM == "y" ) ) - { - - listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - // go find ExeMgr ID by moduleName - for ( int id = 1 ; ; id++ ) - { - string Section = "ExeMgr" + oam.itoa(id); - string moduleName; - - try - { - Config* sysConfig = Config::makeConfig(); - moduleName = sysConfig->getConfig(Section, "Module"); - - if ( moduleName == (*listPT).DeviceName ) - { - // match - sysConfig->setConfig(Section, "IPAddr", oam::UnassignedName); - sysConfig->setConfig(Section, "Module", oam::UnassignedName); - - break; - } - } - catch (...) {} - - if ( moduleName.empty() ) - break; - } - } - } - - log.writeLog(__LINE__, "removeModule - Updated Process Ports", LOG_TYPE_DEBUG); - - //unassign dbroot entries - if (moduleType == "pm") - { - const string MODULE_DBROOTID = "ModuleDBRootID"; - const string MODULE_DBROOT_COUNT = "ModuleDBRootCount"; - - listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - string moduleID = (*listPT).DeviceName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE); - - string ModuleDBRootCount = MODULE_DBROOT_COUNT + moduleID + "-3"; - sysConfig->setConfig("SystemModuleConfig", ModuleDBRootCount, oam::UnassignedName); - - string ModuleDBrootID = MODULE_DBROOTID + moduleID + "-1-3"; - sysConfig->setConfig("SystemModuleConfig", ModuleDBrootID, oam::UnassignedName); - } - } - - log.writeLog(__LINE__, "removeModule - Updated DBRoot paramaters", LOG_TYPE_DEBUG); - - //update Calpont Config table - try - { - sysConfig->write(); - } - catch (...) - { - log.writeLog(__LINE__, "removeModule - ERROR: sysConfig->write", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - //write Columnstore.xml Module section - try - { - oam.setSystemConfig(moduleType, setmoduletypeconfig); - log.writeLog(__LINE__, "removeModule - Updated Module Section of Config file", LOG_TYPE_DEBUG); - } - catch (...) - { - log.writeLog(__LINE__, "removeModule - ERROR: setSystemConfig", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - //clear out the known_host file, sometimes causes a failure on amazon during addModule - if ( amazon ) - { - string homedir = "/root"; - - if (!rootUser) - { - char* p = getenv("HOME"); - - if (p && *p) - homedir = p; - } - - string cmd = "unlink " + homedir + ".ssh/know_hosts > /dev/null 2>&1"; - system(cmd.c_str()); - } - - pthread_mutex_unlock(&THREAD_LOCK); - - //check if any removed modules was Standby OAM or Active OAM - bool activeOAM = false; - listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - if ( (*listPT).DeviceName == config.OAMStandbyName() ) - clearStandbyModule(); - else if ( (*listPT).DeviceName == config.OAMParentName() ) - activeOAM = true; - } - - // - //send message to Process Monitor to remove module/processes to shared memory - // - if ( !activeOAM ) - { - try - { - ByteStream obs; - - obs << (ByteStream::byte) REMOVE_MODULE; - obs << (ByteStream::byte) RemoveModuleCount; - - listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - obs << (*listPT).DeviceName; - } - - sendStatusUpdate(obs, REMOVE_MODULE); - log.writeLog(__LINE__, "removeModule - Updated Shared Memory", LOG_TYPE_DEBUG); - } - catch (...) - { - log.writeLog(__LINE__, "removeModule - ERROR: sendStatusUpdate error", LOG_TYPE_ERROR); - return API_FAILURE; - } - } - - if ( moduleType == "pm" ) - { - if ( updatePMSconfig() != API_SUCCESS ) - return API_FAILURE; - } - - //Update DBRM section of Columnstore.xml - if ( updateWorkerNodeconfig() != API_SUCCESS ) - return API_FAILURE; - - // remove all associated alarms for this modules being removed - listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - clearModuleAlarms( (*listPT).DeviceName ); - log.writeLog(__LINE__, "removeModule - successfully removed module: " + (*listPT).DeviceName, LOG_TYPE_DEBUG); - } - - //distribute config file - distributeConfigFile("system"); - - string password; - // check if there is a root password stored - string rpw = oam::UnassignedName; - - try - { - oam.getSystemConfig("rpw", password); - } - catch (...) - { - rpw = "root"; - } - - return API_SUCCESS; -} - -/****************************************************************************************** -* @brief reconfigureModule -* -* purpose: Reconfigure Module in system configuration -* -******************************************************************************************/ -int ProcessManager::reconfigureModule(oam::DeviceNetworkList devicenetworklist) -{ - ModuleTypeConfig reconfiguremoduletypeconfig; - ModuleTypeConfig setreconfiguremoduletypeconfig; - ModuleTypeConfig moduletypeconfig; - DeviceNetworkConfig devicenetworkconfig; - Oam oam; - string Section; - - pthread_mutex_lock(&THREAD_LOCK); - - DeviceNetworkList::iterator listPT = devicenetworklist.begin(); - - //get module name being reconfigured - string moduleName = (*listPT).DeviceName; - string moduleType = moduleName.substr(0, MAX_MODULE_TYPE_SIZE); - - //get module type being configured as - listPT++; - string reconfigureModuleName = (*listPT).DeviceName; - string reconfigureModuleType = reconfigureModuleName.substr(0, MAX_MODULE_TYPE_SIZE); - string reconfigureHostName2; - string reconfigureIpAddr2; - int reconfigureNicId2 = 0; - - if ( !(*listPT).hostConfigList.empty()) - { - HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin(); - reconfigureHostName2 = (*pt1).HostName; - reconfigureIpAddr2 = (*pt1).IPAddr; - reconfigureNicId2 = (*pt1).NicID; - } - - int status = stopModule(moduleName, GRACEFUL, true); - - if (status == API_SUCCESS) - { - log.writeLog(__LINE__, "reconfigureModule - stopModule Successfully " + moduleName, LOG_TYPE_DEBUG); - //check for SIMPLEX Processes on mate might need to be started - pthread_mutex_unlock(&THREAD_LOCK); - checkSimplexModule(moduleName); - pthread_mutex_lock(&THREAD_LOCK); - } - else - log.writeLog(__LINE__, "reconfigureModule - stopModule " + moduleName, LOG_TYPE_ERROR); - - // - //Get Module Configuration - // - - try - { - oam.getSystemConfig(moduleType, moduletypeconfig); - oam.getSystemConfig(reconfigureModuleType, reconfiguremoduletypeconfig); - } - catch (...) - { - log.writeLog(__LINE__, "reconfigureModule - ERROR: getSystemConfig", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - setreconfiguremoduletypeconfig = reconfiguremoduletypeconfig; - - // update Module Type Counts - setreconfiguremoduletypeconfig.ModuleCount++; - - Config* sysConfig = Config::makeConfig(); - - //Move Module IP and Hostnames - string IPaddress = oam::UnassignedIpAddr; - HostConfig hostconfig; - DeviceNetworkList::iterator pt = moduletypeconfig.ModuleNetworkList.begin(); - - for ( ; pt != moduletypeconfig.ModuleNetworkList.end() ; pt++) - { - if ( moduleName == (*pt).DeviceName ) - { - devicenetworkconfig.DeviceName = reconfigureModuleName; - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++) - { - if ( pt1 == (*pt).hostConfigList.begin() ) - //save first IP for Process Port usage - IPaddress = (*pt1).IPAddr; - - hostconfig.IPAddr = (*pt1).IPAddr; - hostconfig.HostName = (*pt1).HostName; - hostconfig.NicID = (*pt1).NicID; - devicenetworkconfig.hostConfigList.push_back(hostconfig); - } - - //configure any secondary NIC info passed from console - if ( ! reconfigureHostName2.empty() ) - { - hostconfig.IPAddr = reconfigureIpAddr2; - hostconfig.HostName = reconfigureHostName2; - hostconfig.NicID = reconfigureNicId2; - devicenetworkconfig.hostConfigList.push_back(hostconfig); - } - - setreconfiguremoduletypeconfig.ModuleNetworkList.push_back(devicenetworkconfig); - break; - } - } - - if ( IPaddress == oam::UnassignedIpAddr ) - { - log.writeLog(__LINE__, "reconfigureModule - ERROR: module IP is unassigned", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - //Update Process Ports - // all nodes: ProcessMonitor, ServerMonitor - // dm: NONE - // um: ExeMgr - // pm: NONE - - Section = reconfigureModuleName + "_ProcessMonitor"; - sysConfig->setConfig(Section, "IPAddr", IPaddress); - sysConfig->setConfig(Section, "Port", "8800"); - - Section = reconfigureModuleName + "_ServerMonitor"; - sysConfig->setConfig(Section, "IPAddr", IPaddress); - sysConfig->setConfig(Section, "Port", "8622"); - - if ( moduleType == "um" || - ( moduleType == "pm" && config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM ) || - ( moduleType == "pm" && config.ServerInstallType() == oam::INSTALL_COMBINE_PM_UM ) ) - { - - int moduleID = atoi(moduleName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE).c_str()); - Section = "ExeMgr" + oam.itoa(moduleID); - sysConfig->setConfig(Section, "IPAddr", oam::UnassignedIpAddr); - } - else - { - //PM TO UM - int moduleID = atoi(reconfigureModuleName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE).c_str()); - Section = "ExeMgr" + oam.itoa(moduleID); - sysConfig->setConfig(Section, "IPAddr", IPaddress); - sysConfig->setConfig(Section, "Port", "8601"); - } - - log.writeLog(__LINE__, "reconfigureModule - Updated Process Ports", LOG_TYPE_DEBUG); - - //update Calpont Config table - try - { - sysConfig->write(); - } - catch (...) - { - log.writeLog(__LINE__, "reconfigureModule - ERROR: sysConfig->write", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - //write Columnstore.xml Module section - try - { - oam.setSystemConfig(reconfigureModuleType, setreconfiguremoduletypeconfig); - log.writeLog(__LINE__, "reconfigureModule - Updated Module Section of Config file", LOG_TYPE_DEBUG); - } - catch (...) - { - log.writeLog(__LINE__, "reconfigureModule - ERROR: setSystemConfig", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - //distribute config file - distributeConfigFile(moduleName); - - // - //Send Reconfigure msg to Module's Process-Monitor being reconfigured - // - ByteStream msg; - ByteStream::byte requestID = RECONFIGURE; - - msg << requestID; - msg << reconfigureModuleName; - - int returnStatus = sendMsgProcMon( moduleName, msg, requestID ); - - if ( returnStatus == API_SUCCESS) - //log the event - log.writeLog(__LINE__, "reconfigureModule - procmon reconfigure successful", LOG_TYPE_DEBUG); - else - { - log.writeLog(__LINE__, "reconfigureModule - procmon reconfigure failed", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - ModuleTypeConfig setmoduletypeconfig; - - try - { - oam.getSystemConfig(moduleType, setmoduletypeconfig); - } - catch (...) - { - log.writeLog(__LINE__, "reconfigureModule - ERROR: getSystemConfig", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - // update Module Type Counts - setmoduletypeconfig.ModuleCount--; - - //Clear Module IP and Hostnames - pt = setmoduletypeconfig.ModuleNetworkList.begin(); - - for ( ; pt != setmoduletypeconfig.ModuleNetworkList.end() ; pt++) - { - if ( moduleName == (*pt).DeviceName ) - { - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++) - { - (*pt1).IPAddr = oam::UnassignedIpAddr; - (*pt1).HostName = oam::UnassignedName; - } - - break; - } - } - - //Update Process Ports - // all nodes: ProcessMonitor, ServerMonitor - // dm: NONE - // um: ExeMgr - // pm: NONE - - Section = moduleName + "_ProcessMonitor"; - sysConfig->setConfig(Section, "IPAddr", oam::UnassignedIpAddr); - - Section = moduleName + "_ServerMonitor"; - sysConfig->setConfig(Section, "IPAddr", oam::UnassignedIpAddr); - - log.writeLog(__LINE__, "reconfigureModule - Updated Process Ports", LOG_TYPE_DEBUG); - - //update Calpont Config table - try - { - sysConfig->write(); - } - catch (...) - { - log.writeLog(__LINE__, "reconfigureModule - ERROR: sysConfig->write", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - //write Columnstore.xml Module section - try - { - oam.setSystemConfig(moduleType, setmoduletypeconfig); - log.writeLog(__LINE__, "reconfigureModule - Updated Module Section of Config file", LOG_TYPE_DEBUG); - } - catch (...) - { - log.writeLog(__LINE__, "reconfigureModule - ERROR: setSystemConfig", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - pthread_mutex_unlock(&THREAD_LOCK); - - // - //send message to Process Monitor to remove/add module/processes to shared memory - // - try - { - ByteStream obs; - - obs << (ByteStream::byte) REMOVE_MODULE; - - obs << (ByteStream::byte) 1; - obs << moduleName; - - sendStatusUpdate(obs, REMOVE_MODULE); - log.writeLog(__LINE__, "reconfigureModule - module removed from Shared Memory", LOG_TYPE_DEBUG); - } - catch (...) - { - log.writeLog(__LINE__, "reconfigureModule - ERROR: sendStatusUpdate error", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - try - { - ByteStream obs; - - obs << (ByteStream::byte) ADD_MODULE; - - obs << (ByteStream::byte) 1; - obs << reconfigureModuleName; - - //pass NIC Hostnames - if ( ! reconfigureHostName2.empty() ) - { - obs << (ByteStream::byte) 1; - obs << hostconfig.HostName; - } - else - obs << (ByteStream::byte) 0; - - sendStatusUpdate(obs, ADD_MODULE); - log.writeLog(__LINE__, "reconfigureModule - module added from Shared Memory", LOG_TYPE_DEBUG); - } - catch (...) - { - log.writeLog(__LINE__, "reconfigureModule - ERROR: sendStatusUpdate error", LOG_TYPE_ERROR); - return API_FAILURE; - } - - if ( moduleType == "pm" ) - { - if ( updatePMSconfig() != API_SUCCESS ) - return API_FAILURE; - } - - //Update DBRM section of Columnstore.xml - if ( updateWorkerNodeconfig() != API_SUCCESS ) - return API_FAILURE; - - // remove all associated alarms for this modules being removed - clearModuleAlarms( moduleName ); - - //distribute config file - distributeConfigFile("system"); - - return API_SUCCESS; -} - -/****************************************************************************************** -* @brief configureModule -* -* purpose: Configure Module sends message to procmon to setup modulename -* -******************************************************************************************/ -int ProcessManager::configureModule(std::string moduleName) -{ - log.writeLog(__LINE__, "configureModule: Process module " + moduleName, LOG_TYPE_DEBUG); - - //distribute config file - distributeConfigFile(moduleName); - distributeConfigFile(moduleName, "ProcessConfig.xml"); - - // - //Send Configure msg to Module's Process-Monitor being reconfigured - // - ByteStream msg; - ByteStream::byte requestID = CONFIGURE; - - msg << requestID; - msg << moduleName; - - int returnStatus = sendMsgProcMon( moduleName, msg, requestID ); - - if ( returnStatus == API_SUCCESS) - //log the event - log.writeLog(__LINE__, "configureModule - procmon configure successful", LOG_TYPE_DEBUG); - else - { - log.writeLog(__LINE__, "configureModule - procmon configure failed", LOG_TYPE_ERROR); - return API_FAILURE; - } - - return API_SUCCESS; -} - - -/****************************************************************************************** -* @brief sendMsgProcMon -* -* purpose: Sends a Msg to ProcMon -* -******************************************************************************************/ -int ProcessManager::sendMsgProcMon( std::string module, ByteStream msg, int requestID, int timeout ) -{ - string msgPort; - int returnStatus = API_FAILURE; - Oam oam; - - if ( module != config.moduleName() ) - { - msgPort = module + "_ProcessMonitor"; - - // do a ping test to determine a quick failure - Config* sysConfig = Config::makeConfig(); - - string IPAddr = sysConfig->getConfig(msgPort, "IPAddr"); - - if ( IPAddr == oam::UnassignedIpAddr ) - { - log.writeLog(__LINE__, "sendMsgProcMon ping failure " + module + " " + IPAddr, LOG_TYPE_ERROR); - return oam::API_SUCCESS; - } - - string cmdLine = "ping "; - string cmdOption = " -c 1 -w 5 >> /dev/null"; - string cmd = cmdLine + IPAddr + cmdOption; - - if ( system(cmd.c_str()) != 0) - { - //ping failure - log.writeLog(__LINE__, "sendMsgProcMon ping failure " + module + " " + IPAddr, LOG_TYPE_ERROR); - return oam::API_SUCCESS; - } - } - else - // use the localhost IP Address - msgPort = "localhost_ProcessMonitor"; - - log.writeLog(__LINE__, "sendMsgProcMon: Process module " + module, LOG_TYPE_DEBUG); - - try - { - MessageQueueClient mqRequest(msgPort); - mqRequest.write(msg); - - if ( timeout > 0 ) - { - // wait for response - ByteStream::byte returnACK; - ByteStream::byte returnRequestID; - ByteStream::byte requestStatus; - ByteStream receivedMSG; - - struct timespec ts = { timeout, 0 }; - - // get current time in seconds - time_t startTimeSec; - time (&startTimeSec); - - while (true) - { - try - { - receivedMSG = mqRequest.read(&ts); - } - catch (SocketClosed& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on mqRequest.read, module " + module + " : " + error, LOG_TYPE_ERROR); - return returnStatus; - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on mqRequest.read: Caught unknown exception! module " + module, LOG_TYPE_ERROR); - return returnStatus; - } - - if (receivedMSG.length() > 0) - { - receivedMSG >> returnACK; - receivedMSG >> returnRequestID; - receivedMSG >> requestStatus; - - if ( requestID == oam::MASTERREP ) - { - receivedMSG >> masterLogFile; - receivedMSG >> masterLogPos; - } - - if ( returnACK == oam::ACK && returnRequestID == requestID) - { - // ACK for this request - returnStatus = requestStatus; - break; - } - else - log.writeLog(__LINE__, "sendMsgProcMon: invalid message " + module, LOG_TYPE_ERROR); - } - else - { - //api timeout occurred, check if retry should be done - // get current time in seconds - time_t endTimeSec; - time (&endTimeSec); - - if ( timeout <= (endTimeSec - startTimeSec) ) - { - log.writeLog(__LINE__, "sendMsgProcMon: ProcMon Msg timeout on module " + module, LOG_TYPE_ERROR); - break; - } - } - } - } - else - returnStatus = oam::API_SUCCESS; - - mqRequest.shutdown(); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueClient: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueClient: Caught unknown exception!", LOG_TYPE_ERROR); - } - - return returnStatus; -} - -/****************************************************************************************** -* @brief sendMsgProcMon1 -* -* purpose: Sends a Msg to ProcMon -* -******************************************************************************************/ -std::string ProcessManager::sendMsgProcMon1( std::string module, ByteStream msg, int requestID ) -{ - string msgPort; - string returnStatus = "FAILED"; - - if ( module != config.moduleName() ) - { - msgPort = module + "_ProcessMonitor"; - - // do a ping test to determine a quick failure - Config* sysConfig = Config::makeConfig(); - - string IPAddr = sysConfig->getConfig(msgPort, "IPAddr"); - - string cmdLine = "ping "; - string cmdOption = " -c 1 -w 5 >> /dev/null"; - string cmd = cmdLine + IPAddr + cmdOption; - - if ( system(cmd.c_str()) != 0 ) - { - //ping failure - log.writeLog(__LINE__, "sendMsgProcMon ping failure", LOG_TYPE_ERROR); - return returnStatus; - } - } - else - // use the localhost IP Address - msgPort = "localhost_ProcessMonitor"; - - try - { - MessageQueueClient mqRequest(msgPort); - mqRequest.write(msg); - - // wait 30 seconds for response - ByteStream::byte returnACK; - ByteStream::byte returnRequestID; - string requestStatus; - ByteStream receivedMSG; - - struct timespec ts = { 30, 0 }; - - try - { - receivedMSG = mqRequest.read(&ts); - } - catch (SocketClosed& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on mqRequest.read: " + error, LOG_TYPE_ERROR); - return returnStatus; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on mqRequest.read: Caught unknown exception!", LOG_TYPE_ERROR); - return returnStatus; - } - - if (receivedMSG.length() > 0) - { - receivedMSG >> returnACK; - receivedMSG >> returnRequestID; - receivedMSG >> requestStatus; - - if ( returnACK == oam::ACK && returnRequestID == requestID) - { - // ACK for this request - returnStatus = requestStatus; - } - } - else - log.writeLog(__LINE__, "sendMsgProcMon1: ProcMon Msg timeout on module " + module, LOG_TYPE_ERROR); - - mqRequest.shutdown(); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueClient: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueClient: Caught unknown exception!", LOG_TYPE_ERROR); - } - - return returnStatus; -} - -/****************************************************************************************** -* @brief saveBRM -* -* purpose: Execute the reset_locks then save BRM data script -* -******************************************************************************************/ -void ProcessManager::saveBRM(bool skipSession, bool clearshm) -{ - Oam oam; - string logdir("/var/log/mariadb/columnstore"); - - if (access(logdir.c_str(), W_OK) != 0) logdir = tmpLogDir; - - log.writeLog(__LINE__, "Running reset_locks", LOG_TYPE_DEBUG); - - string skip = " "; - - if ( skipSession ) - skip = "-s"; - - string cmd = "reset_locks " + skip + " > " + logdir + "/reset_locks.log1 2>&1"; - int rtnCode = system(cmd.c_str()); - log.writeLog(__LINE__, "Ran reset_locks", LOG_TYPE_DEBUG); - - log.writeLog(__LINE__, "Running DBRM save_brm", LOG_TYPE_DEBUG); - - cmd = "save_brm > " + logdir + "/save_brm.log1 2>&1"; - rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) != 1) - { - log.writeLog(__LINE__, "Successfully ran DBRM save_brm", LOG_TYPE_DEBUG); - } - else - log.writeLog(__LINE__, "Error running DBRM save_brm", LOG_TYPE_ERROR); - - if ( clearshm ) - { - cmd = "clearShm -c > /dev/null 2>&1"; - rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) != 1) - { - log.writeLog(__LINE__, "Successfully ran DBRM clearShm", LOG_TYPE_DEBUG); - } - else - log.writeLog(__LINE__, "Error running DBRM clearShm", LOG_TYPE_ERROR); - } - -} - - -/****************************************************************************************** -* @brief setQuerySystemState -* -* purpose: set query system state not ready -* -******************************************************************************************/ -void ProcessManager::setQuerySystemState(bool set) -{ - Oam oam; - BRM::DBRM dbrm; - - try - { - dbrm.setSystemQueryReady(set); - log.writeLog(__LINE__, "setSystemQueryReady = " + oam.itoa(set), LOG_TYPE_DEBUG); - - try { - dbrm.setSystemReady(set); - log.writeLog(__LINE__, "setSystemReady = " + oam.itoa(set), LOG_TYPE_DEBUG); - } - catch(...) - { - log.writeLog(__LINE__, "setSystemReady failed", LOG_TYPE_DEBUG); - log.writeLog(__LINE__, "setSystemReady failed", LOG_TYPE_ERROR); - } - } - catch(...) - { - log.writeLog(__LINE__, "setSystemQueryReady failed", LOG_TYPE_DEBUG); - log.writeLog(__LINE__, "setSystemQueryReady failed", LOG_TYPE_ERROR); - } -} - - -/****************************************************************************************** -* @brief createModuleFile -* -* purpose: Create a module file for remote server -* -******************************************************************************************/ -bool ProcessManager::createModuleFile(string remoteModuleName) -{ - // Read Local Install flag - - string fileName = "/var/lib/columnstore/local/etc/" + remoteModuleName + "/module"; - - unlink (fileName.c_str()); - ofstream newFile (fileName.c_str()); - - string cmd = "echo " + remoteModuleName + " > " + fileName; - system(cmd.c_str()); - - newFile.close(); - - return true; -} - - -/***************************************************************************************** -* @brief startSystemThread -* -* purpose: Send Messages to Module Process Monitors to start Processes -* -*****************************************************************************************/ -void* startSystemThread(oam::DeviceNetworkList* Devicenetworklist) -{ - assert(Devicenetworklist); - oam::DeviceNetworkList devicenetworklist = *Devicenetworklist; - - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - Oam oam; - SystemModuleTypeConfig systemmoduletypeconfig; - ALARMManager aManager; - int status = API_SUCCESS; - bool exitThread = false; - int exitThreadStatus = oam::API_SUCCESS; - - pthread_t ThreadId; - ThreadId = pthread_self(); - - log.writeLog(__LINE__, "startSystemThread launched", LOG_TYPE_DEBUG); - - // get system status and exit thread if in AUTO_INIT OR MAN_INIT - SystemStatus systemstatus; - - try - { - oam.getSystemStatus(systemstatus); - - if (systemstatus.SystemOpState == AUTO_INIT || - systemstatus.SystemOpState == MAN_INIT) - { - log.writeLog(__LINE__, "Start already in-progess, exit startSystemThread", LOG_TYPE_DEBUG); - startsystemthreadStatus = oam::API_ALREADY_IN_PROGRESS; - exitThread = true; - exitThreadStatus = oam::API_ALREADY_IN_PROGRESS; - } - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemStatus: " + error, LOG_TYPE_ERROR); - startsystemthreadStatus = oam::API_FAILURE; - processManager.setSystemState(oam::MAN_OFFLINE); - exitThread = true; - exitThreadStatus = oam::API_FAILURE; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemStatus: Caught unknown exception!", LOG_TYPE_ERROR); - startsystemthreadStatus = oam::API_FAILURE; - processManager.setSystemState(oam::MAN_OFFLINE); - exitThread = true; - exitThreadStatus = oam::API_FAILURE; - } - - if ( exitThread ) - { - pthread_detach (ThreadId); - pthread_exit(reinterpret_cast(static_cast(exitThreadStatus))); - } - - if (systemstatus.SystemOpState == AUTO_OFFLINE) - processManager.setSystemState(oam::AUTO_INIT); - else - processManager.setSystemState(oam::MAN_INIT); - - //validate the dbroots assignments - //make sure no 1 ID is assigned to 2 PMs - //and a dbroot not assigned to a DISABLED PM - try - { - systemStorageInfo_t t; - t = oam.getStorageConfig(); - - DeviceDBRootList moduledbrootlist1 = boost::get<2>(t); - DeviceDBRootList moduledbrootlist2 = boost::get<2>(t); - - DeviceDBRootList::iterator pt1 = moduledbrootlist1.begin(); - - for ( ; pt1 != moduledbrootlist1.end() ; pt1++) - { - string moduleID1 = oam.itoa((*pt1).DeviceID); - string moduleName = "pm" + moduleID1; - - // check DISABLED modules - int opState = oam::ACTIVE; - bool degraded; - - try - { - oam.getModuleStatus(moduleName, opState, degraded); - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR); - continue; - - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR); - continue; - } - - //check if disabled - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - { - if ( (*pt1).dbrootConfigList.size() != 0 ) - { - //issue log and Set the alarm - log.writeLog(__LINE__, "startSystemThread failed: Disabled Module '" + moduleName + "' has DBRoots assigned to it", LOG_TYPE_CRITICAL); - aManager.sendAlarmReport(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET); - startsystemthreadStatus = oam::API_FAILURE; - processManager.setSystemState(oam::FAILED); - pthread_detach (ThreadId); - pthread_exit((void*) oam::API_FAILURE); - } - - continue; - } - - // if module has no dbroots assigned, fail startSystem - if ( (*pt1).dbrootConfigList.size() == 0 ) - { - //issue log and Set the alarm - log.writeLog(__LINE__, "startSystemThread failed: Module '" + moduleName + "' has no DBRoots assigned to it", LOG_TYPE_CRITICAL); - aManager.sendAlarmReport(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET); - startsystemthreadStatus = oam::API_FAILURE; - processManager.setSystemState(oam::FAILED); - pthread_detach (ThreadId); - pthread_exit((void*) oam::API_FAILURE); - } - - DBRootConfigList::iterator pt1a = (*pt1).dbrootConfigList.begin(); - - for ( ; pt1a != (*pt1).dbrootConfigList.end() ; pt1a++) - { - DeviceDBRootList::iterator pt2 = moduledbrootlist2.begin(); - - for ( ; pt2 != moduledbrootlist2.end() ; pt2++) - { - string moduleID2 = oam.itoa((*pt2).DeviceID); - - if ( moduleID1 == moduleID2 ) - continue; - - DBRootConfigList::iterator pt2a = (*pt2).dbrootConfigList.begin(); - - for ( ; pt2a != (*pt2).dbrootConfigList.end() ; pt2a++) - { - if ( *pt1a == *pt2a) - { - log.writeLog(__LINE__, "ERROR: DBRoot ID " + oam.itoa(*pt1a) + " configured on 2 pms: 'pm" + moduleID1 + "' and 'pm" + moduleID2 + "'", LOG_TYPE_CRITICAL); - //Set the alarm - aManager.sendAlarmReport(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET); - - startsystemthreadStatus = oam::API_FAILURE; - processManager.setSystemState(oam::FAILED); - pthread_detach (ThreadId); - pthread_exit((void*) oam::API_FAILURE); - } - } - } - } - } - } - catch (exception& e) - {} - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - startsystemthreadStatus = oam::API_FAILURE; - processManager.setSystemState(oam::FAILED); - exitThread = true; - exitThreadStatus = oam::API_FAILURE; - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - startsystemthreadStatus = oam::API_FAILURE; - processManager.setSystemState(oam::FAILED); - exitThread = true; - exitThreadStatus = oam::API_FAILURE; - } - - if ( exitThread ) - { - pthread_detach (ThreadId); - pthread_exit(reinterpret_cast(static_cast(exitThreadStatus))); - } - - if (systemstatus.SystemOpState == AUTO_OFFLINE) - processManager.setSystemState(oam::AUTO_INIT); - else - processManager.setSystemState(oam::MAN_INIT); - - startsystemthreadRunning = true; - - string newStandbyModule = processManager.getStandbyModule(); - - if ( !newStandbyModule.empty() && newStandbyModule != "NONE") - processManager.setStandbyModule(newStandbyModule); - - //update workernode section - processManager.updateWorkerNodeconfig(); - - //configure PMS ports - if ( processManager.updatePMSconfig() != API_SUCCESS ) - { - startsystemthreadStatus = oam::API_FAILURE; - processManager.setSystemState(oam::FAILED); - pthread_detach (ThreadId); - pthread_exit((void*) oam::API_FAILURE); - } - - if ( devicenetworklist.size() != 0 ) - { - //distribute config file - processManager.distributeConfigFile("system"); - - // start modules from devicenetworklist - DeviceNetworkList::iterator listPT = devicenetworklist.begin(); - - //launch start module threads, starting with local module - pthread_t startmodulethread; - string moduleName = config.moduleName(); - int status = pthread_create (&startmodulethread, NULL, (void* (*)(void*)) &startModuleThread, &moduleName); - - if ( status != 0 ) - log.writeLog(__LINE__, "startModuleThread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR); - - sleep(5); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - string moduleName = (*listPT).DeviceName; - - // skip local module name - if ( moduleName == config.moduleName() ) - continue; - - // bypass DISABLED modules - try - { - int opState = oam::ACTIVE; - bool degraded; - oam.getModuleStatus(moduleName, opState, degraded); - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - //skip - continue; - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - pthread_t startmodulethread; - int status = pthread_create (&startmodulethread, NULL, (void* (*)(void*)) &startModuleThread, &moduleName); - - if ( status != 0 ) - log.writeLog(__LINE__, "startModuleThread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR); - - sleep(5); - } - } - else - { - // start all modules, like on a systemStart command - //launch start module threads, starting with local module - - if ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM ) - { - try - { - oam.setSystemConfig("PrimaryUMModuleName", config.OAMParentName()); - } - catch (...) {} - - processManager.setPMProcIPs(config.OAMParentName()); - } - - //distribute config file - processManager.distributeConfigFile("system"); - - pthread_t startmodulethread; - string moduleName = config.moduleName(); - int status = pthread_create (&startmodulethread, NULL, (void* (*)(void*)) &startModuleThread, &moduleName); - - if ( status != 0 ) - log.writeLog(__LINE__, "startModuleThread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR); - - sleep(5); - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - string moduleName = (*pt).DeviceName; - - // skip local module name - if ( moduleName == config.moduleName() ) - continue; - - // bypass DISABLED modules - try - { - int opState = oam::ACTIVE; - bool degraded; - oam.getModuleStatus(moduleName, opState, degraded); - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - //skip - continue; - - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - //setup primary User Module, DML/DDL only start on this module - if ( moduleName.find("um") == 0 && config.ServerInstallType() != oam::INSTALL_COMBINE_DM_UM_PM) - { - string PrimaryUMModuleName; - - try - { - oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName); - } - catch (...) {} - - if ( PrimaryUMModuleName == oam::UnassignedName ) - { - try - { - oam.setSystemConfig("PrimaryUMModuleName", moduleName); - } - catch (...) {} - - processManager.setPMProcIPs(moduleName); - - //distribute config file - processManager.distributeConfigFile("system"); - } - } - - pthread_t startmodulethread; - string name = moduleName; - int status = pthread_create (&startmodulethread, NULL, (void* (*)(void*)) &startModuleThread, &name); - - if ( status != 0 ) - log.writeLog(__LINE__, "startModuleThread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR); - - if ( !HDFS ) - sleep(5); - else - //usleep(100000); - sleep(1); - } - } - } - - // check status and process accordingly - int k = 0; - - for ( ; k < 1200 ; k++ ) - { - if ( startsystemthreadStop ) - { - log.writeLog(__LINE__, "startSystemThread exit early, startsystemthreadStop set", LOG_TYPE_DEBUG); - - if ( startmodulethreadStatus != API_SUCCESS ) - { - startsystemthreadStatus = startmodulethreadStatus; - processManager.setSystemState(oam::FAILED); - } - else - { - startsystemthreadStatus = API_FAILURE; - processManager.setSystemState(oam::MAN_OFFLINE); - } - - startsystemthreadRunning = false; - pthread_detach (ThreadId); - pthread_exit((void*) oam::API_FAILURE); - } - - string moduleName; - status = API_SUCCESS; - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - moduleName = (*pt).DeviceName; - - // get module status - try - { - int opState = oam::ACTIVE; - bool degraded; - oam.getModuleStatus(moduleName, opState, degraded); - - if ( opState == oam::FAILED ) - { - if ( startmodulethreadStatus != API_SUCCESS ) - status = startmodulethreadStatus; - else - status = API_FAILURE; - - break; - } - - if (opState == oam::ACTIVE || - opState == oam::MAN_DISABLED || - opState == oam::AUTO_DISABLED || - (opState == oam::MAN_OFFLINE && k > 0) ) - //skip - continue; - - status = API_ALREADY_IN_PROGRESS; - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR); - continue; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR); - continue; - } - } - - if ( status == API_FAILURE ) - break; - } - - //get out of loop if all modules started successfully - if ( status == API_SUCCESS ) - { - //send message to start new Standby Process-Manager, if needed - string newStandbyModule = processManager.getStandbyModule(); - - if ( !newStandbyModule.empty() && newStandbyModule != "NONE") - { - // get standby IP address and update entries - processManager.setStandbyModule(newStandbyModule); - - //distribute config file - processManager.distributeConfigFile("system"); - } - - break; - } - else - { - //get out of loop if start module failed - if ( status == API_FAILURE ) - { - //set system status - log.writeLog(__LINE__, "startSystemThread: Module failed, Set System State to FAILED: " + moduleName, LOG_TYPE_CRITICAL); - processManager.setSystemState(oam::FAILED); - break; - } - } - - sleep(5); - } - - if ( k == 1200 ) - { - // system didn't Successfully restart - log.writeLog(__LINE__, "startSystemThread: Modules failed to start after 1200 tries, Set System State to FAILED", LOG_TYPE_CRITICAL); - processManager.setSystemState(oam::FAILED); - status = oam::API_FAILURE; - } - - //set query system state not ready - processManager.setQuerySystemState(false); - - // Bug 4554: Wait until DMLProc is finished with rollback - if (status == oam::API_SUCCESS) - { - BRM::DBRM dbrm; - uint16_t rtn = 0; - bool bfirst = true; - SystemProcessStatus systemprocessstatus; - - string PrimaryUMModuleName; - - try - { - oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName); - } - catch (...) {} - - if ( PrimaryUMModuleName.empty() ) - { - log.writeLog(__LINE__, "startSystemThread: Failed, PrimaryUMModuleName is unassigned", LOG_TYPE_CRITICAL); - rtn = oam::FAILED; - log.writeLog(__LINE__, "startSystemThread Exit", LOG_TYPE_DEBUG); - processManager.setSystemState(oam::FAILED); - startsystemthreadStatus = status; - startsystemthreadRunning = false; - pthread_detach (ThreadId); - pthread_exit(0); - } - - // waiting until dml are ACTIVE, then mark system ACTIVE - while (rtn == 0) - { - ProcessStatus DMLprocessstatus; - - try - { - oam.getProcessStatus("DMLProc", PrimaryUMModuleName, DMLprocessstatus); - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - - if (DMLprocessstatus.ProcessOpState == oam::BUSY_INIT) - { - if (bfirst) - { - log.writeLog(__LINE__, "Waiting for DMLProc to finish rollback", LOG_TYPE_INFO); - bfirst = false; - } - } - - if (DMLprocessstatus.ProcessOpState == oam::ACTIVE) - { - rtn = oam::ACTIVE; - break; - } - - if (DMLprocessstatus.ProcessOpState == oam::FAILED) - { - rtn = oam::FAILED; - status = oam::API_FAILURE; - break; - } - - // wait some more - sleep(2); - } - - // This was logical error and possible source of many problems. - if ( rtn == oam::ACTIVE ) - //set query system state not ready - processManager.setQuerySystemState(true); - - processManager.setSystemState(rtn); - } - else - processManager.setSystemState(oam::FAILED); - - - // exit thread - log.writeLog(__LINE__, "startSystemThread Exit", LOG_TYPE_DEBUG); - startsystemthreadStatus = status; - startsystemthreadRunning = false; - pthread_detach (ThreadId); - pthread_exit(0); -} - -/***************************************************************************************** -* @brief startModuleThread -* -* purpose: Send Messages to Module Process Monitors to start Processes -* -*****************************************************************************************/ -void* startModuleThread(string* module) -{ - assert(module); - //store in a local variable - string moduleName = *module; - - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - Oam oam; - bool exitThread = false; - int exitThreadStatus = oam::API_SUCCESS; - - pthread_t ThreadId; - ThreadId = pthread_self(); - - if ( moduleName.empty() ) - { - log.writeLog(__LINE__, "startModuleThread received on invalid module name", LOG_TYPE_ERROR); - pthread_detach (ThreadId); - pthread_exit(0); - } - - log.writeLog(__LINE__, "Start Module " + moduleName, LOG_TYPE_DEBUG); - - bool start = false; - - while (true) - { - if ( exitThread ) - { - pthread_detach (ThreadId); - pthread_exit(reinterpret_cast(static_cast(exitThreadStatus))); - } - - // get module status - uint16_t startType = oam::MAN_OFFLINE; - - try - { - int opState = oam::ACTIVE; - bool degraded; - oam.getModuleStatus(moduleName, opState, degraded); - - if ( opState == oam::AUTO_OFFLINE || opState == oam::AUTO_INIT) - startType = oam::AUTO_OFFLINE; - - if (opState == oam::ACTIVE || - opState == oam::MAN_DISABLED || - opState == oam::AUTO_DISABLED || - ( opState == oam::MAN_OFFLINE && start) ) - //quit - break; - - start = true; - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - if ( startsystemthreadStop) - { - // set status and exit this thread - processManager.setModuleState(moduleName, oam::MAN_OFFLINE); - log.writeLog(__LINE__, "startModuleThread early exit on " + moduleName, LOG_TYPE_DEBUG); - pthread_detach (ThreadId); - pthread_exit(0); - } - - int retStatus = processManager.startModule(moduleName, oam::FORCEFUL, startType, true); - - log.writeLog(__LINE__, "ACK received from '" + moduleName + "' Process-Monitor, return status = " + oam.itoa(retStatus), LOG_TYPE_DEBUG); - - if (retStatus == API_SUCCESS) - break; - else - { - if (retStatus != API_MINOR_FAILURE) - { - //major failure, set stopsystem flag and exit this thread - startmodulethreadStatus = retStatus; - startsystemthreadStop = true; - break; - } - } - } - - // exit thread - log.writeLog(__LINE__, "startModuleThread Exit on " + moduleName, LOG_TYPE_DEBUG); - pthread_detach (ThreadId); - pthread_exit(0); -} - - -/***************************************************************************************** -* @brief stopSystemThread -* -* purpose: Send Messages to Module Process Monitors to stop Processes -* -*****************************************************************************************/ -void* stopSystemThread(oam::DeviceNetworkList* Devicenetworklist) -{ - assert(Devicenetworklist); - oam::DeviceNetworkList devicenetworklist = *Devicenetworklist; - - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - Oam oam; - SystemModuleTypeConfig systemmoduletypeconfig; - ALARMManager aManager; - int status = API_SUCCESS; - //bool exitThread = false; - //int exitThreadStatus = oam::API_SUCCESS; - - pthread_t ThreadId; - ThreadId = pthread_self(); - - log.writeLog(__LINE__, "stopSystemThread launched", LOG_TYPE_DEBUG); - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - stopsystemthreadStatus = oam::API_FAILURE; - processManager.setSystemState(oam::FAILED); - //exitThread = true; - //exitThreadStatus = oam::API_FAILURE; - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - stopsystemthreadStatus = oam::API_FAILURE; - processManager.setSystemState(oam::FAILED); - //exitThread = true; - //exitThreadStatus = oam::API_FAILURE; - } - - if ( devicenetworklist.size() != 0 ) - { - // stop modules from devicenetworklist - DeviceNetworkList::iterator listPT = devicenetworklist.begin(); - - //launch start module threads, starting with local module - pthread_t stopmodulethread; - string moduleName = config.moduleName(); - int status = pthread_create (&stopmodulethread, NULL, (void* (*)(void*)) &stopModuleThread, &moduleName); - - if ( status != 0 ) - log.writeLog(__LINE__, "stopModuleThread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - string moduleName = (*listPT).DeviceName; - - // bypass DISABLED modules - try - { - int opState; - bool degraded; - oam.getModuleStatus(moduleName, opState, degraded); - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - //skip - continue; - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - pthread_t stopmodulethread; - int status = pthread_create (&stopmodulethread, NULL, (void* (*)(void*)) &stopModuleThread, &moduleName); - - if ( status != 0 ) - log.writeLog(__LINE__, "stopModuleThread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR); - - sleep(5); - } - } - else - { - // stop all modules, like on a systemStart command - //launch stop module threads, stoping with local module - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - string moduleName = (*pt).DeviceName; - - // bypass DISABLED modules - try - { - int opState = oam::ACTIVE; - bool degraded; - oam.getModuleStatus(moduleName, opState, degraded); - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - //skip - continue; - - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - pthread_t stopmodulethread; - string name = moduleName; - int status = pthread_create (&stopmodulethread, NULL, (void* (*)(void*)) &stopModuleThread, &name); - - if ( status != 0 ) - log.writeLog(__LINE__, "stopModuleThread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR); - - usleep(50000); - } - } - } - - // check status and process accordingly - int k = 0; - - for ( ; k < 1200 ; k++ ) - { - string moduleName; - status = API_SUCCESS; - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - moduleName = (*pt).DeviceName; - - // get module status - try - { - int opState = oam::ACTIVE; - bool degraded; - oam.getModuleStatus(moduleName, opState, degraded); - - if ( opState == oam::FAILED ) - { - status = API_FAILURE; - break; - } - - if (opState == oam::MAN_DISABLED || - opState == oam::AUTO_DISABLED || - opState == oam::MAN_OFFLINE) - //skip - continue; - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR); - continue; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR); - continue; - } - } - - if ( status == API_FAILURE ) - break; - } - - //get out of loop if all modules stopped successfully - if ( status == API_SUCCESS ) - { - break; - } - else - { - //get out of loop if stop module failed - if ( status == API_FAILURE ) - { - //set system status - log.writeLog(__LINE__, "stopSystemThread: Module failed, Set System State to FAILED: " + moduleName, LOG_TYPE_CRITICAL); - processManager.setSystemState(oam::FAILED); - break; - } - } - - sleep(5); - } - - if ( k == 1200 ) - { - // system didn't Successfully restart - log.writeLog(__LINE__, "stopSystemThread: Modules failed to stop after 1200 tries, Set System State to FAILED", LOG_TYPE_CRITICAL); - processManager.setSystemState(oam::FAILED); - status = oam::API_FAILURE; - } - else - { - processManager.setSystemState(oam::MAN_OFFLINE); - status = oam::API_SUCCESS; - } - - // exit thread - stopsystemthreadStatus = status; - log.writeLog(__LINE__, "stopSystemThread Exit", LOG_TYPE_DEBUG); - pthread_detach (ThreadId); - pthread_exit(0); -} - -/***************************************************************************************** -* @brief stopModuleThread -* -* purpose: Send Messages to Module Process Monitors to stop Processes -* -*****************************************************************************************/ -void* stopModuleThread(string* module) -{ - assert(module); - //store in a local variable - string moduleName = *module; - - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - Oam oam; - - pthread_t ThreadId; - ThreadId = pthread_self(); - - if ( moduleName.empty() ) - { - log.writeLog(__LINE__, "stopModuleThread received on invalid module name", LOG_TYPE_ERROR); - pthread_detach (ThreadId); - pthread_exit(0); - } - - log.writeLog(__LINE__, "Stop Module " + moduleName, LOG_TYPE_DEBUG); - - while (true) - { - // get module status - try - { - int opState = oam::ACTIVE; - bool degraded; - oam.getModuleStatus(moduleName, opState, degraded); - - if (opState == oam::MAN_OFFLINE) - //quit - break; - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - int retStatus = processManager.stopModule(moduleName, oam::GRACEFUL, true); - - log.writeLog(__LINE__, "ACK received from '" + moduleName + "' Process-Monitor, return status = " + oam.itoa(retStatus), LOG_TYPE_DEBUG); - - if (retStatus == API_SUCCESS) - break; - else - { - if (retStatus != API_MINOR_FAILURE) - { - //major failure, set stopsystem flag and exit this thread - break; - } - } - } - - // exit thread - log.writeLog(__LINE__, "stopModuleThread Exit on " + moduleName, LOG_TYPE_DEBUG); - pthread_detach (ThreadId); - pthread_exit(0); -} - - -/***************************************************************************************** -* @brief checkSimplexModule -* -* purpose: Check for simplex module run-type and start mate processes if needed -* -*****************************************************************************************/ -void ProcessManager::checkSimplexModule(std::string moduleName) -{ - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - Oam oam; - SystemModuleTypeConfig systemmoduletypeconfig; - SystemProcessConfig systemprocessconfig; - - log.writeLog(__LINE__, "checkSimplexModule called for " + moduleName, LOG_TYPE_DEBUG); - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - return; - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - return; - } - - string moduletype = moduleName.substr(0, MAX_MODULE_TYPE_SIZE); - - for ( unsigned int i = 0; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( moduletype == systemmoduletypeconfig.moduletypeconfig[i].ModuleType ) - { - - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleCount == 0) - return; - - //check for SIMPLEX Processes on mate might need to be started - if ( systemmoduletypeconfig.moduletypeconfig[i].RunType == SIMPLEX ) - { - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - if ((*pt).DeviceName != moduleName) - { - //mate module, check for module ACTIVE and SIMPLEX processes - int opState = oam::ACTIVE; - - try - { - bool degraded; - oam.getModuleStatus((*pt).DeviceName, opState, degraded); - - if (opState == oam::ACTIVE || - opState == oam::DEGRADED ) - { - //start COLD_STANDBY processes - try - { - oam.getProcessConfig(systemprocessconfig); - - for ( unsigned int j = 0 ; j < systemprocessconfig.processconfig.size(); j++) - { - if ( systemprocessconfig.processconfig[j].ModuleType == moduletype && - systemprocessconfig.processconfig[j].RunType == oam::SIMPLEX ) - { - int state = oam::ACTIVE; - - try - { - ProcessStatus procstat; - oam.getProcessStatus(systemprocessconfig.processconfig[j].ProcessName, - (*pt).DeviceName, procstat); - state = procstat.ProcessOpState; - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - continue; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - continue; - } - - if ( state == oam::COLD_STANDBY ) - { - //process DDL/DMLProc - if ( systemprocessconfig.processconfig[j].ProcessName == "DDLProc") - { - setPMProcIPs((*pt).DeviceName); - - log.writeLog(__LINE__, "Set Primary UM Module = " + (*pt).DeviceName, LOG_TYPE_DEBUG); - - oam.setSystemConfig("PrimaryUMModuleName", (*pt).DeviceName); - - //distribute config file - distributeConfigFile("system"); - sleep(2); - } - - int status = processManager.startProcess((*pt).DeviceName, - systemprocessconfig.processconfig[j].ProcessName, - FORCEFUL); - - if ( status == API_SUCCESS ) - { - log.writeLog(__LINE__, "checkSimplexModule: mate process started: " + (*pt).DeviceName + "/" + systemprocessconfig.processconfig[j].ProcessName, LOG_TYPE_DEBUG); - - status = processManager.startProcess((*pt).DeviceName, - "DMLProc", - FORCEFUL); - if ( status == API_SUCCESS ) { - log.writeLog(__LINE__, "checkSimplexModule: mate process started: " + (*pt).DeviceName + "/DMLProc", LOG_TYPE_DEBUG); - } - else - log.writeLog(__LINE__, "checkSimplexModule: mate process failed to start: " + (*pt).DeviceName + "/DMLProc", LOG_TYPE_DEBUG); - } - else - log.writeLog(__LINE__, "checkSimplexModule: mate process failed to start: " + (*pt).DeviceName + "/" + systemprocessconfig.processconfig[j].ProcessName, LOG_TYPE_DEBUG); - - //setup new MariaDB Replication Master - if ( systemprocessconfig.processconfig[j].ProcessName == "DMLProc" ) { - log.writeLog(__LINE__, "Setup MySQL Replication for COLD_STANDBY DMLProc going ACTIVE", LOG_TYPE_DEBUG); - oam::DeviceNetworkList devicenetworklist; - processManager.setMySQLReplication(devicenetworklist, (*pt).DeviceName); - } - } - else - { - // if found ACTIVE, skip to next process - if ( state == oam::ACTIVE ) - return; - } - } - } - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "checkSimplexModule: EXCEPTION ERROR on getProcessConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "checkSimplexModule: EXCEPTION ERROR on getProcessConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - } - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + moduleName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - } - } - } - } - } - - return; -} - -/****************************************************************************************** -* @brief updatePMSconfig -* -* purpose: Update PMS Configuration in System Configuration file -* -******************************************************************************************/ -int ProcessManager::updatePMSconfig( bool check ) -{ - Oam oam; - int minPmPorts = 32; - vector IpAddrs; - vector nicIDs; - - pthread_mutex_lock(&THREAD_LOCK); - - ModuleTypeConfig moduletypeconfig; - oam.getSystemConfig("pm", moduletypeconfig); - - Config* sysConfig = Config::makeConfig(); - string pmsIPAddr = sysConfig->getConfig("PMS1", "IPAddr"); - - //exit out if PMS already setup - if ( pmsIPAddr != oam::UnassignedIpAddr && - check) - { - log.writeLog(__LINE__, "updatePMSconfig: no update needed, exiting function", LOG_TYPE_DEBUG); - pthread_mutex_unlock(&THREAD_LOCK); - return API_SUCCESS; - } - - //exit out if PM module count is 1 or less - if ( moduletypeconfig.ModuleCount <= 1 && - check) - { - log.writeLog(__LINE__, "updatePMSconfig: no update needed, exiting function", LOG_TYPE_DEBUG); - pthread_mutex_unlock(&THREAD_LOCK); - return API_SUCCESS; - } - - int maxPMNicID = atoi(sysConfig->getConfig("PrimitiveServers", "ConnectionsPerPrimProc").c_str()) / 2; - int pmCount = 0; - - //get Perfomance module IP addresses - DeviceNetworkList::iterator pt = moduletypeconfig.ModuleNetworkList.begin(); - - for ( ; pt != moduletypeconfig.ModuleNetworkList.end() ; pt++) - { - int opState = oam::ACTIVE; - bool degraded; - - try - { - oam.getModuleStatus((*pt).DeviceName, opState, degraded); - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - continue; - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - pmCount++; - - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++) - { - if ( (*pt1).IPAddr == oam::UnassignedIpAddr ) - continue; - else - { - //check NIC status and don't assigned if down - try - { - int state = oam::UP; - oam.getNICStatus((*pt1).HostName, state); - - if ( state == oam::UP || state == oam::INITIAL) - { - IpAddrs.push_back((*pt1).IPAddr); - nicIDs.push_back((*pt1).NicID); - } - } - catch (...) - { - IpAddrs.push_back((*pt1).IPAddr); - nicIDs.push_back((*pt1).NicID); - } - } - } - } - - if ( IpAddrs.empty()) - { - log.writeLog(__LINE__, "updatePMSconfig: No up NICS found, exiting function", LOG_TYPE_DEBUG); - pthread_mutex_unlock(&THREAD_LOCK); - return API_SUCCESS; - } - - if ( pmCount == 0) - { - log.writeLog(__LINE__, "updatePMSconfig: No PM modules Enabled, exiting function", LOG_TYPE_DEBUG); - pthread_mutex_unlock(&THREAD_LOCK); - return API_SUCCESS; - } - - if ( pmCount == 1 && - pmsIPAddr != oam::UnassignedIpAddr && - check ) - { - log.writeLog(__LINE__, "updatePMSconfig: no update needed, exiting function", LOG_TYPE_DEBUG); - pthread_mutex_unlock(&THREAD_LOCK); - return API_SUCCESS; - } - - Configuration config; - - //retry 5 times loop just in case - for (int i = 0 ; i < 5; i++) - { - Config* sysConfig1 = Config::makeConfig(); - - //update PM count if needed - sysConfig1->setConfig("PrimitiveServers", "Count", oam.itoa(pmCount)); - - int pmPorts = pmCount * (maxPMNicID * 2); - - if ( pmPorts < minPmPorts ) - pmPorts = minPmPorts; - - const string PM = "PMS"; - int nicID = 1; - - for ( int pmsID = 1; pmsID < pmPorts + 1 ; ) - { - vector::iterator pt = IpAddrs.begin(); - vector::iterator pt1 = nicIDs.begin(); - - for ( ; pt != IpAddrs.end() ; pt++, pt1++) - { - if ( *pt1 == nicID ) - { - string pmsName = PM + oam.itoa(pmsID); - sysConfig1->setConfig(pmsName, "IPAddr", *pt); - pmsID++; - } - - if ( pmsID > pmPorts ) - break; - } - - if ( pmsID > pmPorts ) - break; - - nicID++; - - if ( nicID > maxPMNicID ) - nicID = 1; - } - - //update Calpont Config table - try - { - sysConfig1->write(); - pthread_mutex_unlock(&THREAD_LOCK); - - return API_SUCCESS; - } - catch (...) - { - log.writeLog(__LINE__, "updatePMSconfig - ERROR: sysConfig->write", LOG_TYPE_ERROR); - } - } - - pthread_mutex_unlock(&THREAD_LOCK); - - log.writeLog(__LINE__, "updatePMSconfig failed", LOG_TYPE_DEBUG); - - return API_FAILURE; -} - -/****************************************************************************************** -* @brief updateWorkerNodeconfig -* -* purpose: Update WorkerNode Configuration in System Configuration file -* -******************************************************************************************/ -int ProcessManager::updateWorkerNodeconfig() -{ - Oam oam; - vector module; - vector ipadr; - - pthread_mutex_lock(&THREAD_LOCK); - - //setup current module as work-node #1 by entering it in first - module.push_back(config.moduleName()); - - // get my IP address and update entries - ModuleConfig moduleconfig; - oam.getSystemConfig(config.moduleName(), moduleconfig); - HostConfigList::iterator pt0 = moduleconfig.hostConfigList.begin(); - idbassert(pt0 != moduleconfig.hostConfigList.end()); - ipadr.push_back(pt0->IPAddr); - - SystemModuleTypeConfig systemmoduletypeconfig; - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if ( systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty() ) - // end of list - break; - - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount > 0 ) - { - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - //skip current module - if ( (*pt).DeviceName == config.moduleName() ) - continue; - - int opState = oam::ACTIVE; - bool degraded; - - try - { - oam.getModuleStatus((*pt).DeviceName, opState, degraded); - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - continue; - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - module.push_back((*pt).DeviceName); - - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - ipadr.push_back((*pt1).IPAddr); - } - } - } - } - catch (...) - { - log.writeLog(__LINE__, "updateWorkerNodeconfig: getSystemNetworkConfig Failed", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_SUCCESS; - } - - Configuration config; - - for ( int i = 1 ; i < 5 ; i++ ) - { - Config* sysConfig3 = Config::makeConfig();; - - //update Columnstore.xml - sysConfig3->setConfig("DBRM_Controller", "NumWorkers", oam.itoa(module.size())); - - std::vector::iterator pt = module.begin(); - std::vector::iterator pt1 = ipadr.begin(); - int id = 1; - - for ( ; pt != module.end() ; pt++, pt1++, id++) - { - string Section = "DBRM_Worker" + oam.itoa(id); - sysConfig3->setConfig(Section, "IPAddr", *pt1); - sysConfig3->setConfig(Section, "Module", *pt); - string moduleName = *pt; - sysConfig3->setConfig(Section, "Port", "8700"); - } - - //clear out any leftovers - for ( ; id < MAX_MODULE ; id++ ) - { - string Section = "DBRM_Worker" + oam.itoa(id); - - if ( sysConfig3->getConfig(Section, "IPAddr") != oam::UnassignedIpAddr && - !sysConfig3->getConfig(Section, "IPAddr").empty()) - sysConfig3->setConfig(Section, "IPAddr", oam::UnassignedIpAddr); - - if ( sysConfig3->getConfig(Section, "Module") != oam::UnassignedIpAddr && - !sysConfig3->getConfig(Section, "Module").empty()) - sysConfig3->setConfig(Section, "Module", oam::UnassignedName); - } - - try - { - sysConfig3->write(); - pthread_mutex_unlock(&THREAD_LOCK); - - return API_SUCCESS; - - } - catch (...) - { - log.writeLog(__LINE__, "updateWorkerNodeconfig - ERROR: sysConfig->write", LOG_TYPE_ERROR); - } - } - - pthread_mutex_unlock(&THREAD_LOCK); - log.writeLog(__LINE__, "updateWorkerNodeconfig failed", LOG_TYPE_DEBUG); - - return API_FAILURE; -} - -/****************************************************************************************** -* @brief clearModuleAlarms -* -* purpose: Clears all alarms related to a module -* -******************************************************************************************/ -void ProcessManager::clearModuleAlarms(std::string moduleName) -{ - ALARMManager aManager; - AlarmList alarmList; - aManager.getActiveAlarm (alarmList); - - AlarmList::iterator i; - - for (i = alarmList.begin(); i != alarmList.end(); ++i) - { - // check if the same fault component on same module - if (moduleName.compare((i->second).getComponentID()) == 0 || - moduleName.compare((i->second).getSname()) == 0) - { - // match, go clear it - aManager.sendAlarmReport((i->second).getComponentID().c_str(), - (i->second).getAlarmID(), - CLEAR, - (i->second).getSname().c_str(), - "ProcessManager"); - } - } -} - -/****************************************************************************************** -* @brief clearNICAlarms -* -* purpose: Clears all alarms related to a NIC hostName -* -******************************************************************************************/ -void ProcessManager::clearNICAlarms(std::string hostName) -{ - ALARMManager aManager; - AlarmList alarmList; - aManager.getActiveAlarm (alarmList); - - AlarmList::iterator i; - - for (i = alarmList.begin(); i != alarmList.end(); ++i) - { - // check if the same fault component on same module - if (hostName.compare((i->second).getComponentID()) == 0) - { - // match, go clear it - aManager.sendAlarmReport((i->second).getComponentID().c_str(), - (i->second).getAlarmID(), - CLEAR, - (i->second).getSname().c_str(), - "ProcessManager"); - } - } -} - -/****************************************************************************************** -* @brief updateExtentMap -* -* purpose: update Extent Map section in Columnstore.xml -* -******************************************************************************************/ -bool ProcessManager::updateExtentMap() -{ - string fileName = std::string(MCSSYSCONFDIR) + "/columnstore/Columnstore.xml"; - - ifstream oldFile (fileName.c_str()); - - if (!oldFile) return false; - - vector lines; - char line[200]; - string buf; - string newLine; - - string start = ""; - string firstComment = ""; - - while (oldFile.getline(line, 200)) - { - buf = line; - - string::size_type pos = buf.find(start, 0); - - if (pos != string::npos) - { - //output to temp file and skip next line - lines.push_back(buf); - oldFile.getline(line, 200); - buf = line; - pos = buf.find(firstComment, 0); - - if (pos == string::npos) - { - return true; - } - } - else - { - pos = buf.find(end, 0); - - if (pos != string::npos) - { - //output to temp file and skip next line - lines.push_back(buf); - oldFile.getline(line, 200); - buf = line; - pos = buf.find(lastComment, 0); - - if (pos == string::npos) - { - return true; - } - } - else - //output to temp file - lines.push_back(buf); - } - } - - oldFile.close(); - unlink (fileName.c_str()); - ofstream newFile (fileName.c_str()); - - //create new file - int fd = open(fileName.c_str(), O_RDWR | O_CREAT, 0664); - - copy(lines.begin(), lines.end(), ostream_iterator(newFile, "\n")); - newFile.close(); - - close(fd); - return true; -} - -/****************************************************************************************** -* @brief makeXMInittab -* -* purpose: Make inittab to auto-launch ProcMon -* -******************************************************************************************/ -bool ProcessManager::makeXMInittab(std::string moduleName, std::string systemID, std::string parentOAMModuleHostName) -{ - string fileName = "/var/lib/columnstore/local/etc/" + moduleName + "/inittab.calpont"; - - vector lines; - - string init1 = "1" + systemID + ":2345:respawn:ProcMon " + parentOAMModuleHostName; - - lines.push_back(init1); - - unlink (fileName.c_str()); - ofstream newFile (fileName.c_str()); - - //create new file - int fd = open(fileName.c_str(), O_RDWR | O_CREAT, 0664); - - copy(lines.begin(), lines.end(), ostream_iterator(newFile, "\n")); - newFile.close(); - - close(fd); - - return true; -} - - -/****************************************************************************************** -* @brief setPMProcIPs -* -* purpose: Updates the Columnstore.xml file for DDL/DMLProc IPs during PM switchover -* -* -******************************************************************************************/ -int ProcessManager::setPMProcIPs( std::string moduleName, std::string processName ) -{ - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - Oam oam; - ModuleConfig moduleconfig; - - pthread_mutex_lock(&THREAD_LOCK); - - if ( processName == oam::UnassignedName || processName == "DDLProc") - { - for ( int i = 1 ; i < 5 ; i ++) - { - //get Module IP address - try - { - oam.getSystemConfig(moduleName, moduleconfig); - HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); - string ipAdd = (*pt1).IPAddr; - - Config* sysConfig2 = Config::makeConfig(); - - //check if IP address if different than current value, don't update if it is - if ( sysConfig2->getConfig("DDLProc", "IPAddr") == ipAdd ) - { - log.writeLog(__LINE__, "setPMProcIPs for DDLProc: no update needed", LOG_TYPE_DEBUG); - break; - } - - sysConfig2->setConfig("DDLProc", "IPAddr", ipAdd); - - try - { - sysConfig2->write(); - - pthread_mutex_unlock(&THREAD_LOCK); - - log.writeLog(__LINE__, "setPMProcIPs: DDLProc to " + ipAdd, LOG_TYPE_DEBUG); - } - catch (...) - { - log.writeLog(__LINE__, "setPMProcIPs - ERROR: sysConfig->write", LOG_TYPE_ERROR); - } - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "setPMProcIPs: EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "setPMProcIPs: EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - } - } - - if ( processName == oam::UnassignedName || processName == "DMLProc") - { - for ( int i = 1 ; i < 5 ; i ++) - { - //get Module IP address - try - { - oam.getSystemConfig(moduleName, moduleconfig); - HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); - string ipAdd = (*pt1).IPAddr; - - Config* sysConfig2 = Config::makeConfig(); - - //check if IP address if different than current value, don't update if it is - if ( sysConfig2->getConfig("DMLProc", "IPAddr") == ipAdd ) - { - log.writeLog(__LINE__, "setPMProcIPs for DMLProc: no update needed, exiting function", LOG_TYPE_DEBUG); - pthread_mutex_unlock(&THREAD_LOCK); - return API_SUCCESS; - } - - sysConfig2->setConfig("DMLProc", "IPAddr", ipAdd); - - try - { - sysConfig2->write(); - - pthread_mutex_unlock(&THREAD_LOCK); - - log.writeLog(__LINE__, "setPMProcIPs: DMLProc to " + ipAdd, LOG_TYPE_DEBUG); - } - catch (...) - { - log.writeLog(__LINE__, "setPMProcIPs - ERROR: sysConfig->write", LOG_TYPE_ERROR); - } - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "setPMProcIPs: EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "setPMProcIPs: EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - } - } - - pthread_mutex_unlock(&THREAD_LOCK); - - //log.writeLog(__LINE__, "setPMProcIPs failed", LOG_TYPE_DEBUG); - - return API_SUCCESS; - -} - -/****************************************************************************************** -* @brief distributeConfigFile -* -* purpose: Distribute Calpont Config File to system modules -* -******************************************************************************************/ -int ProcessManager::distributeConfigFile(std::string name, std::string file) -{ - ByteStream msg; - ByteStream::byte requestID = UPDATECONFIGFILE; - Oam oam; - int returnStatus = oam::API_SUCCESS; - - string dirName = std::string(MCSSYSCONFDIR) + "/columnstore/"; - string fileName = dirName + file; - - ifstream in (fileName.c_str()); - - if (!in) - { - log.writeLog(__LINE__, "distributeConfigFile failed, file doesn't exist: " + fileName, LOG_TYPE_ERROR); - return oam::API_FAILURE; - } - - //skip any file of size 0 - in.seekg(0, std::ios::end); - int size = in.tellg(); - - if ( size == 0 ) - { - log.writeLog(__LINE__, "distributeConfigFile failed, file doesn't exist: " + fileName, LOG_TYPE_ERROR); - return oam::API_FAILURE; - } - - // distribute using hdfs call, make sure host names are in /etc/pdsh/machines - ifstream in1 ("/etc/pdsh/machines"); - - if (in1) - { - if ( HDFS ) - { - if ( name == "system" ) - { - string cmd = "pdcp -a -x " + localHostName + " " + fileName + " " + dirName; - int rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) == 0) - { - log.writeLog(__LINE__, "distributeConfigFile using pdcp successful on " + fileName, LOG_TYPE_DEBUG); - return returnStatus; - } - else - { - log.writeLog(__LINE__, "distributeConfigFile using pdcp failed on " + fileName, LOG_TYPE_ERROR); - } - } - else - { - // get module hostname - ModuleConfig moduleconfig; - oam.getSystemConfig(name, moduleconfig); - HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); - string hostName = (*pt1).HostName; - - string cmd = "pdcp -w " + hostName + " " + fileName + " " + dirName; - int rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) == 0) - { - log.writeLog(__LINE__, "distributeConfigFile using pdcp successful on " + fileName, LOG_TYPE_DEBUG); - return returnStatus; - } - else - { - log.writeLog(__LINE__, "distributeConfigFile using pdcp failed on " + fileName, LOG_TYPE_ERROR); - } - } - } - } - - //send via tcp messaging - msg << requestID; - msg << fileName; - - in.seekg(0, std::ios::beg); - in >> msg; - - SystemModuleTypeConfig systemmoduletypeconfig; - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - if ( name == "system" ) - { - // send config file to all modules - for ( unsigned int i = 0 ; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - //skip local module - if ( (*pt).DeviceName == config.moduleName() ) - continue; - - //skip if AOS - int opState = oam::ACTIVE; - bool degraded; - - try - { - oam.getModuleStatus((*pt).DeviceName, opState, degraded); - } - catch (...) - {} - - if (opState == oam::AUTO_DISABLED) - continue; - - returnStatus = sendMsgProcMon( (*pt).DeviceName, msg, requestID, 0 ); - - if ( returnStatus == API_SUCCESS) - { - //log the success event - log.writeLog(__LINE__, (*pt).DeviceName + " distributeConfigFile success.", LOG_TYPE_DEBUG); - } - else - { - //log the error event - log.writeLog(__LINE__, (*pt).DeviceName + " distributeConfigFile failed!!", LOG_TYPE_WARNING); - } - } - } - } - else - { - returnStatus = sendMsgProcMon( name, msg, requestID, 0 ); - - if ( returnStatus == API_SUCCESS) - { - //log the success event - log.writeLog(__LINE__, name + " distributeConfigFile success.", LOG_TYPE_DEBUG); - } - else - { - //log the error event - log.writeLog(__LINE__, name + " distributeConfigFile failed!!", LOG_TYPE_WARNING); - } - } - - return returnStatus; -} - -/****************************************************************************************** -* @brief getDBRMData -* -* purpose: get DBRM Data and send to requester -* -******************************************************************************************/ -int ProcessManager::getDBRMData(messageqcpp::IOSocket fIos, std::string moduleName) -{ - ByteStream msg; - Oam oam; - int returnStatus = oam::API_SUCCESS; - - pthread_mutex_lock(&THREAD_LOCK); - - messageqcpp::IOSocket cfIos = fIos; - - string DBRMroot; - oam.getSystemConfig("DBRMRoot", DBRMroot); - - string currentFileName = DBRMroot + "_current"; - string journalFileName = DBRMroot + "_journal"; - - string oidFile; - oam.getSystemConfig("OIDBitmapFile", oidFile); - - // StorageManager: Need to make these existence checks use an idbfilesystem op if we - // decide to put the BRM-managed files in cloud storage - string currentDbrmFile; - IDBFileSystem &fs = IDBPolicy::getFs(currentFileName.c_str()); - boost::scoped_ptr oldFile(IDBDataFile::open(IDBPolicy::getType(currentFileName.c_str(), - IDBPolicy::WRITEENG), - currentFileName.c_str(), "r", 0)); - //ifstream oldFile (currentFileName.c_str()); - - if (fs.exists(currentFileName.c_str())) - { - // current file found, check for OIDBitmapFile - boost::scoped_ptr mapFile(IDBDataFile::open(IDBPolicy::getType(oidFile.c_str(), - IDBPolicy::WRITEENG), - oidFile.c_str(), "r", 0)); - - //ifstream mapFile (oidFile.c_str()); - - if (!mapFile) - { - // no OIDBitmapFile, with current file, dbrm files are hosed - log.writeLog(__LINE__, "getDBRMData: DBRM data files error, current file exist without OIDBitmapFile", LOG_TYPE_CRITICAL); - pthread_mutex_unlock(&THREAD_LOCK); - return oam::API_FAILURE_DB_ERROR; - } - - char line[200]; - memset(line, 0, 200); - int err = oldFile->read(line, 200); - // XXXPAT. HACK! This is brittle, need to fix later. Need to eat a \n char. Need to move forward now. - if (err > 0) - line[err-1] = 0; - - //oldFile.getline(line, 200); - // MCOL-1558. Handle absolute and relative paths. - if (line[0] == '/') - currentDbrmFile = line; - else - currentDbrmFile = DBRMroot.substr(0, DBRMroot.find_last_of('/') + 1) + line; - } - else - { - log.writeLog(__LINE__, "getDBRMData: no DBRM current file found, must be initial install", LOG_TYPE_DEBUG); - - msg << "initial"; - - try - { - cfIos.write(msg); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: Unknown exception", LOG_TYPE_ERROR); - returnStatus = oam::API_FAILURE; - } - - pthread_mutex_unlock(&THREAD_LOCK); - return returnStatus; - } - - //string fileName = startup::StartUp::installDir() + "/local/dbrmfiles"; - //unlink(fileName.c_str()); - - // this replaces the stuff that's if-0'd below - boost::filesystem::path pCurrentDbrmFile(currentDbrmFile + "_"); - boost::filesystem::path dbrmDir(pCurrentDbrmFile.parent_path()); - list fileListing; - vector dbrmFiles; - fs.listDirectory(dbrmDir.string().c_str(), fileListing); - for (const auto &file : fileListing) - { - if (file.find(pCurrentDbrmFile.filename().string()) == 0 && - fs.size((dbrmDir / file).string().c_str()) != 0) - { - log.writeLog(__LINE__, "adding " + (dbrmDir/file).string() + " to dbrmFiles", LOG_TYPE_DEBUG); - dbrmFiles.push_back((dbrmDir / file).string()); - } - } - fileListing.clear(); - - #if 0 - string cmd; - string storageType = config::Config::makeConfig()->getConfig("Installation", "DBRootStorageType"); - if (storageType == "storagemanager") - cmd = startup::StartUp::installDir() + "/bin/smls " + currentDbrmFile + "_* | awk '// { print $3 }' >> " + - startup::StartUp::installDir() + "/local/dbrmfiles"; - else - cmd = "ls " + currentDbrmFile + "_* >> " + startup::StartUp::installDir() + "/local/dbrmfiles"; - log.writeLog(__LINE__, "Running '" + cmd + "'", LOG_TYPE_DEBUG); - system(cmd.c_str()); - - ifstream file (fileName.c_str()); - - if (!file) - { - log.writeLog(__LINE__, "getDBRMData: no DBRM files found, must be initial install", LOG_TYPE_DEBUG); - - msg << "initial"; - - try - { - cfIos.write(msg); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: Unknow exception", LOG_TYPE_ERROR); - returnStatus = oam::API_FAILURE; - } - - pthread_mutex_unlock(&THREAD_LOCK); - return returnStatus; - } - - - vector dbrmFiles; - - char line[200]; - string buf; - - while (file.getline(line, 200)) - { - buf = line; - dbrmFiles.push_back(buf); - } - - file.close(); - #endif - - if ( dbrmFiles.size() < 1 ) - { - log.writeLog(__LINE__, "getDBRMData: dbrmFiles size = 0, must be initial install", LOG_TYPE_DEBUG); - - msg << "initial"; - - try - { - cfIos.write(msg); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: Unknown exception", LOG_TYPE_ERROR); - returnStatus = oam::API_FAILURE; - } - - pthread_mutex_unlock(&THREAD_LOCK); - return returnStatus; - } - - // put oid file and current file in list - dbrmFiles.push_back(currentFileName); - - if (fs.exists(journalFileName.c_str()) && fs.size(journalFileName.c_str()) > 0) - dbrmFiles.push_back(journalFileName); - if (fs.exists(oidFile.c_str()) && fs.size(oidFile.c_str()) > 0) - dbrmFiles.push_back(oidFile); - - //type - msg << "files"; - - try - { - cfIos.write(msg); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: Unknown exception", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return oam::API_FAILURE; - } - - //remove any file of size 0 - - std::vector::iterator pt1 = dbrmFiles.begin(); - #if 0 - for ( ; pt1 != dbrmFiles.end() ; pt1++) - { - if (fs.size(pt1->c_str()) == 0) - dbrmFiles.erase(pt1); - } - #endif - - ByteStream fcmsg; - - // number of files - fcmsg << (ByteStream::byte) dbrmFiles.size(); - - try - { - cfIos.write(fcmsg); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: Unknown exception", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return oam::API_FAILURE; - } - - pt1 = dbrmFiles.begin(); - for ( ; pt1 != dbrmFiles.end() ; pt1++) - { - ByteStream fnmsg, fdmsg; - - string fileName = *pt1; - - //Goal of the stuff below is to load a file's data into fdmsg - //and it's filename into fnmsg. - - boost::scoped_ptr in(IDBDataFile::open( - IDBPolicy::getType(fileName.c_str(), - IDBPolicy::WRITEENG), - fileName.c_str(), "r", 0)); - - ssize_t size = in->size(); - fdmsg.needAtLeast(size); - uint8_t *buf = fdmsg.getInputPtr(); - ssize_t progress = 0; - ssize_t err; - char errbuf[80]; - while (progress < size) - { - err = in->read(&buf[progress], size - progress); - if (err < 0) - { - int saved_errno = errno; - log.writeLog(__LINE__, "getDBRMData(): failed reading " + fileName + ", got " + - strerror_r(saved_errno, errbuf, 80), LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return oam::API_FAILURE; - } - else if (err == 0) - { - log.writeLog(__LINE__, "getDBRMData(): failed reading " + fileName + ", got early EOF", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return oam::API_FAILURE; - } - progress += err; - } - fdmsg.advanceInputPtr(size); - - log.writeLog(__LINE__, fileName, LOG_TYPE_DEBUG); - fnmsg << fileName; - - #if 0 - ifstream in(fileName.c_str()); - - //skip any file of size 0 - in.seekg(0, std::ios::end); - size = in.tellg(); - - if ( size == 0 ) - continue; - - in.seekg(0, std::ios::beg); - - log.writeLog(__LINE__, fileName, LOG_TYPE_DEBUG); - fnmsg << fileName; - #endif - - try - { - cfIos.write(fnmsg); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: " + error, LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return oam::API_FAILURE; - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: Unknown exception", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return oam::API_FAILURE; - } - - //in >> fdmsg; - - log.writeLog(__LINE__, "Sending " + to_string(fdmsg.length()) + " bytes.", LOG_TYPE_DEBUG); - try - { - cfIos.write(fdmsg); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: " + error, LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return oam::API_FAILURE; - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: Unknown exception", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return oam::API_FAILURE; - } - } - - try - { - cfIos.write(msg); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on cfIos.write: Unknown exception", LOG_TYPE_ERROR); - returnStatus = oam::API_FAILURE; - } - - pthread_mutex_unlock(&THREAD_LOCK); - return returnStatus; -} - - -/****************************************************************************************** -* @brief switchParentOAMModule -* -* purpose: Switch OAM Parent Module -* -******************************************************************************************/ -int ProcessManager::switchParentOAMModule(std::string newActiveModuleName) -{ - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - Oam oam; - int returnStatus = oam::API_SUCCESS; - ALARMManager aManager; - - log.writeLog(__LINE__, "switchParentOAMModule Function Started", LOG_TYPE_DEBUG); - - //storage config - string DBRootStorageType; - - try - { - oam.getSystemConfig( "DBRootStorageType", DBRootStorageType); - } - catch (...) {} - - log.writeLog(__LINE__, "switchParentOAMModule: DBRootStorageType = " + DBRootStorageType, LOG_TYPE_DEBUG); - - if ( DBRootStorageType == "internal" && DataRedundancyConfig == "n") - { - log.writeLog(__LINE__, "ERROR: DBRootStorageType = internal", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_INVALID_PARAMETER; - } - - // set alarm - aManager.sendAlarmReport(newActiveModuleName.c_str(), MODULE_SWITCH_ACTIVE, SET); - - //clear run standby flag; - runStandby = false; - int retryCount = 0; - //sleep, give time for message thread to startup - while (!MsgThreadActive && retryCount < 10) - { - log.writeLog(__LINE__, "Waiting for Message Thread...", LOG_TYPE_DEBUG); - sleep(5); - ++retryCount; - } - - int moduleID = atoi(newActiveModuleName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE).c_str()); - - // update Columnstore.xml entries - string newActiveIPaddr; - - try - { - pthread_mutex_lock(&THREAD_LOCK); - - //move a newparent dbroot to old parent for balancing - DBRootConfigList residedbrootConfigList; - bool doDBRootMove = true; - - try - { - oam.getPmDbrootConfig(moduleID, residedbrootConfigList); - - if ( residedbrootConfigList.size() > 0 ) - { - DBRootConfigList::iterator pt = residedbrootConfigList.begin(); - - if (*pt != 1) - { - try - { - oam.manualMovePmDbroot(newActiveModuleName, oam.itoa(*pt), config.OAMParentName()); - } - catch (...) - { - log.writeLog(__LINE__, "ERROR: manualMovePmDbroot Failed", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - } - else - { - doDBRootMove = false; - } - } - } - catch (...) - { - log.writeLog(__LINE__, "ERROR: getPmDbrootConfig Failed", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - //move dbroot #1 to new parent - if (doDBRootMove) - { - try - { - oam.manualMovePmDbroot(config.OAMParentName(), "1", newActiveModuleName); - } - catch (...) - { - log.writeLog(__LINE__, "ERROR: manualMovePmDbroot Failed", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - } - - Config* sysConfig4 = Config::makeConfig(); - - // get new Active address - ModuleConfig moduleconfig; - oam.getSystemConfig(newActiveModuleName, moduleconfig); - HostConfigList::iterator pt2 = moduleconfig.hostConfigList.begin(); - newActiveIPaddr = (*pt2).IPAddr; - - sysConfig4->setConfig("ProcMgr", "IPAddr", newActiveIPaddr); - sysConfig4->setConfig("ProcMgr_Alarm", "IPAddr", newActiveIPaddr); - sysConfig4->setConfig("ProcStatusControl", "IPAddr", newActiveIPaddr); - sysConfig4->setConfig("DBRM_Controller", "IPAddr", newActiveIPaddr); - - // update Parent OAM Module name to current module name - sysConfig4->setConfig("SystemConfig", "ParentOAMModuleName", newActiveModuleName); - - // clear Standby OAM Module - sysConfig4->setConfig("SystemConfig", "StandbyOAMModuleName", oam::UnassignedName); - sysConfig4->setConfig("ProcStatusControlStandby", "IPAddr", oam::UnassignedIpAddr); - - //update Calpont Config table - try - { - sysConfig4->write(); - } - catch (...) - { - log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - pthread_mutex_unlock(&THREAD_LOCK); - - if ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM ) - { - //set DDL/DMLproc IPs to new module - setPMProcIPs(newActiveModuleName); - - //set Primary UM to new module - try - { - oam.setSystemConfig("PrimaryUMModuleName", newActiveModuleName); - } - catch (...) {} - } - - log.writeLog(__LINE__, "Columnstore.xml entries update to local IP address of " + newActiveIPaddr, LOG_TYPE_DEBUG); - - //distribute config file - processManager.distributeConfigFile("system"); - sleep(1); - - //change master MySQL Replication setup - log.writeLog(__LINE__, "Setup MySQL Replication for new Parent Module during switch-over", LOG_TYPE_DEBUG); - oam::DeviceNetworkList devicenetworklist; - processManager.setMySQLReplication(devicenetworklist, newActiveModuleName, false, oam::UnassignedName); - - } - catch (exception& ex) - { - pthread_mutex_unlock(&THREAD_LOCK); - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - return API_FAILURE; - } - catch (...) - { - pthread_mutex_unlock(&THREAD_LOCK); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - return API_FAILURE; - } - - //send message to local Process Monitor for OAM Cold Activation - ByteStream msg1; - ByteStream::byte requestID = OAMPARENTCOLD; - - msg1 << requestID; - - while (true) - { - int returnStatus = sendMsgProcMon( config.moduleName(), msg1, requestID ); - - log.writeLog(__LINE__, "sent OAM Parent Cold message to local Process-Monitor, status: " + oam.itoa(returnStatus), LOG_TYPE_DEBUG); - - if ( returnStatus == oam::API_SUCCESS) - break; - } - - //send message to new Active Process Monitor for OAM Parent Activation - ByteStream msg; - requestID = OAMPARENTACTIVE; - - msg << requestID; - - while (true) - { - int returnStatus = sendMsgProcMon( newActiveModuleName, msg, requestID ); - - log.writeLog(__LINE__, "sent OAM Parent Activate message to New Active Process-Monitor, status: " + oam.itoa(returnStatus), LOG_TYPE_DEBUG); - - if ( returnStatus == oam::API_SUCCESS) - break; - } - - // start processmanager on new active node - startProcess(newActiveModuleName, "ProcessManager", oam::FORCEFUL); - - // clear alarm - aManager.sendAlarmReport(newActiveModuleName.c_str(), MODULE_SWITCH_ACTIVE, CLEAR); - - //DOING THIS JUST TO UPDATE THE TIMESTAMP OF THE CALPONT.XML FILE AS A WORK-AROUND FIX - //BECAUSE PROCMON ISN'T READING UPDATES FROM DISK ON HDFS SYSTEMS - - if (HDFS) - { - sleep(60); - Config* sysConfig = Config::makeConfig(); - - try - { - sysConfig->write(); - } - catch (...) - { - log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - } - - return returnStatus; -} - -/****************************************************************************************** -* @brief OAMParentModuleChange -* -* purpose: OAM Parent Module Change-over -* The module will take over running as the OAM Parent module -* after a detected outage -* -* -******************************************************************************************/ -int ProcessManager::OAMParentModuleChange() -{ - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - Oam oam; - - // - //monitor OAM Parent module for outage - // - - log.writeLog(__LINE__, "OAMParentModuleChange Function Started", LOG_TYPE_DEBUG); - - // Get Module Info - SystemModuleTypeConfig systemModuleTypeConfig; - - try - { - oam.getSystemConfig(systemModuleTypeConfig); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - string downOAMParentIPAddress; - string downOAMParentHostname; - string downOAMParentName = config.OAMParentName(); - - //Build module list - vector moduleNameList; - vector moduleIPAddrList; - - for ( unsigned int i = 0 ; i < systemModuleTypeConfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemModuleTypeConfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0 ) - // skip of no modules configured - continue; - - DeviceNetworkList::iterator pt = systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - //get parent module IP address - if ( (*pt).DeviceName == downOAMParentName ) - { - downOAMParentIPAddress = (*pt1).IPAddr; - downOAMParentHostname = (*pt1).HostName; - continue; - } - - //store the other modules - if ( (*pt).DeviceName != config.moduleName() ) - { - moduleNameList.push_back((*pt).DeviceName); - moduleIPAddrList.push_back((*pt1).IPAddr); - } - } - } - - string HA_IPAddr; - - if ( moduleIPAddrList.empty() ) - { - //get HA IP Address - Config* sysConfig = Config::makeConfig(); - HA_IPAddr = sysConfig->getConfig("ProcMgr_HA", "IPAddr"); - - log.writeLog(__LINE__, "Get HA_IPAddr = " + HA_IPAddr, LOG_TYPE_DEBUG); - - if ( !HA_IPAddr.empty() ) - { - moduleNameList.push_back("HA_device"); - moduleIPAddrList.push_back(HA_IPAddr); - } - } - - int ModuleHeartbeatCount; - - try - { - oam.getSystemConfig("ModuleHeartbeatCount", ModuleHeartbeatCount); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - string cmdLine = "ping "; - string cmdOption = " -c 1 -w 5 >> /dev/null"; - string cmd; - - int pingFailure = 0; - bool failover = false; - bool recoveryTest = false; - int disableCount = 0; - int noAckCount = 0; - bool amazonParentRestart = false; - - while (!failover) - { - // check if a signal was received to start failover - if (startFailOver) - { - //send notification going from standby to active - oam.sendDeviceNotification(config.moduleName(), START_STANDBY_TO_MASTER); - break; - } - - // perform ping test of Active Parent Module - string cmd = cmdLine + downOAMParentIPAddress + cmdOption; - int rtnCode = system(cmd.c_str()); - - switch (WEXITSTATUS(rtnCode)) - { - case 0: - { - //Ack ping - pingFailure = 0; - - if ( noAckCount != 0 ) - oam.sendDeviceNotification(config.moduleName(), MODULE_UP); - - noAckCount = 0; - - //if Amazon Parent PM is restarting, monitor when back active and take needed actions - if (amazonParentRestart) - { - log.writeLog(__LINE__, "Amazon Parent pinging, waiting until it's active", LOG_TYPE_DEBUG); - sleep(60); - - while (true) - { - SystemStatus systemstatus; - - try - { - oam.getSystemStatus(systemstatus); - } - catch (...) - {} - - if (systemstatus.SystemOpState == ACTIVE) - { - log.writeLog(__LINE__, "System Active, restart needed processes", LOG_TYPE_DEBUG); - - processManager.restartProcessType("mysqld"); - processManager.restartProcessType("ExeMgr"); - processManager.restartProcessType("WriteEngineServer"); - processManager.reinitProcessType("DBRMWorkerNode"); - sleep(1); - processManager.restartProcessType("DDLProc"); - sleep(1); - processManager.restartProcessType("DMLProc"); - - amazonParentRestart = false; - break; - } - - sleep(5); - } - } - - sleep(1); - break; - } - - default: - { - //failed to respond to ping - pingFailure++; - log.writeLog(__LINE__, "OAMParentModule ping failure (" + downOAMParentName + ")", LOG_TYPE_WARNING); - - if ( pingFailure >= ModuleHeartbeatCount ) - { - - bool ack = false; - bool noack = false; - - //check NIC #1 status - int sockfd; - struct ifreq ifr; - - sockfd = socket(AF_INET, SOCK_DGRAM, 0); - - if (sockfd == -1) - { - log.writeLog(__LINE__, "Could not get socket to check NIC #1", LOG_TYPE_ERROR); - close(sockfd); - break; - } - - /* get interface name */ - strncpy(ifr.ifr_name, iface_name.c_str(), IFNAMSIZ); - - /* Read interface flags */ - if (ioctl(sockfd, SIOCGIFFLAGS, &ifr) < 0) - { - // not supported - close(sockfd); - break; - } - - if (ifr.ifr_flags & IFF_UP) - { - log.writeLog(__LINE__, "Local Interface is UP", LOG_TYPE_INFO); - - // any additional devices/modules to test - if ( !moduleNameList.empty()) - { - // Active Parent not talking, check other modules or HA IP address - for ( int count = 0 ; count <= ModuleHeartbeatCount ; count++ ) - { - vector::iterator pt1 = moduleNameList.begin(); - vector::iterator pt2 = moduleIPAddrList.begin(); - - for ( ; pt1 != moduleNameList.end() ; pt1++, pt2++) - { - string cmd = cmdLine + *pt2 + cmdOption; - int rtnCode = system(cmd.c_str()); - - switch (WEXITSTATUS(rtnCode)) - { - case 0: - { - //Ack ping - log.writeLog(__LINE__, *pt1 + " ping successful", LOG_TYPE_DEBUG); - ack = true; - break; - } - - default: - { - // ping failure - log.writeLog(__LINE__, *pt1 + " ping failure", LOG_TYPE_WARNING); - - noack = true; - - //save module name - if ( *pt1 != "HA_device" ) - downModuleList.push_back(*pt1); - - break; - } - } - - // exit loop if ping was successfuly - if ( ack ) - break; - - sleep (2); - } - - // exit loop if ping was successfuly - if ( ack ) - break; - } - } - else - { - // NIC #1 up, procede with failover - failover = true; - } - } - else - { - log.writeLog(__LINE__, "NIC #1 is DOWN", LOG_TYPE_WARNING); - // NIC #1 down, dont switch - noack = true; - - if ( noAckCount == 0 ) - oam.sendDeviceNotification(config.moduleName(), MODULE_DOWN); - - noAckCount++; - } - - close(sockfd); - - //check if all modules are not responding to ping - if ( !ack && noack ) - { - // yes, go into hold state by setting local module to cold-state - ByteStream msg; - ByteStream::byte requestID = OAMPARENTCOLD; - - msg << requestID; - - int returnStatus = processManager.sendMsgProcMon( config.moduleName(), msg, requestID ); - log.writeLog(__LINE__, "sent OAM Parent Cold message to local Process-Monitor, status: " + oam.itoa(returnStatus), LOG_TYPE_DEBUG); - } - else - { - if ( ack && !noack ) - { - // all other modules ACK, only parent failed, procede with failover - failover = true; - break; - } - else - { - if ( ack && noack && !recoveryTest) - { - // some other modules ACK, some didn't - // try 1 more time and mark sure didn't catch in the middle of a LAN recovery - recoveryTest = true; - } - else - { - if ( ack && noack && recoveryTest) - { - // some other modules ACK, some didn't, partial outage, do failover - failover = true; - break; - } - } - } - } - } - } - } - - if ( !failover ) - { - sleep(5); - downModuleList.clear(); - } - else - { - // PARENT PM OUTAGE DETECTED - // check if disable flag is set, if so call the notification API - string activePmFailoverDisabled; - - try - { - oam.getSystemConfig("ActivePmFailoverDisabled", activePmFailoverDisabled); - - if ( activePmFailoverDisabled == "y" ) - { - - log.writeLog(__LINE__, "ActivePmFailoverDisabled is set, send notication", LOG_TYPE_DEBUG); - - oam.sendDeviceNotification(downOAMParentName, PM_MASTER_FAILED_DISABLED); - failover = false; - sleep(5); - disableCount++; - - if ( disableCount > 4 ) - { - //no manually failover has been called, go ahead and do auto-failover - //send notification going from standby to active - - log.writeLog(__LINE__, "ActivePmFailoverDisabled is set, but no manual action has been taken. Do Auto-Failover", LOG_TYPE_DEBUG); - - oam.sendDeviceNotification(config.moduleName(), START_STANDBY_TO_MASTER); - } - } - else - { - //send notification going from standby to active - oam.sendDeviceNotification(config.moduleName(), START_STANDBY_TO_MASTER); - } - } - catch (exception& ex) - {} - - //do amazon failover - if (amazon && AmazonPMFailover == "n") - { - log.writeLog(__LINE__, " ", LOG_TYPE_DEBUG); - log.writeLog(__LINE__, "*** OAMParentModule outage, AmazonPMFailover not set, waiting for instance to restart ***", LOG_TYPE_DEBUG); - - string currentIPAddr = oam.getEC2InstanceIpAddress(downOAMParentHostname); - - if (currentIPAddr == "stopped") - { - // start instance - int retryCount = 6; // 1 minutes - - if ( PMInstanceType == "m2.4xlarge" ) - retryCount = 15; // 2.5 minutes - - log.writeLog(__LINE__, "Instance in stopped state, try starting it: " + downOAMParentHostname, LOG_TYPE_DEBUG); - int retry = 0; - - for ( ; retry < retryCount ; retry++ ) - { - if ( oam.startEC2Instance(downOAMParentHostname) ) - { - log.writeLog(__LINE__, "Instance started, sleep for 30 seconds to allow it to fully come up: " + downOAMParentHostname, LOG_TYPE_DEBUG); - - //delay then get new IP Address - sleep(30); - string currentIPAddr = oam.getEC2InstanceIpAddress(downOAMParentHostname); - - if (currentIPAddr == "stopped" || currentIPAddr == "terminated") - { - log.writeLog(__LINE__, "Instance failed to start (no ip-address), retry: " + downOAMParentHostname, LOG_TYPE_DEBUG); - } - else - { - // update the Columnstore.xml with the new IP Address - string cmd = "sed -i s/" + downOAMParentIPAddress + "/" + currentIPAddr + "/g " + MCSSYSCONFDIR + "/columnstore/Columnstore.xml"; - system(cmd.c_str()); - - // get parent hotsname and IP address in case it changed - downOAMParentIPAddress = currentIPAddr; - - amazonParentRestart = true; - - break; - } - } - else - { - log.writeLog(__LINE__, "Instance failed to start, retry: " + downOAMParentHostname, LOG_TYPE_DEBUG); - - sleep(5); - } - } - - if ( retry >= retryCount ) - { - log.writeLog(__LINE__, "Instance failed to start, restart a new instance: " + downOAMParentHostname, LOG_TYPE_DEBUG); - currentIPAddr = "terminated"; - } - } - - if ( currentIPAddr != "terminated") - { - log.writeLog(__LINE__, "Instance rebooting, monitor", LOG_TYPE_DEBUG); - - //clear and go monitor again - failover = false; - - amazonParentRestart = true; - } - else - log.writeLog(__LINE__, "Instance terminated, do standby-active failover", LOG_TYPE_DEBUG); - - } - - //storage config - string DBRootStorageType; - - try - { - oam.getSystemConfig( "DBRootStorageType", DBRootStorageType); - } - catch (...) {} - - log.writeLog(__LINE__, "OAMParentModuleChange: DBRootStorageType = " + DBRootStorageType, LOG_TYPE_DEBUG); - - if ( DBRootStorageType == "internal" && failover && DataRedundancyConfig == "n") - { - log.writeLog(__LINE__, "DBRoot Storage configured for internal, don't do standby-active failover", LOG_TYPE_DEBUG); - - //clear and go monitor again - failover = false; - } - } - } - - log.writeLog(__LINE__, " ", LOG_TYPE_DEBUG); - log.writeLog(__LINE__, "*** OAMParentModule outage, OAM Parent Module change-over started ***", LOG_TYPE_DEBUG); - - gdownActiveOAMModule = downOAMParentName; - - // update Columnstore.xml entries - string localIPaddr; - string newStandbyModule = downOAMParentName; - string standbyIPaddr = downOAMParentIPAddress; - - try - { - pthread_mutex_lock(&THREAD_LOCK); - - Config* sysConfig4 = Config::makeConfig(); - - // get my IP address - ModuleConfig moduleconfig; - oam.getSystemConfig(config.moduleName(), moduleconfig); - HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); - localIPaddr = (*pt1).IPAddr; - - sysConfig4->setConfig("ProcMgr", "IPAddr", localIPaddr); - sysConfig4->setConfig("ProcMgr_Alarm", "IPAddr", localIPaddr); - sysConfig4->setConfig("ProcStatusControl", "IPAddr", localIPaddr); - sysConfig4->setConfig("DBRM_Controller", "IPAddr", localIPaddr); - - // update Parent OAM Module name to current module name - sysConfig4->setConfig("SystemConfig", "ParentOAMModuleName", config.moduleName()); - - // clear Standby OAM Module - sysConfig4->setConfig("SystemConfig", "StandbyOAMModuleName", oam::UnassignedName); - sysConfig4->setConfig("ProcStatusControlStandby", "IPAddr", oam::UnassignedIpAddr); - - //update Calpont Config table - try - { - sysConfig4->write(); - } - catch (...) - { - log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR); - pthread_mutex_unlock(&THREAD_LOCK); - return API_FAILURE; - } - - pthread_mutex_unlock(&THREAD_LOCK); - - //clear run standby flag; - runStandby = false; - int retryCount = 0; - //sleep, give time for message thread to startup - while (!MsgThreadActive && retryCount < 10) - { - log.writeLog(__LINE__, "Waiting for Message Thread...", LOG_TYPE_DEBUG); - sleep(5); - ++retryCount; - } - - //run save.brm script - //Nope turns out this has to be done first... - - processManager.saveBRM(false); - try - { - oam.autoMovePmDbroot(downOAMParentName); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: Caught unknown exception!", LOG_TYPE_ERROR); - } - - //distribute config file - distributeConfigFile("system"); - - //re-read config info again - Configuration config; - oam.setHotStandbyPM(standbyIPaddr); - - log.writeLog(__LINE__, "Columnstore.xml Standby OAM updated : " + newStandbyModule + ":" + standbyIPaddr, LOG_TYPE_DEBUG); - log.writeLog(__LINE__, "Columnstore.xml entries update to local IP address of " + localIPaddr, LOG_TYPE_DEBUG); - } - catch (exception& ex) - { - pthread_mutex_unlock(&THREAD_LOCK); - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - return API_FAILURE; - } - catch (...) - { - pthread_mutex_unlock(&THREAD_LOCK); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - return API_FAILURE; - } - - if ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM ) - { - //set DDL/DMLproc IPs to local module - setPMProcIPs(config.moduleName()); - - try - { - oam.setSystemConfig("PrimaryUMModuleName", config.moduleName()); - } - catch (...) {} - } - - //send message to local Process Monitor for OAM Parent Activation - ByteStream msg; - ByteStream::byte requestID = OAMPARENTACTIVE; - - msg << requestID; - - while (true) - { - int returnStatus = sendMsgProcMon( config.moduleName(), msg, requestID ); - - log.writeLog(__LINE__, "sent OAM Parent Activate message to local Process-Monitor, status: " + oam.itoa(returnStatus), LOG_TYPE_DEBUG); - - if ( returnStatus == oam::API_SUCCESS) - break; - } - - //set Process Manager state, will make sure process-monitor status control is working - while (true) - { - try - { - ProcessStatus procstat; - oam.getProcessStatus("ProcessManager", config.moduleName(), procstat); - - int ret = setProcessState(config.moduleName(), "ProcessManager", oam::ACTIVE, 0); - - if ( ret == oam::API_SUCCESS ) - { - oam.getProcessStatus("ProcessManager", config.moduleName(), procstat); - - if ( procstat.ProcessOpState == oam::ACTIVE ) - break; - } - } - catch (...) - {} - - sleep(1); - } - - // set alarm - ALARMManager aManager; - aManager.sendAlarmReport(config.moduleName().c_str(), MODULE_SWITCH_ACTIVE, SET); - - //set down Active module to disable state - disableModule(downOAMParentName, false); - - //do it here to get current processes active faster to process queries faster - processManager.setProcessStates(downOAMParentName, oam::AUTO_OFFLINE); - - //set OTHER down modules to disable state - vector::iterator pt1 = downModuleList.begin(); - - for ( ; pt1 != downModuleList.end() ; pt1++) - { - // Don't do this again for downOAMParentName we just did it 3 lines ago - if (*pt1 != downOAMParentName) - { - disableModule(*pt1, false); - processManager.setProcessStates(*pt1, oam::AUTO_OFFLINE); - } - } - - //distribute config file - distributeConfigFile("system"); - - //restart local module WHY?? - processManager.stopModule(config.moduleName(), oam::FORCEFUL, true); - - string localModule = config.moduleName(); - pthread_t startmodulethread; - int status = pthread_create (&startmodulethread, NULL, (void* (*)(void*)) &startModuleThread, &localModule); - - if ( status != 0 ) - log.writeLog(__LINE__, "startModuleThread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR); - - if (status == 0) - { - pthread_join(startmodulethread, NULL); - status = startsystemthreadStatus; - } - - reinitProcessType("cpimport"); - - // waiting until dml are ACTIVE - int retry = 0; - while (retry < 30) - { - ProcessStatus DMLprocessstatus; - - try - { - oam.getProcessStatus("DMLProc", config.moduleName(), DMLprocessstatus); - } - catch (exception& ex) - {} - catch (...) - {} - - if (DMLprocessstatus.ProcessOpState == oam::BUSY_INIT) - log.writeLog(__LINE__, "Waiting for DMLProc to finish rollback", LOG_TYPE_DEBUG); - - if (DMLprocessstatus.ProcessOpState == oam::ACTIVE) - break; - - if (DMLprocessstatus.ProcessOpState == oam::FAILED) - break; - - // wait some more - sleep(2); - ++retry; - } - - - //restart/reinit processes to force their release of the controller node port - if ( ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM) && - ( moduleNameList.size() <= 0 && config.moduleType() == "pm") ) - { - // Do Nothing - } - else - { - //send message to start new Standby Process-Manager, if needed - newStandbyModule = getStandbyModule(); - - if ( !newStandbyModule.empty() && newStandbyModule != downOAMParentName - && newStandbyModule != "NONE") - { - // get standby IP address and update entries - setStandbyModule(newStandbyModule); - } - - //send message to each child process to start any COLD_STANDBY processes - SystemModuleTypeConfig systemmoduletypeconfig; - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - for ( unsigned int i = 0; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - int opState = oam::ACTIVE; - bool degraded; - - try - { - oam.getModuleStatus((*pt).DeviceName, opState, degraded); - } - catch (exception& ex) - { - string error = ex.what(); - // log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": " + error, LOG_TYPE_ERROR); - } - catch (...) - { - // log.writeLog(__LINE__, "EXCEPTION ERROR on getModuleStatus on module " + (*pt).DeviceName + ": Caught unknown exception!", LOG_TYPE_ERROR); - } - - if (opState != oam::MAN_DISABLED) - { - if (opState != oam::AUTO_DISABLED) - { - if ((*pt).DeviceName != downOAMParentName ) - { - if ((*pt).DeviceName != config.moduleName() ) - { - // processManager.setModuleState((*pt).DeviceName, oam::AUTO_INIT); - pthread_t startmodulethread; - string moduleName = (*pt).DeviceName; - int status = pthread_create (&startmodulethread, NULL, (void* (*)(void*)) &startModuleThread, &moduleName); - - if ( status != 0 ) - log.writeLog(__LINE__, "startModuleThread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR); - - sleep(1); - } - } - } - } - } - } - } - - if ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM ) - { - //change master MySQL Replication setup - log.writeLog(__LINE__, "Setup this node as MySQL Replication Master", LOG_TYPE_DEBUG); - oam::DeviceNetworkList devicenetworklist; - processManager.setMySQLReplication(devicenetworklist, config.moduleName()); - } - - processManager.restartProcessType("DBRMControllerNode"); - - processManager.reinitProcesses(); - - // waiting until dml are ACTIVE - retry = 0; - while (retry < 30) - { - ProcessStatus DMLprocessstatus; - - try - { - oam.getProcessStatus("DMLProc", config.moduleName(), DMLprocessstatus); - } - catch (exception& ex) - {} - catch (...) - {} - - if (DMLprocessstatus.ProcessOpState == oam::BUSY_INIT) - log.writeLog(__LINE__, "Waiting for DMLProc to finish rollback", LOG_TYPE_DEBUG); - - if (DMLprocessstatus.ProcessOpState == oam::ACTIVE) - break; - - if (DMLprocessstatus.ProcessOpState == oam::FAILED) - break; - - // wait some more - sleep(2); - ++retry; - } - - // clear alarm - aManager.sendAlarmReport(config.moduleName().c_str(), MODULE_SWITCH_ACTIVE, CLEAR); - - //set status to ACTIVE while failover is in progress - processManager.setSystemState(oam::ACTIVE); - - log.writeLog(__LINE__, "*** Exiting OAMParentModuleChange function ***", LOG_TYPE_DEBUG); - - return API_SUCCESS; -} - -/****************************************************************************************** -* @brief sendStatusUpdate -* -* purpose: Send Status Update to Process Monitor -* -* -******************************************************************************************/ -void ProcessManager::sendStatusUpdate(ByteStream obs, ByteStream::byte returnRequestType) -{ - try - { - MessageQueueClient processor("ProcStatusControl"); - ByteStream ibs; - - processor.write(obs); - - // wait 10 seconds for ACK from Process Monitor - struct timespec ts = { 10, 0 }; - - ibs = processor.read(&ts); - - if (ibs.length() > 0) - { - ByteStream::byte status; - ibs >> status; - - if ( status == oam::API_SUCCESS ) - { - processor.shutdown(); - } - else - { - // shutdown connection - processor.shutdown(); - throw std::runtime_error("error"); - } - } - else - { - // timeout occurred, shutdown connection - processor.shutdown(); - throw std::runtime_error("timeout"); - } - } - catch (...) - { - throw std::runtime_error("timeout"); - } - - Configuration config; - Config* sysConfig5 = Config::makeConfig(); - - if ( sysConfig5->getConfig("ProcStatusControlStandby", "IPAddr") == oam::UnassignedIpAddr ) - return; - - try - { - MessageQueueClient processor("ProcStatusControlStandby"); - ByteStream ibs; - - processor.write(obs); - - processor.shutdown(); - } - catch (...) - {} - - return; - -} - -/****************************************************************************************** -* @brief getStandbyModule -* -* purpose: find an avaliable hot-standby module based on Process-Manager status, if one exist -* -* -******************************************************************************************/ -std::string ProcessManager::getStandbyModule() -{ - Oam oam; - SystemProcessStatus systemprocessstatus; - ProcessStatus processstatus; - string backupStandbyModule = "NONE"; - string newStandbyModule = "NONE"; - - //check if gluster, if so then find PMs that have copies of DBROOT #1 - string pmList = ""; - - try - { - oam.getProcessStatus(systemprocessstatus); - for ( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++) - { - if ( systemprocessstatus.processstatus[i].ProcessName == "ProcessManager" && - systemprocessstatus.processstatus[i].ProcessOpState == oam::STANDBY ) - //already have a hot-standby - return ""; - } - } - catch (exception& ex) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + string(ex.what()), LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - - if (DataRedundancyConfig == "y") - { - - try - { - string errmsg; - oam.glusterctl(oam::GLUSTER_WHOHAS, "1", pmList, errmsg); - - boost::char_separator sep(" "); - boost::tokenizer< boost::char_separator > tokens(pmList, sep); - - for ( boost::tokenizer< boost::char_separator >::iterator it = tokens.begin(); - it != tokens.end(); - ++it) - { - string pm = "pm" + *it; - - // skip if current module - if ( pm == config.moduleName() ) - continue; - - int opState; - bool degraded; - - try - { - oam.getModuleStatus(pm, opState, degraded); - } - catch (...) - {} - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - { - continue; - } - else - return pm; - } - - } - catch (...) - {} - - return "NONE"; - } - - //not gluster, check by status - try - { - for ( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++) - { - if ( systemprocessstatus.processstatus[i].ProcessName == "ProcessManager" && - systemprocessstatus.processstatus[i].ProcessOpState == oam::STANDBY ) - //already have a hot-standby - return ""; - - if ( ( backupStandbyModule != "NONE" ) || - ( newStandbyModule != "NONE" ) ) - continue; - - if ( systemprocessstatus.processstatus[i].ProcessName == "ProcessManager" && - systemprocessstatus.processstatus[i].ProcessOpState == oam::COLD_STANDBY ) - { - // Found a ProcessManager in a COLD_STANDBY state - newStandbyModule = systemprocessstatus.processstatus[i].Module; - continue; - } - - if ( systemprocessstatus.processstatus[i].ProcessName == "ProcessManager" && - systemprocessstatus.processstatus[i].ProcessOpState == oam::MAN_OFFLINE && - backupStandbyModule == "NONE" && - newStandbyModule == "NONE" ) - { - // Found a ProcessManager in a MAN_OFFLINE state, use if no COLD_STANDBY is found - // and module is not disabled - int opState = oam::ACTIVE; - bool degraded; - - try - { - oam.getModuleStatus(systemprocessstatus.processstatus[i].Module, opState, degraded); - } - catch (...) - {} - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - { - continue; - } - else - backupStandbyModule = systemprocessstatus.processstatus[i].Module; - } - } - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - - if ( newStandbyModule != "NONE" ) - return newStandbyModule; - - return backupStandbyModule; -} - -/****************************************************************************************** -* @brief setStandbyModule -* -* purpose: set Standby Module info in Columnstore.xml -* -* -******************************************************************************************/ -bool ProcessManager::setStandbyModule(std::string newStandbyModule, bool send) -{ - Oam oam; - - if ( newStandbyModule.empty() ) - return true; - - pthread_mutex_lock(&THREAD_LOCK); - - for (int i = 0 ; i < 5; i++) - { - // get standby IP address and update entries - ModuleConfig moduleconfig; - oam.getSystemConfig(newStandbyModule, moduleconfig); - HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); - string standbyIPaddr = (*pt1).IPAddr; - - Configuration config; - Config* sysConfig6 = Config::makeConfig(); - sysConfig6->setConfig("SystemConfig", "StandbyOAMModuleName", newStandbyModule); - sysConfig6->setConfig("ProcStatusControlStandby", "IPAddr", standbyIPaddr); - - try - { - sysConfig6->write(); - pthread_mutex_unlock(&THREAD_LOCK); - - oam.setHotStandbyPM(standbyIPaddr); - - //distribute config file - distributeConfigFile("system"); - - log.writeLog(__LINE__, "Columnstore.xml Standby OAM updated to : " + newStandbyModule + ":" + standbyIPaddr, LOG_TYPE_DEBUG); - - if (send) - { - log.writeLog(__LINE__, "Send Message for new Hot-Standby ProcessManager to module = " + newStandbyModule, LOG_TYPE_DEBUG); - int retStatus = startProcess(newStandbyModule, "ProcessManager", oam::GRACEFUL_STANDBY); - - log.writeLog(__LINE__, "Hot-Standby ProcessManager ACK received from Process-Monitor, return status = " + oam.itoa(retStatus), LOG_TYPE_DEBUG); - } - - return true; - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "setStandbyModule: EXCEPTION ERROR on sysConfig->write(): " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "setStandbyModule :EXCEPTION ERROR on sysConfig->write(): Caught unknown exception!", LOG_TYPE_ERROR); - } - } - - log.writeLog(__LINE__, "setStandbyModule: failed to set enable state", LOG_TYPE_ERROR); - - pthread_mutex_unlock(&THREAD_LOCK); - return false; - -} - -/****************************************************************************************** -* @brief clearStandbyModule -* -* purpose: clear Standby Module info in Columnstore.xml -* -* -******************************************************************************************/ -bool ProcessManager::clearStandbyModule() -{ - Oam oam; - - pthread_mutex_lock(&THREAD_LOCK); - - Configuration config; - - for (int i = 0 ; i < 5; i++) - { - Config* sysConfig7 = Config::makeConfig(); - sysConfig7->setConfig("SystemConfig", "StandbyOAMModuleName", oam::UnassignedName); - sysConfig7->setConfig("ProcStatusControlStandby", "IPAddr", oam::UnassignedIpAddr); - - try - { - sysConfig7->write(); - pthread_mutex_unlock(&THREAD_LOCK); - - oam.setHotStandbyPM(" "); - log.writeLog(__LINE__, "Clear Columnstore.xml Standby OAM", LOG_TYPE_DEBUG); - - //distribute config file - distributeConfigFile("system"); - - return true; - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "clearStandbyModule: EXCEPTION ERROR on sysConfig->write(): " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "clearStandbyModule :EXCEPTION ERROR on sysConfig->write(): Caught unknown exception!", LOG_TYPE_ERROR); - } - - sleep(1); - } - - log.writeLog(__LINE__, "clearStandbyModule: failed to set enable state", LOG_TYPE_ERROR); - - pthread_mutex_unlock(&THREAD_LOCK); - return false; - -} - -/****************************************************************************************** -* @brief setEnableState -* -* purpose: set Enable State info in Columnstore.xml -* -* -******************************************************************************************/ -int ProcessManager::setEnableState(std::string target, std::string state) -{ - Oam oam; - ModuleConfig moduleconfig; - - pthread_mutex_lock(&THREAD_LOCK); - - for (int i = 0 ; i < 5; i++) - { - try - { - oam.getSystemConfig(target, moduleconfig); - - moduleconfig.DisableState = state; - - try - { - oam.setSystemConfig(target, moduleconfig); - pthread_mutex_unlock(&THREAD_LOCK); - return API_SUCCESS; - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "setEnableState: EXCEPTION ERROR on setSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "setEnableState: EXCEPTION ERROR on setSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "setEnableState: EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "setEnableState: EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - sleep(1); - } - - log.writeLog(__LINE__, "setEnableState: failed to set enable state", LOG_TYPE_ERROR); - - pthread_mutex_unlock(&THREAD_LOCK); - return API_SUCCESS; - -} - -/****************************************************************************************** -* @brief stopProcessTypes -* -* purpose: stop by process type -* -* -******************************************************************************************/ -void ProcessManager::stopProcessTypes(bool manualFlag) -{ - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - Oam oam; - - // skip if single server install, meaning only 1 worker node - try - { - Config* sysConfig = Config::makeConfig(); - - if ( sysConfig->getConfig("DBRM_Controller", "NumWorkers") == "1" ) - return; - } - catch (...) - { - return; - } - - log.writeLog(__LINE__, "stopProcessTypes Called"); - - //front-end first - processManager.stopProcessType("mysqld", manualFlag); - processManager.stopProcessType("DMLProc", manualFlag); - processManager.stopProcessType("DDLProc", manualFlag); - processManager.stopProcessType("ExeMgr", manualFlag); - - //back-end - processManager.stopProcessType("WriteEngineServer", manualFlag); - processManager.stopProcessType("PrimProc", manualFlag); - - //dbrm - processManager.stopProcessType("DBRMControllerNode", manualFlag); - processManager.stopProcessType("DBRMWorkerNode", manualFlag); - - processManager.stopProcessType("StorageManager", manualFlag); - - log.writeLog(__LINE__, "stopProcessTypes Completed"); -} - -/****************************************************************************************** -* @brief unmountDBRoot -* -* purpose: unmount a dbroot -* -* -******************************************************************************************/ -int ProcessManager::unmountDBRoot(std::string dbrootID) -{ - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - Oam oam; - - //get pm assigned to that dbroot - int pmID; - oam.getDbrootPmConfig(atoi(dbrootID.c_str()), pmID); - string moduleName = "pm" + oam.itoa(pmID); - - log.writeLog(__LINE__, "send unmountDBRoot to pm: " + dbrootID + "/" + moduleName, LOG_TYPE_DEBUG ); - - ByteStream msg; - msg << (ByteStream::byte) PROCUNMOUNT; - msg << dbrootID; - - return sendMsgProcMon( moduleName, msg, PROCUNMOUNT ); - -} - -/****************************************************************************************** -* @brief mountDBRoot -* -* purpose: mount a dbroot -* -* -******************************************************************************************/ -int ProcessManager::mountDBRoot(std::string dbrootID) -{ - ProcessLog log; - Configuration config; - ProcessManager processManager(config, log); - Oam oam; - - if (DataRedundancyConfig == "y") - return oam::API_SUCCESS; - - //get pm assigned to that dbroot - int pmID; - oam.getDbrootPmConfig(atoi(dbrootID.c_str()), pmID); - string moduleName = "pm" + oam.itoa(pmID); - - log.writeLog(__LINE__, "send mountDBRoot to pm: " + dbrootID + "/" + moduleName, LOG_TYPE_DEBUG ); - - //send msg to ProcMon if not local module - if ( config.moduleName() == moduleName ) - { - string tmpMount = tmpLogDir + "/mount.log"; - string cmd = "export LC_ALL=C;mount /var/lib/columnstore/data" + dbrootID + " > " + tmpMount; - system(cmd.c_str()); - - if ( !rootUser) - { - cmd = "chown -R " + USER + ":" + USER + " /var/lib/columnstore/data" + dbrootID + " > /dev/null"; - system(cmd.c_str()); - } - - ifstream in(tmpMount.c_str()); - - in.seekg(0, std::ios::end); - int size = in.tellg(); - - if ( size != 0 ) - { - if (!oam.checkLogStatus(tmpMount, "already")) - { - log.writeLog(__LINE__, "mount failed, dbroot: " + dbrootID); - return API_FAILURE; - } - } - } - else - { - ByteStream msg; - msg << (ByteStream::byte) PROCMOUNT; - msg << dbrootID; - - return sendMsgProcMon( moduleName, msg, PROCMOUNT ); - } - - return oam::API_SUCCESS; -} - -/****************************************************************************************** -* @brief flushInodeCache -* -* purpose: flush cache -* -* -******************************************************************************************/ -void ProcessManager::flushInodeCache() -{ - int fd; - ByteStream reply; - -#ifdef __linux__ - fd = open("/proc/sys/vm/drop_caches", O_WRONLY); - - if (fd >= 0) - { - if (write(fd, "3\n", 2) == 2) - { - log.writeLog(__LINE__, "flushInodeCache successful", LOG_TYPE_DEBUG); - } - else - { - log.writeLog(__LINE__, "flushInodeCache failed", LOG_TYPE_DEBUG); - } - - close(fd); - } - else - { - log.writeLog(__LINE__, "flushInodeCache failed to open file", LOG_TYPE_DEBUG); - } - -#endif -} - -/****************************************************************************************** -* @brief setMySQLReplication -* -* purpose: setMySQLReplication -* -* -******************************************************************************************/ -int ProcessManager::setMySQLReplication(oam::DeviceNetworkList devicenetworklist, std::string masterModule, bool distributeDB, std::string password, bool enable, bool addModule) -{ - Oam oam; - - string MySQLRep; - - try - { - oam.getSystemConfig("MySQLRep", MySQLRep); - } - catch (...) - { - MySQLRep = "n"; - } - - if ( MySQLRep == "n" && enable ) - { - log.writeLog(__LINE__, "setMySQLReplication: MySQLRep not set, exiting", LOG_TYPE_DEBUG); - return oam::API_SUCCESS; - } - - log.writeLog(__LINE__, "Setup MySQL Replication", LOG_TYPE_DEBUG); - - //get master info - if ( masterModule == oam::UnassignedName) - { - try - { - oam.getSystemConfig("PrimaryUMModuleName", masterModule); - } - catch (...) - { - masterModule = oam::UnassignedName; - } - - if ( masterModule == oam::UnassignedName ) - { - // use default setting - masterModule = "um1"; - - if ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM ) - masterModule = "pm1"; - } - } - - //send distubute DB - if ( distributeDB ) - { - if ( devicenetworklist.size() == 0 ) - { - //dist to all slaves - ByteStream msg; - ByteStream::byte requestID = oam::MASTERDIST; - msg << requestID; - msg << password; - msg << "all"; - - log.writeLog(__LINE__, "Distribute Master DB, master module=" + masterModule, LOG_TYPE_DEBUG); - - int returnStatus = sendMsgProcMon( masterModule, msg, requestID, 60 ); - - if ( returnStatus != API_SUCCESS) - { - log.writeLog(__LINE__, "setMySQLReplication: ERROR: Error getting MySQL Replication Master Information", LOG_TYPE_ERROR); - return API_FAILURE; - } - } - else - { - DeviceNetworkList::iterator listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - string remoteModuleName = (*listPT).DeviceName; - - //skip master - if ( remoteModuleName == masterModule ) - continue; - - if ( !addModule ) - { - // skip disabled modules - int opState = oam::ACTIVE; - bool degraded; - - try - { - oam.getModuleStatus(remoteModuleName, opState, degraded); - } - catch (...) - {} - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - continue; - } - - // don't do PMs unless PMwithUM flag is set - if ( config.ServerInstallType() != oam::INSTALL_COMBINE_DM_UM_PM ) - { - string moduleType = remoteModuleName.substr(0, MAX_MODULE_TYPE_SIZE); - - if ( moduleType == "pm" && PMwithUM == "n" ) - continue; - } - - ByteStream msg; - ByteStream::byte requestID = oam::MASTERDIST; - msg << requestID; - msg << password; - msg << remoteModuleName; - - log.writeLog(__LINE__, "Distribute Master DB, master module=" + masterModule, LOG_TYPE_DEBUG); - - int returnStatus = sendMsgProcMon( masterModule, msg, requestID, 60 ); - - if ( returnStatus != API_SUCCESS) - { - log.writeLog(__LINE__, "setMySQLReplication: ERROR: Error getting MySQL Replication Master Information", LOG_TYPE_ERROR); - return API_FAILURE; - } - } - } - } - - //send setup master - ByteStream msg; - ByteStream::byte requestID = oam::MASTERREP; - - if ( !enable ) - { - requestID = oam::DISABLEREP; - log.writeLog(__LINE__, "Disable MySQL Replication, master module=" + masterModule, LOG_TYPE_DEBUG); - } - else - log.writeLog(__LINE__, "Setup MySQL Replication, master module=" + masterModule, LOG_TYPE_DEBUG); - - msg << requestID; - - int returnStatus = sendMsgProcMon( masterModule, msg, requestID, 60 ); - - if ( returnStatus != API_SUCCESS) - { - log.writeLog(__LINE__, "setMySQLReplication: ERROR: Error getting MySQL Replication Master Information", LOG_TYPE_ERROR); - return API_FAILURE; - } - - // - // send msg to setup slave - // - - // check if a list was provide, if not, do all modules - if ( devicenetworklist.size() == 0 ) - { - log.writeLog(__LINE__, "Setup MySQL Replication on all modules", LOG_TYPE_DEBUG); - SystemModuleTypeConfig systemmoduletypeconfig; - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - } - catch (exception& ex) - {} - - for ( unsigned int i = 0; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - string moduleType = systemmoduletypeconfig.moduletypeconfig[i].ModuleType; - - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++ ) - { - string remoteModuleName = (*pt).DeviceName; - - //skip master - if ( remoteModuleName == masterModule ) - continue; - - if ( !addModule ) - { - // skip disabled modules - int opState = oam::ACTIVE; - bool degraded; - - try - { - oam.getModuleStatus(remoteModuleName, opState, degraded); - } - catch (...) - {} - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - continue; - } - - // don't do PMs unless PMwithUM flag is set - if ( config.ServerInstallType() != oam::INSTALL_COMBINE_DM_UM_PM ) - { - string moduleType = remoteModuleName.substr(0, MAX_MODULE_TYPE_SIZE); - - if ( moduleType == "pm" && PMwithUM == "n" ) - continue; - } - - ByteStream msg1; - ByteStream::byte requestID = oam::SLAVEREP; - - if ( !enable ) - { - requestID = oam::DISABLEREP; - log.writeLog(__LINE__, "Disable MySQL Replication, slave module=" + remoteModuleName, LOG_TYPE_DEBUG); - } - else - log.writeLog(__LINE__, "Setup MySQL Replication, slave module=" + remoteModuleName, LOG_TYPE_DEBUG); - - msg1 << requestID; - - if ( enable ) - { - if ( masterLogFile == oam::UnassignedName || - masterLogPos == oam::UnassignedName ) - return API_FAILURE; - - msg1 << masterLogFile; - msg1 << masterLogPos; - } - - returnStatus = sendMsgProcMon( remoteModuleName, msg1, requestID, 60 ); - - if ( returnStatus != API_SUCCESS) - { - log.writeLog(__LINE__, "setMySQLReplication: ERROR: Error setting MySQL Replication Slave", LOG_TYPE_ERROR); - return API_FAILURE; - } - } - } - } - else - { - DeviceNetworkList::iterator listPT = devicenetworklist.begin(); - - for ( ; listPT != devicenetworklist.end() ; listPT++) - { - string remoteModuleName = (*listPT).DeviceName; - - //skip master - if ( remoteModuleName == masterModule ) - continue; - - if ( !addModule ) - { - // skip disabled modules - int opState = oam::ACTIVE; - bool degraded; - - try - { - oam.getModuleStatus(remoteModuleName, opState, degraded); - } - catch (...) - {} - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - continue; - } - - log.writeLog(__LINE__, "Setup Slave MySQL Replication on " + remoteModuleName, LOG_TYPE_DEBUG); - - ByteStream msg1; - ByteStream::byte requestID = oam::SLAVEREP; - - if ( !enable ) - { - requestID = oam::DISABLEREP; - log.writeLog(__LINE__, "Disable MySQL Replication, slave module=" + remoteModuleName, LOG_TYPE_DEBUG); - } - else - log.writeLog(__LINE__, "Setup MySQL Replication, slave module=" + remoteModuleName, LOG_TYPE_DEBUG); - - msg1 << requestID; - - if ( masterLogFile == oam::UnassignedName || - masterLogPos == oam::UnassignedName ) - { - log.writeLog(__LINE__, "setMySQLReplication: ERROR: Unassigned masterLogFile or masterLogPos", LOG_TYPE_ERROR); - return API_FAILURE; - } - - if ( enable ) - { - if ( masterLogFile == oam::UnassignedName || - masterLogPos == oam::UnassignedName ) - return API_FAILURE; - - msg1 << masterLogFile; - msg1 << masterLogPos; - } - - returnStatus = sendMsgProcMon( remoteModuleName, msg1, requestID, 60 ); - - if ( returnStatus != API_SUCCESS) - { - log.writeLog(__LINE__, "setMySQLReplication: ERROR: Error setting MySQL Replication Slave", LOG_TYPE_ERROR); - return API_FAILURE; - } - } - } - - return oam::API_SUCCESS; -} - -/****************************************************************************************** -* @brief glusterAssign -* -* purpose: Gluster assign dbroot to a module -* -******************************************************************************************/ -int ProcessManager::glusterAssign(std::string moduleName, std::string dbroot) -{ - ByteStream msg; - ByteStream::byte requestID = PROCGLUSTERASSIGN; - - msg << requestID; - msg << dbroot; - - int returnStatus = sendMsgProcMon( moduleName, msg, requestID, 30 ); - int retry = 0; - // Try this for a minute because in failover the node returning to service may not be listening yet - while(returnStatus != API_SUCCESS && retry < 60) - { - log.writeLog(__LINE__, "glusterAssign retrying...", LOG_TYPE_DEBUG); - returnStatus = sendMsgProcMon( moduleName, msg, requestID, 30 ); - sleep(1); - ++retry; - } - if ( returnStatus == API_SUCCESS) - { - //log the success event - log.writeLog(__LINE__, "glusterAssign Success: " + moduleName + "/" + dbroot, LOG_TYPE_DEBUG); - } - else - { - //log the error event - log.writeLog(__LINE__, "glusterAssign FAILED: " + moduleName + "/" + dbroot, LOG_TYPE_ERROR); - } - - return returnStatus; -} - -/****************************************************************************************** -* @brief glusterUnassign -* -* purpose: Gluster Unassign dbroot to a module -* -******************************************************************************************/ -int ProcessManager::glusterUnassign(std::string moduleName, std::string dbroot) -{ - ByteStream msg; - ByteStream::byte requestID = PROCGLUSTERUNASSIGN; - - msg << requestID; - msg << dbroot; - - int returnStatus = sendMsgProcMon( moduleName, msg, requestID, 30 ); - int retry = 0; - // Try this for a minute because in failover the node returning to service may not be listening yet - while(returnStatus != API_SUCCESS && retry < 60) - { - log.writeLog(__LINE__, "glusterUnassign retrying...", LOG_TYPE_DEBUG); - returnStatus = sendMsgProcMon( moduleName, msg, requestID, 30 ); - sleep(1); - ++retry; - } - if ( returnStatus == API_SUCCESS) - { - //log the success event - log.writeLog(__LINE__, "glusterUnassign Success: " + moduleName + "/" + dbroot, LOG_TYPE_DEBUG); - } - else - { - //log the error event - log.writeLog(__LINE__, "glusterUnassign FAILED: " + moduleName + "/" + dbroot, LOG_TYPE_ERROR); - } - - return returnStatus; -} - -/****************************************************************************************** -* @brief syncFsALL -* -* purpose: Sync filesystem for backup snapshots on suspenddatabasewrites -* -******************************************************************************************/ -int ProcessManager::syncFsAll(std::string moduleName) -{ - - ByteStream msg; - ByteStream::byte requestID = SYNCFSALL; - - msg << requestID; - - int returnStatus = sendMsgProcMon( moduleName, msg, requestID, 30 ); - - if ( returnStatus == API_SUCCESS) - { - //log the success event - log.writeLog(__LINE__, "syncFsALL Success: " + moduleName, LOG_TYPE_DEBUG); - } - else - { - //log the error event - log.writeLog(__LINE__, "syncFsALL FAILED: " + moduleName, LOG_TYPE_ERROR); - } - - return returnStatus; -} - -} //end of namespace -// vim:ts=4 sw=4: - diff --git a/procmgr/processmanager.h b/procmgr/processmanager.h deleted file mode 100644 index 387d67e79..000000000 --- a/procmgr/processmanager.h +++ /dev/null @@ -1,607 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/****************************************************************************************** -* $Id: processmanager.h 2163 2013-04-04 18:40:54Z rdempsey $ -* -******************************************************************************************/ - - -#ifndef _PROCESSMANAGER_H_ -#define _PROCESSMANAGER_H_ - -#include - -#include "liboamcpp.h" -#include "threadpool.h" -#include "socketclosed.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if defined(__GLIBC__) && __GLIBC__ >=2 && __GLIBC_MINOR__ >= 1 -#include -#include -#else -#include -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if defined(__GLIBC__) && __GLIBC__ >=2 && __GLIBC_MINOR__ >= 1 -#include -#include -#else -#include -#include -#endif - -#include -#include - -void pingDeviceThread(); - -namespace processmanager -{ - -void* startSystemThread(oam::DeviceNetworkList* devicenetworklist); -void* stopSystemThread(oam::DeviceNetworkList* devicenetworklist); -void* startModuleThread(std::string* moduleName); -void* stopModuleThread(std::string* moduleName); -void* processMSG(messageqcpp::IOSocket* fIos); - -/** @brief Timeset for Milleseconds -*/ -#define TS_MS(x) ((x) * 1000000) - -struct HeartBeatProc -{ - std::string ModuleName; //!< Module Name - std::string ProcessName; //!< Process Name - int ID; //!< Heartbeat ID - bool receiveFlag; //!< Heartbeat indication flag -}; - -typedef std::list HeartBeatProcList; - -typedef std::map srvStateList; - -const int MAX_ARGUMENTS = 10; -const std::string DEFAULT_LOG_FILE = "/var/log/mariadb/columnstore/ProcessManager.log"; - - -/** - * parentFlag accessor - */ - -class Configuration -{ -public: - /** - * @brief Constructor - */ - Configuration(); - /** - * Destructor - */ - ~Configuration(); - - /** - * @brief Return the module opstate tag - */ - std::string getstateInfo(std::string moduleName); - - /** - * moduleName accessor - */ - const std::string& moduleName() const - { - return flocalModuleName; - } - - /** - * moduleType accessor - */ - const std::string& moduleType() const - { - return flocalModuleType; - } - - /** - * moduleName accessor - */ - const uint16_t& moduleID() const - { - return flocalModuleID; - } - - /** - * parentName accessor - */ - const std::string& OAMParentName() const - { - return fOAMParentModuleName; - } - - /** - * parentFlag accessor - */ - const bool& OAMParentFlag() const - { - return fOAMParentModuleFlag; - } - - /** - * ServerInstallType accessor - */ - const uint16_t& ServerInstallType() const - { - return fserverInstallType; - } - - /** - * standbyName accessor - */ - const std::string& OAMStandbyName() const - { - return fOAMStandbyModuleName; - } - - /** - * standbyParentFlag accessor - */ - const bool& OAMStandbyParentFlag() const - { - return fOAMStandbyModuleFlag; - } - - /** - * parentFlag accessor - */ - -private: - srvStateList stateInfoList; - std::string flocalModuleName; - std::string flocalModuleType; - uint16_t flocalModuleID; - std::string fOAMParentModuleName; - bool fOAMParentModuleFlag; - uint16_t fserverInstallType; - std::string fOAMStandbyModuleName; - bool fOAMStandbyModuleFlag; -}; - -class ProcessLog -{ -public: - /** - * @brief Constructor - */ - - ProcessLog(); - - /** - * @brief Destructor - */ - - ~ProcessLog(); - - /** - * @brief Write the message to the log - */ - void writeLog(const int lineNumber, const std::string logContent, const logging::LOG_TYPE logType = logging::LOG_TYPE_INFO); - - /** - * @brief Write the message to the log - */ - void writeLog(const int lineNumber, const int logContent, const logging::LOG_TYPE logType = logging::LOG_TYPE_INFO); - - /** - * @brief Compose a log data in the required format - */ - void setSysLogData(); - - /** - * @brief return the sysLogData - */ - std::string getSysLogData(); - - /** - * @brief log process status change into system log - */ - - void writeSystemLog(); - -private: - std::ofstream PMLog; - std::string sysLogData; -}; - -class ProcessManager -{ -public: - - /** - * @brief Constructor - */ - ProcessManager(Configuration& config, ProcessLog& log); - - /** - * @brief Default Destructor - */ - ~ProcessManager(); - - /** - * @brief Process the received message - */ - -// void processMSG(messageqcpp::IOSocket fIos, messageqcpp::ByteStream msg); - - /** - *@brief send a request to the associated Process Monitor - */ - // void sendRequestToMonitor(ByteStream::byte target, ByteStream request); - - /** - *@brief Build a request message - */ - messageqcpp::ByteStream buildRequestMessage(messageqcpp::ByteStream::byte requestID, messageqcpp::ByteStream::byte actionIndicator, std::string processName, bool manualFlag = true); - - /** - *@brief Start all processes on the specified module - */ - - int startModule(std::string target, messageqcpp::ByteStream::byte actionIndicator, uint16_t startType, bool systemStart = false); - - /** - *@brief Stop all processes on the specified module - */ - int stopModule(std::string target, messageqcpp::ByteStream::byte actionIndicator, bool manualFlag, int timeout = 60 ); - - /** - *@brief power off the specified module - */ - int shutdownModule(std::string target, messageqcpp::ByteStream::byte actionIndicator, bool manualFlag, int timeout = 10 ); - - /** - *@brief Disable a specified module - */ - int disableModule(std::string target, bool manualFlag); - - /** - *@brief reinit Processes trying to replace recycleProcess - */ - void reinitProcesses(std::string skipModule = "none"); - /** - *@brief recycle Processes - */ - void recycleProcess(std::string module, bool enableModule = false); - - /** - *@brief Enable a specified module - */ - int enableModule(std::string target, int state, bool failover = false); - - /** - *@brief Enable a specified module - */ - int enableModuleStatus(std::string target); - - void dbrmctl(std::string command); - - /** - *@brief start all Mgr Controlled processes for a module - */ - void startMgrProcesses(std::string moduleName); - - /** - *@brief stop process on a specific module - */ - int stopProcess(std::string moduleName, std::string processName, messageqcpp::ByteStream::byte actionIndicator, bool manualFlag, int timeout = 10); - - /** - *@brief start process on a specific module - */ - int startProcess(std::string moduleName, std::string processName, messageqcpp::ByteStream::byte actionIndicator); - - /** - *@brief restart process on a specific module - */ - int restartProcess(std::string moduleName, std::string processName, messageqcpp::ByteStream::byte actionIndicator, bool manualFlag); - - /** - *@brief reinit process on a specific module - */ - int reinitProcess(std::string moduleName, std::string processName); - - /** - *@brief set the state of the specified module - */ - void setModuleState(std::string moduleName, uint16_t state); - - /** - *@brief set the state of the specified Ext device - */ - void setExtdeviceState(std::string extDeviceName, uint16_t state); - - /** - *@brief set the state of the specified NIC - */ - void setNICState(std::string hostName, uint16_t state); - - /** - *@brief set the state of the system - */ - void setSystemState(uint16_t state); - - /** - *@brief set all processes running on module auto or manual offline - */ - void setProcessStates(std::string moduleName, uint16_t state, std::string processNameSkip = ""); - - /** - *@brief set the state of the specified process - */ - int setProcessState (std::string moduleName, std::string processName, uint16_t state, pid_t PID); - - /** - *@brief updatelog on a specific module - */ - int updateLog (std::string action, std::string moduleName, std::string level); - - /** - *@brief get log configuration on a specific module - */ - int getConfigLog (std::string moduleName); - - /** - *@brief update process configuration - */ - int updateConfig (std::string moduleName); - - /** - *@brief Build System Tables request - */ - int buildSystemTables(std::string moduleName); - - /** - *@brief Stop a Process Type - */ - int stopProcessType(std::string processName, bool manualFlag = true ); - - /** - *@brief Start a Process Type - */ - int startProcessType(std::string processName); - - /** - *@brief Restart a Process Type - */ - int restartProcessType(std::string processName, std::string skipModule = "none", bool manualFlag = true); - - /** - *@brief ReInit a Process Type - */ - int reinitProcessType(std::string processName); - - /** - *@brief Add Module - */ - int addModule(oam::DeviceNetworkList devicenetworklist, std::string password, bool storeHostnames, - bool manualFlag = true); - - /** - *@brief Configure Module - */ - int configureModule(std::string moduleName); - - /** - *@brief Reconfigure Module - */ - int reconfigureModule(oam::DeviceNetworkList devicenetworklist); - - /** - *@brief Remove Module - */ - int removeModule(oam::DeviceNetworkList devicenetworklist, bool manualFlag = true); - - /** - *@brief Check for simplex module run-type and start mate processes if needed - */ - void checkSimplexModule(std::string moduleName); - - /** - *@brief update core on a specific module - */ - int updateCore (std::string action, std::string moduleName); - - /** - *@brief update PMS Configuration - */ - int updatePMSconfig(bool check = false); - - /** - *@brief update WorkerNode Configuration - */ - int updateWorkerNodeconfig(); - - /** - *@brief Clears all alarms related to a module - */ - void clearModuleAlarms(std::string moduleName); - - /** - *@brief Clears all alarms related to a NIC hostName - */ - void clearNICAlarms(std::string hostName); - - /** - *@brief Send Msg to Process Monitor - */ - std::string sendMsgProcMon1( std::string module, messageqcpp::ByteStream msg, int requestID ); - - /* - * Updates the Columnstore.xml file for DDL/DMLProc IPs during PM switchover - */ - int setPMProcIPs( std::string moduleName, std::string processName = oam::UnassignedName); - - /* - * OAM Parent Module change-over - */ - int OAMParentModuleChange(); - - /** @brief find a new hot-standby module based on Process-Manager status, if one exist - */ - std::string getStandbyModule(); - - /** @brief set Standby Module info in Columnstore.xml - */ - bool setStandbyModule(std::string newStandbyModule, bool send = true); - - /** @brief clear Standby Module info in Columnstore.xml - */ - bool clearStandbyModule(); - - int setEnableState(std::string target, std::string state); - - /** @brief Distribute Calpont Config File to system modules - */ - int distributeConfigFile(std::string name, std::string file = "Columnstore.xml"); - - /** @brief Switch OAM Parent Module - */ - int switchParentOAMModule(std::string moduleName); - - /** @brief get DBRM Data and send to requester - */ - int getDBRMData(messageqcpp::IOSocket fIos, std::string moduleName); - - /** @brief remount the dbroot disk - */ -// int remountDbroots(std::string option); - - /** - *@brief Send Msg to Process Monitor - */ - int sendMsgProcMon( std::string module, messageqcpp::ByteStream msg, int requestID, int timeout = 240 ); - - /** @brief get Alarm Data and send to requester - */ - int getAlarmData(messageqcpp::IOSocket fIos, int type, std::string date); - - /** - *@brief Save BRM database - */ - - void saveBRM(bool skipSession = false, bool clearshm = true); - - /** - *@brief set query system state not ready - */ - - void setQuerySystemState(bool set); - - /** @brief stop by process type - */ - void stopProcessTypes(bool manualFlag = true); - - /** @brief unmount a dbroot - */ - int unmountDBRoot(std::string dbrootID); - - /** @brief mount a dbroot - */ - int mountDBRoot(std::string dbrootID); - - /** @brief distribute fstab update message - */ - int updateFstab(std::string moduleName, std::string entry); - - /** @brief Set MySQL Replication - */ - int setMySQLReplication(oam::DeviceNetworkList devicenetworklist, std::string masterModule = oam::UnassignedName, bool distributeDB = false, std::string password = oam::UnassignedName, bool enable = true, bool addModule = false); - - /** @brief Gluster Assign dbroot to a module - */ - int glusterAssign(std::string moduleName, std::string dbroot); - - /** @brief Gluster Unassign dbroot to a module - */ - int glusterUnassign(std::string moduleName, std::string dbroot); - - /** @brief sync filesystem for snapshot backups - */ - int syncFsAll(std::string moduleName); - - -private: - - Configuration& config; - ProcessLog& log; - - /** - *@brief Create a /ect/module file for remote server - */ - - bool createModuleFile(std::string remoteModuleName); - - /** - *@brief pdate Extent Map section in Columnstore.xml - */ - bool updateExtentMap(); - - /* - * Make inittab to auto-launch ProcMon - */ - bool makeXMInittab(std::string moduleName, std::string systemID, std::string parentOAMModuleHostName); - - /* - * setup External Module mount file - */ - bool setXMmount(std::string moduleName, std::string parentOAMModuleHostName, std::string parentOAMModuleIPAddr); - - /** @brief send status updates to process monitor - */ - void sendStatusUpdate(messageqcpp::ByteStream obs, messageqcpp::ByteStream::byte returnRequestType); - - /** @brief flush inode cache - */ - void flushInodeCache(); - -}; - -} //end of namespace -#endif // _PROCESSMANAGER_H_ - - diff --git a/procmon/CMakeLists.txt b/procmon/CMakeLists.txt deleted file mode 100644 index 186714e09..000000000 --- a/procmon/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ - -include_directories( ${ENGINE_COMMON_INCLUDES} ) - - -########### next target ############### - -set(ProcMon_SRCS main.cpp processmonitor.cpp ../utils/common/crashtrace.cpp) - -add_executable(ProcMon ${ProcMon_SRCS}) - -target_compile_options(ProcMon PRIVATE -Wno-unused-result) - -target_link_libraries(ProcMon ${ENGINE_LDFLAGS} cacheutils ${NETSNMP_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_EXEC_LIBS}) - -install(TARGETS ProcMon DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine) - diff --git a/procmon/main.cpp b/procmon/main.cpp deleted file mode 100644 index 866881caf..000000000 --- a/procmon/main.cpp +++ /dev/null @@ -1,3696 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - - -#include -#include -#include -namespace bi = boost::interprocess; - -#include "processmonitor.h" -#include "installdir.h" - -#include "IDBPolicy.h" -#include "utils_utf8.h" -#include "crashtrace.h" -#include "checks.h" - -using namespace std; -using namespace messageqcpp; -using namespace processmonitor; -using namespace oam; -using namespace logging; -using namespace alarmmanager; -using namespace config; -using namespace idbdatafile; - -//using namespace procheartbeat; - -static void* messageThread(MonitorConfig* config); -static void* statusControlThread(void*); -static void* sigchldHandleThread(void*); -static void SIGCHLDHandler(int signal_number); -static void* chldHandleThread(MonitorConfig* config); -static void sigHupHandler(int sig); -static void* mysqlMonitorThread(MonitorConfig* config); -string systemOAM; -string dm_server; -string cloud; -string DataRedundancyConfig = "n"; -bool HDFS = false; - -void updateShareMemory(processStatusList* aPtr); - -bool runStandby = false; -bool processInitComplete = false; -bool rootUser = true; -bool mainResumeFlag; -string USER = "root"; -string PMwithUM = "n"; -bool startProcMon = false; -string tmpLogDir; -string SUDO = ""; - -//extern std::string gOAMParentModuleName; -extern bool gOAMParentModuleFlag; - -pthread_mutex_t STATUS_LOCK; - -bool getshm(const string &name, int size, bi::shared_memory_object &target) { - MonitorLog log; - bool created = false; - try - { - bi::permissions perms; - perms.set_unrestricted(); - bi::shared_memory_object shm(bi::create_only, name.c_str(), bi::read_write, perms); - created = true; - shm.truncate(size); - target.swap(shm); - } - catch (bi::interprocess_exception& biex) - { - if (biex.get_error_code() == bi::already_exists_error) { - try { - bi::shared_memory_object shm(bi::open_only, name.c_str(), bi::read_write); - target.swap(shm); - } - catch (exception &e) { - ostringstream os; - os << "ProcMon failed to attach to the " << name << " shared mem segment, got " << e.what(); - log.writeLog(__LINE__, os.str(), LOG_TYPE_CRITICAL); - exit(1); - } - } - else { - ostringstream os; - os << "ProcMon failed to create the '" << name << "' shared mem segment, got " << biex.what() << "."; - os << " Check the permissions on /dev/shm; should be 1777"; - log.writeLog(__LINE__, os.str(), LOG_TYPE_CRITICAL); - exit(1); - } - } - return created; -} - - -/****************************************************************************************** -* @brief main -* -* purpose: Launch boot child processes and sit on read for incoming messages -* -******************************************************************************************/ -int main(int argc, char** argv) -{ -#ifndef _MSC_VER - setuid(0); // set effective ID to root; ignore return status -#endif - - struct sigaction ign; - - memset(&ign, 0, sizeof(ign)); - ign.sa_handler = fatalHandler; - sigaction(SIGSEGV, &ign, 0); - sigaction(SIGABRT, &ign, 0); - sigaction(SIGFPE, &ign, 0); - - if (argc > 1 && string(argv[1]) == "--daemon") - { - if (fork() != 0) return 0; - - umask(0); - setsid(); - chdir("/"); - close(0); - close(1); - close(2); - open("/dev/null", O_RDONLY); - open("/dev/null", O_WRONLY); - open("/dev/null", O_WRONLY); - } - - // setup environment for using HDFS. - idbdatafile::IDBPolicy::configIDBPolicy(); - - Oam oam; - MonitorLog log; - MonitorConfig config; - ProcessMonitor aMonitor(config, log); - - log.writeLog(__LINE__, " "); - log.writeLog(__LINE__, "**********Process Monitor Started**********"); - log.writeLog(__LINE__, " ", LOG_TYPE_DEBUG); - log.writeLog(__LINE__, "**********Process Monitor Started**********", LOG_TYPE_DEBUG); - - //Ignore SIGPIPE signals - signal(SIGPIPE, SIG_IGN); - - //create SIGHUP handler to get configuration updates - signal(SIGHUP, sigHupHandler); - - //check if root-user - int user; - user = getuid(); - - if (user != 0) - { - rootUser = false; - SUDO = "sudo "; - } - - char* p = getenv("USER"); - - if (p && *p) - USER = p; - - // Set locale language - setlocale(LC_ALL, ""); - setlocale(LC_NUMERIC, "C"); - - //get tmp log directory - tmpLogDir = startup::StartUp::tmpDir(); - - string cmd = "mkdir -p " + tmpLogDir; - system(cmd.c_str()); - - // create message thread - pthread_t MessageThread; - int ret = pthread_create (&MessageThread, NULL, (void*(*)(void*))&messageThread, &config); - - if ( ret != 0 ) - { - log.writeLog(__LINE__, "pthread_create failed, exiting..., return code = " + oam.itoa(ret), LOG_TYPE_CRITICAL); - string cmd = "columnstore stop > /dev/null 2>&1"; - system(cmd.c_str()); - exit(1); - } - - //check if this is a fresh install, meaning the Columnstore.xml file is not setup - //if so, wait for messages from Procmgr to start us up - Config* sysConfig = Config::makeConfig(); - string exemgrIpadd = sysConfig->getConfig("ExeMgr1", "IPAddr"); - - if ( exemgrIpadd == "0.0.0.0" ) - { - int count = 0; - - while (true) - { - if ( startProcMon ) - break; - else - { - count++; - - if (count > 10 ) - { - count = 0; - log.writeLog(__LINE__, "Waiting for ProcMgr to start up", LOG_TYPE_DEBUG); - } - - sleep(1); - } - } - - //re-read local system info with updated Columnstore.xml - sleep(1); -// Config* sysConfig = Config::makeConfig(); - MonitorConfig config; - - //PMwithUM config - try - { - oam.getSystemConfig( "PMwithUM", PMwithUM); - } - catch (...) - { - PMwithUM = "n"; - } - - string modType = config.moduleType(); - - string mysqlpw = oam.getMySQLPassword(); - - string passwordOption = ""; - if ( mysqlpw != oam::UnassignedName ) - passwordOption = " --password=" + mysqlpw; - - - //run the module install script - string cmd = "columnstore_module_installer.sh --module=" + modType + " " + passwordOption + " > " + tmpLogDir + "/module_installer.log 2>&1"; - log.writeLog(__LINE__, "run columnstore_module_installer.sh", LOG_TYPE_DEBUG); - log.writeLog(__LINE__, cmd, LOG_TYPE_DEBUG); - - int ret = system(cmd.c_str()); - - if ( ret != 0 ) - { - log.writeLog(__LINE__, "columnstore_module_installer.sh error, exiting..., return code = " + oam.itoa(ret), LOG_TYPE_CRITICAL); - string cmd = "columnstore stop > /dev/null 2>&1"; - system(cmd.c_str()); - exit(1); - } - - //exit to allow ProcMon to restart in a setup state - log.writeLog(__LINE__, "restarting for a initial setup", LOG_TYPE_DEBUG); - - exit (0); - } - - // if amazon cloud, check and update Instance IP Addresses and volumes - try - { - oam.getSystemConfig( "Cloud", cloud); - log.writeLog(__LINE__, "Cloud setting = " + cloud, LOG_TYPE_DEBUG); - } - catch (...) {} - - if ( cloud == "amazon-ec2" || cloud == "amazon-vpc" ) - { - if (!aMonitor.amazonIPCheck()) - { - log.writeLog(__LINE__, "ERROR: amazonIPCheck failed, exiting", LOG_TYPE_CRITICAL); - sleep(2); - string cmd = "columnstore stop > /dev/null 2>&1"; - system(cmd.c_str()); - exit(1); - } - } - - //get gluster config - try - { - oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); - } - catch (...) - { - DataRedundancyConfig = "n"; - } - - if ( DataRedundancyConfig == "y" ) - { - system("mount -a > /dev/null 2>&1"); - } - - //hdfs / hadoop config - string DBRootStorageType; - - try - { - oam.getSystemConfig( "DBRootStorageType", DBRootStorageType); - } - catch (...) {} - - if ( DBRootStorageType == "hdfs" ) - HDFS = true; - - //PMwithUM config - try - { - oam.getSystemConfig( "PMwithUM", PMwithUM); - } - catch (...) - { - PMwithUM = "n"; - } - - //define entry if missing - if ( gOAMParentModuleFlag ) - { - string PrimaryUMModuleName; - - try - { - oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName); - } - catch (...) - { - sysConfig->setConfig("SystemConfig", "PrimaryUMModuleName", oam::UnassignedName); - sysConfig->write(); - } - } - - if ( config.moduleType() == "pm" ) - { - if ( gOAMParentModuleFlag ) - log.writeLog(__LINE__, "ProcMon: Starting as ACTIVE Parent", LOG_TYPE_DEBUG); - else - log.writeLog(__LINE__, "ProcMon: Starting as NON-ACTIVE Parent", LOG_TYPE_DEBUG); - } - - //create and mount data directories - aMonitor.createDataDirs(cloud); - - //check if this module is recovering after a reboot for an active OAM parent state - ByteStream msg; - ByteStream::byte requestID = GETPARENTOAMMODULE; - msg << requestID; - - int moduleStatus = oam::ACTIVE; - - //check if currently configured as Parent OAM Module on startup - if ( gOAMParentModuleFlag ) - { - try - { - oam.getSystemConfig( "DBRootStorageType", DBRootStorageType); - } - catch (...) {} - - if ( ( config.OAMStandbyName() != oam::UnassignedName ) && - DBRootStorageType != "internal" ) - { - //try for 20 minutes checking if the standby node is up - string parentOAMModule; - log.writeLog(__LINE__, "starting has parent, double check. checking with old Standby Module", LOG_TYPE_DEBUG); - int count = 0; - - for (; count < 120 ; count++) - { - parentOAMModule = aMonitor.sendMsgProcMon1( config.OAMStandbyName(), msg, requestID ); - - if ( parentOAMModule != "FAILED" ) - break; - - log.writeLog(__LINE__, "Standby PM not responding, retrying", LOG_TYPE_WARNING); - sleep(10); - } - - // check if standby never replied, if so, shutdown - if ( count >= 120 ) - { - log.writeLog(__LINE__, "Standby PM not responding, ColumnStore shutting down", LOG_TYPE_CRITICAL); - //Set the alarm - // aMonitor.sendAlarm(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET); - // sleep (1); - - string cmd = "columnstore stop > /dev/null 2>&1"; - - system(cmd.c_str()); - } - - log.writeLog(__LINE__, "Old Standby has moduleparentOAMModule = " + parentOAMModule, LOG_TYPE_DEBUG); - - if ( parentOAMModule != config.moduleName() ) - { - gOAMParentModuleFlag = false; - - log.writeLog(__LINE__, "NOT Parent OAM Module", LOG_TYPE_DEBUG); - log.writeLog(__LINE__, "NOT Parent OAM Module"); - - try - { - Config* sysConfig = Config::makeConfig(); - - // get Standby IP address - ModuleConfig moduleconfig; - oam.getSystemConfig(config.OAMStandbyName(), moduleconfig); - HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); - string IPaddr = (*pt1).IPAddr; - - sysConfig->setConfig("ProcMgr", "IPAddr", IPaddr); - sysConfig->setConfig("ProcMgr_Alarm", "IPAddr", IPaddr); - - log.writeLog(__LINE__, "set ProcMgr IPaddr to Old Standby Module: " + IPaddr, LOG_TYPE_DEBUG); - //update MariaDB ColumnStore Config table - try - { - sysConfig->write(); - sleep(1); - } - catch (...) - { - log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR); - } - } - catch (...) - { - log.writeLog(__LINE__, "ERROR: makeConfig failed", LOG_TYPE_ERROR); - } - - // get updated Columnstore.xml and ProcessConfig.xml, retry in case ProcMgr isn't up yet - if (!HDFS) - { - int count = 0; - - while (true) - { - try - { - oam.distributeConfigFile(config.moduleName()); - log.writeLog(__LINE__, "Successfull return from distributeConfigFile", LOG_TYPE_DEBUG); - - oam.distributeConfigFile(config.moduleName(), "ProcessConfig.xml"); - log.writeLog(__LINE__, "Successfull return from distributeProcessFile", LOG_TYPE_DEBUG); - break; - } - catch (...) - { - count++; - - if (count > 10 ) - { - count = 0; - log.writeLog(__LINE__, "error return from distributeConfigFile, waiting for Active ProcMgr to start", LOG_TYPE_DEBUG); - } - - sleep(1); - } - } - } - - // not OAM parent module, delay starting until a successful get status is performed - // makes sure the Parent OAM ProcMon is fully ready - while (true) - { - try - { - bool degraded; - oam.getModuleStatus(config.moduleName(), moduleStatus, degraded); - - // if HDFS, wait until module state is MAN_INIT before continuing - if (HDFS) - { - if ( moduleStatus == oam::MAN_INIT) - break; - } - - break; - } - catch (...) - { - log.writeLog(__LINE__, "waiting for good return from getModuleStatus", LOG_TYPE_DEBUG); - sleep (1); - } - } - } - } - } - else - { - // not active Parent, get updated Columnstore.xml, retry in case ProcMgr isn't up yet - if (!HDFS) - { - int count = 0; - - while (true) - { - try - { - oam.distributeConfigFile(config.moduleName()); - log.writeLog(__LINE__, "Successfull return from distributeConfigFile", LOG_TYPE_DEBUG); - - oam.distributeConfigFile(config.moduleName(), "ProcessConfig.xml"); - log.writeLog(__LINE__, "Successfull return from distributeProcessFile", LOG_TYPE_DEBUG); - - break; - } - catch (...) - { - count++; - - if (count > 10 ) - { - count = 0; - log.writeLog(__LINE__, "error return from distributeConfigFile, waiting for Active ProcMgr to start", LOG_TYPE_DEBUG); - } - - sleep(1); - } - } - } - - // not OAM parent module, delay starting until a successful get status is performed - // makes sure the Parent OAM ProcMon is fully ready - while (true) - { - try - { - bool degraded; - oam.getModuleStatus(config.moduleName(), moduleStatus, degraded); - - // if HDFS, wait until module state is MAN_INIT before continuing - if (HDFS) - { - if ( moduleStatus == oam::MAN_INIT) - break; - } - - break; - } - catch (...) - { - log.writeLog(__LINE__, "waiting for good return from getModuleStatus", LOG_TYPE_DEBUG); - sleep (1); - } - } - } - - // this will occur on non-distributed installs the first time ProcMon runs - if ( config.OAMParentName() == oam::UnassignedName ) - { - cerr << endl << "OAMParentModuleName == oam::UnassignedName, exiting " << endl; - log.writeLog(__LINE__, "OAMParentModuleName == oam::UnassignedName, restarting"); - exit (1); - } - - //check if module is in a DISABLED state - bool DISABLED = false; - - if ( moduleStatus == oam::MAN_DISABLED || - moduleStatus == oam::AUTO_DISABLED ) - DISABLED = true; - - if ( config.moduleType() == "pm" ) - { - int retry = 0; - - for ( ; retry < 20 ; retry++ ) - { - int ret = aMonitor.checkDataMount(); - - if ( ret == oam::API_SUCCESS) - break; - - if (ret == API_INVALID_PARAMETER) - { - //no dbroots assigned, treat as disabled - if ( !DISABLED ) - DISABLED = true; - } - - if ( DISABLED ) - { - log.writeLog(__LINE__, "ERROR: checkDataMount to failed, module is disabled, continuing", LOG_TYPE_WARNING); - break; - } - else - log.writeLog(__LINE__, "ERROR: checkDataMount to failed, retrying", LOG_TYPE_WARNING); - - //send notification about the mount setup failure - oam.sendDeviceNotification(config.moduleName(), DBROOT_MOUNT_FAILURE); - sleep(30); - } - - if ( retry == 20 ) - { - log.writeLog(__LINE__, "Check DB mounts failed, shutting down", LOG_TYPE_CRITICAL); - - //Set the alarm - // aMonitor.sendAlarm(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET); - // sleep (1); - string cmd = "columnstore stop > /dev/null 2>&1"; - system(cmd.c_str()); - } - - if ( !gOAMParentModuleFlag ) - { - runStandby = true; - // delete any old active alarm log file - unlink ("/var/log/mariadb/columnstore/activeAlarms"); - } - - //Clear mainResumeFlag - - mainResumeFlag = false; - - //launch Status table control thread on 'pm' modules - pthread_t statusThread; - int ret = pthread_create (&statusThread, NULL, &statusControlThread, NULL); - - if ( ret != 0 ) - log.writeLog(__LINE__, "pthread_create failed, return code = " + oam.itoa(ret), LOG_TYPE_ERROR); - - //wait for flag to be set - - while (!mainResumeFlag) - { - log.writeLog(__LINE__, "WAITING FOR mainResumeFlag to be set", LOG_TYPE_DEBUG); - - sleep(1); - } - } - - SystemStatus systemstatus; - - try - { - oam.getSystemStatus(systemstatus, false); - } - catch (...) - { - } - - // determine Standby OAM Module, if needed - if ( gOAMParentModuleFlag && - config.OAMStandbyName() == oam::UnassignedName && - config.ServerInstallType() != oam::INSTALL_COMBINE_DM_UM_PM ) - { - - - string standbyPM = ""; - - //check if gluster, if so then find PMs that have copies of DBROOT #1 - string pmList = ""; - - if (DataRedundancyConfig == "y") - { - - try - { - string errmsg; - oam.glusterctl(oam::GLUSTER_WHOHAS, "1", pmList, errmsg); - - log.writeLog(__LINE__, "glusterctl called :" + pmList, LOG_TYPE_DEBUG); - - boost::char_separator sep(" "); - boost::tokenizer< boost::char_separator > tokens(pmList, sep); - - for ( boost::tokenizer< boost::char_separator >::iterator it = tokens.begin(); - it != tokens.end(); - ++it) - { - string pm = "pm" + *it; - - // skip if current module - if ( pm == config.moduleName() ) - continue; - - int opState; - bool degraded; - - try - { - oam.getModuleStatus(pm, opState, degraded); - } - catch (...) - {} - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - { - continue; - } - else - { - standbyPM = pm; - break; - } - } - - } - catch (...) - {} - } - else - { - for ( unsigned int i = 0 ; i < systemstatus.systemmodulestatus.modulestatus.size(); i++) - { - string moduleName = systemstatus.systemmodulestatus.modulestatus[i].Module; - - if ( moduleName.substr(0, MAX_MODULE_TYPE_SIZE) == "pm" && - moduleName != config.moduleName() ) - { - // multi pm system - int moduleStatus = systemstatus.systemmodulestatus.modulestatus[i].ModuleOpState; - - if ( moduleStatus != oam::MAN_DISABLED && - moduleStatus != oam::AUTO_DISABLED ) - { - - standbyPM = moduleName; - break; - } - } - } - } - - if ( standbyPM != "" ) - { - // found a standby candidate - oam.setSystemConfig("StandbyOAMModuleName", standbyPM); - - // update Standby IP Address - ModuleConfig moduleconfig; - oam.getSystemConfig(standbyPM, moduleconfig); - HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); - string standbyIPaddr = (*pt1).IPAddr; - - Config* sysConfig2 = Config::makeConfig(); - - sysConfig2->setConfig("ProcStatusControlStandby", "IPAddr", standbyIPaddr); - sysConfig2->write(); - - oam.setHotStandbyPM(standbyIPaddr); - - log.writeLog(__LINE__, "Columnstore.xml Standby OAM updated : " + standbyPM + ":" + standbyIPaddr, LOG_TYPE_DEBUG); - log.writeLog(__LINE__, "Set Standby Module = " + standbyPM, LOG_TYPE_DEBUG); - - try - { - oam.distributeConfigFile(config.moduleName()); - log.writeLog(__LINE__, "successfull return from distributeConfigFile", LOG_TYPE_DEBUG); - } - catch (...) - {} - } - } - - // non Parent Module, don't start until process-manager is up on parent module - // away to control starting mutliple Active Process-Managers - if ( !gOAMParentModuleFlag && config.moduleType() == "pm" ) - { - string parentOAMModuleName; - - while (true) - { - try - { - Config* sysConfig = Config::makeConfig(); - parentOAMModuleName = sysConfig->getConfig("SystemConfig", "ParentOAMModuleName"); - - if ( parentOAMModuleName != oam::UnassignedName ) - break; - - sleep(1); - log.writeLog(__LINE__, "Waiting for process-manager on parent module", LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "Problem getting the ParentOAMModuleName key from the Columnstore System Configuration file", LOG_TYPE_CRITICAL); - exit(1); - } - } - - while (true) - { - try - { - Oam oam; - ProcessStatus procstat; - oam.getProcessStatus("ProcessManager", parentOAMModuleName, procstat); - - if ( procstat.ProcessOpState == oam::ACTIVE ) - break; - - sleep(1); - log.writeLog(__LINE__, "Waiting for process-manager to go ACTIVE", LOG_TYPE_DEBUG); - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - } - } - - //Mark this process AUTO-OFFLINE - aMonitor.updateProcessInfo("ProcessMonitor", oam::AUTO_OFFLINE, getpid()); - - //handle SIGCHLD signal - pthread_t signalThread; - ret = pthread_create (&signalThread, NULL, &sigchldHandleThread, NULL); - - if ( ret != 0 ) - log.writeLog(__LINE__, "pthread_create failed, return code = " + oam.itoa(ret), LOG_TYPE_ERROR); - - //mysqld status monitor thread - if ( config.moduleType() == "um" || - ( config.moduleType() == "pm" && config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM ) || - ( config.moduleType() == "pm" && PMwithUM == "y") ) - { - pthread_t mysqlThread; - ret = pthread_create (&mysqlThread, NULL, (void*(*)(void*))&mysqlMonitorThread, NULL); - - if ( ret != 0 ) - log.writeLog(__LINE__, "pthread_create failed, return code = " + oam.itoa(ret), LOG_TYPE_ERROR); - } - - //update syslog file priviledges - aMonitor.changeModLog(); - - //Read ProcessConfig file to get process list belong to this process monitor - SystemProcessConfig systemprocessconfig; - - try - { - oam.getProcessConfig(systemprocessconfig); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - string OAMParentModuleType = config.OAMParentName().substr(0, 2); - - //Build a map for application name tag and launch ID for this Process-Monitor - for ( unsigned int i = 0 ; i < systemprocessconfig.processconfig.size(); i++) - { - //skip if both BootLaunch and LaunchID are 0 - if ( systemprocessconfig.processconfig[i].BootLaunch == 0 && - systemprocessconfig.processconfig[i].LaunchID == 0 ) - continue; - - if ( (systemprocessconfig.processconfig[i].ModuleType == config.moduleType() ) || - ( systemprocessconfig.processconfig[i].ModuleType == "um" && - config.moduleType() == "pm" && PMwithUM == "y") || - ( systemprocessconfig.processconfig[i].ModuleType == "ChildExtOAMModule") || - ( systemprocessconfig.processconfig[i].ModuleType == "ChildOAMModule" ) || - ( systemprocessconfig.processconfig[i].ModuleType == "ParentOAMModule" && - config.moduleType() == OAMParentModuleType ) ) - { - // If Process Monitor, update local state - if ( systemprocessconfig.processconfig[i].ProcessName == "ProcessMonitor") - { - config.buildList(systemprocessconfig.processconfig[i].ModuleType, - systemprocessconfig.processconfig[i].ProcessName, - systemprocessconfig.processconfig[i].ProcessLocation, - systemprocessconfig.processconfig[i].ProcessArgs, - systemprocessconfig.processconfig[i].LaunchID, - getpid(), - oam::AUTO_OFFLINE, - systemprocessconfig.processconfig[i].BootLaunch, - systemprocessconfig.processconfig[i].RunType, - systemprocessconfig.processconfig[i].DepProcessName, - systemprocessconfig.processconfig[i].DepModuleName, - systemprocessconfig.processconfig[i].LogFile); - } - else - { - if ( systemprocessconfig.processconfig[i].ModuleType == "um" && - config.moduleType() == "pm" && PMwithUM == "y" && - systemprocessconfig.processconfig[i].ProcessName == "DMLProc" ) - continue; - - - if ( systemprocessconfig.processconfig[i].ModuleType == "um" && - config.moduleType() == "pm" && PMwithUM == "y" && - systemprocessconfig.processconfig[i].ProcessName == "DDLProc" ) - continue; - - // Get Last Known Process Status and PID - int state = oam::AUTO_OFFLINE; - int PID = 0; - - try - { - Oam oam; - ProcessStatus procstat; - oam.getProcessStatus(systemprocessconfig.processconfig[i].ProcessName, config.moduleName(), procstat); - state = procstat.ProcessOpState; - PID = procstat.ProcessID; - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - - config.buildList(systemprocessconfig.processconfig[i].ModuleType, - systemprocessconfig.processconfig[i].ProcessName, - systemprocessconfig.processconfig[i].ProcessLocation, - systemprocessconfig.processconfig[i].ProcessArgs, - systemprocessconfig.processconfig[i].LaunchID, - PID, - state, - systemprocessconfig.processconfig[i].BootLaunch, - systemprocessconfig.processconfig[i].RunType, - systemprocessconfig.processconfig[i].DepProcessName, - systemprocessconfig.processconfig[i].DepModuleName, - systemprocessconfig.processconfig[i].LogFile); - } - } - } - - log.writeLog(__LINE__, "SYSTEM STATUS = " + oam.itoa(systemstatus.SystemOpState), LOG_TYPE_DEBUG); - - if ( systemstatus.SystemOpState != MAN_OFFLINE && !DISABLED) - { - - // Loop through the process list to check the process current state - // Launch the Processes controlled by the Process-Monitor - processList::iterator listPtr; - processList* aPtr = config.monitoredListPtr(); - listPtr = aPtr->begin(); - - for (; listPtr != aPtr->end(); ++listPtr) - { - // If Process Monitor, skip - if ( (*listPtr).ProcessName == "ProcessMonitor") - continue; - - if ((*listPtr).processID != 0) - { - if ((*listPtr).BootLaunch == BOOT_LAUNCH) - { - //Check for SIMPLEX runtype processes - int initType = aMonitor.checkSpecialProcessState( (*listPtr).ProcessName, (*listPtr).RunType, (*listPtr).ProcessModuleType ); - - if ( initType == oam::COLD_STANDBY ) - { - //there is a mate active, skip - (*listPtr).state = oam::COLD_STANDBY; - // sleep(1); - continue; - } - else if ( initType == oam::MAN_INIT ) - initType = oam::AUTO_INIT; - - //Check the process current state - if ((kill((*listPtr).processID, 0)) != 0 - && (*listPtr).state != oam::MAN_OFFLINE) - { - //The process died, start the process, reset the pid and time - - //Set the alarm - aMonitor.sendAlarm((*listPtr).ProcessName.c_str(), PROCESS_DOWN_AUTO, SET); - - //stop the process first to make sure it's gone - aMonitor.stopProcess((*listPtr).processID, - (*listPtr).ProcessName, - (*listPtr).ProcessLocation, - oam::FORCEFUL, - false); - - //Start the process - (*listPtr).processID = aMonitor.startProcess( (*listPtr).ProcessModuleType, - (*listPtr).ProcessName, - (*listPtr).ProcessLocation, - (*listPtr).ProcessArgs, - (*listPtr).launchID, - (*listPtr).BootLaunch, - (*listPtr).RunType, - (*listPtr).DepProcessName, - (*listPtr).DepModuleName, - (*listPtr).LogFile, - initType); - - // StorageManager doesn't send the "I'm online" msg to Proc*. - // Just mark it active for now. TODO: make it use the ping fcn in IDB* instead. - if (listPtr->ProcessName == "StorageManager") - oam.setProcessStatus("StorageManager", boost::get<0>(oam.getModuleInfo()), - oam::ACTIVE, listPtr->processID); - - string restartStatus; - - if ( (*listPtr).processID == oam::API_MINOR_FAILURE || - (*listPtr).processID == oam::API_FAILURE ) - // restart failed - string restartStatus = " restart failed!!"; - else - string restartStatus = " restarted successfully!!"; - - log.writeLog(__LINE__, restartStatus, LOG_TYPE_INFO); - } - } - } - else if ((*listPtr).BootLaunch == BOOT_LAUNCH) - { - //Check for SIMPLEX runtype processes - int initType = aMonitor.checkSpecialProcessState( (*listPtr).ProcessName, (*listPtr).RunType, (*listPtr).ProcessModuleType ); - - if ( initType == oam::COLD_STANDBY ) - { - //there is a mate active, skip - (*listPtr).state = oam::COLD_STANDBY; - sleep(1); - continue; - } - else if ( initType == oam::MAN_INIT ) - initType = oam::AUTO_INIT; - - if ((*listPtr).state == oam::MAN_OFFLINE) - continue; - - //stop the process first to make sure it's gone - aMonitor.stopProcess((*listPtr).processID, - (*listPtr).ProcessName, - (*listPtr).ProcessLocation, - oam::FORCEFUL, - false); - - //Start the boot time processes, set its state, ProcessID - (*listPtr).processID = aMonitor.startProcess((*listPtr).ProcessModuleType, - (*listPtr).ProcessName, - (*listPtr).ProcessLocation, - (*listPtr).ProcessArgs, - (*listPtr).launchID, - (*listPtr).BootLaunch, - (*listPtr).RunType, - (*listPtr).DepProcessName, - (*listPtr).DepModuleName, - (*listPtr).LogFile, - initType); - - // StorageManager doesn't send the "I'm online" msg to Proc*. - // Just mark it active for now. TODO: make it use the ping fcn in IDB* instead. - if (listPtr->ProcessName == "StorageManager") - oam.setProcessStatus("StorageManager", boost::get<0>(oam.getModuleInfo()), - oam::ACTIVE, listPtr->processID); - - string restartStatus; - - if ( (*listPtr).processID == oam::API_MINOR_FAILURE || - (*listPtr).processID == oam::API_FAILURE ) - // restart failed - string restartStatus = " restart failed!!"; - else - string restartStatus = " restarted successfully!!"; - - log.writeLog(__LINE__, restartStatus, LOG_TYPE_INFO); - } - } //end of for loop - } - - // create process health (monitor) thread - pthread_t processHealthThread; - ret = pthread_create (&processHealthThread, NULL, (void*(*)(void*))&chldHandleThread, &config); - - if ( ret != 0 ) - log.writeLog(__LINE__, "pthread_create failed, return code = " + oam.itoa(ret), LOG_TYPE_ERROR); - - //Mark this process Init Complete - while (true) - { - try - { - oam.processInitComplete("ProcessMonitor"); - log.writeLog(__LINE__, "processInitComplete Successfully Called", LOG_TYPE_DEBUG); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on processInitComplete: " + error, LOG_TYPE_ERROR); - // this would fail if Parent OAM Node is down - sleep(1); - continue; - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on processInitComplete: Caught unknown exception!", LOG_TYPE_ERROR); - // this would fail if Parent OAM Node is down - sleep(1); - continue; - } - - for ( unsigned int i = 0 ; i < systemprocessconfig.processconfig.size(); i++) - { - // If Process Monitor, update local state - if ( systemprocessconfig.processconfig[i].ProcessName == "ProcessMonitor") - { - config.buildList(systemprocessconfig.processconfig[i].ModuleType, - systemprocessconfig.processconfig[i].ProcessName, - systemprocessconfig.processconfig[i].ProcessLocation, - systemprocessconfig.processconfig[i].ProcessArgs, - systemprocessconfig.processconfig[i].LaunchID, - getpid(), - oam::ACTIVE, - systemprocessconfig.processconfig[i].BootLaunch, - systemprocessconfig.processconfig[i].RunType, - systemprocessconfig.processconfig[i].DepProcessName, - systemprocessconfig.processconfig[i].DepModuleName, - systemprocessconfig.processconfig[i].LogFile); - break; - } - } - - break; - - //Clear the alarms - aMonitor.sendAlarm("ProcessMonitor", PROCESS_DOWN_MANUAL, CLEAR); - aMonitor.sendAlarm("ProcessMonitor", PROCESS_DOWN_AUTO, CLEAR); - } - - //set process init complete and ready to process message request - processInitComplete = true; - - // suspend forever - while (true) - { - sleep(1000); - } -} - -/****************************************************************************************** -* @brief messageThread -* -* purpose: Read incoming messages -* -******************************************************************************************/ -static void* messageThread(MonitorConfig* config) -{ - //ProcMon log file - MonitorLog log; - assert(config); - ProcessMonitor aMonitor(*config, log); - log.writeLog(__LINE__, "Message Thread started ..", LOG_TYPE_DEBUG); - Oam oam; - - string msgPort = config->moduleName() + "_ProcessMonitor"; - string port = ""; - - //ProcMon will wait for request - IOSocket fIos; - Config* sysConfig = Config::makeConfig(); - - //read and cleanup port before trying to use - try - { - port = sysConfig->getConfig(msgPort, "Port"); - } - catch (...) - {} - - //check if enter doesnt exist, if not use pm1's - if (port.empty() or port == "" ) - { - msgPort = "pm1_ProcessMonitor"; - port = sysConfig->getConfig(msgPort, "Port"); - } - - log.writeLog(__LINE__, "PORTS: " + msgPort + "/" + port, LOG_TYPE_DEBUG); - - string cmd = "fuser -k " + port + "/tcp >/dev/null 2>&1"; - - system(cmd.c_str()); - - for (;;) - { - try - { - ByteStream msg; - MessageQueueServer mqs(msgPort); - - for (;;) - { - try - { - fIos = mqs.accept(); - - try - { - msg = fIos.read(); - - if (msg.length() > 0) - { - aMonitor.processMessage(msg, fIos); - } - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on fIos.read() for " + msgPort + ", error: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on fIos.read() for " + msgPort + ", Caught unknown exception!", LOG_TYPE_ERROR); - } - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on mqs.accept() for " + msgPort + ", error: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on mqs.accept() for " + msgPort + ", Caught unknown exception!", LOG_TYPE_ERROR); - } - - // give time to allow Mgr to read any acks before closing - sleep(1); - fIos.close(); - } - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueServer for " + msgPort + ": " + error, LOG_TYPE_ERROR); - - // takes 2 - 4 minites to free sockets, sleep and retry - sleep(1); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueServer for " + msgPort + ": Caught unknown exception!", LOG_TYPE_ERROR); - - // takes 2 - 4 minites to free sockets, sleep and retry - sleep(1); - } - } - - return NULL; -} - -/****************************************************************************************** -* @brief mysqlMonitorThread -* -* purpose: monitor mysqld by getting status -* -******************************************************************************************/ -static void* mysqlMonitorThread(MonitorConfig* config) -{ - MonitorLog log; - assert(config); - ProcessMonitor aMonitor(*config, log); - log.writeLog(__LINE__, "mysqld Monitoring Thread started ..", LOG_TYPE_DEBUG); - Oam oam; - - while (true) - { - //read status, whichs set process status - try - { - oam.actionMysqlCalpont(MYSQL_STATUS); - } - catch (...) - {} - - sleep(5); - } - - return NULL; -} - -/****************************************************************************************** -* @brief sigchldHandleThread / SIGCHLDHandler -* -* purpose: Catch and process dieing child processes -* -******************************************************************************************/ -static void* sigchldHandleThread(void*) -{ - struct sigaction sigchld_action; - memset (&sigchld_action, 0, sizeof (sigchld_action)); - sigchld_action.sa_handler = &SIGCHLDHandler; - sigaction(SIGCHLD, &sigchld_action, NULL); - return NULL; -} - -static void SIGCHLDHandler(int signal_number) -{ - int status; - - waitpid(-1, &status, WNOHANG); - - return; -} - -/****************************************************************************************** -* @brief chldHandleThread -* -* purpose: Monitor and process dieing Non SIGCHILD SNMP child processes -* Also validate the internal Process status with the Process-Status disk file -* -******************************************************************************************/ -static void* chldHandleThread(MonitorConfig* config) -{ - //ProcMon log file - MonitorLog log; - assert(config); - ProcessMonitor aMonitor(*config, log); - log.writeLog(__LINE__, "Child Process Monitoring Thread started ..", LOG_TYPE_DEBUG); - Oam oam; - SystemProcessStatus systemprocessstatus; - - //Loop through the process list to check the process current state - processList::iterator listPtr; - processList* aPtr = config->monitoredListPtr(); - - //get dbhealth flag - string DBFunctionalMonitorFlag; - - try - { - oam.getSystemConfig( "DBFunctionalMonitorFlag", DBFunctionalMonitorFlag); - } - catch (...) {} - - int delayCount = 0; - - while (true) - { - //get process restart configured settings - int processRestartCount = 10; - int processRestartPeriod = 120; - - try - { - oam.getSystemConfig("ProcessRestartCount", processRestartCount); - oam.getSystemConfig("ProcessRestartPeriod", processRestartPeriod); - } - catch (...) - { - processRestartCount = 10; - processRestartPeriod = 120; - } - - listPtr = aPtr->begin(); - - for (; listPtr != aPtr->end(); ++listPtr) - { - // compare internal process state and PID with system process status - // Issue alarm if system state is INIT for longer than 1 minute - // Update internal process state when in INIT and System is ACTIVE/FAILED - // Updated System process state when AOS and different from internal - int outOfSyncCount = 0; - - if ( delayCount == 2 ) - { - while (true) - { - int state = (*listPtr).state; //set as default - int PID = (*listPtr).processID; //set as default - - try - { - ProcessStatus procstat; - oam.getProcessStatus((*listPtr).ProcessName, config->moduleName(), procstat); - state = procstat.ProcessOpState; - PID = procstat.ProcessID; - - if (state == oam::BUSY_INIT ) - { - // updated local state ot BUSY_INIT - (*listPtr).state = state; - break; - } - - if ( (state == oam::AUTO_INIT && (*listPtr).state == oam::AUTO_INIT) || - (state == oam::MAN_INIT && (*listPtr).state == oam::MAN_INIT) ) - { - // get current time in seconds - time_t cal; - time (&cal); - - if ( (cal - (*listPtr).currentTime) > 20 ) - { - // issue ALARM and update status to FAILED - aMonitor.sendAlarm((*listPtr).ProcessName, PROCESS_INIT_FAILURE, SET); -// (*listPtr).state = oam::FAILED; -// aMonitor.updateProcessInfo((*listPtr).ProcessName, oam::FAILED, (*listPtr).processID); - - //force restart the un-initted process - log.writeLog(__LINE__, (*listPtr).ProcessName + "/" + oam.itoa((*listPtr).processID) + " failed to init in 20 seconds, force killing it so it can restart", LOG_TYPE_CRITICAL); - - //skip killing 0 or 1 - if ( (*listPtr).processID > 1 ) - kill((*listPtr).processID, SIGKILL); - - break; - } - - break; - } - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - break; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - break; - } - - if (state != (*listPtr).state || PID != (*listPtr).processID) - { - if ( state == oam::STANDBY && (*listPtr).state == oam::ACTIVE ) - break; - else - { - if ( (state == oam::ACTIVE && (*listPtr).state == oam::AUTO_INIT) || - (state == oam::ACTIVE && (*listPtr).state == oam::MAN_INIT) || - (state == oam::ACTIVE && (*listPtr).state == oam::STANDBY) || - (state == oam::ACTIVE && (*listPtr).state == oam::INITIAL) || - (state == oam::ACTIVE && (*listPtr).state == oam::STANDBY_INIT) || - (state == oam::ACTIVE && (*listPtr).state == oam::BUSY_INIT) || - (state == oam::STANDBY && (*listPtr).state == oam::AUTO_INIT) || - (state == oam::STANDBY && (*listPtr).state == oam::MAN_INIT) || - (state == oam::STANDBY && (*listPtr).state == oam::INITIAL) || - (state == oam::STANDBY && (*listPtr).state == oam::BUSY_INIT) || - (state == oam::STANDBY && (*listPtr).state == oam::STANDBY_INIT) ) - { - // updated local state to ACTIVE - (*listPtr).state = state; - break; - } - - if ( (state == oam::FAILED && (*listPtr).state == oam::AUTO_INIT) || - (state == oam::FAILED && (*listPtr).state == oam::BUSY_INIT) || - (state == oam::FAILED && (*listPtr).state == oam::MAN_INIT) ) - { - // issue ALARM and update local status to FAILED - log.writeLog(__LINE__, (*listPtr).ProcessName + " failed initialization", LOG_TYPE_WARNING); - aMonitor.sendAlarm((*listPtr).ProcessName, PROCESS_INIT_FAILURE, SET); - (*listPtr).state = state; - - //setModule status to failed - try - { - oam.setModuleStatus(config->moduleName(), oam::FAILED); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on setModuleStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on setModuleStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - - break; - } - - if (state == oam::AUTO_OFFLINE || state == oam::INITIAL || - PID != (*listPtr).processID) - { - //due to a small window, only process if out-of-sync for more than 1 second - outOfSyncCount++; - - if ( outOfSyncCount == 2 ) - { - // out of sync, update with internal state/PID - log.writeLog(__LINE__, "State out-of-sync, update on " + (*listPtr).ProcessName + "/" + oam.itoa((*listPtr).state) + "/" + oam.itoa((*listPtr).processID), LOG_TYPE_DEBUG); - - aMonitor.updateProcessInfo((*listPtr).ProcessName, (*listPtr).state, (*listPtr).processID); - break; - } - - sleep(1); - } - else - break; - } - } - else - break; - } - } - - //Handle died or out of sync process if in the right state - if ( (*listPtr).state == oam::MAN_OFFLINE ) - //skip - continue; - - //log.writeLog(__LINE__, "check status " + (*listPtr).ProcessName + "/" + oam.itoa((*listPtr).processID) + " " + oam.itoa(kill((*listPtr).processID, 0)) + " " + oam.itoa((*listPtr).state) , LOG_TYPE_CRITICAL); - if ( ( (kill((*listPtr).processID, 0)) != 0 && (*listPtr).state == oam::ACTIVE ) || - ( (kill((*listPtr).processID, 0)) != 0 && (*listPtr).state == oam::STANDBY ) || - ( (kill((*listPtr).processID, 0)) != 0 && (*listPtr).state == oam::MAN_INIT ) || - ( (kill((*listPtr).processID, 0)) != 0 && (*listPtr).state == oam::BUSY_INIT ) || - ( (kill((*listPtr).processID, 0)) != 0 && (*listPtr).state == oam::AUTO_INIT && - (*listPtr).processID != 0 ) || - ( (*listPtr).state == oam::ACTIVE && (*listPtr).processID == 0 ) ) - { - log.writeLog(__LINE__, "*****MariaDB ColumnStore Process Restarting: " + (*listPtr).ProcessName + ", old PID = " + oam.itoa((*listPtr).processID), LOG_TYPE_CRITICAL); - - if ( (*listPtr).dieCounter >= processRestartCount || - processRestartCount == 0) - { - // don't restart it - config->buildList((*listPtr).ProcessModuleType, - (*listPtr).ProcessName, - (*listPtr).ProcessLocation, - (*listPtr).ProcessArgs, - (*listPtr).launchID, - 0, - oam::AUTO_OFFLINE, - (*listPtr).BootLaunch, - (*listPtr).RunType, - (*listPtr).DepProcessName, - (*listPtr).DepModuleName, - (*listPtr).LogFile); - - //Set the alarm - aMonitor.sendAlarm((*listPtr).ProcessName, PROCESS_DOWN_AUTO, SET); - - //Update ProcessConfig file - aMonitor.updateProcessInfo((*listPtr).ProcessName, oam::AUTO_OFFLINE, 0); - - //Log this event - if ( processRestartCount == 0) - log.writeLog(__LINE__, "*****Process not restarted, restart count set to 0: " + (*listPtr).ProcessName, LOG_TYPE_CRITICAL); - else - log.writeLog(__LINE__, "*****Process continually dying, stopped trying to restore it: " + (*listPtr).ProcessName, LOG_TYPE_CRITICAL); - - //setModule status to degraded - try - { - bool degraded; - int moduleStatus; - oam.getModuleStatus(config->moduleName(), moduleStatus, degraded); - - if ( moduleStatus == oam::ACTIVE) - { - try - { - oam.setModuleStatus(config->moduleName(), oam::DEGRADED); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on setModuleStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on setModuleStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - - } - } - catch (...) - {} - - // check if Mdoule failover is needed due to process outage - aMonitor.checkModuleFailover((*listPtr).ProcessName); - - //check the db health - if (DBFunctionalMonitorFlag == "y" ) - { - log.writeLog(__LINE__, "Call the check DB Functional API", LOG_TYPE_DEBUG); - - try - { - oam.checkDBFunctional(); - log.writeLog(__LINE__, "check DB Functional passed", LOG_TYPE_DEBUG); - } - catch (...) - { - log.writeLog(__LINE__, "check DB Functional FAILED", LOG_TYPE_ERROR); - } - } - } - else - { - time_t cal; - time (&cal); - - if ( (cal - (*listPtr).currentTime) > (int) processRestartPeriod ) - (*listPtr).dieCounter = 0; - else - ++(*listPtr).dieCounter; - - int initStatus = oam::AUTO_INIT; - - if ( (*listPtr).RunType == oam::ACTIVE_STANDBY && runStandby) - initStatus = oam::STANDBY; - - //record the process information into processList - config->buildList((*listPtr).ProcessModuleType, - (*listPtr).ProcessName, - (*listPtr).ProcessLocation, - (*listPtr).ProcessArgs, - (*listPtr).launchID, - 0, - oam::AUTO_OFFLINE, - (*listPtr).BootLaunch, - (*listPtr).RunType, - (*listPtr).DepProcessName, - (*listPtr).DepModuleName, - (*listPtr).LogFile); - - //Set the alarm - aMonitor.sendAlarm((*listPtr).ProcessName, PROCESS_DOWN_AUTO, SET); - - int i = 0; - string restartStatus; - - for ( ; i < 10 ; i++ ) - { - //stop the process first to make sure it's gone - aMonitor.stopProcess((*listPtr).processID, - (*listPtr).ProcessName, - (*listPtr).ProcessLocation, - oam::FORCEFUL, - false); - - //Start the process - (*listPtr).processID = aMonitor.startProcess( (*listPtr).ProcessModuleType, - (*listPtr).ProcessName, - (*listPtr).ProcessLocation, - (*listPtr).ProcessArgs, - (*listPtr).launchID, - (*listPtr).BootLaunch, - (*listPtr).RunType, - (*listPtr).DepProcessName, - (*listPtr).DepModuleName, - (*listPtr).LogFile, - initStatus); - - // StorageManager doesn't send the "I'm online" msg to Proc*. - // Just mark it active for now. TODO: make it use the ping fcn in IDB* instead. - if (listPtr->ProcessName == "StorageManager") - oam.setProcessStatus("StorageManager", boost::get<0>(oam.getModuleInfo()), - oam::ACTIVE, listPtr->processID); - - if ( (*listPtr).processID == oam::API_FAILURE ) - { - // restart hard failure - restartStatus = " restart failed with hard failure, don't retry!!"; - (*listPtr).processID = 0; - - // check if Module failover is needed due to process outage - aMonitor.checkModuleFailover((*listPtr).ProcessName); - break; - } - else - { - if ( (*listPtr).processID != oam::API_MINOR_FAILURE ) - { - //restarted successful - //Inform Process Manager that Process restart - aMonitor.processRestarted( (*listPtr).ProcessName, false); - break; - } - } - // restart failed with minor error, sleep and try - sleep(5); - } - - if ( i == 10 || (*listPtr).processID == oam::API_FAILURE) - { - //setModule status to degraded - try - { - bool degraded; - int moduleStatus; - oam.getModuleStatus(config->moduleName(), moduleStatus, degraded); - - if ( moduleStatus == oam::ACTIVE) - { - try - { - oam.setModuleStatus(config->moduleName(), oam::DEGRADED); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on setModuleStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on setModuleStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - - } - } - catch (...) - {} - - //check the db health - if (DBFunctionalMonitorFlag == "y" ) - { - log.writeLog(__LINE__, "Call the check DB Functional API", LOG_TYPE_DEBUG); - - try - { - oam.checkDBFunctional(); - log.writeLog(__LINE__, "check DB Functional passed", LOG_TYPE_DEBUG); - } - catch (...) - { - log.writeLog(__LINE__, "check DB Functional FAILED", LOG_TYPE_ERROR); - } - } - } - - if ( i == 10 ) - { - // restart timeout - restartStatus = " restart failed after 10 retries"; - (*listPtr).processID = 0; - } - else - { - restartStatus = " restarted successfully!!"; - - //Inform Process Manager that Process restart - aMonitor.processRestarted( (*listPtr).ProcessName, false); - } - - //Log this event - log.writeLog(__LINE__, "MariaDB ColumnStore Process " + (*listPtr).ProcessName + restartStatus, LOG_TYPE_INFO); - } - } - } - - delayCount++; - - if ( delayCount > 2 ) - delayCount = 0; - - sleep(5); - } - return NULL; -} - -/****************************************************************************************** -* @brief sigHupHandler -* -* purpose: Hanlder SIGHUP signal and update internal DB -* -******************************************************************************************/ -static void sigHupHandler(int sig) -{ - MonitorLog log; - MonitorConfig config; - ProcessMonitor aMonitor(config, log); - log.writeLog(__LINE__, "SIGHUP Thread started ..", LOG_TYPE_DEBUG); - - aMonitor.updateConfig(); - -} - -static int PROCSTATshmsize = 0; -shmProcessStatus* fShmProcessStatus = 0; -boost::interprocess::shared_memory_object fProcStatShmobj; -boost::interprocess::mapped_region fProcStatMapreg; - -int fmoduleNumber = 0; -int extDeviceNumber = 0; -int NICNumber = 0; -int dbrootNumber = 0; -int processNumber = 0; - -boost::interprocess::shared_memory_object fSysStatShmobj; -boost::interprocess::mapped_region fSysStatMapreg; - -void* processStatusMSG(messageqcpp::IOSocket* fIos); - -processStatusList* aPtr; -SystemProcessConfig systemprocessconfig; -ModuleTypeConfig moduletypeconfig; -SystemModuleTypeConfig systemModuleTypeConfig; -SystemExtDeviceConfig systemextdeviceconfig; - -std::vector moduleDisableStateList; -std::vector hostNameList; -std::vector ipaddrNameList; -std::vector moduleNameList; -std::vector extDeviceNameList; - -shmDeviceStatus* fShmNICStatus = 0; -shmDeviceStatus* fShmDbrootStatus = 0; -shmDeviceStatus* fShmExtDeviceStatus = 0; -shmDeviceStatus* fShmSystemStatus = 0; - -processStatusList fstatusListPtr; - -processStatusList* statusListPtr() -{ - return &fstatusListPtr; -} - - -/****************************************************************************************** -* @brief statusControlThread -* -* purpose: Setup Status Shared-Memory table and process request to get and set -* into the Status Shared-Memory table -* -******************************************************************************************/ -static void* statusControlThread(void*) -{ - MonitorLog log; - MonitorConfig config; - ProcessMonitor aMonitor(config, log); - Oam oam; - BRM::ShmKeys fShmKeys; - - log.writeLog(__LINE__, "statusControlThread Thread started ..", LOG_TYPE_DEBUG); - - // - //Read ProcessConfig file to get process list and build Status List - // - try - { - oam.getProcessConfig(systemprocessconfig); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - try - { - oam.getSystemConfig(systemModuleTypeConfig); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - // build status list - - for ( unsigned int i = 0 ; i < systemModuleTypeConfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemModuleTypeConfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0 ) - // skip of no modules configured - continue; - - // dm/um/pm - string systemModuleType = systemModuleTypeConfig.moduletypeconfig[i].ModuleType; - - fmoduleNumber = fmoduleNumber + moduleCount; - - // store ModuleNames / HostNames and IP Addresses (NIC) - DeviceNetworkList::iterator pt = systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - moduleNameList.push_back((*pt).DeviceName); - moduleDisableStateList.push_back((*pt).DisableState); - - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++ ) - { - ipaddrNameList.push_back((*pt1).IPAddr); - hostNameList.push_back((*pt1).HostName); - } - } - - NICNumber = hostNameList.size(); - string OAMParentModuleType = config.OAMParentName().substr(0, 2); - - pt = systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - for ( unsigned int j = 0; j < systemprocessconfig.processconfig.size(); j++) - { - //skip if both BootLaunch and LaunchID are 0 - if ( systemprocessconfig.processconfig[j].BootLaunch == 0 && - systemprocessconfig.processconfig[j].LaunchID == 0 ) - continue; - - // "ChildOAMModule" "ParentOAMModule" dm/um/pm - string processModuleType = systemprocessconfig.processconfig[j].ModuleType; - - if (processModuleType == systemModuleType - || ( processModuleType == "um" && - systemModuleType == "pm" && PMwithUM == "y") - || processModuleType == "ChildExtOAMModule" - || (processModuleType == "ChildOAMModule" ) - || (processModuleType == "ParentOAMModule" && systemModuleType == OAMParentModuleType) ) - { - if ( processModuleType == "um" && - systemModuleType == "pm" && PMwithUM == "y" && - systemprocessconfig.processconfig[j].ProcessName == "DMLProc" ) - continue; - - - if ( processModuleType == "um" && - systemModuleType == "pm" && PMwithUM == "y" && - systemprocessconfig.processconfig[j].ProcessName == "DDLProc" ) - continue; - - processstatus procstat; - procstat.ProcessName = systemprocessconfig.processconfig[j].ProcessName; - procstat.ModuleName = (*pt).DeviceName; - procstat.tableIndex = processNumber; - fstatusListPtr.push_back(procstat); - processNumber++; - } - } - } - } - - aPtr = statusListPtr(); - - // - //Allocate Shared Memory for storing Process Status Data - // - - string shmLocation = "/dev/shm/"; - - PROCSTATshmsize = MAX_PROCESS * sizeof(shmProcessStatus); - bool memInit = true; -#if 0 - int shmid = shmget(fShmKeys.PROCESSSTATUS_SYSVKEY, PROCSTATshmsize, IPC_EXCL | IPC_CREAT | 0666); - - if (shmid == -1) - { - // table already exist - memInit = false; - shmid = shmget(fShmKeys.PROCESSSTATUS_SYSVKEY, PROCSTATshmsize, 0666); - - if (shmid == -1) - { - log.writeLog(__LINE__, "*****ProcessStatusTable shmget failed.", LOG_TYPE_ERROR); - exit(1); - } - } - - fShmProcessStatus = static_cast(shmat(shmid, NULL, 0)); -#endif - string keyName = BRM::ShmKeys::keyToName(fShmKeys.PROCESSSTATUS_SYSVKEY); - memInit = getshm(keyName, PROCSTATshmsize, fProcStatShmobj); - - bi::mapped_region region(fProcStatShmobj, bi::read_write); - fProcStatMapreg.swap(region); - fShmProcessStatus = static_cast(fProcStatMapreg.get_address()); - - if (fShmProcessStatus == 0) - { - log.writeLog(__LINE__, "*****ProcessStatusTable shmat failed.", LOG_TYPE_CRITICAL); - exit(1); - } - - //Initialize Shared memory - if (memInit) - { - memset(fShmProcessStatus, 0, PROCSTATshmsize); - - for ( int i = 0; i < processNumber ; ++i) - { - fShmProcessStatus[i].ProcessOpState = oam::INITIAL; - } - - log.writeLog(__LINE__, "Process Status shared Memory allocated and Initialized", LOG_TYPE_DEBUG); - } - - // - //Allocate Shared Memory for storing System/Module Status Data - // - fmoduleNumber++; //add 1 to cover system status entry - - static const int SYSTEMSTATshmsize = MAX_MODULE * sizeof(shmDeviceStatus); - memInit = true; -#if 0 - shmid = shmget(fShmKeys.SYSTEMSTATUS_SYSVKEY, SYSTEMSTATshmsize, IPC_EXCL | IPC_CREAT | 0666); - - if (shmid == -1) - { - // table already exist - memInit = false; - shmid = shmget(fShmKeys.SYSTEMSTATUS_SYSVKEY, SYSTEMSTATshmsize, 0666); - - if (shmid == -1) - { - log.writeLog(__LINE__, "*****SystemStatusTable shmget failed.", LOG_TYPE_ERROR); - exit(1); - } - } - - fShmSystemStatus = static_cast(shmat(shmid, NULL, 0)); -#endif - keyName = BRM::ShmKeys::keyToName(fShmKeys.SYSTEMSTATUS_SYSVKEY); - memInit = getshm(keyName, SYSTEMSTATshmsize, fSysStatShmobj); - - bi::mapped_region region2(fSysStatShmobj, bi::read_write); - fSysStatMapreg.swap(region2); - fShmSystemStatus = static_cast(fSysStatMapreg.get_address()); - - if (fShmSystemStatus == 0) - { - log.writeLog(__LINE__, "*****SystemStatusTable shmat failed.", LOG_TYPE_CRITICAL); - exit(1); - } - - //Initialize Shared memory - if (memInit) - { - // Init System/Module Status Memory - memset(fShmSystemStatus, 0, SYSTEMSTATshmsize); - - //set system status - memcpy(fShmSystemStatus[0].Name, "system", sizeof("system")); - - if (runStandby) - { - try - { - SystemStatus systemstatus; - oam.getSystemStatus(systemstatus); - fShmSystemStatus[0].OpState = systemstatus.SystemOpState; - memcpy(fShmSystemStatus[0].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); - } - catch (...) - { - fShmSystemStatus[0].OpState = oam::DOWN; - } - } - else - fShmSystemStatus[0].OpState = oam::DOWN; - - //set module status - for ( int i = 1; i < fmoduleNumber ; ++i) - { - memcpy(fShmSystemStatus[i].Name, moduleNameList[i - 1].c_str(), NAMESIZE); - - if (runStandby) - { - try - { - int opState; - bool degraded; - oam.getModuleStatus(moduleNameList[i - 1], opState, degraded); - fShmSystemStatus[i].OpState = opState; - memcpy(fShmSystemStatus[i].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); - } - catch (...) - { - fShmSystemStatus[i].OpState = oam::INITIAL; - } - } - else - { - if ( moduleDisableStateList[i - 1] == oam::MANDISABLEDSTATE ) - fShmSystemStatus[i].OpState = oam::MAN_DISABLED; - else if ( moduleDisableStateList[i - 1] == oam::AUTODISABLEDSTATE ) - fShmSystemStatus[i].OpState = oam::AUTO_DISABLED; - else - fShmSystemStatus[i].OpState = oam::INITIAL; - } - } - - log.writeLog(__LINE__, "System/Module Status shared Memory allocated and Initialized", LOG_TYPE_DEBUG); - } - - // - //Allocate Shared Memory for storing NIC Status Data - // - boost::interprocess::shared_memory_object fNICStatShmobj; - static const int NICSTATshmsize = (MAX_MODULE * MAX_NIC) * sizeof(shmDeviceStatus); - - keyName = BRM::ShmKeys::keyToName(fShmKeys.NICSTATUS_SYSVKEY); - memInit = getshm(keyName, NICSTATshmsize, fNICStatShmobj); - - bi::mapped_region fNICStatMapreg(fNICStatShmobj, bi::read_write); - fShmNICStatus = static_cast(fNICStatMapreg.get_address()); - - if (fShmNICStatus == 0) - { - log.writeLog(__LINE__, "*****NICStatusTable shmat failed.", LOG_TYPE_CRITICAL); - exit(1); - } - - //Initialize Shared memory - if (memInit) - { - // Init NIC Status Memory - memset(fShmNICStatus, 0, NICSTATshmsize); - - for ( int i = 0; i < NICNumber ; ++i) - { - fShmNICStatus[i].OpState = oam::INITIAL; - memcpy(fShmNICStatus[i].Name, hostNameList[i].c_str(), NAMESIZE); - } - - log.writeLog(__LINE__, "NIC Status shared Memory allocated and Initialized", LOG_TYPE_DEBUG); - } - - // - //Allocate Shared Memory for storing External Device Status Data - // - - try - { - oam.getSystemConfig(systemextdeviceconfig); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - for ( unsigned int i = 0 ; i < systemextdeviceconfig.Count; i++) - { - if ( systemextdeviceconfig.extdeviceconfig[i].Name == oam::UnassignedName || - systemextdeviceconfig.extdeviceconfig[i].Name.empty() ) - continue; - - extDeviceNameList.push_back(systemextdeviceconfig.extdeviceconfig[i].Name); - extDeviceNumber++; - } - - boost::interprocess::shared_memory_object fExtStatShmobj; - static const int EXTDEVICESTATshmsize = MAX_EXT_DEVICE * sizeof(shmDeviceStatus); - keyName = BRM::ShmKeys::keyToName(fShmKeys.SWITCHSTATUS_SYSVKEY); - memInit = getshm(keyName, EXTDEVICESTATshmsize, fExtStatShmobj); - - bi::mapped_region fExtStatMapreg(fExtStatShmobj, bi::read_write); - fShmExtDeviceStatus = static_cast(fExtStatMapreg.get_address()); - - if (fShmExtDeviceStatus == 0) - { - log.writeLog(__LINE__, "*****ExtDeviceStatusTable shmat failed.", LOG_TYPE_CRITICAL); - exit(1); - } - - //Initialize Shared memory - if (memInit) - { - // Init Ext Device Status Memory - memset(fShmExtDeviceStatus, 0, EXTDEVICESTATshmsize); - - for ( int i = 0; i < extDeviceNumber ; ++i) - { - fShmExtDeviceStatus[i].OpState = oam::INITIAL; - memcpy(fShmExtDeviceStatus[i].Name, extDeviceNameList[i].c_str(), NAMESIZE); - } - - log.writeLog(__LINE__, "Ext Device Status shared Memory allocated and Initialized", LOG_TYPE_DEBUG); - } - - // - //Allocate Shared Memory for storing DBRoot Status Data - // - - string DBRootStorageType; - - try - { - oam.getSystemConfig("DBRootStorageType", DBRootStorageType); - } - catch (...) {} - - std::vectordbrootList; - - if ( DBRootStorageType == "external" || - DataRedundancyConfig == "y") - { - //get system dbroots - DBRootConfigList dbrootConfigList; - - try - { - oam.getSystemDbrootConfig(dbrootConfigList); - } - catch (exception& e) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemDbrootConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - DBRootConfigList::iterator pt = dbrootConfigList.begin(); - - for ( ; pt != dbrootConfigList.end() ; pt++) - { - dbrootList.push_back(oam.itoa(*pt)); - dbrootNumber++; - } - } - - boost::interprocess::shared_memory_object fDbrootShmobj; - static const int DBROOTSTATshmsize = MAX_DBROOT * sizeof(shmDeviceStatus); - keyName = BRM::ShmKeys::keyToName(fShmKeys.DBROOTSTATUS_SYSVKEY); - memInit = getshm(keyName, DBROOTSTATshmsize, fDbrootShmobj); - - bi::mapped_region fdDbrootStatMapreg(fDbrootShmobj, bi::read_write); - fShmDbrootStatus = static_cast(fdDbrootStatMapreg.get_address()); - - if (fShmDbrootStatus == 0) - { - log.writeLog(__LINE__, "*****DbrootStatusTable shmat failed.", LOG_TYPE_CRITICAL); - exit(1); - } - - //Initialize Shared memory - if (memInit) - { - // Init DBRoot Status Memory - memset(fShmDbrootStatus, 0, DBROOTSTATshmsize); - - for ( int i = 0; i < dbrootNumber ; ++i) - { - fShmDbrootStatus[i].OpState = oam::INITIAL; - memcpy(fShmDbrootStatus[i].Name, dbrootList[i].c_str(), NAMESIZE); - } - - log.writeLog(__LINE__, "Dbroot Status shared Memory allocated and Initialized", LOG_TYPE_DEBUG); - } - - //Set mainResumeFlag, to start up main thread - - mainResumeFlag = true; - - string portName = "ProcStatusControl"; - - if (runStandby) - { - portName = "ProcStatusControlStandby"; - processStatusList* aPtr = statusListPtr(); - updateShareMemory(aPtr); - } - - // - //Now wait for Process Status Get and Set request - // - - //read and cleanup port before trying to use - try - { - Config* sysConfig = Config::makeConfig(); - string port = sysConfig->getConfig(portName, "Port"); - string cmd = "fuser -k " + port + "/tcp >/dev/null 2>&1"; - - system(cmd.c_str()); - } - catch (...) - { - } - - log.writeLog(__LINE__, "statusControlThread Thread reading " + portName + " port", LOG_TYPE_DEBUG); - - IOSocket* fIos; - MessageQueueServer* mqs; - int standbyUpdateCount = 0; - - mqs = new MessageQueueServer(portName); - struct timespec ts = { 1, 0 }; - - for (;;) - { - if (!runStandby && portName == "ProcStatusControlStandby") - { - portName = "ProcStatusControl"; - delete mqs; - mqs = new MessageQueueServer(portName); - - log.writeLog(__LINE__, "statusControlThread Thread reading " + portName + " port", LOG_TYPE_DEBUG); - - processStatusList* aPtr = statusListPtr(); - updateShareMemory(aPtr); - } - - if (runStandby && portName == "ProcStatusControl") - { - portName = "ProcStatusControlStandby"; - delete mqs; - mqs = new MessageQueueServer(portName); - - log.writeLog(__LINE__, "statusControlThread Thread reading " + portName + " port", LOG_TYPE_DEBUG); - } - - fIos = NULL; - try - { - //log.writeLog(__LINE__, "***before accept", LOG_TYPE_DEBUG); - fIos = new IOSocket(); - *fIos = mqs->accept(&ts); - - if ( fIos->isOpen() ) - { - //log.writeLog(__LINE__, "***before create thread", LOG_TYPE_DEBUG); - pthread_t messagethread; - int status = pthread_create (&messagethread, NULL, (void*(*)(void*))&processStatusMSG, fIos); - - //log.writeLog(__LINE__, "***after create thread", LOG_TYPE_DEBUG); - - if ( status != 0 ) - { - log.writeLog(__LINE__, "messagethread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR); - delete fIos; - } - } - else - delete fIos; - - } - catch (...) - { - if (fIos) - delete fIos; - } - - if ( runStandby ) - { - standbyUpdateCount++; - - if ( standbyUpdateCount >= 3 ) - { - //processStatusList* aPtr = statusListPtr(); - updateShareMemory(aPtr); - standbyUpdateCount = 0; - } - } - } // end of for loop - return NULL; -} - -/****************************************************************************************** -* @brief processStatusMSG -* -* purpose: Process the status message -* -******************************************************************************************/ -void* processStatusMSG(messageqcpp::IOSocket* cfIos) -{ - messageqcpp::IOSocket* fIos = cfIos; - - pthread_t ThreadId; - ThreadId = pthread_self(); - - MonitorLog log; - MonitorConfig config; - ProcessMonitor aMonitor(config, log); - Oam oam; - - ByteStream* msg; - msg = new ByteStream(); - - //log.writeLog(__LINE__, "***start create thread", LOG_TYPE_DEBUG); - struct timespec ts = { 20, 0 }; - - try - { - *msg = fIos->read(&ts); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "***read error, close create thread: " + error, LOG_TYPE_DEBUG); - fIos->close(); - delete fIos; - delete msg; - pthread_detach (ThreadId); - pthread_exit(0); - } - catch (...) - { -// log.writeLog(__LINE__, "***read error, close create thread", LOG_TYPE_DEBUG); - fIos->close(); - delete fIos; - delete msg; - pthread_detach (ThreadId); - pthread_exit(0); - } - - if (msg->length() <= 0) - { -// log.writeLog(__LINE__, "***0 bytes, close create thread", LOG_TYPE_DEBUG); - fIos->close(); - delete fIos; - delete msg; - pthread_detach (ThreadId); - pthread_exit(0); - } - - ByteStream::byte requestType; - *msg >> requestType; - //log.writeLog(__LINE__, "statusControl: Msg received, requestType = " + oam.itoa(requestType), LOG_TYPE_DEBUG); - - switch (requestType) - { - case GET_PROC_STATUS: - { - std::string moduleName; - std::string processName; - - ByteStream::byte state; - ByteStream::quadbyte PID; - std::string changeDate; - ByteStream ackmsg; - - *msg >> moduleName; - *msg >> processName; - - processStatusList::iterator listPtr; - //processStatusList* aPtr = statusListPtr(); - listPtr = aPtr->begin(); - - int shmIndex = 0; - - for (; listPtr != aPtr->end(); ++listPtr) - { - if ((*listPtr).ProcessName == processName && - (*listPtr).ModuleName == moduleName) - { - shmIndex = (*listPtr).tableIndex; - break; - } - } - - if (listPtr == aPtr->end()) - { - // not in list -// log.writeLog(__LINE__, "statusControl: GET_PROC_STATUS: Process not valid: " + processName + " / " + moduleName, LOG_TYPE_DEBUG); - ackmsg << (ByteStream::byte) API_FAILURE; - fIos->write(ackmsg); - break; - } - - //get table info - state = fShmProcessStatus[shmIndex].ProcessOpState; - PID = fShmProcessStatus[shmIndex].ProcessID; - changeDate = fShmProcessStatus[shmIndex].StateChangeDate; - - ackmsg << (ByteStream::byte) API_SUCCESS; - ackmsg << state; - ackmsg << PID; - ackmsg << changeDate; - fIos->write(ackmsg); - } - break; - - case SET_PROC_STATUS: - { - std::string moduleName; - std::string processName; - ByteStream::byte state; - ByteStream::quadbyte PID; - std::string shmName; - char charName[NAMESIZE]; - - *msg >> moduleName; - *msg >> processName; - *msg >> state; - *msg >> PID; - - if (!runStandby) - { - ByteStream ackmsg; - ackmsg << (ByteStream::byte) requestType; - fIos->write(ackmsg); - } - - log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set Process " + moduleName + "/" + processName + " State = " + oamState[state], LOG_TYPE_DEBUG); - - processStatusList::iterator listPtr; - //processStatusList* aPtr = statusListPtr(); - listPtr = aPtr->begin(); - - int shmIndex = 0; - - for (; listPtr != aPtr->end(); ++listPtr) - { - if ((*listPtr).ProcessName == processName && - (*listPtr).ModuleName == moduleName) - { - shmIndex = (*listPtr).tableIndex; - break; - } - } - - if (listPtr == aPtr->end()) - { - // not in list - log.writeLog(__LINE__, "statusControl: SET_PROC_STATUS: Process not valid: " + moduleName + "/" + processName, LOG_TYPE_DEBUG); - break; - } - - //check and process for Active/Standby process run-type - if ( state == oam::ACTIVE ) - { - - std::string moduleType = moduleName.substr(0, 2); - - for ( unsigned int i = 0 ; i < systemprocessconfig.processconfig.size(); i++) - { - if ( systemprocessconfig.processconfig[i].ModuleType == moduleType && - systemprocessconfig.processconfig[i].ProcessName == processName ) - { - if ( systemprocessconfig.processconfig[i].RunType == oam::ACTIVE_STANDBY ) - { - // process is ACTIVE_STANDBY run-state, get Module run-type and state - try - { - oam.getSystemConfig(moduleType, moduletypeconfig); - - if ( moduletypeconfig.RunType == oam::ACTIVE_STANDBY ) - { - for ( int i = 1; i < fmoduleNumber; ++i) - { - memcpy(charName, fShmSystemStatus[i].Name, NAMESIZE); - shmName = charName; - - if ( moduleName == shmName ) - { - if ( fShmSystemStatus[i].OpState == oam::STANDBY ) - { - //set current state to STANDBY - state = oam::STANDBY; - break; - } - } - } - } - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "statusControl: EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - break; - } - catch (...) - { -// log.writeLog(__LINE__, "statusControl: EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - break; - } - } - else - // not oam::ACTIVE/STANDBY - break; - } - } - } - - // invalid state change ACTIVE TO MAN_INIT / AUTO_INIT - if ( fShmProcessStatus[shmIndex].ProcessOpState == oam::ACTIVE ) - { - if ( state == oam::MAN_INIT || state == oam::AUTO_INIT ) - { - log.writeLog(__LINE__, "statusControl: " + moduleName + "/" + processName + " Current State = ACTIVE, invalid update request to " + oamState[state], LOG_TYPE_DEBUG); - break; - } - } - - if (!utils::is_nonnegative(PID)) - PID = 0; - - log.writeLog(__LINE__, "statusControl: Set Process " + moduleName + "/" + processName + + " State = " + oamState[state] + " PID = " + oam.itoa(PID), LOG_TYPE_DEBUG); - - //update table - if ( state < PID_UPDATE ) - fShmProcessStatus[shmIndex].ProcessOpState = state; - - if ( PID != 1 ) - fShmProcessStatus[shmIndex].ProcessID = PID; - - memcpy(fShmProcessStatus[shmIndex].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); - - //if DMLProc set to BUSY_INIT, set system state to BUSY_INIT - if ( processName == "DMLProc" && state == oam::BUSY_INIT ) - { - fShmSystemStatus[0].OpState = state; - memcpy(fShmSystemStatus[0].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); - log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set System State = " + oamState[state], LOG_TYPE_DEBUG); - } - - //if DMLProc set to ACTIVE, set system state to ACTIVE if in an INIT state - if ( processName == "DMLProc" && state == oam::ACTIVE ) - { - if ( fShmSystemStatus[0].OpState == oam::BUSY_INIT || - fShmSystemStatus[0].OpState == oam::MAN_INIT || - fShmSystemStatus[0].OpState == oam::AUTO_INIT ) - { - fShmSystemStatus[0].OpState = state; - memcpy(fShmSystemStatus[0].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); - log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set System State = " + oamState[state], LOG_TYPE_DEBUG); - } - - BRM::DBRM dbrm; - dbrm.setSystemQueryReady(true); - } - - } - break; - - case GET_ALL_PROC_STATUS: - { - ByteStream ackmsg; - ByteStream::byte state; - ByteStream::quadbyte PID; - std::string changeDate; - std::string processName; - std::string moduleName; - - processStatusList::iterator listPtr; - ////processStatusList* aPtr = statusListPtr(); - - ackmsg << (ByteStream::quadbyte) aPtr->size(); - - for ( unsigned int i = 0 ; i < systemModuleTypeConfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemModuleTypeConfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0 ) - // skip of no modules configured - continue; - - string moduleType = systemModuleTypeConfig.moduletypeconfig[i].ModuleType; - - listPtr = aPtr->begin(); - - for (; listPtr != aPtr->end(); ++listPtr) - { - moduleName = (*listPtr).ModuleName; - - if ( moduleName.find(moduleType) != string::npos ) - { - processName = (*listPtr).ProcessName; - int shmIndex = (*listPtr).tableIndex; - state = fShmProcessStatus[shmIndex].ProcessOpState; - PID = fShmProcessStatus[shmIndex].ProcessID; - changeDate = fShmProcessStatus[shmIndex].StateChangeDate; - - ackmsg << processName; - ackmsg << moduleName; - ackmsg << state; - ackmsg << PID; - ackmsg << changeDate; - } - } - } - - fIos->write(ackmsg); - } - break; - - case GET_PROC_STATUS_BY_PID: - { - std::string moduleName; - std::string processName; - - ByteStream ackmsg; - ByteStream::byte state; - ByteStream::quadbyte PID; - - *msg >> moduleName; - *msg >> PID; - - processStatusList::iterator listPtr; - //processStatusList* aPtr = statusListPtr(); - listPtr = aPtr->begin(); - - int shmIndex = 0; - - for (; listPtr != aPtr->end(); ++listPtr) - { - if ((*listPtr).ModuleName == moduleName) - { - shmIndex = (*listPtr).tableIndex; - - //get PID - if ( PID == (ByteStream::quadbyte) fShmProcessStatus[shmIndex].ProcessID) - { - // match found, get state - state = fShmProcessStatus[shmIndex].ProcessOpState; - //get process name - processName = (*listPtr).ProcessName; - - ackmsg << (ByteStream::byte) API_SUCCESS; - ackmsg << state; - ackmsg << processName; - fIos->write(ackmsg); - break; - } - } - } - - if (listPtr == aPtr->end()) - { - // not in list - ackmsg << (ByteStream::byte) API_FAILURE; - fIos->write(ackmsg); -// log.writeLog(__LINE__, "statusControl: GET_PROC_STATUS_BY_PID: PID not valid: " + oam.itoa(PID) + " / " + moduleName); - break; - } - } - break; - - case GET_SYSTEM_STATUS: - { - ByteStream ackmsg; - ByteStream::byte state; - std::string name; - std::string changeDate; - ByteStream::byte systemStatusOnly; - - *msg >> systemStatusOnly; - - if ( systemStatusOnly == 1 ) - { - for (int j = 0 ; j < fmoduleNumber; ++j) - { - name = fShmSystemStatus[j].Name; - - if ( name.find("system") != string::npos ) - { - state = fShmSystemStatus[j].OpState; - changeDate = fShmSystemStatus[j].StateChangeDate; - - ackmsg << name; - ackmsg << state; - ackmsg << changeDate; - break; - } - } - } - else - { - ackmsg << (ByteStream::byte) fmoduleNumber; - - for (int j = 0 ; j < fmoduleNumber; ++j) - { - name = fShmSystemStatus[j].Name; - - if ( name.find("system") != string::npos ) - { - state = fShmSystemStatus[j].OpState; - changeDate = fShmSystemStatus[j].StateChangeDate; - - ackmsg << name; - ackmsg << state; - ackmsg << changeDate; - } - } - - for ( unsigned int i = 0 ; i < systemModuleTypeConfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemModuleTypeConfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0 ) - // skip of no modules configured - continue; - - string moduleType = systemModuleTypeConfig.moduletypeconfig[i].ModuleType; - - for (int j = 0 ; j < fmoduleNumber; ++j) - { - name = fShmSystemStatus[j].Name; - - if ( name.find(moduleType) != string::npos ) - { - state = fShmSystemStatus[j].OpState; - changeDate = fShmSystemStatus[j].StateChangeDate; - - ackmsg << name; - ackmsg << state; - ackmsg << changeDate; - } - } - } - - ackmsg << (ByteStream::byte) extDeviceNumber; - - for (int i = 0 ; i < extDeviceNumber; ++i) - { - name = fShmExtDeviceStatus[i].Name; - state = fShmExtDeviceStatus[i].OpState; - changeDate = fShmExtDeviceStatus[i].StateChangeDate; - - ackmsg << name; - ackmsg << state; - ackmsg << changeDate; - } - - ackmsg << (ByteStream::byte) NICNumber; - - for (int i = 0 ; i < NICNumber; ++i) - { - name = fShmNICStatus[i].Name; - state = fShmNICStatus[i].OpState; - changeDate = fShmNICStatus[i].StateChangeDate; - - ackmsg << name; - ackmsg << state; - ackmsg << changeDate; - } - - ackmsg << (ByteStream::byte) dbrootNumber; - - for (int i = 0 ; i < dbrootNumber; ++i) - { - name = fShmDbrootStatus[i].Name; - state = fShmDbrootStatus[i].OpState; - changeDate = fShmDbrootStatus[i].StateChangeDate; - - ackmsg << name; - ackmsg << state; - ackmsg << changeDate; - } - } - - fIos->write(ackmsg); - } - break; - - case SET_SYSTEM_STATUS: - { - ByteStream::byte state; - *msg >> state; - fShmSystemStatus[0].OpState = state; - memcpy(fShmSystemStatus[0].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); - log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set System State = " + oamState[state], LOG_TYPE_DEBUG); - - if (!runStandby) - { - ByteStream ackmsg; - ackmsg << (ByteStream::byte) requestType; - fIos->write(ackmsg); - } - } - break; - - case SET_MODULE_STATUS: - { - ByteStream::byte state; - std::string moduleName; - std::string shmName; - char charName[NAMESIZE]; - - *msg >> moduleName; - *msg >> state; - - if (!runStandby) - { - ByteStream ackmsg; - ackmsg << (ByteStream::byte) requestType; - fIos->write(ackmsg); - } - - log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set Module " + moduleName + " State = " + oamState[state], LOG_TYPE_DEBUG); - - //Handle Module RunType of ACTIVE_STANDBY - string moduletype = moduleName.substr(0, MAX_MODULE_TYPE_SIZE); - string moduleID = moduleName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE); - - try - { - oam.getSystemConfig(moduletype, moduletypeconfig); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "statusControl: EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - break; - } - catch (...) - { -// log.writeLog(__LINE__, "statusControl: EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - break; - } - - if ( moduletypeconfig.RunType == oam::ACTIVE_STANDBY ) - { - if ( state == oam::ACTIVE ) - { - for ( int i = 1; i < fmoduleNumber; ++i) - { - memcpy(charName, fShmSystemStatus[i].Name, NAMESIZE); - shmName = charName; - string othermoduletype = shmName.substr(0, MAX_MODULE_TYPE_SIZE); - string othermoduleID = shmName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE); - - if ( moduletype == othermoduletype && - moduleID != othermoduleID ) - { - if ( fShmSystemStatus[i].OpState == oam::ACTIVE ) - { - //found one, set current state to STANDBY - state = oam::STANDBY; - - //set ACTIVE_STANDBY process to STANDBY state - try - { - oam.getProcessConfig(systemprocessconfig); - - for ( unsigned int i = 0 ; i < systemprocessconfig.processconfig.size(); i++) - { - if ( systemprocessconfig.processconfig[i].ModuleType == moduletype && - systemprocessconfig.processconfig[i].RunType == oam::ACTIVE_STANDBY ) - { - - processStatusList::iterator listPtr; - //processStatusList* aPtr = statusListPtr(); - listPtr = aPtr->begin(); - - for (; listPtr != aPtr->end(); ++listPtr) - { - if ( systemprocessconfig.processconfig[i].ProcessName == (*listPtr).ProcessName && - moduleName == (*listPtr).ModuleName ) - { - int shmIndex = (*listPtr).tableIndex; - fShmProcessStatus[shmIndex].ProcessOpState = oam::STANDBY; - break; - } - } - } - } - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "statusControl: EXCEPTION ERROR on getProcessConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "statusControl: EXCEPTION ERROR on getProcessConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - break; - } - } - } - } - else - { - //check to see if a STANDBY Mate needs to go ACTIVE - if ( state == oam::DOWN || state == oam::MAN_OFFLINE - || state == oam::FAILED) - { - for ( int i = 1; i < fmoduleNumber; ++i) - { - memcpy(charName, fShmSystemStatus[i].Name, NAMESIZE); - shmName = charName; - string othermoduletype = shmName.substr(0, MAX_MODULE_TYPE_SIZE); - string othermoduleID = shmName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE); - - if ( moduletype == othermoduletype && - moduleID != othermoduleID ) - { - if ( fShmSystemStatus[i].OpState == oam::STANDBY ) - { - //found one, set it to ACTIVE - fShmSystemStatus[i].OpState = oam::ACTIVE; - memcpy(fShmSystemStatus[i].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); - - //set ACTIVE_STANDBY process to ACTIVE state - try - { - oam.getProcessConfig(systemprocessconfig); - - for ( unsigned int i = 0 ; i < systemprocessconfig.processconfig.size(); i++) - { - if ( systemprocessconfig.processconfig[i].ModuleType == moduletype && - systemprocessconfig.processconfig[i].RunType == oam::ACTIVE_STANDBY ) - { - - processStatusList::iterator listPtr; - //processStatusList* aPtr = statusListPtr(); - listPtr = aPtr->begin(); - - for (; listPtr != aPtr->end(); ++listPtr) - { - if ( systemprocessconfig.processconfig[i].ProcessName == (*listPtr).ProcessName && - shmName == (*listPtr).ModuleName ) - { - int shmIndex = (*listPtr).tableIndex; - fShmProcessStatus[shmIndex].ProcessOpState = oam::ACTIVE; - break; - } - } - } - } - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "statusControl: EXCEPTION ERROR on getProcessConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "statusControl: EXCEPTION ERROR on getProcessConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - break; - } - } - } - } - } - } - - //set current Module state - int i = 1; - - for ( ; i < fmoduleNumber; ++i) - { - memcpy(charName, fShmSystemStatus[i].Name, NAMESIZE); - shmName = charName; - - if ( moduleName == shmName ) - { - fShmSystemStatus[i].OpState = state; - memcpy(fShmSystemStatus[i].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); - break; - } - } - - if ( i == fmoduleNumber) - { - // not in list - log.writeLog(__LINE__, "statusControl: SET_MODULE_STATUS: Module not valid: " + moduleName, LOG_TYPE_ERROR); - break; - } - } - break; - - case SET_EXT_DEVICE_STATUS: - { - ByteStream::byte state; - std::string name; - std::string shmName; - char charName[NAMESIZE]; - - *msg >> name; - *msg >> state; - - if (!runStandby) - { - ByteStream ackmsg; - ackmsg << (ByteStream::byte) requestType; - fIos->write(ackmsg); - } - - log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set Ext Device " + name + " State = " + oamState[state], LOG_TYPE_DEBUG); - - int i = 0; - - for ( ; i < extDeviceNumber; ++i) - { - memcpy(charName, fShmExtDeviceStatus[i].Name, NAMESIZE); - shmName = charName; - - if ( name == shmName ) - { - fShmExtDeviceStatus[i].OpState = state; - memcpy(fShmExtDeviceStatus[i].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); - break; - } - } - - if ( i == extDeviceNumber) - { - // not in list - log.writeLog(__LINE__, "statusControl: SET_SWITCH_STATUS: Switch not valid: " + name, LOG_TYPE_ERROR); - break; - } - } - break; - - case SET_DBROOT_STATUS: - { - ByteStream::byte state; - std::string name; - std::string shmName; - char charName[NAMESIZE]; - - *msg >> name; - *msg >> state; - - if (!runStandby) - { - ByteStream ackmsg; - ackmsg << (ByteStream::byte) requestType; - fIos->write(ackmsg); - } - - log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set DBroot " + name + " State = " + oamState[state], LOG_TYPE_DEBUG); - - if ( dbrootNumber == 0 ) - { - // no dbroots setup in shared memory, must be internal - log.writeLog(__LINE__, "statusControl: SET_DBROOT_STATUS: DBroot not valid: " + name, LOG_TYPE_ERROR); - break; - } - - int i = 0; - - for ( ; i < dbrootNumber; ++i) - { - memcpy(charName, fShmDbrootStatus[i].Name, NAMESIZE); - shmName = charName; - - if ( name == shmName ) - { - fShmDbrootStatus[i].OpState = state; - memcpy(fShmDbrootStatus[i].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); - break; - } - } - - if ( i == dbrootNumber) - { - // not in list - log.writeLog(__LINE__, "statusControl: SET_DBROOT_STATUS: DBroot not valid: " + name, LOG_TYPE_ERROR); - break; - } - } - break; - - case SET_NIC_STATUS: - { - ByteStream::byte state; - std::string hostName; - std::string shmName; - char charName[NAMESIZE]; - - *msg >> hostName; - *msg >> state; - - if (!runStandby) - { - ByteStream ackmsg; - ackmsg << (ByteStream::byte) requestType; - fIos->write(ackmsg); - } - - log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set NIC " + hostName + " State = " + oamState[state], LOG_TYPE_DEBUG); - - int i = 0; - - for ( ; i < NICNumber; ++i) - { - memcpy(charName, fShmNICStatus[i].Name, NAMESIZE); - shmName = charName; - - if ( hostName == shmName ) - { - fShmNICStatus[i].OpState = state; - memcpy(fShmNICStatus[i].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); - break; - } - } - - if ( i == NICNumber) - { - // not in list - log.writeLog(__LINE__, "statusControl: SET_NIC_STATUS: NIC not valid: " + hostName, LOG_TYPE_ERROR); - break; - } - } - break; - - case ADD_MODULE: - { - ByteStream ackmsg; - ByteStream::byte moduleCount, nicCount; - oam::DeviceNetworkConfig devicenetworkconfig; - oam::DeviceNetworkList devicenetworklist; - string value; - MonitorConfig currentConfig; - - *msg >> moduleCount; - - for (int i = 0; i < moduleCount; i++) - { - *msg >> value; - devicenetworkconfig.DeviceName = value; - devicenetworklist.push_back(devicenetworkconfig); - } - - log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Add Module"); - - string moduleType = devicenetworkconfig.DeviceName.substr(0, MAX_MODULE_TYPE_SIZE); - string OAMParentModuleType = currentConfig.OAMParentName().substr(0, 2); - - // add to module status shared memory - DeviceNetworkList::iterator pt = devicenetworklist.begin(); - - for ( ; pt != devicenetworklist.end() ; pt++) - { - moduleNameList.push_back((*pt).DeviceName); - - string moduleName = (*pt).DeviceName; - memcpy(fShmSystemStatus[fmoduleNumber].Name, moduleName.c_str(), NAMESIZE); - fShmSystemStatus[fmoduleNumber].OpState = oam::MAN_DISABLED; - memcpy(fShmSystemStatus[fmoduleNumber].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); - fmoduleNumber++; - } - - // add to NIC status shared memory - *msg >> nicCount; - - for (int i = 0; i < nicCount; i++) - { - *msg >> value; - memcpy(fShmNICStatus[NICNumber].Name, value.c_str(), NAMESIZE); - fShmNICStatus[NICNumber].OpState = oam::INITIAL; - memcpy(fShmNICStatus[NICNumber].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); - NICNumber++; - } - - processStatusList::iterator listPtr; - listPtr = aPtr->begin(); - - // add to process status shared memory - pt = devicenetworklist.begin(); - - for ( ; pt != devicenetworklist.end() ; pt++) - { - for ( unsigned int j = 0; j < systemprocessconfig.processconfig.size(); j++) - { - //skip if both BootLaunch and LaunchID are 0 - if ( systemprocessconfig.processconfig[j].BootLaunch == 0 && - systemprocessconfig.processconfig[j].LaunchID == 0 ) - continue; - - // "ChildOAMModule" "ParentOAMModule" dm/um/pm - string processModuleType = systemprocessconfig.processconfig[j].ModuleType; - - if (processModuleType == moduleType - || ( processModuleType == "um" && - moduleType == "pm" && PMwithUM == "y") - || processModuleType == "ChildExtOAMModule" - || (processModuleType == "ChildOAMModule" ) - || (processModuleType == "ParentOAMModule" && moduleType == OAMParentModuleType) ) - { - if ( processModuleType == "um" && - moduleType == "pm" && PMwithUM == "y" && - systemprocessconfig.processconfig[j].ProcessName == "DMLProc" ) - continue; - - if ( processModuleType == "um" && - moduleType == "pm" && PMwithUM == "y" && - systemprocessconfig.processconfig[j].ProcessName == "DDLProc" ) - continue; - - processstatus procstat; - procstat.ProcessName = systemprocessconfig.processconfig[j].ProcessName; - procstat.ModuleName = (*pt).DeviceName; - procstat.tableIndex = processNumber; - fstatusListPtr.push_back(procstat); - - fShmProcessStatus[processNumber].ProcessOpState = oam::MAN_OFFLINE; - fShmProcessStatus[processNumber].ProcessID = 0; - memcpy(fShmProcessStatus[processNumber].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); - processNumber++; - } - } - } - - ackmsg << (ByteStream::byte) API_SUCCESS; - fIos->write(ackmsg); - - try - { - oam.getSystemConfig(systemModuleTypeConfig); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - } - break; - - case REMOVE_MODULE: - { - ByteStream ackmsg; - ByteStream::byte moduleCount; - oam::DeviceNetworkConfig devicenetworkconfig; - oam::DeviceNetworkList devicenetworklist; - string value; - std::string shmName; - char charName[NAMESIZE]; - - *msg >> moduleCount; - - for (int i = 0; i < moduleCount; i++) - { - *msg >> value; - devicenetworkconfig.DeviceName = value; - devicenetworklist.push_back(devicenetworkconfig); - } - - log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Remove Module"); - - // remove from module status shared memory - DeviceNetworkList::iterator pt = devicenetworklist.begin(); - - for ( ; pt != devicenetworklist.end() ; pt++) - { - string moduleName = (*pt).DeviceName; - - for ( int j = 0 ; j < fmoduleNumber ; j++ ) - { - memcpy(charName, fShmSystemStatus[j].Name, NAMESIZE); - shmName = charName; - - if ( moduleName == shmName ) - { - for ( int k = j + 1 ; k < fmoduleNumber ; k++) - { - string name = fShmSystemStatus[k].Name; - int state = fShmSystemStatus[k].OpState; - string changeDate = fShmSystemStatus[k].StateChangeDate; - - memcpy(fShmSystemStatus[j].Name, name.c_str(), NAMESIZE); - fShmSystemStatus[j].OpState = state; - memcpy(fShmSystemStatus[j].StateChangeDate, changeDate.c_str(), DATESIZE); - } - - fmoduleNumber--; - } - } - } - - // remove from process status shared memory - pt = devicenetworklist.begin(); - - for ( ; pt != devicenetworklist.end() ; pt++) - { - string moduleName = (*pt).DeviceName; - - processStatusList::iterator listPtr; - //processStatusList* aPtr = statusListPtr(); - listPtr = aPtr->begin(); - - for (; listPtr != aPtr->end(); ) - { - if ( moduleName == (*listPtr).ModuleName ) - aPtr->erase(listPtr); - else - ++listPtr; - } - } - - ackmsg << (ByteStream::byte) API_SUCCESS; - fIos->write(ackmsg); - - try - { - oam.getSystemConfig(systemModuleTypeConfig); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - } - break; - - case ADD_EXT_DEVICE: - { - ByteStream ackmsg; - string device; - - log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Add External Device"); - - *msg >> device; - - fShmExtDeviceStatus[extDeviceNumber].OpState = oam::INITIAL; - memcpy(fShmExtDeviceStatus[extDeviceNumber].Name, device.c_str(), NAMESIZE); - extDeviceNumber++; - - if (!runStandby) - { - ackmsg << (ByteStream::byte) ADD_EXT_DEVICE; - fIos->write(ackmsg); - } - } - break; - - case REMOVE_EXT_DEVICE: - { - ByteStream ackmsg; - string device; - std::string shmName; - char charName[NAMESIZE]; - - log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Remove External Device"); - - *msg >> device; - - for ( int j = 0 ; j < extDeviceNumber ; j++ ) - { - memcpy(charName, fShmExtDeviceStatus[j].Name, NAMESIZE); - shmName = charName; - - if ( device == shmName ) - { - for ( int k = j + 1 ; k < extDeviceNumber ; k++) - { - string name = fShmExtDeviceStatus[k].Name; - int state = fShmExtDeviceStatus[k].OpState; - string changeDate = fShmExtDeviceStatus[k].StateChangeDate; - - memcpy(fShmExtDeviceStatus[j].Name, name.c_str(), NAMESIZE); - fShmExtDeviceStatus[j].OpState = state; - memcpy(fShmExtDeviceStatus[j].StateChangeDate, changeDate.c_str(), DATESIZE); - } - - extDeviceNumber--; - } - } - - if (!runStandby) - { - ackmsg << (ByteStream::byte) REMOVE_EXT_DEVICE; - fIos->write(ackmsg); - } - } - break; - - case ADD_DBROOT: - { - ByteStream ackmsg; - string device; - - log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Add DBRoot"); - - *msg >> device; - - fShmDbrootStatus[dbrootNumber].OpState = oam::INITIAL; - memcpy(fShmDbrootStatus[dbrootNumber].Name, device.c_str(), NAMESIZE); - memcpy(fShmDbrootStatus[dbrootNumber].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); - - dbrootNumber++; - - if (!runStandby) - { - ackmsg << (ByteStream::byte) ADD_DBROOT; - fIos->write(ackmsg); - } - } - break; - - case REMOVE_DBROOT: - { - ByteStream ackmsg; - string device; - std::string shmName; - char charName[NAMESIZE]; - - log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Remove DBRoot"); - - *msg >> device; - - for ( int j = 0 ; j < dbrootNumber ; j++ ) - { - memcpy(charName, fShmDbrootStatus[j].Name, NAMESIZE); - shmName = charName; - - if ( device == shmName ) - { - for ( int k = j + 1 ; k < dbrootNumber ; k++) - { - string name = fShmDbrootStatus[k].Name; - int state = fShmDbrootStatus[k].OpState; - string changeDate = fShmDbrootStatus[k].StateChangeDate; - - memcpy(fShmDbrootStatus[j].Name, name.c_str(), NAMESIZE); - fShmDbrootStatus[j].OpState = state; - memcpy(fShmDbrootStatus[j].StateChangeDate, changeDate.c_str(), DATESIZE); - } - - dbrootNumber--; - } - } - - if (!runStandby) - { - ackmsg << (ByteStream::byte) REMOVE_DBROOT; - fIos->write(ackmsg); - } - } - break; - - case GET_SHARED_MEM: - { - ByteStream ackmsg; - ByteStream::byte type; - - *msg >> type; - - switch (type) - { - case 1: - { - log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: GET_SHARED_MEM for process"); - - ByteStream::byte processNumber; - - *msg >> processNumber; - - ackmsg << (ByteStream::byte) GET_SHARED_MEM; - - for ( int i = 0 ; i < processNumber ; i++ ) - { - ackmsg << (ByteStream::quadbyte) fShmProcessStatus[i].ProcessID; - ackmsg << fShmProcessStatus[i].ProcessOpState; - } - - fIos->write(ackmsg); - - break; - } - - default: - break; - } - } - break; - - default: - break; - - } // end of switch - - //log.writeLog(__LINE__, "***end, close create thread", LOG_TYPE_DEBUG); - fIos->close(); - delete fIos; - delete msg; - pthread_detach (ThreadId); - pthread_exit(0); - return NULL; -} - -/****************************************************************************************** -* @brief updateShareMemory -* -* purpose: Get and update shared memory from Parent OAM module -* -******************************************************************************************/ -void updateShareMemory(processStatusList* aPtr) -{ - MonitorLog log; - MonitorConfig config; - ProcessMonitor aMonitor(config, log); - Oam oam; - -// log.writeLog(__LINE__, "Get Process Status shared Memory from Active OAM", LOG_TYPE_DEBUG); - - SystemProcessStatus systemprocessstatus; - ProcessStatus processstatus; - - processStatusList::iterator listPtr; - listPtr = aPtr->begin(); - - try - { - oam.getProcessStatus(systemprocessstatus); - - for ( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++) - { - int shmIndex = 0; - - for (; listPtr != aPtr->end(); ++listPtr) - { - if ((*listPtr).ProcessName == systemprocessstatus.processstatus[i].ProcessName && - (*listPtr).ModuleName == systemprocessstatus.processstatus[i].Module) - { - shmIndex = (*listPtr).tableIndex; - break; - } - } - - if (listPtr == aPtr->end()) - continue; - - //update table - fShmProcessStatus[shmIndex].ProcessOpState = systemprocessstatus.processstatus[i].ProcessOpState; - fShmProcessStatus[shmIndex].ProcessID = systemprocessstatus.processstatus[i].ProcessID; - string stime = systemprocessstatus.processstatus[i].StateChangeDate ; - memcpy(fShmProcessStatus[shmIndex].StateChangeDate, stime.c_str(), DATESIZE); - } - -// log.writeLog(__LINE__, "Process Status shared Memory Initialized from Active OAM Module", LOG_TYPE_DEBUG); - } - catch (...) - { - return; - } - -// log.writeLog(__LINE__, "Get System Status shared Memory from Active OAM", LOG_TYPE_DEBUG); - - SystemStatus systemstatus; - - try - { - oam.getSystemStatus(systemstatus, false); - fShmSystemStatus[0].OpState = systemstatus.SystemOpState; - string stime = systemstatus.systemmodulestatus.modulestatus[0].StateChangeDate ; - memcpy(fShmSystemStatus[0].StateChangeDate, stime.c_str(), DATESIZE); - } - catch (...) - { - return; - } - -// log.writeLog(__LINE__, "Get Module Status shared Memory from Active OAM", LOG_TYPE_DEBUG); - - std::string shmName; - char charName[NAMESIZE]; - - for ( unsigned int i = 0 ; i < systemstatus.systemmodulestatus.modulestatus.size(); i++) - { - if ( systemstatus.systemmodulestatus.modulestatus[i].Module.empty() ) - // end of list - break; - - int j = 1; - - for ( ; j < fmoduleNumber; ++j) - { - memcpy(charName, fShmSystemStatus[j].Name, NAMESIZE); - shmName = charName; - - if ( systemstatus.systemmodulestatus.modulestatus[i].Module == shmName ) - { - fShmSystemStatus[j].OpState = systemstatus.systemmodulestatus.modulestatus[i].ModuleOpState; - string stime = systemstatus.systemmodulestatus.modulestatus[i].StateChangeDate ; - memcpy(fShmSystemStatus[j].StateChangeDate, stime.c_str(), DATESIZE); - break; - } - } - } -} -// vim:ts=4 sw=4: - diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp deleted file mode 100644 index 1d8caf9f9..000000000 --- a/procmon/processmonitor.cpp +++ /dev/null @@ -1,6248 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/*************************************************************************** -* $Id: processmonitor.cpp 2044 2013-08-07 19:47:37Z dhill $ -* - ***************************************************************************/ - -#include -#include -#include -#include -#include - -#include "columnstoreversion.h" -#include "mcsconfig.h" -#include "IDBDataFile.h" -#include "IDBPolicy.h" -#include "processmonitor.h" -#include "installdir.h" -#include "cacheutils.h" -#include "ddlcleanuputil.h" -using namespace cacheutils; - -using namespace std; -using namespace oam; -using namespace messageqcpp; -using namespace alarmmanager; -using namespace logging; -using namespace config; - -using namespace idbdatafile; -namespace bf = boost::filesystem; - -extern string systemOAM; -extern string dm_server; -extern bool runStandby; -extern bool processInitComplete; -extern int fmoduleNumber; -extern string cloud; -extern string DataRedundancyConfig; -extern bool rootUser; -extern string USER; -extern bool HDFS; -extern string PMwithUM; -extern bool startProcMon; -extern string tmpLogDir; -extern string SUDO; - -//std::string gOAMParentModuleName; -bool gOAMParentModuleFlag; -bool gOAMStandbyModuleFlag; - -typedef boost::tuple sendAlarmInfo_t; -typedef boost::tuple sendProcessInfo_t; - -pthread_mutex_t ALARM_LOCK; -pthread_mutex_t LIST_LOCK; -pthread_mutex_t PROCESS_LOCK; - -namespace processmonitor -{ - -void* sendAlarmThread (sendAlarmInfo_t* t); -void* sendProcessThread (sendProcessInfo_t* t); - -using namespace oam; - -/****************************************************************************************** -* @brief MonitorConfig -* -* purpose: MonitorConfig constructor -* -******************************************************************************************/ - -MonitorConfig::MonitorConfig() -{ - Oam oam; - oamModuleInfo_t t; - - //get local module info - try - { - t = oam.getModuleInfo(); - flocalModuleName = boost::get<0>(t); - flocalModuleType = boost::get<1>(t); - flocalModuleID = boost::get<2>(t); - fOAMParentModuleName = boost::get<3>(t); - fOAMParentModuleFlag = boost::get<4>(t); - fserverInstallType = boost::get<5>(t); - fOAMStandbyModuleName = boost::get<6>(t); - fOAMStandbyModuleFlag = boost::get<7>(t); - - gOAMStandbyModuleFlag = boost::get<7>(t); - gOAMParentModuleFlag = boost::get<4>(t); - } - catch (exception& e) - { - cout << endl << "ProcMon Construct Error reading getModuleInfo = " << e.what() << endl; - } - -// cout << "OAMParentModuleName = " << fOAMParentModuleName << endl; - -// if ( fOAMParentModuleName == oam::UnassignedName ) { -// cout << endl << "OAMParentModuleName == oam::UnassignedName, exiting " << endl; -// exit (-1); -// } - - //get calpont software version and release - fsoftwareVersion = columnstore_version; - fsoftwareRelease = columnstore_release; - -} - - -/****************************************************************************************** -* @brief MonitorConfig destructor -* -* purpose: MonitorConfig destructor -* -******************************************************************************************/ -MonitorConfig::~MonitorConfig() -{ -} - -/****************************************************************************************** -* @brief MonitorLog Constructor -* -* purpose: Constructor:open the log file for writing -* -******************************************************************************************/ -MonitorLog::MonitorLog() -{ -} - -/****************************************************************************************** -* @brief MonitorLog Destructor -* -* purpose: Destructor:close the log file -* -******************************************************************************************/ -MonitorLog::~MonitorLog() -{ -} - -/****************************************************************************************** -* @brief writeLog for string -* -* purpose: write string message to the log file -* -******************************************************************************************/ -void MonitorLog::writeLog(const int lineNumber, const string logContent, const LOG_TYPE logType) -{ - //Log this event - LoggingID lid(18); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add(logContent); - msg.format(args); - - switch (logType) - { - case LOG_TYPE_DEBUG: - ml.logDebugMessage(msg); - break; - - case LOG_TYPE_INFO: - ml.logInfoMessage(msg); - break; - - case LOG_TYPE_WARNING: - ml.logWarningMessage(msg); - break; - - case LOG_TYPE_ERROR: - args.add("line:"); - args.add(lineNumber); - ml.logErrorMessage(msg); - break; - - case LOG_TYPE_CRITICAL: - ml.logCriticalMessage(msg); - break; - } - - return; -} - -/****************************************************************************************** -* @brief writeLog for integer -* -* purpose: write integer information to the log file -* -******************************************************************************************/ -void MonitorLog::writeLog(const int lineNumber, const int logContent, const LOG_TYPE logType) -{ - //Log this event - LoggingID lid(18); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add(logContent); - msg.format(args); - - switch (logType) - { - case LOG_TYPE_DEBUG: - ml.logDebugMessage(msg); - break; - - case LOG_TYPE_INFO: - ml.logInfoMessage(msg); - break; - - case LOG_TYPE_WARNING: - ml.logWarningMessage(msg); - break; - - case LOG_TYPE_ERROR: - args.add("line:"); - args.add(lineNumber); - ml.logErrorMessage(msg); - break; - - case LOG_TYPE_CRITICAL: - ml.logCriticalMessage(msg); - break; - } - - return; -} - -/****************************************************************************************** -* @brief ProcessMonitor Constructor -* -* purpose: ProcessMonitor Constructor -* -******************************************************************************************/ -ProcessMonitor::ProcessMonitor(MonitorConfig& aconfig, MonitorLog& alog): - config(aconfig), log(alog) -{ -// log.writeLog(__LINE__, "Process Monitor starts"); -} - -/****************************************************************************************** -* @brief ProcessMonitor Default Destructor -* -* purpose: ProcessMonitor Default Destructor -* -******************************************************************************************/ -ProcessMonitor::~ProcessMonitor() -{ -} - -/****************************************************************************************** -* @brief statusListPtr -* -* purpose: return the process status list -* -******************************************************************************************/ -//processStatusList* ProcessMonitor::statusListPtr() -//{ -// return &fstatusListPtr; -//} - -/****************************************************************************************** -* @brief buildList -* -* purpose: Build a list of processes the monitor started -* -******************************************************************************************/ -void MonitorConfig::buildList(string ProcessModuleType, string processName, string ProcessLocation, - string arg_list[MAXARGUMENTS], uint16_t launchID, pid_t processID, - uint16_t state, uint16_t BootLaunch, string RunType, - string DepProcessName[MAXDEPENDANCY], string DepModuleName[MAXDEPENDANCY], - string LogFile) -{ - //check if the process is already in the list - MonitorLog log; - Oam oam; - - if ( processName == "mysqld" ) - return; - - // Might need to add a similar do-nothing clause for StorageManager? - - pthread_mutex_lock(&LIST_LOCK); - - // get current time in seconds - time_t cal; - time (&cal); - - processList::iterator listPtr; - processList* aPtr = monitoredListPtr(); - - //Log the current list - /* log.writeLog(__LINE__, ""); - log.writeLog(__LINE__, "BEGIN: The current list in this monitor is"); - - for (listPtr=aPtr->begin(); listPtr != aPtr->end(); ++listPtr) - { - log.writeLog(__LINE__, (*listPtr).ProcessModuleType); - log.writeLog(__LINE__, (*listPtr).ProcessName); - log.writeLog(__LINE__, (*listPtr).ProcessLocation); - log.writeLog(__LINE__, (*listPtr).currentTime); - log.writeLog(__LINE__, (*listPtr).processID); - log.writeLog(__LINE__, (*listPtr).state); - } - */ - - listPtr = aPtr->begin(); - - for (; listPtr != aPtr->end(); ++listPtr) - { - if ((*listPtr).ProcessName == processName) - break; - } - - if (listPtr == aPtr->end()) - { - // not in list, add it - processInfo proInfo; - proInfo.ProcessModuleType = ProcessModuleType; - proInfo.ProcessName = processName; - proInfo.ProcessLocation = ProcessLocation; - - for (unsigned int i = 0; i < MAXARGUMENTS; i++) - { - if (arg_list[i].length() == 0) - break; - - proInfo.ProcessArgs[i] = arg_list[i]; - } - - proInfo.launchID = launchID; - proInfo.currentTime = cal; - proInfo.processID = processID; - proInfo.state = state; - proInfo.BootLaunch = BootLaunch; - proInfo.RunType = RunType; - proInfo.LogFile = LogFile; - proInfo.dieCounter = 0; - - for (unsigned int i = 0; i < MAXDEPENDANCY; i++) - { - if (DepProcessName[i].length() == 0) - break; - - proInfo.DepProcessName[i] = DepProcessName[i]; - } - - for (unsigned int i = 0; i < MAXDEPENDANCY; i++) - { - if (DepModuleName[i].length() == 0) - break; - - proInfo.DepModuleName[i] = DepModuleName[i]; - } - - listPtr = aPtr->begin(); - - if ( listPtr == aPtr->end()) - { - // list empty, add first one - fmonitoredListPtr.push_back(proInfo); - } - else - { - for (; listPtr != aPtr->end(); ++listPtr) - { - if ((*listPtr).launchID > launchID) - { - fmonitoredListPtr.insert(listPtr, proInfo); - break; - } - } - - if ( listPtr == aPtr->end()) - fmonitoredListPtr.push_back(proInfo); - } - } - else - { - // in list, just update the information - - if ( ProcessLocation.empty() ) - //status update only - (*listPtr).state = state; - else - { - (*listPtr).processID = processID; - (*listPtr).currentTime = cal; - (*listPtr).state = state; - (*listPtr).launchID = launchID; - (*listPtr).BootLaunch = BootLaunch; - (*listPtr).RunType = RunType; - (*listPtr).LogFile = LogFile; - - for (unsigned int i = 0; i < MAXARGUMENTS; i++) - { - (*listPtr).ProcessArgs[i] = arg_list[i]; - } - } - } - - //Log the current list - /* log.writeLog(__LINE__, ""); - log.writeLog(__LINE__, "END: The current list in this monitor is"); - - for (listPtr=aPtr->begin(); listPtr != aPtr->end(); ++listPtr) - { - log.writeLog(__LINE__, (*listPtr).ProcessModuleType); - log.writeLog(__LINE__, (*listPtr).ProcessName); - log.writeLog(__LINE__, (*listPtr).ProcessLocation); - log.writeLog(__LINE__, (*listPtr).currentTime); - log.writeLog(__LINE__, (*listPtr).processID); - log.writeLog(__LINE__, (*listPtr).state); - } - */ - pthread_mutex_unlock(&LIST_LOCK); - - return; -} - -/****************************************************************************************** -* @brief monitoredListPtr -* -* purpose: return the process list -* -******************************************************************************************/ -processList* MonitorConfig::monitoredListPtr() -{ - return &fmonitoredListPtr; -} - -/****************************************************************************************** -* @brief processMessage -* -* purpose: receive and process message -* -******************************************************************************************/ -void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IOSocket mq) - -{ - Oam oam; - ByteStream ackMsg; - MonitorConfig currentConfig; - - ByteStream::byte messageType; - ByteStream::byte requestID; - ByteStream::byte actIndicator; - ByteStream::byte manualFlag; - string processName; - - msg >> messageType; - - switch (messageType) - { - case REQUEST: - { - msg >> requestID; - msg >> actIndicator; - - if (!processInitComplete) - { - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) requestID; - ackMsg << (ByteStream::byte) oam::API_FAILURE; - mq.write(ackMsg); - break; - } - - switch (requestID) - { - case STOP: - { - msg >> processName; - msg >> manualFlag; - log.writeLog(__LINE__, "MSG RECEIVED: Stop process request on " + processName); - int requestStatus = API_SUCCESS; - - // check for mysqld - if ( processName == "mysqld" ) - { - try - { - oam.actionMysqlCalpont(MYSQL_STOP); - } - catch (...) - {} - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) STOP; - ackMsg << (ByteStream::byte) API_SUCCESS; - mq.write(ackMsg); - - log.writeLog(__LINE__, "STOP: ACK back to ProcMgr, return status = " + oam.itoa((int) API_SUCCESS)); - - break; - } - processList::iterator listPtr; - processList* aPtr = config.monitoredListPtr(); - listPtr = aPtr->begin(); - - for (; listPtr != aPtr->end(); ++listPtr) - { - if ((*listPtr).ProcessName == processName) - { - // update local process state - if ( manualFlag ) - { - (*listPtr).state = oam::MAN_OFFLINE; - (*listPtr).dieCounter = 0; - } - else - (*listPtr).state = oam::AUTO_OFFLINE; - - //stop the process first - if (stopProcess((*listPtr).processID, (*listPtr).ProcessName, (*listPtr).ProcessLocation, actIndicator, manualFlag)) - requestStatus = API_FAILURE; - else - (*listPtr).processID = 0; - - break; - } - } - - if (listPtr == aPtr->end()) - { - log.writeLog(__LINE__, "ERROR: No such process: " + processName); - requestStatus = API_FAILURE; - } - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) STOP; - ackMsg << (ByteStream::byte) requestStatus; - mq.write(ackMsg); - - log.writeLog(__LINE__, "STOP: ACK back to ProcMgr, return status = " + oam.itoa((int) requestStatus)); - - break; - } - - case START: - { - msg >> processName; - msg >> manualFlag; - log.writeLog(__LINE__, "MSG RECEIVED: Start process request on: " + processName); - - // check for mysqld - if ( processName == "mysqld" ) - { - try - { - oam.actionMysqlCalpont(MYSQL_START); - } - catch (...) - {} - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) START; - ackMsg << (ByteStream::byte) API_SUCCESS; - mq.write(ackMsg); - - log.writeLog(__LINE__, "START: ACK back to ProcMgr, return status = " + oam.itoa((int) API_SUCCESS)); - - break; - } - - ProcessConfig processconfig; - ProcessStatus processstatus; - - try - { - //Get the process information - Oam oam; - oam.getProcessConfig(processName, config.moduleName(), processconfig); - - oam.getProcessStatus(processName, config.moduleName(), processstatus); - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessConfig: " + error, LOG_TYPE_ERROR ); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessConfig: Caught unknown exception!", LOG_TYPE_ERROR ); - } - - int requestStatus = API_SUCCESS; - - //check the process current status & start the requested process - if (processstatus.ProcessOpState != oam::ACTIVE) - { - - int initType = oam::STANDBY_INIT; - - if ( actIndicator == oam::GRACEFUL_STANDBY) - { - //this module running Parent OAM Standby - runStandby = true; - log.writeLog(__LINE__, "ProcMon Running Hot-Standby"); - - // delete any old active alarm log file - unlink ("/var/log/mariadb/columnstore/activeAlarms"); - } - - //Check for SIMPLEX runtype processes - initType = checkSpecialProcessState( processconfig.ProcessName, processconfig.RunType, processconfig.ModuleType ); - - if ( initType == oam::COLD_STANDBY) - { - //there is a mate active, skip - config.buildList(processconfig.ModuleType, - processconfig.ProcessName, - processconfig.ProcessLocation, - processconfig.ProcessArgs, - processconfig.LaunchID, - 0, - oam::COLD_STANDBY, - processconfig.BootLaunch, - processconfig.RunType, - processconfig.DepProcessName, - processconfig.DepModuleName, - processconfig.LogFile); - - requestStatus = API_SUCCESS; - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) START; - ackMsg << (ByteStream::byte) requestStatus; - mq.write(ackMsg); - //sleep(1); - - log.writeLog(__LINE__, "START: process left STANDBY " + processName); - log.writeLog(__LINE__, "START: ACK back to ProcMgr, return status = " + oam.itoa((int) requestStatus)); - break; - } - - pid_t processID = startProcess(processconfig.ModuleType, - processconfig.ProcessName, - processconfig.ProcessLocation, - processconfig.ProcessArgs, - processconfig.LaunchID, - processconfig.BootLaunch, - processconfig.RunType, - processconfig.DepProcessName, - processconfig.DepModuleName, - processconfig.LogFile, - initType, - actIndicator); - - // StorageManager doesn't send the "I'm online" msg to Proc*. - // Just mark it active for now. TODO: make it use the ping fcn in IDB* instead. - if (processconfig.ProcessName == "StorageManager") - oam.setProcessStatus("StorageManager", boost::get<0>(oam.getModuleInfo()), - oam::ACTIVE, processID); - - if ( processID > oam::API_MAX ) - processID = oam::API_SUCCESS; - - requestStatus = processID; - } - else - log.writeLog(__LINE__, "START: process already active " + processName); - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) START; - ackMsg << (ByteStream::byte) requestStatus; - mq.write(ackMsg); - - log.writeLog(__LINE__, "START: ACK back to ProcMgr, return status = " + oam.itoa((int) requestStatus)); - - break; - } - - case RESTART: - { - msg >> processName; - msg >> manualFlag; - log.writeLog(__LINE__, "MSG RECEIVED: Restart process request on " + processName); - int requestStatus = API_SUCCESS; - - // check for mysqld restart - if ( processName == "mysqld" ) - { - try - { - oam.actionMysqlCalpont(MYSQL_RESTART); - } - catch (...) - {} - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) RESTART; - ackMsg << (ByteStream::byte) API_SUCCESS; - mq.write(ackMsg); - - log.writeLog(__LINE__, "RESTART: ACK back to ProcMgr, return status = " + oam.itoa((int) API_SUCCESS)); - - break; - } - - processList::iterator listPtr; - processList* aPtr = config.monitoredListPtr(); - listPtr = aPtr->begin(); - - for (; listPtr != aPtr->end(); ++listPtr) - { - if ((*listPtr).ProcessName == processName) - { - // update local process state - if ( manualFlag ) - { - (*listPtr).state = oam::MAN_OFFLINE; - (*listPtr).dieCounter = 0; - } - else - (*listPtr).state = oam::AUTO_OFFLINE; - - //stop the process first - if (stopProcess((*listPtr).processID, (*listPtr).ProcessName, (*listPtr).ProcessLocation, actIndicator, manualFlag)) - requestStatus = API_FAILURE; - else - { -// sleep(1); - (*listPtr).processID = 0; - - //Check for SIMPLEX runtype processes - int initType = checkSpecialProcessState( (*listPtr).ProcessName, (*listPtr).RunType, (*listPtr).ProcessModuleType ); - - if ( initType == oam::COLD_STANDBY ) - { - //there is a mate active, skip - (*listPtr).state = oam::COLD_STANDBY; - requestStatus = API_SUCCESS; - //sleep(1); - break; - } - - //start the process again - pid_t processID = startProcess((*listPtr).ProcessModuleType, - (*listPtr).ProcessName, - (*listPtr).ProcessLocation, - (*listPtr).ProcessArgs, - (*listPtr).launchID, - (*listPtr).BootLaunch, - (*listPtr).RunType, - (*listPtr).DepProcessName, - (*listPtr).DepModuleName, - (*listPtr).LogFile, - initType); - - // StorageManager doesn't send the "I'm online" msg to Proc*. - // Just mark it active for now. TODO: make it use the ping fcn in IDB* instead. - if (listPtr->ProcessName == "StorageManager") - oam.setProcessStatus("StorageManager", boost::get<0>(oam.getModuleInfo()), - oam::ACTIVE, listPtr->processID); - - if ( processID > oam::API_MAX ) - processID = oam::API_SUCCESS; - - requestStatus = processID; - - } - - break; - } - } - - if (listPtr == aPtr->end()) - { - log.writeLog(__LINE__, "ERROR: No such process: " + processName, LOG_TYPE_ERROR ); - requestStatus = API_FAILURE; - } - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) RESTART; - ackMsg << (ByteStream::byte) requestStatus; - mq.write(ackMsg); - - log.writeLog(__LINE__, "RESTART: ACK back to ProcMgr, return status = " + oam.itoa((int) requestStatus)); - - break; - } - - case PROCREINITPROCESS: - { - msg >> processName; - msg >> manualFlag; - - log.writeLog(__LINE__, "MSG RECEIVED: Re-Init process request on: " + processName); - - if ( processName == "cpimport" ) - { - system("pkill -sighup cpimport"); - for (int i=0; i < 10; i++) - { - //get pid - char buf[512]; - FILE *cmd_pipe = popen("pidof -s cpimport", "r"); - - fgets(buf, 512, cmd_pipe); - pid_t pid = strtoul(buf, NULL, 10); - - pclose( cmd_pipe ); - - if (pid) - sleep(2); - else - break; - } - // kill other processes - system("pkill -9 cpimport.bin"); - } - else - { - processList::iterator listPtr; - processList* aPtr = config.monitoredListPtr(); - listPtr = aPtr->begin(); - - for (; listPtr != aPtr->end(); ++listPtr) - { - if ((*listPtr).ProcessName == processName) - { - if ( (*listPtr).processID <= 1 ) - { - log.writeLog(__LINE__, "ERROR: process not active", LOG_TYPE_DEBUG ); - break; - } - - reinitProcess((*listPtr).processID, (*listPtr).ProcessName, actIndicator); - break; - } - } - - if (listPtr == aPtr->end()) - { - log.writeLog(__LINE__, "ERROR: No such process: " + processName, LOG_TYPE_ERROR ); - } - } - - log.writeLog(__LINE__, "PROCREINITPROCESS: completed, no ack to ProcMgr"); - break; - } - - case STOPALL: - { - msg >> manualFlag; - log.writeLog(__LINE__, "MSG RECEIVED: Stop All process request..."); - - if ( actIndicator == STATUS_UPDATE ) - { - //check and send notification - MonitorConfig config; - - if ( config.moduleType() == "um" ) - oam.sendDeviceNotification(config.moduleName(), START_UM_DOWN); - else if ( gOAMParentModuleFlag ) - oam.sendDeviceNotification(config.moduleName(), START_PM_MASTER_DOWN); - else if (gOAMStandbyModuleFlag) - oam.sendDeviceNotification(config.moduleName(), START_PM_STANDBY_DOWN); - else - oam.sendDeviceNotification(config.moduleName(), START_PM_COLD_DOWN); - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) STOPALL; - ackMsg << (ByteStream::byte) API_SUCCESS; - mq.write(ackMsg); - - log.writeLog(__LINE__, "STOPALL: ACK back to ProcMgr, STATUS_UPDATE only performed"); - break; - } - - //get local module run-type - string runType = oam::LOADSHARE; //default - - try - { - ModuleTypeConfig moduletypeconfig; - oam.getSystemConfig(config.moduleType(), moduletypeconfig); - runType = moduletypeconfig.RunType; - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - //Loop reversely through the process list, stop all processes - processList* aPtr = config.monitoredListPtr(); - processList::reverse_iterator rPtr; - uint16_t rtnCode; - int requestStatus = API_SUCCESS; - - for (rPtr = aPtr->rbegin(); rPtr != aPtr->rend(); ++rPtr) - { - if ( (*rPtr).BootLaunch == INIT_LAUNCH) - //skip - continue; - - if ( (*rPtr).BootLaunch == BOOT_LAUNCH && - gOAMParentModuleFlag ) - if ( actIndicator != INSTALL ) - //skip - continue; - - // update local process state here so monitor thread doesn't jump on it - if ( manualFlag ) - { - (*rPtr).state = oam::MAN_OFFLINE; - (*rPtr).dieCounter = 0; - } - else - (*rPtr).state = oam::AUTO_OFFLINE; - - rtnCode = stopProcess((*rPtr).processID, (*rPtr).ProcessName, (*rPtr).ProcessLocation, actIndicator, manualFlag); - - if (rtnCode) - // error in stopping a process - requestStatus = API_FAILURE; - else - (*rPtr).processID = 0; - } - - //reset BRM locks and clearShm - if ( requestStatus == oam::API_SUCCESS ) - { - string logdir("/var/log/mariadb/columnstore"); - - if (access(logdir.c_str(), W_OK) != 0) logdir = tmpLogDir; - - string cmd = "reset_locks > " + logdir + "/reset_locks.log1 2>&1"; - system(cmd.c_str()); - log.writeLog(__LINE__, "BRM reset_locks script run", LOG_TYPE_DEBUG); - - if ( !gOAMParentModuleFlag ) - { - cmd = "clearShm -c > /dev/null 2>&1"; - rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) != 1) - { - log.writeLog(__LINE__, "Successfully ran DBRM clearShm", LOG_TYPE_DEBUG); - } - else - log.writeLog(__LINE__, "Error running DBRM clearShm", LOG_TYPE_ERROR); - } - - //stop the mysqld daemon - try - { - oam.actionMysqlCalpont(MYSQL_STOP); - log.writeLog(__LINE__, "Stop MySQL Process", LOG_TYPE_DEBUG); - } - catch (...) - {} - - //send down notification - oam.sendDeviceNotification(config.moduleName(), MODULE_DOWN); - - //setModule status to offline - if ( manualFlag ) - { - try - { - oam.setModuleStatus(config.moduleName(), oam::MAN_OFFLINE); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on setModuleStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on setModuleStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - } - else - { - try - { - oam.setModuleStatus(config.moduleName(), oam::AUTO_OFFLINE); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on setModuleStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on setModuleStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - } - } - else - { - //setModule status to failed - try - { - oam.setModuleStatus(config.moduleName(), oam::FAILED); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on setModuleStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on setModuleStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - } - - - if ( config.moduleType() == "pm" ) - { - //go unmount disk NOT assigned to this pm - unmountExtraDBroots(); - } - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) STOPALL; - ackMsg << (ByteStream::byte) requestStatus; - mq.write(ackMsg); - - log.writeLog(__LINE__, "STOPALL: ACK back to ProcMgr, return status = " + oam.itoa((int) requestStatus)); - - break; - } - - case STARTALL: - { - msg >> manualFlag; - int requestStatus = oam::API_SUCCESS; - log.writeLog(__LINE__, "MSG RECEIVED: Start All process request..."); - - //start the mysqld daemon - - try - { - oam.actionMysqlCalpont(MYSQL_START); - } - catch (...) - { - // mysqld didn't start, return with error - // mysql didn't start, return with error - log.writeLog(__LINE__, "STARTALL: MySQL failed to start, start-module failure", LOG_TYPE_CRITICAL); - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) STARTALL; - ackMsg << (ByteStream::byte) oam::API_FAILURE; - mq.write(ackMsg); - - try - { - oam.setProcessStatus("mysqld", config.moduleName(), oam::FAILED, 0); - } - catch (...) - {} - - log.writeLog(__LINE__, "STARTALL: ACK back to ProcMgr, return status = " + oam.itoa((int) oam::API_FAILURE)); - - break; - } - - if ( config.moduleType() == "pm" ) - { - //setup DBRoot mounts - createDataDirs(cloud); - int ret = checkDataMount(); - - if (ret != oam::API_SUCCESS) - { - int ret_status = oam::API_FAILURE; - - log.writeLog(__LINE__, "checkDataMount error, startmodule failed", LOG_TYPE_CRITICAL); - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) STARTALL; - ackMsg << (ByteStream::byte) ret_status; - mq.write(ackMsg); - - log.writeLog(__LINE__, "STARTALL: ACK back to ProcMgr, return status = " + oam.itoa((int) oam::API_FAILURE)); - - break; - } - } - - //Loop through all Process belong to this module - processList::iterator listPtr; - processList* aPtr = config.monitoredListPtr(); - listPtr = aPtr->begin(); - requestStatus = API_SUCCESS; - - //launched any processes controlled by ProcMon that aren't active - for (; listPtr != aPtr->end(); ++listPtr) - { - if ( (*listPtr).BootLaunch != BOOT_LAUNCH) - continue; - - int opState = oam::ACTIVE; - bool degraded; - oam.getModuleStatus(config.moduleName(), opState, degraded); - - if (opState == oam::FAILED) - { - requestStatus = oam::API_FAILURE; - break; - } - - //check the process current status & start the requested process - if ((*listPtr).state == oam::MAN_OFFLINE || - (*listPtr).state == oam::AUTO_OFFLINE || - (*listPtr).state == oam::COLD_STANDBY || - (*listPtr).state == oam::INITIAL) - { - - //Check for SIMPLEX runtype processes - int initType = checkSpecialProcessState( (*listPtr).ProcessName, (*listPtr).RunType, (*listPtr).ProcessModuleType ); - - if ( initType == oam::COLD_STANDBY ) - { - //there is a mate active, skip - (*listPtr).state = oam::COLD_STANDBY; - requestStatus = API_SUCCESS; - //sleep(1); - continue; - } - - pid_t processID = startProcess((*listPtr).ProcessModuleType, - (*listPtr).ProcessName, - (*listPtr).ProcessLocation, - (*listPtr).ProcessArgs, - (*listPtr).launchID, - (*listPtr).BootLaunch, - (*listPtr).RunType, - (*listPtr).DepProcessName, - (*listPtr).DepModuleName, - (*listPtr).LogFile, - initType); - - // StorageManager doesn't send the "I'm online" msg to Proc*. - // Just mark it active for now. TODO: make it use the ping fcn in IDB* instead. - if (listPtr->ProcessName == "StorageManager") - oam.setProcessStatus("StorageManager", boost::get<0>(oam.getModuleInfo()), - oam::ACTIVE, processID); - - if ( processID > oam::API_MAX ) - { - processID = oam::API_SUCCESS; - } - - requestStatus = processID; - - if ( requestStatus != oam::API_SUCCESS ) - { - // error in launching a process - break; - } - -// sleep(1); - } - } - - if ( requestStatus == oam::API_SUCCESS ) - { - //launched any processes controlled by ProcMgr - for (listPtr = aPtr->begin(); listPtr != aPtr->end(); ++listPtr) - { - if ((*listPtr).BootLaunch != MGR_LAUNCH) - continue; - - int opState = oam::ACTIVE; - bool degraded; - oam.getModuleStatus(config.moduleName(), opState, degraded); - - if (opState == oam::FAILED) - { - requestStatus = oam::API_FAILURE; - break; - } - - //check the process current status & start the requested process - if ((*listPtr).state == oam::MAN_OFFLINE || - (*listPtr).state == oam::AUTO_OFFLINE || - (*listPtr).state == oam::COLD_STANDBY || - (*listPtr).state == oam::INITIAL) - { - - //Check for SIMPLEX runtype processes - int initType = checkSpecialProcessState( (*listPtr).ProcessName, (*listPtr).RunType, (*listPtr).ProcessModuleType ); - - if ( initType == oam::COLD_STANDBY ) - { - //there is a mate active, skip - (*listPtr).state = oam::COLD_STANDBY; - requestStatus = API_SUCCESS; - //sleep(1); - continue; - } - - pid_t processID = startProcess((*listPtr).ProcessModuleType, - (*listPtr).ProcessName, - (*listPtr).ProcessLocation, - (*listPtr).ProcessArgs, - (*listPtr).launchID, - (*listPtr).BootLaunch, - (*listPtr).RunType, - (*listPtr).DepProcessName, - (*listPtr).DepModuleName, - (*listPtr).LogFile, - initType); - - // StorageManager doesn't send the "I'm online" msg to Proc*. - // Just mark it active for now. TODO: make it use the ping fcn in IDB* instead. - if (listPtr->ProcessName == "StorageManager") - oam.setProcessStatus("StorageManager", boost::get<0>(oam.getModuleInfo()), - oam::ACTIVE, processID); - - if ( processID > oam::API_MAX ) - processID = oam::API_SUCCESS; - - requestStatus = processID; - - if ( requestStatus != oam::API_SUCCESS ) - { - // error in launching a process - if ( requestStatus == oam::API_FAILURE && - (*listPtr).RunType == SIMPLEX) - checkModuleFailover((*listPtr).ProcessName); - else - break; - } - else - { - //run startup test script to perform basic DB sanity testing - if ( gOAMParentModuleFlag - && (*listPtr).ProcessName == "DBRMWorkerNode" - && opState != oam::MAN_INIT ) - { - if ( runStartupTest() != oam::API_SUCCESS ) - { - requestStatus = oam::API_FAILURE_DB_ERROR; - break; - } - } - } - -// sleep(2); - } - else - { - // if DBRMWorkerNode and ACTIVE, run runStartupTest - if ( gOAMParentModuleFlag - && (*listPtr).ProcessName == "DBRMWorkerNode" - && (*listPtr).state == oam::ACTIVE - && opState != oam::MAN_INIT ) - { - if ( runStartupTest() != oam::API_SUCCESS ) - { - requestStatus = oam::API_FAILURE_DB_ERROR; - break; - } - } - } - } - } - - if ( requestStatus == oam::API_SUCCESS ) - { - - //check and send noitification - MonitorConfig config; - - if ( config.moduleType() == "um" ) - oam.sendDeviceNotification(config.moduleName(), UM_ACTIVE); - else if ( gOAMParentModuleFlag ) - oam.sendDeviceNotification(config.moduleName(), PM_MASTER_ACTIVE); - else if (gOAMStandbyModuleFlag) - oam.sendDeviceNotification(config.moduleName(), PM_STANDBY_ACTIVE); - else - oam.sendDeviceNotification(config.moduleName(), PM_COLD_ACTIVE); - } - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) STARTALL; - ackMsg << (ByteStream::byte) requestStatus; - mq.write(ackMsg); - - log.writeLog(__LINE__, "STARTALL: ACK back to ProcMgr, return status = " + oam.itoa((int) requestStatus)); - - break; - } - - case SHUTDOWNMODULE: - { - msg >> manualFlag; - log.writeLog(__LINE__, "MSG RECEIVED: Shutdown Module request..."); - - //Loop reversely thorugh the process list - - /* processList* aPtr = config.monitoredListPtr(); - processList::reverse_iterator rPtr; - uint16_t rtnCode; - - for (rPtr = aPtr->rbegin(); rPtr != aPtr->rend(); ++rPtr) - { - // don't shut yourself or ProcessManager down" - if ((*rPtr).ProcessName == "ProcessMonitor" || (*rPtr).ProcessName == "ProcessManager") - continue; - - // update local process state - if ( manualFlag ) - (*rPtr).state = oam::MAN_OFFLINE; - else - (*rPtr).state = oam::AUTO_OFFLINE; - - rtnCode = stopProcess((*rPtr).processID, (*rPtr).ProcessName, (*rPtr).ProcessLocation, actIndicator, manualFlag); - if (rtnCode) - log.writeLog(__LINE__, "Process cannot be stopped:" + (*rPtr).ProcessName, LOG_TYPE_DEBUG); - else - (*rPtr).processID = 0; - } - - //send down notification - oam.sendDeviceNotification(config.moduleName(), MODULE_DOWN); - - //stop the mysqld daemon and then columnstore - try { - oam.actionMysqlCalpont(MYSQL_STOP); - } - catch(...) - {} - */ - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) SHUTDOWNMODULE; - ackMsg << (ByteStream::byte) API_SUCCESS; - mq.write(ackMsg); - - log.writeLog(__LINE__, "SHUTDOWNMODULE: ACK back to ProcMgr, return status = " + oam.itoa((int) API_SUCCESS)); - - //sleep to give time for process-manager to finish up - sleep(5); - string cmd = "columnstore stop > /dev/null 2>&1"; - system(cmd.c_str()); - exit (0); - - break; - } - - - default: - break; - } //end of switch - - break; - } - - case PROCUPDATELOG: - { - string action; - string level; - - msg >> action; - msg >> level; - - log.writeLog(__LINE__, "MSG RECEIVED: " + action + " logging at level " + level); - - uint16_t rtnCode; - int requestStatus = API_SUCCESS; - - rtnCode = updateLog(action, level); - - if (rtnCode) - // error in updating log - requestStatus = API_FAILURE; - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) PROCUPDATELOG; - ackMsg << (ByteStream::byte) requestStatus; - mq.write(ackMsg); - - log.writeLog(__LINE__, "PROCUPDATELOG: ACK back to ProcMgr, return status = " + oam.itoa((int) requestStatus)); - - break; - } - - case PROCGETCONFIGLOG: - { - log.writeLog(__LINE__, "MSG RECEIVED: Get Module Log Configuration data"); - - int16_t requestStatus = getConfigLog(); - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) PROCGETCONFIGLOG; - ackMsg << (ByteStream::byte) requestStatus; - mq.write(ackMsg); - - log.writeLog(__LINE__, "PROCGETCONFIGLOG: ACK back to ProcMgr, return status = " + oam.itoa((int) requestStatus)); - - break; - } - - case CHECKPOWERON: - { - log.writeLog(__LINE__, "MSG RECEIVED: Check Power-On Test results log file"); - checkPowerOnResults(); - - break; - } - - case PROCUPDATECONFIG: - { - log.writeLog(__LINE__, "MSG RECEIVED: Update Process Configuration"); - - int16_t requestStatus = updateConfig(); - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) PROCUPDATECONFIG; - ackMsg << (ByteStream::byte) requestStatus; - mq.write(ackMsg); - - log.writeLog(__LINE__, "PROCUPDATECONFIG: ACK back to ProcMgr, return status = " + oam.itoa((int) requestStatus)); - - break; - } - - case PROCBUILDSYSTEMTABLES: - { - log.writeLog(__LINE__, "MSG RECEIVED: Check and Build System Tables"); - - int16_t requestStatus = buildSystemTables(); - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) PROCBUILDSYSTEMTABLES; - ackMsg << (ByteStream::byte) requestStatus; - mq.write(ackMsg); - - log.writeLog(__LINE__, "PROCBUILDSYSTEMTABLES: ACK back to ProcMgr, return status = " + oam.itoa((int) requestStatus)); - - break; - } - - case LOCALHEARTBEAT: - { - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) LOCALHEARTBEAT; - ackMsg << (ByteStream::byte) API_SUCCESS; - mq.write(ackMsg); - break; - } - - case CONFIGURE: - { - log.writeLog(__LINE__, "MSG RECEIVED: Configure Module"); - string configureModuleName; - msg >> configureModuleName; - - int requestStatus = API_SUCCESS; - - configureModule(configureModuleName); - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) CONFIGURE; - ackMsg << (ByteStream::byte) requestStatus; - mq.write(ackMsg); - - log.writeLog(__LINE__, "CONFIGURE: ACK back to ProcMgr, return status = " + oam.itoa((int) requestStatus)); - - //set startmodule flag - startProcMon = true; - break; - } - - case RECONFIGURE: - { - log.writeLog(__LINE__, "MSG RECEIVED: Reconfigure Module"); - string reconfigureModuleName; - msg >> reconfigureModuleName; - - uint16_t rtnCode; - int requestStatus = API_SUCCESS; - - //validate that I should be receiving this message - if ( config.moduleType() != "um" && - config.moduleType() != "pm" ) - requestStatus = oam::API_FAILURE; - else - { - if ( config.moduleType() == "um" && - reconfigureModuleName.find("pm") == string::npos ) - requestStatus = oam::API_FAILURE; - else - { - if ( config.moduleType() == "pm" && - reconfigureModuleName.find("um") == string::npos ) - requestStatus = oam::API_FAILURE; - else - { - rtnCode = reconfigureModule(reconfigureModuleName); - - if (rtnCode) - // error in updating log - requestStatus = rtnCode; - } - } - } - - // install mysqld rpms if being reconfigured as a um - if ( reconfigureModuleName.find("um") != string::npos ) - { - string cmd = "post-mysqld-install >> " + tmpLogDir + "/rpminstall"; - system(cmd.c_str()); - cmd = "post-mysql-install >> " + tmpLogDir + "/rpminstall"; - system(cmd.c_str()); - int ret = system("systemctl cat mariadb.service > /dev/null 2>&1"); - if (!ret) - { - cmd = "systemctl start mariadb.service > " + tmpLogDir + "/mysqldstart"; - system(cmd.c_str()); - } - else - { - cmd = "/usr/bin/mysqld_safe & > " + tmpLogDir + "/mysqldstart"; - system(cmd.c_str()); - } - - string tmpFile = tmpLogDir + "/mysqldstart"; - ifstream file (tmpFile.c_str()); - - if (!file) - { - requestStatus = oam::API_FAILURE; - log.writeLog(__LINE__, "RECONFIGURE: mysqld failed to start", LOG_TYPE_ERROR); - } - else - { - char line[200]; - string buf; - int count = 0; - - while (file.getline(line, 200)) - { - buf = line; - - if ( buf.find("OK", 0) != string::npos ) - count++; - } - - file.close(); - - if (count == 0) - { - requestStatus = oam::API_FAILURE; - log.writeLog(__LINE__, "RECONFIGURE: mysqld failed to start", LOG_TYPE_ERROR); - } - else - log.writeLog(__LINE__, "RECONFIGURE: install started mysqld"); - } - } - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) RECONFIGURE; - ackMsg << (ByteStream::byte) requestStatus; - mq.write(ackMsg); - - log.writeLog(__LINE__, "RECONFIGURE: ACK back to ProcMgr, return status = " + oam.itoa((int) requestStatus)); - - //now exit so Process Monitor can restart and reinitialzation as the New module type - if ( requestStatus != oam::API_FAILURE ) - { - log.writeLog(__LINE__, "RECONFIGURE: ProcMon exiting so it can restart as new module type", LOG_TYPE_DEBUG); -// sleep(1); - - exit(1); - } - - break; - } - - case GETSOFTWAREINFO: - { - log.writeLog(__LINE__, "MSG RECEIVED: Get Calpont Software Info"); - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) GETSOFTWAREINFO; - ackMsg << config.SoftwareVersion() + config.SoftwareRelease(); - mq.write(ackMsg); - - log.writeLog(__LINE__, "GETSOFTWAREINFO: ACK back to ProcMgr with " + config.SoftwareVersion() + config.SoftwareRelease()); - - break; - } - - case UPDATEPARENTNFS: - { - string IPAddress; - - msg >> IPAddress; - - log.writeLog(__LINE__, "MSG RECEIVED: Update fstab file with new Parent OAM IP of " + IPAddress); - - int requestStatus = API_SUCCESS; - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) UPDATEPARENTNFS; - ackMsg << (ByteStream::byte) requestStatus; - mq.write(ackMsg); - - log.writeLog(__LINE__, "UPDATEPARENTNFS: ACK back to ProcMgr"); - - break; - } - - case OAMPARENTACTIVE: - { - log.writeLog(__LINE__, "MSG RECEIVED: OAM Parent Activate"); - - runStandby = false; - - log.writeLog(__LINE__, "Running Active", LOG_TYPE_INFO); - - //give time for Status Control thread to start reading incoming messages - sleep(3); - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) OAMPARENTACTIVE; - ackMsg << (ByteStream::byte) API_SUCCESS; - mq.write(ackMsg); - - log.writeLog(__LINE__, "OAMPARENTACTIVE: ACK back to ProcMgr"); - - MonitorConfig config; - break; - } - - case UPDATECONFIGFILE: - { - log.writeLog(__LINE__, "MSG RECEIVED: Update Calpont Config file"); - - (void)updateConfigFile(msg); - - log.writeLog(__LINE__, "UPDATECONFIGFILE: Completed"); - - MonitorConfig config; - break; - } - - case GETPARENTOAMMODULE: - { - log.writeLog(__LINE__, "MSG RECEIVED: Get Parent OAM Module"); - - MonitorConfig config; - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) GETPARENTOAMMODULE; - ackMsg << config.OAMParentName(); - mq.write(ackMsg); - - log.writeLog(__LINE__, "GETPARENTOAMMODULE: ACK back with " + config.OAMParentName()); - - break; - } - - case OAMPARENTCOLD: - { - log.writeLog(__LINE__, "MSG RECEIVED: OAM Parent Standby "); - - runStandby = true; - - // delete any old active alarm log file - unlink ("/var/log/mariadb/columnstore/activeAlarms"); - - log.writeLog(__LINE__, "Running Standby", LOG_TYPE_INFO); - //give time for Status Control thread to start reading incoming messages - sleep(3); - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) OAMPARENTCOLD; - ackMsg << (ByteStream::byte) API_SUCCESS; - mq.write(ackMsg); - - log.writeLog(__LINE__, "OAMPARENTCOLD: ACK back to ProcMgr"); - - MonitorConfig config; - break; - } - - case PROCUNMOUNT: - { - string dbrootID; - msg >> dbrootID; - - log.writeLog(__LINE__, "MSG RECEIVED: Unmount DBRoot: " + dbrootID); - - //Flush the cache - cacheutils::flushPrimProcCache(); - cacheutils::dropPrimProcFdCache(); - flushInodeCache(); - - int return_status = API_SUCCESS; - - if (DataRedundancyConfig == "n") - { - int retry = 1; - - string tmpUmount = tmpLogDir + "/umount.log"; - for ( ; retry < 5 ; retry++) - { - string cmd = "export LC_ALL=C;" + SUDO + "umount /var/lib/columnstore/data" + dbrootID + " > " + tmpUmount + " 2>&1"; - - system(cmd.c_str()); - - return_status = API_SUCCESS; - - if (!oam.checkLogStatus(tmpUmount, "busy")) - break; - - cmd = "lsof /var/lib/columnstore/data" + dbrootID + " >> " + tmpUmount + " 2>&1"; - system(cmd.c_str()); - cmd = "fuser -muvf /var/lib/columnstore/data" + dbrootID + " >> " + tmpUmount + " 2>&1"; - system(cmd.c_str()); - - sleep(2); - //Flush the cache - cacheutils::flushPrimProcCache(); - cacheutils::dropPrimProcFdCache(); - flushInodeCache(); - } - - if ( retry >= 5 ) - { - log.writeLog(__LINE__, "unmount failed, device busy, DBRoot: " + dbrootID, LOG_TYPE_ERROR); - return_status = API_FAILURE; - string cmd = "mv -f " + tmpUmount + " " + tmpUmount + "failed"; - system(cmd.c_str()); - } - } - else - { - try - { - int ret = glusterUnassign(dbrootID); - - if ( ret != 0 ) - log.writeLog(__LINE__, "Error unassigning gluster dbroot# " + dbrootID, LOG_TYPE_ERROR); - else - log.writeLog(__LINE__, "Gluster unassign gluster dbroot# " + dbrootID); - } - catch (...) - { - log.writeLog(__LINE__, "Exception unassigning gluster dbroot# " + dbrootID, LOG_TYPE_ERROR); - } - } - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) PROCUNMOUNT; - ackMsg << (ByteStream::byte) return_status; - mq.write(ackMsg); - - log.writeLog(__LINE__, "PROCUNMOUNT: ACK back to ProcMgr, status: " + oam.itoa(return_status)); - - break; - } - - case PROCMOUNT: - { - string dbrootID; - msg >> dbrootID; - - log.writeLog(__LINE__, "MSG RECEIVED: Mount DBRoot: " + dbrootID);; - - int return_status = API_SUCCESS; - - if (DataRedundancyConfig == "n") - { - string tmpMount = tmpLogDir + "/mount.log"; - string cmd = SUDO + "export LC_ALL=C;" + SUDO + "mount /var/lib/columnstore/data" + dbrootID + " > " + tmpMount + "2>&1"; - system(cmd.c_str()); - - if ( !rootUser ) - { - cmd = SUDO + "chown -R " + USER + ":" + USER + " /var/lib/columnstore/data" + dbrootID + " > /dev/null 2>&1"; - system(cmd.c_str()); - } - - return_status = API_SUCCESS; - ifstream in(tmpMount.c_str()); - - in.seekg(0, std::ios::end); - int size = in.tellg(); - - if ( size != 0 ) - { - if (!oam.checkLogStatus(tmpMount, "already")) - { - log.writeLog(__LINE__, "mount failed, DBRoot: " + dbrootID, LOG_TYPE_ERROR); - return_status = API_FAILURE; - string cmd = "mv -f " + tmpMount + " " + tmpMount + "failed"; - system(cmd.c_str()); - } - } - } - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) PROCMOUNT; - ackMsg << (ByteStream::byte) return_status; - mq.write(ackMsg); - - log.writeLog(__LINE__, "PROCMOUNT: ACK back to ProcMgr, status: " + oam.itoa(return_status)); - - break; - } - - case PROCFSTABUPDATE: - { - string entry; - msg >> entry; - - //check if entry already exist in /etc/fstab - string cmd = "grep " + entry + " /etc/fstab /dev/null 2>&1"; - int status = system(cmd.c_str()); - - if (WEXITSTATUS(status) != 0 ) - { - //chmod before update, used on amazon ami EBS. not other systems - system("sudo chmod 666 /etc/fstab"); - - cmd = "echo " + entry + " >> /etc/fstab"; - system(cmd.c_str()); - - log.writeLog(__LINE__, "Add line entry to /etc/fstab : " + entry); - } - - //check if entry already exist in ../local/etc/pm1/fstab - cmd = "grep " + entry + "/var/lib/columnstore/local/etc/pm1/fstab /dev/null 2>&1"; - status = system(cmd.c_str()); - - if (WEXITSTATUS(status) != 0 ) - { - cmd = "echo " + entry + " >> /var/lib/columnstore/local/etc/pm1/fstab"; - - system(cmd.c_str()); - - log.writeLog(__LINE__, "Add line entry to /var/lib/columnstore/local/etc/pm1/fstab : " + entry); - } - - //mkdir on entry directory - string::size_type pos = entry.find(" ", 0); - string::size_type pos1 = entry.find(" ", pos + 1); - string directory = entry.substr(pos + 1, pos1 - pos); - - cmd = "mkdir " + directory; - - system(cmd.c_str()); - log.writeLog(__LINE__, "create directory: " + directory); - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) PROCFSTABUPDATE; - ackMsg << (ByteStream::byte) API_SUCCESS; - mq.write(ackMsg); - - log.writeLog(__LINE__, "PROCFSTABUPDATE: ACK back to ProcMgr"); - - break; - } - - case MASTERREP: - { - log.writeLog(__LINE__, "MSG RECEIVED: Run Master Replication script "); - - string masterLogFile = oam::UnassignedName; - string masterLogPos = oam::UnassignedName; - - if ( ( (PMwithUM == "n") && (config.moduleType() == "pm") ) && - ( config.ServerInstallType() != oam::INSTALL_COMBINE_DM_UM_PM) ) - { - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) MASTERREP; - ackMsg << (ByteStream::byte) oam::API_FAILURE; - ackMsg << masterLogFile; - ackMsg << masterLogPos; - mq.write(ackMsg); - - log.writeLog(__LINE__, "MASTERREP: Error PM invalid msg - ACK back to ProcMgr return status = " + oam.itoa((int) oam::API_FAILURE)); - break; - } - - //change local my.cnf file - int ret; - int retry; - - for ( retry = 0 ; retry < 3 ; retry++ ) - { - ret = changeMyCnf("master"); - - if ( ret == oam::API_FAILURE ) - { - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) MASTERREP; - ackMsg << (ByteStream::byte) ret; - ackMsg << masterLogFile; - ackMsg << masterLogPos; - mq.write(ackMsg); - - log.writeLog(__LINE__, "MASTERREP: Error in changeMyCnf - ACK back to ProcMgr return status = " + oam.itoa((int) ret)); - break; - } - - // run Master Rep script - ret = runMasterRep(masterLogFile, masterLogPos); - - if ( ret == API_FAILURE ) - { - log.writeLog(__LINE__, "MASTERREP: runMasterRep failure, retry", LOG_TYPE_ERROR); - sleep(5); - continue; - } - else - break; - } - - if ( retry >= 3 ) - log.writeLog(__LINE__, "MASTERREP: runMasterRep failure", LOG_TYPE_CRITICAL); - else - runDisableRep(); //disable slave on new master - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) MASTERREP; - ackMsg << (ByteStream::byte) ret; - ackMsg << masterLogFile; - ackMsg << masterLogPos; - mq.write(ackMsg); - - log.writeLog(__LINE__, "MASTERREP: ACK back to ProcMgr return status = " + oam.itoa((int) ret)); - - break; - } - - case SLAVEREP: - { - log.writeLog(__LINE__, "MSG RECEIVED: Run Slave Replication script "); - - string masterLogFile; - msg >> masterLogFile; - string masterLogPos; - msg >> masterLogPos; - - if ( ( (PMwithUM == "n") && (config.moduleType() == "pm") ) && - ( config.ServerInstallType() != oam::INSTALL_COMBINE_DM_UM_PM) ) - { - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) SLAVEREP; - ackMsg << (ByteStream::byte) oam::API_FAILURE; - mq.write(ackMsg); - - log.writeLog(__LINE__, "SLAVEREP: Error PM invalid msg - ACK back to ProcMgr return status = " + oam.itoa((int) oam::API_FAILURE)); - break; - } - - //change local my.cnf file - int ret = changeMyCnf("slave"); - - if ( ret == oam::API_FAILURE ) - { - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) SLAVEREP; - ackMsg << (ByteStream::byte) ret; - mq.write(ackMsg); - - log.writeLog(__LINE__, "SLAVEREP: Error in changeMyCnf - ACK back to ProcMgr return status = " + oam.itoa((int) ret)); - break; - } - - // run Slave Rep script - ret = runSlaveRep(masterLogFile, masterLogPos); - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) SLAVEREP; - ackMsg << (ByteStream::byte) ret; - mq.write(ackMsg); - - log.writeLog(__LINE__, "SLAVEREP: ACK back to ProcMgr return status = " + oam.itoa((int) ret)); - - break; - } - - case MASTERDIST: - { - log.writeLog(__LINE__, "MSG RECEIVED: Run Master DB Distribute command "); - - string password; - msg >> password; - string module; - msg >> module; - - if ( ( (PMwithUM == "n") && (config.moduleType() == "pm") ) && - ( config.ServerInstallType() != oam::INSTALL_COMBINE_DM_UM_PM) ) - { - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) MASTERDIST; - ackMsg << (ByteStream::byte) oam::API_FAILURE; - mq.write(ackMsg); - - log.writeLog(__LINE__, "MASTERDIST: runMasterRep - ACK back to ProcMgr return status = " + oam.itoa((int) oam::API_FAILURE)); - } - - if ( password == oam::UnassignedName ) - password = "ssh"; - - // run Master Dist - int ret = runMasterDist(password, module); - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) MASTERDIST; - ackMsg << (ByteStream::byte) ret; - mq.write(ackMsg); - - log.writeLog(__LINE__, "MASTERDIST: runMasterRep - ACK back to ProcMgr return status = " + oam.itoa((int) ret)); - - break; - } - - case DISABLEREP: - { - log.writeLog(__LINE__, "MSG RECEIVED: Disable MySQL Replication "); - - //change local my.cnf file - int ret = changeMyCnf("disable"); - - // run Disable rep - ret = runDisableRep(); - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) DISABLEREP; - ackMsg << (ByteStream::byte) ret; - mq.write(ackMsg); - - log.writeLog(__LINE__, "DISABLEREP: ACK back to ProcMgr return status = " + oam.itoa((int) ret)); - - break; - } - - case PROCGLUSTERASSIGN: - { - string dbrootID; - msg >> dbrootID; - - log.writeLog(__LINE__, "MSG RECEIVED: Gluster Assign DBRoot: " + dbrootID); - - // run Master Dist - int ret = glusterAssign(dbrootID); - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) PROCGLUSTERASSIGN; - ackMsg << (ByteStream::byte) ret; - mq.write(ackMsg); - - log.writeLog(__LINE__, "PROCGLUSTERASSIGN: ACK back to ProcMgr return status = " + oam.itoa((int) ret)); - - break; - } - - case PROCGLUSTERUNASSIGN: - { - string dbrootID; - msg >> dbrootID; - - log.writeLog(__LINE__, "MSG RECEIVED: Gluster Unassign DBRoot: " + dbrootID); - - // run Master Dist - int ret = glusterUnassign(dbrootID); - - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) PROCGLUSTERUNASSIGN; - ackMsg << (ByteStream::byte) ret; - mq.write(ackMsg); - - log.writeLog(__LINE__, "PROCGLUSTERUNASSIGN: ACK back to ProcMgr return status = " + oam.itoa((int) ret)); - - break; - } - - case SYNCFSALL: - { - log.writeLog(__LINE__, "MSG RECEIVED: SYNC FileSystem..."); - int requestStatus = API_SUCCESS; - requestStatus = syncFS(); - if (requestStatus == API_SUCCESS) - { - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) SYNCFSALL; - ackMsg << (ByteStream::byte) API_SUCCESS; - } - else - { - ackMsg << (ByteStream::byte) ACK; - ackMsg << (ByteStream::byte) SYNCFSALL; - ackMsg << (ByteStream::byte) API_FAILURE; - } - mq.write(ackMsg); - - log.writeLog(__LINE__, "SYNCFSALL: ACK back to ProcMgr, return status = " + oam.itoa((int) API_SUCCESS)); - break; - } - - default: - break; - } //end of switch - - return; -} - -/****************************************************************************************** -* @brief stopProcess -* -* purpose: stop a process -* -******************************************************************************************/ -int ProcessMonitor::stopProcess(pid_t processID, std::string processName, std::string processLocation, int actionIndicator, bool manualFlag) -{ - int status; - MonitorLog log; - Oam oam; - - log.writeLog(__LINE__, "STOPPING Process: " + processName, LOG_TYPE_DEBUG); - - sendAlarm(processName, PROCESS_INIT_FAILURE, CLEAR); - - if ( manualFlag ) - { - // Mark the process offline - updateProcessInfo(processName, oam::MAN_OFFLINE, 0); - - // Set the alarm - sendAlarm(processName, PROCESS_DOWN_MANUAL, SET); - - } - else - { - // Mark the process offline - updateProcessInfo(processName, oam::AUTO_OFFLINE, 0); - - // Set the alarm - sendAlarm(processName, PROCESS_DOWN_AUTO, SET); - } - - // bypass if pid = 0 - if ( processID == 0 ) - status = API_SUCCESS; - else - { - // XXXPAT: StorageManager shouldn't be killed with KILL, or there's a chance of data corruption. - // once we minimize that chance, we could allow KILL to be sent. - if (actionIndicator == GRACEFUL || processName == "StorageManager") - { - status = kill(processID, SIGTERM); - } - else - { - status = kill(processID, SIGKILL); - } - - // processID not found, set as success - if ( errno == ESRCH) - status = API_SUCCESS; - - if ( status != API_SUCCESS) - { - status = errno; - log.writeLog(__LINE__, "Failure to stop Process: " + processName + ", error = " + oam.itoa(errno), LOG_TYPE_ERROR ); - } - } - - // sending sigkill to StorageManager right after sigterm would not allow it to - // exit gracefully. This will wait until StorageManager goes down to prevent - // weirdness that I suspect will happen if we combine a slow connection with a restart - // command. - if (processName == "StorageManager" && processID != 0) - { - while (status == API_SUCCESS) - { - sleep(1); - ostringstream os; - os << "Waiting for StorageManager to exit gracefully... pid is " << processID; - log.writeLog(__LINE__, os.str(), LOG_TYPE_DEBUG); - status = kill(processID, SIGTERM); - break; - } - - return API_SUCCESS; - } - - //now do a pkill on process just to make sure all is clean - string::size_type pos = processLocation.find("bin/", 0); - string procName = processLocation.substr(pos + 4, 15) + "\\*"; - string cmd = "pkill -9 " + procName; - system(cmd.c_str()); - log.writeLog(__LINE__, "Pkill Process just to make sure: " + procName, LOG_TYPE_DEBUG); - - return status; -} - -/****************************************************************************************** -* @brief startProcess -* -* purpose: Start a process -* -******************************************************************************************/ -pid_t ProcessMonitor::startProcess(string processModuleType, string processName, string processLocation, - string arg_list[MAXARGUMENTS], uint16_t launchID, uint16_t BootLaunch, - string RunType, string DepProcessName[MAXDEPENDANCY], - string DepModuleName[MAXDEPENDANCY], string LogFile, uint16_t initType, uint16_t actIndicator) -{ - // Compiler complains about non-initialiased variable here. - pid_t newProcessID = 0; - char* argList[MAXARGUMENTS]; - unsigned int i = 0; - MonitorLog log; - MonitorConfig currentConfig; - unsigned int numAugs = 0; - Oam oam; - SystemProcessStatus systemprocessstatus; - ProcessStatus processstatus; - Config *cs_config = Config::makeConfig(); - string DBRootStorageType = cs_config->getConfig("Installation", "DBRootStorageType"); - - log.writeLog(__LINE__, "STARTING Process: " + processName, LOG_TYPE_DEBUG); - log.writeLog(__LINE__, "Process location: " + processLocation, LOG_TYPE_DEBUG); - - //check process location - if (access(processLocation.c_str(), X_OK) != 0) - { - log.writeLog(__LINE__, "Process location: " + processLocation + " not found", LOG_TYPE_ERROR); - - //record the process information into processList - config.buildList(processModuleType, processName, processLocation, arg_list, - launchID, newProcessID, FAILED, BootLaunch, RunType, - DepProcessName, DepModuleName, LogFile); - - //Update Process Status: Mark Process INIT state - updateProcessInfo(processName, FAILED, newProcessID); - - return oam::API_FAILURE; - } - - //check process dependency - if (DepProcessName[i].length() != 0) - { - for (int i = 0; i < MAXDEPENDANCY; i++) - { - //Get dependent process status - if (DepProcessName[i].length() == 0) - { - break; - } - - //check for System wild card on Module Name - if ( DepModuleName[i] == "*" ) - { - //check for all accurrances of this Module Name - try - { - oam.getProcessStatus(systemprocessstatus); - - for ( unsigned int j = 0 ; j < systemprocessstatus.processstatus.size(); j++) - { - if ( systemprocessstatus.processstatus[j].ProcessName == DepProcessName[i] ) - { - - log.writeLog(__LINE__, "Dependent process of " + DepProcessName[i] + "/" + systemprocessstatus.processstatus[j].Module + " is " + oam.itoa(systemprocessstatus.processstatus[j].ProcessOpState), LOG_TYPE_DEBUG); - - int opState = oam::ACTIVE; - bool degraded; - oam.getModuleStatus(systemprocessstatus.processstatus[j].Module, opState, degraded); - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED || - opState == oam::AUTO_OFFLINE) - continue; - - if (systemprocessstatus.processstatus[j].ProcessOpState != oam::ACTIVE ) - { - log.writeLog(__LINE__, "Dependent Process is not in correct state, Failed Restoral", LOG_TYPE_DEBUG); - return oam::API_MINOR_FAILURE; - } - } - } - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - return oam::API_MINOR_FAILURE; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - return oam::API_MINOR_FAILURE; - } - } - else - { - //check for a Module Type wildcard - if ( DepModuleName[i].find("*") != string::npos) - { - string moduleName = DepModuleName[i].substr(0, MAX_MODULE_TYPE_SIZE); - - try - { - oam.getProcessStatus(systemprocessstatus); - - for ( unsigned int j = 0 ; j < systemprocessstatus.processstatus.size(); j++) - { - if ( systemprocessstatus.processstatus[j].ProcessName == DepProcessName[i] - && systemprocessstatus.processstatus[j].Module.find(moduleName, 0) != string::npos) - { - - log.writeLog(__LINE__, "Dependent process of " + DepProcessName[i] + "/" + systemprocessstatus.processstatus[j].Module + " is " + oam.itoa(systemprocessstatus.processstatus[j].ProcessOpState), LOG_TYPE_DEBUG); - - int opState = oam::ACTIVE; - bool degraded; - oam.getModuleStatus(systemprocessstatus.processstatus[j].Module, opState, degraded); - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED || - opState == oam::AUTO_OFFLINE) - continue; - - if (systemprocessstatus.processstatus[j].ProcessOpState != oam::ACTIVE ) - { - log.writeLog(__LINE__, "Dependent Process is not in correct state, Failed Restoral", LOG_TYPE_DEBUG); - return oam::API_MINOR_FAILURE; - } - } - } - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - return oam::API_MINOR_FAILURE; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - return oam::API_MINOR_FAILURE; - } - } - else - { - //check for a Current Module wildcard - if ( DepModuleName[i] == "@") - { - int state = oam::ACTIVE;; - - try - { - ProcessStatus procstat; - oam.getProcessStatus(DepProcessName[i], - config.moduleName(), - procstat); - state = procstat.ProcessOpState; - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - return oam::API_MINOR_FAILURE; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - return oam::API_MINOR_FAILURE; - } - - log.writeLog(__LINE__, "Dependent process of " + DepProcessName[i] + "/" + config.moduleName() + " is " + oam.itoa(state), LOG_TYPE_DEBUG); - - if (state == oam::FAILED) - { - log.writeLog(__LINE__, "Dependent Process is FAILED state, Hard Failed Restoral", LOG_TYPE_DEBUG); - - //record the process information into processList - config.buildList(processModuleType, processName, processLocation, arg_list, - launchID, newProcessID, FAILED, BootLaunch, RunType, - DepProcessName, DepModuleName, LogFile); - - //Update Process Status: Mark Process INIT state - updateProcessInfo(processName, FAILED, newProcessID); - - return oam::API_FAILURE; - } - - if (state != oam::ACTIVE) - { - log.writeLog(__LINE__, "Dependent Process is not in correct state, Failed Restoral", LOG_TYPE_DEBUG); - return oam::API_MINOR_FAILURE; - } - } - else - { - // specific module name and process dependency - int state = oam::ACTIVE; - - try - { - ProcessStatus procstat; - oam.getProcessStatus(DepProcessName[i], - DepModuleName[i], - procstat); - state = procstat.ProcessOpState; - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - return oam::API_MINOR_FAILURE; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - return oam::API_MINOR_FAILURE; - } - - log.writeLog(__LINE__, "Dependent process of " + DepProcessName[i] + "/" + DepModuleName[i] + " is " + oam.itoa(state), LOG_TYPE_DEBUG); - - int opState = oam::ACTIVE; - bool degraded; - oam.getModuleStatus(DepModuleName[i], opState, degraded); - - if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED || - opState == oam::AUTO_OFFLINE) - continue; - - if (state != oam::ACTIVE) - { - log.writeLog(__LINE__, "Dependent Process is not in correct state, Failed Restoral", LOG_TYPE_DEBUG); - return oam::API_MINOR_FAILURE; - } - } - } - } - }//end of FOR - } - - for (i = 0; i < MAXARGUMENTS - 1; i++) - { - if (arg_list[i].length() == 0) - break; - - //check if workernode argument, if so setup argument #2 as the slave ID for this module - string::size_type pos = arg_list[i].find("DBRM_Worker", 0); - - if (pos != string::npos) - { - try - { - int slavenodeID = oam.getLocalDBRMID(config.moduleName()); - arg_list[i] = "DBRM_Worker" + oam.itoa(slavenodeID); - log.writeLog(__LINE__, "getLocalDBRMID Worker Node ID = " + oam.itoa(slavenodeID), LOG_TYPE_DEBUG); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getLocalDBRMID: no DBRM for module", LOG_TYPE_ERROR); - - //record the process information into processList - config.buildList(processModuleType, processName, processLocation, arg_list, - launchID, newProcessID, FAILED, BootLaunch, RunType, - DepProcessName, DepModuleName, LogFile); - - //Update Process Status: Mark Process INIT state - updateProcessInfo(processName, FAILED, newProcessID); - - return oam::API_FAILURE; - } - } - - - argList[i] = new char[arg_list[i].length() + 1]; - - strcpy(argList[i], arg_list[i].c_str()) ; -// log.writeLog(__LINE__, "Arg list "); -// log.writeLog(__LINE__, argList[i]); - numAugs++; - } - - argList[i] = NULL; - - //run load-brm script before brm processes started - if ( actIndicator != oam::GRACEFUL) - { - if ( ( gOAMParentModuleFlag && processName == "DBRMControllerNode") || - ( !gOAMParentModuleFlag && processName == "DBRMWorkerNode") ) - { - string DBRMDir; -// string tempDBRMDir = startup::StartUp::installDir() + "/data/dbrm"; - - // get DBRMroot config setting - string DBRMroot; - oam.getSystemConfig("DBRMRoot", DBRMroot); - - string::size_type pos = DBRMroot.find("/BRM_saves", 0); - - if (pos != string::npos) - //get directory path - DBRMDir = DBRMroot.substr(0, pos); - else - { - log.writeLog(__LINE__, "Error: /BRM_saves not found in DBRMRoot config setting", LOG_TYPE_CRITICAL); - - //record the process information into processList - config.buildList(processModuleType, processName, processLocation, arg_list, - launchID, newProcessID, FAILED, BootLaunch, RunType, - DepProcessName, DepModuleName, LogFile); - - //Update Process Status: Mark Process INIT state - updateProcessInfo(processName, FAILED, newProcessID); - - return oam::API_FAILURE; - } - - //create dbrm directory, just to make sure its there - string cmd = "mkdir -p " + DBRMDir + " > /dev/null 2>&1"; - system(cmd.c_str()); - - // if Non Parent OAM Module, get the dbmr data from Parent OAM Module - if ( !gOAMParentModuleFlag && !HDFS) - { - - //create temp dbrm directory -// string cmd = "mkdir " + tempDBRMDir + " > /dev/null 2>&1"; -// system(cmd.c_str()); - - //setup softlink for editem on the 'um' or shared-nothing non active pm - /* if( config.moduleType() == "um" || - (config.moduleType() == "pm") ) { - cmd = "mv -f " + DBRMDir + " /root/ > /dev/null 2>&1"; - system(cmd.c_str()); - - cmd = "ln -s " + tempDBRMDir + " " + DBRMDir + " > /dev/null 2>&1"; - system(cmd.c_str()); - } - */ - //change DBRMDir to temp DBRMDir -// DBRMDir = tempDBRMDir; - - // remove all files for temp directory -// cmd = "rm -f " + DBRMDir + "/*"; -// system(cmd.c_str()); - - // go request files from parent OAM module - if ( getDBRMdata(&DBRMDir) != oam::API_SUCCESS ) - { - log.writeLog(__LINE__, "Error: getDBRMdata failed", LOG_TYPE_ERROR); - sendAlarm("DBRM", DBRM_LOAD_DATA_ERROR, SET); - return oam::API_MINOR_FAILURE; - } - // DBRMDir might have changed, so need to change DBRMroot - bf::path tmp(DBRMroot); - tmp = tmp.filename(); - DBRMroot = (bf::path(DBRMDir) / tmp).string(); - - sendAlarm("DBRM", DBRM_LOAD_DATA_ERROR, CLEAR); - // change DBRMroot to temp DBRMDir path -// DBRMroot = tempDBRMDir + "/BRM_saves"; - } - - - // - // run the 'load_brm' script first if files exist - // - string loadScript = "load_brm"; - - string fileName = DBRMroot + "_current"; - - ssize_t fileSize = IDBPolicy::size(fileName.c_str()); - boost::scoped_ptr oldFile(IDBDataFile::open( - IDBPolicy::getType(fileName.c_str(), - IDBPolicy::WRITEENG), - fileName.c_str(), "r", 0)); - - if (oldFile && fileSize > 0) - { - char line[200] = {0}; - oldFile->pread(line, 0, fileSize - 1); // skip the \n - line[fileSize] = '\0'; // not necessary, but be sure. - // MCOL-1558 - the _current file is now relative to DBRMRoot - string dbrmFile; - if (line[0] == '/') // handle absolute paths (saved by an old version) - dbrmFile = line; - else - dbrmFile = DBRMroot.substr(0, DBRMroot.find_last_of('/') + 1) + line; - -// if ( !gOAMParentModuleFlag ) { - -// string::size_type pos = dbrmFile.find("/BRM_saves",0); -// if (pos != string::npos) -// dbrmFile = tempDBRMDir + dbrmFile.substr(pos,80);; -// } - - string logdir("/var/log/mariadb/columnstore"); - - if (access(logdir.c_str(), W_OK) != 0) logdir = tmpLogDir; - - string cmd = "reset_locks > " + logdir + "/reset_locks.log1 2>&1"; - system(cmd.c_str()); - log.writeLog(__LINE__, "BRM reset_locks script run", LOG_TYPE_DEBUG); - - cmd = "clearShm -c > /dev/null 2>&1"; - system(cmd.c_str()); - log.writeLog(__LINE__, "Clear Shared Memory script run", LOG_TYPE_DEBUG); - - cmd = loadScript + " " + dbrmFile + " > " + logdir + "/load_brm.log1 2>&1"; - log.writeLog(__LINE__, loadScript + " cmd = " + cmd, LOG_TYPE_DEBUG); - system(cmd.c_str()); - - cmd = logdir + "/load_brm.log1"; - - if (oam.checkLogStatus(cmd, "OK")) - log.writeLog(__LINE__, "Successfully return from " + loadScript, LOG_TYPE_DEBUG); - else - { - log.writeLog(__LINE__, "Error return DBRM " + loadScript, LOG_TYPE_ERROR); - sendAlarm("DBRM", DBRM_LOAD_DATA_ERROR, SET); - - //record the process information into processList - config.buildList(processModuleType, processName, processLocation, arg_list, - launchID, 0, FAILED, BootLaunch, RunType, - DepProcessName, DepModuleName, LogFile); - - //Update Process Status: Mark Process FAILED state - updateProcessInfo(processName, FAILED, 0); - - return oam::API_FAILURE; - } - - // now delete the dbrm data from local disk - if ( !gOAMParentModuleFlag && !HDFS && DataRedundancyConfig == "n") - { - IDBFileSystem &fs = IDBPolicy::getFs(DBRMDir); - fs.remove(DBRMDir.c_str()); - log.writeLog(__LINE__, "removed downloaded DBRM files at " + DBRMDir, LOG_TYPE_DEBUG); - - #if 0 - string cmd = "rm -f " + DBRMDir + "/*"; - system(cmd.c_str()); - log.writeLog(__LINE__, "removed DBRM file with command: " + cmd, LOG_TYPE_DEBUG); - #endif - } - } - else - log.writeLog(__LINE__, "No DBRM files exist, must be a initial startup", LOG_TYPE_DEBUG); - } - - sendAlarm("DBRM", DBRM_LOAD_DATA_ERROR, CLEAR); - } - - //do a pkill on process just to make sure there is no rouge version running - string::size_type pos = processLocation.find("bin/", 0); - string procName = processLocation.substr(pos + 4, 15) + "\\*"; - string cmd = "pkill -9 " + procName; - system(cmd.c_str()); - log.writeLog(__LINE__, "Pkill Process just to make sure: " + procName, LOG_TYPE_DEBUG); - - //Update Process Status: Mark Process INIT state - updateProcessInfo(processName, initType, 0); - - //sleep, give time for INIT state to be update, prevent race condition with ACTIVE - sleep(1); - - //check and setup for logfile - time_t now; - now = time(NULL); - struct tm tm; - localtime_r(&now, &tm); - char timestamp[200]; - strftime (timestamp, 200, "%m:%d:%y-%H:%M:%S", &tm); - - string logdir("/var/log/mariadb/columnstore"); - - if (access(logdir.c_str(), W_OK) != 0) logdir = tmpLogDir; - - string outFileName = logdir + "/" + processName + ".out"; - string errFileName = logdir + "/" + processName + ".err"; - - string saveoutFileName = outFileName + "." + timestamp + ".log1"; - string saveerrFileName = errFileName + "." + timestamp + ".log1"; - - if ( LogFile == "off" ) - { - string cmd = "mv " + outFileName + " " + saveoutFileName + " > /dev/null 2>&1"; - system(cmd.c_str()); - cmd = "mv " + errFileName + " " + saveerrFileName + " > /dev/null 2>&1"; - system(cmd.c_str()); - } - else - { - string cmd = "mv " + outFileName + " " + saveoutFileName + " > /dev/null 2>&1"; - system(cmd.c_str()); - cmd = "mv " + errFileName + " " + saveerrFileName + " > /dev/null 2>&1"; - system(cmd.c_str()); - } - - //fork and exec new process - newProcessID = fork(); - - if (newProcessID != 0) - { - // - // parent processing - // - - if ( newProcessID == -1) - { - log.writeLog(__LINE__, "New Process ID = -1, failed StartProcess", LOG_TYPE_DEBUG); - return oam::API_MINOR_FAILURE; - } - - //FYI - NEEDS TO STAY HERE TO HAVE newProcessID - - //record the process information into processList - config.buildList(processModuleType, processName, processLocation, arg_list, - launchID, newProcessID, initType, BootLaunch, RunType, - DepProcessName, DepModuleName, LogFile); - - //Update Process Status: Update PID - updateProcessInfo(processName, PID_UPDATE, newProcessID); - - log.writeLog(__LINE__, processName + " PID is " + oam.itoa(newProcessID), LOG_TYPE_DEBUG); - - sendAlarm(processName, PROCESS_DOWN_MANUAL, CLEAR); - sendAlarm(processName, PROCESS_DOWN_AUTO, CLEAR); - sendAlarm(processName, PROCESS_INIT_FAILURE, CLEAR); - - //give time to get status updates from process before starting next process - if ( processName == "DBRMWorkerNode" || processName == "ExeMgr" || processName == "DDLProc") - sleep(3); - else - { - if ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM ) - { - if ( processName == "PrimProc" || processName == "WriteEngineServer") - sleep(3); - } - else - { - if ( (PMwithUM == "y") && processName == "PrimProc" ) - sleep(3); - } - } - - for (i = 0; i < numAugs; i++) - { - if (strlen(argList[i]) == 0) - break; - - delete [] argList[i]; - } - } - else - { - // - //child processing - // - - //Close all files opened by parent process - for (int i = 0; i < sysconf(_SC_OPEN_MAX); i++) - { - close(i); - } - - { - int fd; - fd = open("/dev/null", O_RDONLY); - - if (fd != 0) - { - dup2(fd, 0); - close(fd); - } - - if ( LogFile == "off" ) - { - fd = open("/dev/null", O_WRONLY); //Should be fd 1 - - if ( fd != 1 ) - { - dup2(fd, 1); - close(fd); - } - - fd = open("/dev/null", O_WRONLY); //Should be fd 2 - - if ( fd != 2 ) - { - dup2(fd, 2); - close(fd); - } - } - else - { - // open STDOUT & STDERR to log file -// log.writeLog(__LINE__, "STDOUT to " + outFileName, LOG_TYPE_DEBUG); - fd = open(outFileName.c_str(), O_CREAT | O_WRONLY, 0644); - - if (fd != 1) - { - dup2(fd, 1); - close(fd); - } - -// log.writeLog(__LINE__, "STDERR to " + errFileName, LOG_TYPE_DEBUG); - fd = open(errFileName.c_str(), O_CREAT | O_WRONLY, 0644); - - if (fd != 2) - { - dup2(fd, 2); - close(fd); - } - } - } - - //give time to get INIT status updated in shared memory - sleep(1); - execv(processLocation.c_str(), argList); - - //record the process information into processList - config.buildList(processModuleType, processName, processLocation, arg_list, - launchID, newProcessID, FAILED, BootLaunch, RunType, - DepProcessName, DepModuleName, LogFile); - - //Update Process Status: Mark Process INIT state - updateProcessInfo(processName, FAILED, newProcessID); - - return (oam::API_FAILURE); - } - - return newProcessID; -} - -/****************************************************************************************** -* @brief reinitProcess -* -* purpose: re-Init a process -* -******************************************************************************************/ -int ProcessMonitor::reinitProcess(pid_t processID, std::string processName, int actionIndicator) -{ - MonitorLog log; - - log.writeLog(__LINE__, "REINITTING Process: " + processName, LOG_TYPE_DEBUG); - - kill(processID, SIGHUP); - - return API_SUCCESS; -} - -/****************************************************************************************** -* @brief stopAllProcess -* -* purpose: Stop all processes started by this monitor -* -******************************************************************************************/ -int stopAllProcess(int actionIndicator) -{ - int i; - - if (actionIndicator == GRACEFUL) - { - i = kill(0, SIGTERM); - - } - else - { - i = kill(0, SIGKILL); - } - - if ( i != API_SUCCESS) - { - i = errno; - } - - return i; -} - -/****************************************************************************************** -* @brief sendMessage -* -* purpose: send message to the monitored process or the process manager -* -******************************************************************************************/ -int ProcessMonitor::sendMessage(const string& toWho, const string& message) -{ - int i = 0; - return i; -} - -/****************************************************************************************** -* @brief checkHeartBeat -* -* purpose: check child process heart beat -* -******************************************************************************************/ -int ProcessMonitor::checkHeartBeat(const string processName) -{ - int i = 0; - return i; -} - -/****************************************************************************************** -* @brief sendAlarm -* -* purpose: send a trap and log the process information -* -******************************************************************************************/ -void ProcessMonitor::sendAlarm(string alarmItem, ALARMS alarmID, int action) -{ - MonitorLog log; - Oam oam; - -// cout << "sendAlarm" << endl; -// cout << alarmItem << endl; -// cout << oam.itoa(alarmID) << endl; -// cout << oam.itoa(action) << endl; - - - sendAlarmInfo_t* t1 = new sendAlarmInfo_t; - *t1 = boost::make_tuple(alarmItem, alarmID, action); - - pthread_t SendAlarmThread; - int status = pthread_create (&SendAlarmThread, NULL, (void* (*)(void*)) &sendAlarmThread, t1); - - if ( status != 0 ) - log.writeLog(__LINE__, "SendAlarmThread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR); - - return; -} - - - -/****************************************************************************************** -* @brief sendAlarmThread -* -* purpose: send a trap and log the process information -* -******************************************************************************************/ -void* sendAlarmThread(sendAlarmInfo_t* t) -{ - MonitorLog log; - Oam oam; - ALARMManager alarmMgr; - - pthread_mutex_lock(&ALARM_LOCK); - - string alarmItem = boost::get<0>(*t); - ALARMS alarmID = boost::get<1>(*t); - int action = boost::get<2>(*t); - - //valid alarmID - if ( alarmID < 1 || alarmID > oam::MAX_ALARM_ID ) - { - log.writeLog(__LINE__, "sendAlarmThread error: Invalid alarm ID", LOG_TYPE_DEBUG); - - delete t; - pthread_mutex_unlock(&ALARM_LOCK); - - pthread_exit(0); - } - - if ( action == SET ) - { - log.writeLog(__LINE__, "Send SET Alarm ID " + oam.itoa(alarmID) + " on device " + alarmItem, LOG_TYPE_DEBUG); - } - else - { - log.writeLog(__LINE__, "Send CLEAR Alarm ID " + oam.itoa(alarmID) + " on device " + alarmItem, LOG_TYPE_DEBUG); - } - -// cout << "sendAlarmThread" << endl; -// cout << alarmItem << endl; -// cout << oam.itoa(alarmID) << endl; -// cout << oam.itoa(action) << endl; - - try - { - alarmMgr.sendAlarmReport(alarmItem.c_str(), alarmID, action); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on sendAlarmReport", LOG_TYPE_ERROR ); - } - - delete t; - pthread_mutex_unlock(&ALARM_LOCK); - - pthread_exit(0); -} - -/****************************************************************************************** -* @brief updateProcessInfo -* -* purpose: Send msg to update process state and status change time on disk -* -******************************************************************************************/ -bool ProcessMonitor::updateProcessInfo(std::string processName, int state, pid_t PID) -{ - MonitorLog log; - Oam oam; - - log.writeLog(__LINE__, "StatusUpdate of Process " + processName + " State = " + oam.itoa(state) + " PID = " + oam.itoa(PID), LOG_TYPE_DEBUG); - - sendProcessInfo_t* t1 = new sendProcessInfo_t; - *t1 = boost::make_tuple(processName, state, PID); - - // if state is offline, use thread for faster results - if ( state == oam::MAN_OFFLINE || state == oam::AUTO_OFFLINE ) - { - pthread_t SendProcessThread; - int status = pthread_create (&SendProcessThread, NULL, (void* (*)(void*)) &sendProcessThread, t1); - - if ( status != 0 ) - log.writeLog(__LINE__, "SendProcessThread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR); - } - else - { - sendProcessThread(t1); - } - - return true; -} - -/****************************************************************************************** -* @brief sendProcessThread -* -* purpose: Send msg to update process state and status change time on disk -* -******************************************************************************************/ -void* sendProcessThread(sendProcessInfo_t* t) -{ - MonitorLog log; - MonitorConfig config; - Oam oam; - -// pthread_mutex_lock(&PROCESS_LOCK); - - string processName = boost::get<0>(*t); - int state = boost::get<1>(*t); - pid_t PID = boost::get<2>(*t); - - try - { - oam.setProcessStatus(processName, config.moduleName(), state, PID); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on setProcessStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on setProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR ); - } - - delete t; -// pthread_mutex_unlock(&PROCESS_LOCK); - - return NULL; -} - -/****************************************************************************************** -* @brief updateLog -* -* purpose: Enable/Disable Logging configuration within the syslog.conf file -* -* action: LOG_ENABLE / LOG_DISABLE -* level: all, critical, error, warning, info, debug, data -******************************************************************************************/ -int ProcessMonitor::updateLog(std::string action, std::string level) -{ - MonitorLog log; - Oam oam; - - struct flock fl; - int fd; - - string fileName; - oam.getSystemConfig("SystemLogConfigFile", fileName); - - if (fileName == oam::UnassignedName ) - { - // unassigned - log.writeLog(__LINE__, "ERROR: syslog file not configured ", LOG_TYPE_ERROR ); - return -1; - } - - string::size_type pos = fileName.find("syslog-ng", 0); - - if (pos != string::npos) - { - // not supported - log.writeLog(__LINE__, "ERROR: config file not support, " + fileName, LOG_TYPE_ERROR ); - return -1; - } - - bool syslog7 = false; - pos = fileName.find("49", 0); - - if (pos != string::npos) - { - syslog7 = true; - } - - vector lines; - - if ( level == "data" ) - return 0; - - ifstream oldFile (fileName.c_str()); - - if (!oldFile ) - { - // no file found - log.writeLog(__LINE__, "ERROR: syslog file not found at " + fileName, LOG_TYPE_ERROR ); - return -1; - } - - //if non-root, change file permissions so we can update it - if ( !rootUser) - { - string cmd = "chmod 666 " + fileName + " > /dev/null 2>&1"; - system(cmd.c_str()); - } - - char line[200]; - string buf; - bool restart = true; - - if ( action == oam::ENABLEDSTATE ) - { - // check if enabling all log levels - if ( level == "all") - { - vector fileIDs; - - while (oldFile.getline(line, 200)) - { - buf = line; - - for ( int i = 0;; i++) - { - string localLogFile = oam::LogFile[i]; - - if (syslog7) - localLogFile = oam::LogFile7[i]; - - if ( localLogFile == "" ) - { - // end of list - break; - } - - string logFile = localLogFile; - - pos = buf.find(logFile, 0); - - if (pos != string::npos) - { - // log file already there, save fileID - fileIDs.push_back(i); - break; - } - } - - // output to temp file - lines.push_back(buf); - } //end of while - - // check which fileIDs aren't in the syslog.conf - bool update = false; - - for ( int i = 0;; i++) - { - bool found = false; - string localLogFile = oam::LogFile[i]; - - if (syslog7) - localLogFile = oam::LogFile7[i]; - - if ( localLogFile == "" ) - { - // end of list - break; - } - - vector::iterator p = fileIDs.begin(); - - while ( p != fileIDs.end() ) - { - if ( i == *p ) - { - //already there - found = true; - break; - } - - p++; - } - - if (!found) - { - lines.push_back(localLogFile); - log.writeLog(__LINE__, "Add in syslog.conf log file " + localLogFile, LOG_TYPE_DEBUG); - update = true; - } - } - - if (!update) - { - log.writeLog(__LINE__, "Log level file's already in syslog.conf", LOG_TYPE_DEBUG); - restart = false; - } - } - else - { - // enable a specific level - // get log file for level - for ( int i = 0;; i++) - { - if ( oam::LogLevel[i] == "" ) - { - // end of list - log.writeLog(__LINE__, "ERROR: log level file not found for level " + level, LOG_TYPE_ERROR ); - oldFile.close(); - return -1; - } - - if ( level == oam::LogLevel[i] ) - { - // match found - string localLogFile = oam::LogFile[i]; - - if (syslog7) - localLogFile = oam::LogFile7[i]; - - string logFile = localLogFile; - - while (oldFile.getline(line, 200)) - { - buf = line; - string::size_type pos = buf.find(logFile, 0); - - if (pos != string::npos) - { - log.writeLog(__LINE__, "Log level file already in syslog.conf", LOG_TYPE_DEBUG); - restart = false; - } - - // output to temp file - lines.push_back(buf); - } - - // file not found, add at the bottom of syslog.conf - lines.push_back(logFile); - break; - } - } - } - } - else - { - // DISABLE LOG - // check if disabling all log levels - if ( level == "all") - { - bool update = false; - - while (oldFile.getline(line, 200)) - { - buf = line; - bool found = false; - - for ( int i = 0;; i++) - { - string localLogFile = oam::LogFile[i]; - - if (syslog7) - localLogFile = oam::LogFile7[i]; - - if ( localLogFile == "" ) - { - // end of list - break; - } - - string logFile = localLogFile; - - pos = buf.find(logFile, 0); - - if (pos != string::npos) - { - // log file found - found = true; - update = true; - break; - } - } - - if (!found) - // output to temp file - lines.push_back(buf); - } //end of while - - if (!update) - { - log.writeLog(__LINE__, "No Log level file's in syslog.conf", LOG_TYPE_DEBUG); - restart = false; - } - } - else - { - // disable a specific level - // get log file for level - for ( int i = 0;; i++) - { - if ( oam::LogLevel[i] == "" ) - { - // end of list - log.writeLog(__LINE__, "ERROR: log level file not found for level " + level, LOG_TYPE_ERROR ); - oldFile.close(); - return -1; - } - - if ( level == oam::LogLevel[i] ) - { - // match found - string localLogFile = oam::LogFile[i]; - - if (syslog7) - localLogFile = oam::LogFile7[i]; - - string logFile = localLogFile; - bool found = false; - - while (oldFile.getline(line, 200)) - { - buf = line; - string::size_type pos = buf.find(logFile, 0); - - if (pos != string::npos) - { - // file found, don't push into new file - log.writeLog(__LINE__, "Log level file to DISABLE found in syslog.conf", LOG_TYPE_DEBUG); - found = true; - } - else - { - // no match, push into new temp file - lines.push_back(buf); - } - } - - if (found) - break; - else - { - log.writeLog(__LINE__, "Log level file not in syslog.conf", LOG_TYPE_DEBUG); - restart = false; - } - } - } - } - } - - oldFile.close(); - - // - // go write out new file if required - // - - if (restart) - { - unlink (fileName.c_str()); - ofstream newFile (fileName.c_str()); - - memset(&fl, 0, sizeof(fl)); - fl.l_type = F_RDLCK; // read lock - fl.l_whence = SEEK_SET; - fl.l_start = 0; - fl.l_len = 0; //lock whole file - - // create new file - if ((fd = open(fileName.c_str(), O_RDWR | O_CREAT, 0644)) >= 0) - { - // lock file - - if (fcntl(fd, F_SETLKW, &fl) != 0) - { - ostringstream oss; - oss << "ProcessMonitor::updateLog: error locking file " << - fileName << - ": " << - strerror(errno) << - ", proceding anyway."; - cerr << oss.str() << endl; - } - - copy(lines.begin(), lines.end(), ostream_iterator(newFile, "\n")); - newFile.close(); - - fl.l_type = F_UNLCK; //unlock - fcntl(fd, F_SETLK, &fl); - - close(fd); - - oam.syslogAction("restart"); - } - else - { - ostringstream oss; - oss << "ProcessMonitor::updateLog: error opening file " << - fileName << - ": " << - strerror(errno); - throw runtime_error(oss.str()); - } - } - - //update file priviledges - changeModLog(); - - return 0; -} - -/****************************************************************************************** -* @brief changeModLog -* -* purpose: Updates the file mods so files can be read/write -* from external modules -* -******************************************************************************************/ -void ProcessMonitor::changeModLog() -{ - for ( int i = 0;; i++) - { - if ( oam::LogFile[i].empty() ) - //end of list - break; - - string logFile = oam::LogFile[i]; - string::size_type pos = logFile.find('/', 0); - logFile = logFile.substr(pos, 200); - string cmd = "chmod 755 " + logFile + " > /dev/null 2>&1"; - - system(cmd.c_str()); - } - - return; -} - -/****************************************************************************************** -* @brief getConfigLog -* -* purpose: Get Logging configuration within the syslog.conf file -* -******************************************************************************************/ -int ProcessMonitor::getConfigLog() -{ - MonitorLog log; - Oam oam; - - string fileName; - oam.getSystemConfig("SystemLogConfigFile", fileName); - - if (fileName == oam::UnassignedName ) - { - // unassigned - log.writeLog(__LINE__, "ERROR: syslog file not configured ", LOG_TYPE_ERROR ); - return API_FAILURE; - } - - string::size_type pos = fileName.find("syslog-ng", 0); - - if (pos != string::npos) - { - // not supported - log.writeLog(__LINE__, "ERROR: config file not support, " + fileName, LOG_TYPE_ERROR ); - return API_FAILURE; - } - - ifstream oldFile (fileName.c_str()); - - if (!oldFile) - { - // no file found - log.writeLog(__LINE__, "ERROR: syslog file not found at " + fileName, LOG_TYPE_ERROR ); - return API_FILE_OPEN_ERROR; - } - - int configData = 0; - char line[200]; - string buf; - - while (oldFile.getline(line, 200)) - { - buf = line; - - for ( int i = 0;; i++) - { - if ( oam::LogFile[i] == "" ) - { - // end of list - break; - } - - string logFile = oam::LogFile[i]; - logFile = logFile.substr(14, 80); - - string::size_type pos = buf.find(logFile, 0); - - if (pos != string::npos) - { - // match found - switch (i + 1) - { - case 1: - configData = configData | LEVEL_CRITICAL; - break; - - case 2: - configData = configData | LEVEL_ERROR; - break; - - case 3: - configData = configData | LEVEL_WARNING; - break; - - case 4: - configData = configData | LEVEL_INFO; - break; - - case 5: - configData = configData | LEVEL_DEBUG; - break; - - default: - break; - } //end of switch - } - } - } //end of while - - oldFile.close(); - - // adjust data to be different from API RETURN CODES - configData = configData + API_MAX; - - return configData; -} - -/****************************************************************************************** -* @brief checkPowerOnResults -* -* purpose: Read Power-On TEst results log file and report issues via Alarms -* -******************************************************************************************/ -void ProcessMonitor::checkPowerOnResults() -{ - string POWERON_TEST_RESULTS_FILE = "/var/lib/columnstore/st_status"; - MonitorLog log; - - ifstream oldFile (POWERON_TEST_RESULTS_FILE.c_str()); - - if (!oldFile) - { - // no file found - log.writeLog(__LINE__, "ERROR: Power-On Test results file not found at " + POWERON_TEST_RESULTS_FILE, LOG_TYPE_ERROR ); - return; - } - - int configData = 0; - char line[200]; - string buf; - - while (oldFile.getline(line, 200)) - { - buf = line; - string name; - string level; - string info = ""; - - // extract name key word - string::size_type pos = buf.find("name:", 0); - string::size_type pos1; - - if (pos != string::npos) - { - // match found - pos1 = buf.find("|", pos); - - if (pos1 != string::npos) - // end of name found - name = buf.substr(pos + 5, pos1 - (pos + 5)); - else - { - // name not found, skip this line - continue; - } - } - else - { - // name not found, skip this line - continue; - } - - // extract level key word - pos = buf.find("level:", pos1); - - if (pos != string::npos) - { - // match found - pos1 = buf.find("|", pos); - - if (pos1 != string::npos) - // end of level found - level = buf.substr(pos + 6, pos1 - (pos + 6)); - else - { - // level not found, skip this line - continue; - } - } - else - { - // level not found, skip this line - continue; - } - - // extract info key word, if any exist - pos = buf.find("info:", pos1); - - if (pos != string::npos) - // match found - info = buf.substr(pos + 5, 200); - - // log findings - LoggingID lid(18); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Power-On self test: name = "); - args.add(name); - args.add(", level = "); - args.add(level); - args.add(", info = "); - args.add(info); - msg.format(args); - ml.logDebugMessage(msg); - - // Issue alarm based on level - - pos = level.find("3", 0); - - if (pos != string::npos) - { - // Severe Warning found, Issue alarm - sendAlarm(name, POWERON_TEST_SEVERE, SET); - - //Log this event - LoggingID lid(18); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Power-On self test Severe Warning on "); - args.add(name); - args.add(": "); - args.add(info); - msg.format(args); - ml.logWarningMessage(msg); - continue; - } - - pos = level.find("2", 0); - - if (pos != string::npos) - { - // Warning found, Issue alarm - sendAlarm(name, POWERON_TEST_WARNING, SET); - - //Log this event - LoggingID lid(18); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Power-On self test Warning on "); - args.add(name); - args.add(": "); - args.add(info); - msg.format(args); - ml.logWarningMessage(msg); - continue; - } - - pos = level.find("1", 0); - - if (pos != string::npos) - { - // Info found, Log this event - - LoggingID lid(18); - MessageLog ml(lid); - Message msg; - Message::Args args; - args.add("Power-On self test Info on "); - args.add(name); - args.add(": "); - args.add(info); - msg.format(args); - ml.logInfoMessage(msg); - continue; - } - - } //end of while - - oldFile.close(); - - // adjust data to be different from API RETURN CODES - configData = configData + API_MAX; - - return; -} - -/****************************************************************************************** -* @brief updateConfig -* -* purpose: Update Config data from disk -* -******************************************************************************************/ -int ProcessMonitor::updateConfig() -{ - //ProcMon log file - MonitorLog log; - MonitorConfig currentConfig; -// ProcessMonitor aMonitor(config, log); - Oam oam; - - //Read ProcessConfig file to get process list belong to this process monitor - SystemProcessConfig systemprocessconfig; - - try - { - oam.getProcessConfig(systemprocessconfig); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessConfig: " + error, LOG_TYPE_ERROR ); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessConfig: Caught unknown exception!", LOG_TYPE_ERROR ); - } - - //Update a map for application launch ID for this Process-Monitor - string OAMParentModuleType = currentConfig.OAMParentName().substr(0, MAX_MODULE_TYPE_SIZE); - string systemModuleType = config.moduleName().substr(0, MAX_MODULE_TYPE_SIZE); - - for ( unsigned int i = 0 ; i < systemprocessconfig.processconfig.size(); i++) - { - if (systemprocessconfig.processconfig[i].ModuleType == systemModuleType - || ( systemprocessconfig.processconfig[i].ModuleType == "um" && - systemModuleType == "pm" && PMwithUM == "y" ) - || systemprocessconfig.processconfig[i].ModuleType == "ChildExtOAMModule" - || ( systemprocessconfig.processconfig[i].ModuleType == "ChildOAMModule" ) - || (systemprocessconfig.processconfig[i].ModuleType == "ParentOAMModule" && - systemModuleType == OAMParentModuleType) ) - { - if ( systemprocessconfig.processconfig[i].ModuleType == "um" && - systemModuleType == "pm" && PMwithUM == "y" && - systemprocessconfig.processconfig[i].ProcessName == "DMLProc" ) - continue; - - - if ( systemprocessconfig.processconfig[i].ModuleType == "um" && - systemModuleType == "pm" && PMwithUM == "y" && - systemprocessconfig.processconfig[i].ProcessName == "DDLProc" ) - continue; - - ProcessStatus processstatus; - - try - { - //Get the process information - oam.getProcessStatus(systemprocessconfig.processconfig[i].ProcessName, config.moduleName(), processstatus); - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR ); - return API_FAILURE; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR ); - return API_FAILURE; - } - - config.buildList(systemprocessconfig.processconfig[i].ModuleType, - systemprocessconfig.processconfig[i].ProcessName, - systemprocessconfig.processconfig[i].ProcessLocation, - systemprocessconfig.processconfig[i].ProcessArgs, - systemprocessconfig.processconfig[i].LaunchID, - processstatus.ProcessID, - processstatus.ProcessOpState, - systemprocessconfig.processconfig[i].BootLaunch, - systemprocessconfig.processconfig[i].RunType, - systemprocessconfig.processconfig[i].DepProcessName, - systemprocessconfig.processconfig[i].DepModuleName, - systemprocessconfig.processconfig[i].LogFile); - } - } - - return API_SUCCESS; -} - -/****************************************************************************************** -* @brief buildSystemTables -* -* purpose: Check for and build System Tables if not there -* Only will be run from 'pm1' -* -******************************************************************************************/ -int ProcessMonitor::buildSystemTables() -{ - Oam oam; - string DBdir; - oam.getSystemConfig("DBRoot1", DBdir); - - string fileName = DBdir + "/000.dir"; - - //check if postConfigure or dbbuilder is already running - string cmd = "ps aux | grep postConfigure | grep -v grep"; - int rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) == 0) - return API_ALREADY_IN_PROGRESS; - - cmd = "ps aux | grep dbbuilder | grep -v grep"; - rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) == 0) - return API_ALREADY_IN_PROGRESS; - - if (!IDBPolicy::exists(fileName.c_str())) - { - string logdir("/var/log/mariadb/columnstore"); - - if (access(logdir.c_str(), W_OK) != 0) logdir = tmpLogDir; - - string cmd = "dbbuilder 7 > " + logdir + "/dbbuilder.log &"; - system(cmd.c_str()); - - log.writeLog(__LINE__, "buildSystemTables: dbbuilder 7 Successfully Launched", LOG_TYPE_DEBUG); - return API_SUCCESS; - } - - log.writeLog(__LINE__, "buildSystemTables: System Tables Already Exist", LOG_TYPE_DEBUG ); - return API_FILE_ALREADY_EXIST; -} - -/****************************************************************************************** -* @brief reconfigureModule -* -* purpose: reconfigure Module functionality -* Edit the moduleFile file with new Module Name -* -******************************************************************************************/ -int ProcessMonitor::reconfigureModule(std::string reconfigureModuleName) -{ - Oam oam; - - //create custom files - string dir = "/var/lib/columnstore/local/etc/" + reconfigureModuleName; - - string cmd = "mkdir " + dir + " > /dev/null 2>&1"; - system(cmd.c_str()); - - cmd = "rm -f " + dir + "/* > /dev/null 2>&1"; - system(cmd.c_str()); - - if ( reconfigureModuleName.find("um") != string::npos) - { - - cmd = "cp /var/lib/columnstore/local/etc/um1/* " + dir + "/."; - system(cmd.c_str()); - } - else - { - cmd = "cp /var/lib/columnstore/local/etc/pm1/* " + dir + "/."; - system(cmd.c_str()); - } - - //update module file - string fileName = "/var/lib/columnstore/local/module"; - - unlink (fileName.c_str()); - ofstream newFile (fileName.c_str()); - - cmd = "echo " + reconfigureModuleName + " > " + fileName; - system(cmd.c_str()); - - newFile.close(); - - return API_SUCCESS; -} - -/****************************************************************************************** -* @brief configureModule -* -* purpose: configure Module functionality -* Edit the moduleFile file with new Module Name -* -******************************************************************************************/ -int ProcessMonitor::configureModule(std::string configureModuleName) -{ - Oam oam; - - //update module file - string fileName = "/var/lib/columnstore/local/module"; - - unlink (fileName.c_str()); - ofstream newFile (fileName.c_str()); - - string cmd = "echo " + configureModuleName + " > " + fileName; - system(cmd.c_str()); - - newFile.close(); - - return API_SUCCESS; -} - - -/****************************************************************************************** -* @brief checkSpecialProcessState -* -* purpose: Check if a SIMPLEX runtype Process has mates already up -* -******************************************************************************************/ -int ProcessMonitor::checkSpecialProcessState( std::string processName, std::string runType, string processModuleType ) -{ - MonitorLog log; - MonitorConfig config; - Oam oam; - SystemProcessStatus systemprocessstatus; - ProcessStatus processstatus; - int retStatus = oam::MAN_INIT; - - if ( runType == SIMPLEX || runType == ACTIVE_STANDBY) - { - - log.writeLog(__LINE__, "checkSpecialProcessState on : " + processName, LOG_TYPE_DEBUG); - - if ( runType == SIMPLEX && PMwithUM == "y" && processModuleType == "um" && gOAMParentModuleFlag) - retStatus = oam::COLD_STANDBY; - else if ( runType == SIMPLEX && gOAMParentModuleFlag ) - retStatus = oam::MAN_INIT; - else if ( runType == ACTIVE_STANDBY && processModuleType == "ParentOAMModule" && - ( gOAMParentModuleFlag || !runStandby ) ) - retStatus = oam::MAN_INIT; - else if ( runType == ACTIVE_STANDBY && processModuleType == "ParentOAMModule" && config.OAMStandbyParentFlag() ) - retStatus = oam::STANDBY; - else if ( runType == ACTIVE_STANDBY && processModuleType == "ParentOAMModule" ) - retStatus = oam::COLD_STANDBY; - else if ( runType == SIMPLEX && processModuleType == "ParentOAMModule" && !gOAMParentModuleFlag) - retStatus = oam::COLD_STANDBY; - else - { - //simplex on a non um1 or non-parent-pm - if ( processName == "DMLProc" || processName == "DDLProc" ) - { - string PrimaryUMModuleName; - - try - { - oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName); - } - catch (...) {} - - if ( PrimaryUMModuleName != config.moduleName() ) - { - retStatus = oam::COLD_STANDBY; - } - } - - if ( retStatus != oam::COLD_STANDBY ) - { - try - { - oam.getProcessStatus(systemprocessstatus); - - for ( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++) - { - if ( systemprocessstatus.processstatus[i].ProcessName == processName && - systemprocessstatus.processstatus[i].Module != config.moduleName() ) - { - if ( systemprocessstatus.processstatus[i].ProcessOpState == ACTIVE || - systemprocessstatus.processstatus[i].ProcessOpState == MAN_INIT || - systemprocessstatus.processstatus[i].ProcessOpState == AUTO_INIT || - // systemprocessstatus.processstatus[i].ProcessOpState == MAN_OFFLINE || - // systemprocessstatus.processstatus[i].ProcessOpState == INITIAL || - systemprocessstatus.processstatus[i].ProcessOpState == BUSY_INIT) - { - - // found a ACTIVE or going ACTIVE mate - if ( runType == oam::SIMPLEX ) - // SIMPLEX - retStatus = oam::COLD_STANDBY; - else - { - // ACTIVE_STANDBY - for ( unsigned int j = 0 ; j < systemprocessstatus.processstatus.size(); j++) - { - if ( systemprocessstatus.processstatus[j].ProcessName == processName && - systemprocessstatus.processstatus[j].Module != config.moduleName() ) - { - if ( systemprocessstatus.processstatus[j].ProcessOpState == STANDBY || - systemprocessstatus.processstatus[j].ProcessOpState == STANDBY_INIT) - // FOUND ACTIVE AND STANDBY - retStatus = oam::COLD_STANDBY; - } - } - - // FOUND AN ACTIVE, BUT NO STANDBY - log.writeLog(__LINE__, "checkSpecialProcessState, set STANDBY on " + processName, LOG_TYPE_DEBUG); - retStatus = oam::STANDBY; - } - } - } - } - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - } - } - } - - if ( retStatus == oam::COLD_STANDBY || retStatus == oam::STANDBY ) - { - updateProcessInfo(processName, retStatus, 0); - - sendAlarm(processName, PROCESS_DOWN_MANUAL, CLEAR); - sendAlarm(processName, PROCESS_DOWN_AUTO, CLEAR); - sendAlarm(processName, PROCESS_INIT_FAILURE, CLEAR); - } - - log.writeLog(__LINE__, "checkSpecialProcessState status return : " + oam.itoa(retStatus), LOG_TYPE_DEBUG); - - return retStatus; -} - -/****************************************************************************************** -* @brief checkMateModuleState -* -* purpose: Check if Mate Module is Active -* -******************************************************************************************/ -int ProcessMonitor::checkMateModuleState() -{ - MonitorLog log; -// MonitorConfig config; -// ProcessMonitor aMonitor(config, log); - Oam oam; - SystemStatus systemstatus; - - try - { - oam.getSystemStatus(systemstatus, false); - - for ( unsigned int i = 0 ; i < systemstatus.systemmodulestatus.modulestatus.size(); i++) - { - string moduleName = systemstatus.systemmodulestatus.modulestatus[i].Module; - string moduleType = moduleName.substr(0, MAX_MODULE_TYPE_SIZE); - int moduleID = atoi(moduleName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE).c_str()); - - if ( moduleType == config.moduleType() && moduleID != config.moduleID() ) - if ( systemstatus.systemmodulestatus.modulestatus[i].ModuleOpState == oam::ACTIVE ) - return API_SUCCESS; - } - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - return API_FAILURE; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - return API_FAILURE; - } - - return API_FAILURE; -} - -/****************************************************************************************** -* @brief createDataDirs -* -* purpose: Create the Calpont Data directories -* -* -******************************************************************************************/ -int ProcessMonitor::createDataDirs(std::string cloud) -{ - MonitorLog log; - Oam oam; - - if ( config.moduleType() == "um" && - ( cloud == "amazon-ec2" || cloud == "amazon-vpc") ) - { - string UMStorageType; - - try - { - oam.getSystemConfig( "UMStorageType", UMStorageType); - } - catch (...) {} - - if (UMStorageType == "external") - { - if (!amazonVolumeCheck()) - { - return API_FAILURE; - } - } - } - - if ( config.moduleType() == "pm" ) - { - DBRootConfigList dbrootConfigList; - - string DBRootStorageType; - - try - { - oam.getSystemConfig( "DBRootStorageType", DBRootStorageType); - } - catch (...) {} - - try - { - systemStorageInfo_t t; - t = oam.getStorageConfig(); - - if ( boost::get<1>(t) == 0 ) - { - log.writeLog(__LINE__, "No dbroots are configured in Columnstore.xml file at proc mon startup time", LOG_TYPE_WARNING); - return API_INVALID_PARAMETER; - } - - DeviceDBRootList moduledbrootlist = boost::get<2>(t); - - DeviceDBRootList::iterator pt = moduledbrootlist.begin(); - - for ( ; pt != moduledbrootlist.end() ; pt++) - { - int moduleID = (*pt).DeviceID; - - DBRootConfigList::iterator pt1 = (*pt).dbrootConfigList.begin(); - - for ( ; pt1 != (*pt).dbrootConfigList.end() ; pt1++) - { - int id = *pt1; - - string DBRootName = "/var/lib/columnstore/data" + oam.itoa(id); - - string cmd = "mkdir " + DBRootName + " > /dev/null 2>&1"; - int rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) == 0) - log.writeLog(__LINE__, "Successful created directory " + DBRootName, LOG_TYPE_DEBUG); - - cmd = "chmod 755 " + DBRootName + " > /dev/null 2>&1"; - system(cmd.c_str()); - - if ( id == 1 ) - { - cmd = "mkdir -p /var/lib/columnstore/data1/systemFiles/dbrm > /dev/null 2>&1"; - system(cmd.c_str()); - } - - if ( (cloud == "amazon-ec2" || cloud == "amazon-vpc") && - DBRootStorageType == "external" && - config.moduleID() == moduleID) - { - if (!amazonVolumeCheck(id)) - { - return API_FAILURE; - } - } - } - } - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getStorageConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getStorageConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - } - - return API_SUCCESS; -} - - -/****************************************************************************************** -* @brief processRestarted -* -* purpose: Process Restarted, inform Process Mgr -* -* -******************************************************************************************/ -int ProcessMonitor::processRestarted( std::string processName, bool manual) -{ - MonitorLog log; -// MonitorConfig config; -// ProcessMonitor aMonitor(config, log); - Oam oam; - ByteStream msg; - - log.writeLog(__LINE__, "Inform Process Mgr that process was restarted: " + processName, LOG_TYPE_DEBUG); - - int returnStatus = API_FAILURE; - - msg << (ByteStream::byte) PROCESSRESTART; - msg << config.moduleName(); - msg << processName; - msg << (ByteStream::byte) manual; - - try - { - MessageQueueClient mqRequest("ProcMgr"); - mqRequest.write(msg); - mqRequest.shutdown(); - returnStatus = API_SUCCESS; - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueClient: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueClient: Caught unknown exception!", LOG_TYPE_ERROR); - } - - return returnStatus; -} - -/****************************************************************************************** -* @brief getDBRMdata -* -* purpose: get DBRM Data from Process Mgr -* -* -******************************************************************************************/ -int ProcessMonitor::getDBRMdata(string *path) -{ - MonitorLog log; - - Oam oam; - ByteStream msg; - - int returnStatus = API_FAILURE; - - msg << (ByteStream::byte) GETDBRMDATA; - msg << config.moduleName(); - - try - { - MessageQueueClient mqRequest("ProcMgr"); - mqRequest.write(msg); - - ByteStream receivedMSG; - - struct timespec ts = { 30, 0 }; - - //read message type - try - { - receivedMSG = mqRequest.read(&ts); - } - catch (SocketClosed& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on mqRequest.read: " + error, LOG_TYPE_ERROR); - return returnStatus; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on mqRequest.read: Caught unknown exception!", LOG_TYPE_ERROR); - return returnStatus; - } - - if (receivedMSG.length() > 0) - { - - string type; - - receivedMSG >> type; - - log.writeLog(__LINE__, type, LOG_TYPE_DEBUG); - - if ( type == "initial" ) - { - log.writeLog(__LINE__, "initial system, no dbrm files to send", LOG_TYPE_DEBUG); - returnStatus = API_SUCCESS; - } - else - { - // files coming, read number of files - try - { - receivedMSG = mqRequest.read(&ts); - } - catch (SocketClosed& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on mqRequest.read: " + error, LOG_TYPE_ERROR); - return returnStatus; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on mqRequest.read: Caught unknown exception!", LOG_TYPE_ERROR); - return returnStatus; - } - - if (receivedMSG.length() > 0) - { - - ByteStream::byte numFiles; - - receivedMSG >> numFiles; - log.writeLog(__LINE__, oam.itoa(numFiles), LOG_TYPE_DEBUG); - - bool journalFile = false; - boost::uuids::uuid u = boost::uuids::random_generator()(); - bf::path pTmp = bf::path(*path) / boost::uuids::to_string(u); - if (config::Config::makeConfig()->getConfig("Installation", "DBRootStorageType") != "storagemanager") - bf::create_directories(pTmp); - *path = pTmp.string(); - log.writeLog(__LINE__, "Downloading DBRM files to " + *path, LOG_TYPE_DEBUG); - for ( int i = 0 ; i < numFiles ; i ++ ) - { - string fileName; - - //read file name - try - { - receivedMSG = mqRequest.read(&ts); - } - catch (SocketClosed& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on mqRequest.read: " + error, LOG_TYPE_ERROR); - return returnStatus; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on mqRequest.read: Caught unknown exception!", LOG_TYPE_ERROR); - return returnStatus; - } - - if (receivedMSG.length() > 0) - { - receivedMSG >> fileName; - - //check for journal file coming across - string::size_type pos = fileName.find("journal", 0); - - if (pos != string::npos) - journalFile = true; - - //change file name location to temp file local -// string::size_type pos1 = fileName.find("/dbrm",0); -// pos = fileName.find("data1",0); -// if (pos != string::npos) -// { -// string temp = fileName.substr(0,pos); -// string temp1 = temp + "data" + fileName.substr(pos1,80); -// fileName = temp1; -// } - bf::path pFilename(fileName); - pFilename = pTmp / pFilename.filename(); - const char *cFilename = pFilename.string().c_str(); - - boost::scoped_ptr out(IDBDataFile::open( - IDBPolicy::getType(cFilename, - IDBPolicy::WRITEENG), - cFilename, "w", 0)); - - // read file data - try - { - receivedMSG = mqRequest.read(&ts); - } - catch (SocketClosed& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on mqRequest.read: " + error, LOG_TYPE_ERROR); - return returnStatus; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on mqRequest.read: Caught unknown exception!", LOG_TYPE_ERROR); - return returnStatus; - } - - if (receivedMSG.length() > 0) - { - out->write(receivedMSG.buf(), receivedMSG.length()); - log.writeLog(__LINE__, fileName, LOG_TYPE_DEBUG); - log.writeLog(__LINE__, oam.itoa(receivedMSG.length()), LOG_TYPE_DEBUG); - } - else - log.writeLog(__LINE__, "ProcMgr Msg timeout on module", LOG_TYPE_ERROR); - } - else - log.writeLog(__LINE__, "ProcMgr Msg timeout on module", LOG_TYPE_ERROR); - } - - //create journal file if none come across - if ( !journalFile) - { - bf::path pJournalFilename(pTmp / "BRM_saves_journal"); - IDBDataFile *idbJournalFile = IDBDataFile::open(IDBPolicy::getType(pJournalFilename.string().c_str(), - IDBPolicy::WRITEENG), pJournalFilename.string().c_str(), "w", 0); - delete idbJournalFile; - //string cmd = "touch " + startup::StartUp::installDir() + "/data1/systemFiles/dbrm/BRM_saves_journal"; - //system(cmd.c_str()); - } - - returnStatus = oam::API_SUCCESS; - } - else - log.writeLog(__LINE__, "ProcMgr Msg timeout on module", LOG_TYPE_ERROR); - } - } - else - log.writeLog(__LINE__, "ProcMgr Msg timeout on module", LOG_TYPE_ERROR); - - mqRequest.shutdown(); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueClient: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueClient: Caught unknown exception!", LOG_TYPE_ERROR); - } - - return returnStatus; -} - -/****************************************************************************************** -* @brief runStartupTest -* -* purpose: Runs DB sanity test -* -* -******************************************************************************************/ -int ProcessMonitor::runStartupTest() -{ - //ProcMon log file - MonitorLog log; -// MonitorConfig config; -// ProcessMonitor aMonitor(config, log); - Oam oam; - - //skip if module is DISABLED - int opState = oam::ACTIVE; - bool degraded; - oam.getModuleStatus(config.moduleName(), opState, degraded); - - if (geteuid() != 0 || opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) - return oam::API_SUCCESS; - - //run startup test script - string logdir("/var/log/mariadb/columnstore"); - - if (access(logdir.c_str(), W_OK) != 0) logdir = tmpLogDir; - - string cmd = "startupTests.sh > " + logdir + "/startupTests.log1 2>&1"; - system(cmd.c_str()); - - cmd = logdir + "/startupTests.log1"; - - bool fail = false; - - if (oam.checkLogStatus(cmd, "OK")) - { - log.writeLog(__LINE__, "startupTests passed", LOG_TYPE_DEBUG); - } - else - { - log.writeLog(__LINE__, "startupTests failed", LOG_TYPE_CRITICAL); - fail = true; - } - - if (!fail) - { - log.writeLog(__LINE__, "runStartupTest passed", LOG_TYPE_DEBUG); - //Clear the alarm - sendAlarm(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, CLEAR); - return oam::API_SUCCESS; - } - else - { - log.writeLog(__LINE__, "ERROR: runStartupTest failed", LOG_TYPE_CRITICAL); - //Set the alarm - sendAlarm(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET); - - //setModule status to failed - try - { - oam.setModuleStatus(config.moduleName(), oam::FAILED); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on setModuleStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on setModuleStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - - return oam::API_FAILURE; - } -} - - -/****************************************************************************************** -* @brief updateConfigFile -* -* purpose: Update local Calpont Config File -* -******************************************************************************************/ -int ProcessMonitor::updateConfigFile(messageqcpp::ByteStream msg) -{ - Config* sysConfig = Config::makeConfig(); - sysConfig->writeConfigFile(msg); - - return oam::API_SUCCESS; -} - - - -/****************************************************************************************** -* @brief sendMsgProcMon1 -* -* purpose: Sends a Msg to ProcMon -* -******************************************************************************************/ -std::string ProcessMonitor::sendMsgProcMon1( std::string module, ByteStream msg, int requestID ) -{ - string msgPort = module + "_ProcessMonitor"; - string returnStatus = "FAILED"; - - // do a ping test to determine a quick failure - Config* sysConfig = Config::makeConfig(); - - string IPAddr = sysConfig->getConfig(msgPort, "IPAddr"); - - string cmdLine = "ping "; - string cmdOption = " -c 1 -w 5 >> /dev/null 2>&1"; - string cmd = cmdLine + IPAddr + cmdOption; - - if ( system(cmd.c_str()) != 0 ) - { - //ping failure - log.writeLog(__LINE__, "sendMsgProcMon ping failure", LOG_TYPE_ERROR); - return returnStatus; - } - - try - { - MessageQueueClient mqRequest(msgPort); - mqRequest.write(msg); - - // wait 30 seconds for response - ByteStream::byte returnACK; - ByteStream::byte returnRequestID; - string requestStatus; - ByteStream receivedMSG; - - struct timespec ts = { 30, 0 }; - - try - { - receivedMSG = mqRequest.read(&ts); - } - catch (SocketClosed& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on mqRequest.read: " + error, LOG_TYPE_ERROR); - return returnStatus; - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on mqRequest.read: Caught unknown exception!", LOG_TYPE_ERROR); - return returnStatus; - } - - if (receivedMSG.length() > 0) - { - receivedMSG >> returnACK; - receivedMSG >> returnRequestID; - receivedMSG >> requestStatus; - - if ( returnACK == oam::ACK && returnRequestID == requestID) - { - // ACK for this request - returnStatus = requestStatus; - } - else - log.writeLog(__LINE__, "sendMsgProcMon: message mismatch ", LOG_TYPE_ERROR); - } - else - log.writeLog(__LINE__, "sendMsgProcMon: ProcMon Msg timeout on module " + module, LOG_TYPE_ERROR); - - mqRequest.shutdown(); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueClient: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueClient: Caught unknown exception!", LOG_TYPE_ERROR); - } - - return returnStatus; -} - -/****************************************************************************************** -* @brief checkModuleFailover -* -* purpose: check if module failover is needed due to a process outage -* -******************************************************************************************/ -void ProcessMonitor::checkModuleFailover( std::string processName) -{ - Oam oam; - - //force failover on certain processes - if ( processName == "DDLProc" || - processName == "DMLProc" ) { - log.writeLog(__LINE__, "checkModuleFailover: process failover, process outage of " + processName, LOG_TYPE_CRITICAL); - - try - { - SystemProcessStatus systemprocessstatus; - oam.getProcessStatus(systemprocessstatus); - - for ( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++) - { - if ( systemprocessstatus.processstatus[i].ProcessName == processName && - systemprocessstatus.processstatus[i].Module != config.moduleName() ) - { - //make sure it matches module type - string procModuleType = systemprocessstatus.processstatus[i].Module.substr(0, MAX_MODULE_TYPE_SIZE); - - if ( config.moduleType() != procModuleType ) - continue; - - if ( systemprocessstatus.processstatus[i].ProcessOpState == oam::COLD_STANDBY || - systemprocessstatus.processstatus[i].ProcessOpState == oam::AUTO_OFFLINE || - systemprocessstatus.processstatus[i].ProcessOpState == oam::FAILED ) - { - // found a AVAILABLE mate, start it - log.writeLog(__LINE__, "Change UM Master to module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG); - log.writeLog(__LINE__, "Stop local UM module " + config.moduleName(), LOG_TYPE_DEBUG); - log.writeLog(__LINE__, "Disable Local will Enable UM module " + systemprocessstatus.processstatus[i].Module, LOG_TYPE_DEBUG); - - oam::DeviceNetworkConfig devicenetworkconfig; - oam::DeviceNetworkList devicenetworklist; - - devicenetworkconfig.DeviceName = config.moduleName(); - devicenetworklist.push_back(devicenetworkconfig); - - try - { - oam.stopModule(devicenetworklist, oam::FORCEFUL, oam::ACK_YES); - log.writeLog(__LINE__, "success stopModule on module " + config.moduleName(), LOG_TYPE_DEBUG); - - try - { - oam.disableModule(devicenetworklist); - log.writeLog(__LINE__, "success disableModule on module " + config.moduleName(), LOG_TYPE_DEBUG); - } - catch (exception& e) - { - log.writeLog(__LINE__, "failed disableModule on module " + config.moduleName(), LOG_TYPE_ERROR); - } - } - catch (exception& e) - { - log.writeLog(__LINE__, "failed stopModule on module " + config.moduleName(), LOG_TYPE_ERROR); - } - - break; - } - } - } - } - catch (exception& ex) - { -// string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR); - } - } -} - -/****************************************************************************************** -* @brief changeMyCnf -* -* purpose: Change my.cnf -* -******************************************************************************************/ -int ProcessMonitor::changeMyCnf(std::string type) -{ - Oam oam; - - log.writeLog(__LINE__, "changeMyCnf function called for " + type, LOG_TYPE_DEBUG); - - string mycnfFile = string(MCSMYCNFDIR) + "/columnstore.cnf"; - ifstream file (mycnfFile.c_str()); - - if (!file) - { - log.writeLog(__LINE__, "changeMyCnf - my.cnf file not found: " + mycnfFile, LOG_TYPE_CRITICAL); - return oam::API_FAILURE; - } - - //get server-id based on ExeMgrx setup - string serverID = "0"; - string localModuleName = config.moduleName(); - - for ( int id = 1 ; ; id++ ) - { - string Section = "ExeMgr" + oam.itoa(id); - - string moduleName; - - try - { - Config* sysConfig = Config::makeConfig(); - moduleName = sysConfig->getConfig(Section, "Module"); - - if ( moduleName == localModuleName ) - { - serverID = oam.itoa(id); - break; - } - } - catch (...) {} - } - - if ( serverID == "0" ) - { - log.writeLog(__LINE__, "changeMyCnf: ExeMgr for local module doesn't exist", LOG_TYPE_ERROR); - return oam::API_FAILURE; - } - - // set server-id and other options in my.cnf - vector lines; - char line[200]; - string buf; - - while (file.getline(line, 200)) - { - buf = line; - string::size_type pos = buf.find("server-id", 0); - - if ( pos != string::npos ) - { - buf = "server-id = " + serverID; - - string command = "SET GLOBAL server_id=" + serverID + ";"; - int ret = runMariaDBCommandLine(command); - - if (ret != 0) - { - log.writeLog(__LINE__, "changeMyCnf: runMariaDBCommandLine Error", LOG_TYPE_ERROR); - return oam::API_FAILURE; - } - } - - pos = buf.find("log_bin", 0); - if ( pos != string::npos ) - { - buf = "log_bin"; - } - - // set local query flag if on pm - if ( (PMwithUM == "y") && config.moduleType() == "pm" ) - { - pos = buf.find("columnstore_local_query", 0); - - if ( pos != string::npos ) - { - buf = "columnstore_local_query=1"; - - string command = "SET GLOBAL " + buf + ";"; - - int ret = runMariaDBCommandLine(command); - - if (ret != 0) - { - log.writeLog(__LINE__, "changeMyCnf: runMariaDBCommandLine Error", LOG_TYPE_ERROR); - return oam::API_FAILURE; - } - } - } - else - { - // disable, if needed - pos = buf.find("columnstore_local_query", 0); - - if ( pos != string::npos ) - { - buf = "columnstore_local_query=0"; - - string command = "SET GLOBAL " + buf + ";"; - int ret = runMariaDBCommandLine(command); - - if (ret != 0) - { - log.writeLog(__LINE__, "changeMyCnf: runMariaDBCommandLine Error", LOG_TYPE_ERROR); - return oam::API_FAILURE; - } - } - } - - //output to temp file - lines.push_back(buf); - } - - file.close(); - unlink (mycnfFile.c_str()); - ofstream newFile (mycnfFile.c_str()); - - //create new file - int fd = open(mycnfFile.c_str(), O_RDWR | O_CREAT, 0664); - - copy(lines.begin(), lines.end(), ostream_iterator(newFile, "\n")); - newFile.close(); - - close(fd); - - // set owner and permission - string cmd = "chmod 664 " + mycnfFile + " >/dev/null 2>&1"; - - system(cmd.c_str()); - - cmd = "chown " + USER + ":" + USER + " " + mycnfFile + " >/dev/null 2>&1"; - - system(cmd.c_str()); - - // restart mysql - try { - oam.actionMysqlCalpont(MYSQL_RESTART); - sleep(5); // give after mysql restart - } - catch(...) - {} - log.writeLog(__LINE__, "changeMyCnf function successfully completed", LOG_TYPE_DEBUG); - - return oam::API_SUCCESS; -} - -/****************************************************************************************** -* @brief runMariaDBCommandLine -* -* purpose: run MariaDB Command Line script -* -******************************************************************************************/ -int ProcessMonitor::runMariaDBCommandLine(std::string command) -{ - Oam oam; - - log.writeLog(__LINE__, "runMariaDBCommandLine function called: cmd = " + command, LOG_TYPE_DEBUG); - - // mysql port number - string MySQLPort; - - try - { - oam.getSystemConfig("MySQLPort", MySQLPort); - } - catch (...) - { - MySQLPort = "3306"; - } - - if ( MySQLPort.empty() ) - MySQLPort = "3306"; - - string cmd = "mariadb-command-line.sh --command='" + command + "' --port=" + MySQLPort + " --tmpdir=" + tmpLogDir + " > " + tmpLogDir + "/mariadb-command-line.sh.log 2>&1"; - - log.writeLog(__LINE__, "cmd = " + cmd, LOG_TYPE_DEBUG); - - system(cmd.c_str()); - - string logFile = tmpLogDir + "/mariadb-command-line.sh.log"; - - if (oam.checkLogStatus(logFile, "ERROR 1045") ) - { - log.writeLog(__LINE__, "mariadb-command-line.sh: MySQL Password Error, check .my.cnf", LOG_TYPE_ERROR); - return oam::API_FAILURE; - } - else - { - if (oam.checkLogStatus(logFile, "OK")) - { - log.writeLog(__LINE__, "mariadb-command-line.sh: Successful return", LOG_TYPE_DEBUG); - return oam::API_SUCCESS; - } - else - { - log.writeLog(__LINE__, "mariadb-command-line.sh: Error return, check log " + tmpLogDir + "/mariadb-command-line.sh.log", LOG_TYPE_ERROR); - return oam::API_FAILURE; - } - } - - return oam::API_FAILURE; -} - - -/****************************************************************************************** -* @brief runMasterRep -* -* purpose: run Master Replication script -* -******************************************************************************************/ -int ProcessMonitor::runMasterRep(std::string& masterLogFile, std::string& masterLogPos) -{ - Oam oam; - - SystemModuleTypeConfig systemModuleTypeConfig; - - try - { - oam.getSystemConfig(systemModuleTypeConfig); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - // mysql port number - string MySQLPort; - - try - { - oam.getSystemConfig("MySQLPort", MySQLPort); - } - catch (...) - { - MySQLPort = "3306"; - } - - if ( MySQLPort.empty() ) - MySQLPort = "3306"; - - // create user for each module by ip address - for ( unsigned int i = 0 ; i < systemModuleTypeConfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemModuleTypeConfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - DeviceNetworkList::iterator pt = systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - string moduleName = (*pt).DeviceName; - - //skip if module is not ACTIVE - -// int opState = oam::ACTIVE; -// bool degraded; -// oam.getModuleStatus(moduleName, opState, degraded); -// if (opState != oam::ACTIVE) -// continue; - - bool passwordError = false; - - string moduleType = systemModuleTypeConfig.moduletypeconfig[i].ModuleType; - - if ( ( (PMwithUM == "n") && (moduleType == "pm") ) && - ( config.ServerInstallType() != oam::INSTALL_COMBINE_DM_UM_PM) ) - continue; - - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++ ) - { - string ipAddr = (*pt1).IPAddr; - - string logFile = tmpLogDir + "/master-rep-columnstore-" + moduleName + ".log"; - string cmd = "master-rep-columnstore.sh --hostIP=" + ipAddr + " --port=" + MySQLPort + " --tmpdir=" + tmpLogDir + " > " + logFile + " 2>&1"; - log.writeLog(__LINE__, "cmd = " + cmd, LOG_TYPE_DEBUG); - - system(cmd.c_str()); - - if (oam.checkLogStatus(logFile, "ERROR 1045") ) - { - if ( passwordError ) - { - log.writeLog(__LINE__, "master-rep-columnstore.sh: MySQL Password Error", LOG_TYPE_ERROR); - return oam::API_FAILURE; - } - - log.writeLog(__LINE__, "master-rep-columnstore.sh: Missing Password error, go check for a password and retry", LOG_TYPE_DEBUG); - passwordError = true; - break; - } - else - { - if (oam.checkLogStatus(logFile, "OK")) - log.writeLog(__LINE__, "master-rep-columnstore.sh: Successful return for node " + moduleName, LOG_TYPE_DEBUG); - else - { - log.writeLog(__LINE__, "master-rep-columnstore.sh: Error return, check log " + logFile, LOG_TYPE_ERROR); - return oam::API_FAILURE; - } - } - } - } - } - - // go parse out the MASTER_LOG_FILE and MASTER_LOG_POS - // this is what the output will look like - // - // SHOW MASTER STATUS - // File Position Binlog_Do_DB Binlog_Ignore_DB - // mysql-bin.000006 2921 - // - // in log - show-master-status.log - - string masterLog = tmpLogDir + "/show-master-status.log"; - ifstream file (masterLog.c_str()); - - if (!file) - { - log.writeLog(__LINE__, "runMasterRep - show master status log file doesn't exist - " + masterLog, LOG_TYPE_ERROR); - return oam::API_FAILURE; - } - else - { - char line[200]; - string buf; - - while (file.getline(line, 200)) - { - buf = line; - string::size_type pos = buf.find("000", 0); - - if ( pos != string::npos ) - { - pos = 0; - string::size_type pos1 = buf.find("\t", pos); - - if ( pos1 != string::npos ) - { - string masterlogfile = buf.substr(pos, pos1 - pos); - - //strip trailing spaces - string::size_type lead = masterlogfile.find_first_of(" "); - masterLogFile = masterlogfile.substr( 0, lead); - - string masterlogpos = buf.substr(pos1, 80); - - //strip off leading tab masterlogpos - lead = masterlogpos.find_first_not_of("\t"); - masterlogpos = masterlogpos.substr( lead, masterlogpos.length() - lead); - - //string trailing spaces - lead = masterlogpos.find_first_of(" "); - masterLogPos = masterlogpos.substr( 0, lead); - - log.writeLog(__LINE__, "runMasterRep: masterlogfile=" + masterLogFile + ", masterlogpos=" + masterLogPos, LOG_TYPE_DEBUG); - file.close(); - return oam::API_SUCCESS; - } - } - } - - file.close(); - } - - log.writeLog(__LINE__, "runMasterRep - 'mysql-bin not found in log file - " + masterLog, LOG_TYPE_ERROR); - - return oam::API_FAILURE; -} - -/****************************************************************************************** -* @brief runSlaveRep -* -* purpose: run Slave Replication script -* -******************************************************************************************/ -int ProcessMonitor::runSlaveRep(std::string& masterLogFile, std::string& masterLogPos) -{ - Oam oam; - - // get master replicaion module IP Address - string PrimaryUMModuleName; - oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName); - - string masterIPAddress; - - try - { - ModuleConfig moduleconfig; - oam.getSystemConfig(PrimaryUMModuleName, moduleconfig); - HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); - masterIPAddress = (*pt1).IPAddr; - } - catch (...) - {} - - // mysql port number - string MySQLPort; - - try - { - oam.getSystemConfig("MySQLPort", MySQLPort); - } - catch (...) - { - MySQLPort = "3306"; - } - - if ( MySQLPort.empty() ) - MySQLPort = "3306"; - - bool passwordError = false; - - while (true) - { - string logFile = tmpLogDir + "/slave-rep-columnstore.log"; - - string cmd = "slave-rep-columnstore.sh --masteripaddr=" + masterIPAddress + " --masterlogfile=" + masterLogFile + " --masterlogpos=" + masterLogPos + " --port=" + MySQLPort + " --tmpdir=" + tmpLogDir + " > " + logFile + " 2>&1"; - - log.writeLog(__LINE__, "cmd = " + cmd, LOG_TYPE_DEBUG); - - system(cmd.c_str()); - - if (oam.checkLogStatus(logFile, "ERROR 1045") ) - { - if ( passwordError ) - { - log.writeLog(__LINE__, "slave-rep-columnstore.sh: MySQL Password Error", LOG_TYPE_ERROR); - return oam::API_FAILURE; - } - - log.writeLog(__LINE__, "slave-rep-columnstore.sh: Missing Password error, go check for a password and retry", LOG_TYPE_DEBUG); - passwordError = true; - } - else - { - if (oam.checkLogStatus(logFile, "OK")) - { - log.writeLog(__LINE__, "slave-rep-columnstore.sh: Successful return", LOG_TYPE_DEBUG); - return oam::API_SUCCESS; - } - else - { - log.writeLog(__LINE__, "slave-rep-columnstore.sh: Error return, check log " + logFile, LOG_TYPE_ERROR); - return oam::API_FAILURE; - } - } - } - - return oam::API_FAILURE; -} - -/****************************************************************************************** -* @brief runDisableRep -* -* purpose: run Disable Replication script -* -******************************************************************************************/ -int ProcessMonitor::runDisableRep() -{ - Oam oam; - - // mysql port number - string MySQLPort; - - try - { - oam.getSystemConfig("MySQLPort", MySQLPort); - } - catch (...) - { - MySQLPort = "3306"; - } - - if ( MySQLPort.empty() ) - MySQLPort = "3306"; - - string logFile = tmpLogDir + "/disable-rep-columnstore.log"; - - string cmd = "disable-rep-columnstore.sh --tmpdir=" + tmpLogDir + " > " + logFile + " 2>&1"; - - log.writeLog(__LINE__, "cmd = " + cmd, LOG_TYPE_DEBUG); - - system(cmd.c_str()); - - if (oam.checkLogStatus(logFile, "OK")) - { - log.writeLog(__LINE__, "disable-rep-columnstore.sh: Successful return", LOG_TYPE_DEBUG); - return oam::API_SUCCESS; - } - else - { - if (oam.checkLogStatus(cmd, "ERROR 1045") ) - { - log.writeLog(__LINE__, "disable-rep-columnstore.sh: Missing Password error, return success", LOG_TYPE_DEBUG); - return oam::API_SUCCESS; - } - - log.writeLog(__LINE__, "disable-rep-columnstore.sh: Error return, check log " + logFile, LOG_TYPE_ERROR); - return oam::API_FAILURE; - } - - return oam::API_FAILURE; -} - -/****************************************************************************************** -* @brief runMasterDist -* -* purpose: run Master DB Distribution -* -******************************************************************************************/ -int ProcessMonitor::runMasterDist(std::string& password, std::string& slaveModule) -{ - Oam oam; - - SystemModuleTypeConfig systemModuleTypeConfig; - - try - { - oam.getSystemConfig(systemModuleTypeConfig); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - int slave = 0; - - if ( slaveModule == "all" ) - { - // Distrubuted MySQL Front-end DB to Slave Modules - for ( unsigned int i = 0 ; i < systemModuleTypeConfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemModuleTypeConfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0) - continue; - - string moduleType = systemModuleTypeConfig.moduletypeconfig[i].ModuleType; - - if ( ( (PMwithUM == "n") && (moduleType == "pm") ) && - ( config.ServerInstallType() != oam::INSTALL_COMBINE_DM_UM_PM) ) - continue; - - DeviceNetworkList::iterator pt = systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - string moduleName = (*pt).DeviceName; - - //skip if local master mode - if ( moduleName == config.moduleName() ) - continue; - - slave++; - - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++ ) - { - string ipAddr = (*pt1).IPAddr; - - string logFile = tmpLogDir + "/master-dist_" + moduleName + ".log"; - string cmd = "rsync.sh " + ipAddr + " " + password + " 1 > " + logFile; - - log.writeLog(__LINE__, "cmd = " + cmd, LOG_TYPE_DEBUG); - system(cmd.c_str()); - - - if (!oam.checkLogStatus(logFile, "FAILED")) - { - log.writeLog(__LINE__, "runMasterDist: Success rsync to module: " + moduleName, LOG_TYPE_DEBUG); - break; - } - else - { - log.writeLog(__LINE__, "runMasterDist: Failure rsync to module: " + moduleName, LOG_TYPE_ERROR); - return oam::API_FAILURE; - } - } - } - } - } - else - { - // don't do PMs unless PMwithUM flag is set - - string moduleType = slaveModule.substr(0, MAX_MODULE_TYPE_SIZE); - - if ( (moduleType == "um") || - ( (PMwithUM == "y") && (moduleType == "pm") ) || - ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM) ) - { - slave++; - - // get slave IP address - ModuleConfig moduleconfig; - oam.getSystemConfig(slaveModule, moduleconfig); - HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); - string ipAddr = (*pt1).IPAddr; - - string logFile = tmpLogDir + "/master-dist_" + slaveModule + ".log"; - - string cmd = "rsync.sh " + ipAddr + " " + password + " 1 > " + logFile; - system(cmd.c_str()); - - if (!oam.checkLogStatus(logFile, "FAILED")) - log.writeLog(__LINE__, "runMasterDist: Success rsync to module: " + slaveModule, LOG_TYPE_DEBUG); - else - { - log.writeLog(__LINE__, "runMasterDist: Failure rsync to module: " + slaveModule, LOG_TYPE_ERROR); - return oam::API_FAILURE; - } - } - } - - if (slave == 0 ) - log.writeLog(__LINE__, "runMasterDist: No configured slave nodes", LOG_TYPE_DEBUG); - - return oam::API_SUCCESS; -} - - -/****************************************************************************************** -* @brief amazonIPCheck -* -* purpose: check and setups Amazon EC2 IP Addresses -* -******************************************************************************************/ -bool ProcessMonitor::amazonIPCheck() -{ - MonitorLog log; - Oam oam; - - // delete description file so it will create a new one - string tmpLog = tmpLogDir + "/describeInstance.log"; - unlink(tmpLog.c_str()); - - // - // Get Module Info - // - SystemModuleTypeConfig systemModuleTypeConfig; - - try - { - oam.getSystemConfig(systemModuleTypeConfig); - } - catch (exception& ex) - { - string error = ex.what(); -// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR); - } - catch (...) - { -// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR); - } - - //get Elastic IP Address count - int AmazonElasticIPCount = 0; - - try - { - oam.getSystemConfig("AmazonElasticIPCount", AmazonElasticIPCount); - } - catch (...) - { - AmazonElasticIPCount = 0; - } - - ModuleTypeConfig moduletypeconfig; - - //get module/instance IDs - for ( unsigned int i = 0 ; i < systemModuleTypeConfig.moduletypeconfig.size(); i++) - { - int moduleCount = systemModuleTypeConfig.moduletypeconfig[i].ModuleCount; - - if ( moduleCount == 0 ) - // skip of no modules configured - continue; - - DeviceNetworkList::iterator pt = systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; pt != systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++) - { - DeviceNetworkConfig devicenetworkconfig; - HostConfig hostconfig; - - devicenetworkconfig.DeviceName = (*pt).DeviceName; - - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++ ) - { - hostconfig.IPAddr = (*pt1).IPAddr; - hostconfig.HostName = (*pt1).HostName; - devicenetworkconfig.hostConfigList.push_back(hostconfig); - } - - moduletypeconfig.ModuleNetworkList.push_back(devicenetworkconfig); - } - } - - // now loop and wait for 5 minutes for all configured Instances to be running - // like after a reboot - bool startFail = false; - - for ( int time = 0 ; time < 30 ; time++ ) - { - startFail = false; - DeviceNetworkList::iterator pt = moduletypeconfig.ModuleNetworkList.begin(); - - for ( ; pt != moduletypeconfig.ModuleNetworkList.end() ; pt++) - { - string moduleName = (*pt).DeviceName; - - // get all ips if parent oam - // get just parent and local if not parent oam - MonitorConfig currentConfig; - - if ( config.moduleName() == currentConfig.OAMParentName() || - moduleName == config.moduleName() || - moduleName == currentConfig.OAMParentName() ) - { - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++) - { - string IPAddr = (*pt1).IPAddr; - string instanceID = (*pt1).HostName; - - log.writeLog(__LINE__, "getEC2InstanceIpAddress called to get status for Module '" + moduleName + "' / Instance " + instanceID, LOG_TYPE_DEBUG); - string currentIPAddr = oam.getEC2InstanceIpAddress(instanceID); - - if (currentIPAddr == "stopped") - { - log.writeLog(__LINE__, "Module '" + moduleName + "' / Instance '" + instanceID + "' not running", LOG_TYPE_WARNING); - startFail = true; - } - else - { - if (currentIPAddr == "terminated") - { - log.writeLog(__LINE__, "Module '" + moduleName + "' / Instance '" + instanceID + "' has no Private IP Address Assigned, system failed to start", LOG_TYPE_CRITICAL); - startFail = true; - break; - } - else - { - if ( currentIPAddr != IPAddr ) - { - log.writeLog(__LINE__, "Module is Running: '" + moduleName + "' / Instance '" + instanceID + "' current IP being reconfigured in Columnstore.xml. old = " + IPAddr + ", new = " + currentIPAddr, LOG_TYPE_DEBUG); - - // update the Columnstore.xml with the new IP Address - string cmd = "sed -i s/" + IPAddr + "/" + currentIPAddr + "/g " + MCSSYSCONFDIR + "/columnstore/Columnstore.xml"; - system(cmd.c_str()); - } - else - log.writeLog(__LINE__, "Module is Running: '" + moduleName + "' / Instance '" + instanceID + "' current IP didn't change.", LOG_TYPE_DEBUG); - } - } - - //set Elastic IP Address, if configured - if (AmazonElasticIPCount > 0) - { - bool found = false; - int id = 1; - - for ( ; id < AmazonElasticIPCount + 1 ; id++ ) - { - string AmazonElasticModule = "AmazonElasticModule" + oam.itoa(id); - string ELmoduleName; - string AmazonElasticIPAddr = "AmazonElasticIPAddr" + oam.itoa(id); - string ELIPaddress; - - try - { - oam.getSystemConfig(AmazonElasticModule, ELmoduleName); - oam.getSystemConfig(AmazonElasticIPAddr, ELIPaddress); - } - catch (...) {} - - if ( ELmoduleName == moduleName ) - { - found = true; - - try - { - oam.assignElasticIP(instanceID, ELIPaddress); - log.writeLog(__LINE__, "Assign Elastic IP Address : '" + moduleName + "' / '" + ELIPaddress, LOG_TYPE_DEBUG); - } - catch (...) - { - log.writeLog(__LINE__, "Assign Elastic IP Address failed : '" + moduleName + "' / '" + ELIPaddress, LOG_TYPE_ERROR); - break; - } - break; - } - - if (found) - break; - } - } - } - } - } - - //continue when no instances are stopped - if (!startFail) - break; - - sleep(10); - } - - //check if an instance is stopped, exit out... - if (startFail) - { - log.writeLog(__LINE__, "A configured Instance isn't running. Check warning log", LOG_TYPE_CRITICAL); - } - - log.writeLog(__LINE__, "amazonIPCheck function successfully completed", LOG_TYPE_DEBUG); - - return true; - -} - -/****************************************************************************************** -* @brief amazonVolumeCheck -* -* purpose: check and setups Amazon EC2 Volume mounts -* -******************************************************************************************/ -bool ProcessMonitor::amazonVolumeCheck(int dbrootID) -{ - MonitorLog log; - Oam oam; - - { - log.writeLog(__LINE__, "amazonVolumeCheck function called for DBRoot" + oam.itoa(dbrootID), LOG_TYPE_DEBUG); - - string volumeNameID = "PMVolumeName" + oam.itoa(dbrootID); - string volumeName = oam::UnassignedName; - string deviceNameID = "PMVolumeDeviceName" + oam.itoa(dbrootID); - string deviceName = oam::UnassignedName; - - try - { - oam.getSystemConfig( volumeNameID, volumeName); - oam.getSystemConfig( deviceNameID, deviceName); - } - catch (...) - {} - - if ( volumeName.empty() || volumeName == oam::UnassignedName ) - { - log.writeLog(__LINE__, "amazonVolumeCheck function exiting, no volume assigned to DBRoot " + oam.itoa(dbrootID), LOG_TYPE_WARNING); - return false; - } - - string status = oam.getEC2VolumeStatus(volumeName); - log.writeLog(__LINE__, "amazonVolumeCheck: getEC2VolumeStatus: " + status, LOG_TYPE_DEBUG); - - - if ( status == "attached" ) - { - log.writeLog(__LINE__, "amazonVolumeCheck function successfully completed, volume attached: " + volumeName, LOG_TYPE_DEBUG); - return true; - } - - if ( status != "available" ) - { - log.writeLog(__LINE__, "amazonVolumeCheck function failed, volume not attached and not available: " + volumeName, LOG_TYPE_WARNING); - return false; - } - else - { - //get Module HostName / InstanceName - string instanceName; - - try - { - ModuleConfig moduleconfig; - oam.getSystemConfig(config.moduleName(), moduleconfig); - HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin(); - instanceName = (*pt1).HostName; - } - catch (...) - {} - - if (oam.attachEC2Volume(volumeName, deviceName, instanceName)) - { - log.writeLog(__LINE__, "amazonVolumeCheck function , volume to attached: " + volumeName, LOG_TYPE_DEBUG); - - string cmd = SUDO + "mount /var/lib/columnstore/data" + oam.itoa(dbrootID) + " > /dev/null"; - - system(cmd.c_str()); - log.writeLog(__LINE__, "amazonVolumeCheck function , volume to mounted: " + volumeName, LOG_TYPE_DEBUG); - - cmd = SUDO + "chown -R " + USER + ":" + USER + " /var/lib/columnstore/data" + oam.itoa(dbrootID); - system(cmd.c_str()); - - return true; - } - else - { - log.writeLog(__LINE__, "amazonVolumeCheck function failed, volume failed to attached: " + volumeName, LOG_TYPE_CRITICAL); - return false; - } - } - } - - log.writeLog(__LINE__, "amazonVolumeCheck function successfully completed", LOG_TYPE_DEBUG); - - return true; - -} - -/****************************************************************************************** -* @brief unmountExtraDBroots -* -* purpose: unmount Extra DBroots which were left mounted during a move -* -* -******************************************************************************************/ -void ProcessMonitor::unmountExtraDBroots() -{ - MonitorLog log; - ModuleConfig moduleconfig; - Oam oam; - - string DBRootStorageType = "internal"; - - try - { - oam.getSystemConfig("DBRootStorageType", DBRootStorageType); - - if ( DBRootStorageType == "hdfs" || DBRootStorageType == "storagemanager" || - ( DBRootStorageType == "internal" && DataRedundancyConfig == "n") ) - return; - } - catch (...) {} - -// if (DataRedundancyConfig == "y") -// return; - - try - { - systemStorageInfo_t t; - t = oam.getStorageConfig(); - - if ( boost::get<1>(t) == 0 ) - { - return; - } - - DeviceDBRootList moduledbrootlist = boost::get<2>(t); - - //Flush the cache - cacheutils::flushPrimProcCache(); - cacheutils::dropPrimProcFdCache(); - flushInodeCache(); - - DeviceDBRootList::iterator pt = moduledbrootlist.begin(); - - for ( ; pt != moduledbrootlist.end() ; pt++) - { - int moduleID = (*pt).DeviceID; - - DBRootConfigList::iterator pt1 = (*pt).dbrootConfigList.begin(); - - for ( ; pt1 != (*pt).dbrootConfigList.end() ; pt1++) - { - int id = *pt1; - - if (config.moduleID() != moduleID) - { - if ( DataRedundancyConfig == "n" ) - { - string cmd = SUDO + "umount /var/lib/columnstore/data" + oam.itoa(id) + " > /dev/null 2>&1"; - system(cmd.c_str()); - } - else - { - try - { - int ret = glusterUnassign(oam.itoa(id)); - - if ( ret != 0 ) - log.writeLog(__LINE__, "Error unassigning gluster dbroot# " + oam.itoa(id), LOG_TYPE_ERROR); - else - log.writeLog(__LINE__, "Gluster unassign gluster dbroot# " + oam.itoa(id)); - } - catch (...) - { - log.writeLog(__LINE__, "Exception unassigning gluster dbroot# " + oam.itoa(id), LOG_TYPE_ERROR); - } - } - } - } - } - } - catch (...) - {} - - log.writeLog(__LINE__, "unmountExtraDBroots finished ", LOG_TYPE_DEBUG); - - - return; -} - -/****************************************************************************************** -* @brief checkDataMount -* -* purpose: Check Data Mounts -* -* -******************************************************************************************/ -int ProcessMonitor::checkDataMount() -{ - MonitorLog log; - ModuleConfig moduleconfig; - Oam oam; - - //check/update the pmMount files - - string DBRootStorageType = "internal"; - vector dbrootList; - - for ( int retry = 0 ; retry < 10 ; retry++) - { - try - { - systemStorageInfo_t t; - t = oam.getStorageConfig(); - - if ( boost::get<1>(t) == 0 ) - { - log.writeLog(__LINE__, "getStorageConfig return: No dbroots are configured in Columnstore.xml file", LOG_TYPE_WARNING); - return API_INVALID_PARAMETER; - } - - DeviceDBRootList moduledbrootlist = boost::get<2>(t); - - DeviceDBRootList::iterator pt = moduledbrootlist.begin(); - - for ( ; pt != moduledbrootlist.end() ; pt++) - { - int moduleID = (*pt).DeviceID; - - DBRootConfigList::iterator pt1 = (*pt).dbrootConfigList.begin(); - - for ( ; pt1 != (*pt).dbrootConfigList.end() ; pt1++) - { - if (config.moduleID() == moduleID) - { - dbrootList.push_back(oam.itoa(*pt1)); - } - } - } - - break; - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on getStorageConfig: " + error, LOG_TYPE_ERROR); - sleep (1); - } - catch (...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on getStorageConfig: Caught unknown exception!", LOG_TYPE_ERROR); - sleep (1); - } - } - - try - { - oam.getSystemConfig("DBRootStorageType", DBRootStorageType); - } - catch (...) {} - - //asign DBRoot is gluster - if (DataRedundancyConfig == "y") - { - vector::iterator p = dbrootList.begin(); - - while ( p != dbrootList.end() ) - { - string dbrootID = *p; - p++; - - try - { - int ret = glusterAssign(dbrootID); - - if ( ret != 0 ) - log.writeLog(__LINE__, "Error assigning gluster dbroot# " + dbrootID, LOG_TYPE_ERROR); - else - log.writeLog(__LINE__, "Gluster assign gluster dbroot# " + dbrootID); - } - catch (...) - { - log.writeLog(__LINE__, "Exception assigning gluster dbroot# " + dbrootID, LOG_TYPE_ERROR); - } - } - } - - if ( dbrootList.size() == 0 ) - { - log.writeLog(__LINE__, "No dbroots are configured in Columnstore.xml file", LOG_TYPE_WARNING); - return API_INVALID_PARAMETER; - } - - if ( DBRootStorageType == "hdfs" || - (DBRootStorageType == "internal" && DataRedundancyConfig == "n") ) - { - //create OAM-Test-Flag - vector::iterator p = dbrootList.begin(); - - while ( p != dbrootList.end() ) - { - string dbroot = "/var/lib/columnstore/data" + *p; - p++; - - string fileName = dbroot + "/OAMdbrootCheck"; - ofstream fout(fileName.c_str()); - - if (!fout) - { - log.writeLog(__LINE__, "ERROR: Failed test write to DBRoot: " + dbroot, LOG_TYPE_ERROR); - - return API_FAILURE; - } - } - - return API_SUCCESS; - } - else if (DBRootStorageType == "storagemanager") - { - /* StorageManager isn't running yet. Can't check for writability here. */ - return API_SUCCESS; - } - //go unmount disk NOT assigned to this pm - unmountExtraDBroots(); - - //Flush the cache - cacheutils::flushPrimProcCache(); - cacheutils::dropPrimProcFdCache(); - flushInodeCache(); - - //external or gluster - vector::iterator p = dbrootList.begin(); - - while ( p != dbrootList.end() ) - { - string dbroot = "/var/lib/columnstore/data" + *p; - string fileName = dbroot + "/OAMdbrootCheck"; - - if ( DataRedundancyConfig == "n" ) - { - //remove any local check flag for starters - string cmd = SUDO + "umount " + dbroot + " > " + tmpLogDir + "/umount.log 2>&1"; - system(cmd.c_str()); - - unlink(fileName.c_str()); - - // check if already/still mounted, skip if so - cmd = "grep " + dbroot + " /proc/mounts > /dev/null 2>&1"; - - int status = system(cmd.c_str()); - - if (WEXITSTATUS(status) != 0 ) - { - // not mounted, mount - string mountLog = tmpLogDir + "/mount.log"; - cmd = "export LC_ALL=C;" + SUDO + "mount " + dbroot + " > " + mountLog + " 2>&1"; - system(cmd.c_str()); - - ifstream in(mountLog.c_str()); - - in.seekg(0, std::ios::end); - int size = in.tellg(); - - if ( size != 0 ) - { - if (!oam.checkLogStatus(mountLog, "already")) - { - log.writeLog(__LINE__, "checkDataMount: mount failed, DBRoot: " + dbroot, LOG_TYPE_ERROR); - - try - { - oam.setDbrootStatus(*p, oam::AUTO_OFFLINE); - } - catch (exception& ex) - {} - - return API_FAILURE; - } - } - } - - if ( !rootUser) - { - cmd = SUDO + "chown -R " + USER + ":" + USER + " " + dbroot + " > /dev/null 2>&1"; - system(cmd.c_str()); - } - - log.writeLog(__LINE__, "checkDataMount: successfull mount " + dbroot, LOG_TYPE_DEBUG); - } - - //create OAM-Test-Flag check rw mount - ofstream fout(fileName.c_str()); - - if (!fout) - { - log.writeLog(__LINE__, "ERROR: Failed test write to DBRoot: " + dbroot, LOG_TYPE_ERROR); - - try - { - oam.setDbrootStatus(*p, oam::AUTO_OFFLINE); - } - catch (exception& ex) - {} - - return API_FAILURE; - } - - try - { - oam.setDbrootStatus(*p, oam::ACTIVE); - } - catch (exception& ex) - {} - - p++; - } - - return API_SUCCESS; -} - - -/****************************************************************************************** -* @brief calTotalUmMemory -* -* purpose: Calculate TotalUmMemory -* -* -******************************************************************************************/ -void ProcessMonitor::calTotalUmMemory() -{ - MonitorLog log; - Oam oam; - - struct sysinfo myinfo; - - //check/update the pmMount files - - try - { - sysinfo(&myinfo); - } - catch (...) - { - return; - } - - //get memory stats - long long total = myinfo.totalram / 1024 / 1000; - - // adjust max memory, 25% of total memory - string value; - - if ( total <= 2000 ) - value = "256M"; - else if ( total <= 4000 ) - value = "512M"; - else if ( total <= 8000 ) - value = "1G"; - else if ( total <= 16000 ) - value = "2G"; - else if ( total <= 32000 ) - value = "4G"; - else if ( total <= 64000 ) - value = "8G"; - else - value = "16G"; - - try - { - Config* sysConfig = Config::makeConfig(); - sysConfig->setConfig("HashJoin", "TotalUmMemory", value); - - //update Calpont Config table - try - { - sysConfig->write(); - } - catch (...) - { - log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR); - return; - } - - log.writeLog(__LINE__, "set TotalUmMemory to " + value, LOG_TYPE_DEBUG); - } - catch (...) - { - log.writeLog(__LINE__, "Failed to set TotalUmMemory to " + value, LOG_TYPE_ERROR); - } - - return; - -} - -/****************************************************************************************** -* @brief flushInodeCache -* -* purpose: flush cache -* -* -******************************************************************************************/ -void ProcessMonitor::flushInodeCache() -{ - int fd; - ByteStream reply; - -#ifdef __linux__ - fd = open("/proc/sys/vm/drop_caches", O_WRONLY); - - if (fd >= 0) - { - if (write(fd, "3\n", 2) == 2) - { - log.writeLog(__LINE__, "flushInodeCache successful", LOG_TYPE_DEBUG); - } - else - { - log.writeLog(__LINE__, "flushInodeCache failed", LOG_TYPE_DEBUG); - } - - close(fd); - } - else - { - log.writeLog(__LINE__, "flushInodeCache failed to open file", LOG_TYPE_DEBUG); - } - -#endif -} - -/****************************************************************************************** -* @brief glusterAssign -* -* purpose: Gluster Assign DBroot on local module -* -* -******************************************************************************************/ -int ProcessMonitor::glusterAssign(std::string dbrootID) -{ - Oam oam; - Config* sysConfig = Config::makeConfig(); - string command; - std::string errmsg = ""; - - log.writeLog(__LINE__, "glusterAssign called : " + dbrootID, LOG_TYPE_DEBUG); - - string pmid = oam.itoa(config.moduleID()); - string dataDupIPaddr = "ModuleIPAddr" + pmid + "-1-3"; - string moduleIPAddr = sysConfig->getConfig("DataRedundancyConfig", dataDupIPaddr); - - if (moduleIPAddr.empty() || moduleIPAddr == oam::UnassignedIpAddr) - { - moduleIPAddr = sysConfig->getConfig("SystemModuleConfig", dataDupIPaddr); - } - - string tmpLog = tmpLogDir + "/glusterAssign.log"; - command = SUDO + "mount -tglusterfs -odirect-io-mode=enable " + moduleIPAddr + ":/dbroot" + dbrootID + " /var/lib/columnstore/data" + dbrootID + " > " + tmpLog + " 2>&1"; - - int ret = system(command.c_str()); - - if ( WEXITSTATUS(ret) != 0 ) - { - //log.writeLog(__LINE__, "glusterAssign mount failure: dbroot: " + dbrootID + " error: " + oam.itoa(WEXITSTATUS(ret)), LOG_TYPE_ERROR); - - ifstream in(tmpLog.c_str()); - in.seekg(0, std::ios::end); - int size = in.tellg(); - - if ( size != 0 ) - { - if (!oam.checkLogStatus(tmpLog, "already")) - { - log.writeLog(__LINE__, "glusterAssign failed.", LOG_TYPE_ERROR); - string cmd = "mv -f " + tmpLog + " " + tmpLog + "failed"; - system(cmd.c_str()); - return oam::API_FAILURE; - } - } - } - - return oam::API_SUCCESS; - -} - -/****************************************************************************************** -* @brief glusterAssign -* -* purpose: Gluster Assign DBroot on local module -* -* -******************************************************************************************/ -int ProcessMonitor::glusterUnassign(std::string dbrootID) -{ - Oam oam; - string command; - std::string errmsg = ""; - - log.writeLog(__LINE__, "glusterUnassign called: " + dbrootID, LOG_TYPE_DEBUG); - - string tmpLog = tmpLogDir + "/glusterUnassign.log"; - - command = SUDO + "umount -f /var/lib/columnstore/data" + dbrootID + " > " + tmpLog + " 2>&1"; - - int ret = system(command.c_str()); - - if ( WEXITSTATUS(ret) != 0 ) - { - //log.writeLog(__LINE__, "glusterUnassign mount failure: dbroot: " + dbrootID + " error: " + oam.itoa(WEXITSTATUS(ret)), LOG_TYPE_ERROR); - - ifstream in(tmpLog.c_str()); - in.seekg(0, std::ios::end); - int size = in.tellg(); - - if ( size != 0 ) - { - if (!oam.checkLogStatus(tmpLog, "not mounted")) - { - log.writeLog(__LINE__, "glusterUnassign failed.", LOG_TYPE_ERROR); - - string cmd = "mv -f " + tmpLog + " " + tmpLog + "failed"; - system(cmd.c_str()); - return oam::API_FAILURE; - } - } - } - - return oam::API_SUCCESS; -} - - -int ProcessMonitor::syncFS() -{ - Oam oam; - - string DBRMroot; - oam.getSystemConfig("DBRMRoot", DBRMroot); - - string currentFileName = DBRMroot + "_current"; - IDBFileSystem &fs = IDBPolicy::getFs(currentFileName.c_str()); - bool success = fs.filesystemSync(); - if (!success) - return oam::API_FAILURE; - return oam::API_SUCCESS; -} - -} //end of namespace -// vim:ts=4 sw=4: - diff --git a/procmon/processmonitor.h b/procmon/processmonitor.h deleted file mode 100644 index 57e90b1f4..000000000 --- a/procmon/processmonitor.h +++ /dev/null @@ -1,563 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/*************************************************************************** -* $Id: processmonitor.h 1993 2013-04-04 18:33:48Z rdempsey $ -* - ***************************************************************************/ - - -#ifndef _PROCESSMONITOR_H_ -#define _PROCESSMONITOR_H_ - -#include -#include -#include -#include - -#include "liboamcpp.h" -#include "shmkeys.h" -#include "alarmglobal.h" -#include "socketclosed.h" - -namespace processmonitor -{ - -#define INIT_SUPPORT 0 // 0 = 'INIT -0' COMMAND COMPILED IN, 1 = 'INIT -0' COMMANDS COMPILED OUT - -#define MAXARGUMENTS 15 -#define MAXDEPENDANCY 6 - -/** - * @brief processStructure Process Config Structure - */ -typedef struct processStructure -{ - std::string ProcessModuleType; - std::string ProcessName; - std::string ProcessLocation; - std::string ProcessArgs[MAXARGUMENTS]; - time_t currentTime; - uint16_t launchID; - pid_t processID; - uint16_t state; - uint16_t BootLaunch; - uint16_t dieCounter; - std::string RunType; - std::string DepProcessName[MAXDEPENDANCY]; - std::string DepModuleName[MAXDEPENDANCY]; - std::string LogFile; -} processInfo; - -typedef std::vector processList; - - -/** - * @brief shmProcessStatus Process Status Structure - */ -struct shmProcessStatus -{ - pid_t ProcessID; //!< Process ID number - char StateChangeDate[24]; //!< Last time/date state change - uint16_t ProcessOpState; //!< Process Operational State -}; - -/** - * @brief processStatus Process Status Structure - */ -typedef struct processStatus -{ - std::string ProcessName; - std::string ModuleName; - uint16_t tableIndex; -} processstatus; - -typedef std::vector processStatusList; - -/** - * @brief shmDeviceStatus Status Structure - */ -#define NAMESIZE 128 -#define DATESIZE 24 - -struct shmDeviceStatus -{ - char Name[NAMESIZE]; //!< Device Name - char StateChangeDate[DATESIZE]; //!< Last time/date state change - uint16_t OpState; //!< Device Operational State -}; - - -/** - * @brief MonitorConfig class builds a list of process it starts - * - */ -class MonitorConfig -{ -public: - /** - * @brief constructor - * - */ - MonitorConfig(); - /** - * @brief destructor - * - */ - ~MonitorConfig(); - - /** - * moduleName accessor - */ - const std::string& moduleName() const - { - return flocalModuleName; - } - - /** - * moduleType accessor - */ - const std::string& moduleType() const - { - return flocalModuleType; - } - - /** - * moduleName accessor - */ - const uint16_t& moduleID() const - { - return flocalModuleID; - } - - /** - * parentName accessor - */ - const std::string& OAMParentName() const - { - return fOAMParentModuleName; - } - - /** - * parentFlag accessor - */ - const bool& OAMParentFlag() const - { - return fOAMParentModuleFlag; - } - - /** - * ServerInstallType accessor - */ - const uint16_t& ServerInstallType() const - { - return fserverInstallType; - } - - /** - * StandbyName accessor - */ - const std::string& OAMStandbyName() const - { - return fOAMStandbyModuleName; - } - - /** - * standbyParentFlag accessor - */ - const bool& OAMStandbyParentFlag() const - { - return fOAMStandbyModuleFlag; - } - - /** - * SoftwareVersion accessor - */ - const std::string& SoftwareVersion() const - { - return fsoftwareVersion; - } - - /** - * SoftwareRelease accessor - */ - const std::string& SoftwareRelease() const - { - return fsoftwareRelease; - } - - /** - * Build a list of processes the monitor started - */ - void buildList(std::string ProcessModuleType, - std::string processName, - std::string ProcessLocation, - std::string arg_list[MAXARGUMENTS], - uint16_t launchID, - pid_t processID, - uint16_t state, - uint16_t BootLaunch, - std::string RunType, - std::string DepProcessName[MAXDEPENDANCY], - std::string DepModuleName[MAXDEPENDANCY], - std::string logFile); - - /** - * return the process list - */ - processList* monitoredListPtr(); - - /** - * return the process ID - */ - - void findProcessInfo (pid_t processID, processInfo& info); - - /** - * @brief copy constructor - * - */ - MonitorConfig(const MonitorConfig& rhs); - /** - * @brief copy assignment operator - * - */ - MonitorConfig& operator=(const MonitorConfig& rhs); - - -private: - std::string flocalModuleName; - std::string flocalModuleType; - uint16_t flocalModuleID; - std::string fOAMParentModuleName; - bool fOAMParentModuleFlag; - uint16_t fserverInstallType; - std::string fOAMStandbyModuleName; - bool fOAMStandbyModuleFlag; - - processList fmonitoredListPtr; - std::string fsoftwareVersion; - std::string fsoftwareRelease; -}; - -/** - * @brief MonitorLog class logs the activities between Process Monitor - * and its child processes for debugging purpose. - */ -class MonitorLog -{ -public: - /** - * Constructor:open the log file for writing - */ - MonitorLog(); - - /** - * Destructor:close the log file - */ - ~MonitorLog(); - - /** - * @brief Write the message to the log - */ - void writeLog(const int lineNumber, const std::string logContent, const logging::LOG_TYPE logType = logging::LOG_TYPE_INFO); - - /** - * @brief Write the message to the log - */ - void writeLog(const int lineNumber, const int logContent, const logging::LOG_TYPE logType = logging::LOG_TYPE_INFO); - - -private: - std::ofstream logFile; - /** - * @brief copy constructor - * - */ - MonitorLog(const MonitorLog& rhs); - /** - * @brief copy assignment operator - * - */ - MonitorLog& operator=(const MonitorLog& rhs); - -}; - - -/** - * @brief ProcessMonitor class takes requests from Process Manager for - * starting, stopping, and restarting processes. It monitors the processes it started, - * logs its events, and restarts the died processes. - */ -class ProcessMonitor -{ -public: - /** - * Constructor - */ - ProcessMonitor(MonitorConfig& config, MonitorLog& log); - - /** - * Default Destructor - */ - ~ProcessMonitor(); - /** - * Start a process - */ - pid_t startProcess(std::string processModuleType, std::string processName, std::string processLocation, - std::string arg_list[MAXARGUMENTS], uint16_t launchID, uint16_t BootLaunch, - std::string RunType, std::string DepProcessName[MAXDEPENDANCY], - std::string DepModuleName[MAXDEPENDANCY], std::string LogFile, uint16_t startType, - uint16_t actIndicator = oam::FORCEFUL); - - /** - * get Alarm Data and send to requester - */ - int getAlarmData(messageqcpp::IOSocket mq, int type, std::string date); - - /** - * Stop a process - */ - int stopProcess(pid_t processID, std::string processName, std::string processLocation, int actionIndicator, bool manualFlag); - - /** - * Re-init a process - */ - int reinitProcess(pid_t processID, std::string processName, int actionIndicator); - - /** - * Stop all processes started by this monitor - */ - int stopAllProcess(int actionIndicator); - - /** - * receive and process message - */ - void processMessage(messageqcpp::ByteStream msg, messageqcpp::IOSocket mq); - - /** - * send message to the monitored process or the process manager - */ - int sendMessage(const std::string& toWho, const std::string& message); - - /** - * check child process heart beat - */ - int checkHeartBeat(const std::string processName); - - /** - * send a trap and log the process information - */ - void sendAlarm(std::string alarmItem, oam::ALARMS alarmID, int action); - - /** - *@brief update process disk state - */ - bool updateProcessInfo(std::string processName, int state, pid_t pid); - - /** - *@brief update log configuration - */ - int updateLog(std::string action, std::string level); - - /** - *@brief get log configuration - */ - int getConfigLog(); - - /** - *@brief change Log File priviledges - */ - void changeModLog(); - - /** - *@brief check Power-On Test results - */ - void checkPowerOnResults(); - - /** - *@brief update Config - */ - int updateConfig(); - - /** - *@brief build System Tables - */ - int buildSystemTables(); - - /** - *@brief configure Module fucntionality - */ - int configureModule(std::string configureModuleName); - - /** - *@brief reconfigure Module fucntionality - */ - int reconfigureModule(std::string reconfigureModuleName); - - /** - *@brief check Single Process State - */ - int checkSpecialProcessState( std::string processName, std::string runType, std::string processModuleType ); - - /** - *@brief Check if Mate Module is Active - */ - int checkMateModuleState(); - - /** - *@brief Create the Calpont Data directories - */ - - int createDataDirs(std::string cloud); - - /** - *@brief Process Restarted, inform Process Mgr - */ - - int processRestarted( std::string processName, bool manual = true ); - - /** - *@brief update Core Dump configuration - */ - int updateCore(std::string action); - - /** - *@brief Remove XM ProcMon setup - */ - int removeXMProcMon(); - - /** - *@brief unmount from associated system - */ - int umountSystem(); - - /** - *@brief Runs DB sanity test - */ - int runStartupTest(); - - /** - *@brief Update Calpont Config File - */ - int updateConfigFile(messageqcpp::ByteStream msg); - - int getDBRMdata(std::string *path); - - /** - *@brief Send Msg to Process Monitor - */ - std::string sendMsgProcMon1( std::string module, messageqcpp::ByteStream msg, int requestID ); - - /** - *@brief check if module failover is needed due to a process outage - */ - void checkModuleFailover(std::string processName); - - /** - *@brief run upgrade script - */ - int runUpgrade(); - - /** - *@brief change my.cnf - */ - int changeMyCnf(std::string type); - - - /** - *@brief run MariaDB Command Line script - */ - int runMariaDBCommandLine(std::string command); - - /** - *@brief run Master Replication script - */ - int runMasterRep(std::string& masterLogFile, std::string& masterLogPos); - - /** - *@brief run Master Distribution - */ - int runMasterDist(std::string& password, std::string& slaveModule); - - /** - *@brief run Slave Replication script - */ - int runSlaveRep(std::string& masterLogFile, std::string& masterLogPos); - - /** - *@brief run Disable Replication script - */ - - int runDisableRep(); - - /** - *@brief Amazon Instance and IP check - */ - bool amazonIPCheck(); - - /** - *@brief UnMOunt any extra dbroots - */ - void unmountExtraDBroots(); - - /** - *@brief Calculate TotalUmMemory - */ - void calTotalUmMemory(); - - /** - *@brief Amazon Volume check - */ - bool amazonVolumeCheck(int dbrootID = 0); - - /** - *@brief Check Data Mounts - */ - int checkDataMount(); - - /** @brief flush inode cache - */ - void flushInodeCache(); - - /** @brief glusterAssign - */ - int glusterAssign(std::string dbrootID); - - /** @brief glusterUnassign - */ - int glusterUnassign(std::string dbrootID); - - int syncFS(); - /** - * return the process list - */ -// processStatusList* statusListPtr(); - -// processStatusList fstatusListPtr; - -private: - /*need define copy ans assignment constructor */ - MonitorConfig& config; - MonitorLog& log; -}; - -} //end of namespace - - - -#endif // _PROCESSMONITOR_H_ diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 5a50f6357..4df1fb652 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -6,7 +6,6 @@ add_subdirectory(getConfig) add_subdirectory(cplogger) add_subdirectory(clearShm) add_subdirectory(setConfig) -add_subdirectory(configMgt) add_subdirectory(viewtablelock) add_subdirectory(cleartablelock) add_subdirectory(ddlcleanup) diff --git a/tools/brmtest/CMakeLists.txt b/tools/brmtest/CMakeLists.txt deleted file mode 100644 index 4f2f5f1b0..000000000 --- a/tools/brmtest/CMakeLists.txt +++ /dev/null @@ -1,45 +0,0 @@ - -include_directories(${KDE4_INCLUDES} ${KDE4_INCLUDE_DIR} ${QT_INCLUDES} ) - - -########### next target ############### - -set(brmtest_SRCS brmtest.cpp locks.cpp) - -kde4_add_executable(brmtest ${brmtest_SRCS}) - -target_link_libraries(brmtest ${KDE4_KDECORE_LIBS} execplan joblist rowgroup dataconvert loggingcpp @boost_thread_lib@ brm messageqcpp rwlock configcpp xml2 joiner oamcpp snmpmanager @boost_filesystem_lib@ @boost_date_time_lib@ multicast funcexp) - -install(TARGETS brmtest ${INSTALL_TARGETS_DEFAULT_ARGS}) - - -########### install files ############### - - - - -#original Makefile.am contents follow: - -## $Id: Makefile.am 333 2009-04-03 20:35:04Z rdempsey $ -### Process this file with automake to produce Makefile.in -# -#AM_CPPFLAGS = $(idb_cppflags) -#AM_CFLAGS = $(idb_cflags) -#AM_CXXFLAGS = $(idb_cxxflags) -#AM_LDFLAGS = $(idb_ldflags) -#bin_PROGRAMS = brmtest -#brmtest_SOURCES = brmtest.cpp locks.cpp -#brmtest_CPPFLAGS = -I@prefix@/Calpont/include -I/usr/include/libxml2 $(AM_CPPFLAGS) -#brmtest_LDFLAGS = @idb_common_ldflags@ -lexecplan -ljoblist -lrowgroup -ldataconvert -lloggingcpp -l@boost_thread_lib@ -lbrm -lmessageqcpp -lrwlock -lconfigcpp -lxml2 -ljoiner \ -#-loamcpp -lsnmpmanager -l@boost_filesystem_lib@ -l@boost_date_time_lib@ @netsnmp_libs@ -lmulticast -lfuncexp $(AM_LDFLAGS) -# -#test: -# -#coverage: -# -#leakcheck: -# -#docs: -# -#bootstrap: install-data-am -# diff --git a/tools/brmtest/brmtest.cpp b/tools/brmtest/brmtest.cpp deleted file mode 100644 index 6162c892d..000000000 --- a/tools/brmtest/brmtest.cpp +++ /dev/null @@ -1,680 +0,0 @@ -/* -* $Id: brmtest.cpp 1739 2012-03-22 12:57:59Z pleblanc $ -*/ - -#include -#include -#include -#include -#include -using namespace std; - -#include -#include -#include - -#include "calpontsystemcatalog.h" -using namespace execplan; - -#include "extentmap.h" -#include "blockresolutionmanager.h" -using namespace BRM; - -#include "configcpp.h" -using namespace config; - -extern int query_locks(); -extern int reset_locks(); - -namespace -{ - -boost::shared_ptr Cat; - -void LBIDList(CalpontSystemCatalog::OID OID, ostringstream& cout_str) -{ - LBIDRange_v LBIDRanges; - - LBIDRange_v::size_type RangeCount; - ExtentMap em; - LBIDRange_v::size_type i; - LBIDRange LBIDR; - - cout_str << "HWM = " << em.getHWM(OID) << endl; - - em.lookup(OID, LBIDRanges); - RangeCount = LBIDRanges.size(); - idbassert(RangeCount < 1000); - - for (i = 0; i < RangeCount; i++) - { - LBIDR = LBIDRanges[i]; - cout_str << LBIDR.start << " - " << (LBIDR.start + LBIDR.size - 1) << " (" << LBIDR.size << ')'; - int64_t max = -1, min = -1; - int32_t seqNum = 0; - em.getMaxMin(LBIDR.start, max, min, seqNum); - cout_str << " min: " << min << ", max: " << max << ", seqNum: " << seqNum << endl; - } - - cout_str << endl; -} - -void doit(const CalpontSystemCatalog::TableColName& tcn, ostringstream& cout_str) -{ - CalpontSystemCatalog::OID OID; - - try - { - OID = Cat->columnRID(tcn).objnum; - } - catch (...) - { - cout_str << tcn.schema << '.' << tcn.table << '.' << tcn.column << ": OID not found" << endl; - return; - } - - if (OID > 0) - { - - CalpontSystemCatalog::OID ioid = Cat->lookupOID(tcn); - CalpontSystemCatalog::ColType CT = Cat->colType(ioid); - - CalpontSystemCatalog::DictOID DOID = CT.ddn; - - int DictOID = DOID.dictOID; - int ListOID = DOID.listOID; - int TreeOID = DOID.treeOID; - - cout_str << tcn.schema << '.' << tcn.table << '.' << tcn.column << ": OID = " << OID << endl; - - try - { - LBIDList(OID, cout_str); - } - catch (exception& ex) - { - cerr << ex.what() << endl; - } - - if (DictOID > 0) - { - cout_str << tcn.schema << '.' << tcn.table << '.' << tcn.column << ": DictOID = " << DictOID << endl; - - try - { - LBIDList(DictOID, cout_str); - } - catch (exception& ex) - { - cerr << ex.what() << endl; - } - } - - if (ListOID > 0) - { - cout_str << tcn.schema << '.' << tcn.table << '.' << tcn.column << ": DictListOID = " << ListOID << endl; - - try - { - LBIDList(ListOID, cout_str); - } - catch (exception& ex) - { - cerr << ex.what() << endl; - } - } - - if (TreeOID > 0) - { - cout_str << tcn.schema << '.' << tcn.table << '.' << tcn.column << ": DictTreeOID = " << TreeOID << endl; - - try - { - LBIDList(TreeOID, cout_str); - } - catch (exception& ex) - { - cerr << ex.what() << endl; - } - } - - CalpontSystemCatalog::IndexOID IOID = Cat->lookupIndexNbr(tcn); - - if (IOID.objnum > 0) - { - cout_str << tcn.schema << '.' << tcn.table << '.' << tcn.column << ": IndexOID = " << IOID.objnum << endl; - - try - { - LBIDList(IOID.objnum, cout_str); - } - catch (exception& ex) - { - cerr << ex.what() << endl; - } - } - - if (IOID.listOID > 0) - { - cout_str << tcn.schema << '.' << tcn.table << '.' << tcn.column << ": IndexLstOID = " << IOID.listOID << endl; - - try - { - LBIDList(IOID.listOID, cout_str); - } - catch (exception& ex) - { - cerr << ex.what() << endl; - } - } - } - else - { - cout_str << tcn.schema << '.' << tcn.table << '.' << tcn.column << ": OID was zero!" << endl; - } -} - -void usage(ostringstream& cout_str) -{ - cout_str << "usage: brmtest [-hrts] [-l LBID] [-c schema]" << endl; - cout_str << "\t-h display this help" << endl; - cout_str << "\t-r reset brm locks" << endl; - cout_str << "\t-l LBID display info about LBID" << endl; - cout_str << "\t-t dump TPC-H tables" << endl; - cout_str << "\t-s don't dump system catalog" << endl; - cout_str << "\t-c schema seach for TPC-H tables in schema" << endl; -} - -} - -int main(int argc, char** argv) -{ - int c; - - bool rflg = false; - bool lflg = false; - bool tflg = false; - bool sflg = true; - bool qflg = false; - - opterr = 0; - - uint64_t lbid = 0; - - ostringstream cout_str; - ostringstream cerr_str; - - string schema("tpch"); - - while ((c = getopt(argc, argv, "hrl:tsc:q")) != EOF) - switch (c) - { - case 'r': - rflg = true; - break; - - case 'l': - lflg = true; - lbid = strtoul(optarg, 0, 0); - break; - - case 'h': - usage(cout_str); - cerr << cout_str.str() << endl; - exit(0); - break; - - case 't': - tflg = true; - break; - - case 's': - sflg = false; - break; - - case 'c': - schema = optarg; - break; - - case 'q': - qflg = true; - break; - - default: - usage(cout_str); - cerr << cout_str.str() << endl; - exit(1); - break; - } - - if (rflg) - { - reset_locks(); - return 0; - } - - if (query_locks() != 0) - { - cerr << "BRM is locked!" << endl; - return 1; - } - - if (lflg) - { - BlockResolutionManager brm; - uint16_t ver = 0; - BRM::OID_t oid; - uint32_t fbo; - int rc; - rc = brm.lookup(lbid, ver, false, oid, fbo); - idbassert(rc == 0); - - if (qflg) - cout << oid << endl; - else - cout << "LBID " << lbid << " is part of OID " << oid << " at FBO " << fbo << endl; - - return 0; - } - - //Now, close out all output so we don't get any debug from PG/RA - int fd; - ::close(2); - ::close(1); - //fd = open("./brmtest.out", O_WRONLY|O_CREAT|O_TRUNC, 0666); - fd = open("/dev/null", O_WRONLY); - idbassert(fd >= 0); - - if (fd != 1) dup2(fd, 1); - - //fd = open("./brmtest.err", O_WRONLY|O_CREAT|O_TRUNC, 0666); - fd = open("/dev/null", O_WRONLY); - idbassert(fd >= 0); - - if (fd != 2) dup2(fd, 2); - - Cat = CalpontSystemCatalog::makeCalpontSystemCatalog(); - - fd = ::open("/dev/tty", O_WRONLY); - idbassert(fd >= 0); - - string status; - - if (tflg) - { - const string region("region"); - const string nation("nation"); - const string customer("customer"); - const string orders("orders"); - const string supplier("supplier"); - const string partsupp("partsupp"); - const string lineitem("lineitem"); - const string part("part"); - - status = "Reading REGION...\n"; - ::write(fd, status.c_str(), status.length()); - - doit(make_tcn(schema, region, "r_regionkey"), cout_str); - doit(make_tcn(schema, region, "r_name"), cout_str); - doit(make_tcn(schema, region, "r_comment"), cout_str); - - status = "Reading NATION...\n"; - ::write(fd, status.c_str(), status.length()); - - doit(make_tcn(schema, nation, "n_nationkey"), cout_str); - doit(make_tcn(schema, nation, "n_name"), cout_str); - doit(make_tcn(schema, nation, "n_regionkey"), cout_str); - doit(make_tcn(schema, nation, "n_comment"), cout_str); - - status = "Reading CUSTOMER...\n"; - ::write(fd, status.c_str(), status.length()); - - doit(make_tcn(schema, customer, "c_custkey"), cout_str); - doit(make_tcn(schema, customer, "c_name"), cout_str); - doit(make_tcn(schema, customer, "c_address"), cout_str); - doit(make_tcn(schema, customer, "c_nationkey"), cout_str); - doit(make_tcn(schema, customer, "c_phone"), cout_str); - doit(make_tcn(schema, customer, "c_acctbal"), cout_str); - doit(make_tcn(schema, customer, "c_mktsegment"), cout_str); - doit(make_tcn(schema, customer, "c_comment"), cout_str); - - status = "Reading ORDERS...\n"; - ::write(fd, status.c_str(), status.length()); - - doit(make_tcn(schema, orders, "o_orderkey"), cout_str); - doit(make_tcn(schema, orders, "o_custkey"), cout_str); - doit(make_tcn(schema, orders, "o_orderstatus"), cout_str); - doit(make_tcn(schema, orders, "o_totalprice"), cout_str); - doit(make_tcn(schema, orders, "o_orderdate"), cout_str); - doit(make_tcn(schema, orders, "o_orderpriority"), cout_str); - doit(make_tcn(schema, orders, "o_clerk"), cout_str); - doit(make_tcn(schema, orders, "o_shippriority"), cout_str); - doit(make_tcn(schema, orders, "o_comment"), cout_str); - - status = "Reading PART...\n"; - ::write(fd, status.c_str(), status.length()); - - doit(make_tcn(schema, part, "p_partkey"), cout_str); - doit(make_tcn(schema, part, "p_name"), cout_str); - doit(make_tcn(schema, part, "p_mfgr"), cout_str); - doit(make_tcn(schema, part, "p_brand"), cout_str); - doit(make_tcn(schema, part, "p_type"), cout_str); - doit(make_tcn(schema, part, "p_size"), cout_str); - doit(make_tcn(schema, part, "p_container"), cout_str); - doit(make_tcn(schema, part, "p_retailprice"), cout_str); - doit(make_tcn(schema, part, "p_comment"), cout_str); - - status = "Reading SUPPLIER...\n"; - ::write(fd, status.c_str(), status.length()); - - doit(make_tcn(schema, supplier, "s_suppkey"), cout_str); - doit(make_tcn(schema, supplier, "s_name"), cout_str); - doit(make_tcn(schema, supplier, "s_address"), cout_str); - doit(make_tcn(schema, supplier, "s_nationkey"), cout_str); - doit(make_tcn(schema, supplier, "s_phone"), cout_str); - doit(make_tcn(schema, supplier, "s_acctbal"), cout_str); - doit(make_tcn(schema, supplier, "s_comment"), cout_str); - - status = "Reading PARTSUPP...\n"; - ::write(fd, status.c_str(), status.length()); - - doit(make_tcn(schema, partsupp, "ps_partkey"), cout_str); - doit(make_tcn(schema, partsupp, "ps_suppkey"), cout_str); - doit(make_tcn(schema, partsupp, "ps_availqty"), cout_str); - doit(make_tcn(schema, partsupp, "ps_supplycost"), cout_str); - doit(make_tcn(schema, partsupp, "ps_comment"), cout_str); - - status = "Reading LINEITEM...\n"; - ::write(fd, status.c_str(), status.length()); - - doit(make_tcn(schema, lineitem, "l_orderkey"), cout_str); - doit(make_tcn(schema, lineitem, "l_linenumber"), cout_str); - doit(make_tcn(schema, lineitem, "l_partkey"), cout_str); - doit(make_tcn(schema, lineitem, "l_suppkey"), cout_str); - doit(make_tcn(schema, lineitem, "l_quantity"), cout_str); - doit(make_tcn(schema, lineitem, "l_extendedprice"), cout_str); - doit(make_tcn(schema, lineitem, "l_discount"), cout_str); - doit(make_tcn(schema, lineitem, "l_tax"), cout_str); - doit(make_tcn(schema, lineitem, "l_returnflag"), cout_str); - doit(make_tcn(schema, lineitem, "l_linestatus"), cout_str); - doit(make_tcn(schema, lineitem, "l_shipdate"), cout_str); - doit(make_tcn(schema, lineitem, "l_commitdate"), cout_str); - doit(make_tcn(schema, lineitem, "l_receiptdate"), cout_str); - doit(make_tcn(schema, lineitem, "l_shipinstruct"), cout_str); - doit(make_tcn(schema, lineitem, "l_shipmode"), cout_str); - doit(make_tcn(schema, lineitem, "l_comment"), cout_str); - } - - if (sflg) - { - status = "Reading CALPONTSYS...\n\n"; - ::write(fd, status.c_str(), status.length()); - - schema = CALPONT_SCHEMA; - string table = SYSCOLUMN_TABLE; - doit(make_tcn(schema, table, SCHEMA_COL), cout_str); - doit(make_tcn(schema, table, TABLENAME_COL), cout_str); - doit(make_tcn(schema, table, COLNAME_COL), cout_str); - ///doit(make_tcn(schema, table, INDEXNAME_COL), cout_str); - ///doit(make_tcn(schema, table, INDEXTYPE_COL), cout_str); - ///doit(make_tcn(schema, table, MULTICOLFLAG_COL), cout_str); - doit(make_tcn(schema, table, OBJECTID_COL), cout_str); - doit(make_tcn(schema, table, DICTOID_COL), cout_str); - doit(make_tcn(schema, table, LISTOBJID_COL), cout_str); - doit(make_tcn(schema, table, TREEOBJID_COL), cout_str); - doit(make_tcn(schema, table, DATATYPE_COL), cout_str); - ///doit(make_tcn(schema, table, COLUMNTYPE_COL), cout_str); - doit(make_tcn(schema, table, COLUMNLEN_COL), cout_str); - doit(make_tcn(schema, table, COLUMNPOS_COL), cout_str); - ///doit(make_tcn(schema, table, CREATEDATE_COL), cout_str); - doit(make_tcn(schema, table, LASTUPDATE_COL), cout_str); - doit(make_tcn(schema, table, DEFAULTVAL_COL), cout_str); - doit(make_tcn(schema, table, NULLABLE_COL), cout_str); - doit(make_tcn(schema, table, SCALE_COL), cout_str); - doit(make_tcn(schema, table, PRECISION_COL), cout_str); - ///doit(make_tcn(schema, table, NUMNULLS_COL), cout_str); - ///doit(make_tcn(schema, table, NUMDISTINCTVAL_COL), cout_str); - ///doit(make_tcn(schema, table, MINVAL_COL), cout_str); - ///doit(make_tcn(schema, table, MAXVAL_COL), cout_str); - ///doit(make_tcn(schema, table, DENSITY_COL), cout_str); - ///doit(make_tcn(schema, table, AVGRECORDLEN_COL), cout_str); - ///doit(make_tcn(schema, table, RECORDCOUNT_COL), cout_str); - ///doit(make_tcn(schema, table, LASTANYLDATE_COL), cout_str); - ///doit(make_tcn(schema, table, SAMPLESIZE_COL), cout_str); - ///doit(make_tcn(schema, table, PROPERTY_COL), cout_str); - doit(make_tcn(schema, table, AUTOINC_COL), cout_str); - ///doit(make_tcn(schema, table, DATANAME_COL), cout_str); - ///doit(make_tcn(schema, table, CATEGORY_COL), cout_str); - ///doit(make_tcn(schema, table, SIZE_COL), cout_str); - ///doit(make_tcn(schema, table, DESC_COL), cout_str); - ///doit(make_tcn(schema, table, INIT_COL), cout_str); - ///doit(make_tcn(schema, table, NEXT_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTNAME_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTNUM_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTTYPE_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTPRIM_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTTEXT_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTSTATUS_COL), cout_str); - ///doit(make_tcn(schema, table, TREELEVEL_COL), cout_str); - ///doit(make_tcn(schema, table, LEAFCOUNT_COL), cout_str); - ///doit(make_tcn(schema, table, DISTINCTKEYS_COL), cout_str); - ///doit(make_tcn(schema, table, LEAFBLOCKS_COL), cout_str); - ///doit(make_tcn(schema, table, AVGLEAFCOUNT_COL), cout_str); - ///doit(make_tcn(schema, table, AVGDATABLOCK_COL), cout_str); - ///doit(make_tcn(schema, table, CLUSTERFACTOR_COL), cout_str); - - table = SYSTABLE_TABLE; - doit(make_tcn(schema, table, SCHEMA_COL), cout_str); - doit(make_tcn(schema, table, TABLENAME_COL), cout_str); - ///doit(make_tcn(schema, table, COLNAME_COL), cout_str); - ///doit(make_tcn(schema, table, INDEXNAME_COL), cout_str); - ///doit(make_tcn(schema, table, INDEXTYPE_COL), cout_str); - ///doit(make_tcn(schema, table, MULTICOLFLAG_COL), cout_str); - doit(make_tcn(schema, table, OBJECTID_COL), cout_str); - ///doit(make_tcn(schema, table, DICTOID_COL), cout_str); - ///doit(make_tcn(schema, table, LISTOBJID_COL), cout_str); - ///doit(make_tcn(schema, table, TREEOBJID_COL), cout_str); - ///doit(make_tcn(schema, table, DATATYPE_COL), cout_str); - ///doit(make_tcn(schema, table, COLUMNTYPE_COL), cout_str); - ///doit(make_tcn(schema, table, COLUMNLEN_COL), cout_str); - ///doit(make_tcn(schema, table, COLUMNPOS_COL), cout_str); - doit(make_tcn(schema, table, CREATEDATE_COL), cout_str); - doit(make_tcn(schema, table, LASTUPDATE_COL), cout_str); - ///doit(make_tcn(schema, table, DEFAULTVAL_COL), cout_str); - ///doit(make_tcn(schema, table, NULLABLE_COL), cout_str); - ///doit(make_tcn(schema, table, SCALE_COL), cout_str); - ///doit(make_tcn(schema, table, PRECISION_COL), cout_str); - ///doit(make_tcn(schema, table, NUMNULLS_COL), cout_str); - ///doit(make_tcn(schema, table, NUMDISTINCTVAL_COL), cout_str); - ///doit(make_tcn(schema, table, MINVAL_COL), cout_str); - ///doit(make_tcn(schema, table, MAXVAL_COL), cout_str); - ///doit(make_tcn(schema, table, DENSITY_COL), cout_str); - ///doit(make_tcn(schema, table, AVGRECORDLEN_COL), cout_str); - ///doit(make_tcn(schema, table, RECORDCOUNT_COL), cout_str); - ///doit(make_tcn(schema, table, LASTANYLDATE_COL), cout_str); - ///doit(make_tcn(schema, table, SAMPLESIZE_COL), cout_str); - ///doit(make_tcn(schema, table, PROPERTY_COL), cout_str); - ///doit(make_tcn(schema, table, AUTOINC_COL), cout_str); - ///doit(make_tcn(schema, table, DATANAME_COL), cout_str); - ///doit(make_tcn(schema, table, CATEGORY_COL), cout_str); - ///doit(make_tcn(schema, table, SIZE_COL), cout_str); - ///doit(make_tcn(schema, table, DESC_COL), cout_str); - doit(make_tcn(schema, table, INIT_COL), cout_str); - doit(make_tcn(schema, table, NEXT_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTNAME_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTNUM_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTTYPE_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTPRIM_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTTEXT_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTSTATUS_COL), cout_str); - ///doit(make_tcn(schema, table, TREELEVEL_COL), cout_str); - ///doit(make_tcn(schema, table, LEAFCOUNT_COL), cout_str); - ///doit(make_tcn(schema, table, DISTINCTKEYS_COL), cout_str); - ///doit(make_tcn(schema, table, LEAFBLOCKS_COL), cout_str); - ///doit(make_tcn(schema, table, AVGLEAFCOUNT_COL), cout_str); - ///doit(make_tcn(schema, table, AVGDATABLOCK_COL), cout_str); - ///doit(make_tcn(schema, table, CLUSTERFACTOR_COL), cout_str); - - table = SYSSCHEMA_TABLE; - ///doit(make_tcn(schema, table, SCHEMA_COL), cout_str); - ///doit(make_tcn(schema, table, TABLENAME_COL), cout_str); - ///doit(make_tcn(schema, table, COLNAME_COL), cout_str); - ///doit(make_tcn(schema, table, INDEXNAME_COL), cout_str); - ///doit(make_tcn(schema, table, INDEXTYPE_COL), cout_str); - ///doit(make_tcn(schema, table, MULTICOLFLAG_COL), cout_str); - ///doit(make_tcn(schema, table, OBJECTID_COL), cout_str); - ///doit(make_tcn(schema, table, DICTOID_COL), cout_str); - ///doit(make_tcn(schema, table, LISTOBJID_COL), cout_str); - ///doit(make_tcn(schema, table, TREEOBJID_COL), cout_str); - ///doit(make_tcn(schema, table, DATATYPE_COL), cout_str); - ///doit(make_tcn(schema, table, COLUMNTYPE_COL), cout_str); - ///doit(make_tcn(schema, table, COLUMNLEN_COL), cout_str); - ///doit(make_tcn(schema, table, COLUMNPOS_COL), cout_str); - ///doit(make_tcn(schema, table, CREATEDATE_COL), cout_str); - ///doit(make_tcn(schema, table, LASTUPDATE_COL), cout_str); - ///doit(make_tcn(schema, table, DEFAULTVAL_COL), cout_str); - ///doit(make_tcn(schema, table, NULLABLE_COL), cout_str); - ///doit(make_tcn(schema, table, SCALE_COL), cout_str); - ///doit(make_tcn(schema, table, PRECISION_COL), cout_str); - ///doit(make_tcn(schema, table, NUMNULLS_COL), cout_str); - ///doit(make_tcn(schema, table, NUMDISTINCTVAL_COL), cout_str); - ///doit(make_tcn(schema, table, MINVAL_COL), cout_str); - ///doit(make_tcn(schema, table, MAXVAL_COL), cout_str); - ///doit(make_tcn(schema, table, DENSITY_COL), cout_str); - ///doit(make_tcn(schema, table, AVGRECORDLEN_COL), cout_str); - ///doit(make_tcn(schema, table, RECORDCOUNT_COL), cout_str); - ///doit(make_tcn(schema, table, LASTANYLDATE_COL), cout_str); - ///doit(make_tcn(schema, table, SAMPLESIZE_COL), cout_str); - ///doit(make_tcn(schema, table, PROPERTY_COL), cout_str); - ///doit(make_tcn(schema, table, AUTOINC_COL), cout_str); - ///doit(make_tcn(schema, table, DATANAME_COL), cout_str); - ///doit(make_tcn(schema, table, CATEGORY_COL), cout_str); - ///doit(make_tcn(schema, table, SIZE_COL), cout_str); - ///doit(make_tcn(schema, table, DESC_COL), cout_str); - ///doit(make_tcn(schema, table, INIT_COL), cout_str); - ///doit(make_tcn(schema, table, NEXT_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTNAME_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTNUM_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTTYPE_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTPRIM_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTTEXT_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTSTATUS_COL), cout_str); - ///doit(make_tcn(schema, table, TREELEVEL_COL), cout_str); - ///doit(make_tcn(schema, table, LEAFCOUNT_COL), cout_str); - ///doit(make_tcn(schema, table, DISTINCTKEYS_COL), cout_str); - ///doit(make_tcn(schema, table, LEAFBLOCKS_COL), cout_str); - ///doit(make_tcn(schema, table, AVGLEAFCOUNT_COL), cout_str); - ///doit(make_tcn(schema, table, AVGDATABLOCK_COL), cout_str); - ///doit(make_tcn(schema, table, CLUSTERFACTOR_COL), cout_str); - - table = SYSINDEX_TABLE; - doit(make_tcn(schema, table, SCHEMA_COL), cout_str); - doit(make_tcn(schema, table, TABLENAME_COL), cout_str); - ///doit(make_tcn(schema, table, COLNAME_COL), cout_str); - doit(make_tcn(schema, table, INDEXNAME_COL), cout_str); - doit(make_tcn(schema, table, INDEXTYPE_COL), cout_str); - doit(make_tcn(schema, table, MULTICOLFLAG_COL), cout_str); - ///doit(make_tcn(schema, table, OBJECTID_COL), cout_str); - ///doit(make_tcn(schema, table, DICTOID_COL), cout_str); - doit(make_tcn(schema, table, LISTOBJID_COL), cout_str); - doit(make_tcn(schema, table, TREEOBJID_COL), cout_str); - ///doit(make_tcn(schema, table, DATATYPE_COL), cout_str); - ///doit(make_tcn(schema, table, COLUMNTYPE_COL), cout_str); - ///doit(make_tcn(schema, table, COLUMNLEN_COL), cout_str); - ///doit(make_tcn(schema, table, COLUMNPOS_COL), cout_str); - doit(make_tcn(schema, table, CREATEDATE_COL), cout_str); - doit(make_tcn(schema, table, LASTUPDATE_COL), cout_str); - ///doit(make_tcn(schema, table, DEFAULTVAL_COL), cout_str); - ///doit(make_tcn(schema, table, NULLABLE_COL), cout_str); - ///doit(make_tcn(schema, table, SCALE_COL), cout_str); - ///doit(make_tcn(schema, table, PRECISION_COL), cout_str); - ///doit(make_tcn(schema, table, NUMNULLS_COL), cout_str); - ///doit(make_tcn(schema, table, NUMDISTINCTVAL_COL), cout_str); - ///doit(make_tcn(schema, table, MINVAL_COL), cout_str); - ///doit(make_tcn(schema, table, MAXVAL_COL), cout_str); - ///doit(make_tcn(schema, table, DENSITY_COL), cout_str); - ///doit(make_tcn(schema, table, AVGRECORDLEN_COL), cout_str); - doit(make_tcn(schema, table, RECORDCOUNT_COL), cout_str); - doit(make_tcn(schema, table, LASTANYLDATE_COL), cout_str); - doit(make_tcn(schema, table, SAMPLESIZE_COL), cout_str); - ///doit(make_tcn(schema, table, PROPERTY_COL), cout_str); - ///doit(make_tcn(schema, table, AUTOINC_COL), cout_str); - ///doit(make_tcn(schema, table, DATANAME_COL), cout_str); - ///doit(make_tcn(schema, table, CATEGORY_COL), cout_str); - ///doit(make_tcn(schema, table, SIZE_COL), cout_str); - ///doit(make_tcn(schema, table, DESC_COL), cout_str); - ///doit(make_tcn(schema, table, INIT_COL), cout_str); - ///doit(make_tcn(schema, table, NEXT_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTNAME_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTNUM_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTTYPE_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTPRIM_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTTEXT_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTSTATUS_COL), cout_str); - doit(make_tcn(schema, table, TREELEVEL_COL), cout_str); - doit(make_tcn(schema, table, LEAFCOUNT_COL), cout_str); - doit(make_tcn(schema, table, DISTINCTKEYS_COL), cout_str); - doit(make_tcn(schema, table, LEAFBLOCKS_COL), cout_str); - doit(make_tcn(schema, table, AVGLEAFCOUNT_COL), cout_str); - doit(make_tcn(schema, table, AVGDATABLOCK_COL), cout_str); - doit(make_tcn(schema, table, CLUSTERFACTOR_COL), cout_str); - - table = SYSINDEXCOL_TABLE; - doit(make_tcn(schema, table, SCHEMA_COL), cout_str); - doit(make_tcn(schema, table, TABLENAME_COL), cout_str); - doit(make_tcn(schema, table, COLNAME_COL), cout_str); - doit(make_tcn(schema, table, INDEXNAME_COL), cout_str); - ///doit(make_tcn(schema, table, INDEXTYPE_COL), cout_str); - ///doit(make_tcn(schema, table, MULTICOLFLAG_COL), cout_str); - ///doit(make_tcn(schema, table, OBJECTID_COL), cout_str); - ///doit(make_tcn(schema, table, DICTOID_COL), cout_str); - ///doit(make_tcn(schema, table, LISTOBJID_COL), cout_str); - ///doit(make_tcn(schema, table, TREEOBJID_COL), cout_str); - ///doit(make_tcn(schema, table, DATATYPE_COL), cout_str); - ///doit(make_tcn(schema, table, COLUMNTYPE_COL), cout_str); - ///doit(make_tcn(schema, table, COLUMNLEN_COL), cout_str); - doit(make_tcn(schema, table, COLUMNPOS_COL), cout_str); - ///doit(make_tcn(schema, table, CREATEDATE_COL), cout_str); - ///doit(make_tcn(schema, table, LASTUPDATE_COL), cout_str); - ///doit(make_tcn(schema, table, DEFAULTVAL_COL), cout_str); - ///doit(make_tcn(schema, table, NULLABLE_COL), cout_str); - ///doit(make_tcn(schema, table, SCALE_COL), cout_str); - ///doit(make_tcn(schema, table, PRECISION_COL), cout_str); - ///doit(make_tcn(schema, table, NUMNULLS_COL), cout_str); - ///doit(make_tcn(schema, table, NUMDISTINCTVAL_COL), cout_str); - ///doit(make_tcn(schema, table, MINVAL_COL), cout_str); - ///doit(make_tcn(schema, table, MAXVAL_COL), cout_str); - ///doit(make_tcn(schema, table, DENSITY_COL), cout_str); - ///doit(make_tcn(schema, table, AVGRECORDLEN_COL), cout_str); - ///doit(make_tcn(schema, table, RECORDCOUNT_COL), cout_str); - ///doit(make_tcn(schema, table, LASTANYLDATE_COL), cout_str); - ///doit(make_tcn(schema, table, SAMPLESIZE_COL), cout_str); - ///doit(make_tcn(schema, table, PROPERTY_COL), cout_str); - ///doit(make_tcn(schema, table, AUTOINC_COL), cout_str); - ///doit(make_tcn(schema, table, DATANAME_COL), cout_str); - ///doit(make_tcn(schema, table, CATEGORY_COL), cout_str); - ///doit(make_tcn(schema, table, SIZE_COL), cout_str); - ///doit(make_tcn(schema, table, DESC_COL), cout_str); - ///doit(make_tcn(schema, table, INIT_COL), cout_str); - ///doit(make_tcn(schema, table, NEXT_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTNAME_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTNUM_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTTYPE_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTPRIM_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTTEXT_COL), cout_str); - ///doit(make_tcn(schema, table, CONSTRAINTSTATUS_COL), cout_str); - ///doit(make_tcn(schema, table, TREELEVEL_COL), cout_str); - ///doit(make_tcn(schema, table, LEAFCOUNT_COL), cout_str); - ///doit(make_tcn(schema, table, DISTINCTKEYS_COL), cout_str); - ///doit(make_tcn(schema, table, LEAFBLOCKS_COL), cout_str); - ///doit(make_tcn(schema, table, AVGLEAFCOUNT_COL), cout_str); - ///doit(make_tcn(schema, table, AVGDATABLOCK_COL), cout_str); - ///doit(make_tcn(schema, table, CLUSTERFACTOR_COL), cout_str); - } - - cout_str << ends; - ::write(fd, cout_str.str().c_str(), cout_str.str().length()); - - return 0; -} - diff --git a/tools/brmtest/locks.cpp b/tools/brmtest/locks.cpp deleted file mode 100644 index 61b95c278..000000000 --- a/tools/brmtest/locks.cpp +++ /dev/null @@ -1,82 +0,0 @@ -/* -* $Id: locks.cpp 282 2007-10-28 02:18:55Z rdempsey $ -*/ - -#include -#include -using namespace std; - -#include "sessionmanager.h" -#include "calpontsystemcatalog.h" -using namespace execplan; - -#include "shmkeys.h" -#include "rwlock.h" -#include "mastersegmenttable.h" -using namespace BRM; - -int query_locks() -{ - ShmKeys keys; - RWLock* rwlock[MasterSegmentTable::nTables]; - int RWLockKeys[MasterSegmentTable::nTables]; - int i; - - RWLockKeys[0] = keys.KEYRANGE_EXTENTMAP_BASE; - RWLockKeys[1] = keys.KEYRANGE_EMFREELIST_BASE; - RWLockKeys[2] = keys.KEYRANGE_VBBM_BASE; - RWLockKeys[3] = keys.KEYRANGE_VSS_BASE; - RWLockKeys[4] = keys.KEYRANGE_CL_BASE; - - for (i = 0; i < MasterSegmentTable::nTables; i++) - rwlock[i] = new RWLock(RWLockKeys[i]); - - for (i = 0; i < MasterSegmentTable::nTables; i++) - if (rwlock[i]->getWriting() > 0 || - //rwlock[i]->getReading() > 0 || - rwlock[i]->getWritersWaiting() > 0 || - rwlock[i]->getReadersWaiting() > 0) - return 1; - - for (i = 0; i < MasterSegmentTable::nTables; i++) - delete rwlock[i]; - - return 0; -} - -int reset_locks() -{ - ShmKeys keys; - RWLock* rwlock[MasterSegmentTable::nTables]; - int RWLockKeys[MasterSegmentTable::nTables]; - int i; - SessionManager sm(true); - - RWLockKeys[0] = keys.KEYRANGE_EXTENTMAP_BASE; - RWLockKeys[1] = keys.KEYRANGE_EMFREELIST_BASE; - RWLockKeys[2] = keys.KEYRANGE_VBBM_BASE; - RWLockKeys[3] = keys.KEYRANGE_VSS_BASE; - RWLockKeys[4] = keys.KEYRANGE_CL_BASE; - - for (i = 0; i < MasterSegmentTable::nTables; i++) - rwlock[i] = new RWLock(RWLockKeys[i]); - - for (i = 0; i < MasterSegmentTable::nTables; i++) - rwlock[i]->reset(); - - for (i = 0; i < MasterSegmentTable::nTables; i++) - delete rwlock[i]; - - try - { - sm.reset(); - } - catch (exception& e) - { - cout << e.what() << endl; - return -1; - } - - return 0; -} - diff --git a/tools/configMgt/CMakeLists.txt b/tools/configMgt/CMakeLists.txt deleted file mode 100644 index 56a4addc8..000000000 --- a/tools/configMgt/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ - -include_directories( ${ENGINE_COMMON_INCLUDES} ) - - -########### next target ############### - -set(autoConfigure_SRCS autoConfigure.cpp) - -add_executable(autoConfigure ${autoConfigure_SRCS}) - -target_link_libraries(autoConfigure ${ENGINE_LDFLAGS} ${NETSNMP_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_EXEC_LIBS}) - -install(TARGETS autoConfigure DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine) diff --git a/tools/configMgt/CalpontSystems.xml b/tools/configMgt/CalpontSystems.xml deleted file mode 100644 index bb3f304e0..000000000 --- a/tools/configMgt/CalpontSystems.xml +++ /dev/null @@ -1,137 +0,0 @@ - - - - - 26 - qcald02 - qalpont! - qaftest6 - *.x86_64.rpm - root - qcald02a - qalpont! - qaftest3 - *.x86_64.rpm - root - qperfd01 - qalpont! - srvqaperf2 - *.x86_64.rpm - root - devint2 - Calpont1 - srvperf4 - *.x86_64.rpm - root - demo01 - Calpont1 - srvdemo2 - *.x86_64.rpm - root - devint1 - Calpont1 - srvperf7 - *.x86_64.rpm - root - devint3 - Calpont1 - srvperf3 - *.x86_64.rpm - root - qcald02b - Calpont1 - qaftest2 - *.x86_64.rpm - root - caldev01 - qalpont! - srvswdev10 - *.x86_64.rpm - root - caldev02 - Calpont1 - srvswdev11 - *.x86_64.rpm - root - srvitfep1 - Calpont1 - srvitfep1 - *.x86_64.rpm - root - daniel - qalpont! - srvdaniel - *.i686.rpm - root - qaftest2 - Calpont1 - qaftest2 - *.x86_64.rpm - root - srvswdev11 - Calpont1 - srvswdev11 - *.x86_64.rpm - root - ss1 - qalpont! - srvqaperf8 - *.x86_64.rpm - root - ss2 - qalpont! - srvalpha2 - *.x86_64.rpm - root - srvprodtest1 - qalpont! - srvprodtest1 - *.x86_64.rpm - root - qaftest4 - qalpont! - qaftest4 - *.x86_64.rpm - root - srvqawin1 - Calpont1 - srvqawin1 - *.x86_64.rpm - root - devsn1 - Calpont1 - srvdevsn2 - *.x86_64.rpm - root - devsn2 - Calpont1 - srvdevsn6 - *.x86_64.rpm - root - devsn3 - Calpont1 - srvdevsn8 - *.x86_64.rpm - root - demo02 - Calpont1 - srvdemo3 - *.x86_64.rpm - root - srvprodtest2 - qalpont! - srvprodtest2 - *.x86_64.rpm - root - alphad03 - Calpont1 - srvalpha3 - *x86_64.bin.tar.gz - infinidb - devsn4 - Calpont1 - srvdevsn11 - *.x86_64.rpm - root - - diff --git a/tools/configMgt/README b/tools/configMgt/README deleted file mode 100644 index f1307af56..000000000 --- a/tools/configMgt/README +++ /dev/null @@ -1,43 +0,0 @@ - -This directory contains the Calpont Config Manager Build, Configure, and Install -scripts. - -User Command scripts: - -autoBuilder - Generate a RHEL5 or FC6 build based on latest genii code. - Execute './autoBuilder -h' for addition help. - -autoInstaller - Installs a Calpont system with a Calpont RPM. It can - install the latest build version or a specific released - versions. - Execute './autoInstaller -h' for addition help. - -autoReleaseNotes.sh - Generates bug reports for release notes - Execute './autoReleaseNotes.sh -h' for addition help. - -buildTester.sh - Installs a Calpont system with a New Latest Built or - Previously release Calpont RPM. If Latest Build is specified, - it will generate a new build based on the latest genii code. - If the build and install is successfully, it will perform a - set of Database Test. A report will be emailed. - Execute './buildTester -h' for addition help. - -configure - Interactively reads and Configures a local version of the - Columnstore.xml file based on user inputs. - Usage ./configure - -Supported Command Scipts. These scripts are called by the User Command scripts: - -autoConfigure, parent_installer.sh, remote_command.sh, and remote_scp_get.sh - -Directories: - -systems - Contains directories of the systems with associated Columnstore.xml - files for that system. - Also contains the CalpontSystem.xml file, which contains the - support systems that can be installed using the autoInstaller - command. - -RHEL5, FC6 - Would contain the build log files for a build failure using the - autoBuilder tool. - diff --git a/tools/configMgt/autoBuilder b/tools/configMgt/autoBuilder deleted file mode 100755 index 3e4d558e8..000000000 --- a/tools/configMgt/autoBuilder +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/bin/expect -# -# $Id: autoBuild.sh 421 2007-04-05 15:46:55Z dhill $ -# -# Remote command execution script to another server -# Argument 1 - OS type: RHEL5 , RHEL5-1 or FC6 (default to RHEL5) -# Argument 2 - release (optional) -# Argument 3 - SVN branch or trunk -set USERNAME root -set PASSWORD Calpont1 -set OS RHEL5 -set RELEASE Latest -set SVN genii -set VERSION EE -log_user 0 - -spawn -noecho /bin/bash - -for {set i 0} {$i<[llength $argv]} {incr i} { - set arg($i) [lindex $argv $i] -} - -set i 0 -while true { - if { $i == [llength $argv] } { break } - if { $arg($i) == "-h" } { - send_user "\n" - send_user "'autoBuilder' generates a RHEL5 or FC6 rpm based on the Latest\n" - send_user "files checked into svn genii branch. The build takes 20 to 30 minutes.\n" - send_user "It will place the rpm in //calweb/shared/Iterations/Latest/\n" - send_user "and in //calweb/shared/Iterations/'release'/ when specified\n" - send_user "\n" - send_user "Usage: autoBuild -o 'OS' -r 'release' -s 'svn-branch'\n" - send_user " OS - RHEL5 (srvperf1 4.x build) or RHEL5-1 (srvnightly 4.x build)\n" - send_user " release - Calpont release number (defaults to Latest)\n" - send_user " svn-branch - SVN Branch name (defaults to genii)\n" - exit - } - if { $arg($i) == "-o" } { - incr i - set OS $arg($i) - } else { - if { $arg($i) == "-r" } { - incr i - set RELEASE $arg($i) - } else { - if { $arg($i) == "-s" } { - incr i - set SVN $arg($i) - } - } - } - incr i -} - -if { $OS == "RHEL5-1" } { - set SERVER srvnightly - set COMMAND "'/root/calpont-build $RELEASE $SVN'" -} else { - set SERVER srvperf1 - set COMMAND "'/root/infinidb-build $RELEASE $SVN'" -} - - -set timeout 4000 -# -# send command to build calpont rpms -# -#get current date -exec date >/tmp/datesync.tmp -exec cat /tmp/datesync.tmp -set newtime [exec cat /tmp/datesync.tmp] - -send_user "\nStart Time: $newtime\n" - -log_user 1 -send_user "\nRPM Package Build takes 60 minutes to complete, please wait...\n" -log_user 0 - -send "ssh $USERNAME@$SERVER $COMMAND\n" -expect { - -re "authenticity" { send "yes\n" - expect { - -re "word: " { send "$PASSWORD\n" } abort - } - } - -re "service not known" { send_user "FAILED: Invalid Host\n" ; exit -1 } - -re "word: " { send "$PASSWORD\n" } abort - -re "TIMEOUT" { send_user "Calpont RPM Build Failed, timeout\n" ; exit -1 } - timeout { send_user "Calpont RPM Build Failed, timeout\n" ; exit -1 } - -re "no rpm" { send_user "Calpont RPM Build Failed, check build log files\n" ; exit -1 } - -re "Build Successfully Completed" { log_user 1 - #get current date - exec date >/tmp/datesync.tmp - exec cat /tmp/datesync.tmp - set newtime [exec cat /tmp/datesync.tmp] - - file delete -force $RELEASE - send_user "\nCalpont RPM Build successfully completed $newtime\n"; exit 0 - } -} - -expect { - -re "TIMEOUT" { send_user "Calpont RPM Build Failed, timeout\n" ; exit -1 } - timeout { send_user "Calpont RPM Build Failed, timeout\n" ; exit -1 } - -re "no rpm" { send_user "Calpont RPM Build Failed, check build log files\n" ; exit -1 } - -re "Build Successfully Completed" { } abort -} - -log_user 1 - -#get current date -exec date >/tmp/datesync.tmp -exec cat /tmp/datesync.tmp -set newtime [exec cat /tmp/datesync.tmp] - -file delete -force $RELEASE -send_user "\nCalpont RPM Build successfully completed $newtime\n" -exit 0 diff --git a/tools/configMgt/autoConfigure.cpp b/tools/configMgt/autoConfigure.cpp deleted file mode 100644 index d48399893..000000000 --- a/tools/configMgt/autoConfigure.cpp +++ /dev/null @@ -1,2126 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/****************************************************************************************** -* $Id: configure.cpp 64 2006-10-12 22:21:51Z dhill $ -* -* -* List of files being updated by configure: -* Calpont/etc/Columnstore.xml -* -* -******************************************************************************************/ -/** - * @file - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "stdio.h" -#include "ctype.h" -#include - -#include "liboamcpp.h" -#include "configcpp.h" -#include "installdir.h" - - -using namespace std; -using namespace oam; -using namespace config; - -typedef struct Performance_Module_struct -{ - std::string moduleIP1; - std::string moduleIP2; - std::string moduleIP3; - std::string moduleIP4; -} PerformanceModule; - -typedef std::vector PerformanceModuleList; - -int main(int argc, char* argv[]) -{ - setenv("CALPONT_HOME", "./", 1); - - Oam oam; - string systemParentOAMModuleName; - string parentOAMModuleIPAddr; - PerformanceModuleList performancemodulelist; - int pmNumber = 0; - string prompt; - int DBRMworkernodeID = 0; - string remote_installer_debug = "0"; - Config* sysConfigOld; - Config* sysConfigNew; - string systemName; - bool extextMapCheckOnly = false; - - if (argc > 1) - { - string arg1 = argv[1]; - - if ( arg1 == "-e" ) - extextMapCheckOnly = true; - else - systemName = arg1; - } - else - systemName = oam::UnassignedName; - - try - { - sysConfigOld = Config::makeConfig("./Columnstore.xml"); // system version - sysConfigNew = Config::makeConfig("./Columnstore.xml.new"); // released version - } - catch (...) - { - cout << "ERROR: Problem reading Columnstore.xml files"; - exit(-1); - } - - string SystemSection = "SystemConfig"; - string InstallSection = "Installation"; - - //set install config flag - try - { - sysConfigNew->setConfig(InstallSection, "InitialInstallFlag", "y"); - } - catch (...) - { - cout << "ERROR: Problem setting InitialInstallFlag from the Calpont System Configuration file" << endl; - exit(-1); - } - - //read cloud parameter to see if OLD is a 3.0+ or pre-3.0 build being installed - string cloud; - - try - { - cloud = sysConfigOld->getConfig(InstallSection, "Cloud"); - } - catch (...) - {} - - // build3 is for 3 and above 4 - bool OLDbuild3 = false; - - if ( !cloud.empty() ) - OLDbuild3 = true; - - //read cloud parameter to see if NEW is a 3.0+ or pre-3.0 build being installed - try - { - cloud = sysConfigNew->getConfig(InstallSection, "Cloud"); - } - catch (...) - {} - - // build3 is for 3 and above 4 - bool build3 = false; - - if ( !cloud.empty() ) - build3 = true; - - // build 4.0 flag - string CoreFileFlag; - - try - { - CoreFileFlag = sysConfigNew->getConfig(InstallSection, "CoreFileFlag"); - } - catch (...) - {} - - bool build40 = false; - bool build401 = true; - - //set install config flag - try - { - sysConfigNew->setConfig(InstallSection, "InitialInstallFlag", "y"); - } - catch (...) - { - cout << "ERROR: Problem setting InitialInstallFlag from the Calpont System Configuration file" << endl; - exit(-1); - } - - - //check and update PMwithUM - try - { - string PMwithUM = sysConfigOld->getConfig(InstallSection, "PMwithUM"); - - if ( !PMwithUM.empty() ) - { - try - { - sysConfigNew->setConfig(InstallSection, "PMwithUM", PMwithUM); - } - catch (...) - { - cout << "ERROR: Problem setting PMwithUM in the Calpont System Configuration file" << endl; - exit(-1); - } - } - } - catch (...) - { - } - - //check and update PMwithUM - try - { - string MySQLRep = sysConfigOld->getConfig(InstallSection, "MySQLRep"); - - if ( !MySQLRep.empty() ) - { - try - { - sysConfigNew->setConfig(InstallSection, "MySQLRep", MySQLRep); - } - catch (...) - { - cout << "ERROR: Problem setting MySQLRep in the Calpont System Configuration file" << endl; - exit(-1); - } - } - } - catch (...) - { - } - - //set gluster flag if it exists - string DataRedundancyConfig; - string DataRedundancyCopies; - string DataRedundancyStorageType; - string DataRedundancyNetworkType; - - try - { - DataRedundancyConfig = sysConfigOld->getConfig(InstallSection, "DataRedundancyConfig"); - DataRedundancyCopies = sysConfigOld->getConfig(InstallSection, "DataRedundancyCopies"); - DataRedundancyStorageType = sysConfigOld->getConfig(InstallSection, "DataRedundancyStorageType"); - DataRedundancyNetworkType = sysConfigOld->getConfig(InstallSection, "DataRedundancyNetworkType"); - } - catch (...) - {} - - if ( !DataRedundancyConfig.empty() ) - { - try - { - sysConfigNew->setConfig(InstallSection, "DataRedundancyConfig", DataRedundancyConfig); - sysConfigNew->setConfig(InstallSection, "DataRedundancyCopies", DataRedundancyCopies); - sysConfigNew->setConfig(InstallSection, "DataRedundancyStorageType", DataRedundancyStorageType); - sysConfigNew->setConfig(InstallSection, "DataRedundancyNetworkType", DataRedundancyNetworkType); - } - catch (...) - {} - } - - //check and make sure the ExtentMap variables don't get changed at install - string oldFilesPerColumnPartition; - string oldExtentsPerSegmentFile; - string newFilesPerColumnPartition; - string newExtentsPerSegmentFile; - - try - { - oldFilesPerColumnPartition = sysConfigOld->getConfig("ExtentMap", "FilesPerColumnPartition"); - oldExtentsPerSegmentFile = sysConfigOld->getConfig("ExtentMap", "ExtentsPerSegmentFile"); - - newFilesPerColumnPartition = sysConfigNew->getConfig("ExtentMap", "FilesPerColumnPartition"); - newExtentsPerSegmentFile = sysConfigNew->getConfig("ExtentMap", "ExtentsPerSegmentFile"); - - if ( oldFilesPerColumnPartition != newFilesPerColumnPartition ) - { - try - { - sysConfigNew->setConfig("ExtentMap", "FilesPerColumnPartition", oldFilesPerColumnPartition); - } - catch (...) - { - cout << "ERROR: Problem setting FilesPerColumnPartition in the Calpont System Configuration file" << endl; - exit(-1); - } - } - - if ( oldExtentsPerSegmentFile != newExtentsPerSegmentFile ) - { - try - { - sysConfigNew->setConfig("ExtentMap", "ExtentsPerSegmentFile", oldExtentsPerSegmentFile); - } - catch (...) - { - cout << "ERROR: Problem setting ExtentsPerSegmentFile in the Calpont System Configuration file" << endl; - exit(-1); - } - } - } - catch (...) - { - } - - //check and update license key - try - { - string key = sysConfigOld->getConfig("SystemConfig", "Flags"); - - if ( !key.empty() ) - { - try - { - sysConfigNew->setConfig("SystemConfig", "Flags", key); - } - catch (...) - { - cout << "ERROR: Problem setting Flags in the Calpont System Configuration file" << endl; - exit(-1); - } - } - } - catch (...) - { - } - - sysConfigNew->write(); - - if ( extextMapCheckOnly ) - exit(0); - - systemParentOAMModuleName = "pm1"; - - //check if systemParentOAMModuleName (pm1) is configured, if not set to 'pm2' -// string IPaddr = sysConfigOld->getConfig("pm1_ProcessMonitor", "IPAddr"); -// if ( IPaddr == "0.0.0.0" ) -// systemParentOAMModuleName = "pm2"; - - //set Parent OAM Module Name - try - { - sysConfigNew->setConfig(SystemSection, "ParentOAMModuleName", systemParentOAMModuleName); - } - catch (...) - { - cout << "ERROR: Problem updating the Calpont System Configuration file" << endl; - exit(-1); - } - - //setup System Name - string oldSystemName; - - try - { - oldSystemName = sysConfigOld->getConfig(SystemSection, "SystemName"); - } - catch (...) - { } - - if ( !oldSystemName.empty() ) - systemName = oldSystemName; - - try - { - sysConfigNew->setConfig(SystemSection, "SystemName", systemName); - } - catch (...) - { - cout << "ERROR: Problem setting SystemName from the Calpont System Configuration file" << endl; - exit(-1); - } - - // WaitPeriod - try - { - string waitPeriod = sysConfigOld->getConfig(SystemSection, "WaitPeriod"); - if (waitPeriod.length() > 0) - { - sysConfigNew->setConfig(SystemSection, "WaitPeriod", waitPeriod); - } - } - catch (...) - { } - - //setup HA IP Address - string HA_IPadd; - - try - { - HA_IPadd = sysConfigOld->getConfig("ProcMgr_HA", "IPAddr"); - } - catch (...) - { } - - if ( !HA_IPadd.empty() ) - { - try - { - sysConfigNew->setConfig("ProcMgr_HA", "IPAddr", HA_IPadd); - } - catch (...) - { - cout << "ERROR: Problem setting ProcMgr_HA from the Calpont System Configuration file" << endl; - exit(-1); - } - } - - //setup CMP IP Addresses - string CMP_IPadd; - string CMP_port; - - for ( int id = 1 ;; id ++ ) - { - string cmpName = "CMP" + oam.itoa(id); - - try - { - CMP_IPadd = sysConfigOld->getConfig(cmpName, "IPAddr"); - } - catch (...) - { } - - if ( !CMP_IPadd.empty() ) - { - try - { - CMP_port = sysConfigOld->getConfig(cmpName, "Port"); - } - catch (...) - { } - - try - { - sysConfigNew->setConfig(cmpName, "IPAddr", CMP_IPadd); - sysConfigNew->setConfig(cmpName, "Port", CMP_port); - } - catch (...) - { - cout << "ERROR: Problem setting CMP from the Calpont System Configuration file" << endl; - exit(-1); - } - } - else - break; - } - - //setup module and process monitor settings - string ModuleHeartbeatPeriod = "3"; - string ModuleHeartbeatCount = "1"; - string ProcessRestartCount = "3"; - string ProcessRestartPeriod = "1"; - string SwapAction = "restartSystem"; - string ActivePmFailoverDisabled = "n"; - - try - { - ModuleHeartbeatPeriod = sysConfigOld->getConfig(SystemSection, "ModuleHeartbeatPeriod"); - ModuleHeartbeatCount = sysConfigOld->getConfig(SystemSection, "ModuleHeartbeatCount"); - ProcessRestartCount = sysConfigOld->getConfig(SystemSection, "ProcessRestartCount"); - ProcessRestartPeriod = sysConfigOld->getConfig(SystemSection, "ProcessRestartPeriod"); - SwapAction = sysConfigOld->getConfig(SystemSection, "SwapAction"); - ActivePmFailoverDisabled = sysConfigOld->getConfig(SystemSection, "ActivePmFailoverDisabled"); - } - catch (...) - { } - - try - { - sysConfigNew->setConfig(SystemSection, "ModuleHeartbeatPeriod", ModuleHeartbeatPeriod); - sysConfigNew->setConfig(SystemSection, "ModuleHeartbeatCount", ModuleHeartbeatCount); - sysConfigNew->setConfig(SystemSection, "ProcessRestartCount", ProcessRestartCount); - sysConfigNew->setConfig(SystemSection, "ProcessRestartPeriod", ProcessRestartPeriod); - sysConfigNew->setConfig(SystemSection, "SwapAction", SwapAction); - sysConfigNew->setConfig(SystemSection, "ActivePmFailoverDisabled", ActivePmFailoverDisabled); - } - catch (...) - {} - - //save EEPackageType - string EEPackageType = "rpm"; - - try - { - EEPackageType = sysConfigOld->getConfig(InstallSection, "EEPackageType"); - } - catch (...) - { } - - try - { - sysConfigNew->setConfig(InstallSection, "EEPackageType", EEPackageType); - } - catch (...) - { } - - if ( EEPackageType.empty() ) - EEPackageType = "rpm"; - - try - { - sysConfigNew->setConfig(InstallSection, "EEPackageType", EEPackageType); - } - catch (...) - {} - - // make DBRM backwards compatiable for pre 1.0.0.157 load - string dbrmMainProc = "DBRM_Controller"; - string dbrmSubProc = "DBRM_Worker"; - string numSubProc = "NumWorkers"; - - //set system startup offline option to default 'n' - try - { - sysConfigNew->setConfig(InstallSection, "SystemStartupOffline", "n"); - } - catch (...) - { - cout << "ERROR: Problem setting systemStartupOffline in the Calpont System Configuration file" << endl; - exit(-1); - } - - //CrossEngineSupport - string Host = ""; - string Port = "3306"; - string User = ""; - string Password = ""; - string TLSCA = ""; - string TLSClientCert = ""; - string TLSClientKey = ""; - - try - { - Host = sysConfigOld->getConfig("CrossEngineSupport", "Host"); - Port = sysConfigOld->getConfig("CrossEngineSupport", "Port"); - User = sysConfigOld->getConfig("CrossEngineSupport", "User"); - Password = sysConfigOld->getConfig("CrossEngineSupport", "Password"); - } - catch (...) - { - Host = ""; - Port = "3306"; - User = ""; - Password = ""; - } - - try - { - sysConfigNew->setConfig("CrossEngineSupport", "Host", Host); - sysConfigNew->setConfig("CrossEngineSupport", "Port", Port); - sysConfigNew->setConfig("CrossEngineSupport", "User", User); - sysConfigNew->setConfig("CrossEngineSupport", "Password", Password); - } - catch (...) - {} - - try - { - TLSCA = sysConfigOld->getConfig("CrossEngineSupport", "TLSCA"); - TLSClientCert = sysConfigOld->getConfig("CrossEngineSupport", "TLSClientCert"); - TLSClientKey = sysConfigOld->getConfig("CrossEngineSupport", "TLSClientKey"); - } - catch (...) - { - TLSCA = ""; - TLSClientCert = ""; - TLSClientKey = ""; - } - - try - { - sysConfigNew->setConfig("CrossEngineSupport", "TLSCA", TLSCA); - sysConfigNew->setConfig("CrossEngineSupport", "TLSClientCert", TLSClientCert); - sysConfigNew->setConfig("CrossEngineSupport", "TLSClientKey", TLSClientKey); - } - catch (...) - {} - - //QueryStats and UserPriority - string QueryStats = "N"; - string UserPriority = "N"; - - try - { - QueryStats = sysConfigOld->getConfig("QueryStats", "Enabled"); - UserPriority = sysConfigOld->getConfig("UserPriority", "Enabled"); - } - catch (...) - { - QueryStats = "N"; - UserPriority = "N"; - } - - try - { - sysConfigNew->setConfig("QueryStats", "Enabled", QueryStats); - sysConfigNew->setConfig("UserPriority", "Enabled", UserPriority); - } - catch (...) - {} - - // @bug4598, DirectIO setting - string directIO = "y"; - - try - { - directIO = sysConfigOld->getConfig("PrimitiveServers", "DirectIO"); - } - catch (...) - { - directIO = "y"; - } - - try - { - sysConfigNew->setConfig("PrimitiveServers", "DirectIO", directIO); - } - catch (...) - {} - - // @bug4507, configurable pm aggregation AggregationMemoryCheck - string aggMemCheck; - - try - { - aggMemCheck = sysConfigOld->getConfig("PrimitiveServers", "AggregationMemoryCheck"); - - if ( !( aggMemCheck.empty() || aggMemCheck == "" ) ) - { - sysConfigNew->setConfig("PrimitiveServers", "AggregationMemoryCheck", aggMemCheck); - } - } - catch (...) - { - } - - //Priority Settings - string HighPriorityPercentage; - - try - { - HighPriorityPercentage = sysConfigOld->getConfig("PrimitiveServers", "HighPriorityPercentage"); - sysConfigNew->setConfig("PrimitiveServers", "HighPriorityPercentage", HighPriorityPercentage); - } - catch (...) {} - - string MediumPriorityPercentage; - - try - { - MediumPriorityPercentage = sysConfigOld->getConfig("PrimitiveServers", "MediumPriorityPercentage"); - sysConfigNew->setConfig("PrimitiveServers", "MediumPriorityPercentage", MediumPriorityPercentage); - } - catch (...) {} - - string LowPriorityPercentage; - - try - { - LowPriorityPercentage = sysConfigOld->getConfig("PrimitiveServers", "LowPriorityPercentage"); - sysConfigNew->setConfig("PrimitiveServers", "LowPriorityPercentage", LowPriorityPercentage); - } - catch (...) {} - - // default to single-server install type - string OserverTypeInstall = oam.itoa(oam::INSTALL_COMBINE_DM_UM_PM);; - string NserverTypeInstall; - string OSingleServerInstall = "y"; - int IserverTypeInstall; - - try - { - OserverTypeInstall = sysConfigOld->getConfig(InstallSection, "ServerTypeInstall"); - OSingleServerInstall = sysConfigOld->getConfig(InstallSection, "SingleServerInstall"); - } - catch (...) - { - // default to Normal mult-server install type - OserverTypeInstall = oam.itoa(oam::INSTALL_COMBINE_DM_UM_PM); - OSingleServerInstall = "y"; - } - - // set Server Installation Type - try - { - sysConfigNew->setConfig(InstallSection, "ServerTypeInstall", OserverTypeInstall); - sysConfigNew->setConfig(InstallSection, "SingleServerInstall", OSingleServerInstall); - } - catch (...) - {} - - NserverTypeInstall = OserverTypeInstall; - - IserverTypeInstall = atoi(NserverTypeInstall.c_str()); - - //set RotatingDestination - switch ( IserverTypeInstall ) - { - case (oam::INSTALL_COMBINE_DM_UM_PM): // combined #1 - dm/um/pm on a single server - { - try - { - sysConfigNew->setConfig("PrimitiveServers", "RotatingDestination", "n"); - } - catch (...) - { - cout << "ERROR: Problem setting RotatingDestination in the Calpont System Configuration file" << endl; - exit(-1); - } - - break; - } - } - - string parentOAMModuleType = systemParentOAMModuleName.substr(0, MAX_MODULE_TYPE_SIZE); - - // - // get Data storage Mount - // - - string DBRootStorageType; - - int DBRootCount; - string deviceName; - - try - { - DBRootStorageType = sysConfigOld->getConfig(InstallSection, "DBRootStorageType"); - DBRootCount = strtol(sysConfigOld->getConfig(SystemSection, "DBRootCount").c_str(), 0, 0); - } - catch (...) - { - cout << "ERROR: Problem getting DB Storage Data from the Calpont System Configuration file" << endl; - exit(-1); - } - - // 2.2 to 3.x+ DBRootStorageTypeconversion - if ( DBRootStorageType == "local" ) - DBRootStorageType = "internal"; - - if ( DBRootStorageType == "storage" ) - DBRootStorageType = "external"; - - try - { - sysConfigNew->setConfig(InstallSection, "DBRootStorageType", DBRootStorageType); - } - catch (...) - { - cout << "ERROR: Problem setting DBRootStorageType in the Calpont System Configuration file" << endl; - exit(-1); - } - - try - { - sysConfigNew->setConfig(SystemSection, "DBRootCount", oam.itoa(DBRootCount)); - } - catch (...) - { - cout << "ERROR: Problem setting DBRoot Count in the Calpont System Configuration file" << endl; - exit(-1); - } - - // - //Update memory and cache settings - // - - string NumBlocksPct; - - try - { - NumBlocksPct = sysConfigOld->getConfig("DBBC", "NumBlocksPct"); - } - catch (...) - { } - - if ( ( NumBlocksPct.empty() || NumBlocksPct == "" ) && - IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) - { - NumBlocksPct = "50"; - } - - if ( !( NumBlocksPct.empty() || NumBlocksPct == "" ) ) - { - try - { - sysConfigNew->setConfig("DBBC", "NumBlocksPct", NumBlocksPct); - } - catch (...) - {} - } - - string TotalUmMemory; - - try - { - TotalUmMemory = sysConfigOld->getConfig("HashJoin", "TotalUmMemory"); - } - catch (...) - { } - - try - { - sysConfigNew->setConfig("HashJoin", "TotalUmMemory", TotalUmMemory); - } - catch (...) - {} - - string TotalPmUmMemory; - - try - { - TotalPmUmMemory = sysConfigOld->getConfig("HashJoin", "TotalPmUmMemory"); - } - catch (...) - { } - - try - { - sysConfigNew->setConfig("HashJoin", "TotalPmUmMemory", TotalPmUmMemory); - } - catch (...) - {} - - string strNumThreads; - - try - { - strNumThreads = sysConfigOld->getConfig("DBBC", "NumThreads"); - } - catch (...) - { } - - if ( !( strNumThreads.empty() || strNumThreads == "" ) ) - { - try - { - sysConfigNew->setConfig("DBBC", "NumThreads", strNumThreads); - } - catch (...) - {} - } - - string MySQLPort = "3306"; - - try - { - MySQLPort = sysConfigOld->getConfig("Installation", "MySQLPort"); - } - catch (...) - { - MySQLPort = "3306"; - } - - try - { - sysConfigNew->setConfig("Installation", "MySQLPort", MySQLPort); - } - catch (...) - {} - - sysConfigNew->write(); - - //Get list of configured system modules - SystemModuleTypeConfig sysModuleTypeConfig; - - try - { - oam.getSystemConfig(sysModuleTypeConfig); - } - catch (...) - { - cout << "ERROR: Problem reading the Calpont System Configuration file" << endl; - exit(-1); - } - - // - // Module Configuration - // - string ModuleSection = "SystemModuleConfig"; - unsigned int maxPMNicCount = 1; - - for ( unsigned int i = 0 ; i < sysModuleTypeConfig.moduletypeconfig.size(); i++) - { - string moduleType = sysModuleTypeConfig.moduletypeconfig[i].ModuleType; - string moduleDesc = sysModuleTypeConfig.moduletypeconfig[i].ModuleDesc; - int moduleCount = sysModuleTypeConfig.moduletypeconfig[i].ModuleCount; - - //verify and setup of modules count - switch ( IserverTypeInstall ) - { - case (oam::INSTALL_COMBINE_DM_UM_PM): - { - if ( moduleType == "um" ) - { - moduleCount = 0; - - try - { - string ModuleCountParm = "ModuleCount" + oam.itoa(i + 1); - sysConfigNew->setConfig(ModuleSection, ModuleCountParm, oam.itoa(moduleCount)); - continue; - } - catch (...) - { - cout << "ERROR: Problem setting Module Count in the Calpont System Configuration file" << endl; - exit(-1); - } - } - else - { - try - { - string ModuleCountParm = "ModuleCount" + oam.itoa(i + 1); - sysConfigNew->setConfig(ModuleSection, ModuleCountParm, oam.itoa(moduleCount)); - } - catch (...) - { - cout << "ERROR: Problem setting Module Count in the Calpont System Configuration file" << endl; - exit(-1); - } - } - - break; - } - - default: - { - try - { - string ModuleCountParm = "ModuleCount" + oam.itoa(i + 1); - sysConfigNew->setConfig(ModuleSection, ModuleCountParm, oam.itoa(moduleCount)); - } - catch (...) - { - cout << "ERROR: Problem setting Module Count in the Calpont System Configuration file" << endl; - exit(-1); - } - - break; - } - } - - if ( moduleCount == 0 ) - //no modules equipped for this Module Type, skip - continue; - - if ( moduleType == "pm" ) - pmNumber = moduleCount; - - //for 2.x to 3.x upgrade dbroot assignments - int dbrootNum = 0; - int systemDBRootCount = 0; - int dbrootCountPerModule = 0; - - if ( moduleType == "pm" && !OLDbuild3) - { - dbrootNum = 1; - systemDBRootCount = DBRootCount; - - if ( pmNumber > 0 ) - dbrootCountPerModule = DBRootCount / pmNumber; - - if ( dbrootCountPerModule == 0 ) - dbrootCountPerModule = 1; - } - - //get Module Name IP addresses and Host Names - DeviceNetworkList::iterator listPT = sysModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; listPT != sysModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; listPT++) - { - PerformanceModule performancemodule; - string moduleName = (*listPT).DeviceName; - int moduleID = atoi(moduleName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE).c_str()); - - string moduleDisableState = (*listPT).DisableState; - - if ( moduleDisableState.empty() || - moduleDisableState == oam::UnassignedName ) - moduleDisableState = oam::ENABLEDSTATE; - - if ( moduleDisableState == oam::AUTODISABLEDSTATE ) - moduleDisableState = oam::ENABLEDSTATE; - - //set Module Disable State - string moduleDisableStateParm = "ModuleDisableState" + oam.itoa(moduleID) + "-" + oam.itoa(i + 1); - - try - { - sysConfigNew->setConfig(ModuleSection, moduleDisableStateParm, moduleDisableState); - } - catch (...) - { - cout << "ERROR: Problem setting ModuleDisableState in the Calpont System Configuration file for " + moduleName << endl; - exit(-1); - } - - for ( unsigned int nicID = 1 ; nicID < MAX_NIC + 1 ; nicID++ ) - { - string moduleIPAddr = oam::UnassignedIpAddr; - string moduleHostName = oam::UnassignedName; - - HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin(); - - for ( ; pt1 != (*listPT).hostConfigList.end() ; pt1++) - { - if ( moduleName == (*listPT).DeviceName && (*pt1).NicID == nicID) - { - moduleIPAddr = (*pt1).IPAddr; - moduleHostName = (*pt1).HostName; - break; - } - } - - if (moduleHostName.empty() || (moduleHostName == oam::UnassignedName) ) - // exit out to next module ID - break; - - if (moduleIPAddr.empty()) - moduleIPAddr = oam::UnassignedIpAddr; - - string moduleNameDesc = moduleDesc + " #" + oam.itoa(moduleID); - - //set New Module Host Name - string moduleHostNameParm = "ModuleHostName" + oam.itoa(moduleID) + "-" + oam.itoa(nicID) + "-" + oam.itoa(i + 1); - - try - { - sysConfigNew->setConfig(ModuleSection, moduleHostNameParm, moduleHostName); - } - catch (...) - { - cout << "ERROR: Problem setting Host Name in the Calpont System Configuration file" << endl; - exit(-1); - } - - //set Module IP address - string moduleIPAddrNameParm = "ModuleIPAddr" + oam.itoa(moduleID) + "-" + oam.itoa(nicID) + "-" + oam.itoa(i + 1); - - try - { - sysConfigNew->setConfig(ModuleSection, moduleIPAddrNameParm, moduleIPAddr); - } - catch (...) - { - cout << "ERROR: Problem setting IP address in the Calpont System Configuration file" << endl; - exit(-1); - } - - if ( moduleType == "pm" && moduleDisableState == oam::ENABLEDSTATE ) - { - - switch (nicID) - { - case 1: - performancemodule.moduleIP1 = moduleIPAddr; - break; - - case 2: - performancemodule.moduleIP2 = moduleIPAddr; - break; - - case 3: - performancemodule.moduleIP3 = moduleIPAddr; - break; - - case 4: - performancemodule.moduleIP4 = moduleIPAddr; - break; - } - - if ( maxPMNicCount < nicID ) - maxPMNicCount = nicID; - } - - if ( nicID > 1 ) - continue; - - //set port addresses - if ( moduleName == systemParentOAMModuleName ) - { - parentOAMModuleIPAddr = moduleIPAddr; - - //exit out if parentOAMModuleIPAddr is NOT set, this means the System Columnstore.xml isn't configured - if ( parentOAMModuleIPAddr == "0.0.0.0" ) - { - cout << "ERROR: System Columnstore.xml not configured" << endl; - exit(-1); - } - - //set Parent Processes Port IP Address - string parentProcessMonitor = systemParentOAMModuleName + "_ProcessMonitor"; - sysConfigNew->setConfig(parentProcessMonitor, "IPAddr", parentOAMModuleIPAddr); - sysConfigNew->setConfig(parentProcessMonitor, "Port", "8800"); - sysConfigNew->setConfig("ProcMgr", "IPAddr", parentOAMModuleIPAddr); - sysConfigNew->setConfig("ProcMgr_Alarm", "IPAddr", parentOAMModuleIPAddr); - sysConfigNew->setConfig("ProcStatusControl", "IPAddr", parentOAMModuleIPAddr); - string parentServerMonitor = systemParentOAMModuleName + "_ServerMonitor"; - sysConfigNew->setConfig(parentServerMonitor, "IPAddr", parentOAMModuleIPAddr); - sysConfigNew->setConfig(parentServerMonitor, "Port", "8622"); - - if (build3) - { - string portName = systemParentOAMModuleName + "_WriteEngineServer"; - sysConfigNew->setConfig(portName, "IPAddr", parentOAMModuleIPAddr); - sysConfigNew->setConfig(portName, "Port", "8630"); - } - else - { - sysConfigNew->setConfig("DDLProc", "IPAddr", parentOAMModuleIPAddr); - sysConfigNew->setConfig("DMLProc", "IPAddr", parentOAMModuleIPAddr); - } - - if ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) - { - - //set User Module's IP Addresses - string Section = "ExeMgr" + oam.itoa(moduleID); - - sysConfigNew->setConfig(Section, "IPAddr", moduleIPAddr); - sysConfigNew->setConfig(Section, "Port", "8601"); - - //set Performance Module's IP's to first NIC IP entered - sysConfigNew->setConfig("DDLProc", "IPAddr", moduleIPAddr); - sysConfigNew->setConfig("DMLProc", "IPAddr", moduleIPAddr); - } - } - else - { - //set child Process Monitor Port IP Address - string portName = moduleName + "_ProcessMonitor"; - sysConfigNew->setConfig(portName, "IPAddr", moduleIPAddr); - sysConfigNew->setConfig(portName, "Port", "8800"); - - //set child Server Monitor Port IP Address - portName = moduleName + "_ServerMonitor"; - sysConfigNew->setConfig(portName, "IPAddr", moduleIPAddr); - sysConfigNew->setConfig(portName, "Port", "8622"); - - //set Performance Module WriteEngineServer Port IP Address - if ( moduleType == "pm" && build3) - { - portName = moduleName + "_WriteEngineServer"; - sysConfigNew->setConfig(portName, "IPAddr", moduleIPAddr); - sysConfigNew->setConfig(portName, "Port", "8630"); - } - - //set User Module's IP Addresses - if ( moduleType == "um" || - ( moduleType == "pm" && IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) ) - { - - string Section = "ExeMgr" + oam.itoa(moduleID); - - sysConfigNew->setConfig(Section, "IPAddr", moduleIPAddr); - sysConfigNew->setConfig(Section, "Port", "8601"); - } - } - - //set Performance Module's IP's to first NIC IP entered - if ( moduleName == "um1" && build3) - { - sysConfigNew->setConfig("DDLProc", "IPAddr", moduleIPAddr); - sysConfigNew->setConfig("DMLProc", "IPAddr", moduleIPAddr); - } - - //setup DBRM processes - if ( moduleName == systemParentOAMModuleName ) - sysConfigNew->setConfig(dbrmMainProc, "IPAddr", moduleIPAddr); - - if ( moduleDisableState == oam::ENABLEDSTATE ) - { - DBRMworkernodeID++; - string DBRMSection = dbrmSubProc + oam.itoa(DBRMworkernodeID); - sysConfigNew->setConfig(DBRMSection, "IPAddr", moduleIPAddr); - sysConfigNew->setConfig(DBRMSection, "Module", moduleName); - } - } //end of nicID loop - - //set dbroot assigments - DeviceDBRootList::iterator pt3 = sysModuleTypeConfig.moduletypeconfig[i].ModuleDBRootList.begin(); - - //this will be empty if upgrading from 2.2 - if ( sysModuleTypeConfig.moduletypeconfig[i].ModuleDBRootList.size() == 0 ) - { - if ( !OLDbuild3 && moduleType == "pm") - { - int dbrootCount = dbrootCountPerModule; - string moduleCountParm = "ModuleDBRootCount" + oam.itoa(moduleID) + "-" + oam.itoa(i + 1); - - try - { - sysConfigNew->setConfig(ModuleSection, moduleCountParm, oam.itoa(dbrootCount)); - } - catch (...) - { - cout << "ERROR: Problem setting Host Name in the Calpont System Configuration file" << endl; - exit(-1); - } - - int entry = 1; - - for ( ; entry < dbrootCountPerModule + 1 ; entry++) - { - int dbrootid = dbrootNum; - - if ( dbrootNum > systemDBRootCount ) - dbrootid = 0; - else - dbrootNum++; - - string moduleDBRootIDParm = "ModuleDBRootID" + oam.itoa(moduleID) + "-" + oam.itoa(entry) + "-" + oam.itoa(i + 1); - - try - { - sysConfigNew->setConfig(ModuleSection, moduleDBRootIDParm, oam.itoa(dbrootid)); - } - catch (...) - { - cout << "ERROR: Problem setting Host Name in the Calpont System Configuration file" << endl; - exit(-1); - } - } - } - } - else - { - for ( ; pt3 != sysModuleTypeConfig.moduletypeconfig[i].ModuleDBRootList.end() ; pt3++) - { - if ( (*pt3).dbrootConfigList.size() > 0 ) - { - int moduleID = (*pt3).DeviceID; - - DBRootConfigList::iterator pt4 = (*pt3).dbrootConfigList.begin(); - - int dbrootCount = (*pt3).dbrootConfigList.size(); - string moduleCountParm = "ModuleDBRootCount" + oam.itoa(moduleID) + "-" + oam.itoa(i + 1); - - try - { - sysConfigNew->setConfig(ModuleSection, moduleCountParm, oam.itoa(dbrootCount)); - } - catch (...) - { - cout << "ERROR: Problem setting Host Name in the Calpont System Configuration file" << endl; - exit(-1); - } - - int entry = 1; - - for ( ; pt4 != (*pt3).dbrootConfigList.end() ; pt4++, entry++) - { - int dbrootid = *pt4; - - string moduleDBRootIDParm = "ModuleDBRootID" + oam.itoa(moduleID) + "-" + oam.itoa(entry) + "-" + oam.itoa(i + 1); - - try - { - sysConfigNew->setConfig(ModuleSection, moduleDBRootIDParm, oam.itoa(dbrootid)); - } - catch (...) - { - cout << "ERROR: Problem setting Host Name in the Calpont System Configuration file" << endl; - exit(-1); - } - } - } - } - } - - if ( ( moduleType == "pm" && moduleDisableState == oam::ENABLEDSTATE ) || - ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) ) - performancemodulelist.push_back(performancemodule); - - } //end of module loop - - sysConfigNew->write(); - - } //end of i for loop - - if ( performancemodulelist.size() == 0 ) - { - cout << "ERROR: performancemodulelist is empty, exiting..." << endl; - exit(-1); - } - - //set dm count to 0 always - try - { - sysConfigNew->setConfig(ModuleSection, "ModuleCount1", "0"); - } - catch (...) - { - cout << "ERROR: Problem setting Module Count in the Calpont System Configuration file" << endl; - exit(-1); - } - - //setup DBRM Controller - sysConfigNew->setConfig(dbrmMainProc, numSubProc, oam.itoa(DBRMworkernodeID)); - - //setup PrimitiveServers parameters - try - { - sysConfigNew->setConfig("PrimitiveServers", "ConnectionsPerPrimProc", oam.itoa(maxPMNicCount * 2)); - } - catch (...) - { - cout << "ERROR: Problem setting ConnectionsPerPrimProc in the Calpont System Configuration file" << endl; - exit(-1); - } - - //set the PM Ports based on Number of PM modules equipped, if any equipped - int minPmPorts = 32; - sysConfigNew->setConfig("PrimitiveServers", "Count", oam.itoa(pmNumber)); - - int pmPorts = pmNumber * (maxPMNicCount * 2); - - if ( pmPorts < minPmPorts ) - pmPorts = minPmPorts; - - if ( pmNumber > 0 || - ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) ) - { - const string PM = "PMS"; - - for ( int pmsID = 1; pmsID < pmPorts + 1 ; ) - { - for (unsigned int j = 1 ; j < maxPMNicCount + 1 ; j++) - { - PerformanceModuleList::iterator list1 = performancemodulelist.begin(); - - for (; list1 != performancemodulelist.end() ; list1++) - { - string pmName = PM + oam.itoa(pmsID); - string IpAddr; - - switch (j) - { - case 1: - IpAddr = (*list1).moduleIP1; - break; - - case 2: - IpAddr = (*list1).moduleIP2; - break; - - case 3: - IpAddr = (*list1).moduleIP3; - break; - - case 4: - IpAddr = (*list1).moduleIP4; - break; - } - - if ( !IpAddr.empty() && IpAddr != oam::UnassignedIpAddr ) - { - sysConfigNew->setConfig(pmName, "IPAddr", IpAddr); - pmsID++; - - if ( pmsID > pmPorts ) - break; - } - } - - if ( pmsID > pmPorts ) - break; - } - } - } - - sysConfigNew->write(); - - // - // Configure Ext Devices - // - SystemExtDeviceConfig systemextdeviceconfig; - - try - { - oam.getSystemConfig(systemextdeviceconfig); - } - catch (...) - { - cout << "ERROR: Problem reading the Calpont System Configuration file" << endl; - exit(-1); - } - - if ( systemextdeviceconfig.Count > 0 ) - { - - const string NAME = "Name"; - const string IPADDR = "IPAddr"; - const string DISABLE_STATE = "DisableState"; - - sysConfigNew->setConfig("SystemExtDeviceConfig", "Count", oam.itoa(systemextdeviceconfig.Count)); - - for ( unsigned int i = 0 ; i < systemextdeviceconfig.Count ; i++ ) - { - string name = NAME + oam.itoa(i + 1); - string ipaddr = IPADDR + oam.itoa(i + 1); - string disablestate = DISABLE_STATE + oam.itoa(i + 1); - - sysConfigNew->setConfig("SystemExtDeviceConfig", name, systemextdeviceconfig.extdeviceconfig[i].Name); - sysConfigNew->setConfig("SystemExtDeviceConfig", ipaddr, systemextdeviceconfig.extdeviceconfig[i].IPAddr); - sysConfigNew->setConfig("SystemExtDeviceConfig", disablestate, systemextdeviceconfig.extdeviceconfig[i].DisableState); - } - } - - sysConfigNew->write(); - - // - // Configure NMS Addresses - // - - string NMSIPAddress; - - try - { - NMSIPAddress = sysConfigOld->getConfig(SystemSection, "NMSIPAddress"); - } - catch (...) - { - cout << "ERROR: Problem getting NMSIPAddress from Calpont System Configuration file" << endl; - exit(-1); - } - - try - { - sysConfigNew->setConfig(SystemSection, "NMSIPAddress", NMSIPAddress); - } - catch (...) - { - cout << "ERROR: Problem setting NMSIPAddress in the Calpont System Configuration file" << endl; - exit(-1); - } - - // - // setup TransactionArchivePeriod - // - - string transactionArchivePeriod; - - try - { - transactionArchivePeriod = sysConfigOld->getConfig(SystemSection, "TransactionArchivePeriod"); - } - catch (...) - { - cout << "ERROR: Problem getting transactionArchivePeriod from Calpont System Configuration file" << endl; - exit(-1); - } - - try - { - sysConfigNew->setConfig(SystemSection, "TransactionArchivePeriod", transactionArchivePeriod); - } - catch (...) - { - cout << "ERROR: Problem setting IP address in the Calpont System Configuration file" << endl; - exit(-1); - } - - // - // 3 and above configuration items - // - if (build3) - { - //setup cloud parameters - string UMStorageType; - string PMInstanceType; - string UMInstanceType; - string UMSecurityGroup; - string UMVolumeSize; - string PMVolumeSize; - string AmazonAutoTagging; - string AmazonVPCNextPrivateIP; - string AmazonDeviceName; - string UMVolumeType; - string UMVolumeIOPS; - string PMVolumeType; - string PMVolumeIOPS; - - - try - { - cloud = sysConfigOld->getConfig(InstallSection, "Cloud"); - UMStorageType = sysConfigOld->getConfig(InstallSection, "UMStorageType"); - PMInstanceType = sysConfigOld->getConfig(InstallSection, "PMInstanceType"); - UMInstanceType = sysConfigOld->getConfig(InstallSection, "UMInstanceType"); - UMSecurityGroup = sysConfigOld->getConfig(InstallSection, "UMSecurityGroup"); - UMVolumeSize = sysConfigOld->getConfig(InstallSection, "UMVolumeSize"); - PMVolumeSize = sysConfigOld->getConfig(InstallSection, "PMVolumeSize"); - AmazonAutoTagging = sysConfigOld->getConfig(InstallSection, "AmazonAutoTagging"); - AmazonVPCNextPrivateIP = sysConfigOld->getConfig(InstallSection, "AmazonVPCNextPrivateIP"); - AmazonDeviceName = sysConfigOld->getConfig(InstallSection, "AmazonDeviceName"); - UMVolumeType = sysConfigOld->getConfig(InstallSection, "UMVolumeType"); - UMVolumeIOPS = sysConfigOld->getConfig(InstallSection, "UMVolumeIOPS"); - PMVolumeType = sysConfigOld->getConfig(InstallSection, "PMVolumeType"); - PMVolumeIOPS = sysConfigOld->getConfig(InstallSection, "PMVolumeIOPS"); - } - catch (...) - { } - - //this is for 2.2 to 4.x builds - if ( UMStorageType.empty() || UMStorageType == "" ) - UMStorageType = "internal"; - - // 3.x upgrade - if (build3 && !build40 && !build401) - { - if ( cloud == "no" || cloud == oam::UnassignedName) - cloud = "n"; - - if ( cloud == "amazon-ec2" || cloud == "amazon-vpc") - cloud = "amazon"; - } - - // 4.0 upgrade - if (build40 && !build401) - { - if ( cloud == "no" || cloud == "n" ) - cloud = oam::UnassignedName; - } - - // 4.0.1+ upgrade - if (build401) - { - if ( cloud == "no" || cloud == "n" ) - cloud = oam::UnassignedName; - - if ( cloud == "amazon") - cloud = "amazon-ec2"; - - if ( AmazonVPCNextPrivateIP.empty() ) - AmazonVPCNextPrivateIP = oam::UnassignedName; - - try - { - sysConfigNew->setConfig(InstallSection, "AmazonVPCNextPrivateIP", AmazonVPCNextPrivateIP); - } - catch (...) - { - // cout << "ERROR: Problem setting Cloud Parameters from the Calpont System Configuration file" << endl; - // exit(-1); - } - } - - try - { - sysConfigNew->setConfig(InstallSection, "Cloud", cloud); - sysConfigNew->setConfig(InstallSection, "UMStorageType", UMStorageType); - sysConfigNew->setConfig(InstallSection, "PMInstanceType", PMInstanceType); - sysConfigNew->setConfig(InstallSection, "UMInstanceType", UMInstanceType); - sysConfigNew->setConfig(InstallSection, "UMSecurityGroup", UMSecurityGroup); - sysConfigNew->setConfig(InstallSection, "UMVolumeSize", UMVolumeSize); - sysConfigNew->setConfig(InstallSection, "PMVolumeSize", PMVolumeSize); - sysConfigNew->setConfig(InstallSection, "AmazonAutoTagging", AmazonAutoTagging); - sysConfigNew->setConfig(InstallSection, "AmazonDeviceName", AmazonDeviceName); - - sysConfigNew->setConfig(InstallSection, "UMVolumeType", UMVolumeType); - sysConfigNew->setConfig(InstallSection, "UMVolumeIOPS", UMVolumeIOPS); - sysConfigNew->setConfig(InstallSection, "PMVolumeType", PMVolumeType); - sysConfigNew->setConfig(InstallSection, "PMVolumeIOPS", PMVolumeIOPS); - } - catch (...) - { - // cout << "ERROR: Problem setting Cloud Parameters from the Calpont System Configuration file" << endl; - // exit(-1); - } - - - if ( cloud == "amazon-ec2" || cloud == "amazon-vpc") - cloud = "amazon"; - - //setup um storage - if ( cloud == "amazon" && UMStorageType == "external") - { - - try - { - systemStorageInfo_t t; - t = oam.getStorageConfig(); - - ModuleTypeConfig moduletypeconfig; - oam.getSystemConfig("um", moduletypeconfig); - - for ( int id = 1; id < moduletypeconfig.ModuleCount + 1 ; id++) - { - string volumeNameID = "UMVolumeName" + oam.itoa(id); - string volumeName = oam::UnassignedName; - string deviceNameID = "UMVolumeDeviceName" + oam.itoa(id); - string deviceName = oam::UnassignedName; - - try - { - volumeName = sysConfigOld->getConfig(InstallSection, volumeNameID); - deviceName = sysConfigOld->getConfig(InstallSection, deviceNameID); - } - catch (...) - {} - - try - { - sysConfigNew->setConfig(InstallSection, volumeNameID, volumeName); - sysConfigNew->setConfig(InstallSection, deviceNameID, deviceName); - } - catch (...) - {} - } - } - catch (exception& e) - { - cout << endl << "**** getStorageConfig Failed : " << e.what() << endl; - } - } - - //setup dbroot storage - try - { - DBRootConfigList dbrootConfigList; - oam.getSystemDbrootConfig(dbrootConfigList); - - DBRootConfigList::iterator pt = dbrootConfigList.begin(); - - for ( ; pt != dbrootConfigList.end() ; pt++) - { - int id = *pt; - string DBrootID = "DBRoot" + oam.itoa(id);; - string pathID = "/var/lib/columnstore/data" + oam.itoa(id); - - try - { - sysConfigNew->setConfig(SystemSection, DBrootID, pathID); - } - catch (...) - { - cout << "ERROR: Problem setting DBRoot in the Calpont System Configuration file" << endl; - exit(-1); - } - - if ( cloud == "amazon" && DBRootStorageType == "external") - { - - string volumeNameID = "PMVolumeName" + oam.itoa(id); - string volumeName = oam::UnassignedName; - string deviceNameID = "PMVolumeDeviceName" + oam.itoa(id); - string deviceName = oam::UnassignedName; - string amazondeviceNameID = "PMVolumeAmazonDeviceName" + oam.itoa(id); - string amazondeviceName = oam::UnassignedName; - - try - { - volumeName = sysConfigOld->getConfig(InstallSection, volumeNameID); - deviceName = sysConfigOld->getConfig(InstallSection, deviceNameID); - amazondeviceName = sysConfigOld->getConfig(InstallSection, amazondeviceNameID); - } - catch (...) - {} - - try - { - sysConfigNew->setConfig(InstallSection, volumeNameID, volumeName); - sysConfigNew->setConfig(InstallSection, deviceNameID, deviceName); - sysConfigNew->setConfig(InstallSection, amazondeviceNameID, amazondeviceName); - } - catch (...) - {} - - - string UMVolumeSize = oam::UnassignedName; - string PMVolumeSize = oam::UnassignedName; - - try - { - UMVolumeSize = sysConfigOld->getConfig(InstallSection, "UMVolumeSize"); - PMVolumeSize = sysConfigOld->getConfig(InstallSection, "PMVolumeSize"); - } - catch (...) - {} - - try - { - sysConfigNew->setConfig(InstallSection, "UMVolumeSize", UMVolumeSize); - sysConfigNew->setConfig(InstallSection, "PMVolumeSize", PMVolumeSize); - } - catch (...) - {} - } - - if ( !DataRedundancyConfig.empty() ) - { - try - { - string dbrootPMsID = "DBRoot" + oam.itoa(id) + "PMs"; - string dbrootPMs = sysConfigOld->getConfig("DataRedundancyConfig", dbrootPMsID); - - try - { - sysConfigNew->setConfig("DataRedundancyConfig", dbrootPMsID, dbrootPMs); - } - catch (...) - {} - } - catch (...) - {} - } - } - } - catch (exception& e) - { - cout << endl << "**** getSystemDbrootConfig Failed : " << e.what() << endl; - } - - //setup unassigned dbroots - try - { - DBRootConfigList dbrootConfigList; - oam.getUnassignedDbroot(dbrootConfigList); - - DBRootConfigList::iterator pt = dbrootConfigList.begin(); - - for ( ; pt != dbrootConfigList.end() ; pt++) - { - int id = *pt; - string DBrootID = "DBRoot" + oam.itoa(id);; - string pathID = "/var/lib/columnstore/data" + oam.itoa(id); - - try - { - sysConfigNew->setConfig(SystemSection, DBrootID, pathID); - } - catch (...) - { - cout << "ERROR: Problem setting DBRoot in the Calpont System Configuration file" << endl; - exit(-1); - } - - if ( cloud == "amazon" && DBRootStorageType == "external") - { - - string volumeNameID = "VolumeName" + oam.itoa(id); - string volumeName = oam::UnassignedName; - string deviceNameID = "VolumeDeviceName" + oam.itoa(id); - string deviceName = oam::UnassignedName; - - try - { - volumeName = sysConfigOld->getConfig(InstallSection, volumeNameID); - deviceName = sysConfigOld->getConfig(InstallSection, deviceNameID); - } - catch (...) - {} - - try - { - sysConfigNew->setConfig(InstallSection, volumeNameID, volumeName); - sysConfigNew->setConfig(InstallSection, deviceNameID, deviceName); - } - catch (...) - {} - - } - } - } - catch (exception& e) - { - cout << endl << "**** getUnassignedDbroot Failed : " << e.what() << endl; - } - } - else - { - // pre 3.0 only - - string DBRootStorageLoc; - - for ( int i = 1 ; i < DBRootCount + 1 ; i++) - { - if ( DBRootStorageType != "local") - { - - string DBRootStorageLocID = "DBRootStorageLoc" + oam.itoa(i); - - try - { - DBRootStorageLoc = sysConfigOld->getConfig(InstallSection, DBRootStorageLocID); - } - catch (...) - { - cout << "ERROR: Problem getting '" + DBRootStorageLocID + "' from the Calpont System Configuration file" << endl; - exit(-1); - } - - try - { - sysConfigNew->setConfig(InstallSection, DBRootStorageLocID, DBRootStorageLoc); - } - catch (...) - { - cout << "ERROR: Problem setting '" + DBRootStorageLocID + "' in the Calpont System Configuration file" << endl; - exit(-1); - } - } - - string DBrootID = "DBRoot" + oam.itoa(i); - string pathID = "/var/lib/columnstore/data" + oam.itoa(i); - - try - { - sysConfigNew->setConfig(SystemSection, DBrootID, pathID); - } - catch (...) - { - cout << "ERROR: Problem setting DBRoot in the Calpont System Configuration file" << endl; - exit(-1); - } - } - } - - //do elastic IP configuration - int AmazonElasticIPCount = 0; - - try - { - AmazonElasticIPCount = atoi(sysConfigOld->getConfig(InstallSection, "AmazonElasticIPCount").c_str()); - - if (AmazonElasticIPCount > 0 ) - { - for ( int id = 1 ; id < AmazonElasticIPCount + 1 ; id++ ) - { - string AmazonElasticModule = "AmazonElasticModule" + oam.itoa(id); - string ELmoduleName; - string AmazonElasticIPAddr = "AmazonElasticIPAddr" + oam.itoa(id); - string ELIPaddress; - - ELmoduleName = sysConfigOld->getConfig(InstallSection, AmazonElasticModule); - ELIPaddress = sysConfigOld->getConfig(InstallSection, AmazonElasticIPAddr); - - try - { - sysConfigNew->setConfig(InstallSection, "AmazonElasticIPCount", oam.itoa(AmazonElasticIPCount)); - sysConfigNew->setConfig(InstallSection, AmazonElasticModule, ELmoduleName); - sysConfigNew->setConfig(InstallSection, AmazonElasticIPAddr, ELIPaddress); - } - catch (...) - {} - } - } - } - catch (...) - {} - - try - { - oam.getSystemConfig("AmazonElasticIPCount", AmazonElasticIPCount); - } - catch (...) - { - AmazonElasticIPCount = 0; - } - - // ConcurrentTransactions - string ConcurrentTransactions; - - try - { - ConcurrentTransactions = sysConfigOld->getConfig(SystemSection, "ConcurrentTransactions"); - - if ( !ConcurrentTransactions.empty() ) - { - try - { - sysConfigNew->setConfig(SystemSection, "ConcurrentTransactions", ConcurrentTransactions); - } - catch (...) - { - cout << "ERROR: Problem setting ConcurrentTransactions in the Calpont System Configuration file" << endl; - exit(-1); - } - } - } - catch (...) - {} - - // NetworkCompression Enabled - string NetworkCompression; - - try - { - NetworkCompression = sysConfigOld->getConfig("NetworkCompression", "Enabled"); - - if ( !NetworkCompression.empty() ) - { - try - { - sysConfigNew->setConfig("NetworkCompression", "Enabled", NetworkCompression); - } - catch (...) - { - cout << "ERROR: Problem setting NetworkCompression in the Calpont System Configuration file" << endl; - exit(-1); - } - } - } - catch (...) - {} - - // CoreFile Flag - try - { - CoreFileFlag = sysConfigOld->getConfig("Installation", "CoreFileFlag"); - - if ( !CoreFileFlag.empty() ) - { - try - { - sysConfigNew->setConfig("Installation", "CoreFileFlag", CoreFileFlag); - } - catch (...) - { - cout << "ERROR: Problem setting CoreFileFlag in the Calpont System Configuration file" << endl; - exit(-1); - } - } - } - catch (...) - {} - - //hadoop - string DataFilePlugin; - - try - { - DataFilePlugin = sysConfigOld->getConfig(SystemSection, "DataFilePlugin"); - - if ( !DataFilePlugin.empty() ) - { - try - { - sysConfigNew->setConfig(SystemSection, "DataFilePlugin", DataFilePlugin); - } - catch (...) - { - cout << "ERROR: Problem setting DataFilePlugin in the Calpont System Configuration file" << endl; - exit(-1); - } - - string ExtentsPerSegmentFile; - - try - { - ExtentsPerSegmentFile = sysConfigOld->getConfig("ExtentMap", "ExtentsPerSegmentFile"); - - try - { - sysConfigNew->setConfig("ExtentMap", "ExtentsPerSegmentFile", ExtentsPerSegmentFile); - } - catch (...) - { - cout << "ERROR: Problem setting ExtentsPerSegmentFile in the Calpont System Configuration file" << endl; - exit(-1); - } - } - catch (...) - {} - } - } - catch (...) - {} - - string DataFileLog; - - try - { - DataFileLog = sysConfigOld->getConfig(SystemSection, "DataFileLog"); - - if ( !DataFileLog.empty() ) - { - try - { - sysConfigNew->setConfig(SystemSection, "DataFileLog", DataFileLog); - } - catch (...) - { - cout << "ERROR: Problem setting DataFileLog in the Calpont System Configuration file" << endl; - exit(-1); - } - } - } - catch (...) - {} - - - string AllowDiskBasedJoin; - string TempFilePath; - string TempFileCompression; - - try - { - AllowDiskBasedJoin = sysConfigOld->getConfig("HashJoin", "AllowDiskBasedJoin"); - - if ( !AllowDiskBasedJoin.empty() ) - { - TempFileCompression = sysConfigOld->getConfig("HashJoin", "TempFileCompression"); - - try - { - sysConfigNew->setConfig("HashJoin", "AllowDiskBasedJoin", AllowDiskBasedJoin); - sysConfigNew->setConfig("HashJoin", "TempFileCompression", TempFileCompression); - } - catch (...) - { - cout << "ERROR: Problem setting AllowDiskBasedJoin in the Calpont System Configuration file" << endl; - exit(-1); - } - } - } - catch (...) - {} - - try - { - Host = sysConfigOld->getConfig("QueryTele", "Host"); - - if ( !Host.empty() ) - { - Port = sysConfigOld->getConfig("QueryTele", "Port"); - - try - { - sysConfigNew->setConfig("QueryTele", "Host", Host); - sysConfigNew->setConfig("QueryTele", "Port", Port); - } - catch (...) - { - cout << "ERROR: Problem setting QueryTele in the Calpont System Configuration file" << endl; - exit(-1); - } - } - } - catch (...) - {} - - try - { - string AmazonAccessKey = sysConfigOld->getConfig("Installation", "AmazonAccessKey"); - - if ( !AmazonAccessKey.empty() ) - { - try - { - sysConfigNew->setConfig("Installation", "AmazonAccessKey", AmazonAccessKey); - } - catch (...) - { - cout << "ERROR: Problem setting AmazonAccessKey in the Calpont System Configuration file" << endl; - exit(-1); - } - } - } - catch (...) - {} - - try - { - string AmazonSecretKey = sysConfigOld->getConfig("Installation", "AmazonSecretKey"); - - if ( !AmazonSecretKey.empty() ) - { - try - { - sysConfigNew->setConfig("Installation", "AmazonSecretKey", AmazonSecretKey); - } - catch (...) - { - cout << "ERROR: Problem setting AmazonSecretKey in the Calpont System Configuration file" << endl; - exit(-1); - } - } - } - catch (...) - {} - - try - { - string LockFileDirectory = sysConfigOld->getConfig("Installation", "LockFileDirectory"); - - if ( !LockFileDirectory.empty() ) - { - try - { - sysConfigNew->setConfig("Installation", "LockFileDirectory", LockFileDirectory); - } - catch (...) - { - cout << "ERROR: Problem setting LockFileDirectory in the Calpont System Configuration file" << endl; - exit(-1); - } - } - } - catch (...) - {} - - - // add entries from tuning guide - - string ColScanReadAheadBlocks; - string PrefetchThreshold; - string MaxOutstandingRequests; - string PmMaxMemorySmallSide; - string ThreadPoolSize; - - - try - { - ColScanReadAheadBlocks = sysConfigOld->getConfig("PrimitiveServers", "ColScanReadAheadBlocks"); - PrefetchThreshold = sysConfigOld->getConfig("PrimitiveServers", "PrefetchThreshold"); - PmMaxMemorySmallSide = sysConfigOld->getConfig("HashJoin", "PmMaxMemorySmallSide"); - ThreadPoolSize = sysConfigOld->getConfig("JobList", "ThreadPoolSize"); - } - catch (...) - {} - - try - { - sysConfigNew->setConfig("PrimitiveServers", "ColScanReadAheadBlocks", ColScanReadAheadBlocks); - sysConfigNew->setConfig("PrimitiveServers", "PrefetchThreshold", PrefetchThreshold); - sysConfigNew->setConfig("HashJoin", "PmMaxMemorySmallSide", PmMaxMemorySmallSide); - sysConfigNew->setConfig("JobList", "ThreadPoolSize", ThreadPoolSize); - } - catch (...) - {} - - //Write out Updated System Configuration File - sysConfigNew->write(); -} diff --git a/tools/configMgt/autoPackager b/tools/configMgt/autoPackager deleted file mode 100644 index a1682281a..000000000 --- a/tools/configMgt/autoPackager +++ /dev/null @@ -1,109 +0,0 @@ -#!/usr/bin/expect -# -# $Id: autoPackager.sh 421 2007-04-05 15:46:55Z dhill $ -# -# Remote command execution script to another server -set USERNAME root -set PASSWORD Calpont1 -set RELEASE "" -set PKGSERVER srvdeb5 -set OLD "n" -log_user 0 - -spawn -noecho /bin/bash - -for {set i 0} {$i<[llength $argv]} {incr i} { - set arg($i) [lindex $argv $i] -} - -set i 0 -while true { - if { $i == [llength $argv] } { break } - if { $arg($i) == "-h" } { - send_user "\n" - send_user "'autoPackager' generates a packages based on the set of RPMs\n" - send_user "in the calweb/iterations branch.\n" - send_user "It will place the packages in //calweb/shared/Iterations/Latest/packages\n" - send_user "and in //calweb/shared/Iterations/'release'/packages when specified\n" - send_user "\n" - send_user "Usage: autoPackage -r 'release' -o\n" - send_user " release - Calpont release number\n" - send_user " -o - Build Old Calpont Packages\n" - exit - } - if { $arg($i) == "-r" } { - incr i - set RELEASE $arg($i) - } - if { $arg($i) == "-o" } { - set OLD "y" - } - incr i -} - -if { $RELEASE == "" } { - send_user "\nError: provide a release number with -r option\n" - exit 1 -} - -set PKGCOMMAND "'/root/autoOAM/buildCalpontPackages.sh $RELEASE'" -if { $OLD == "n"} { - set PKGCOMMAND "'/root/autoOAM/buildInfiniDBPackages.sh $RELEASE'" -} - -set timeout 3700 -# -# send command to build calpont rpms -# -#get current date -exec date >/tmp/datesync.tmp -exec cat /tmp/datesync.tmp -set newtime [exec cat /tmp/datesync.tmp] - -send_user "\nStart Time: $newtime\n" - -log_user 1 - -set timeout 900 -# -# send command to build calpont packages -# -if { $OLD == "n"} { - send_user "\nGenerating 4.x INFINIDB Packages takes 15 minutes to complete, please wait...\n" -} else { - send_user "\nGenerating 3.x CALPONT Packages takes 15 minutes to complete, please wait...\n" -} - -log_user 0 - -send "ssh $USERNAME@$PKGSERVER $PKGCOMMAND\n" -expect { - -re "authenticity" { send "yes\n" - expect { - -re "word: " { send "$PASSWORD\n" } abort - } - } - -re "service not known" { send_user "FAILED: Invalid Host\n" ; exit -1 } - -re "Connection refused" { send_user "FAILED: Connection refused\n" ; exit -1 } - - -re "word: " { send "$PASSWORD\n" } abort -} -log_user 1 -expect { - -re "TIMEOUT" { send_user "Calpont Packages Build Failed, timeout" } abort - timeout { send_user "Calpont Packages Build Failed, timeout" } abort - -re "No such file" { send_user "$COMMAND file not found" } abort - -re "no rpm" { send_user "Calpont Packages Build Failed, check build log files" } abort - -re "Calpont Packages Build Successfully Completed" { send_user "Calpont Packages Successfully Completed" } abort -} - -#get current date -exec date >/tmp/datesync.tmp -exec cat /tmp/datesync.tmp -set newtime [exec cat /tmp/datesync.tmp] - -send_user " $newtime\n" - -file delete -force $RELEASE -send_user "\nBuild-Packaging successfully completed\n" -exit 0 diff --git a/tools/configMgt/autoReleaseNotes.sh b/tools/configMgt/autoReleaseNotes.sh deleted file mode 100755 index 07e3afe8f..000000000 --- a/tools/configMgt/autoReleaseNotes.sh +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/expect -# -# $Id: autoReleaseNotes.sh 421 2007-04-05 15:46:55Z dhill $ -# -# Remote command execution script to another server -# Argument 1 - release -# Argument 2 - date since last release (for BUG generation) in "2008-06-02 00:00:00" format -# Argument 3 - debug flag -set timeout 1800 -set SERVER srvengcm1 -set USERNAME root -set PASSWORD Calpont1 -set RELEASE [lindex $argv 0] -set DATE [lindex $argv 1] -set DEBUG [lindex $argv 2] -set COMMAND "'/home/bugzilla/resolve_bug_report $RELEASE $DATE'" - -#set SHARED "//cal6500/shared" -set SHARED "//calweb/shared" - -log_user $DEBUG -spawn -noecho /bin/bash -if { $RELEASE == "-h" } { - send_user "\n" - send_user "'autoReleaseNotes.sh' generates bug reports for release notes\n" - send_user "\n" - send_user "Usage: autoReleaseNotes.sh 'release' 'date'\n" - send_user " release - Calpont release number (optional)\n" - send_user " date - date since last build ('2008-06-02 00:00:00' format)\n" - exit -} -# -# send command -# -send "ssh $USERNAME@$SERVER $COMMAND\n" -expect { - -re "authenticity" { send "yes\n" - expect { - -re "word: " { send "$PASSWORD\n" } abort - } - } - -re "service not known" { send_user "FAILED: Invalid Host\n" ; exit -1 } - -re "word: " { send "$PASSWORD\n" } abort -} -expect { - -re "Generate Resolved Bug Report Successfully Completed" { send_user "Successfully completed BUG Report and placed on //cal6500/shared/Iterations/\n" } abort -} - -exec echo -e "\n##### Subsystem: build \n" > svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/build/trunk >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: dbcon \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/dbcon/trunk >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: dmlib \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/dmlib/trunk >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: exemgr \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/exemgr/trunk >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: oam \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/oam/trunk >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: oamapps \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/oamapps/trunk >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: primitives \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/primitives/trunk >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: procmgr \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/procmgr/trunk >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: procmon \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/procmon/trunk >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: snmpd \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/snmpd/trunk >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: tools \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/tools/trunk >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: utils \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/utils/trunk >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: versioning \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/versioning/trunk >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: writeengine \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/writeengine/trunk >> svn_release_notes.txt - -exec smbclient $SHARED -Wcalpont -Uoamuser%Calpont1 -c "cd Iterations;cd $RELEASE;put svn_release_notes.txt" - -send_user "Successfully completed SVN Reports and placed on //cal6500/shared/Iterations/\n" -exit \ No newline at end of file diff --git a/tools/configMgt/autoReleaseNotes_branch.sh b/tools/configMgt/autoReleaseNotes_branch.sh deleted file mode 100644 index f4f527d10..000000000 --- a/tools/configMgt/autoReleaseNotes_branch.sh +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/expect -# -# $Id: autoReleaseNotes.sh 421 2007-04-05 15:46:55Z dhill $ -# -# Remote command execution script to another server -# Argument 1 - release -# Argument 2 - date since last release (for BUG generation) in "2008-06-02 00:00:00" format -# Argument 3 - debug flag -set timeout 1800 -set SERVER srvengcm1 -set USERNAME root -set PASSWORD Calpont1 -set RELEASE [lindex $argv 0] -set DATE [lindex $argv 1] -set BRANCH [lindex $argv 2] -set DEBUG [lindex $argv 3] -set COMMAND "'/home/bugzilla/resolve_bug_report $RELEASE $DATE'" - -#set SHARED "//cal6500/shared" -set SHARED "//calweb/shared" - -log_user $DEBUG -spawn -noecho /bin/bash -if { $RELEASE == "-h" } { - send_user "\n" - send_user "'autoReleaseNotes.sh' generates bug reports for release notes\n" - send_user "\n" - send_user "Usage: autoReleaseNotes.sh 'release' 'date' 'svn-branch'\n" - send_user " release - Calpont release number\n" - send_user " date - date since last build ('2008-06-02 00:00:00' format)\n" - send_user " svn-branch - svn branch\n" - exit -} - -if { $RELEASE == " " && $DATE != " " && $BRANCH != " "} {puts "enter 'release' 'date' 'svn-branch, enter -h for additional info"; exit -1} - -# -# send command -# -send "ssh $USERNAME@$SERVER $COMMAND\n" -expect { - -re "authenticity" { send "yes\n" - expect { - -re "word: " { send "$PASSWORD\n" } abort - } - } - -re "service not known" { send_user "FAILED: Invalid Host\n" ; exit -1 } - -re "word: " { send "$PASSWORD\n" } abort -} -expect { - -re "Generate Resolved Bug Report Successfully Completed" { send_user "Successfully completed BUG Report and placed on //cal6500/shared/Iterations/\n" } abort -} - -exec echo -e "\n##### Subsystem: build \n" > svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/build/branches/$BRANCH >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: dbcon \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/dbcon/branches/$BRANCH >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: ddlproc \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/ddlproc/branches/$BRANCH >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: dmlib \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/dmlib/branches/$BRANCH >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: dmlproc \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/dmlproc/branches/$BRANCH >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: exemgr \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/exemgr/branches/$BRANCH >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: oam \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/oam/branches/$BRANCH >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: oamapps \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/oamapps/branches/$BRANCH >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: primitives \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/primitives/branches/$BRANCH >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: procmgr \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/procmgr/branches/$BRANCH >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: procmon \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/procmon/branches/$BRANCH >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: snmpd \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/snmpd/branches/$BRANCH >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: tools \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/tools/branches/$BRANCH >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: utils \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/utils/branches/$BRANCH >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: versioning \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/versioning/branches/$BRANCH >> svn_release_notes.txt -exec echo -e "\n##### Subsystem: writeengine \n" >> svn_release_notes.txt -exec svn log -r "{$DATE}:HEAD" http://srvengcm1.calpont.com/svn/genii/writeengine/branches/$BRANCH >> svn_release_notes.txt - -exec smbclient $SHARED -Wcalpont -Uoamuser%Calpont1 -c "cd Iterations;cd $RELEASE;put svn_release_notes.txt" - -send_user "Successfully completed SVN Reports and placed on //cal6500/shared/Iterations/\n" -exit diff --git a/tools/configMgt/configure.cpp b/tools/configMgt/configure.cpp deleted file mode 100644 index 2a349ccff..000000000 --- a/tools/configMgt/configure.cpp +++ /dev/null @@ -1,1763 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - Copyright (C) 2016 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/****************************************************************************************** -* $Id: configure.cpp 64 2006-10-12 22:21:51Z dhill $ -* -* -* List of files being updated by configure: -* Calpont/etc/Columnstore.xml -* -* -******************************************************************************************/ -/** - * @file - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "stdio.h" -#include "ctype.h" -#include - -#include -#include - -#include "liboamcpp.h" -#include "configcpp.h" - -using namespace std; -using namespace oam; -using namespace config; - -typedef std::vector Devices; - -typedef struct Performance_Module_struct -{ - std::string moduleIP1; - std::string moduleIP2; - std::string moduleIP3; - std::string moduleIP4; -} PerformanceModule; - -typedef std::vector PerformanceModuleList; - -int main(int argc, char* argv[]) -{ - - setenv("CALPONT_HOME", ".", 1); - - char* pcommand = 0; - string parentOAMModuleName; - string parentOAMModuleIPAddr; - PerformanceModuleList performancemodulelist; - int pmNumber = 0; - string prompt; - int DBRMworkernodeID = 0; - string parentOAMModuleHostName; - string password; - - string SystemSection = "SystemConfig"; - string InstallSection = "Installation"; - - cout << endl; - cout << "This is the Calpont System Configuration tool." << endl; - cout << "It will generate a Calpont System specific Columnstore.xml file." << endl; - cout << "The file can then be used by the autoInstaller tool" << endl; - cout << endl; - - cout << "Instructions:" << endl << endl; - cout << " Press 'enter' to accept a default value in (), if available or" << endl; - cout << " Enter one of the options within [], if available, or" << endl; - cout << " Enter a new value" << endl << endl; - - //get system name - string systemName; - Config* netConfig = Config::makeConfig("./systems/CalpontSystems.xml"); - - - while (true) - { - prompt = "Enter Calpont system name > "; - pcommand = readline(prompt.c_str()); - - if (pcommand) - { - systemName = pcommand; - free(pcommand); - pcommand = 0; - - int systemCount; - - try - { - systemCount = strtol(netConfig->getConfig("NetworkConfig", "SystemCount").c_str(), 0, 0); - - if ( systemCount == 0 ) - { - cout << "ERROR: SystemCount in ./systems/CalpontSystems.xml equal to 0" << endl; - exit(-1); - } - } - catch (...) - { - cout << "ERROR: Problem getting SystemCount from ./systems/CalpontSystems.xml" << endl; - exit(-1); - } - - bool FOUND = false; - - for ( int i = 1 ; i < systemCount + 1 ; i++) - { - Oam oam; - string SystemName = "SystemName" + oam.itoa(i); - string oamParentModule = "OAMParentModule" + oam.itoa(i); - string SystemPassword = "SystemPassword" + oam.itoa(i); - - string tempSystem; - - try - { - tempSystem = netConfig->getConfig("NetworkConfig", SystemName ); - } - catch (...) - { - cout << "ERROR: Problem getting SystemName from ./systems/CalpontSystems.xml" << endl; - exit(-1); - } - - if ( tempSystem == systemName ) - { - try - { - parentOAMModuleHostName = netConfig->getConfig("NetworkConfig", oamParentModule ); - password = netConfig->getConfig("NetworkConfig", SystemPassword ); - - string cmd = "mkdir systems/" + systemName + " > /dev/null 2>&1" ; - system(cmd.c_str()); - - FOUND = true; - break; - } - catch (...) - { - cout << "ERROR: Problem getting SystemName from ./systems/CalpontSystems.xml" << endl; - exit(-1); - } - } - } - - if ( !FOUND ) - { - cout << "ERROR: System Name '" + systemName + "' not in ./systems/CalpontSystems.xml" << endl; - continue; - } - - break; - } - - cout << "Invalid System Name, please re-enter" << endl; - } - - //determine which Columnstore.xml to use as a base - while (true) - { - cout << endl; - cout << "Enter the Columnstore.xml file do you want to use as a based version" << endl; - cout << "Enter: 1 for System version (meaning copy from the system)" << endl; - cout << " 2 for Release version" << endl; - cout << " 3 for Columnstore.xml version already located in the systems directory" << endl; - - int option; - prompt = "Enter (1,2,or 3) > "; - pcommand = readline(prompt.c_str()); - - if (!pcommand) - continue; - else - { - option = atoi(pcommand); - free(pcommand); - pcommand = 0; - - switch (option) - { - case 1: //get system Columnstore.xml - { - cout << "Copying from system, please wait... " << flush; - //get Network IP Address - Oam oam; - parentOAMModuleIPAddr = oam.getIPAddress( parentOAMModuleHostName); - - if ( parentOAMModuleIPAddr.empty() ) - { - cout << "Invalid Host Name (No DNS IP found), exiting..." << endl; - exit (-1); - } - - //check to see if system is up - string cmdLine = "ping "; - string cmdOption = " -w 1 >> /dev/null"; - - string cmd = cmdLine + parentOAMModuleIPAddr + cmdOption; - int rtnCode = system(cmd.c_str()); - - if ( rtnCode != 0 ) - { - cout << "System is down, exiting..." << endl; - exit (-1); - } - - cmd = "./remote_scp_get.sh " + parentOAMModuleIPAddr + " " + password + " " + MCSSYSCONFDIR + "/columnstore/Columnstore.xml 0 "; - rtnCode = system(cmd.c_str()); - - if (rtnCode == 0) - { - cmd = "mv Columnstore.xml systems/" + systemName + "/."; - rtnCode = system(cmd.c_str()); - - if ( rtnCode != 0 ) - { - cout << "ERROR: No system Columnstore.xml found" << endl; - continue; - } - else - break; - } - - break; - } - - case 2: //get release Columnstore.xml - { - string release; - - while (true) - { - prompt = "Enter Calpont Release number > "; - pcommand = readline(prompt.c_str()); - - if (!pcommand) - continue; - else - { - release = pcommand; - free(pcommand); - pcommand = 0; - - if (release.empty()) continue; - - string cmd = "cd systems/" + systemName + ";rm -f Columnstore.xml;smbclient //cal6500/shared -Wcalpont -U" + oam::USERNAME + "%" + oam::PASSWORD + " -c 'cd Iterations/" + release + "/;prompt OFF;mget Columnstore.xml' > /dev/null 2>&1"; - int rtnCode = system(cmd.c_str()); - - if (rtnCode != 0) - cout << "FAILED: no Release Columnstore.xml found for " + release << endl; - else - { - cmd = "cd systems/" + systemName + ";ls Columnstore.xml > /dev/null 2>&1"; - rtnCode = system(cmd.c_str()); - - if (rtnCode != 0) - { - cout << "FAILED: no Release Columnstore.xml found for " + release << endl; - continue; - } - else - break; - } - } - } - - break; - } - - case 3: //use Columnstore.xml alyread in system directory - break; - - default: - continue; - } - - break; - } - } - - cout << endl; - - string env = "systems/" + systemName; - - setenv("CALPONT_HOME", env.c_str(), 1); - Oam oam; - - Config* sysConfig = Config::makeConfig(env + "/Columnstore.xml"); - - // make DBRM backwards compatiable for pre 1.0.0.157 load - string dbrmMainProc = "DBRM_Controller"; - string dbrmSubProc = "DBRM_Worker"; - string numSubProc = "NumWorkers"; - - try - { - if ( (sysConfig->getConfig(dbrmMainProc, "IPAddr")).empty() ) - { - dbrmMainProc = "DBRM_Master"; - dbrmSubProc = "DBRM_Slave"; - numSubProc = "NumSlaves"; - } - } - catch (...) - { - dbrmMainProc = "DBRM_Master"; - dbrmSubProc = "DBRM_Slave"; - numSubProc = "NumSlaves"; - } - - string singleServerInstall; - - try - { - singleServerInstall = sysConfig->getConfig(InstallSection, "SingleServerInstall"); - } - catch (...) - { - cout << "ERROR: Problem getting SingleServerInstall from the Calpont System Configuration file" << endl; - exit(-1); - } - - //get Parent OAM Module Name and setup of it's Custom OS files - try - { - parentOAMModuleName = sysConfig->getConfig(SystemSection, "ParentOAMModuleName"); - } - catch (...) - { - cout << "ERROR: Problem getting Parent OAM Module Name" << endl; - exit(-1); - } - - while (true) - { - prompt = "Single Server Installation? [y,n] (" + singleServerInstall + ") > "; - pcommand = readline(prompt.c_str()); - cout << endl; - - if (pcommand) - { - if (strlen(pcommand) > 0) singleServerInstall = pcommand; - - free(pcommand); - pcommand = 0; - } - - if ( singleServerInstall == "y" ) - { - cout << "Single Server Installation will be performed. The Server will be assigned as a Director Module #1." << endl; - cout << "All Calpont Processes will run on this single server." << endl; - } - else - { - if ( singleServerInstall == "n" ) - { - prompt = "Enter the OAM Parent Module Name or exit [mm1,dmx,exit] (" + parentOAMModuleName + ") > "; - pcommand = readline(prompt.c_str()); - - if (pcommand) - { - if (strlen(pcommand) > 0) parentOAMModuleName = pcommand; - - free(pcommand); - pcommand = 0; - - if (parentOAMModuleName == "exit") - exit(0); - } - - break; - } - else - cout << "Invalid Entry, please enter 'y' for yes or 'n' for no" << endl; - } - - break; - } - - // set Single Server Installation Indicator - try - { - sysConfig->setConfig(InstallSection, "SingleServerInstall", singleServerInstall); - } - catch (...) - { - cout << "ERROR: Problem setting DBRootStorageLoc in the Calpont System Configuration file" << endl; - exit(-1); - } - - //set Parent OAM Module Name - try - { - sysConfig->setConfig(SystemSection, "ParentOAMModuleName", parentOAMModuleName); - } - catch (...) - { - cout << "ERROR: Problem updating the Calpont System Configuration file" << endl; - exit(-1); - } - - string parentOAMModuleType = parentOAMModuleName.substr(0, MAX_MODULE_TYPE_SIZE); - int parentOAMModuleID = atoi(parentOAMModuleName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE).c_str()); - - //Get list of configured system modules - SystemModuleTypeConfig sysModuleTypeConfig; - - try - { - oam.getSystemConfig(sysModuleTypeConfig); - } - catch (...) - { - cout << "ERROR: Problem reading the Calpont System Configuration file" << endl; - exit(-1); - } - - // - // get Data storage Mount - // - string DBRootStorageLoc; - string DBRootStorageType; - string UserStorageType; - string UserStorageLoc; - - string OAMStorageType; - string OAMStorageLoc; - - int DBRootCount; - string deviceName; - Devices devices; - - cout << endl << "===== Setup Data Storages Mount Configuration =====" << endl << endl; - - try - { - DBRootStorageType = sysConfig->getConfig(InstallSection, "DBRootStorageType"); - DBRootCount = strtol(sysConfig->getConfig(SystemSection, "DBRootCount").c_str(), 0, 0); - OAMStorageType = sysConfig->getConfig(InstallSection, "OAMStorageType"); - OAMStorageLoc = sysConfig->getConfig(InstallSection, "OAMStorageLoc"); - } - catch (...) - { - cout << "ERROR: Problem getting DB Storage Data from the Calpont System Configuration file" << endl; - exit(-1); - } - - //setup dbroot storage - while (true) - { - prompt = "Enter Calpont Data Storage Mount Type [storage,local,nfs] (" + DBRootStorageType + ") > "; - pcommand = readline(prompt.c_str()); - - if (pcommand) - { - if (strlen(pcommand) > 0) DBRootStorageType = pcommand; - - free(pcommand); - pcomand = 0; - } - - if ( DBRootStorageType == "nfs" || DBRootStorageType == "storage" || DBRootStorageType == "local") - break; - - cout << "Invalid Mount Type, please re-enter" << endl; - } - - try - { - sysConfig->setConfig(InstallSection, "DBRootStorageType", DBRootStorageType); - } - catch (...) - { - cout << "ERROR: Problem setting DBRootStorageType in the Calpont System Configuration file" << endl; - exit(-1); - } - - while (true) - { - prompt = "Enter the Number of Calpont Data Storage (DBRoots) areas (" + oam.itoa(DBRootCount) + ") > "; - pcommand = readline(prompt.c_str()); - - if (pcommand) - { - int newDBRootCount = DBRootCount; - - if (strlen(pcommand) > 0) newDBRootCount = atoi(pcommand); - - free(pcommand); - pcommand = 0; - - if (newDBRootCount <= 0) - { - cout << "ERROR: Invalid Number, please reenter" << endl; - continue; - } - - DBRootCount = newDBRootCount; - - try - { - sysConfig->setConfig(SystemSection, "DBRootCount", oam.itoa(DBRootCount)); - } - catch (...) - { - cout << "ERROR: Problem setting DBRoot Count in the Calpont System Configuration file" << endl; - exit(-1); - } - } - - break; - } - - for ( int i = 1 ; i < DBRootCount + 1 ; i++) - { - if ( DBRootStorageType != "local") - { - - string DBRootStorageLocID = "DBRootStorageLoc" + oam.itoa(i); - - try - { - DBRootStorageLoc = sysConfig->getConfig(InstallSection, DBRootStorageLocID); - } - catch (...) - { - cout << "ERROR: Problem getting DB Storage Data from the Calpont System Configuration file" << endl; - exit(-1); - } - - prompt = "Enter Storage Location for DBRoot #" + oam.itoa(i) + " (" + DBRootStorageLoc + ") > "; - pcommand = readline(prompt.c_str()); - - if (pcommand) - { - if (strlen(pcommand) > 0) DBRootStorageLoc = pcommand; - - free(pcommand); - pcommand = 0; - } - - devices.push_back(DBRootStorageLoc); - - try - { - sysConfig->setConfig(InstallSection, DBRootStorageLocID, DBRootStorageLoc); - } - catch (...) - { - cout << "ERROR: Problem setting DBRootStorageLoc in the Calpont System Configuration file" << endl; - exit(-1); - } - } - - string DBrootID = "DBRoot" + oam.itoa(i); - string pathID = "/var/lib/columnstore/data" + oam.itoa(i); - - try - { - sysConfig->setConfig(SystemSection, DBrootID, pathID); - } - catch (...) - { - cout << "ERROR: Problem setting DBRoot in the Calpont System Configuration file" << endl; - exit(-1); - } - } - - //setup OAM storage - cout << endl; - - while (true) - { - prompt = "Enter OAM Data Storage Mount Type [storage,local] (" + OAMStorageType + ") > "; - pcommand = readline(prompt.c_str()); - - if (pcommand) - { - if (strlen(pcommand) > 0) OAMStorageType = pcommand; - - free(pcommand); - pcommand = 0; - } - - if ( OAMStorageType == "storage" || OAMStorageType == "local" ) - break; - - cout << "Invalid Mount Location, please re-enter" << endl; - } - - try - { - sysConfig->setConfig(InstallSection, "OAMStorageType", OAMStorageType); - } - catch (...) - { - cout << "ERROR: Problem setting OAMStorageType in the Calpont System Configuration file" << endl; - exit(-1); - } - - if ( OAMStorageType == "storage") - { - cout << endl; - prompt = "Enter Device Name for OAM Storage mounting (" + OAMStorageLoc + ") > "; - pcommand = readline(prompt.c_str()); - - if (pcommand) - { - if (strlen(pcommand) > 0) OAMStorageLoc = pcommand; - - free(pcommand); - pcommand = 0; - } - - try - { - sysConfig->setConfig(InstallSection, "OAMStorageLoc", OAMStorageLoc); - } - catch (...) - { - cout << "ERROR: Problem setting OAMStorageLoc in the Calpont System Configuration file" << endl; - exit(-1); - } - } - - sysConfig->write(); - - // - // Module Configuration - // - cout << endl << "===== Setup the Module Configuration =====" << endl; - - string ModuleSection = "SystemModuleConfig"; - unsigned int maxPMNicCount = 1; - - for ( unsigned int i = 0 ; i < sysModuleTypeConfig.moduletypeconfig.size(); i++) - { - string moduleType = sysModuleTypeConfig.moduletypeconfig[i].ModuleType; - string moduleDesc = sysModuleTypeConfig.moduletypeconfig[i].ModuleDesc; - int moduleCount = sysModuleTypeConfig.moduletypeconfig[i].ModuleCount; - - //verify and setup of modules count - - if ( moduleType == "dm" && singleServerInstall == "y" ) - { - - cout << endl << "----- " << moduleDesc << " Configuration -----" << endl << endl; - - moduleCount = 1; - pmNumber = 1; - - try - { - string ModuleCountParm = "ModuleCount" + oam.itoa(i + 1); - sysConfig->setConfig(ModuleSection, ModuleCountParm, oam.itoa(moduleCount)); - } - catch (...) - { - cout << "ERROR: Problem setting Module Count in the Calpont System Configuration file" << endl; - exit(-1); - } - } - else - { - if ( singleServerInstall == "y" ) - { - moduleCount = 0; - - try - { - string ModuleCountParm = "ModuleCount" + oam.itoa(i + 1); - sysConfig->setConfig(ModuleSection, ModuleCountParm, oam.itoa(moduleCount)); - continue; - } - catch (...) - { - cout << "ERROR: Problem setting Module Count in the Calpont System Configuration file" << endl; - exit(-1); - } - } - - cout << endl << "----- " << moduleDesc << " Configuration -----" << endl << endl; - - while (true) - { - prompt = "Enter number of " + moduleDesc + "s (" + oam.itoa(moduleCount) + ") > "; - pcommand = readline(prompt.c_str()); - - if (pcommand) - { - //Update Count - string ModuleCountParm = "ModuleCount" + oam.itoa(i + 1); - int newmoduleCount = moduleCount; - - if (strlen(pcommand) > 0) newmoduleCount = atoi(pcommand); - - free(pcommand); - pcommand = 0; - - if (newmoduleCount <= 0) - { - cout << "ERROR: Invalid Number, please reenter" << endl; - continue; - } - - try - { - moduleCount = newmoduleCount; - sysConfig->setConfig(ModuleSection, ModuleCountParm, oam.itoa(moduleCount)); - } - catch (...) - { - cout << "ERROR: Problem setting Module Count in the Calpont System Configuration file" << endl; - exit(-1); - } - } - - if ( parentOAMModuleType == moduleType && parentOAMModuleID > moduleCount ) - { - cout << endl << "ERROR: Parent OAM Module is '" + parentOAMModuleName + "', so you have to have at least 1 of this Module Type" << endl << endl; - } - else - { - break; - } - } - } - - int listSize = sysModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.size(); - - if ( moduleCount == 0 ) - { - //set unEquipped Module IP addresses to oam::UnassignedIpAddr - for ( int j = moduleCount ; j < listSize ; j ++ ) - { - for ( unsigned int k = 1 ; k < MAX_NIC + 1 ; k++ ) - { - string ModuleIPAddr = "ModuleIPAddr" + oam.itoa(j + 1) + "-" + oam.itoa(k) + "-" + oam.itoa(i + 1); - - if ( !(sysConfig->getConfig(ModuleSection, ModuleIPAddr).empty()) ) - { - string ModuleHostName = "ModuleHostName" + oam.itoa(j + 1) + "-" + oam.itoa(k) + "-" + oam.itoa(i + 1); - - sysConfig->setConfig(ModuleSection, ModuleIPAddr, oam::UnassignedIpAddr); - sysConfig->setConfig(ModuleSection, ModuleHostName, oam::UnassignedName); - } - } - } - - //no modules equipped for this Module Type, skip - continue; - } - - if ( moduleType == "pm" ) - pmNumber = moduleCount; - - //Enter User Temp Storage location - if ( moduleType == "um" || singleServerInstall == "y" ) - { - try - { - UserStorageType = sysConfig->getConfig(InstallSection, "UserStorageType"); - } - catch (...) - { - cout << "ERROR: Problem getting DB Storage Data from the Calpont System Configuration file" << endl; - exit(-1); - } - - if ( moduleType == "um" ) - cout << endl; - - //setup User Temp Storage - while (true) - { - string newUserStorageType = UserStorageType; - prompt = "Enter User Module Temp Data Storage Mount Type [storage,local] (" + UserStorageType + ") > "; - pcommand = readline(prompt.c_str()); - - if (pcommand) - { - if (strlen(pcommand) > 0) newUserStorageType = pcommand; - - free(pcommand); - pcommand = 0; - } - - if ( newUserStorageType == "storage" || newUserStorageType == "local" ) - { - UserStorageType = newUserStorageType; - break; - } - - cout << "Invalid Mount Type, please re-enter" << endl; - } - - try - { - sysConfig->setConfig(InstallSection, "UserStorageType", UserStorageType); - } - catch (...) - { - cout << "ERROR: Problem setting UserStorageType in the Calpont System Configuration file" << endl; - exit(-1); - } - } - - int moduleID = 1; - - while (true) - { - prompt = "Enter Starting Module ID for " + moduleDesc + " (1) > "; - pcommand = readline(prompt.c_str()); - - if (pcommand) - { - if (strlen(pcommand) > 0) moduleID = atoi(pcommand); - - free(pcommand); - pcommand = 0; - } - - //valid if parent OAM module type and is consistent with parentOAMModuleName - if ( parentOAMModuleType == moduleType && - ( parentOAMModuleID < moduleID || parentOAMModuleID > moduleID + (moduleCount - 1) ) ) - { - cout << endl << "ERROR: Parent and Starting Module ID out of range, please re-enter" << endl << endl; - moduleID = 1; - } - else - break; - } - - //clear any Equipped Module IP addresses that aren't in current ID range - for ( int j = 0 ; j < listSize ; j++ ) - { - for ( unsigned int k = 1 ; k < MAX_NIC + 1 ; k++) - { - string ModuleIPAddr = "ModuleIPAddr" + oam.itoa(j + 1) + "-" + oam.itoa(k) + "-" + oam.itoa(i + 1); - - if ( !(sysConfig->getConfig(ModuleSection, ModuleIPAddr).empty()) ) - { - if ( j + 1 < moduleID || j + 1 > moduleID + (moduleCount - 1) ) - { - string ModuleHostName = "ModuleHostName" + oam.itoa(j + 1) + "-" + oam.itoa(k) + "-" + oam.itoa(i + 1); - - sysConfig->setConfig(ModuleSection, ModuleIPAddr, oam::UnassignedIpAddr); - sysConfig->setConfig(ModuleSection, ModuleHostName, oam::UnassignedName); - } - } - } - } - - //get IP addresses and Host Names - for ( int k = 0 ; k < moduleCount ; k++, moduleID++ ) - { - PerformanceModule performancemodule; - string newModuleName = moduleType + oam.itoa(moduleID); - string moduleNameDesc = moduleDesc + " #" + oam.itoa(moduleID); - - cout << endl << "*** " << moduleNameDesc << " Configuration ***" << endl << endl; - - //Enter User Temp Storage location - if ( moduleType == "um" || singleServerInstall == "y" ) - { - UserStorageLoc = oam::UnassignedName; - string USERSTORAGELOC = "UserStorageLoc" + oam.itoa(moduleID); - - if ( UserStorageType == "storage") - { - try - { - UserStorageLoc = sysConfig->getConfig(InstallSection, USERSTORAGELOC); - } - catch (...) - { - cout << "ERROR: Problem getting DB Storage Data from the Calpont System Configuration file" << endl; - exit(-1); - } - - prompt = "Enter Device Name for User Module '" + newModuleName + "' Temp Storage (" + UserStorageLoc + ") > "; - pcommand = readline(prompt.c_str()); - - if (pcommand) - { - if (strlen(pcommand) > 0) UserStorageLoc = pcommand; - - free(pcommand); - pcommand = 0; - } - - try - { - sysConfig->setConfig(InstallSection, USERSTORAGELOC, UserStorageLoc); - } - catch (...) - { - cout << "ERROR: Problem setting UserStorageLoc in the Calpont System Configuration file" << endl; - exit(-1); - } - } - else - { - try - { - sysConfig->setConfig(InstallSection, USERSTORAGELOC, oam::UnassignedName); - } - catch (...) - { - cout << "ERROR: Problem setting UserStorageLoc in the Calpont System Configuration file" << endl; - exit(-1); - } - } - } - - //setup HostName/IPAddress for each NIC - for ( unsigned int nicID = 1 ; nicID < MAX_NIC + 1 ; nicID++ ) - { - string moduleHostName = oam::UnassignedName; - string moduleIPAddr = oam::UnassignedIpAddr; - - DeviceNetworkList::iterator listPT = sysModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for ( ; listPT != sysModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; listPT++) - { - HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin(); - - for ( ; pt1 != (*listPT).hostConfigList.end() ; pt1++) - { - if ( newModuleName == (*listPT).DeviceName && (*pt1).NicID == nicID) - { - moduleHostName = (*pt1).HostName; - moduleIPAddr = (*pt1).IPAddr; - break; - } - } - } - - bool moduleHostNameFound = true; - - if (moduleHostName.empty()) - { - moduleHostNameFound = false; - moduleHostName = oam::UnassignedName; - } - - if (moduleIPAddr.empty()) - moduleIPAddr = oam::UnassignedIpAddr; - - string newModuleIPAddr; - string newModuleHostName; - - while (true) - { - prompt = "Enter Nic Interface #" + oam.itoa(nicID) + " Host Name (" + moduleHostName + ") > "; - pcommand = readline(prompt.c_str()); - - if (!pcommand) - newModuleHostName = moduleHostName; - else - { - if (strlen(pcommand) > 0) newModuleHostName = pcommand; - - free(pcommand); - pcommand = 0; - } - - if ( newModuleHostName == oam::UnassignedName && !moduleHostNameFound && nicID == 1 ) - cout << "Invalid Entry, please enter at least 1 valid Host Name for Module" << endl; - else - break; - } - - if ( newModuleHostName == oam::UnassignedName && moduleHostNameFound ) - // exit out to next module ID - break; - - //set New Module Host Name - string moduleHostNameParm = "ModuleHostName" + oam.itoa(moduleID) + "-" + oam.itoa(nicID) + "-" + oam.itoa(i + 1); - - try - { - sysConfig->setConfig(ModuleSection, moduleHostNameParm, newModuleHostName); - } - catch (...) - { - cout << "ERROR: Problem setting Host Name in the Calpont System Configuration file" << endl; - exit(-1); - } - - if ( newModuleHostName == oam::UnassignedName ) - newModuleIPAddr = oam::UnassignedIpAddr; - else - { - //get Network IP Address - string IPAddress = oam.getIPAddress( newModuleHostName); - - if ( !IPAddress.empty() ) - newModuleIPAddr = IPAddress; - else - newModuleIPAddr = moduleIPAddr; - - //prompt for IP address - while (true) - { - prompt = "Enter Nic Interface #" + oam.itoa(nicID) + " IP Address of " + newModuleHostName + " (" + newModuleIPAddr + ") > "; - pcommand = readline(prompt.c_str()); - - if (pcommand) - { - if (strlen(pcommand) > 0) newModuleIPAddr = pcommand; - - free(pcommand); - pcommand = 0; - } - - if (oam.isValidIP(newModuleIPAddr)) - break; - else - cout << "Invalid IP Address format, xxx.xxx.xxx.xxx, please reenter" << endl; - } - } - - //set Module IP address - string moduleIPAddrNameParm = "ModuleIPAddr" + oam.itoa(moduleID) + "-" + oam.itoa(nicID) + "-" + oam.itoa(i + 1); - - try - { - sysConfig->setConfig(ModuleSection, moduleIPAddrNameParm, newModuleIPAddr); - } - catch (...) - { - cout << "ERROR: Problem setting IP address in the Calpont System Configuration file" << endl; - exit(-1); - } - - if (moduleType == "pm" || ( moduleType == "dm" && singleServerInstall == "y" )) - { - - switch (nicID) - { - case 1: - performancemodule.moduleIP1 = newModuleIPAddr; - break; - - case 2: - performancemodule.moduleIP2 = newModuleIPAddr; - break; - - case 3: - performancemodule.moduleIP3 = newModuleIPAddr; - break; - - case 4: - performancemodule.moduleIP4 = newModuleIPAddr; - break; - } - - if ( maxPMNicCount < nicID ) - maxPMNicCount = nicID; - } - - if ( nicID > 1 ) - continue; - - //set port addresses - if ( newModuleName == parentOAMModuleName ) - { - parentOAMModuleIPAddr = newModuleIPAddr; - - //set Parent Processes Port IP Address - string parentProcessMonitor = parentOAMModuleName + "_ProcessMonitor"; - sysConfig->setConfig(parentProcessMonitor, "IPAddr", parentOAMModuleIPAddr); - sysConfig->setConfig(parentProcessMonitor, "Port", "8606"); - sysConfig->setConfig("ProcMgr", "IPAddr", parentOAMModuleIPAddr); - sysConfig->setConfig("ProcHeartbeatControl", "IPAddr", parentOAMModuleIPAddr); - sysConfig->setConfig("ProcStatusControl", "IPAddr", parentOAMModuleIPAddr); - string parentServerMonitor = parentOAMModuleName + "_ServerMonitor"; - sysConfig->setConfig(parentServerMonitor, "IPAddr", parentOAMModuleIPAddr); - - if ( singleServerInstall == "y" ) - { - //set User Module's IP Addresses - string Section = "ExeMgr" + oam.itoa(moduleID); - - sysConfig->setConfig(Section, "IPAddr", newModuleIPAddr); - sysConfig->setConfig(Section, "Port", "8601"); - - //set Performance Module's IP's to first NIC IP entered - sysConfig->setConfig("DDLProc", "IPAddr", newModuleIPAddr); - sysConfig->setConfig("DMLProc", "IPAddr", newModuleIPAddr); - } - } - else - { - //set child Process Monitor Port IP Address - string portName = newModuleName + "_ProcessMonitor"; - sysConfig->setConfig(portName, "IPAddr", newModuleIPAddr); - sysConfig->setConfig(portName, "Port", "8606"); - - //set child Server Monitor Port IP Address - portName = newModuleName + "_ServerMonitor"; - sysConfig->setConfig(portName, "IPAddr", newModuleIPAddr); - sysConfig->setConfig(portName, "Port", "8622"); - - //set User Module's IP Addresses - if ( moduleType == "um" ) - { - string Section = "ExeMgr" + oam.itoa(moduleID); - - sysConfig->setConfig(Section, "IPAddr", newModuleIPAddr); - sysConfig->setConfig(Section, "Port", "8601"); - } - - //set Performance Module's IP's to first NIC IP entered - if ( newModuleName == "pm1" ) - { - sysConfig->setConfig("DDLProc", "IPAddr", newModuleIPAddr); - sysConfig->setConfig("DMLProc", "IPAddr", newModuleIPAddr); - } - } - - //setup DBRM processes - if ( newModuleName == "dm1" ) - sysConfig->setConfig(dbrmMainProc, "IPAddr", newModuleIPAddr); - - DBRMworkernodeID++; - string DBRMSection = dbrmSubProc + oam.itoa(DBRMworkernodeID); - sysConfig->setConfig(DBRMSection, "IPAddr", newModuleIPAddr); - sysConfig->setConfig(DBRMSection, "Module", newModuleName); - - } //end of nicID loop - - if (moduleType == "pm" || singleServerInstall == "y" ) - performancemodulelist.push_back(performancemodule); - - } //end of k (moduleCount) loop - - sysConfig->write(); - - } //end of i for loop - - //setup DBRM Controller - sysConfig->setConfig(dbrmMainProc, numSubProc, oam.itoa(DBRMworkernodeID)); - - //set ConnectionsPerPrimProc - try - { - sysConfig->setConfig("PrimitiveServers", "ConnectionsPerPrimProc", oam.itoa(maxPMNicCount * 2)); - } - catch (...) - { - cout << "ERROR: Problem setting ConnectionsPerPrimProc in the Calpont System Configuration file" << endl; - exit(-1); - } - - //set the PM Ports based on Number of PM modules equipped, if any equipped - int pmPorts = 32; - sysConfig->setConfig("PrimitiveServers", "Count", oam.itoa(pmNumber)); - - if ( pmNumber > 0 || singleServerInstall == "y" ) - { - const string PM = "PMS"; - - for ( int pmsID = 1; pmsID < pmPorts + 1 ; ) - { - for (unsigned int j = 1 ; j < maxPMNicCount + 1 ; j++) - { - PerformanceModuleList::iterator list1 = performancemodulelist.begin(); - - for (; list1 != performancemodulelist.end() ; list1++) - { - string pmName = PM + oam.itoa(pmsID); - string IpAddr; - - switch (j) - { - case 1: - IpAddr = (*list1).moduleIP1; - break; - - case 2: - IpAddr = (*list1).moduleIP2; - break; - - case 3: - IpAddr = (*list1).moduleIP3; - break; - - case 4: - IpAddr = (*list1).moduleIP4; - break; - } - - if ( !IpAddr.empty() && IpAddr != oam::UnassignedIpAddr ) - { - sysConfig->setConfig(pmName, "IPAddr", IpAddr); - pmsID++; - - if ( pmsID > pmPorts ) - break; - } - } - - if ( pmsID > pmPorts ) - break; - } - } - } - - sysConfig->write(); - - // - // Configure switches - // - SystemSwitchTypeConfig sysSwitchTypeConfig; - - try - { - oam.getSystemConfig(sysSwitchTypeConfig); - } - catch (...) - { - cout << "ERROR: Problem reading the Calpont System Configuration file" << endl; - exit(-1); - } - - cout << endl << "===== Setup the Switch Configuration =====" << endl; - - string SwitchSection = "SystemSwitchConfig"; - - for ( unsigned int i = 0 ; i < sysSwitchTypeConfig.switchtypeconfig.size(); i++) - { - string switchType = sysSwitchTypeConfig.switchtypeconfig[i].SwitchType; - string switchDesc = sysSwitchTypeConfig.switchtypeconfig[i].SwitchDesc; - int switchCount = sysSwitchTypeConfig.switchtypeconfig[i].SwitchCount; - - //verify and setup of switches count - cout << endl << "----- " << switchDesc << " Configuration -----" << endl << endl; - - while (true) - { - prompt = "Enter number of " + switchDesc + "es (" + oam.itoa(switchCount) + ") > "; - pcommand = readline(prompt.c_str()); - - if (pcommand) - { - //Update Count and continue with IP addresses - string SwitchCountParm = "SwitchCount" + oam.itoa(i + 1); - int newswitchCount = switchCount; - - if (strlen(pcommand) > 0) newswitchCount = atoi(pcommand); - - free(pcommand); - pcommand = 0; - - if (newswitchCount <= 0) - { - cout << "ERROR: Invalid Number, please reenter" << endl; - continue; - } - - try - { - switchCount = newswitchCount; - sysConfig->setConfig(SwitchSection, SwitchCountParm, oam.itoa(switchCount)); - break; - } - catch (...) - { - cout << "ERROR: Problem setting Switch Count in the Calpont System Configuration file" << endl; - exit(-1); - } - } - - break; - } - - int listSize = sysSwitchTypeConfig.switchtypeconfig[i].SwitchNetworkList.size(); - - //set unEquipped Switch IP addresses to oam::UnassignedIpAddr - if ( switchCount < listSize ) - { - for ( int j = switchCount ; j < listSize ; j ++ ) - { - string SwitchIPAddr = "SwitchIPAddr" + oam.itoa(j + 1) + "-" + oam.itoa(i + 1); - string SwitchHostName = "SwitchHostName" + oam.itoa(j + 1) + "-" + oam.itoa(i + 1); - - sysConfig->setConfig(SwitchSection, SwitchIPAddr, oam::UnassignedIpAddr); - sysConfig->setConfig(SwitchSection, SwitchHostName, oam::UnassignedName); - } - } - - if ( switchCount == 0 ) - //no switches equipped for this Switch Type, skip - continue; - - //add new entried to Calpont System Config table for additional IP Addresses and Hostnames - for ( int j = listSize ; j < switchCount ; j ++ ) - { - string SwitchIPAddr = "SwitchIPAddr" + oam.itoa(j + 1) + "-" + oam.itoa(i + 1); - string SwitchHostName = "SwitchHostName" + oam.itoa(j + 1) + "-" + oam.itoa(i + 1); - - sysConfig->setConfig(SwitchSection, SwitchIPAddr, oam::UnassignedIpAddr); - sysConfig->setConfig(SwitchSection, SwitchHostName, oam::UnassignedName); - } - - //get IP addresses and Host Names - for ( int k = 0 ; k < switchCount ; k++ ) - { - int switchID = k + 1; - string newSwitchName = switchType + oam.itoa(switchID); - string switchIPAddr = oam::UnassignedIpAddr; - string switchHostName = oam::UnassignedName; - string switchNameDesc = switchDesc + " #" + oam.itoa(switchID); - - cout << endl << "*** " << switchNameDesc << " Configuration ***" << endl << endl; - - DeviceNetworkList::iterator listPT = sysSwitchTypeConfig.switchtypeconfig[i].SwitchNetworkList.begin(); - - for ( ; listPT != sysSwitchTypeConfig.switchtypeconfig[i].SwitchNetworkList.end() ; listPT++) - { - if ( newSwitchName == (*listPT).DeviceName) - { - HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin(); - - for ( ; pt1 != (*listPT).hostConfigList.end() ; pt1++) - { - switchIPAddr = (*pt1).IPAddr; - switchHostName = (*pt1).HostName; - break; - } - - break; - } - } - - if (switchIPAddr.empty()) - switchIPAddr = oam::UnassignedIpAddr; - - if (switchHostName.empty()) - switchHostName = oam::UnassignedName; - - string newSwitchIPAddr; - string newSwitchHostName; - - prompt = "Enter Host Name (" + switchHostName + ") > "; - pcommand = readline(prompt.c_str()); - - if (!pcommand) - newSwitchHostName = switchHostName; - else - { - if (strlen(pcommand) > 0 )newSwitchHostName = pcommand; - - free(pcommand); - pcommand = 0; - - //set New Switch Host Name - string switchHostNameParm = "SwitchHostName" + oam.itoa(switchID) + "-" + oam.itoa(i + 1); - - try - { - sysConfig->setConfig(SwitchSection, switchHostNameParm, newSwitchHostName); - } - catch (...) - { - cout << "ERROR: Problem setting Host Name in the Calpont System Configuration file" << endl; - exit(-1); - } - } - - //get Network IP Address - string IPAddress = oam.getIPAddress( newSwitchHostName); - - if ( !IPAddress.empty() ) - { - newSwitchIPAddr = IPAddress; - - cout << "'" << newSwitchHostName << "' DNS IP Address is '" << newSwitchIPAddr << "'" << endl; - } - else - { - //no DNS address found for Host Name - while (true) - { - prompt = "Enter IP Address of (" + switchIPAddr + ") > "; - pcommand = readline(prompt.c_str()); - - if (!pcommand) - newSwitchIPAddr = switchIPAddr; - else - { - if (strlen(pcommand) > 0) newSwitchIPAddr = pcommand; - - free(pcommand); - pcommand = 0; - } - - if (oam.isValidIP(newSwitchIPAddr)) - break; - else - cout << "Invalid IP Address format, xxx.xxx.xxx.xxx, please reenter" << endl; - } - } - - //set New Switch IP address - string switchIPAddrNameParm = "SwitchIPAddr" + oam.itoa(switchID) + "-" + oam.itoa(i + 1); - - try - { - sysConfig->setConfig(SwitchSection, switchIPAddrNameParm, newSwitchIPAddr); - } - catch (...) - { - cout << "ERROR: Problem setting IP address in the Calpont System Configuration file" << endl; - exit(-1); - } - } //end of k loop - } //end of i for loop - - sysConfig->write(); - - // - // Configure storages - // - SystemStorageTypeConfig sysStorageTypeConfig; - - try - { - oam.getSystemConfig(sysStorageTypeConfig); - } - catch (...) - { - cout << "ERROR: Problem reading the Calpont System Configuration file" << endl; - exit(-1); - } - - string StorageSection = "SystemStorageConfig"; - - cout << endl << "===== Setup the Storage Configuration =====" << endl; - - for ( unsigned int i = 0 ; i < sysStorageTypeConfig.storagetypeconfig.size(); i++) - { - string storageType = sysStorageTypeConfig.storagetypeconfig[i].StorageType; - string storageDesc = sysStorageTypeConfig.storagetypeconfig[i].StorageDesc; - int storageCount = sysStorageTypeConfig.storagetypeconfig[i].StorageCount; - - //verify and setup of storages count - cout << endl << "----- " << storageDesc << " Configuration -----" << endl << endl; - - while (true) - { - prompt = "Enter number of " + storageDesc + "s (" + oam.itoa(storageCount) + ") > "; - pcommand = readline(prompt.c_str()); - - if (pcommand) - { - //Update Count and continue with IP addresses - string StorageCountParm = "StorageCount" + oam.itoa(i + 1); - int newstorageCount = storageCount; - - if (strlen(pcommand) > 0) newstorageCount = atoi(pcommand); - - free(pcommand); - pcommand = 0; - - if (newstorageCount <= 0) - { - cout << "ERROR: Invalid Number, please reenter" << endl; - continue; - } - - try - { - storageCount = newstorageCount; - sysConfig->setConfig(StorageSection, StorageCountParm, oam.itoa(storageCount)); - break; - } - catch (...) - { - cout << "ERROR: Problem setting Storage Count in the Calpont System Configuration file" << endl; - exit(-1); - } - } - - break; - } - - int listSize = sysStorageTypeConfig.storagetypeconfig[i].StorageNetworkList.size(); - - //set unEquipped Storage IP addresses to oam::UnassignedIpAddr - if ( storageCount < listSize ) - { - for ( int j = storageCount ; j < listSize ; j ++ ) - { - string storageIPAddr = "StorageIPAddr" + oam.itoa(j + 1) + "-" + oam.itoa(i + 1); - string StorageHostName = "StorageHostName" + oam.itoa(j + 1) + "-" + oam.itoa(i + 1); - - sysConfig->setConfig(StorageSection, storageIPAddr, oam::UnassignedIpAddr); - sysConfig->setConfig(StorageSection, StorageHostName, oam::UnassignedName); - } - } - - if ( storageCount == 0 ) - //no storages equipped for this Storage Type, skip - continue; - - //add new entried to Calpont System Config table for additional IP Addresses and Hostnames - for ( int j = listSize ; j < storageCount ; j ++ ) - { - string storageIPAddr = "StorageIPAddr" + oam.itoa(j + 1) + "-" + oam.itoa(i + 1); - string StorageHostName = "StorageHostName" + oam.itoa(j + 1) + "-" + oam.itoa(i + 1); - - sysConfig->setConfig(StorageSection, storageIPAddr, oam::UnassignedIpAddr); - sysConfig->setConfig(StorageSection, StorageHostName, oam::UnassignedName); - } - - - //get IP addresses and Host Names - for ( int k = 0 ; k < storageCount ; k++ ) - { - int storageID = k + 1; - string newStorageName = storageType + oam.itoa(storageID); - string storageIPAddr = oam::UnassignedIpAddr; - string storageHostName = oam::UnassignedName; - string storageNameDesc = storageDesc + " #" + oam.itoa(storageID); - - cout << endl << "*** " << storageNameDesc << " Configuration ***" << endl << endl; - - DeviceNetworkList::iterator listPT = sysStorageTypeConfig.storagetypeconfig[i].StorageNetworkList.begin(); - - for ( ; listPT != sysStorageTypeConfig.storagetypeconfig[i].StorageNetworkList.end() ; listPT++) - { - if ( newStorageName == (*listPT).DeviceName) - { - HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin(); - - for ( ; pt1 != (*listPT).hostConfigList.end() ; pt1++) - { - storageIPAddr = (*pt1).IPAddr; - storageHostName = (*pt1).HostName; - break; - } - - break; - } - } - - if (storageIPAddr.empty()) - storageIPAddr = oam::UnassignedIpAddr; - - if (storageHostName.empty()) - storageHostName = oam::UnassignedName; - - string newStorageIPAddr; - string newStorageHostName; - - prompt = "Enter Host Name (" + storageHostName + ") > "; - pcommand = readline(prompt.c_str()); - - if (!pcommand) - newStorageHostName = storageHostName; - else - { - if (strlen(pcommand) > 0) newStorageHostName = pcommand; - - free(pcommand); - pcommand = 0; - - //set New Storage Host Name - string storageHostNameParm = "StorageHostName" + oam.itoa(storageID) + "-" + oam.itoa(i + 1); - - try - { - sysConfig->setConfig(StorageSection, storageHostNameParm, newStorageHostName); - } - catch (...) - { - cout << "ERROR: Problem setting Host Name in the Calpont System Configuration file" << endl; - exit(-1); - } - } - - //get Network IP Address - string IPAddress = oam.getIPAddress( newStorageHostName); - - if ( !IPAddress.empty() ) - { - newStorageIPAddr = IPAddress; - - cout << "'" << newStorageHostName << "' DNS IP Address is '" << newStorageIPAddr << "'" << endl; - } - else - { - //no DNS address found for Host Name - while (true) - { - prompt = "Enter IP Address of (" + storageIPAddr + ") > "; - pcommand = readline(prompt.c_str()); - - if (!pcommand) - newStorageIPAddr = storageIPAddr; - else - { - if (strlen(pcommand) > 0) newStorageIPAddr = pcommand; - - free(pcommand); - pcommand = 0; - } - - if (oam.isValidIP(newStorageIPAddr)) - break; - else - cout << "Invalid IP Address format, xxx.xxx.xxx.xxx, please reenter" << endl; - } - } - - //set New Storage IP address - string storageIPAddrNameParm = "StorageIPAddr" + oam.itoa(storageID) + "-" + oam.itoa(i + 1); - - try - { - sysConfig->setConfig(StorageSection, storageIPAddrNameParm, newStorageIPAddr); - } - catch (...) - { - cout << "ERROR: Problem setting IP address in the Calpont System Configuration file" << endl; - exit(-1); - } - } //end of while loop - } //end of i for loop - - sysConfig->write(); - - // - // Configure NMS Addresses - // - - cout << endl << "===== Setup the External Network Management System (NMS) Server Configuration =====" << endl << endl; - - string NMSIPAddress; - - try - { - NMSIPAddress = sysConfig->getConfig(SystemSection, "NMSIPAddress"); - } - catch (...) - { - cout << "ERROR: Problem getting NMSIPAddress from Calpont System Configuration file" << endl; - exit(-1); - } - - prompt = "Enter IP Address(es) of NMS Server (" + NMSIPAddress + ") > "; - pcommand = readline(prompt.c_str()); - - if (pcommand) - { - if (strlen(pcommand) > 0) NMSIPAddress = pcommand; - - free(pcommand); - pcommand = 0; - } - - try - { - sysConfig->setConfig(SystemSection, "NMSIPAddress", NMSIPAddress); - } - catch (...) - { - cout << "ERROR: Problem setting NMSIPAddress in the Calpont System Configuration file" << endl; - exit(-1); - } - - - // - // setup TransactionArchivePeriod - // - - cout << endl << "===== Setup the Transaction Log Archive Time Period =====" << endl << endl; - - string transactionArchivePeriod; - - try - { - transactionArchivePeriod = sysConfig->getConfig(SystemSection, "TransactionArchivePeriod"); - } - catch (...) - { - cout << "ERROR: Problem getting transactionArchivePeriod from Calpont System Configuration file" << endl; - exit(-1); - } - - prompt = "Enter Transaction Archive Period in minutes (" + transactionArchivePeriod + ") > "; - pcommand = readline(prompt.c_str()); - - if (pcommand) - { - if (strlen(pcommand) > 0) transactionArchivePeriod = pcommand; - - free(pcommand); - pcommand = 0; - } - - try - { - sysConfig->setConfig(SystemSection, "TransactionArchivePeriod", transactionArchivePeriod); - } - catch (...) - { - cout << "ERROR: Problem setting IP address in the Calpont System Configuration file" << endl; - exit(-1); - } - - // - //Update oidbitmap in Columnstore.xml - // - - try - { - sysConfig->setConfig("OIDManager", "OIDBitmapFile", "/mnt/OAM/dbrm/oidbitmap"); - } - catch (...) - { - cout << "ERROR: Problem setting OIDBitmapFile Calpont System Configuration file" << endl; - exit(-1); - } - - //Write out Updated System Configuration File - sysConfig->write(); - - cout << endl << "Configure is successfuly completed, Columnstore.xml is located in systems/" + systemName << endl << endl; -} diff --git a/tools/configMgt/datdup-build b/tools/configMgt/datdup-build deleted file mode 100755 index 6825d923d..000000000 --- a/tools/configMgt/datdup-build +++ /dev/null @@ -1,44 +0,0 @@ -#! /bin/sh -# -# $Id: calpont-build 421 2007-14-15 15:46:55Z dhill $ -# -# build calpont rpm and install on call6500 -# $1 - release number or 'Latest' -# $2 - svn branch -# -if [ "$2" = "" ] ; then - echo "enter release and svn-branch arguments" - exit 0 -fi -# !!! set for OS type 32 or 64 bit !!! - -OS=x86_64 - -# -REL=$1 -DIR=$2 - -HOME=/home/nightly/$DIR -rm -fr /home/nightly/rpm/* -# -su - nightly -c "cd $HOME;./build/build_rpms -g > ~/$DIR/build_datdup_rpm.log 2>&1" -# -test -d /home/nightly/rpm/RPMS || echo "no rpm, Build Failed" -test -d /home/nightly/rpm/RPMS || exit 1 -cd /home/nightly/rpm/RPMS/$OS/ -test -f calpont-datdup-$version*.rpm || echo "no rpm, Build Failed" -test -f calpont-datdup-$version*.rpm || exit 1 -{ -if [ $REL != "Latest" ] ; then - # copy rpms to calweb - cd /home/nightly/rpm/RPMS/$OS/ - smbclient //calweb/shared -Wcalpont -Uoamuser%Calpont1 -c "cd Iterations/$REL/packages;rm calpont-datdup*.$OS.rpm;prompt OFF;mput calpont-datdup*" -else - # copy nightly on calweb - cd /home/nightly/rpm/RPMS/$OS/ - smbclient //calweb/shared -Wcalpont -Uoamuser%Calpont1 -c "cd Iterations/Latest/packages;rm calpont-datdup*.$OS.rpm;prompt OFF;mput calpont-datdup*" -fi -} 2> /root/datdup-build-$REL-$DIR.log -# -echo "Build Successfully Completed" -exit 0 diff --git a/tools/configMgt/mysql-build-branch b/tools/configMgt/mysql-build-branch deleted file mode 100755 index afa79cd60..000000000 --- a/tools/configMgt/mysql-build-branch +++ /dev/null @@ -1,8 +0,0 @@ -#! /bin/sh -# -# $Id: mysql-build 421 2007-04-05 15:46:55Z dhill $ -# -# build calpont mysql - -su - mysql -c "scripts/build-branch $1" - diff --git a/tools/configMgt/mysql-build-genii b/tools/configMgt/mysql-build-genii deleted file mode 100755 index 4a3f21deb..000000000 --- a/tools/configMgt/mysql-build-genii +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -# -# $Id: mysql-build 421 2007-04-05 15:46:55Z dhill $ -# -# build calpont mysql - -su - mysql -c scripts/build-genii - diff --git a/tools/configMgt/recreateBranch.sh b/tools/configMgt/recreateBranch.sh deleted file mode 100755 index f00e9f243..000000000 --- a/tools/configMgt/recreateBranch.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/expect -# -# $Id: autoRecreateBranch.sh 421 2007-04-05 15:46:55Z dhill $ -# -set BRANCH [lindex $argv 0] -log_user 1 -spawn -noecho /bin/bash -if { $BRANCH == "-h" } { - send_user "\n" - send_user "'autoRecreateBranch.sh' deletes and recreates source from Branch on a \n" - send_user "development branch\n" - send_user "Usage: autoRecreateBranch.sh 'branch'\n" - send_user " branch - SVN branch name\n" - exit -} - -send_user "\n##### Subsystem: delete build \n" -exec svn delete -m "deleting branch $BRANCH" http://srvengcm1.calpont.com/svn/genii/build/branches/$BRANCH -send_user "\n##### Subsystem: delete dbcon \n" -exec svn delete -m "deleting branch $BRANCH" http://srvengcm1.calpont.com/svn/genii/dbcon/branches/$BRANCH -send_user "\n##### Subsystem: delete ddlproc \n" -exec svn delete -m "deleting branch $BRANCH" http://srvengcm1.calpont.com/svn/genii/ddlproc/branches/$BRANCH -send_user "\n##### Subsystem: delete dmlproc \n" -exec svn delete -m "deleting branch $BRANCH" http://srvengcm1.calpont.com/svn/genii/dmlproc/branches/$BRANCH -send_user "\n##### Subsystem: delete dmlib \n" -exec svn delete -m "deleting branch $BRANCH" http://srvengcm1.calpont.com/svn/genii/dmlib/branches/$BRANCH -send_user "\n##### Subsystem: delete exemgr \n" -exec svn delete -m "deleting branch $BRANCH" http://srvengcm1.calpont.com/svn/genii/exemgr/branches/$BRANCH -send_user "\n##### Subsystem: delete oam \n" -exec svn delete -m "deleting branch $BRANCH" http://srvengcm1.calpont.com/svn/genii/oam/branches/$BRANCH -send_user "\n##### Subsystem: delete oamapps \n" -exec svn delete -m "deleting branch $BRANCH" http://srvengcm1.calpont.com/svn/genii/oamapps/branches/$BRANCH -send_user "\n##### Subsystem: delete primitives \n" -exec svn delete -m "deleting branch $BRANCH" http://srvengcm1.calpont.com/svn/genii/primitives/branches/$BRANCH -send_user "\n##### Subsystem: delete procmgr \n" -exec svn delete -m "deleting branch $BRANCH" http://srvengcm1.calpont.com/svn/genii/procmgr/branches/$BRANCH -send_user "\n##### Subsystem: delete procmon \n" -exec svn delete -m "deleting branch $BRANCH" http://srvengcm1.calpont.com/svn/genii/procmon/branches/$BRANCH -send_user "\n##### Subsystem: delete snmpd \n" -exec svn delete -m "deleting branch $BRANCH" http://srvengcm1.calpont.com/svn/genii/snmpd/branches/$BRANCH -send_user "\n##### Subsystem: delete tools \n" -exec svn delete -m "deleting branch $BRANCH" http://srvengcm1.calpont.com/svn/genii/tools/branches/$BRANCH -send_user "\n##### Subsystem: delete utils \n" -exec svn delete -m "deleting branch $BRANCH" http://srvengcm1.calpont.com/svn/genii/utils/branches/$BRANCH -send_user "\n##### Subsystem: delete versioning \n" -exec svn delete -m "deleting branch $BRANCH" http://srvengcm1.calpont.com/svn/genii/versioning/branches/$BRANCH -send_user "\n##### Subsystem: delete writeengine \n" -exec svn delete -m "deleting branch $BRANCH" http://srvengcm1.calpont.com/svn/genii/writeengine/branches/$BRANCH -send_user "\n##### Make branch $BRANCH \n" -exec ~/genii/build/makeBranch.pl $BRANCH - -send_user "Successfully recreated branch $BRANCH\n" -exit \ No newline at end of file diff --git a/tools/configMgt/remote_command.sh b/tools/configMgt/remote_command.sh deleted file mode 100755 index 80de51117..000000000 --- a/tools/configMgt/remote_command.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/expect -# -# $Id: remote_commend.sh 421 2007-04-05 15:46:55Z dhill $ -# -# Remote command execution script to another server -# Argument 1 - Remote Server Host Name or IP address -# Argument 2 - Remote Server root password -# Argument 3 - Command -# Argument 4 - Good Response -# Argument 5 - Bad Response -# Argument 6 - timeout -# Argument 7 - Debug flag -set stty_init {cols 512 -opost}; -set timeout 30 -set SERVER [lindex $argv 0] -set USERNAME [lindex $argv 1] -set PASSWORD [lindex $argv 2] -set COMMAND [lindex $argv 3] -set GOOD_RESPONSE [lindex $argv 4] -set BAD_RESPONSE [lindex $argv 5] -set timeout [lindex $argv 6] -set DEBUG [lindex $argv 7] -log_user $DEBUG -spawn -noecho /bin/bash -expect -re {[$#] } -# -# send command -# -send "ssh $USERNAME@$SERVER $COMMAND\n" -expect { - -re "authenticity" { send "yes\n" - expect { - timeout { send_user "TIMEOUT\n" ; exit 2 } - -re "word: " { send "$PASSWORD\n" } abort - } - } - timeout { send_user "TIMEOUT\n" ; exit 2 } - -re "service not known" { send_user "FAILED: Invalid Host\n" ; exit 1 } - -re "Permission denied" { send_user "FAILED: Invalid Password\n" ; exit 1 } - -re "word: " { send "$PASSWORD\n" } abort - -re $GOOD_RESPONSE { send_user " " ; exit 0 } - -re $BAD_RESPONSE { send_user "FAILED\n" ; exit 1 } -} -expect { -# -re $GOOD_RESPONSE exit - timeout { send_user "FAILED-TIMEOUT\n" ; exit 1 } - -re $GOOD_RESPONSE { send_user " " ; exit 0 } - -re $BAD_RESPONSE { send_user "FAILED\n" ; exit 1 } - -re "No such file" { send_user "FAILED\n" ; exit 1 } -} -exit 1 - diff --git a/tools/configMgt/remote_command_test.sh b/tools/configMgt/remote_command_test.sh deleted file mode 100755 index 281b0852a..000000000 --- a/tools/configMgt/remote_command_test.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/expect -# -# $Id: remote_commend.sh 421 2007-04-05 15:46:55Z dhill $ -# -# Remote command execution script to another server -# Argument 1 - Remote Server Host Name or IP address -# Argument 2 - Remote Server root password -# Argument 3 - Command -# Argument 4 - Good Response -# Argument 5 - Bad Response -# Argument 6 - timeout -# Argument 7 - Debug flag -set timeout 30 -set USERNAME root -set SERVER [lindex $argv 0] -set PASSWORD [lindex $argv 1] -set COMMAND [lindex $argv 2] -set GOOD_RESPONSE [lindex $argv 3] -set BAD_RESPONSE [lindex $argv 4] -set timeout [lindex $argv 5] -set DEBUG [lindex $argv 6] -log_user $DEBUG -spawn -noecho /bin/bash -expect -re "# " -# -# send command -# -send "ssh $USERNAME@$SERVER $COMMAND\n" -expect { - -re "authenticity" { send "yes\n" - expect { - -re "word: " { send "$PASSWORD\n" } abort - } - } - -re "service not known" { send_user "FAILED: Invalid Host\n" ; exit } - -re "Permission denied" { send_user "FAILED: Invalid Password\n" ; exit } - -re "word: " { send "$PASSWORD\n" } abort -} -expect { -# -re $GOOD_RESPONSE exit - -re $GOOD_RESPONSE { send_user " " ; exit } - -re $BAD_RESPONSE { send_user "FAILED\n" ; exit } - timeout { send_user "TIMEOUT\n" ; exit } -} -send_user "UNKNOWN RESPONSE\n" -exit - diff --git a/tools/configMgt/remote_scp_get.sh b/tools/configMgt/remote_scp_get.sh deleted file mode 100755 index c8dfe3af2..000000000 --- a/tools/configMgt/remote_scp_get.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/expect -# -# $Id: remote_commend.sh 421 2007-04-05 15:46:55Z dhill $ -# -# Remote command execution script to another server -# Argument 1 - Remote Server Host Name or IP address -# Argument 2 - Remote Server root password -# Argument 3 - Command -set timeout 30 -set SERVER [lindex $argv 0] -set PASSWORD [lindex $argv 1] -set FILE [lindex $argv 2] -set USERNAME [lindex $argv 3] -set DEBUG [lindex $argv 4] -log_user $DEBUG -spawn -noecho /bin/bash -# -# send command -# -expect -re {[$#] } -send "scp $USERNAME@$SERVER:$FILE .\n" -expect { - -re "authenticity" { send "yes\n" - expect { - -re "word: " { send "$PASSWORD\n" } abort - } - } - -re "service not known" { send_user "FAILED: Invalid Host\n" ; exit -1 } - -re "word: " { send "$PASSWORD\n" } abort -} -expect { - -re "100%" { send_user "DONE\n" } abort - -re "scp" { send_user "FAILED\n" ; exit -1 } - -re "Permission denied" { send_user "FAILED: Invalid password\n" ; exit -1 } - -re "No such file or directory" { send_user "FAILED: Invalid package\n" ; exit -1 } -} -exit - diff --git a/tools/configMgt/rpm_txt.sh b/tools/configMgt/rpm_txt.sh deleted file mode 100755 index 8988c32fe..000000000 --- a/tools/configMgt/rpm_txt.sh +++ /dev/null @@ -1,32 +0,0 @@ -#! /bin/sh -# - -version=$1 -OS=x86_64 - -cd /home/nightly/rpm/ > /dev/null 2>&1 -cd RPMS/$OS/ > /dev/null 2>&1 -test -f infinidb-plafform-$version*.rpm || echo "no rpm" -test -f infinidb-plafform-$version*.rpm || exit -1 -rpm -qi -p infinidb-plafform-$version*.rpm > infinidb-plafform-$OS-rpm-info.txt -echo " " >> infinidb-plafform-$OS-rpm-info.txt -echo "MD5SUM" >> infinidb-plafform-$OS-rpm-info.txt -md5sum infinidb-plafform-$version*.rpm >> infinidb-plafform-$OS-rpm-info.txt -echo " " >> infinidb-plafform-$OS-rpm-info.txt -# -test -f infinidb-storage-engine-*.rpm || echo "no rpm" -test -f infinidb-storage-engine-*.rpm || exit -1 -rpm -qi -p infinidb-storage-engine-*.rpm > infinidb-storage-engine-$OS-rpm-info.txt -echo " " >> infinidb-storage-engine-$OS-rpm-info.txt -echo "MD5SUM" >> infinidb-storage-engine-$OS-rpm-info.txt -md5sum infinidb-storage-engine-*.rpm >> infinidb-storage-engine-$OS-rpm-info.txt -echo " " >> infinidb-storage-engine-$OS-rpm-info.txt -# -test -f infinidb-mysql-*.rpm || echo "no rpm" -test -f infinidb-mysql-*.rpm || exit -1 -rpm -qi -p infinidb-mysql-*.rpm > infinidb-mysql-$OS-rpm-info.txt -echo " " >> infinidb-mysql-$OS-rpm-info.txt -echo "MD5SUM" >> infinidb-mysql-$OS-rpm-info.txt -md5sum infinidb-mysql-*.rpm >> infinidb-mysql-$OS-rpm-info.txt -echo " " >> infinidb-mysql-$OS-rpm-info.txt -# diff --git a/tools/notificationTester/client.cpp b/tools/notificationTester/client.cpp deleted file mode 100644 index ae675551f..000000000 --- a/tools/notificationTester/client.cpp +++ /dev/null @@ -1,21 +0,0 @@ -#include -//#define NDEBUG -#include -using namespace std; - -#include "liboamcpp.h" -using namespace oam; - -int main(int argc, char** argv) -{ - Oam oam; - int rc; - - rc = oam.sendDeviceNotification("PM1", START_PM_MASTER_DOWN, "This is a test"); - - assert(rc == API_SUCCESS); - - return 0; -} - -// vim:ts=4 sw=4: diff --git a/tools/notificationTester/main.cpp b/tools/notificationTester/main.cpp deleted file mode 100644 index 7e0ff5173..000000000 --- a/tools/notificationTester/main.cpp +++ /dev/null @@ -1,313 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "liboamcpp.h" -#include "messagelog.h" -#include "messageobj.h" - -using namespace std; -using namespace oam; -using namespace messageqcpp; -using namespace config; - -#pragma pack(push,1) -struct NotifyMsgStruct -{ - uint32_t magic; - uint32_t msgno; - char node[8]; - uint32_t paylen; -}; -#pragma pack(pop) - -/***************************************************************************** -* @brief main -* -* purpose: Notification Receiver Test App -* -* -******************************************************************************/ - -int main (int argc, char** argv) -{ - ByteStream msg; - IOSocket fIos; - - string msgPort = "CMP1"; - - //check if configured - Config* sysConfig = Config::makeConfig(); - string ipaddr = sysConfig->getConfig(msgPort, "IPAddr"); - - if (ipaddr.empty()) - { - cerr << "CMP1 not configured in Columnstore.xml, example of what to add to Columnstore.xml:" << endl; - cerr << "" << endl; - cerr << " 127.0.0.1" << endl; - cerr << " 8650" << endl; - cerr << "" << endl << endl; - cerr << "Exiting..." << endl; - return 1; - } - - //read and cleanup port before trying to use - string port; - - try - { - Config* sysConfig = Config::makeConfig(); - port = sysConfig->getConfig(msgPort, "Port"); - string cmd = "fuser -k " + port + "/tcp >/dev/null 2>&1"; - system(cmd.c_str()); - } - catch (...) - {} - - cout << endl << "Notification Receiver Tester Started" << endl << endl; - - //wait for notification messages - while (true) - { - try - { - int ls = -1; - ls = ::socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); - - if (ls < 0) throw runtime_error("socket create error"); - - int rc = 0; - struct sockaddr_in serv_addr; - struct in_addr la; - ::inet_aton(ipaddr.c_str(), &la); - memset(&serv_addr, 0, sizeof(serv_addr)); - serv_addr.sin_family = AF_INET; - serv_addr.sin_addr.s_addr = la.s_addr; - serv_addr.sin_port = htons(atoi(port.c_str())); - rc = ::bind(ls, (sockaddr*)&serv_addr, sizeof(serv_addr)); - - if (rc < 0) throw runtime_error("socket bind error"); - - rc = ::listen(ls, 5); - - if (rc < 0) throw runtime_error("socket listen error"); - - int ds = -1; - - for (;;) - { - try - { - ds = ::accept(ls, 0, 0); - - if (ds < 0) throw runtime_error("socket accept error"); - - char hbuf[sizeof(NotifyMsgStruct)]; - ssize_t bytesRead = 0; - ssize_t bytesNeeded = sizeof(NotifyMsgStruct); - - while (bytesRead < bytesNeeded) - { - ssize_t thisRead = -1; - thisRead = ::read(ds, &hbuf[bytesRead], bytesNeeded); - - if (thisRead < 0) throw runtime_error("socket read error"); - - bytesRead += thisRead; - bytesNeeded -= thisRead; - } - - NotifyMsgStruct* nmsp = (NotifyMsgStruct*)&hbuf[0]; - - if (bytesRead > 0) - { - if ( nmsp->magic != oam::NOTIFICATIONKEY ) - { - cout << "ERROR: Invalid Header Key received, tossing the msg" << endl; - cout << "headerKey received: " << nmsp->magic << endl; - continue; - } - - time_t now; - now = time(NULL); - struct tm tm; - localtime_r(&now, &tm); - char timestamp[200]; - strftime (timestamp, 200, "%H:%M:%S", &tm); - - cout << "Message Received: " << timestamp; - cout << " device: " << nmsp->node; - cout << " requestType ID: " << nmsp->msgno; - - if ( nmsp->paylen > 0 ) - { - char* payLoad; - payLoad = (char*)alloca(nmsp->paylen); - bytesRead = 0; - bytesNeeded = nmsp->paylen; - - while (bytesRead < bytesNeeded) - { - ssize_t thisRead = -1; - thisRead = ::read(ds, &payLoad[bytesRead], bytesNeeded); - - if (thisRead < 0) throw runtime_error("socket read error"); - - bytesRead += thisRead; - bytesNeeded -= thisRead; - } - - cout << " payload: " << payLoad; - } - - switch (nmsp->msgno) - { - case START_PM_MASTER_DOWN: - { - cout << " requestType: START_PM_MASTER_DOWN" << endl; - } - break; - - case START_PM_STANDBY_DOWN: - { - cout << " requestType: START_PM_STANDBY_DOWN" << endl; - } - break; - - case START_PM_COLD_DOWN: - { - cout << " requestType: START_PM_COLD_DOWN" << endl; - } - break; - - case START_UM_DOWN: - { - cout << " requestType: START_UM_DOWN" << endl; - } - break; - - case MODULE_DOWN: - { - cout << " requestType: MODULE_DOWN" << endl; - } - break; - - case START_STANDBY_TO_MASTER: - { - cout << " requestType: START_STANDBY_TO_MASTER" << endl; - } - break; - - case PM_MASTER_ACTIVE: - { - cout << " requestType: PM_MASTER_ACTIVE" << endl; - } - break; - - case PM_STANDBY_ACTIVE: - { - cout << " requestType: PM_STANDBY_ACTIVE" << endl; - } - break; - - case PM_COLD_ACTIVE: - { - cout << " requestType: PM_COLD_ACTIVE" << endl; - } - break; - - case UM_ACTIVE: - { - cout << " requestType: UM_ACTIVE" << endl; - } - break; - - case PM_MASTER_FAILED_DISABLED: - { - cout << " requestType: PM_MASTER_FAILED_DISABLED" << endl; - } - break; - - case DBROOT_DOWN: - { - cout << " requestType: DBROOT_DOWN" << endl; - } - break; - - case DBROOT_UP: - { - cout << " requestType: DBROOT_UP" << endl; - } - break; - - case DB_HEALTH_CHECK_FAILED: - { - cout << " requestType: DB_HEALTH_CHECK_FAILED" << endl; - } - break; - - case DBROOT_MOUNT_FAILURE: - { - cout << " requestType: DBROOT_MOUNT_FAILURE" << endl; - } - break; - - case MODULE_UP: - { - cout << " requestType: MODULE_UP" << endl; - } - break; - - default: - { - cout << " Invalid requestType: " << nmsp->msgno << endl; - } - break; - } - } - else - cout << "Message received of size 0" << endl; - - ::shutdown(ds, SHUT_RDWR); - ::close(ds); - } - catch (...) - { - cout << "accept/read exception received" << endl; - } - } - } - catch (...) - { - cout << "MessageQueueServer exception received" << endl; - } - } - - return 0; -} -// vim:ts=4 sw=4: - diff --git a/tools/setConfig/CMakeLists.txt b/tools/setConfig/CMakeLists.txt index 33f413cf1..2664f8462 100644 --- a/tools/setConfig/CMakeLists.txt +++ b/tools/setConfig/CMakeLists.txt @@ -1,8 +1,6 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ) -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/configxml.sh.in" "${CMAKE_CURRENT_SOURCE_DIR}/configxml.sh" @ONLY) - ########### next target ############### set(setConfig_SRCS main.cpp) @@ -13,5 +11,3 @@ target_link_libraries(mcsSetConfig ${ENGINE_LDFLAGS} ${NETSNMP_LIBRARIES} ${MARI install(TARGETS mcsSetConfig DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine) -install(PROGRAMS configxml.sh DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine) - diff --git a/tools/setConfig/configxml.sh.in b/tools/setConfig/configxml.sh.in deleted file mode 100755 index bce2438e7..000000000 --- a/tools/setConfig/configxml.sh.in +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/bash -# -# configxml set/get an entry in Columnstore.xml file -# -# - - -case "$1" in - setconfig) - - if [ $# -ne 4 ]; then - echo $"Usage: $0 setconfig section variable set-value" - exit 1 - fi - - oldvalue=$(mcsGetConfig $2 $3) - - #if [ -z $oldvalue ]; then - # echo "$2 / $3 not found in Columnstore.xml" - # exit 1 - #fi - - echo "Old value of $2 / $3 is $oldvalue" - - calxml=@ENGINE_SYSCONFDIR@/columnstore/Columnstore.xml - - seconds=$(date +%s) - cp $calxml $calxml.$seconds - echo - echo "$calxml backed up to $calxml.$seconds" - echo - - oldvalue=$(mcsGetConfig $2 $3) - - echo "Old value of $2 / $3 is $oldvalue" - - if ( [ $# -eq 4 ] && [ -z $4 ] ); then - mcsSetConfig $2 $3 "" - else - mcsSetConfig $2 $3 $4 - fi - - newvalue=$(mcsGetConfig $2 $3) - - echo "$2 / $3 now set to $newvalue" - ;; - - getconfig) - if test ! $3 ; then - echo $"Usage: $0 getconfig section variable" - exit 1 - fi - - value=$(mcsGetConfig $2 $3) - - if [ -z $value ]; then - echo "$2 / $3 not found in Columnstore.xml" - exit 1 - fi - - echo "Current value of $2 / $3 is $value" - ;; - - *) - echo $"Usage: $0 {setconfig|getconfig} section variable set-value" - exit 1 - -esac -# vim:ts=4 sw=4: - diff --git a/tools/setConfig/main.cpp b/tools/setConfig/main.cpp index 9561e5aed..abaad2602 100644 --- a/tools/setConfig/main.cpp +++ b/tools/setConfig/main.cpp @@ -154,17 +154,6 @@ int main(int argc, char** argv) //get number of pms string count = cf->getConfig("PrimitiveServers", "Count"); - try - { - oam.distributeConfigFile(); - //sleep to give time for change to be distributed - sleep(atoi(count.c_str())); - } - catch (...) - { - return 1; - } - return 0; } // vim:ts=4 sw=4: diff --git a/tools/vbgen/CMakeLists.txt b/tools/vbgen/CMakeLists.txt deleted file mode 100644 index 7791a1870..000000000 --- a/tools/vbgen/CMakeLists.txt +++ /dev/null @@ -1,61 +0,0 @@ - -include_directories(${KDE4_INCLUDES} ${KDE4_INCLUDE_DIR} ${QT_INCLUDES} ) - - -########### next target ############### - -set(vbgen_SRCS vbgen.cpp myrand.cpp) - -kde4_add_executable(vbgen ${vbgen_SRCS}) - -target_link_libraries(vbgen ${KDE4_KDECORE_LIBS}) - -install(TARGETS vbgen ${INSTALL_TARGETS_DEFAULT_ARGS}) - - -########### install files ############### - - - - -#original Makefile.am contents follow: - -## Copyright (C) 2014 InfiniDB, Inc. -## -## This program is free software; you can redistribute it and/or -## modify it under the terms of the GNU General Public License -## as published by the Free Software Foundation; version 2 of -## the License. -## -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with this program; if not, write to the Free Software -## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, -## MA 02110-1301, USA. -# -## $Id$ -### Process this file with automake to produce Makefile.in -# -#AM_CPPFLAGS = $(idb_cppflags) -#AM_CFLAGS = $(idb_cflags) -#AM_CXXFLAGS = $(idb_cxxflags) -#AM_LDFLAGS = $(idb_ldflags) -#bin_PROGRAMS = vbgen -#vbgen_SOURCES = vbgen.cpp myrand.cpp -#vbgen_CPPFLAGS = $(AM_CPPFLAGS) -#vbgen_LDFLAGS = $(AM_LDFLAGS) -# -#test: -# -#coverage: -# -#leakcheck: -# -#docs: -# -#bootstrap: install-data-am -# diff --git a/tools/vbgen/myrand.cpp b/tools/vbgen/myrand.cpp deleted file mode 100644 index 0b2b59d1c..000000000 --- a/tools/vbgen/myrand.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#include "myrand.h" - -#include -#include -#include -#include -#include -using namespace std; - -namespace myrand -{ - -MyRand::MyRand(int min, int max) : - fMin(min), - fMax(max) -{ - if (fMax < fMin) throw range_error("max - -namespace myrand -{ -class MyRand -{ -public: - explicit MyRand(int min, int max); - ~MyRand() { } - - int generate() - { - return (fMin + (int)((double)(fMax - fMin + 1) * (rand_r(&fSeed) / (RAND_MAX + 1.0)))); - } - - int operator()() - { - return generate(); - } - -protected: - -private: - //defaults okay (I guess) - //MyRand(const MyRand& rhs); - //MyRand& operator=(const MyRand& rhs); - - unsigned int fSeed; - int fMin; - int fMax; -}; -} - -#endif -// vim:ts=4 sw=4: - diff --git a/tools/vbgen/vbgen.cpp b/tools/vbgen/vbgen.cpp deleted file mode 100644 index c97a7e6ed..000000000 --- a/tools/vbgen/vbgen.cpp +++ /dev/null @@ -1,73 +0,0 @@ -#include -#include -#include -#include -#include -#include -using namespace std; - -#include "myrand.h" -using namespace myrand; - -namespace -{ -class PrintIt -{ -public: - PrintIt(ostream& os) : fOs(os) - { - fOs.fill('0'); - fOs.flags(ios::hex); - } - ~PrintIt() { } - - void operator()(const int i) const - { - unsigned u = static_cast(i & 0xff); - fOs << setw(2) << u; - } - -private: - //Defaults okay - //PrintIt(const PrintIt& rhs); - //PrintIt& operator=(const PrintIt& rhs); - - ostream& fOs; -}; -} - -int main(int argc, char** argv) -{ - int c; - unsigned long long numRows = 20000000; - - opterr = 0; - - while ((c = getopt(argc, argv, "r:h")) != -1) - switch (c) - { - case 'r': - numRows = strtoull(optarg, 0, 0); - break; - - case 'h': - case '?': - default: - break; - } - - MyRand mrwidth(100, 800); - - for (unsigned long long i = 0; i < numRows; i++) - { - int w = mrwidth.generate(); - vector vi(w); - generate(vi.begin(), vi.end(), MyRand(0, 255)); - cout << "0|"; - for_each(vi.begin(), vi.end(), PrintIt(cout)); - cout << '|' << endl; - } - - return 0; -} - diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 36c94d26b..5c16031a8 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -22,7 +22,6 @@ add_subdirectory(idbdatafile) add_subdirectory(winport) add_subdirectory(thrift) add_subdirectory(querytele) -add_subdirectory(clusterTester) add_subdirectory(libmysql_client) add_subdirectory(regr) add_subdirectory(cloudio) diff --git a/utils/clusterTester/CMakeLists.txt b/utils/clusterTester/CMakeLists.txt deleted file mode 100644 index d555374d8..000000000 --- a/utils/clusterTester/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ - -install(PROGRAMS columnstoreClusterTester.sh - os_detect.sh - DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine) - diff --git a/utils/clusterTester/columnstoreClusterTester.sh b/utils/clusterTester/columnstoreClusterTester.sh deleted file mode 100755 index ce968ec8f..000000000 --- a/utils/clusterTester/columnstoreClusterTester.sh +++ /dev/null @@ -1,1399 +0,0 @@ -#!/bin/bash - -bold=$(tput bold) -normal=$(tput sgr0) - -IPADDRESSES="" -OS="" -PASSWORD="ssh" -CHECK=true -REPORTPASS=true -LOGFILE="" - -OS_LIST=("centos6" "centos7" "debian8" "debian9" "suse12" "ubuntu16" "ubuntu18") - -NODE_IPADDRESS="" - -#get temp directory -tmpDir=`mcsGetConfig SystemConfig SystemTempFileDir` - -checkContinue() { - - if [ "$CHECK" = false ]; then - return 0 - fi - - echo "" - read -p "Failure occurred, do you want to continue? (y,n) > " answer - case ${answer:0:1} in - y|Y ) - return 0 - ;; - * ) - exit - ;; - esac -} - -### -# Print Functions -### - -helpPrint () { - ################################################################################ - echo "" - echo "This is the MariaDB ColumnStore Cluster System Test Tool." - echo "" - echo "It will run a set of test to validate the setup of the MariaDB Columnstore system." - echo "This can be run prior to the install of MariaDB ColumnStore to make sure the" - echo "servers/nodes are configured properly. It should be run as the user of the planned" - echo "install. Meaning if MariaDB ColumnStore is going to be installed as root user," - echo "then run from root user. Also the assumption is that the servers/node have be" - echo "setup based on the Preparing for ColumnStore Installation." - echo "It should also be run on the server that is designated as Performance Module #1." - echo "This is the same server where the MariaDB ColumnStore package would be installed" - echo " and where the install script would be executed from, postConfigure." - echo "" - echo "Additional information on Tool is documented at:" - echo "" - echo "https://mariadb.com/kb/en/library/mariadb-columnstore-cluster-test-tool/" - echo "" - echo "Items that are checked:" - echo " Node Ping test" - echo " Node SSH test" - echo " ColumnStore Port test" - echo " OS version" - echo " Locale settings" - echo " Umask settings" - echo " Firewall settings" - echo " Date/time settings" - echo " Dependent packages installed" - echo "" - echo "Usage: $0 [options]" - echo "OPTIONS:" - echo " -h,--help Help" - echo " --ipaddr=[ipaddresses] Remote Node IP-Addresses/Hostnames, if not provide, will only check local node" - echo " examples: 192.168.1.1,192.168.1.2 or serverum1,serverpm2" - echo " --os=[os] Optional: Set OS Version (centos6, centos7, debian8, debian9, suse12, ubuntu16)" - echo " --password=[password] Provide a user password. (Default: ssh-keys setup will be assumed)" - echo " -c,--continue Continue on failures" - echo " --logfile=[fileName] Output results to a log file" - echo "" - echo "NOTE: Dependent package : 'nmap' and 'expect' packages need to be installed locally" - echo "" -} - -# Parse command line options. -while getopts hc-: OPT; do - case "$OPT" in - h) - echo $USAGE - helpPrint - exit 0 - ;; - c) - CHECK=false - ;; - -) LONG_OPTARG="${OPTARG#*=}" - ## Parsing hack for the long style of arguments. - case $OPTARG in - help ) - helpPrint - exit 0 - ;; - continue ) - CHECK=false - ;; - ipaddr=?* ) - IPADDRESSES="$LONG_OPTARG" - ;; - os=?* ) - OS="$LONG_OPTARG" - match=false - for SUPPORTED_OS in "${OS_LIST[@]}"; do - if [ $SUPPORTED_OS == "$OS" ]; then - match=true - break; - fi - done - - if [ $match == "false" ] ; then - echo "" - echo "OS version not-supported, please re-run and provide the OS from list of support OSs " - for SUPPORTED_OS in "${OS_LIST[@]}"; do - echo "$SUPPORTED_OS" - done - echo "" - exit 1 - fi - - ;; - password=?* ) - PASSWORD="$LONG_OPTARG" - ;; - logfile=?* ) - LOGFILE="$LONG_OPTARG" - exec 1<>$LOGFILE - exec 2>&1 - ;; - ipaddr* ) - echo "No arg for --$OPTARG option" >&2 - exit 1 - ;; - os* ) - echo "No arg for --$OPTARG option" >&2 - exit 1 - ;; - password* ) - echo "No arg for --$OPTARG option" >&2 - exit 1 - ;; - continue* ) - echo "No arg allowed for --$OPTARG option" >&2 - exit 1 - ;; - logfile* ) - echo "No arg for --$OPTARG option" >&2 - exit 1 - ;; - help* ) - helpPrint - exit 0 - ;; - '' ) - break ;; # "--" terminates argument processing - * ) - echo "Illegal option --$OPTARG" >&2 - exit 1 - ;; - esac - ;; - \?) - # getopts issues an error message - echo $USAGE >&2 - exit 1 - ;; - esac -done - -# Remove the switches we parsed above. -shift `expr $OPTIND - 1` - -if [ "$IPADDRESSES" != "" ]; then - #parse IP Addresses into an array - IFS=',' - read -ra NODE_IPADDRESS <<< "$IPADDRESSES" - - if ! type expect > /dev/null 2>&1 ; then - echo "expect is not installed. Please install and rerun." - exit 1 - fi - - if ! type nmap > /dev/null 2>&1; then - echo "nmap is not installed. Please install and rerun." - exit 1 - fi -fi - -checkLocalOS() -{ - echo "** Validate local OS is supported" - echo "" - - #get local OS - `os_detect.sh > ${tmpDir}/os_detect 2>&1` - if [ "$?" -eq 0 ]; then - localOS=`cat ${tmpDir}/os_detect | grep "Operating System name" | cut -f2 -d '"'` - echo "Local Node OS System Name : $localOS" - - if [ "$OS" != "" ] ; then - echo "" - echo "Local Node OS Versions doesn't match the command line OS argument" - echo "Contining using the Detected Local Node OS Version" - OS=`cat ${tmpDir}/os_detect | grep "Operating System tag" | cut -f4 -d " "` - - echo "Local Node OS Version : $OS" - else - OS=`cat ${tmpDir}/os_detect | grep "Operating System tag" | cut -f4 -d " "` - fi - else - localOS=`cat ${tmpDir}/os_detect | grep "Operating System name" | cut -f2 -d '"'` - echo "Local Node OS System Name : $localOS" - - if [ "$OS" == "" ] ; then - echo "" - echo "Operating System name doesn't match any of the supported OS's" - echo "" - if [ $LOGFILE != "" ] ; then - exit 1 - fi - - echo "Please select from this OS list or enter 'exit'" - for SUPPORTED_OS in "${OS_LIST[@]}"; do - echo " $SUPPORTED_OS" - done - - echo "" - read -p "Enter OS or 'exit' > " answer - if [ $answer == 'exit' ] ; then - exit 1 - fi - match=false - for SUPPORTED_OS in "${OS_LIST[@]}"; do - if [ $SUPPORTED_OS == $answer ] ; then - OS=$answer - match=true - break; - fi - done - - if [ $match == "false" ] ; then - echo "OS version unknown, please re-run and provide the OS in the command line --os" - exit 1 - fi - else - echo "${bold}Warning${normal}: Local Node OS version detected is not supported and different than the enter OS Version" - fi - fi -} - -checkLocalDir() -{ - if [ "$USER" != "root" ]; then - # Non-root User directory permissions check - # - echo "" - echo "** Run Non-root User directory permissions check on Local Node (dev/shm)" - echo "" - - `touch /dev/shm/cs_check > /dev/null 2>&1` - if [ "$?" -eq 0 ]; then - echo "Local Node permission test on /dev/shm : Passed" - `rm -f /dev/shm/cs_check` - else - echo "Local Node permission test on /dev/shm : ${bold}Failed${normal}, change permissions to 777 and re-test" - pass=false - REPORTPASS=false - fi - fi -} - -checkPing() -{ - # ping test - # - echo "" - echo "** Run Ping access Test to remote nodes" - echo "" - - for ipadd in "${NODE_IPADDRESS[@]}"; do - - `ping $ipadd -c 1 -w 5 > /dev/null 2>&1` - if [ "$?" -eq 0 ]; then - echo $ipadd " Node Passed ping test" - else - echo $ipadd " Node ${bold}Failed${normal} ping test, correct and retest" - exit 1 - fi - done -} - -checkSSH() -{ - # Login test - # - echo "" - echo "** Run SSH Login access Test to remote nodes" - echo "" - - for ipadd in "${NODE_IPADDRESS[@]}"; do - `remote_command.sh $ipadd $PASSWORD ls 1 > /dev/null 2>&1`; - rc="$?" - if [ $rc -eq 0 ] || ( [ $rc -eq 2 ] && [ $OS == "suse12" ] ) ; then - if [ $PASSWORD == "ssh" ] ; then - echo $ipadd " Node Passed SSH login test using ssh-keys" - else - echo $ipadd " Node Passed SSH login test using user password" - fi - else - if [ $PASSWORD == "ssh" ] ; then - echo $ipadd " Node ${bold}Failed${normal} SSH login test using ssh-keys" - else - echo $ipadd " Node ${bold}Failed${normal} SSH login test using user password" - fi - - echo "Error - Fix the SSH login issue and rerun test" - exit 1 - fi - done -} - -checkRemoteDir() -{ - if [ "$USER" != "root" ]; then - # Non-root User directory permissions check - # - echo "" - echo "** Run Non-root User directory permissions check on remote nodes (/dev/shm)" - echo "" - - `remote_command.sh $ipadd $PASSWORD 'touch /dev/shm/cs_check' 1 > ${tmpDir}/remote_command_check 2>&1` - rc="$?" - if [ $rc -eq 0 ] || ( [ $rc -eq 2 ] && [ $OS == "suse12" ] ) ; then - `grep "Permission denied" ${tmpDir}/remote_command_check > /dev/null 2>&1` - if [ "$?" -eq 0 ]; then - echo "$ipadd Node permission test on /dev/shm : ${bold}Failed${normal}, change permissions to 777 and re-test" - pass=false - REPORTPASS=false - else - echo "$ipadd Node permission test on /dev/shm : Passed" - fi - else - echo "Error running remote_command.sh to $ipadd Node, check ${tmpDir}/remote_command_check" - pass=false - REPORTPASS=false - fi - - if ! $pass; then - checkContinue - fi - fi -} - -checkOS() -{ - # Os check - # - echo "" - echo "** Run OS check - OS version needs to be the same on all nodes" - echo "" - - echo "Local Node OS Version : $localOS" - echo "" - - pass=true - `/bin/cp -f os_detect.sh ${tmpDir}/.` - for ipadd in "${NODE_IPADDRESS[@]}"; do - `remote_scp_put.sh $ipadd $PASSWORD ${tmpDir}/os_detect.sh 1 > ${tmpDir}/remote_scp_put_check 2>&1` - if [ "$?" -ne 0 ]; then - echo "Error running remote_scp_put.sh to $ipadd Node, check ${tmpDir}/remote_scp_put_check" - exit 1 - else - `remote_command.sh $ipadd $PASSWORD ${tmpDir}/os_detect.sh 1 > ${tmpDir}/remote_command_check` - rc="$?" - if [ "$?" -ne 0 ]; then - echo "Error running remote_command.sh ${tmpDir}/os_detect.sh on $ipadd Node, check ${tmpDir}/remote_command_check" - exit 1 - else - remoteOS=`cat ${tmpDir}/remote_command_check | grep "Operating System name" | cut -f2 -d '"'` - echo "$ipadd Node OS Version : $remoteOS" - if [ $localOS != $remoteOS ]; then - echo "${bold}Failed${normal}, $ipadd has a different OS than local node" - pass=false - REPORTPASS=false - fi - fi - fi - done - - if ! $pass; then - checkContinue - fi -} - -checkLocale() -{ - # Locale check - # - echo "" - echo "** Run Locale check - Locale needs to be the same on all nodes" - echo "" - - #get local Locale - `locale | grep LANG= > ${tmpDir}/locale_check 2>&1` - if [ "$?" -eq 0 ]; then - echo "Local Node Locale : `cat ${tmpDir}/locale_check`" - else - echo "Error running 'locale' command on local node" - fi - - pass=true - for ipadd in "${NODE_IPADDRESS[@]}"; do - `remote_command.sh $ipadd $PASSWORD 'locale | grep LANG= > locale_check 2>&1' 1 > ${tmpDir}/remote_command_check` - rc="$?" - if [ $rc -eq 0 ] || ( [ $rc -eq 2 ] && [ $OS == "suse12" ] ) ; then - `remote_scp_get.sh $ipadd $PASSWORD locale_check > ${tmpDir}/remote_scp_get_check 2>&1` - if [ "$?" -ne 0 ]; then - echo "Error running remote_scp_get.sh to $ipadd Node, check ${tmpDir}/remote_scp_get_check" - exit 1 - else - echo "$ipadd Node Locale : `cat locale_check`" - `diff ${tmpDir}/locale_check locale_check > /dev/null 2>&1` - if [ "$?" -ne 0 ]; then - echo "${bold}Failed${normal}, $ipadd has a different Locale setting than local node" - pass=false - REPORTPASS=false - fi - `rm -f locale_check` - fi - else - echo "Error running remote_command.sh to $ipadd Node, check ${tmpDir}/remote_command_check" - exit 1 - pass=false - REPORTPASS=false - fi - done - - if ! $pass; then - checkContinue - fi -} - -checkLocalUMASK() -{ - # UMASK check - # - echo "" - echo "** Run Local UMASK check" - echo "" - - pass=true - filename=UMASKtest - - rm -f $filename - touch $filename - permission=$(stat -c "%A" "$filename") - result=${permission:4:1} - if [ ${result} == "r" ] ; then - result=${permission:7:1} - if [ ${result} == "r" ] ; then - echo "UMASK local setting test passed" - else - echo "${bold}Warning${normal}, UMASK test failed, check local UMASK setting. Requirement is set to 0022" - pass=false - fi - else - echo "${bold}Warning${normal}, UMASK test failed, check local UMASK setting. Requirement is set to 0022" - pass=false - fi - - if ! $pass; then - checkContinue - fi - - rm -f $filename -} - -checkLocalSELINUX() -{ - # SELINUX check - # - echo "" - echo "** Run Local SELINUX check" - echo "" - - pass=true - #check local SELINUX - if [ -f /etc/selinux/config ]; then - `cat /etc/selinux/config | grep SELINUX | grep enforcing > ${tmpDir}/selinux_check 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Warning${normal}, Local Node SELINUX setting is Enabled, check port test results" - pass=false - else - echo "Local Node SELINUX setting is Not Enabled" - fi - else - echo "Local Node SELINUX setting is Not Enabled" - fi - - if ! $pass; then - checkContinue - fi -} - -checkUMASK() -{ - # UMASK check - # - echo "" - echo "** Run UMASK check" - echo "" - - pass=true - - for ipadd in "${NODE_IPADDRESS[@]}"; do - `remote_command.sh $ipadd $PASSWORD 'rm -f UMASKtest;touch UMASKtest;echo $(stat -c "%A" "UMASKtest") > test.log' > ${tmpDir}/remote_command_check 2>&1` - if [ "$?" -eq 0 ]; then - `remote_scp_get.sh $ipadd Calpont1 test.log >> ${tmpDir}/remote_scp_get 2>&1` - if [ "$?" -eq 0 ]; then - permission=`cat test.log` - result=${permission:4:1} - if [ ${result} == "r" ] ; then - result=${permission:7:1} - if [ ${result} == "r" ] ; then - echo "$ipadd Node UMASK setting test passed" - else - echo "${bold}Warning${normal}, $ipadd Node UMASK test failed, check UMASK setting. Requirement is set to 0022" - pass=false - fi - else - echo "${bold}Warning${normal}, $ipadd Node UMASK test failed, check UMASK setting. Requirement is set to 0022" - pass=false - fi - else - echo "${bold}Warning${normal}, $ipadd UMASK test failed, remote_scp_get.sh error, check ${tmpDir}/remote_scp_get" - pass=false - fi - else - echo "${bold}Warning${normal}, $ipadd UMASK test failed, remote_command.sh error, check ${tmpDir}/remote_command_check" - pass=false - fi - `rm -f test.log` - done - - if ! $pass; then - checkContinue - fi - - rm -f $filename -} - -checkSELINUX() -{ - # SELINUX check - # - echo "" - echo "** Run SELINUX check" - echo "" - - pass=true - for ipadd in "${NODE_IPADDRESS[@]}"; do - `remote_scp_get.sh $ipadd $PASSWORD /etc/selinux/config > ${tmpDir}/remote_scp_get_check 2>&1` - if [ "$?" -ne 0 ]; then - echo "$ipadd Node SELINUX setting is Not Enabled" - else - `cat config | grep SELINUX | grep enforcing > ${tmpDir}/selinux_check 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Warning${normal}, $ipadd SELINUX setting is Enabled, check port test results" - pass=false - else - echo "$ipadd Node SELINUX setting is Not Enabled" - fi - `rm -f config` - fi - done - - if ! $pass; then - checkContinue - fi -} - -checkFirewalls() -{ - # FIREWALL checks - # - echo "" - echo "** Run Firewall Services check" - echo "" - - declare -a FIREWALL_LIST=("iptables" "ufw" "firewalld" "firewall") - - #check local FIREWALLS - for firewall in "${FIREWALL_LIST[@]}"; do - pass=true - `service $firewall status > ${tmpDir}/firewall1_check 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Warning${normal}, Local Node $firewall service is Active, check port test results" - pass=false - else - `systemctl status $firewall > ${tmpDir}/firewall1_check 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Warning${normal}, Local Node $firewall service is Active, check port test results" - pass=false - fi - fi - - if $pass ; then - echo "Local Node $firewall service is Not Active" - fi - done - - echo "" - fpass=true - for ipadd in "${NODE_IPADDRESS[@]}"; do - # 'sysconfig not on remote node - for firewall in "${FIREWALL_LIST[@]}"; do - pass=true - `remote_command.sh $ipadd $PASSWORD "service '$firewall' status > ${tmpDir}/firewall_check 2>&1" 1 > ${tmpDir}/remote_command_check` - if [ "$?" -eq 0 ]; then - echo "${bold}Warning${normal}, $ipadd Node $firewall service is Active, check port test results" - pass=false - else - `remote_command.sh $ipadd $PASSWORD "systemctl status '$firewall' > ${tmpDir}/firewall_check 2>&1" 1 > ${tmpDir}/remote_command_check` - if [ "$?" -eq 0 ]; then - echo "${bold}Warning${normal}, $ipadd Node $firewall service is Active, check port test results" - pass=false - fi - fi - - if $pass ; then - echo "$ipadd Node $firewall service is Not Enabled" - fi - done - - echo "" - done - - if [ $OS == "suse12" ]; then - # rcSuSEfirewall2 check - # - echo "" - echo "** Run rcSuSEfirewall2 check" - echo "" - - pass=true - #check local IPTABLES - `/sbin/rcSuSEfirewall2 status > ${tmpDir}/rcSuSEfirewall2_check 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Failed${normal}, Local Node rcSuSEfirewall2 service is Enabled, check port test results" - pass=false - else - echo "Local Node rcSuSEfirewall2 service is Not Enabled" - fi - - for ipadd in "${NODE_IPADDRESS[@]}"; do - `remote_command.sh $ipadd $PASSWORD '/sbin/rcSuSEfirewall2 status > ${tmpDir}/rcSuSEfirewall2_check 2>&1' 1 > ${tmpDir}/remote_command_check` - rc="$?" - if [ $rc -eq 0 ] ; then - echo "${bold}Failed${normal}, $ipadd Node rcSuSEfirewall2 service is Enabled, check port test results" - pass=false - else - echo "$ipadd Node rcSuSEfirewall2 service is Not Enabled" - fi - done - fi -} - -checkPorts() -{ - # port test - # - echo "" - echo "** Run MariaDB ColumnStore Port (8600-8630,8700,8800,3306) availability test" - echo "" - - pass=true - for ipadd in "${NODE_IPADDRESS[@]}"; do - - `nmap $ipadd -p 8600-8630,8700,8800,3306 | grep 'filtered' > ${tmpDir}/port_test` - if [ "$?" -ne 0 ]; then - echo $ipadd " Node Passed port test" - else - echo $ipadd " Node ${bold}Failed${normal} port test, check and disable any firewalls or open ports in firewall" - cat ${tmpDir}/port_test - pass=false - REPORTPASS=false - fi - done - - if ! $pass; then - checkContinue - fi -} - -checkTime() -{ - # Time check - # - echo "" - echo "** Run Date/Time check - Date/Time should be within 10 seconds on all nodes" - echo "" - - pass=true - #get local epoch time - localTime=`date +%s` - for ipadd in "${NODE_IPADDRESS[@]}"; do - `remote_command.sh $ipadd $PASSWORD 'date +%s > time_check' > ${tmpDir}/time_check` - rc="$?" - if [ $rc -ne 0 ] ; then - echo $ipadd " Node ${bold}Failed${normal} date/time check failed, check ${tmpDir}/time_check" - pass=false - REPORTPASS=false - else - `remote_scp_get.sh $ipadd $PASSWORD time_check > ${tmpDir}/remote_scp_get_check 2>&1` - if [ "$?" -ne 0 ]; then - echo "Error running remote_scp_get.sh to $ipadd Node, check ${tmpDir}/remote_scp_get_check" - else - remoteTime=`cat time_check` - timeDiff=`echo "$(($remoteTime-$localTime))"` - range=10 - if [ $timeDiff -gt $range ] || [ $timeDiff -lt -$range ] ; then - echo $ipadd " Node ${bold}Failed${normal}, $ipadd Node date/time is more than 10 seconds away from local node" - pass=false - else - echo "Passed: $ipadd Node date/time is within 10 seconds of local node" - fi - fi - fi - done - `rm -f time_check` - - if ! $pass; then - checkContinue - fi -} - -checkMysqlPassword() -{ - # Locale check - # - echo "" - echo "** Run MariaDB Console Password check" - echo "" - - #get MariaDB password - pass=true - `systemctl start mariadb.service > /dev/null 2>&1` - `mariadb-command-line.sh > /dev/null 2>&1` - if [ "$?" -eq 2 ]; then - echo "${bold}Failed${normal}, Local Node MariaDB login failed with missing password file, /root/.my.cnf" - fi - - if [ "$IPADDRESSES" != "" ]; then - `/bin/cp -f mariadb-command-line.sh ${tmpDir}/.` - - for ipadd in "${NODE_IPADDRESS[@]}"; do - `remote_command.sh $ipadd $PASSWORD systemctl start mariadb.service > /dev/null 2>&1` - `remote_scp_put.sh $ipadd $PASSWORD ${tmpDir}/mariadb-command-line.sh 1 > ${tmpDir}/remote_scp_put_check 2>&1` - if [ "$?" -ne 0 ]; then - echo "Error running remote_scp_put.sh to $ipadd Node, check ${tmpDir}/remote_scp_put_check" - exit 1 - else - `remote_command.sh $ipadd $PASSWORD ${tmpDir}/mariadb-command-line.sh 1 > ${tmpDir}/remote_command_check` - `cat ${tmpDir}/remote_command_check | grep "ERROR - PASSWORD" > /dev/null 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Failed${normal}, $ipadd Node MariaDB login failed with missing password file, /root/.my.cnf" - pass=false - fi - fi - done - fi - - if ! $pass; then - checkContinue - else - echo "Passed, no problems detected with a MariaDB password being set without an associated /root/.my.cnf" - fi -} - -checkPackages() -{ - # - # now check packaging on local and remote nodes - # - - echo "" - echo "** Run MariaDB ColumnStore Dependent Package Check" - echo "" - - declare -a CENTOS_PKG=("expect" "perl" "perl-DBI" "openssl" "zlib" "file" "libaio" "rsync" "jemalloc" "snappy" "net-tools" "numactl-libs") - declare -a CENTOS_PKG_NOT=("mariadb-libs") - - if [ "$OS" == "centos6" ] || [ "$OS" == "centos7" ]; then - if [ ! `which yum 2>/dev/null` ] ; then - echo "${bold}Failed${normal}, Local Node ${bold}yum${normal} package not installed" - pass=false - REPORTPASS=false - else - pass=true - #check centos packages on local node - for PKG in "${CENTOS_PKG[@]}"; do - if [ $OS == "centos6" ] && [ "$PKG" == "boost" ]; then - `ls /usr/lib/libboost_regex.so > /dev/null 2>&1` - if [ "$?" -ne 0 ]; then - echo "${bold}Failed${normal}, Local Node ${bold}boost libraries${normal} not installed" - pass=false - REPORTPASS=false - fi - else - `yum list installed "$PKG" > ${tmpDir}/pkg_check 2>&1` - `cat ${tmpDir}/pkg_check | grep Installed > /dev/null 2>&1` - if [ "$?" -ne 0 ]; then - echo "${bold}Failed${normal}, Local Node package ${bold}${PKG}${normal} is not installed, please install" - pass=false - REPORTPASS=false - fi - fi - done - fi - - if [ $pass == true ] ; then - echo "Local Node - Passed, all dependency packages are installed" - else - checkContinue - fi - - #check for package that shouldnt be installed - pass=true - for PKG in "${CENTOS_PKG_NOT[@]}"; do - `yum list installed "$PKG" > ${tmpDir}/pkg_check 2>&1` - `cat ${tmpDir}/pkg_check | grep Installed > /dev/null 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Failed${normal}, Local Node package ${bold}${PKG}${normal} is installed, please un-install" - pass=false - REPORTPASS=false - fi - done - - if [ $pass == true ] ; then - echo "Local Node - Passed, all packages that should not be installed aren't installed" - else - checkContinue - fi - - echo "" - pass=true - if [ "$IPADDRESSES" != "" ]; then - for ipadd in "${NODE_IPADDRESS[@]}"; do - for PKG in "${CENTOS_PKG[@]}"; do - if [ $OS == "centos6" ] && [ $PKG == "boost" ]; then - `remote_command.sh $ipadd $PASSWORD 'ls /usr/lib/libboost_regex.so > /dev/null 2>&1' 1 > ${tmpDir}/remote_command_check 2>&1` - if [ $? -ne 0 ] ; then - echo "${bold}Failed${normal}, $ipadd Node ${bold}boost libraries${normal} not installed" - pass=false - REPORTPASS=false - fi - else - `remote_command.sh $ipadd $PASSWORD "yum list installed '$PKG' > ${tmpDir}/pkg_check 2>&1" 1 > ${tmpDir}/remote_command_check 2>&1` - rc="$?" - if [ $rc -eq 2 ] ; then - echo "${bold}Failed${normal}, $ipadd Node, 'yum' not installed" - pass=false - REPORTPASS=false - break - elif [ $rc -eq 1 ] ; then - echo "${bold}Failed${normal}, $ipadd Node package ${bold}${PKG}${normal} is not installed, please install" - pass=false - REPORTPASS=false - fi - fi - done - - if $pass; then - echo "$ipadd Node - Passed, all dependency packages are installed" - else - checkContinue - pass=true - fi - echo "" - - #check for package that shouldnt be installed - for PKG in "${CENTOS_PKG_NOT[@]}"; do - `remote_command.sh $ipadd $PASSWORD "yum list installed '$PKG' > ${tmpDir}/pkg_check 2>&1" 1 > ${tmpDir}/remote_command_check 2>&1` - rc="$?" - if [ $rc -eq 2 ] ; then - echo "${bold}Failed${normal}, $ipadd Node, 'yum' not installed" - pass=false - REPORTPASS=false - break - elif [ $rc -ne 1 ] ; then - echo "${bold}Failed${normal}, $ipadd Node package ${bold}${PKG}${normal} is installed, please un-install" - pass=false - REPORTPASS=false - fi - done - - if $pass; then - echo "$ipadd Node - Passed, all packages that should not be installed aren't installed" - else - checkContinue - pass=true - fi - echo "" - - done - fi - fi - - declare -a SUSE_PKG=("boost-devel" "expect" "perl" "perl-DBI" "openssl" "file" "libaio1" "rsync" "jemalloc" "libsnappy1" "net-tools" "libnuma1") - declare -a SUSE_PKG_NOT=("mariadb" , "libmariadb18") - - if [ "$OS" == "suse12" ]; then - if [ ! `which rpm 2>/dev/null` ] ; then - echo "${bold}Failed${normal}, Local Node ${bold}rpm${normal} package not installed" - pass=false - REPORTPASS=false - else - pass=true - #check centos packages on local node - for PKG in "${SUSE_PKG[@]}"; do - `rpm -qi "$PKG" > ${tmpDir}/pkg_check 2>&1` - `cat ${tmpDir}/pkg_check | grep "not installed" > /dev/null 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Failed${normal}, Local Node package ${bold}${PKG}${normal} is not installed, please install" - pass=false - REPORTPASS=false - fi - done - - if $pass; then - echo "Local Node - Passed, all dependency packages are installed" - else - checkContinue - fi - - #check for package that shouldnt be installed - pass=true - for PKG in "${SUSE_PKG_NOT[@]}"; do - `rpm -qi "$PKG" > ${tmpDir}/pkg_check 2>&1` - `cat ${tmpDir}/pkg_check | grep "not installed" > /dev/null 2>&1` - if [ "$?" -ne 0 ]; then - echo "${bold}Failed${normal}, Local Node package ${bold}${PKG}${normal} is installed, please un-install" - pass=false - REPORTPASS=false - fi - done - - if $pass; then - echo "Local Node - Passed, all packages that should not be installed aren't installed" - else - checkContinue - fi - fi - - echo "" - pass=true - if [ "$IPADDRESSES" != "" ]; then - for ipadd in "${NODE_IPADDRESS[@]}"; do - for PKG in "${SUSE_PKG[@]}"; do - `remote_command.sh $ipadd $PASSWORD "rpm -qi '$PKG' > ${tmpDir}/pkg_check 2>&1" 1 > ${tmpDir}/remote_command_check 2>&1` - rc="$?" - if [ $rc -ne 0 ] ; then - echo "${bold}Failed${normal}, $ipadd Node package ${bold}${PKG}${normal} is not installed, please install" - pass=false - REPORTPASS=false - fi - done - - if $pass; then - echo "$ipadd Node - Passed, all dependency packages are installed" - else - checkContinue - pass=true - fi - echo "" - - #check for package that shouldnt be installed - for PKG in "${SUSE_PKG_NOT[@]}"; do - `remote_command.sh $ipadd $PASSWORD "rpm -qi '$PKG' > ${tmpDir}/pkg_check 2>&1" 1 > ${tmpDir}/remote_command_check 2>&1` - rc="$?" - if [ $rc -eq 0 ] ; then - echo "${bold}Failed${normal}, $ipadd Node package ${bold}${PKG}${normal} is installed, please un-install" - pass=false - REPORTPASS=false - fi - done - - if $pass; then - echo "$ipadd Node - Passed, all packages that should not be installed aren't installed" - else - checkContinue - pass=true - fi - echo "" - - done - fi - fi - - declare -a UBUNTU_PKG=("libboost-all-dev" "expect" "libdbi-perl" "perl" "openssl" "file" "libreadline-dev" "rsync" "libjemalloc1" "libsnappy1V5" "net-tools" "libnuma1") - declare -a UBUNTU_PKG_NOT=("mariadb-server" "libmariadb18") - - if [ "$OS" == "ubuntu16" ] || [ "$OS" == "ubuntu18" ]; then - if [ ! `which dpkg 2>/dev/null` ] ; then - echo "${bold}Failed${normal}, Local Node ${bold}rpm${normal} package not installed" - pass=false - REPORTPASS=false - else - pass=true - #check centos packages on local node - for PKG in "${UBUNTU_PKG[@]}"; do - `dpkg -s "$PKG" > ${tmpDir}/pkg_check 2>&1` - `cat ${tmpDir}/pkg_check | grep 'install ok installed' > /dev/null 2>&1` - if [ "$?" -ne 0 ]; then - echo "${bold}Failed${normal}, Local Node package ${bold}${PKG}${normal} is not installed, please install" - pass=false - REPORTPASS=false - fi - done - - if $pass; then - echo "Local Node - Passed, all dependency packages are installed" - else - checkContinue - fi - - #check for package that shouldnt be installed - pass=true - for PKG in "${UBUNTU_PKG_NOT[@]}"; do - `dpkg -s "$PKG" > ${tmpDir}/pkg_check 2>&1` - `cat ${tmpDir}/pkg_check | grep 'install ok installed' > /dev/null 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Failed${normal}, Local Node package ${bold}${PKG}${normal} is installed, please un-install" - pass=false - REPORTPASS=false - fi - done - - if $pass; then - echo "Local Node - Passed, all packages that should not be installed aren't installed" - else - checkContinue - fi - fi - - echo "" - pass=true - if [ "$IPADDRESSES" != "" ]; then - for ipadd in "${NODE_IPADDRESS[@]}"; do - for PKG in "${UBUNTU_PKG[@]}"; do - `remote_command.sh $ipadd $PASSWORD "dpkg -s '$PKG' > ${tmpDir}/pkg_check 2>&1" 1 > ${tmpDir}/remote_command_check 2>&1` - `remote_scp_get.sh $ipadd $PASSWORD ${tmpDir}/pkg_check > ${tmpDir}/remote_scp_get_check 2>&1` - if [ "$?" -ne 0 ]; then - echo "Error running remote_scp_get.sh to $ipadd Node, check ${tmpDir}/remote_scp_get_check" - else - `cat ${tmpDir}/remote_command_check | grep 'command not found' > /dev/null 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Failed${normal}, $ipadd Node ${bold}dpkg${normal} package not installed" - pass=false - break - else - `cat pkg_check | grep 'install ok installed' > /dev/null 2>&1` - if [ "$?" -ne 0 ]; then - echo "${bold}Failed${normal}, $ipadd Node package ${bold}${PKG}${normal} is not installed, please install" - pass=false - fi - - `rm -f pkg_check` - fi - fi - done - - if $pass; then - echo "$ipadd Node - Passed, all dependency packages are installed" - else - checkContinue - pass=true - fi - echo "" - - #check for package that shouldnt be installed - for PKG in "${UBUNTU_PKG_NOT[@]}"; do - `remote_command.sh $ipadd $PASSWORD "dpkg -s '$PKG' > ${tmpDir}/pkg_check 2>&1" 1 > ${tmpDir}/remote_command_check 2>&1` - `remote_scp_get.sh $ipadd $PASSWORD ${tmpDir}/pkg_check > ${tmpDir}/remote_scp_get_check 2>&1` - if [ "$?" -ne 0 ]; then - echo "Error running remote_scp_get.sh to $ipadd Node, check ${tmpDir}/remote_scp_get_check" - else - `cat ${tmpDir}/remote_command_check | grep 'command not found' > /dev/null 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Failed${normal}, $ipadd Node ${bold}dpkg${normal} package not installed" - pass=false - break - else - `cat pkg_check | grep 'install ok installed' > /dev/null 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Failed${normal}, $ipadd Node package ${bold}${PKG}${normal} is installed, please un-install" - pass=false - fi - - `rm -f pkg_check` - fi - fi - done - - if $pass; then - echo "$ipadd Node - Passed, all packages that should not be installed aren't installed" - else - checkContinue - pass=true - fi - echo "" - - done - fi - fi - - declare -a DEBIAN_PKG=("libboost-all-dev" "expect" "libdbi-perl" "perl" "openssl" "file" "libreadline-dev" "rsync" "libjemalloc1" "libsnappy1" "net-tools" "libnuma1") - declare -a DEBIAN_PKG_NOT=("libmariadb18" "mariadb-server") - - if [ "$OS" == "debian8" ]; then - if [ ! `which dpkg 2>/dev/null` ] ; then - echo "${bold}Failed${normal}, Local Node ${bold}rpm${normal} package not installed" - pass=false - REPORTPASS=false - else - pass=true - #check centos packages on local node - for PKG in "${DEBIAN_PKG[@]}"; do - `dpkg -s "$PKG" > ${tmpDir}/pkg_check 2>&1` - `cat ${tmpDir}/pkg_check | grep 'install ok installed' > /dev/null 2>&1` - if [ "$?" -ne 0 ]; then - echo "${bold}Failed${normal}, Local Node package ${bold}${PKG}${normal} is not installed, please install" - pass=false - REPORTPASS=false - fi - done - - if $pass; then - echo "Local Node - Passed, all dependency packages are installed" - else - checkContinue - fi - - #check for package that shouldnt be installed - pass=true - for PKG in "${DEBIAN_PKG_NOT[@]}"; do - `dpkg -s "$PKG" > ${tmpDir}/pkg_check 2>&1` - `cat ${tmpDir}/pkg_check | grep 'install ok installed' > /dev/null 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Failed${normal}, Local Node package ${bold}${PKG}${normal} is installed, please un-install" - pass=false - REPORTPASS=false - fi - done - - if $pass; then - echo "Local Node - Passed, all packages that should not be installed aren't installed" - else - checkContinue - fi - fi - - echo "" - pass=true - if [ "$IPADDRESSES" != "" ]; then - for ipadd in "${NODE_IPADDRESS[@]}"; do - for PKG in "${DEBIAN_PKG[@]}"; do - `remote_command.sh $ipadd $PASSWORD "dpkg -s '$PKG' > ${tmpDir}/pkg_check 2>&1" 1 > ${tmpDir}/remote_command_check 2>&1` - `remote_scp_get.sh $ipadd $PASSWORD ${tmpDir}/pkg_check > ${tmpDir}/remote_scp_get_check 2>&1` - if [ "$?" -ne 0 ]; then - echo "Error running remote_scp_get.sh to $ipadd Node, check ${tmpDir}/remote_scp_get_check" - else - `cat ${tmpDir}/remote_command_check | grep 'command not found' > /dev/null 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Failed${normal}, $ipadd Node ${bold}dpkg${normal} package not installed" - pass=false - break - else - `cat pkg_check | grep 'install ok installed' > /dev/null 2>&1` - if [ "$?" -ne 0 ]; then - echo "${bold}Failed${normal}, $ipadd Node package ${bold}${PKG}${normal} is not installed, please install" - pass=false - fi - - `rm -f pkg_check` - fi - fi - done - - if $pass; then - echo "$ipadd Node - Passed, all dependency packages are installed" - else - checkContinue - pass=true - fi - echo "" - - #check for package that shouldnt be installed - for PKG in "${DEBIAN_PKG_NOT[@]}"; do - `remote_command.sh $ipadd $PASSWORD "dpkg -s '$PKG' > ${tmpDir}/pkg_check 2>&1" 1 > ${tmpDir}/remote_command_check 2>&1` - `remote_scp_get.sh $ipadd $PASSWORD ${tmpDir}/pkg_check > ${tmpDir}/remote_scp_get_check 2>&1` - if [ "$?" -ne 0 ]; then - echo "Error running remote_scp_get.sh to $ipadd Node, check ${tmpDir}/remote_scp_get_check" - else - `cat ${tmpDir}/remote_command_check | grep 'command not found' > /dev/null 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Failed${normal}, $ipadd Node ${bold}dpkg${normal} package not installed" - pass=false - break - else - `cat pkg_check | grep 'install ok installed' > /dev/null 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Failed${normal}, $ipadd Node package ${bold}${PKG}${normal} is installed, please un-install" - pass=false - fi - - `rm -f pkg_check` - fi - fi - done - - if $pass; then - echo "$ipadd Node - Passed, all packages that should not be installed aren't installed" - else - checkContinue - pass=true - fi - echo "" - - done - fi - fi - - declare -a DEBIAN9_PKG=("libboost-all-dev" "expect" "libdbi-perl" "perl" "openssl" "file" "libreadline5" "rsync" "libjemalloc1" "libsnappy1V5" "net-tools" "libaio1") - declare -a DEBIAN9_PKG_NOT=("libmariadb18" "mariadb-server") - - if [ "$OS" == "debian9" ]; then - if [ ! `which dpkg 2>/dev/null` ] ; then - echo "${bold}Failed${normal}, Local Node ${bold}rpm${normal} package not installed" - pass=false - REPORTPASS=false - else - pass=true - #check centos packages on local node - for PKG in "${DEBIAN9_PKG[@]}"; do - `dpkg -s "$PKG" > ${tmpDir}/pkg_check 2>&1` - `cat ${tmpDir}/pkg_check | grep 'install ok installed' > /dev/null 2>&1` - if [ "$?" -ne 0 ]; then - echo "${bold}Failed${normal}, Local Node package ${bold}${PKG}${normal} is not installed, please install" - pass=false - REPORTPASS=false - fi - done - - if $pass; then - echo "Local Node - Passed, all dependency packages are installed" - else - checkContinue - fi - - #check for package that shouldnt be installed - pass=true - for PKG in "${DEBIAN9_PKG_NOT[@]}"; do - `dpkg -s "$PKG" > ${tmpDir}/pkg_check 2>&1` - `cat ${tmpDir}/pkg_check | grep 'install ok installed' > /dev/null 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Failed${normal}, Local Node package ${bold}${PKG}${normal} is installed, please un-install" - pass=false - REPORTPASS=false - fi - done - - if $pass; then - echo "Local Node - Passed, all packages that should not be installed aren't installed" - else - checkContinue - fi - - fi - - echo "" - pass=true - if [ "$IPADDRESSES" != "" ]; then - for ipadd in "${NODE_IPADDRESS[@]}"; do - for PKG in "${DEBIAN9_PKG[@]}"; do - `remote_command.sh $ipadd $PASSWORD "dpkg -s '$PKG' > ${tmpDir}/pkg_check 2>&1" 1 > ${tmpDir}/remote_command_check 2>&1` - `remote_scp_get.sh $ipadd $PASSWORD ${tmpDir}/pkg_check > ${tmpDir}/remote_scp_get_check 2>&1` - if [ "$?" -ne 0 ]; then - echo "Error running remote_scp_get.sh to $ipadd Node, check ${tmpDir}/remote_scp_get_check" - else - `cat ${tmpDir}/remote_command_check | grep 'command not found' > /dev/null 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Failed${normal}, $ipadd Node ${bold}dpkg${normal} package not installed" - pass=false - break - else - `cat pkg_check | grep 'install ok installed' > /dev/null 2>&1` - if [ "$?" -ne 0 ]; then - echo "${bold}Failed${normal}, $ipadd Node package ${bold}${PKG}${normal} is not installed, please install" - pass=false - fi - - `rm -f pkg_check` - fi - fi - done - - if $pass; then - echo "$ipadd Node - Passed, all dependency packages are installed" - else - checkContinue - pass=true - fi - echo "" - - #check for package that shouldnt be installed - for PKG in "${DEBIAN9_PKG_NOT[@]}"; do - `remote_command.sh $ipadd $PASSWORD "dpkg -s '$PKG' > ${tmpDir}/pkg_check 2>&1" 1 > ${tmpDir}/remote_command_check 2>&1` - `remote_scp_get.sh $ipadd $PASSWORD ${tmpDir}/pkg_check > ${tmpDir}/remote_scp_get_check 2>&1` - if [ "$?" -ne 0 ]; then - echo "Error running remote_scp_get.sh to $ipadd Node, check ${tmpDir}/remote_scp_get_check" - else - `cat ${tmpDir}/remote_command_check | grep 'command not found' > /dev/null 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Failed${normal}, $ipadd Node ${bold}dpkg${normal} package not installed" - pass=false - break - else - `cat pkg_check | grep 'install ok installed' > /dev/null 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Failed${normal}, $ipadd Node package ${bold}${PKG}${normal} is installed, please un-install" - pass=false - fi - - `rm -f pkg_check` - fi - fi - done - - if $pass; then - echo "$ipadd Node - Passed, all packages that should not be installed aren't installed" - else - checkContinue - pass=true - fi - echo "" - - done - fi - fi - - -} - -echo "" -echo "*** This is the MariaDB Columnstore Cluster System Test Tool ***" -echo "" - -checkLocalOS -checkLocalDir -checkLocalUMASK -checkLocalSELINUX -if [ "$IPADDRESSES" != "" ]; then - checkPing - checkSSH - checkRemoteDir - checkOS - checkLocale - checkUMASK - checkSELINUX - checkFirewalls - checkPorts - checkTime -fi - -checkMysqlPassword -checkPackages - -if [ $REPORTPASS == true ] ; then - echo "" - echo "*** Finished Validation of the Cluster, all Tests Passed ***" - echo "" - exit 0 -else - echo "" - echo "*** Finished Validation of the Cluster, Failures occurred. Check for Error/Failed test results ***" - echo "" - exit 1 -fi - diff --git a/utils/clusterTester/os_detect.sh b/utils/clusterTester/os_detect.sh deleted file mode 100755 index be69e870e..000000000 --- a/utils/clusterTester/os_detect.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/sh -# -detectOS () { - checkFile1=/etc/os-release - checkFile2=/etc/centos-release - if [ -f "$checkFile1" ] - then - osPrettyName=`cat $checkFile1 | grep PRETTY_NAME|awk -F"=" '{print $2}'` - osVersionID=`cat $checkFile1 | grep VERSION_ID | awk -F"=" '{print $2}' | awk -F"." '{print $1}' | sed 's/"//g'` - else - osPrettyName=`head -n 1 $checkFile2` - osVersionID=`echo $osPrettyName | awk -F" " '{print $3}' | awk -F"." '{print $1}'` - fi -# - osName=`echo $osPrettyName | awk -F" " '{print $1}' | sed 's/"//g'` - if [ -z "$osPrettyName" ] - then - osPrettyName=`uname -o -s -r -v` - fi - if [ -z "$osName" ] || [ -z "$osVersionID" ] - then - osTag="" - else - osTag=`echo $osName$osVersionID | awk '{print tolower($0)}'` - fi -} -# - detectOS - echo Operating System name: $osPrettyName - echo Operating System tag: $osTag - case "$osTag" in - centos6|centos7|ubuntu16|debian8|suse12|debian9|ubuntu18) - ;; - *) - echo OS not supported - exit 1 - ;; - esac - - exit 0 diff --git a/versioning/BRM/masterdbrmnode.cpp b/versioning/BRM/masterdbrmnode.cpp index c4328a22e..580b108e3 100644 --- a/versioning/BRM/masterdbrmnode.cpp +++ b/versioning/BRM/masterdbrmnode.cpp @@ -27,8 +27,6 @@ #include "sessionmanager.h" #include "socketclosed.h" -#include "alarmglobal.h" -#include "alarmmanager.h" #include "liboamcpp.h" #include "stopwatch.h" #include "masterdbrmnode.h" @@ -69,24 +67,6 @@ for (it = responses.begin(); it != responses.end(); it++) \ delete *it; } -#if 1 -#define SEND_ALARM \ - try { \ - alarmmanager::ALARMManager alarmMgr; \ - alarmMgr.sendAlarmReport("System", oam::DBRM_READ_ONLY, alarmmanager::SET); \ - } \ - catch (...) { } -#define CLEAR_ALARM \ - try { \ - alarmmanager::ALARMManager alarmMgr; \ - alarmMgr.sendAlarmReport("System", oam::DBRM_READ_ONLY, alarmmanager::CLEAR); \ - } \ - catch (...) { } -#else -#define SEND_ALARM -#define CLEAR_ALARM -#endif - using namespace std; using namespace messageqcpp; using namespace logging; @@ -302,8 +282,6 @@ void MasterDBRMNode::run() IOSocket* s; boost::thread* reader; - CLEAR_ALARM - while (!die) { s = new IOSocket(); @@ -648,7 +626,6 @@ retrycmd: if (readOnly) { - SEND_ALARM slaveLock.unlock(); sendError(p->sock, ERR_READONLY); goto out; @@ -762,7 +739,6 @@ retrycmd: { if (!halting) { - SEND_ALARM undo(); readOnly = true; slaveLock.unlock(); @@ -850,7 +826,6 @@ retrycmd: { if (!halting) { - SEND_ALARM ostringstream ostr; ostr << "DBRM Controller: Caught network error. " "Confirming command " << (uint32_t)cmd << @@ -1034,8 +1009,6 @@ int MasterDBRMNode::compareResponses(uint8_t cmd, } catch (exception&) { - SEND_ALARM - readOnly = true; ostringstream os; os << "DBRM Controller: Network error on node 1. " @@ -1055,8 +1028,6 @@ int MasterDBRMNode::compareResponses(uint8_t cmd, for (it = responses.begin(), it2 = it + 1, i = 2; it2 != responses.end(); it++, it2++, i++) if (**it != **it2 && !halting) { - SEND_ALARM - ostringstream ostr; #ifdef BRM_VERBOSE cerr << "DBRM Controller: error: response from node " << i << " is different" << endl; diff --git a/versioning/BRM/masternode.cpp b/versioning/BRM/masternode.cpp index 7b85ae1b5..ae01a922d 100644 --- a/versioning/BRM/masternode.cpp +++ b/versioning/BRM/masternode.cpp @@ -48,21 +48,6 @@ bool die= false; using namespace std; using namespace BRM; -void fail() -{ - try - { - oam::Oam oam; - - oam.processInitFailure(); - } - catch (exception&) - { - cerr << "failed to notify OAM of server failure" << endl; - } -} - - class Opt { protected: @@ -89,7 +74,6 @@ public: { perror(m_progname); log_errno(std::string(m_progname)); - fail(); } void ParentLogChildMessage(const std::string &str) override { @@ -189,22 +173,6 @@ int ServiceControllerNode::Child() NotifyServiceStarted(); - try - { - oam::Oam oam; - - oam.processInitComplete("DBRMControllerNode"); - } - catch (exception& e) - { - ostringstream os; - - os << "failed to notify OAM: " << e.what(); - os << " continuing anyway"; - cerr << os.str() << endl; - log(os.str(), logging::LOG_TYPE_WARNING); - } - m->run(); retries = 0; delete m; @@ -227,7 +195,6 @@ int ServiceControllerNode::Child() { NotifyServiceInitializationFailed(); log(string("Exiting after too many errors")); - fail(); return 1; } diff --git a/versioning/BRM/slavenode.cpp b/versioning/BRM/slavenode.cpp index b8db2c3ae..2c4bb528f 100644 --- a/versioning/BRM/slavenode.cpp +++ b/versioning/BRM/slavenode.cpp @@ -49,21 +49,6 @@ SlaveComm* comm; bool die= false; boost::thread_group monitorThreads; -void fail() -{ - try - { - oam::Oam oam; - - oam.processInitFailure(); - } - catch (exception&) - { - cerr << "failed to notify OAM of server failure" << endl; - } -} - - class Opt { protected: @@ -92,7 +77,6 @@ public: { perror(m_progname); log_errno(std::string(m_progname)); - fail(); } void ParentLogChildMessage(const std::string &str) override { @@ -160,7 +144,6 @@ int ServiceWorkerNode::Child() os << "An error occured: " << e.what(); cerr << os.str() << endl; log(os.str()); - fail(); NotifyServiceInitializationFailed(); return 1; } @@ -175,21 +158,6 @@ int ServiceWorkerNode::Child() monitorThreads.create_thread(RWLockMonitor (&die, slave.getVSSLockStatus(), keys.KEYRANGE_VSS_BASE)); - try - { - oam::Oam oam; - - oam.processInitComplete("DBRMWorkerNode"); - } - catch (exception& e) - { - ostringstream os; - os << "failed to notify OAM: " << e.what(); - os << " continuing anyway"; - cerr << os.str() << endl; - log(os.str(), logging::LOG_TYPE_WARNING); - } - try { comm->run(); @@ -228,7 +196,6 @@ int main(int argc, char** argv) os << "Usage: " << argv[0] << " DBRM_WorkerN"; cerr << os.str() << endl; log(os.str()); - fail(); exit(1); } diff --git a/writeengine/client/we_clients.cpp b/writeengine/client/we_clients.cpp index 92b7c7f9c..6f58065f9 100644 --- a/writeengine/client/we_clients.cpp +++ b/writeengine/client/we_clients.cpp @@ -52,8 +52,6 @@ using namespace config; using namespace logging; #include "liboamcpp.h" -#include "alarmmanager.h" -using namespace alarmmanager; using namespace oam; #include "we_clients.h" @@ -415,13 +413,6 @@ Error: itor++; } - - // send alarm - ALARMManager alarmMgr; -// string alarmItem = sin_addr2String(client->serv_addr().sin_addr); - string alarmItem = client->addr2String(); - alarmItem.append(" WriteEngineServer"); - alarmMgr.sendAlarmReport(alarmItem.c_str(), oam::CONN_FAILURE, SET); } return; } diff --git a/writeengine/server/we_server.cpp b/writeengine/server/we_server.cpp index f3233369a..41ab037dd 100644 --- a/writeengine/server/we_server.cpp +++ b/writeengine/server/we_server.cpp @@ -187,21 +187,6 @@ void ServiceWriteEngine::setupChildSignalHandlers() int ServiceWriteEngine::Child() { - - //set BUSY_INIT state - { - // Is there a reason to have a seperate Oam instance for this? - Oam oam; - - try - { - oam.processInitComplete("WriteEngineServer", oam::BUSY_INIT); - } - catch (...) - { - } - } - setupChildSignalHandlers(); // Init WriteEngine Wrapper (including Config Columnstore.xml cache) @@ -246,16 +231,6 @@ int ServiceWriteEngine::Child() } else { - Oam oam; - - try // Get out of BUSYINIT state; else OAM will not retry - { - oam.processInitComplete("WriteEngineServer"); - } - catch (...) - { - } - // If/when a common logging class or function is added to the // WriteEngineServer, we should use that. In the mean time, // I will log this errmsg with inline calls to the logging. @@ -310,14 +285,6 @@ int ServiceWriteEngine::Child() ml.logCriticalMessage( message ); cerr << errMsg << endl; - try - { - oam.processInitFailure(); - } - catch (...) - { - } - NotifyServiceInitializationFailed(); return 2; } @@ -328,18 +295,6 @@ int ServiceWriteEngine::Child() size_t qs = mt * 100; ThreadPool tp(mt, qs); - //set ACTIVE state - { - Oam oam; - - try - { - oam.processInitComplete("WriteEngineServer", ACTIVE); - } - catch (...) - { - } - } cout << "WriteEngineServer is ready" << endl; NotifyServiceStarted(); diff --git a/writeengine/splitter/we_sdhandler.cpp b/writeengine/splitter/we_sdhandler.cpp index 5a830beed..a87d3389f 100644 --- a/writeengine/splitter/we_sdhandler.cpp +++ b/writeengine/splitter/we_sdhandler.cpp @@ -49,9 +49,6 @@ using namespace config; //----- -#include "alarmmanager.h" -using namespace alarmmanager; - #include "messagequeue.h" #include "bytestream.h" using namespace messageqcpp; diff --git a/writeengine/splitter/we_splclient.cpp b/writeengine/splitter/we_splclient.cpp index 5f7b096c6..bd51a042c 100644 --- a/writeengine/splitter/we_splclient.cpp +++ b/writeengine/splitter/we_splclient.cpp @@ -50,9 +50,6 @@ using namespace messageqcpp; #include "liboamcpp.h" using namespace oam; -#include "alarmmanager.h" -using namespace alarmmanager; - #include "we_sdhandler.h" #include "we_splclient.h" @@ -428,12 +425,12 @@ void WESplClient::onDisconnect() try { - // send alarm - ALARMManager alarmMgr; + // BT Should log this probably instead + // ALARMManager alarmMgr; //std::string alarmItem = sin_addr2String(fClnt->serv_addr().sin_addr); std::string alarmItem = fClnt->addr2String(); alarmItem.append(" WriteEngineServer"); - alarmMgr.sendAlarmReport(alarmItem.c_str(), oam::CONN_FAILURE, SET); + //alarmMgr.sendAlarmReport(alarmItem.c_str(), oam::CONN_FAILURE, SET); } catch (...) {