diff --git a/VERSION b/VERSION index e737f4599..91d2f876b 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ COLUMNSTORE_VERSION_MAJOR=1 COLUMNSTORE_VERSION_MINOR=0 COLUMNSTORE_VERSION_PATCH=10 -COLUMNSTORE_VERSION_RELEASE=1 +COLUMNSTORE_VERSION_RELEASE=2 diff --git a/oam/oamcpp/liboamcpp.cpp b/oam/oamcpp/liboamcpp.cpp index b7384d638..aa8dd4804 100644 --- a/oam/oamcpp/liboamcpp.cpp +++ b/oam/oamcpp/liboamcpp.cpp @@ -2536,25 +2536,48 @@ namespace oam Oam::getAlarmConfig(alarmid, name, returnValue); - // only allow user to change these levels - if ( name != "Threshold" && - name != "Occurrences" && - name != "LastIssueTime" ) + // only allow user to change these levels + if ( name != "Threshold" && + name != "Occurrences" && + name != "LastIssueTime" ) exceptionControl("setAlarmConfig", API_READONLY_PARAMETER); + string fileName = AlarmConfigFile; + + int fd = open(fileName.c_str(), O_RDWR|O_CREAT, 0644); + + // Aquire an exclusive lock + if (flock(fd,LOCK_EX) == -1) { + throw runtime_error ("Lock file error: " + fileName); + } + // write parameter to disk Config* alaConfig = Config::makeConfig(AlarmConfigFile.c_str()); alaConfig->setConfig(Section, name, value); - try + + try + { + alaConfig->write(); + } + catch(...) + { + // Release lock + if (flock(fd,LOCK_UN)==-1) { - alaConfig->write(); - } - catch(...) - { - exceptionControl("setAlarmConfig", API_FAILURE); + throw runtime_error ("Release lock file error: " + fileName); } + exceptionControl("setAlarmConfig", API_FAILURE); + } + + // Release lock + if (flock(fd,LOCK_UN)==-1) + { + throw runtime_error ("Release lock file error: " + fileName); + } + + close(fd); } /******************************************************************** @@ -6967,7 +6990,7 @@ namespace oam //current amazon max dbroot id support = 190; string PMdeviceName = "/dev/sd"; - string deviceLetter[] = {"g","h","i","j","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","end"}; + string deviceLetter[] = {"g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","end"}; /*************************************************************************** * diff --git a/oam/oamcpp/liboamcpp.h b/oam/oamcpp/liboamcpp.h index 5f4d7f7a7..78f24f6ed 100644 --- a/oam/oamcpp/liboamcpp.h +++ b/oam/oamcpp/liboamcpp.h @@ -38,6 +38,7 @@ #include #endif #include +#include #include "bytestream.h" #include "configcpp.h" diff --git a/oamapps/serverMonitor/cpuMonitor.cpp b/oamapps/serverMonitor/cpuMonitor.cpp index f469abac4..a03d88433 100644 --- a/oamapps/serverMonitor/cpuMonitor.cpp +++ b/oamapps/serverMonitor/cpuMonitor.cpp @@ -333,26 +333,26 @@ void ServerMonitor::checkCPUAlarm(string alarmItem, ALARMS alarmID) switch (alarmID) { case ALARM_NONE: // clear all alarms set if any found - if ( oam.checkActiveAlarm(CPU_USAGE_HIGH, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(CPU_USAGE_HIGH, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, CPU_USAGE_HIGH); - if ( oam.checkActiveAlarm(CPU_USAGE_MED, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(CPU_USAGE_MED, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, CPU_USAGE_MED); - if ( oam.checkActiveAlarm(CPU_USAGE_LOW, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(CPU_USAGE_LOW, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, CPU_USAGE_LOW); break; case CPU_USAGE_LOW: // clear high and medium alarms set if any found - if ( oam.checkActiveAlarm(CPU_USAGE_HIGH, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(CPU_USAGE_HIGH, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, CPU_USAGE_HIGH); - if ( oam.checkActiveAlarm(CPU_USAGE_MED, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(CPU_USAGE_MED, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, CPU_USAGE_MED); break; case CPU_USAGE_MED: // clear high alarms set if any found - if ( oam.checkActiveAlarm(CPU_USAGE_HIGH, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(CPU_USAGE_HIGH, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, CPU_USAGE_HIGH); break; diff --git a/oamapps/serverMonitor/diskMonitor.cpp b/oamapps/serverMonitor/diskMonitor.cpp index 8264c82e3..4fe5e6f09 100644 --- a/oamapps/serverMonitor/diskMonitor.cpp +++ b/oamapps/serverMonitor/diskMonitor.cpp @@ -645,26 +645,26 @@ void ServerMonitor::checkDiskAlarm(string alarmItem, ALARMS alarmID) switch (alarmID) { case ALARM_NONE: // clear all alarms set if any found - if ( oam.checkActiveAlarm(DISK_USAGE_HIGH, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(DISK_USAGE_HIGH, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, DISK_USAGE_HIGH); - if ( oam.checkActiveAlarm(DISK_USAGE_MED, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(DISK_USAGE_MED, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, DISK_USAGE_MED); - if ( oam.checkActiveAlarm(DISK_USAGE_LOW, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(DISK_USAGE_LOW, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, DISK_USAGE_LOW); break; case DISK_USAGE_LOW: // clear high and medium alarms set if any found - if ( oam.checkActiveAlarm(DISK_USAGE_HIGH, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(DISK_USAGE_HIGH, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, DISK_USAGE_HIGH); - if ( oam.checkActiveAlarm(DISK_USAGE_MED, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(DISK_USAGE_MED, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, DISK_USAGE_MED); break; case DISK_USAGE_MED: // clear high alarms set if any found - if ( oam.checkActiveAlarm(DISK_USAGE_HIGH, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(DISK_USAGE_HIGH, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, DISK_USAGE_HIGH); break; diff --git a/oamapps/serverMonitor/main.cpp b/oamapps/serverMonitor/main.cpp index 3d51a067a..10903d6a4 100644 --- a/oamapps/serverMonitor/main.cpp +++ b/oamapps/serverMonitor/main.cpp @@ -291,3 +291,27 @@ int main (int argc, char** argv) return 0; } +// common functions + +// +// Check Active alarms +// +// Use a mutex to limit the number of calls +// + +pthread_mutex_t THREAD_LOCK; + + +bool ServerMonitor::checkActiveAlarm(const int alarmid, const std::string moduleName, const std::string deviceName) +{ + Oam oam; + + pthread_mutex_lock(&THREAD_LOCK); + + bool status = oam.checkActiveAlarm(alarmid, moduleName, deviceName); + + pthread_mutex_unlock(&THREAD_LOCK); + + return status; +} + diff --git a/oamapps/serverMonitor/memoryMonitor.cpp b/oamapps/serverMonitor/memoryMonitor.cpp index a1de4540f..2ada69022 100644 --- a/oamapps/serverMonitor/memoryMonitor.cpp +++ b/oamapps/serverMonitor/memoryMonitor.cpp @@ -289,26 +289,26 @@ void ServerMonitor::checkMemoryAlarm(string alarmItem, ALARMS alarmID) switch (alarmID) { case ALARM_NONE: // clear all alarms set if any found - if ( oam.checkActiveAlarm(MEMORY_USAGE_HIGH, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(MEMORY_USAGE_HIGH, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, MEMORY_USAGE_HIGH); - if ( oam.checkActiveAlarm(MEMORY_USAGE_MED, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(MEMORY_USAGE_MED, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, MEMORY_USAGE_MED); - if ( oam.checkActiveAlarm(MEMORY_USAGE_LOW, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(MEMORY_USAGE_LOW, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, MEMORY_USAGE_LOW); break; case MEMORY_USAGE_LOW: // clear high and medium alarms set if any found - if ( oam.checkActiveAlarm(MEMORY_USAGE_HIGH, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(MEMORY_USAGE_HIGH, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, MEMORY_USAGE_HIGH); - if ( oam.checkActiveAlarm(MEMORY_USAGE_MED, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(MEMORY_USAGE_MED, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, MEMORY_USAGE_MED); break; case MEMORY_USAGE_MED: // clear high alarms set if any found - if ( oam.checkActiveAlarm(MEMORY_USAGE_HIGH, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(MEMORY_USAGE_HIGH, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, MEMORY_USAGE_HIGH); break; @@ -342,26 +342,26 @@ void ServerMonitor::checkSwapAlarm(string alarmItem, ALARMS alarmID) switch (alarmID) { case ALARM_NONE: // clear all alarms set if any found - if ( oam.checkActiveAlarm(SWAP_USAGE_HIGH, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(SWAP_USAGE_HIGH, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, SWAP_USAGE_HIGH); - if ( oam.checkActiveAlarm(SWAP_USAGE_MED, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(SWAP_USAGE_MED, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, SWAP_USAGE_MED); - if ( oam.checkActiveAlarm(SWAP_USAGE_LOW, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(SWAP_USAGE_LOW, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, SWAP_USAGE_LOW); break; case SWAP_USAGE_LOW: // clear high and medium alarms set if any found - if ( oam.checkActiveAlarm(SWAP_USAGE_HIGH, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(SWAP_USAGE_HIGH, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, SWAP_USAGE_HIGH); - if ( oam.checkActiveAlarm(SWAP_USAGE_MED, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(SWAP_USAGE_MED, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, SWAP_USAGE_MED); break; case SWAP_USAGE_MED: // clear high alarms set if any found - if ( oam.checkActiveAlarm(SWAP_USAGE_HIGH, serverName, alarmItem) ) + if ( serverMonitor.checkActiveAlarm(SWAP_USAGE_HIGH, serverName, alarmItem) ) // alarm set, clear it clearAlarm(alarmItem, SWAP_USAGE_HIGH); break; diff --git a/oamapps/serverMonitor/serverMonitor.h b/oamapps/serverMonitor/serverMonitor.h index 1ff947589..4c1a1b37c 100644 --- a/oamapps/serverMonitor/serverMonitor.h +++ b/oamapps/serverMonitor/serverMonitor.h @@ -235,6 +235,12 @@ public: * @brief db health check */ int healthCheck(bool action = true); + + /** + * @brief Check Active Alarm + */ + bool checkActiveAlarm(const int alarmid, const std::string moduleName, const std::string deviceName); + }; // end of class diff --git a/procmgr/main.cpp b/procmgr/main.cpp index 8281068d5..8f353eeed 100644 --- a/procmgr/main.cpp +++ b/procmgr/main.cpp @@ -59,6 +59,7 @@ string localHostName; string PMwithUM = "n"; string MySQLRep = "n"; string DBRootStorageType = "internal"; +int requestCount; // pushing the ACTIVE_ALARMS_FILE to all nodes every 10 seconds. const int ACTIVE_ALARMS_PUSHING_INTERVAL = 10; @@ -421,10 +422,21 @@ static void messageThread(Configuration config) { } + // Number of Max requests +// int ProcessManagerRequest = 10; + +// try { +// oam.getSystemConfig("ProcessManagerRequest", ProcessManagerRequest); +// } +// catch (...) { +// ProcessManagerRequest = 10; +// } + // //waiting for request // IOSocket fIos; + requestCount = 0; for (;;) { @@ -446,23 +458,33 @@ static void messageThread(Configuration config) catch(...) {} + requestCount++; + //log.writeLog(__LINE__, "requestCount = " + oam.itoa(requestCount), LOG_TYPE_ERROR); + + // loop until count decreases + //while(true) + //{ + // if ( requestCount < ProcessManagerRequest ) + // break; + // log.writeLog(__LINE__, "in loop", LOG_TYPE_ERROR); + //} } } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueServer for ProcMgr:" + error, LOG_TYPE_ERROR); + catch (exception& ex) + { + string error = ex.what(); + log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueServer for ProcMgr:" + error, LOG_TYPE_ERROR); - // takes 2 - 4 minites to free sockets, sleep and retry - sleep(60); - } - catch(...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueServer for ProcMgr: Caught unknown exception!", LOG_TYPE_ERROR); + // takes 2 - 4 minites to free sockets, sleep and retry + sleep(60); + } + catch(...) + { + log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueServer for ProcMgr: Caught unknown exception!", LOG_TYPE_ERROR); - // takes 2 - 4 minites to free sockets, sleep and retry - sleep(60); - } + // takes 2 - 4 minites to free sockets, sleep and retry + sleep(60); + } } return; } diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index b6fea4c5d..1079699fb 100644 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -56,6 +56,7 @@ extern string localHostName; extern string PMwithUM; extern string AmazonPMFailover; extern string DBRootStorageType; +extern int requestCount; typedef map moduleList; extern moduleList moduleInfoList; @@ -2987,12 +2988,19 @@ void processMSG(messageqcpp::IOSocket* cfIos) break; } - sleep(5); +// sleep(5); + +// requestCount--; +// log.writeLog(__LINE__, "requestCount = " + oam.itoa(requestCount), LOG_TYPE_ERROR); + fIos.close(); pthread_detach (ThreadId); pthread_exit(0); } + pthread_mutex_t ALARM_LOCK; + + /****************************************************************************************** * @brief getAlarmData * @@ -3003,6 +3011,9 @@ int ProcessManager::getAlarmData(messageqcpp::IOSocket fIos, int type, std::stri { ByteStream msg; Oam oam; + + pthread_mutex_lock(&ALARM_LOCK); + int returnStatus = oam::API_SUCCESS; AlarmList alarmList; @@ -3022,6 +3033,7 @@ int ProcessManager::getAlarmData(messageqcpp::IOSocket fIos, int type, std::stri } catch(...) {} + pthread_mutex_unlock(&ALARM_LOCK); return oam::API_FAILURE; } } @@ -3041,6 +3053,7 @@ int ProcessManager::getAlarmData(messageqcpp::IOSocket fIos, int type, std::stri } catch(...) {} + pthread_mutex_unlock(&ALARM_LOCK); return oam::API_FAILURE; } } @@ -3071,6 +3084,7 @@ int ProcessManager::getAlarmData(messageqcpp::IOSocket fIos, int type, std::stri } catch(...) {} + pthread_mutex_unlock(&ALARM_LOCK); return returnStatus; } @@ -6245,7 +6259,7 @@ void ProcessManager::saveBRM(bool skipSession, bool clearshm) if ( skipSession ) skip = "-s"; - string cmd = startup::StartUp::installDir() + "/bin/reset_locks " + skip + " > + logdir + /reset_locks.log1 2>&1"; + string cmd = startup::StartUp::installDir() + "/bin/reset_locks " + skip + " > " + logdir + "/reset_locks.log1 2>&1"; int rtnCode = system(cmd.c_str()); log.writeLog(__LINE__, "Ran reset_locks", LOG_TYPE_DEBUG);