You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-08-10 01:22:48 +03:00
487 lines
14 KiB
C++
487 lines
14 KiB
C++
/* Copyright (C) 2014 InfiniDB, Inc.
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; version 2 of
|
|
the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
MA 02110-1301, USA. */
|
|
|
|
/***************************************************************************
|
|
* $Id: memoryMonitor.cpp 34 2006-09-29 21:13:54Z dhill $
|
|
*
|
|
* Author: Zhixuan Zhu
|
|
***************************************************************************/
|
|
|
|
#include "cgroupconfigurator.h"
|
|
#include "serverMonitor.h"
|
|
|
|
using namespace std;
|
|
using namespace oam;
|
|
using namespace snmpmanager;
|
|
using namespace logging;
|
|
using namespace servermonitor;
|
|
//using namespace procheartbeat;
|
|
|
|
// Total memory as last returned by CGroupConfigurator::getTotalMemory().
// Written on every pass of memoryMonitor() and read by
// ServerMonitor::outputProcMemory() to derive per-process usage.
unsigned long totalMem;
|
|
// List of the top memory-consuming processes; cleared and refilled on each
// ServerMonitor::outputProcMemory() call.
ProcessMemoryList pml;
|
|
// Swap action state shared between memoryMonitor() and
// ServerMonitor::checkSwapAction():
//   0 - initial value; the first checkSwapAction() call only logs and moves to 1
//   1 - set by checkSwapAction() after that first, log-only call
//   2 - set by memoryMonitor() when total swap is 0 or swap usage is below the
//       major threshold
// Only the value 0 is ever tested (in checkSwapAction()).
int swapFlag = 0;
|
|
|
|
// Locked by memoryMonitor() around its call to outputProcMemory().
// NOTE(review): no initialisation is visible in this file - confirm it is
// initialised elsewhere or rely on static zero-initialisation deliberately.
pthread_mutex_t MEMORY_LOCK;
|
|
|
|
/*****************************************************************************************
|
|
* @brief memoryMonitor Thread
|
|
*
|
|
* purpose: Get current Memory and Swap usage, report alarms
|
|
*
|
|
*****************************************************************************************/
|
|
void memoryMonitor()
|
|
{
|
|
ServerMonitor serverMonitor;
|
|
|
|
int swapUsagePercent = 0;
|
|
|
|
// set defaults
|
|
int memoryCritical = 90,
|
|
memoryMajor = 0,
|
|
memoryMinor = 0,
|
|
swapCritical = 90,
|
|
swapMajor = 80,
|
|
swapMinor = 70;
|
|
|
|
int day = 0;
|
|
|
|
//set monitoring period to 60 seconds
|
|
int monitorPeriod = MONITOR_PERIOD;
|
|
utils::CGroupConfigurator cg;
|
|
|
|
while(true)
|
|
{
|
|
// Get MEMORY usage water mark from server configuration and compare
|
|
ModuleTypeConfig moduleTypeConfig;
|
|
Oam oam;
|
|
|
|
try {
|
|
oam.getSystemConfig (moduleTypeConfig);
|
|
memoryCritical = moduleTypeConfig.ModuleMemCriticalThreshold;
|
|
memoryMajor = moduleTypeConfig.ModuleMemMajorThreshold;
|
|
memoryMinor = moduleTypeConfig.ModuleMemMinorThreshold;
|
|
swapCritical = moduleTypeConfig.ModuleSwapCriticalThreshold;
|
|
swapMajor = moduleTypeConfig.ModuleSwapMajorThreshold;
|
|
swapMinor = moduleTypeConfig.ModuleSwapMinorThreshold;
|
|
} catch (...)
|
|
{
|
|
sleep(5);
|
|
continue;
|
|
}
|
|
|
|
//get memory stats
|
|
totalMem = cg.getTotalMemory();
|
|
uint64_t freeMem = cg.getFreeMemory();
|
|
uint64_t usedMem = totalMem - freeMem;
|
|
|
|
//get swap stats
|
|
uint64_t totalSwap = cg.getTotalSwapSpace();
|
|
uint64_t usedSwap = cg.getSwapInUse();
|
|
|
|
|
|
if ( totalSwap == 0 ) {
|
|
swapUsagePercent = 0;
|
|
swapFlag = 2;
|
|
|
|
//get current day, log warning only once a day
|
|
time_t now;
|
|
now = time(NULL);
|
|
struct tm tm;
|
|
localtime_r(&now, &tm);
|
|
|
|
if ( day != tm.tm_mday) {
|
|
day = tm.tm_mday;
|
|
|
|
//Log this event
|
|
LoggingID lid(SERVER_MONITOR_LOG_ID);
|
|
MessageLog ml(lid);
|
|
Message msg;
|
|
Message::Args args;
|
|
args.add("Total Swap space is set to 0");
|
|
msg.format(args);
|
|
ml.logWarningMessage(msg);
|
|
}
|
|
}
|
|
else
|
|
swapUsagePercent = usedSwap / (totalSwap / 100);
|
|
|
|
int memoryUsagePercent;
|
|
if ( totalMem == 0 ) {
|
|
memoryUsagePercent = 0;
|
|
|
|
//Log this event
|
|
LoggingID lid(SERVER_MONITOR_LOG_ID);
|
|
MessageLog ml(lid);
|
|
Message msg;
|
|
Message::Args args;
|
|
args.add("Total Memory space is set to 0");
|
|
msg.format(args);
|
|
ml.logWarningMessage(msg);
|
|
}
|
|
else
|
|
memoryUsagePercent = (usedMem / (totalMem / 100)) + 1;
|
|
|
|
// check for Memory alarms
|
|
if (memoryUsagePercent >= memoryCritical && memoryCritical > 0 ) {
|
|
if ( monitorPeriod == MONITOR_PERIOD ) {
|
|
//first time called, log
|
|
//adjust if over 100%
|
|
if ( memoryUsagePercent > 100 )
|
|
memoryUsagePercent = 100;
|
|
|
|
LoggingID lid(SERVER_MONITOR_LOG_ID);
|
|
MessageLog ml(lid);
|
|
Message msg;
|
|
Message::Args args;
|
|
args.add("Local Memory above Critical Memory threshold with a percentage of ");
|
|
args.add((int) memoryUsagePercent);
|
|
args.add(" ; Swap ");
|
|
args.add((int) swapUsagePercent);
|
|
msg.format(args);
|
|
ml.logInfoMessage(msg);
|
|
serverMonitor.sendResourceAlarm("Local-Memory", MEMORY_USAGE_HIGH, SET, memoryUsagePercent);
|
|
|
|
pthread_mutex_lock(&MEMORY_LOCK);
|
|
serverMonitor.outputProcMemory(true);
|
|
pthread_mutex_unlock(&MEMORY_LOCK);
|
|
}
|
|
// change to 1 second for quick swap space monitoring
|
|
monitorPeriod = 1;
|
|
}
|
|
else if (memoryUsagePercent >= memoryMajor && memoryMajor > 0 ) {
|
|
monitorPeriod = MONITOR_PERIOD;
|
|
LoggingID lid(SERVER_MONITOR_LOG_ID);
|
|
MessageLog ml(lid);
|
|
Message msg;
|
|
Message::Args args;
|
|
args.add("Local Memory above Major Memory threshold with a percentage of ");
|
|
args.add((int) memoryUsagePercent);
|
|
msg.format(args);
|
|
ml.logInfoMessage(msg);
|
|
serverMonitor.sendResourceAlarm("Local-Memory", MEMORY_USAGE_MED, SET, memoryUsagePercent);
|
|
}
|
|
else if (memoryUsagePercent >= memoryMinor && memoryMinor > 0 ) {
|
|
monitorPeriod = MONITOR_PERIOD;
|
|
LoggingID lid(SERVER_MONITOR_LOG_ID);
|
|
MessageLog ml(lid);
|
|
Message msg;
|
|
Message::Args args;
|
|
args.add("Local Memory above Minor Memory threshold with a percentage of ");
|
|
args.add((int) memoryUsagePercent);
|
|
msg.format(args);
|
|
ml.logInfoMessage(msg);
|
|
serverMonitor.sendResourceAlarm("Local-Memory", MEMORY_USAGE_LOW, SET, memoryUsagePercent);
|
|
}
|
|
else {
|
|
monitorPeriod = MONITOR_PERIOD;
|
|
serverMonitor.checkMemoryAlarm("Local-Memory");
|
|
}
|
|
|
|
// check for Swap alarms
|
|
if (swapUsagePercent >= swapCritical && swapCritical > 0 ) {
|
|
//adjust if over 100%
|
|
if ( swapUsagePercent > 100 )
|
|
swapUsagePercent = 100;
|
|
LoggingID lid(SERVER_MONITOR_LOG_ID);
|
|
MessageLog ml(lid);
|
|
Message msg;
|
|
Message::Args args;
|
|
args.add("Swap above Critical Memory threshold with a percentage of ");
|
|
args.add((int) swapUsagePercent);
|
|
msg.format(args);
|
|
ml.logInfoMessage(msg);
|
|
serverMonitor.sendResourceAlarm("Swap", SWAP_USAGE_HIGH, SET, swapUsagePercent);
|
|
serverMonitor.checkSwapAction();
|
|
}
|
|
else if (swapUsagePercent >= swapMajor && swapMajor > 0 ) {
|
|
LoggingID lid(SERVER_MONITOR_LOG_ID);
|
|
MessageLog ml(lid);
|
|
Message msg;
|
|
Message::Args args;
|
|
args.add("Swap above Major Memory threshold with a percentage of ");
|
|
args.add((int) swapUsagePercent);
|
|
msg.format(args);
|
|
ml.logInfoMessage(msg);
|
|
serverMonitor.sendResourceAlarm("Swap", SWAP_USAGE_MED, SET, swapUsagePercent);
|
|
serverMonitor.checkSwapAction();
|
|
}
|
|
else if (swapUsagePercent >= swapMinor && swapMinor > 0 ) {
|
|
swapFlag = 2;
|
|
LoggingID lid(SERVER_MONITOR_LOG_ID);
|
|
MessageLog ml(lid);
|
|
Message msg;
|
|
Message::Args args;
|
|
args.add("Swap above Minor Memory threshold with a percentage of ");
|
|
args.add((int) swapUsagePercent);
|
|
msg.format(args);
|
|
ml.logInfoMessage(msg);
|
|
serverMonitor.sendResourceAlarm("Swap", SWAP_USAGE_LOW, SET, swapUsagePercent);
|
|
}
|
|
else
|
|
{
|
|
swapFlag = 2;
|
|
serverMonitor.checkSwapAlarm("Swap");
|
|
}
|
|
|
|
// sleep, 1 minute
|
|
sleep(monitorPeriod);
|
|
|
|
} // end of while loop
|
|
}
|
|
|
|
/******************************************************************************************
|
|
* @brief checkMemoryAlarm
|
|
*
|
|
* purpose: check to see if an alarm(s) is set on MEMORY and clear if so
|
|
*
|
|
******************************************************************************************/
|
|
void ServerMonitor::checkMemoryAlarm(string alarmItem, ALARMS alarmID)
|
|
{
|
|
Oam oam;
|
|
ServerMonitor serverMonitor;
|
|
|
|
// get current server name
|
|
string serverName;
|
|
oamModuleInfo_t st;
|
|
try {
|
|
st = oam.getModuleInfo();
|
|
serverName = boost::get<0>(st);
|
|
}
|
|
catch (...) {
|
|
serverName = "Unknown Server";
|
|
}
|
|
|
|
switch (alarmID) {
|
|
case ALARM_NONE: // clear all alarms set if any found
|
|
if ( oam.checkActiveAlarm(MEMORY_USAGE_HIGH, serverName, alarmItem) )
|
|
// alarm set, clear it
|
|
clearAlarm(alarmItem, MEMORY_USAGE_HIGH);
|
|
if ( oam.checkActiveAlarm(MEMORY_USAGE_MED, serverName, alarmItem) )
|
|
// alarm set, clear it
|
|
clearAlarm(alarmItem, MEMORY_USAGE_MED);
|
|
if ( oam.checkActiveAlarm(MEMORY_USAGE_LOW, serverName, alarmItem) )
|
|
// alarm set, clear it
|
|
clearAlarm(alarmItem, MEMORY_USAGE_LOW);
|
|
break;
|
|
case MEMORY_USAGE_LOW: // clear high and medium alarms set if any found
|
|
if ( oam.checkActiveAlarm(MEMORY_USAGE_HIGH, serverName, alarmItem) )
|
|
// alarm set, clear it
|
|
clearAlarm(alarmItem, MEMORY_USAGE_HIGH);
|
|
if ( oam.checkActiveAlarm(MEMORY_USAGE_MED, serverName, alarmItem) )
|
|
// alarm set, clear it
|
|
clearAlarm(alarmItem, MEMORY_USAGE_MED);
|
|
break;
|
|
case MEMORY_USAGE_MED: // clear high alarms set if any found
|
|
if ( oam.checkActiveAlarm(MEMORY_USAGE_HIGH, serverName, alarmItem) )
|
|
// alarm set, clear it
|
|
clearAlarm(alarmItem, MEMORY_USAGE_HIGH);
|
|
break;
|
|
default: // none to clear
|
|
break;
|
|
} // end of switch
|
|
return;
|
|
}
|
|
|
|
/******************************************************************************************
|
|
* @brief checkSwapAlarm
|
|
*
|
|
* purpose: check to see if an alarm(s) is set on SWAP and clear if so
|
|
*
|
|
******************************************************************************************/
|
|
void ServerMonitor::checkSwapAlarm(string alarmItem, ALARMS alarmID)
|
|
{
|
|
Oam oam;
|
|
ServerMonitor serverMonitor;
|
|
|
|
// get current server name
|
|
string serverName;
|
|
oamModuleInfo_t st;
|
|
try {
|
|
st = oam.getModuleInfo();
|
|
serverName = boost::get<0>(st);
|
|
}
|
|
catch (...) {
|
|
serverName = "Unknown Server";
|
|
}
|
|
|
|
switch (alarmID) {
|
|
case ALARM_NONE: // clear all alarms set if any found
|
|
if ( oam.checkActiveAlarm(SWAP_USAGE_HIGH, serverName, alarmItem) )
|
|
// alarm set, clear it
|
|
clearAlarm(alarmItem, SWAP_USAGE_HIGH);
|
|
if ( oam.checkActiveAlarm(SWAP_USAGE_MED, serverName, alarmItem) )
|
|
// alarm set, clear it
|
|
clearAlarm(alarmItem, SWAP_USAGE_MED);
|
|
if ( oam.checkActiveAlarm(SWAP_USAGE_LOW, serverName, alarmItem) )
|
|
// alarm set, clear it
|
|
clearAlarm(alarmItem, SWAP_USAGE_LOW);
|
|
break;
|
|
case SWAP_USAGE_LOW: // clear high and medium alarms set if any found
|
|
if ( oam.checkActiveAlarm(SWAP_USAGE_HIGH, serverName, alarmItem) )
|
|
// alarm set, clear it
|
|
clearAlarm(alarmItem, SWAP_USAGE_HIGH);
|
|
if ( oam.checkActiveAlarm(SWAP_USAGE_MED, serverName, alarmItem) )
|
|
// alarm set, clear it
|
|
clearAlarm(alarmItem, SWAP_USAGE_MED);
|
|
break;
|
|
case SWAP_USAGE_MED: // clear high alarms set if any found
|
|
if ( oam.checkActiveAlarm(SWAP_USAGE_HIGH, serverName, alarmItem) )
|
|
// alarm set, clear it
|
|
clearAlarm(alarmItem, SWAP_USAGE_HIGH);
|
|
break;
|
|
default: // none to clear
|
|
break;
|
|
} // end of switch
|
|
return;
|
|
}
|
|
|
|
/******************************************************************************************
|
|
* @brief checkSwapAction
|
|
*
|
|
* purpose: check if any system action needs tyo be taken
|
|
*
|
|
******************************************************************************************/
|
|
void ServerMonitor::checkSwapAction()
|
|
{
|
|
Oam oam;
|
|
|
|
if ( swapFlag == 0 ) {
|
|
LoggingID lid(SERVER_MONITOR_LOG_ID);
|
|
MessageLog ml(lid);
|
|
Message msg;
|
|
Message::Args args;
|
|
args.add("Swap Space usage over Major threashold, startSystem failure");
|
|
msg.format(args);
|
|
ml.logCriticalMessage(msg);
|
|
|
|
swapFlag = 1;
|
|
sleep(5);
|
|
return;
|
|
}
|
|
|
|
string swapAction = "restartSystem";
|
|
|
|
try {
|
|
oam.getSystemConfig ("SwapAction", swapAction);
|
|
} catch (...)
|
|
{}
|
|
|
|
if (swapAction == "none")
|
|
return;
|
|
|
|
LoggingID lid(SERVER_MONITOR_LOG_ID);
|
|
MessageLog ml(lid);
|
|
Message msg;
|
|
Message::Args args;
|
|
args.add("Swap Space usage over Major threashold, perform OAM command ");
|
|
args.add( swapAction);
|
|
msg.format(args);
|
|
ml.logCriticalMessage(msg);
|
|
|
|
GRACEFUL_FLAG gracefulTemp = GRACEFUL;
|
|
ACK_FLAG ackTemp = ACK_YES;
|
|
|
|
if ( swapAction == "stopSystem") {
|
|
try
|
|
{
|
|
oam.stopSystem(gracefulTemp, ackTemp);
|
|
}
|
|
catch (exception& e)
|
|
{
|
|
}
|
|
}
|
|
else if ( swapAction == "restartSystem") {
|
|
try
|
|
{
|
|
oam.restartSystem(gracefulTemp, ackTemp);
|
|
}
|
|
catch (exception& e)
|
|
{
|
|
}
|
|
}
|
|
}
|
|
|
|
/******************************************************************************************
|
|
* @brief outputProcMemory
|
|
*
|
|
* purpose: output Top memory users
|
|
*
|
|
******************************************************************************************/
|
|
void ServerMonitor::outputProcMemory(bool log)
|
|
{
|
|
//
|
|
// get top 5 Memory users by process
|
|
//
|
|
|
|
system("ps -e -orss=1,args= | sort -b -k1,1n |tail -n 5 | awk '{print $1,$2}' > /tmp/columnstore_tmp_files/processMem");
|
|
|
|
ifstream oldFile ("/tmp/columnstore_tmp_files/processMem");
|
|
|
|
string process;
|
|
long long memory;
|
|
int memoryUsage;
|
|
pml.clear();
|
|
|
|
char line[400];
|
|
while (oldFile.getline(line, 400))
|
|
{
|
|
string buf = line;
|
|
string::size_type pos = buf.find (' ',0);
|
|
if (pos != string::npos) {
|
|
memory = atol(buf.substr(0,pos-1).c_str());
|
|
memoryUsage = (memory * 1024 * 1000 / totalMem) + 1 ;
|
|
process = buf.substr(pos+1,80);
|
|
|
|
//cleanup process name
|
|
pos = process.rfind ('/');
|
|
if (pos != string::npos)
|
|
process=process.substr(pos+1,80);
|
|
else
|
|
{
|
|
pos = process.find ('[',0);
|
|
if (pos != string::npos)
|
|
process=process.substr(pos+1,80);
|
|
pos = process.find (']',0);
|
|
if (pos != string::npos)
|
|
process=process.substr(0,pos);
|
|
}
|
|
|
|
processMemory pm;
|
|
pm.processName = process;
|
|
pm.usedBlocks = memory;
|
|
pm.usedPercent = memoryUsage;
|
|
pml.push_back(pm);
|
|
|
|
if (log) {
|
|
LoggingID lid(SERVER_MONITOR_LOG_ID);
|
|
MessageLog ml(lid);
|
|
Message msg;
|
|
Message::Args args;
|
|
args.add("Memory Usage for Process: ");
|
|
args.add(process);
|
|
args.add(" : Memory Used ");
|
|
args.add((int) memory);
|
|
args.add(" : % Used ");
|
|
args.add(memoryUsage);
|
|
msg.format(args);
|
|
ml.logInfoMessage(msg);
|
|
}
|
|
}
|
|
}
|
|
oldFile.close();
|
|
}
|