You've already forked mariadb-columnstore-engine
							
							
				mirror of
				https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
				synced 2025-11-03 17:13:17 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			413 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			413 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/* Copyright (C) 2014 InfiniDB, Inc.
 | 
						|
 | 
						|
   This program is free software; you can redistribute it and/or
 | 
						|
   modify it under the terms of the GNU General Public License
 | 
						|
   as published by the Free Software Foundation; version 2 of
 | 
						|
   the License.
 | 
						|
 | 
						|
   This program is distributed in the hope that it will be useful,
 | 
						|
   but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
   GNU General Public License for more details.
 | 
						|
 | 
						|
   You should have received a copy of the GNU General Public License
 | 
						|
   along with this program; if not, write to the Free Software
 | 
						|
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | 
						|
   MA 02110-1301, USA. */
 | 
						|
 | 
						|
/***************************************************************************
 | 
						|
 * $Id: hardwareMonitor.cpp 34 2006-09-29 21:13:54Z dhill $
 | 
						|
 *
 | 
						|
 *   Author: David Hill
 | 
						|
 ***************************************************************************/
 | 
						|
 | 
						|
#include "hardwareMonitor.h"
 | 
						|
 | 
						|
using namespace std;
 | 
						|
using namespace oam;
 | 
						|
using namespace alarmmanager;
 | 
						|
using namespace logging;
 | 
						|
 | 
						|
 | 
						|
/************************************************************************************************************
* @brief	main function
*
* purpose:	Get current hardware status and report alarms
*
* Parses the file generated by "ipmitool sensor list".
*
* Each line has the pattern:
*   name | value | units | status | value 1 | value 2 | value 3 | value 4 | value 5 | value 6
*
* data(0) = sensor name
* data(1) = value
* data(2) = units
* data(3) = status
* data(4)-data(9) = barrier (threshold) values
*   data(4) - low non-recoverable, i.e. fatal
*   data(5) - low critical
*   data(6) - low warning
*   data(7) - high warning
*   data(8) - high critical
*   data(9) - high non-recoverable, i.e. fatal
*
************************************************************************************************************/
 | 
						|
int main (int argc, char** argv)
 | 
						|
{
 | 
						|
    string data[10];
 | 
						|
    string SensorName;
 | 
						|
    float SensorValue;
 | 
						|
    string Units;
 | 
						|
    string SensorStatus;
 | 
						|
    float lowFatal;
 | 
						|
    float lowCritical;
 | 
						|
    float lowWarning;
 | 
						|
    float highWarning;
 | 
						|
    float highCritical;
 | 
						|
    float highFatal;
 | 
						|
    char* p;
 | 
						|
 | 
						|
    // check for IPMI_SUPPORT FLAG passed in
 | 
						|
    if (argc > 1)
 | 
						|
        IPMI_SUPPORT = atoi(argv[1]);
 | 
						|
 | 
						|
    // loop forever reading the hardware status
 | 
						|
    while (true)
 | 
						|
    {
 | 
						|
        if ( IPMI_SUPPORT == 0)
 | 
						|
        {
 | 
						|
            int returnCode = system("ipmitool sensor list > /tmp/harwareMonitor.txt");
 | 
						|
 | 
						|
            if (returnCode)
 | 
						|
            {
 | 
						|
                // System error, Log this event
 | 
						|
                LoggingID lid;
 | 
						|
                MessageLog ml(lid);
 | 
						|
                Message msg;
 | 
						|
                Message::Args args;
 | 
						|
                args.add("Error running ipmitool sensor list!!!");
 | 
						|
                msg.format(args);
 | 
						|
                ml.logWarningMessage(msg);
 | 
						|
                sleep(300);
 | 
						|
                continue;
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        // parse output file
 | 
						|
 | 
						|
        ifstream File ("/tmp/harwareMonitor.txt");
 | 
						|
 | 
						|
        if (!File)
 | 
						|
        {
 | 
						|
            // System error, Log this event
 | 
						|
            LoggingID lid;
 | 
						|
            MessageLog ml(lid);
 | 
						|
            Message msg;
 | 
						|
            Message::Args args;
 | 
						|
            args.add("Error opening /tmp/harwareMonitor.txt!!!");
 | 
						|
            msg.format(args);
 | 
						|
            ml.logWarningMessage(msg);
 | 
						|
            sleep(300);
 | 
						|
            continue;
 | 
						|
        }
 | 
						|
 | 
						|
        char line[200];
 | 
						|
 | 
						|
        while (File.getline(line, 200))
 | 
						|
        {
 | 
						|
            // parse the line
 | 
						|
            int f = 0;
 | 
						|
            p = strtok(line, "|");
 | 
						|
 | 
						|
            while (p)
 | 
						|
            {
 | 
						|
                data[f] = p;
 | 
						|
                data[f] = StripWhitespace(data[f]);
 | 
						|
                p = strtok (NULL, "|");
 | 
						|
                f++;
 | 
						|
            }
 | 
						|
 | 
						|
            if ( f == 0 )
 | 
						|
                // nothing on this line, skip
 | 
						|
                continue;
 | 
						|
 | 
						|
            SensorName = data[0];
 | 
						|
            SensorValue = atof(data[1].c_str());
 | 
						|
            Units = data[2];
 | 
						|
            SensorStatus = data[3];
 | 
						|
            lowFatal = atof(data[4].c_str());
 | 
						|
            lowCritical = atof(data[5].c_str());
 | 
						|
            lowWarning = atof(data[6].c_str());
 | 
						|
            highWarning = atof(data[7].c_str());
 | 
						|
            highCritical = atof(data[8].c_str());
 | 
						|
            highFatal = atof(data[9].c_str());
 | 
						|
 | 
						|
            // check status and issue apporiate alarm if needed
 | 
						|
            if ( (SensorStatus != "ok") && (SensorStatus != "nr") && (SensorStatus != "na") )
 | 
						|
            {
 | 
						|
                // Status error, check for warning or critical levels
 | 
						|
 | 
						|
                if ( SensorValue >= highFatal )
 | 
						|
                {
 | 
						|
                    // issue critical alarm and send message to shutdown Server
 | 
						|
                    sendAlarm(SensorName, HARDWARE_HIGH, SET, SensorValue);
 | 
						|
                    sendMsgShutdownServer();
 | 
						|
                }
 | 
						|
                else if ( (SensorValue < highFatal) && (SensorValue >= highCritical) )
 | 
						|
                    // issue major alarm
 | 
						|
                    sendAlarm(SensorName, HARDWARE_MED, SET, SensorValue);
 | 
						|
 | 
						|
                else if ( (SensorValue < highCritical ) && (SensorValue >= highWarning) )
 | 
						|
                    // issue minor alarm
 | 
						|
                    sendAlarm(SensorName, HARDWARE_LOW, SET, SensorValue);
 | 
						|
 | 
						|
                else if ( (SensorValue <= lowWarning) && (SensorValue > lowCritical) )
 | 
						|
                    // issue minor alarm
 | 
						|
                    sendAlarm(SensorName, HARDWARE_LOW, SET, SensorValue);
 | 
						|
 | 
						|
                else if ( (SensorValue <= lowCritical) && (SensorValue > lowFatal) )
 | 
						|
                    // issue major alarm
 | 
						|
                    sendAlarm(SensorName, HARDWARE_MED, SET, SensorValue);
 | 
						|
 | 
						|
                else if ( SensorValue <= lowFatal )
 | 
						|
                {
 | 
						|
                    // issue critical alarm and send message to shutdown Server
 | 
						|
                    sendAlarm(SensorName, HARDWARE_HIGH, SET, SensorValue);
 | 
						|
                    sendMsgShutdownServer();
 | 
						|
                }
 | 
						|
                else
 | 
						|
                    // check if there are any active alarms that needs to be cleared
 | 
						|
                    checkAlarm(SensorName);
 | 
						|
            }
 | 
						|
            else
 | 
						|
                // check if there are any active alarms that needs to be cleared
 | 
						|
                checkAlarm(SensorName);
 | 
						|
 | 
						|
        } //end of parsing file while
 | 
						|
 | 
						|
        File.close();
 | 
						|
        // sleep for 1 minute
 | 
						|
        sleep(60);
 | 
						|
    } //end of forever while loop
 | 
						|
}
 | 
						|
 | 
						|
/******************************************************************************************
* @brief	sendAlarm
*
* purpose:	send an alarm report (trap) for a sensor unless that alarm is already
*		active, and log the out-of-range sensor value
*
******************************************************************************************/
 | 
						|
void sendAlarm(string alarmItem, ALARMS alarmID, int action, float sensorValue)
 | 
						|
{
 | 
						|
    Oam oam;
 | 
						|
 | 
						|
    //Log this event
 | 
						|
    LoggingID lid;
 | 
						|
    MessageLog ml(lid);
 | 
						|
    Message msg;
 | 
						|
    Message::Args args;
 | 
						|
    args.add(alarmItem);
 | 
						|
    args.add(", sensor value out-of-range: ");
 | 
						|
    args.add(sensorValue);
 | 
						|
 | 
						|
    // get current server name
 | 
						|
    string serverName;
 | 
						|
    oamServerInfo_t st;
 | 
						|
 | 
						|
    try
 | 
						|
    {
 | 
						|
        st = oam.getServerInfo();
 | 
						|
        serverName = boost::get<0>(st);
 | 
						|
    }
 | 
						|
    catch (...)
 | 
						|
    {
 | 
						|
        serverName = "Unknown Server";
 | 
						|
    }
 | 
						|
 | 
						|
    // check if there is an active alarm above the reporting theshold
 | 
						|
    // that needs to be cleared
 | 
						|
    checkAlarm(alarmItem, alarmID);
 | 
						|
 | 
						|
    // check if Alarm is already active, don't resend
 | 
						|
    if ( !( oam.checkActiveAlarm(alarmID, serverName, alarmItem)) )
 | 
						|
    {
 | 
						|
 | 
						|
        ALARMManager alarmMgr;
 | 
						|
        // send alarm
 | 
						|
        alarmMgr.sendAlarmReport(alarmItem.c_str(), alarmID, action);
 | 
						|
 | 
						|
        args.add(", Alarm set: ");
 | 
						|
        args.add(alarmID);
 | 
						|
    }
 | 
						|
 | 
						|
    // output log
 | 
						|
    msg.format(args);
 | 
						|
    ml.logWarningMessage(msg);
 | 
						|
 | 
						|
    return;
 | 
						|
}
 | 
						|
 | 
						|
/******************************************************************************************
* @brief	checkAlarm
*
* purpose:	check whether hardware alarms are active for an item and clear the ones
*		selected by the given alarm level
*
******************************************************************************************/
 | 
						|
void checkAlarm(string alarmItem, ALARMS alarmID)
 | 
						|
{
 | 
						|
    Oam oam;
 | 
						|
 | 
						|
    // get current server name
 | 
						|
    string serverName;
 | 
						|
    oamServerInfo_t st;
 | 
						|
 | 
						|
    try
 | 
						|
    {
 | 
						|
        st = oam.getServerInfo();
 | 
						|
        serverName = boost::get<0>(st);
 | 
						|
    }
 | 
						|
    catch (...)
 | 
						|
    {
 | 
						|
        serverName = "Unknown Server";
 | 
						|
    }
 | 
						|
 | 
						|
    switch (alarmID)
 | 
						|
    {
 | 
						|
        case ALARM_NONE: 	// clear all alarms set if any found
 | 
						|
            if ( oam.checkActiveAlarm(HARDWARE_HIGH, serverName, alarmItem) )
 | 
						|
                //  alarm set, clear it
 | 
						|
                clearAlarm(alarmItem, HARDWARE_HIGH);
 | 
						|
 | 
						|
            if ( oam.checkActiveAlarm(HARDWARE_MED, serverName, alarmItem) )
 | 
						|
                //  alarm set, clear it
 | 
						|
                clearAlarm(alarmItem, HARDWARE_MED);
 | 
						|
 | 
						|
            if ( oam.checkActiveAlarm(HARDWARE_LOW, serverName, alarmItem) )
 | 
						|
                //  alarm set, clear it
 | 
						|
                clearAlarm(alarmItem, HARDWARE_LOW);
 | 
						|
 | 
						|
            break;
 | 
						|
 | 
						|
        case HARDWARE_LOW: 	// clear high and medium alarms set if any found
 | 
						|
            if ( oam.checkActiveAlarm(HARDWARE_HIGH, serverName, alarmItem) )
 | 
						|
                //  alarm set, clear it
 | 
						|
                clearAlarm(alarmItem, HARDWARE_HIGH);
 | 
						|
 | 
						|
            if ( oam.checkActiveAlarm(HARDWARE_MED, serverName, alarmItem) )
 | 
						|
                //  alarm set, clear it
 | 
						|
                clearAlarm(alarmItem, HARDWARE_MED);
 | 
						|
 | 
						|
            break;
 | 
						|
 | 
						|
        case HARDWARE_MED: 	// clear high alarms set if any found
 | 
						|
            if ( oam.checkActiveAlarm(HARDWARE_HIGH, serverName, alarmItem) )
 | 
						|
                //  alarm set, clear it
 | 
						|
                clearAlarm(alarmItem, HARDWARE_HIGH);
 | 
						|
 | 
						|
            break;
 | 
						|
 | 
						|
        default:			// none to clear
 | 
						|
            break;
 | 
						|
    } // end of switch
 | 
						|
 | 
						|
    return;
 | 
						|
}
 | 
						|
 | 
						|
/******************************************************************************************
* @brief	clearAlarm
*
* purpose:	clear an alarm that was previously set and log the event
*
******************************************************************************************/
 | 
						|
void clearAlarm(string alarmItem, ALARMS alarmID)
 | 
						|
{
 | 
						|
    ALARMManager alarmMgr;
 | 
						|
    alarmMgr.sendAlarmReport(alarmItem.c_str(), alarmID, CLEAR);
 | 
						|
 | 
						|
    //Log this event
 | 
						|
    LoggingID lid;
 | 
						|
    MessageLog ml(lid);
 | 
						|
    Message msg;
 | 
						|
    Message::Args args;
 | 
						|
    args.add(alarmItem);
 | 
						|
    args.add(" alarm #");
 | 
						|
    args.add(alarmID);
 | 
						|
    args.add("cleared");
 | 
						|
    msg.format(args);
 | 
						|
    ml.logWarningMessage(msg);
 | 
						|
}
 | 
						|
/******************************************************************************************
* @brief	sendMsgShutdownServer
*
* purpose:	log a fatal hardware alarm and send a message to shut down the server
*
******************************************************************************************/
 | 
						|
void sendMsgShutdownServer()
 | 
						|
{
 | 
						|
    Oam oam;
 | 
						|
 | 
						|
    //Log this event
 | 
						|
    LoggingID lid;
 | 
						|
    MessageLog ml(lid);
 | 
						|
    Message msg;
 | 
						|
    Message::Args args;
 | 
						|
    args.add("Fatal Hardware Alarm detected, Server being shutdown");
 | 
						|
    msg.format(args);
 | 
						|
    ml.logCriticalMessage(msg);
 | 
						|
 | 
						|
    string serverName;
 | 
						|
    oamServerInfo_t st;
 | 
						|
 | 
						|
    try
 | 
						|
    {
 | 
						|
        st = oam.getServerInfo();
 | 
						|
        serverName = boost::get<0>(st);
 | 
						|
    }
 | 
						|
    catch (...)
 | 
						|
    {
 | 
						|
        // o well, let's take out own action
 | 
						|
        if ( IPMI_SUPPORT == 0)
 | 
						|
            system("init 0");
 | 
						|
    }
 | 
						|
 | 
						|
    try
 | 
						|
    {
 | 
						|
        oam.shutdownServer(serverName, FORCEFUL, ACK_NO);
 | 
						|
    }
 | 
						|
    catch (exception& e)
 | 
						|
    {
 | 
						|
        // o well, let's take out own action
 | 
						|
        if ( IPMI_SUPPORT == 0)
 | 
						|
            system("init 0");
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
/******************************************************************************************
* @brief	StripWhitespace
*
* purpose:	remove leading spaces from a string and cut it at the first space that
*		follows, e.g. "  abc def " becomes "abc"
*
******************************************************************************************/
 | 
						|
string StripWhitespace(string value)
 | 
						|
{
 | 
						|
    for (;;)
 | 
						|
    {
 | 
						|
        string::size_type pos = value.find (' ', 0);
 | 
						|
 | 
						|
        if (pos == string::npos)
 | 
						|
            // no more found
 | 
						|
            break;
 | 
						|
 | 
						|
        // strip leading
 | 
						|
        if (pos == 0)
 | 
						|
        {
 | 
						|
            value = value.substr (pos + 1, 10000);
 | 
						|
        }
 | 
						|
        else
 | 
						|
        {
 | 
						|
            // strip trailing
 | 
						|
            value = value.substr (0, pos);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    return value;
 | 
						|
}
 |