/* Copyright (C) 2014 InfiniDB, Inc.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; version 2 of
   the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
   MA 02110-1301, USA. */

/***************************************************************************
 * $Id: procmonMonitor.cpp 34 2006-09-29 21:13:54Z dhill $
 *
 *   Author: David Hill
 ***************************************************************************/

#include "serverMonitor.h"

using namespace std;
using namespace oam;
using namespace snmpmanager;
using namespace logging;
using namespace servermonitor;
using namespace messageqcpp;

namespace
{

// Severity selector for logProcmonEvent().
enum ProcmonLogLevel
{
    PROCMON_LOG_WARNING,
    PROCMON_LOG_ERROR,
    PROCMON_LOG_CRITICAL
};

/**
 * @brief Write one message to the Server Monitor log.
 *
 * Builds a LoggingID from SERVER_MONITOR_LOG_ID, formats a Message holding
 * the single argument @p text and hands it to the MessageLog method that
 * matches @p level.
 *
 * @param level severity to log at
 * @param text  message text, logged verbatim
 */
void logProcmonEvent(ProcmonLogLevel level, const std::string& text)
{
    LoggingID lid(SERVER_MONITOR_LOG_ID);
    MessageLog ml(lid);
    Message msg;
    Message::Args args;
    args.add(text);
    msg.format(args);

    switch (level)
    {
        case PROCMON_LOG_WARNING:
            ml.logWarningMessage(msg);
            break;

        case PROCMON_LOG_ERROR:
            ml.logErrorMessage(msg);
            break;

        case PROCMON_LOG_CRITICAL:
            ml.logCriticalMessage(msg);
            break;
    }
}

} // anonymous namespace

/************************************************************************************************************
* @brief	procmonMonitor function
*
* purpose:	Monitor Local Process Monitor (like a local heartbeat check) and reset when it's not responding
*
* Flow:
*   - sleeps 60 seconds, then reads the local module name; if that fails a
*     critical message is logged and the process exits with status -1
*   - then loops forever, once every 60 seconds:
*       - sends a LOCALHEARTBEAT request to the "<module>_ProcessMonitor" port
*       - waits up to 10 seconds for the reply
*       - a reply carrying oam::ACK and the same request ID resets the
*         missed-heartbeat counter
*       - an empty reply is logged as a timeout and counted; on the third
*         consecutive count "pkill ProcMon" is run, the function sleeps a
*         further 60 seconds and the counter is reset
*       - exceptions are logged as errors and leave the counter untouched
*
* This function never returns.
*
************************************************************************************************************/
void procmonMonitor()
{
    ServerMonitor serverMonitor;
    Oam oam;

    //wait before monitoring is started
    sleep(60);

    // get current server name
    string moduleName;
    oamModuleInfo_t st;

    try
    {
        st = oam.getModuleInfo();
        moduleName = boost::get<0>(st);
    }
    catch (...)
    {
        // Critical error, Log this event and exit
        logProcmonEvent(PROCMON_LOG_CRITICAL, "Failed to read local module Info");
        exit(-1);
    }

    const string msgPort = moduleName + "_ProcessMonitor";

    // consecutive heartbeat requests that timed out without a reply
    int heartbeatCount = 0;

    // loop forever monitoring Local Process Monitor
    while (true)
    {
        ByteStream request;
        ByteStream::byte requestID = LOCALHEARTBEAT;
        request << requestID;

        try
        {
            MessageQueueClient mqRequest(msgPort);
            mqRequest.write(request);

            // wait 10 seconds for response
            ByteStream::byte returnACK = 0;
            ByteStream::byte returnRequestID = 0;
            ByteStream::byte requestStatus = 0;
            ByteStream receivedMSG;

            struct timespec ts = { 10, 0 };

            try
            {
                receivedMSG = mqRequest.read(&ts);

                if (receivedMSG.length() > 0)
                {
                    receivedMSG >> returnACK;
                    receivedMSG >> returnRequestID;
                    receivedMSG >> requestStatus;

                    if (returnACK == oam::ACK && returnRequestID == requestID)
                    {
                        // ACK for this request
                        heartbeatCount = 0;
                    }
                }
                else
                {
                    // an empty ByteStream is treated as a read timeout
                    logProcmonEvent(PROCMON_LOG_WARNING, "procmonMonitor: ProcMon Msg timeout!!!");

                    heartbeatCount++;

                    if (heartbeatCount > 2)
                    {
                        //Process Monitor not responding, restart it
                        system("pkill ProcMon");

                        logProcmonEvent(PROCMON_LOG_WARNING, "procmonMonitor: Restarting ProcMon");

                        // extra delay after the kill, on top of the regular
                        // 60 second pause at the bottom of the loop
                        sleep(60);
                        heartbeatCount = 0;
                    }
                }

                // NOTE(review): only reached when nothing above threw; on the
                // exception paths the connection is presumably released by
                // mqRequest's destructor - confirm.
                mqRequest.shutdown();
            }
            catch (const SocketClosed& ex)
            {
                const string error = ex.what();
                logProcmonEvent(PROCMON_LOG_ERROR,
                                "procmonMonitor: EXCEPTION ERROR on mqRequest.read: " + error);
            }
            catch (...)
            {
                logProcmonEvent(PROCMON_LOG_ERROR,
                                "procmonMonitor: EXCEPTION ERROR on mqRequest.read: Caught unknown exception");
            }
        }
        catch (const std::exception& ex)
        {
            // Only client construction and write() can reach this handler:
            // the read path has its own handlers above.
            const string error = ex.what();
            logProcmonEvent(PROCMON_LOG_ERROR,
                            "procmonMonitor: EXCEPTION ERROR on MessageQueueClient: " + error);
        }
        catch (...)
        {
            logProcmonEvent(PROCMON_LOG_ERROR,
                            "procmonMonitor: EXCEPTION ERROR on MessageQueueClient: Caught unknown exception");
        }

        sleep(60);
    } //while loop
}