You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-11-02 06:13:16 +03:00
184 lines
4.6 KiB
C++
184 lines
4.6 KiB
C++
/* Copyright (C) 2014 InfiniDB, Inc.
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; version 2 of
|
|
the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
MA 02110-1301, USA. */
|
|
|
|
/***************************************************************************
|
|
* $Id: procmonMonitor.cpp 34 2006-09-29 21:13:54Z dhill $
|
|
*
|
|
* Author: David Hill
|
|
***************************************************************************/
|
|
|
|
#include "serverMonitor.h"
|
|
|
|
using namespace std;
|
|
using namespace oam;
|
|
using namespace snmpmanager;
|
|
using namespace logging;
|
|
using namespace servermonitor;
|
|
using namespace messageqcpp;
|
|
|
|
/************************************************************************************************************
|
|
* @brief procmonMonitor function
|
|
*
|
|
* purpose: Monitor Local Process Monitor (like a local heartbeat check) abd reset when it's not responding
|
|
*
|
|
*
|
|
************************************************************************************************************/
|
|
|
|
void procmonMonitor()
|
|
{
|
|
ServerMonitor serverMonitor;
|
|
Oam oam;
|
|
|
|
//wait before monitoring is started
|
|
sleep(60);
|
|
|
|
// get current server name
|
|
string moduleName;
|
|
oamModuleInfo_t st;
|
|
try {
|
|
st = oam.getModuleInfo();
|
|
moduleName = boost::get<0>(st);
|
|
}
|
|
catch (...) {
|
|
// Critical error, Log this event and exit
|
|
LoggingID lid(SERVER_MONITOR_LOG_ID);
|
|
MessageLog ml(lid);
|
|
Message msg;
|
|
Message::Args args;
|
|
args.add("Failed to read local module Info");
|
|
msg.format(args);
|
|
ml.logCriticalMessage(msg);
|
|
exit(-1);
|
|
}
|
|
|
|
string msgPort = moduleName + "_ProcessMonitor";
|
|
|
|
int heartbeatCount = 0;
|
|
|
|
// loop forever monitoring Local Process Monitor
|
|
while(true)
|
|
{
|
|
|
|
ByteStream msg;
|
|
ByteStream::byte requestID = LOCALHEARTBEAT;
|
|
|
|
msg << requestID;
|
|
|
|
try
|
|
{
|
|
MessageQueueClient mqRequest(msgPort);
|
|
mqRequest.write(msg);
|
|
|
|
// wait 10 seconds for response
|
|
ByteStream::byte returnACK;
|
|
ByteStream::byte returnRequestID;
|
|
ByteStream::byte requestStatus;
|
|
ByteStream receivedMSG;
|
|
|
|
struct timespec ts = { 10, 0 };
|
|
try {
|
|
receivedMSG = mqRequest.read(&ts);
|
|
|
|
if (receivedMSG.length() > 0) {
|
|
receivedMSG >> returnACK;
|
|
receivedMSG >> returnRequestID;
|
|
receivedMSG >> requestStatus;
|
|
|
|
if ( returnACK == oam::ACK && returnRequestID == requestID) {
|
|
// ACK for this request
|
|
heartbeatCount = 0;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
LoggingID lid(SERVER_MONITOR_LOG_ID);
|
|
MessageLog ml(lid);
|
|
Message msg;
|
|
Message::Args args;
|
|
args.add("procmonMonitor: ProcMon Msg timeout!!!");
|
|
msg.format(args);
|
|
ml.logWarningMessage(msg);
|
|
|
|
heartbeatCount++;
|
|
|
|
if ( heartbeatCount > 2 ) {
|
|
//Process Monitor not responding, restart it
|
|
system("pkill ProcMon");
|
|
LoggingID lid(SERVER_MONITOR_LOG_ID);
|
|
MessageLog ml(lid);
|
|
Message msg;
|
|
Message::Args args;
|
|
args.add("procmonMonitor: Restarting ProcMon");
|
|
msg.format(args);
|
|
ml.logWarningMessage(msg);
|
|
|
|
sleep(60);
|
|
heartbeatCount = 0;
|
|
}
|
|
}
|
|
|
|
mqRequest.shutdown();
|
|
|
|
}
|
|
catch (SocketClosed &ex) {
|
|
string error = ex.what();
|
|
|
|
LoggingID lid(SERVER_MONITOR_LOG_ID);
|
|
MessageLog ml(lid);
|
|
Message msg;
|
|
Message::Args args;
|
|
args.add("procmonMonitor: EXCEPTION ERROR on mqRequest.read: " + error);
|
|
msg.format(args);
|
|
ml.logErrorMessage(msg);
|
|
}
|
|
catch (...) {
|
|
LoggingID lid(SERVER_MONITOR_LOG_ID);
|
|
MessageLog ml(lid);
|
|
Message msg;
|
|
Message::Args args;
|
|
args.add("procmonMonitor: EXCEPTION ERROR on mqRequest.read: Caught unknown exception");
|
|
msg.format(args);
|
|
ml.logErrorMessage(msg);
|
|
}
|
|
}
|
|
catch (exception& ex)
|
|
{
|
|
string error = ex.what();
|
|
|
|
LoggingID lid(SERVER_MONITOR_LOG_ID);
|
|
MessageLog ml(lid);
|
|
Message msg;
|
|
Message::Args args;
|
|
args.add("procmonMonitor: EXCEPTION ERROR on MessageQueueClient.read: " + error);
|
|
msg.format(args);
|
|
ml.logErrorMessage(msg);
|
|
}
|
|
catch(...)
|
|
{
|
|
LoggingID lid(SERVER_MONITOR_LOG_ID);
|
|
MessageLog ml(lid);
|
|
Message msg;
|
|
Message::Args args;
|
|
args.add("procmonMonitor: EXCEPTION ERROR on MessageQueueClient: Caught unknown exception");
|
|
msg.format(args);
|
|
ml.logErrorMessage(msg);
|
|
}
|
|
|
|
sleep(60);
|
|
} //while loop
|
|
}
|