mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-04-18 21:44:02 +03:00
3697 lines
131 KiB
C++
3697 lines
131 KiB
C++
/* Copyright (C) 2014 InfiniDB, Inc.
|
|
Copyright (C) 2016 MariaDB Corporation
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; version 2 of
|
|
the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
MA 02110-1301, USA. */
|
|
|
|
|
|
#include <boost/interprocess/shared_memory_object.hpp>
|
|
#include <boost/interprocess/mapped_region.hpp>
|
|
#include <boost/version.hpp>
|
|
namespace bi = boost::interprocess;
|
|
|
|
#include "processmonitor.h"
|
|
#include "installdir.h"
|
|
|
|
#include "IDBPolicy.h"
|
|
#include "utils_utf8.h"
|
|
#include "crashtrace.h"
|
|
#include "checks.h"
|
|
|
|
using namespace std;
|
|
using namespace messageqcpp;
|
|
using namespace processmonitor;
|
|
using namespace oam;
|
|
using namespace logging;
|
|
using namespace alarmmanager;
|
|
using namespace config;
|
|
using namespace idbdatafile;
|
|
|
|
//using namespace procheartbeat;
|
|
|
|
static void* messageThread(MonitorConfig* config);
|
|
static void* statusControlThread(void*);
|
|
static void* sigchldHandleThread(void*);
|
|
static void SIGCHLDHandler(int signal_number);
|
|
static void* chldHandleThread(MonitorConfig* config);
|
|
static void sigHupHandler(int sig);
|
|
static void* mysqlMonitorThread(MonitorConfig* config);
|
|
string systemOAM;
|
|
string dm_server;
|
|
string cloud;
|
|
string DataRedundancyConfig = "n";
|
|
bool HDFS = false;
|
|
|
|
void updateShareMemory(processStatusList* aPtr);
|
|
|
|
bool runStandby = false;
|
|
bool processInitComplete = false;
|
|
bool rootUser = true;
|
|
bool mainResumeFlag;
|
|
string USER = "root";
|
|
string PMwithUM = "n";
|
|
bool startProcMon = false;
|
|
string tmpLogDir;
|
|
string SUDO = "";
|
|
|
|
//extern std::string gOAMParentModuleName;
|
|
extern bool gOAMParentModuleFlag;
|
|
|
|
pthread_mutex_t STATUS_LOCK;
|
|
|
|
bool getshm(const string &name, int size, bi::shared_memory_object &target) {
|
|
MonitorLog log;
|
|
bool created = false;
|
|
try
|
|
{
|
|
bi::permissions perms;
|
|
perms.set_unrestricted();
|
|
bi::shared_memory_object shm(bi::create_only, name.c_str(), bi::read_write, perms);
|
|
created = true;
|
|
shm.truncate(size);
|
|
target.swap(shm);
|
|
}
|
|
catch (bi::interprocess_exception& biex)
|
|
{
|
|
if (biex.get_error_code() == bi::already_exists_error) {
|
|
try {
|
|
bi::shared_memory_object shm(bi::open_only, name.c_str(), bi::read_write);
|
|
target.swap(shm);
|
|
}
|
|
catch (exception &e) {
|
|
ostringstream os;
|
|
os << "ProcMon failed to attach to the " << name << " shared mem segment, got " << e.what();
|
|
log.writeLog(__LINE__, os.str(), LOG_TYPE_CRITICAL);
|
|
exit(1);
|
|
}
|
|
}
|
|
else {
|
|
ostringstream os;
|
|
os << "ProcMon failed to create the '" << name << "' shared mem segment, got " << biex.what() << ".";
|
|
os << " Check the permissions on /dev/shm; should be 1777";
|
|
log.writeLog(__LINE__, os.str(), LOG_TYPE_CRITICAL);
|
|
exit(1);
|
|
}
|
|
}
|
|
return created;
|
|
}
|
|
|
|
|
|
/******************************************************************************************
|
|
* @brief main
|
|
*
|
|
* purpose: Launch boot child processes and sit on read for incoming messages
|
|
*
|
|
******************************************************************************************/
|
|
int main(int argc, char** argv)
|
|
{
|
|
#ifndef _MSC_VER
|
|
setuid(0); // set effective ID to root; ignore return status
|
|
#endif
|
|
|
|
struct sigaction ign;
|
|
|
|
memset(&ign, 0, sizeof(ign));
|
|
ign.sa_handler = fatalHandler;
|
|
sigaction(SIGSEGV, &ign, 0);
|
|
sigaction(SIGABRT, &ign, 0);
|
|
sigaction(SIGFPE, &ign, 0);
|
|
|
|
if (argc > 1 && string(argv[1]) == "--daemon")
|
|
{
|
|
if (fork() != 0) return 0;
|
|
|
|
umask(0);
|
|
setsid();
|
|
chdir("/");
|
|
close(0);
|
|
close(1);
|
|
close(2);
|
|
open("/dev/null", O_RDONLY);
|
|
open("/dev/null", O_WRONLY);
|
|
open("/dev/null", O_WRONLY);
|
|
}
|
|
|
|
// setup environment for using HDFS.
|
|
idbdatafile::IDBPolicy::configIDBPolicy();
|
|
|
|
Oam oam;
|
|
MonitorLog log;
|
|
MonitorConfig config;
|
|
ProcessMonitor aMonitor(config, log);
|
|
|
|
log.writeLog(__LINE__, " ");
|
|
log.writeLog(__LINE__, "**********Process Monitor Started**********");
|
|
log.writeLog(__LINE__, " ", LOG_TYPE_DEBUG);
|
|
log.writeLog(__LINE__, "**********Process Monitor Started**********", LOG_TYPE_DEBUG);
|
|
|
|
//Ignore SIGPIPE signals
|
|
signal(SIGPIPE, SIG_IGN);
|
|
|
|
//create SIGHUP handler to get configuration updates
|
|
signal(SIGHUP, sigHupHandler);
|
|
|
|
//check if root-user
|
|
int user;
|
|
user = getuid();
|
|
|
|
if (user != 0)
|
|
{
|
|
rootUser = false;
|
|
SUDO = "sudo ";
|
|
}
|
|
|
|
char* p = getenv("USER");
|
|
|
|
if (p && *p)
|
|
USER = p;
|
|
|
|
// Set locale language
|
|
setlocale(LC_ALL, "");
|
|
setlocale(LC_NUMERIC, "C");
|
|
|
|
//get tmp log directory
|
|
tmpLogDir = startup::StartUp::tmpDir();
|
|
|
|
string cmd = "mkdir -p " + tmpLogDir;
|
|
system(cmd.c_str());
|
|
|
|
// create message thread
|
|
pthread_t MessageThread;
|
|
int ret = pthread_create (&MessageThread, NULL, (void*(*)(void*))&messageThread, &config);
|
|
|
|
if ( ret != 0 )
|
|
{
|
|
log.writeLog(__LINE__, "pthread_create failed, exiting..., return code = " + oam.itoa(ret), LOG_TYPE_CRITICAL);
|
|
string cmd = "columnstore stop > /dev/null 2>&1";
|
|
system(cmd.c_str());
|
|
exit(1);
|
|
}
|
|
|
|
//check if this is a fresh install, meaning the Columnstore.xml file is not setup
|
|
//if so, wait for messages from Procmgr to start us up
|
|
Config* sysConfig = Config::makeConfig();
|
|
string exemgrIpadd = sysConfig->getConfig("ExeMgr1", "IPAddr");
|
|
|
|
if ( exemgrIpadd == "0.0.0.0" )
|
|
{
|
|
int count = 0;
|
|
|
|
while (true)
|
|
{
|
|
if ( startProcMon )
|
|
break;
|
|
else
|
|
{
|
|
count++;
|
|
|
|
if (count > 10 )
|
|
{
|
|
count = 0;
|
|
log.writeLog(__LINE__, "Waiting for ProcMgr to start up", LOG_TYPE_DEBUG);
|
|
}
|
|
|
|
sleep(1);
|
|
}
|
|
}
|
|
|
|
//re-read local system info with updated Columnstore.xml
|
|
sleep(1);
|
|
// Config* sysConfig = Config::makeConfig();
|
|
MonitorConfig config;
|
|
|
|
//PMwithUM config
|
|
try
|
|
{
|
|
oam.getSystemConfig( "PMwithUM", PMwithUM);
|
|
}
|
|
catch (...)
|
|
{
|
|
PMwithUM = "n";
|
|
}
|
|
|
|
string modType = config.moduleType();
|
|
|
|
string mysqlpw = oam.getMySQLPassword();
|
|
|
|
string passwordOption = "";
|
|
if ( mysqlpw != oam::UnassignedName )
|
|
passwordOption = " --password=" + mysqlpw;
|
|
|
|
|
|
//run the module install script
|
|
string cmd = "columnstore_module_installer.sh --module=" + modType + " " + passwordOption + " > " + tmpLogDir + "/module_installer.log 2>&1";
|
|
log.writeLog(__LINE__, "run columnstore_module_installer.sh", LOG_TYPE_DEBUG);
|
|
log.writeLog(__LINE__, cmd, LOG_TYPE_DEBUG);
|
|
|
|
int ret = system(cmd.c_str());
|
|
|
|
if ( ret != 0 )
|
|
{
|
|
log.writeLog(__LINE__, "columnstore_module_installer.sh error, exiting..., return code = " + oam.itoa(ret), LOG_TYPE_CRITICAL);
|
|
string cmd = "columnstore stop > /dev/null 2>&1";
|
|
system(cmd.c_str());
|
|
exit(1);
|
|
}
|
|
|
|
//exit to allow ProcMon to restart in a setup state
|
|
log.writeLog(__LINE__, "restarting for a initial setup", LOG_TYPE_DEBUG);
|
|
|
|
exit (0);
|
|
}
|
|
|
|
// if amazon cloud, check and update Instance IP Addresses and volumes
|
|
try
|
|
{
|
|
oam.getSystemConfig( "Cloud", cloud);
|
|
log.writeLog(__LINE__, "Cloud setting = " + cloud, LOG_TYPE_DEBUG);
|
|
}
|
|
catch (...) {}
|
|
|
|
if ( cloud == "amazon-ec2" || cloud == "amazon-vpc" )
|
|
{
|
|
if (!aMonitor.amazonIPCheck())
|
|
{
|
|
log.writeLog(__LINE__, "ERROR: amazonIPCheck failed, exiting", LOG_TYPE_CRITICAL);
|
|
sleep(2);
|
|
string cmd = "columnstore stop > /dev/null 2>&1";
|
|
system(cmd.c_str());
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
//get gluster config
|
|
try
|
|
{
|
|
oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig);
|
|
}
|
|
catch (...)
|
|
{
|
|
DataRedundancyConfig = "n";
|
|
}
|
|
|
|
if ( DataRedundancyConfig == "y" )
|
|
{
|
|
system("mount -a > /dev/null 2>&1");
|
|
}
|
|
|
|
//hdfs / hadoop config
|
|
string DBRootStorageType;
|
|
|
|
try
|
|
{
|
|
oam.getSystemConfig( "DBRootStorageType", DBRootStorageType);
|
|
}
|
|
catch (...) {}
|
|
|
|
if ( DBRootStorageType == "hdfs" )
|
|
HDFS = true;
|
|
|
|
//PMwithUM config
|
|
try
|
|
{
|
|
oam.getSystemConfig( "PMwithUM", PMwithUM);
|
|
}
|
|
catch (...)
|
|
{
|
|
PMwithUM = "n";
|
|
}
|
|
|
|
//define entry if missing
|
|
if ( gOAMParentModuleFlag )
|
|
{
|
|
string PrimaryUMModuleName;
|
|
|
|
try
|
|
{
|
|
oam.getSystemConfig("PrimaryUMModuleName", PrimaryUMModuleName);
|
|
}
|
|
catch (...)
|
|
{
|
|
sysConfig->setConfig("SystemConfig", "PrimaryUMModuleName", oam::UnassignedName);
|
|
sysConfig->write();
|
|
}
|
|
}
|
|
|
|
if ( config.moduleType() == "pm" )
|
|
{
|
|
if ( gOAMParentModuleFlag )
|
|
log.writeLog(__LINE__, "ProcMon: Starting as ACTIVE Parent", LOG_TYPE_DEBUG);
|
|
else
|
|
log.writeLog(__LINE__, "ProcMon: Starting as NON-ACTIVE Parent", LOG_TYPE_DEBUG);
|
|
}
|
|
|
|
//create and mount data directories
|
|
aMonitor.createDataDirs(cloud);
|
|
|
|
//check if this module is recovering after a reboot for an active OAM parent state
|
|
ByteStream msg;
|
|
ByteStream::byte requestID = GETPARENTOAMMODULE;
|
|
msg << requestID;
|
|
|
|
int moduleStatus = oam::ACTIVE;
|
|
|
|
//check if currently configured as Parent OAM Module on startup
|
|
if ( gOAMParentModuleFlag )
|
|
{
|
|
try
|
|
{
|
|
oam.getSystemConfig( "DBRootStorageType", DBRootStorageType);
|
|
}
|
|
catch (...) {}
|
|
|
|
if ( ( config.OAMStandbyName() != oam::UnassignedName ) &&
|
|
DBRootStorageType != "internal" )
|
|
{
|
|
//try for 20 minutes checking if the standby node is up
|
|
string parentOAMModule;
|
|
log.writeLog(__LINE__, "starting has parent, double check. checking with old Standby Module", LOG_TYPE_DEBUG);
|
|
int count = 0;
|
|
|
|
for (; count < 120 ; count++)
|
|
{
|
|
parentOAMModule = aMonitor.sendMsgProcMon1( config.OAMStandbyName(), msg, requestID );
|
|
|
|
if ( parentOAMModule != "FAILED" )
|
|
break;
|
|
|
|
log.writeLog(__LINE__, "Standby PM not responding, retrying", LOG_TYPE_WARNING);
|
|
sleep(10);
|
|
}
|
|
|
|
// check if standby never replied, if so, shutdown
|
|
if ( count >= 120 )
|
|
{
|
|
log.writeLog(__LINE__, "Standby PM not responding, ColumnStore shutting down", LOG_TYPE_CRITICAL);
|
|
//Set the alarm
|
|
// aMonitor.sendAlarm(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET);
|
|
// sleep (1);
|
|
|
|
string cmd = "columnstore stop > /dev/null 2>&1";
|
|
|
|
system(cmd.c_str());
|
|
}
|
|
|
|
log.writeLog(__LINE__, "Old Standby has moduleparentOAMModule = " + parentOAMModule, LOG_TYPE_DEBUG);
|
|
|
|
if ( parentOAMModule != config.moduleName() )
|
|
{
|
|
gOAMParentModuleFlag = false;
|
|
|
|
log.writeLog(__LINE__, "NOT Parent OAM Module", LOG_TYPE_DEBUG);
|
|
log.writeLog(__LINE__, "NOT Parent OAM Module");
|
|
|
|
try
|
|
{
|
|
Config* sysConfig = Config::makeConfig();
|
|
|
|
// get Standby IP address
|
|
ModuleConfig moduleconfig;
|
|
oam.getSystemConfig(config.OAMStandbyName(), moduleconfig);
|
|
HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin();
|
|
string IPaddr = (*pt1).IPAddr;
|
|
|
|
sysConfig->setConfig("ProcMgr", "IPAddr", IPaddr);
|
|
sysConfig->setConfig("ProcMgr_Alarm", "IPAddr", IPaddr);
|
|
|
|
log.writeLog(__LINE__, "set ProcMgr IPaddr to Old Standby Module: " + IPaddr, LOG_TYPE_DEBUG);
|
|
//update MariaDB ColumnStore Config table
|
|
try
|
|
{
|
|
sysConfig->write();
|
|
sleep(1);
|
|
}
|
|
catch (...)
|
|
{
|
|
log.writeLog(__LINE__, "ERROR: sysConfig->write", LOG_TYPE_ERROR);
|
|
}
|
|
}
|
|
catch (...)
|
|
{
|
|
log.writeLog(__LINE__, "ERROR: makeConfig failed", LOG_TYPE_ERROR);
|
|
}
|
|
|
|
// get updated Columnstore.xml and ProcessConfig.xml, retry in case ProcMgr isn't up yet
|
|
if (!HDFS)
|
|
{
|
|
int count = 0;
|
|
|
|
while (true)
|
|
{
|
|
try
|
|
{
|
|
oam.distributeConfigFile(config.moduleName());
|
|
log.writeLog(__LINE__, "Successfull return from distributeConfigFile", LOG_TYPE_DEBUG);
|
|
|
|
oam.distributeConfigFile(config.moduleName(), "ProcessConfig.xml");
|
|
log.writeLog(__LINE__, "Successfull return from distributeProcessFile", LOG_TYPE_DEBUG);
|
|
break;
|
|
}
|
|
catch (...)
|
|
{
|
|
count++;
|
|
|
|
if (count > 10 )
|
|
{
|
|
count = 0;
|
|
log.writeLog(__LINE__, "error return from distributeConfigFile, waiting for Active ProcMgr to start", LOG_TYPE_DEBUG);
|
|
}
|
|
|
|
sleep(1);
|
|
}
|
|
}
|
|
}
|
|
|
|
// not OAM parent module, delay starting until a successful get status is performed
|
|
// makes sure the Parent OAM ProcMon is fully ready
|
|
while (true)
|
|
{
|
|
try
|
|
{
|
|
bool degraded;
|
|
oam.getModuleStatus(config.moduleName(), moduleStatus, degraded);
|
|
|
|
// if HDFS, wait until module state is MAN_INIT before continuing
|
|
if (HDFS)
|
|
{
|
|
if ( moduleStatus == oam::MAN_INIT)
|
|
break;
|
|
}
|
|
|
|
break;
|
|
}
|
|
catch (...)
|
|
{
|
|
log.writeLog(__LINE__, "waiting for good return from getModuleStatus", LOG_TYPE_DEBUG);
|
|
sleep (1);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// not active Parent, get updated Columnstore.xml, retry in case ProcMgr isn't up yet
|
|
if (!HDFS)
|
|
{
|
|
int count = 0;
|
|
|
|
while (true)
|
|
{
|
|
try
|
|
{
|
|
oam.distributeConfigFile(config.moduleName());
|
|
log.writeLog(__LINE__, "Successfull return from distributeConfigFile", LOG_TYPE_DEBUG);
|
|
|
|
oam.distributeConfigFile(config.moduleName(), "ProcessConfig.xml");
|
|
log.writeLog(__LINE__, "Successfull return from distributeProcessFile", LOG_TYPE_DEBUG);
|
|
|
|
break;
|
|
}
|
|
catch (...)
|
|
{
|
|
count++;
|
|
|
|
if (count > 10 )
|
|
{
|
|
count = 0;
|
|
log.writeLog(__LINE__, "error return from distributeConfigFile, waiting for Active ProcMgr to start", LOG_TYPE_DEBUG);
|
|
}
|
|
|
|
sleep(1);
|
|
}
|
|
}
|
|
}
|
|
|
|
// not OAM parent module, delay starting until a successful get status is performed
|
|
// makes sure the Parent OAM ProcMon is fully ready
|
|
while (true)
|
|
{
|
|
try
|
|
{
|
|
bool degraded;
|
|
oam.getModuleStatus(config.moduleName(), moduleStatus, degraded);
|
|
|
|
// if HDFS, wait until module state is MAN_INIT before continuing
|
|
if (HDFS)
|
|
{
|
|
if ( moduleStatus == oam::MAN_INIT)
|
|
break;
|
|
}
|
|
|
|
break;
|
|
}
|
|
catch (...)
|
|
{
|
|
log.writeLog(__LINE__, "waiting for good return from getModuleStatus", LOG_TYPE_DEBUG);
|
|
sleep (1);
|
|
}
|
|
}
|
|
}
|
|
|
|
// this will occur on non-distributed installs the first time ProcMon runs
|
|
if ( config.OAMParentName() == oam::UnassignedName )
|
|
{
|
|
cerr << endl << "OAMParentModuleName == oam::UnassignedName, exiting " << endl;
|
|
log.writeLog(__LINE__, "OAMParentModuleName == oam::UnassignedName, restarting");
|
|
exit (1);
|
|
}
|
|
|
|
//check if module is in a DISABLED state
|
|
bool DISABLED = false;
|
|
|
|
if ( moduleStatus == oam::MAN_DISABLED ||
|
|
moduleStatus == oam::AUTO_DISABLED )
|
|
DISABLED = true;
|
|
|
|
if ( config.moduleType() == "pm" )
|
|
{
|
|
int retry = 0;
|
|
|
|
for ( ; retry < 20 ; retry++ )
|
|
{
|
|
int ret = aMonitor.checkDataMount();
|
|
|
|
if ( ret == oam::API_SUCCESS)
|
|
break;
|
|
|
|
if (ret == API_INVALID_PARAMETER)
|
|
{
|
|
//no dbroots assigned, treat as disabled
|
|
if ( !DISABLED )
|
|
DISABLED = true;
|
|
}
|
|
|
|
if ( DISABLED )
|
|
{
|
|
log.writeLog(__LINE__, "ERROR: checkDataMount to failed, module is disabled, continuing", LOG_TYPE_WARNING);
|
|
break;
|
|
}
|
|
else
|
|
log.writeLog(__LINE__, "ERROR: checkDataMount to failed, retrying", LOG_TYPE_WARNING);
|
|
|
|
//send notification about the mount setup failure
|
|
oam.sendDeviceNotification(config.moduleName(), DBROOT_MOUNT_FAILURE);
|
|
sleep(30);
|
|
}
|
|
|
|
if ( retry == 20 )
|
|
{
|
|
log.writeLog(__LINE__, "Check DB mounts failed, shutting down", LOG_TYPE_CRITICAL);
|
|
|
|
//Set the alarm
|
|
// aMonitor.sendAlarm(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET);
|
|
// sleep (1);
|
|
string cmd = "columnstore stop > /dev/null 2>&1";
|
|
system(cmd.c_str());
|
|
}
|
|
|
|
if ( !gOAMParentModuleFlag )
|
|
{
|
|
runStandby = true;
|
|
// delete any old active alarm log file
|
|
unlink ("/var/log/mariadb/columnstore/activeAlarms");
|
|
}
|
|
|
|
//Clear mainResumeFlag
|
|
|
|
mainResumeFlag = false;
|
|
|
|
//launch Status table control thread on 'pm' modules
|
|
pthread_t statusThread;
|
|
int ret = pthread_create (&statusThread, NULL, &statusControlThread, NULL);
|
|
|
|
if ( ret != 0 )
|
|
log.writeLog(__LINE__, "pthread_create failed, return code = " + oam.itoa(ret), LOG_TYPE_ERROR);
|
|
|
|
//wait for flag to be set
|
|
|
|
while (!mainResumeFlag)
|
|
{
|
|
log.writeLog(__LINE__, "WAITING FOR mainResumeFlag to be set", LOG_TYPE_DEBUG);
|
|
|
|
sleep(1);
|
|
}
|
|
}
|
|
|
|
SystemStatus systemstatus;
|
|
|
|
try
|
|
{
|
|
oam.getSystemStatus(systemstatus, false);
|
|
}
|
|
catch (...)
|
|
{
|
|
}
|
|
|
|
// determine Standby OAM Module, if needed
|
|
if ( gOAMParentModuleFlag &&
|
|
config.OAMStandbyName() == oam::UnassignedName &&
|
|
config.ServerInstallType() != oam::INSTALL_COMBINE_DM_UM_PM )
|
|
{
|
|
|
|
|
|
string standbyPM = "";
|
|
|
|
//check if gluster, if so then find PMs that have copies of DBROOT #1
|
|
string pmList = "";
|
|
|
|
if (DataRedundancyConfig == "y")
|
|
{
|
|
|
|
try
|
|
{
|
|
string errmsg;
|
|
oam.glusterctl(oam::GLUSTER_WHOHAS, "1", pmList, errmsg);
|
|
|
|
log.writeLog(__LINE__, "glusterctl called :" + pmList, LOG_TYPE_DEBUG);
|
|
|
|
boost::char_separator<char> sep(" ");
|
|
boost::tokenizer< boost::char_separator<char> > tokens(pmList, sep);
|
|
|
|
for ( boost::tokenizer< boost::char_separator<char> >::iterator it = tokens.begin();
|
|
it != tokens.end();
|
|
++it)
|
|
{
|
|
string pm = "pm" + *it;
|
|
|
|
// skip if current module
|
|
if ( pm == config.moduleName() )
|
|
continue;
|
|
|
|
int opState;
|
|
bool degraded;
|
|
|
|
try
|
|
{
|
|
oam.getModuleStatus(pm, opState, degraded);
|
|
}
|
|
catch (...)
|
|
{}
|
|
|
|
if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED)
|
|
{
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
standbyPM = pm;
|
|
break;
|
|
}
|
|
}
|
|
|
|
}
|
|
catch (...)
|
|
{}
|
|
}
|
|
else
|
|
{
|
|
for ( unsigned int i = 0 ; i < systemstatus.systemmodulestatus.modulestatus.size(); i++)
|
|
{
|
|
string moduleName = systemstatus.systemmodulestatus.modulestatus[i].Module;
|
|
|
|
if ( moduleName.substr(0, MAX_MODULE_TYPE_SIZE) == "pm" &&
|
|
moduleName != config.moduleName() )
|
|
{
|
|
// multi pm system
|
|
int moduleStatus = systemstatus.systemmodulestatus.modulestatus[i].ModuleOpState;
|
|
|
|
if ( moduleStatus != oam::MAN_DISABLED &&
|
|
moduleStatus != oam::AUTO_DISABLED )
|
|
{
|
|
|
|
standbyPM = moduleName;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if ( standbyPM != "" )
|
|
{
|
|
// found a standby candidate
|
|
oam.setSystemConfig("StandbyOAMModuleName", standbyPM);
|
|
|
|
// update Standby IP Address
|
|
ModuleConfig moduleconfig;
|
|
oam.getSystemConfig(standbyPM, moduleconfig);
|
|
HostConfigList::iterator pt1 = moduleconfig.hostConfigList.begin();
|
|
string standbyIPaddr = (*pt1).IPAddr;
|
|
|
|
Config* sysConfig2 = Config::makeConfig();
|
|
|
|
sysConfig2->setConfig("ProcStatusControlStandby", "IPAddr", standbyIPaddr);
|
|
sysConfig2->write();
|
|
|
|
oam.setHotStandbyPM(standbyIPaddr);
|
|
|
|
log.writeLog(__LINE__, "Columnstore.xml Standby OAM updated : " + standbyPM + ":" + standbyIPaddr, LOG_TYPE_DEBUG);
|
|
log.writeLog(__LINE__, "Set Standby Module = " + standbyPM, LOG_TYPE_DEBUG);
|
|
|
|
try
|
|
{
|
|
oam.distributeConfigFile(config.moduleName());
|
|
log.writeLog(__LINE__, "successfull return from distributeConfigFile", LOG_TYPE_DEBUG);
|
|
}
|
|
catch (...)
|
|
{}
|
|
}
|
|
}
|
|
|
|
// non Parent Module, don't start until process-manager is up on parent module
|
|
// away to control starting mutliple Active Process-Managers
|
|
if ( !gOAMParentModuleFlag && config.moduleType() == "pm" )
|
|
{
|
|
string parentOAMModuleName;
|
|
|
|
while (true)
|
|
{
|
|
try
|
|
{
|
|
Config* sysConfig = Config::makeConfig();
|
|
parentOAMModuleName = sysConfig->getConfig("SystemConfig", "ParentOAMModuleName");
|
|
|
|
if ( parentOAMModuleName != oam::UnassignedName )
|
|
break;
|
|
|
|
sleep(1);
|
|
log.writeLog(__LINE__, "Waiting for process-manager on parent module", LOG_TYPE_ERROR);
|
|
}
|
|
catch (...)
|
|
{
|
|
log.writeLog(__LINE__, "Problem getting the ParentOAMModuleName key from the Columnstore System Configuration file", LOG_TYPE_CRITICAL);
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
while (true)
|
|
{
|
|
try
|
|
{
|
|
Oam oam;
|
|
ProcessStatus procstat;
|
|
oam.getProcessStatus("ProcessManager", parentOAMModuleName, procstat);
|
|
|
|
if ( procstat.ProcessOpState == oam::ACTIVE )
|
|
break;
|
|
|
|
sleep(1);
|
|
log.writeLog(__LINE__, "Waiting for process-manager to go ACTIVE", LOG_TYPE_DEBUG);
|
|
}
|
|
catch (exception& ex)
|
|
{
|
|
// string error = ex.what();
|
|
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR);
|
|
}
|
|
catch (...)
|
|
{
|
|
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR);
|
|
}
|
|
}
|
|
}
|
|
|
|
//Mark this process AUTO-OFFLINE
|
|
aMonitor.updateProcessInfo("ProcessMonitor", oam::AUTO_OFFLINE, getpid());
|
|
|
|
//handle SIGCHLD signal
|
|
pthread_t signalThread;
|
|
ret = pthread_create (&signalThread, NULL, &sigchldHandleThread, NULL);
|
|
|
|
if ( ret != 0 )
|
|
log.writeLog(__LINE__, "pthread_create failed, return code = " + oam.itoa(ret), LOG_TYPE_ERROR);
|
|
|
|
//mysqld status monitor thread
|
|
if ( config.moduleType() == "um" ||
|
|
( config.moduleType() == "pm" && config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM ) ||
|
|
( config.moduleType() == "pm" && PMwithUM == "y") )
|
|
{
|
|
pthread_t mysqlThread;
|
|
ret = pthread_create (&mysqlThread, NULL, (void*(*)(void*))&mysqlMonitorThread, NULL);
|
|
|
|
if ( ret != 0 )
|
|
log.writeLog(__LINE__, "pthread_create failed, return code = " + oam.itoa(ret), LOG_TYPE_ERROR);
|
|
}
|
|
|
|
//update syslog file priviledges
|
|
aMonitor.changeModLog();
|
|
|
|
//Read ProcessConfig file to get process list belong to this process monitor
|
|
SystemProcessConfig systemprocessconfig;
|
|
|
|
try
|
|
{
|
|
oam.getProcessConfig(systemprocessconfig);
|
|
}
|
|
catch (exception& ex)
|
|
{
|
|
string error = ex.what();
|
|
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessConfig: " + error, LOG_TYPE_ERROR);
|
|
}
|
|
catch (...)
|
|
{
|
|
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessConfig: Caught unknown exception!", LOG_TYPE_ERROR);
|
|
}
|
|
|
|
string OAMParentModuleType = config.OAMParentName().substr(0, 2);
|
|
|
|
//Build a map for application name tag and launch ID for this Process-Monitor
|
|
for ( unsigned int i = 0 ; i < systemprocessconfig.processconfig.size(); i++)
|
|
{
|
|
//skip if both BootLaunch and LaunchID are 0
|
|
if ( systemprocessconfig.processconfig[i].BootLaunch == 0 &&
|
|
systemprocessconfig.processconfig[i].LaunchID == 0 )
|
|
continue;
|
|
|
|
if ( (systemprocessconfig.processconfig[i].ModuleType == config.moduleType() ) ||
|
|
( systemprocessconfig.processconfig[i].ModuleType == "um" &&
|
|
config.moduleType() == "pm" && PMwithUM == "y") ||
|
|
( systemprocessconfig.processconfig[i].ModuleType == "ChildExtOAMModule") ||
|
|
( systemprocessconfig.processconfig[i].ModuleType == "ChildOAMModule" ) ||
|
|
( systemprocessconfig.processconfig[i].ModuleType == "ParentOAMModule" &&
|
|
config.moduleType() == OAMParentModuleType ) )
|
|
{
|
|
// If Process Monitor, update local state
|
|
if ( systemprocessconfig.processconfig[i].ProcessName == "ProcessMonitor")
|
|
{
|
|
config.buildList(systemprocessconfig.processconfig[i].ModuleType,
|
|
systemprocessconfig.processconfig[i].ProcessName,
|
|
systemprocessconfig.processconfig[i].ProcessLocation,
|
|
systemprocessconfig.processconfig[i].ProcessArgs,
|
|
systemprocessconfig.processconfig[i].LaunchID,
|
|
getpid(),
|
|
oam::AUTO_OFFLINE,
|
|
systemprocessconfig.processconfig[i].BootLaunch,
|
|
systemprocessconfig.processconfig[i].RunType,
|
|
systemprocessconfig.processconfig[i].DepProcessName,
|
|
systemprocessconfig.processconfig[i].DepModuleName,
|
|
systemprocessconfig.processconfig[i].LogFile);
|
|
}
|
|
else
|
|
{
|
|
if ( systemprocessconfig.processconfig[i].ModuleType == "um" &&
|
|
config.moduleType() == "pm" && PMwithUM == "y" &&
|
|
systemprocessconfig.processconfig[i].ProcessName == "DMLProc" )
|
|
continue;
|
|
|
|
|
|
if ( systemprocessconfig.processconfig[i].ModuleType == "um" &&
|
|
config.moduleType() == "pm" && PMwithUM == "y" &&
|
|
systemprocessconfig.processconfig[i].ProcessName == "DDLProc" )
|
|
continue;
|
|
|
|
// Get Last Known Process Status and PID
|
|
int state = oam::AUTO_OFFLINE;
|
|
int PID = 0;
|
|
|
|
try
|
|
{
|
|
Oam oam;
|
|
ProcessStatus procstat;
|
|
oam.getProcessStatus(systemprocessconfig.processconfig[i].ProcessName, config.moduleName(), procstat);
|
|
state = procstat.ProcessOpState;
|
|
PID = procstat.ProcessID;
|
|
}
|
|
catch (exception& ex)
|
|
{
|
|
// string error = ex.what();
|
|
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR);
|
|
}
|
|
catch (...)
|
|
{
|
|
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR);
|
|
}
|
|
|
|
config.buildList(systemprocessconfig.processconfig[i].ModuleType,
|
|
systemprocessconfig.processconfig[i].ProcessName,
|
|
systemprocessconfig.processconfig[i].ProcessLocation,
|
|
systemprocessconfig.processconfig[i].ProcessArgs,
|
|
systemprocessconfig.processconfig[i].LaunchID,
|
|
PID,
|
|
state,
|
|
systemprocessconfig.processconfig[i].BootLaunch,
|
|
systemprocessconfig.processconfig[i].RunType,
|
|
systemprocessconfig.processconfig[i].DepProcessName,
|
|
systemprocessconfig.processconfig[i].DepModuleName,
|
|
systemprocessconfig.processconfig[i].LogFile);
|
|
}
|
|
}
|
|
}
|
|
|
|
log.writeLog(__LINE__, "SYSTEM STATUS = " + oam.itoa(systemstatus.SystemOpState), LOG_TYPE_DEBUG);
|
|
|
|
if ( systemstatus.SystemOpState != MAN_OFFLINE && !DISABLED)
|
|
{
|
|
|
|
// Loop through the process list to check the process current state
|
|
// Launch the Processes controlled by the Process-Monitor
|
|
processList::iterator listPtr;
|
|
processList* aPtr = config.monitoredListPtr();
|
|
listPtr = aPtr->begin();
|
|
|
|
for (; listPtr != aPtr->end(); ++listPtr)
|
|
{
|
|
// If Process Monitor, skip
|
|
if ( (*listPtr).ProcessName == "ProcessMonitor")
|
|
continue;
|
|
|
|
if ((*listPtr).processID != 0)
|
|
{
|
|
if ((*listPtr).BootLaunch == BOOT_LAUNCH)
|
|
{
|
|
//Check for SIMPLEX runtype processes
|
|
int initType = aMonitor.checkSpecialProcessState( (*listPtr).ProcessName, (*listPtr).RunType, (*listPtr).ProcessModuleType );
|
|
|
|
if ( initType == oam::COLD_STANDBY )
|
|
{
|
|
//there is a mate active, skip
|
|
(*listPtr).state = oam::COLD_STANDBY;
|
|
// sleep(1);
|
|
continue;
|
|
}
|
|
else if ( initType == oam::MAN_INIT )
|
|
initType = oam::AUTO_INIT;
|
|
|
|
//Check the process current state
|
|
if ((kill((*listPtr).processID, 0)) != 0
|
|
&& (*listPtr).state != oam::MAN_OFFLINE)
|
|
{
|
|
//The process died, start the process, reset the pid and time
|
|
|
|
//Set the alarm
|
|
aMonitor.sendAlarm((*listPtr).ProcessName.c_str(), PROCESS_DOWN_AUTO, SET);
|
|
|
|
//stop the process first to make sure it's gone
|
|
aMonitor.stopProcess((*listPtr).processID,
|
|
(*listPtr).ProcessName,
|
|
(*listPtr).ProcessLocation,
|
|
oam::FORCEFUL,
|
|
false);
|
|
|
|
//Start the process
|
|
(*listPtr).processID = aMonitor.startProcess( (*listPtr).ProcessModuleType,
|
|
(*listPtr).ProcessName,
|
|
(*listPtr).ProcessLocation,
|
|
(*listPtr).ProcessArgs,
|
|
(*listPtr).launchID,
|
|
(*listPtr).BootLaunch,
|
|
(*listPtr).RunType,
|
|
(*listPtr).DepProcessName,
|
|
(*listPtr).DepModuleName,
|
|
(*listPtr).LogFile,
|
|
initType);
|
|
|
|
// StorageManager doesn't send the "I'm online" msg to Proc*.
|
|
// Just mark it active for now. TODO: make it use the ping fcn in IDB* instead.
|
|
if (listPtr->ProcessName == "StorageManager")
|
|
oam.setProcessStatus("StorageManager", boost::get<0>(oam.getModuleInfo()),
|
|
oam::ACTIVE, listPtr->processID);
|
|
|
|
string restartStatus;
|
|
|
|
if ( (*listPtr).processID == oam::API_MINOR_FAILURE ||
|
|
(*listPtr).processID == oam::API_FAILURE )
|
|
// restart failed
|
|
string restartStatus = " restart failed!!";
|
|
else
|
|
string restartStatus = " restarted successfully!!";
|
|
|
|
log.writeLog(__LINE__, restartStatus, LOG_TYPE_INFO);
|
|
}
|
|
}
|
|
}
|
|
else if ((*listPtr).BootLaunch == BOOT_LAUNCH)
|
|
{
|
|
//Check for SIMPLEX runtype processes
|
|
int initType = aMonitor.checkSpecialProcessState( (*listPtr).ProcessName, (*listPtr).RunType, (*listPtr).ProcessModuleType );
|
|
|
|
if ( initType == oam::COLD_STANDBY )
|
|
{
|
|
//there is a mate active, skip
|
|
(*listPtr).state = oam::COLD_STANDBY;
|
|
sleep(1);
|
|
continue;
|
|
}
|
|
else if ( initType == oam::MAN_INIT )
|
|
initType = oam::AUTO_INIT;
|
|
|
|
if ((*listPtr).state == oam::MAN_OFFLINE)
|
|
continue;
|
|
|
|
//stop the process first to make sure it's gone
|
|
aMonitor.stopProcess((*listPtr).processID,
|
|
(*listPtr).ProcessName,
|
|
(*listPtr).ProcessLocation,
|
|
oam::FORCEFUL,
|
|
false);
|
|
|
|
//Start the boot time processes, set its state, ProcessID
|
|
(*listPtr).processID = aMonitor.startProcess((*listPtr).ProcessModuleType,
|
|
(*listPtr).ProcessName,
|
|
(*listPtr).ProcessLocation,
|
|
(*listPtr).ProcessArgs,
|
|
(*listPtr).launchID,
|
|
(*listPtr).BootLaunch,
|
|
(*listPtr).RunType,
|
|
(*listPtr).DepProcessName,
|
|
(*listPtr).DepModuleName,
|
|
(*listPtr).LogFile,
|
|
initType);
|
|
|
|
// StorageManager doesn't send the "I'm online" msg to Proc*.
|
|
// Just mark it active for now. TODO: make it use the ping fcn in IDB* instead.
|
|
if (listPtr->ProcessName == "StorageManager")
|
|
oam.setProcessStatus("StorageManager", boost::get<0>(oam.getModuleInfo()),
|
|
oam::ACTIVE, listPtr->processID);
|
|
|
|
string restartStatus;
|
|
|
|
if ( (*listPtr).processID == oam::API_MINOR_FAILURE ||
|
|
(*listPtr).processID == oam::API_FAILURE )
|
|
// restart failed
|
|
string restartStatus = " restart failed!!";
|
|
else
|
|
string restartStatus = " restarted successfully!!";
|
|
|
|
log.writeLog(__LINE__, restartStatus, LOG_TYPE_INFO);
|
|
}
|
|
} //end of for loop
|
|
}
|
|
|
|
// create process health (monitor) thread
|
|
pthread_t processHealthThread;
|
|
ret = pthread_create (&processHealthThread, NULL, (void*(*)(void*))&chldHandleThread, &config);
|
|
|
|
if ( ret != 0 )
|
|
log.writeLog(__LINE__, "pthread_create failed, return code = " + oam.itoa(ret), LOG_TYPE_ERROR);
|
|
|
|
//Mark this process Init Complete
|
|
while (true)
|
|
{
|
|
try
|
|
{
|
|
oam.processInitComplete("ProcessMonitor");
|
|
log.writeLog(__LINE__, "processInitComplete Successfully Called", LOG_TYPE_DEBUG);
|
|
}
|
|
catch (exception& ex)
|
|
{
|
|
string error = ex.what();
|
|
log.writeLog(__LINE__, "EXCEPTION ERROR on processInitComplete: " + error, LOG_TYPE_ERROR);
|
|
// this would fail if Parent OAM Node is down
|
|
sleep(1);
|
|
continue;
|
|
}
|
|
catch (...)
|
|
{
|
|
log.writeLog(__LINE__, "EXCEPTION ERROR on processInitComplete: Caught unknown exception!", LOG_TYPE_ERROR);
|
|
// this would fail if Parent OAM Node is down
|
|
sleep(1);
|
|
continue;
|
|
}
|
|
|
|
for ( unsigned int i = 0 ; i < systemprocessconfig.processconfig.size(); i++)
|
|
{
|
|
// If Process Monitor, update local state
|
|
if ( systemprocessconfig.processconfig[i].ProcessName == "ProcessMonitor")
|
|
{
|
|
config.buildList(systemprocessconfig.processconfig[i].ModuleType,
|
|
systemprocessconfig.processconfig[i].ProcessName,
|
|
systemprocessconfig.processconfig[i].ProcessLocation,
|
|
systemprocessconfig.processconfig[i].ProcessArgs,
|
|
systemprocessconfig.processconfig[i].LaunchID,
|
|
getpid(),
|
|
oam::ACTIVE,
|
|
systemprocessconfig.processconfig[i].BootLaunch,
|
|
systemprocessconfig.processconfig[i].RunType,
|
|
systemprocessconfig.processconfig[i].DepProcessName,
|
|
systemprocessconfig.processconfig[i].DepModuleName,
|
|
systemprocessconfig.processconfig[i].LogFile);
|
|
break;
|
|
}
|
|
}
|
|
|
|
break;
|
|
|
|
//Clear the alarms
|
|
aMonitor.sendAlarm("ProcessMonitor", PROCESS_DOWN_MANUAL, CLEAR);
|
|
aMonitor.sendAlarm("ProcessMonitor", PROCESS_DOWN_AUTO, CLEAR);
|
|
}
|
|
|
|
//set process init complete and ready to process message request
|
|
processInitComplete = true;
|
|
|
|
// suspend forever
|
|
while (true)
|
|
{
|
|
sleep(1000);
|
|
}
|
|
}
|
|
|
|
/******************************************************************************************
|
|
* @brief messageThread
|
|
*
|
|
* purpose: Read incoming messages
|
|
*
|
|
******************************************************************************************/
|
|
static void* messageThread(MonitorConfig* config)
|
|
{
|
|
//ProcMon log file
|
|
MonitorLog log;
|
|
assert(config);
|
|
ProcessMonitor aMonitor(*config, log);
|
|
log.writeLog(__LINE__, "Message Thread started ..", LOG_TYPE_DEBUG);
|
|
Oam oam;
|
|
|
|
string msgPort = config->moduleName() + "_ProcessMonitor";
|
|
string port = "";
|
|
|
|
//ProcMon will wait for request
|
|
IOSocket fIos;
|
|
Config* sysConfig = Config::makeConfig();
|
|
|
|
//read and cleanup port before trying to use
|
|
try
|
|
{
|
|
port = sysConfig->getConfig(msgPort, "Port");
|
|
}
|
|
catch (...)
|
|
{}
|
|
|
|
//check if enter doesnt exist, if not use pm1's
|
|
if (port.empty() or port == "" )
|
|
{
|
|
msgPort = "pm1_ProcessMonitor";
|
|
port = sysConfig->getConfig(msgPort, "Port");
|
|
}
|
|
|
|
log.writeLog(__LINE__, "PORTS: " + msgPort + "/" + port, LOG_TYPE_DEBUG);
|
|
|
|
string cmd = "fuser -k " + port + "/tcp >/dev/null 2>&1";
|
|
|
|
system(cmd.c_str());
|
|
|
|
for (;;)
|
|
{
|
|
try
|
|
{
|
|
ByteStream msg;
|
|
MessageQueueServer mqs(msgPort);
|
|
|
|
for (;;)
|
|
{
|
|
try
|
|
{
|
|
fIos = mqs.accept();
|
|
|
|
try
|
|
{
|
|
msg = fIos.read();
|
|
|
|
if (msg.length() > 0)
|
|
{
|
|
aMonitor.processMessage(msg, fIos);
|
|
}
|
|
}
|
|
catch (exception& ex)
|
|
{
|
|
string error = ex.what();
|
|
// log.writeLog(__LINE__, "EXCEPTION ERROR on fIos.read() for " + msgPort + ", error: " + error, LOG_TYPE_ERROR);
|
|
}
|
|
catch (...)
|
|
{
|
|
// log.writeLog(__LINE__, "EXCEPTION ERROR on fIos.read() for " + msgPort + ", Caught unknown exception!", LOG_TYPE_ERROR);
|
|
}
|
|
}
|
|
catch (exception& ex)
|
|
{
|
|
string error = ex.what();
|
|
// log.writeLog(__LINE__, "EXCEPTION ERROR on mqs.accept() for " + msgPort + ", error: " + error, LOG_TYPE_ERROR);
|
|
}
|
|
catch (...)
|
|
{
|
|
// log.writeLog(__LINE__, "EXCEPTION ERROR on mqs.accept() for " + msgPort + ", Caught unknown exception!", LOG_TYPE_ERROR);
|
|
}
|
|
|
|
// give time to allow Mgr to read any acks before closing
|
|
sleep(1);
|
|
fIos.close();
|
|
}
|
|
}
|
|
catch (exception& ex)
|
|
{
|
|
string error = ex.what();
|
|
log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueServer for " + msgPort + ": " + error, LOG_TYPE_ERROR);
|
|
|
|
// takes 2 - 4 minites to free sockets, sleep and retry
|
|
sleep(1);
|
|
}
|
|
catch (...)
|
|
{
|
|
log.writeLog(__LINE__, "EXCEPTION ERROR on MessageQueueServer for " + msgPort + ": Caught unknown exception!", LOG_TYPE_ERROR);
|
|
|
|
// takes 2 - 4 minites to free sockets, sleep and retry
|
|
sleep(1);
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/******************************************************************************************
|
|
* @brief mysqlMonitorThread
|
|
*
|
|
* purpose: monitor mysqld by getting status
|
|
*
|
|
******************************************************************************************/
|
|
static void* mysqlMonitorThread(MonitorConfig* config)
|
|
{
|
|
MonitorLog log;
|
|
assert(config);
|
|
ProcessMonitor aMonitor(*config, log);
|
|
log.writeLog(__LINE__, "mysqld Monitoring Thread started ..", LOG_TYPE_DEBUG);
|
|
Oam oam;
|
|
|
|
while (true)
|
|
{
|
|
//read status, whichs set process status
|
|
try
|
|
{
|
|
oam.actionMysqlCalpont(MYSQL_STATUS);
|
|
}
|
|
catch (...)
|
|
{}
|
|
|
|
sleep(5);
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/******************************************************************************************
|
|
* @brief sigchldHandleThread / SIGCHLDHandler
|
|
*
|
|
* purpose: Catch and process dieing child processes
|
|
*
|
|
******************************************************************************************/
|
|
static void* sigchldHandleThread(void*)
|
|
{
|
|
struct sigaction sigchld_action;
|
|
memset (&sigchld_action, 0, sizeof (sigchld_action));
|
|
sigchld_action.sa_handler = &SIGCHLDHandler;
|
|
sigaction(SIGCHLD, &sigchld_action, NULL);
|
|
return NULL;
|
|
}
|
|
|
|
static void SIGCHLDHandler(int signal_number)
|
|
{
|
|
int status;
|
|
|
|
waitpid(-1, &status, WNOHANG);
|
|
|
|
return;
|
|
}
|
|
|
|
/******************************************************************************************
|
|
* @brief chldHandleThread
|
|
*
|
|
* purpose: Monitor and process dieing Non SIGCHILD SNMP child processes
|
|
* Also validate the internal Process status with the Process-Status disk file
|
|
*
|
|
******************************************************************************************/
|
|
static void* chldHandleThread(MonitorConfig* config)
|
|
{
|
|
//ProcMon log file
|
|
MonitorLog log;
|
|
assert(config);
|
|
ProcessMonitor aMonitor(*config, log);
|
|
log.writeLog(__LINE__, "Child Process Monitoring Thread started ..", LOG_TYPE_DEBUG);
|
|
Oam oam;
|
|
SystemProcessStatus systemprocessstatus;
|
|
|
|
//Loop through the process list to check the process current state
|
|
processList::iterator listPtr;
|
|
processList* aPtr = config->monitoredListPtr();
|
|
|
|
//get dbhealth flag
|
|
string DBFunctionalMonitorFlag;
|
|
|
|
try
|
|
{
|
|
oam.getSystemConfig( "DBFunctionalMonitorFlag", DBFunctionalMonitorFlag);
|
|
}
|
|
catch (...) {}
|
|
|
|
int delayCount = 0;
|
|
|
|
while (true)
|
|
{
|
|
//get process restart configured settings
|
|
int processRestartCount = 10;
|
|
int processRestartPeriod = 120;
|
|
|
|
try
|
|
{
|
|
oam.getSystemConfig("ProcessRestartCount", processRestartCount);
|
|
oam.getSystemConfig("ProcessRestartPeriod", processRestartPeriod);
|
|
}
|
|
catch (...)
|
|
{
|
|
processRestartCount = 10;
|
|
processRestartPeriod = 120;
|
|
}
|
|
|
|
listPtr = aPtr->begin();
|
|
|
|
for (; listPtr != aPtr->end(); ++listPtr)
|
|
{
|
|
// compare internal process state and PID with system process status
|
|
// Issue alarm if system state is INIT for longer than 1 minute
|
|
// Update internal process state when in INIT and System is ACTIVE/FAILED
|
|
// Updated System process state when AOS and different from internal
|
|
int outOfSyncCount = 0;
|
|
|
|
if ( delayCount == 2 )
|
|
{
|
|
while (true)
|
|
{
|
|
int state = (*listPtr).state; //set as default
|
|
int PID = (*listPtr).processID; //set as default
|
|
|
|
try
|
|
{
|
|
ProcessStatus procstat;
|
|
oam.getProcessStatus((*listPtr).ProcessName, config->moduleName(), procstat);
|
|
state = procstat.ProcessOpState;
|
|
PID = procstat.ProcessID;
|
|
|
|
if (state == oam::BUSY_INIT )
|
|
{
|
|
// updated local state ot BUSY_INIT
|
|
(*listPtr).state = state;
|
|
break;
|
|
}
|
|
|
|
if ( (state == oam::AUTO_INIT && (*listPtr).state == oam::AUTO_INIT) ||
|
|
(state == oam::MAN_INIT && (*listPtr).state == oam::MAN_INIT) )
|
|
{
|
|
// get current time in seconds
|
|
time_t cal;
|
|
time (&cal);
|
|
|
|
if ( (cal - (*listPtr).currentTime) > 20 )
|
|
{
|
|
// issue ALARM and update status to FAILED
|
|
aMonitor.sendAlarm((*listPtr).ProcessName, PROCESS_INIT_FAILURE, SET);
|
|
// (*listPtr).state = oam::FAILED;
|
|
// aMonitor.updateProcessInfo((*listPtr).ProcessName, oam::FAILED, (*listPtr).processID);
|
|
|
|
//force restart the un-initted process
|
|
log.writeLog(__LINE__, (*listPtr).ProcessName + "/" + oam.itoa((*listPtr).processID) + " failed to init in 20 seconds, force killing it so it can restart", LOG_TYPE_CRITICAL);
|
|
|
|
//skip killing 0 or 1
|
|
if ( (*listPtr).processID > 1 )
|
|
kill((*listPtr).processID, SIGKILL);
|
|
|
|
break;
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
catch (exception& ex)
|
|
{
|
|
string error = ex.what();
|
|
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: " + error, LOG_TYPE_ERROR);
|
|
break;
|
|
}
|
|
catch (...)
|
|
{
|
|
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessStatus: Caught unknown exception!", LOG_TYPE_ERROR);
|
|
break;
|
|
}
|
|
|
|
if (state != (*listPtr).state || PID != (*listPtr).processID)
|
|
{
|
|
if ( state == oam::STANDBY && (*listPtr).state == oam::ACTIVE )
|
|
break;
|
|
else
|
|
{
|
|
if ( (state == oam::ACTIVE && (*listPtr).state == oam::AUTO_INIT) ||
|
|
(state == oam::ACTIVE && (*listPtr).state == oam::MAN_INIT) ||
|
|
(state == oam::ACTIVE && (*listPtr).state == oam::STANDBY) ||
|
|
(state == oam::ACTIVE && (*listPtr).state == oam::INITIAL) ||
|
|
(state == oam::ACTIVE && (*listPtr).state == oam::STANDBY_INIT) ||
|
|
(state == oam::ACTIVE && (*listPtr).state == oam::BUSY_INIT) ||
|
|
(state == oam::STANDBY && (*listPtr).state == oam::AUTO_INIT) ||
|
|
(state == oam::STANDBY && (*listPtr).state == oam::MAN_INIT) ||
|
|
(state == oam::STANDBY && (*listPtr).state == oam::INITIAL) ||
|
|
(state == oam::STANDBY && (*listPtr).state == oam::BUSY_INIT) ||
|
|
(state == oam::STANDBY && (*listPtr).state == oam::STANDBY_INIT) )
|
|
{
|
|
// updated local state to ACTIVE
|
|
(*listPtr).state = state;
|
|
break;
|
|
}
|
|
|
|
if ( (state == oam::FAILED && (*listPtr).state == oam::AUTO_INIT) ||
|
|
(state == oam::FAILED && (*listPtr).state == oam::BUSY_INIT) ||
|
|
(state == oam::FAILED && (*listPtr).state == oam::MAN_INIT) )
|
|
{
|
|
// issue ALARM and update local status to FAILED
|
|
log.writeLog(__LINE__, (*listPtr).ProcessName + " failed initialization", LOG_TYPE_WARNING);
|
|
aMonitor.sendAlarm((*listPtr).ProcessName, PROCESS_INIT_FAILURE, SET);
|
|
(*listPtr).state = state;
|
|
|
|
//setModule status to failed
|
|
try
|
|
{
|
|
oam.setModuleStatus(config->moduleName(), oam::FAILED);
|
|
}
|
|
catch (exception& ex)
|
|
{
|
|
string error = ex.what();
|
|
// log.writeLog(__LINE__, "EXCEPTION ERROR on setModuleStatus: " + error, LOG_TYPE_ERROR);
|
|
}
|
|
catch (...)
|
|
{
|
|
// log.writeLog(__LINE__, "EXCEPTION ERROR on setModuleStatus: Caught unknown exception!", LOG_TYPE_ERROR);
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
if (state == oam::AUTO_OFFLINE || state == oam::INITIAL ||
|
|
PID != (*listPtr).processID)
|
|
{
|
|
//due to a small window, only process if out-of-sync for more than 1 second
|
|
outOfSyncCount++;
|
|
|
|
if ( outOfSyncCount == 2 )
|
|
{
|
|
// out of sync, update with internal state/PID
|
|
log.writeLog(__LINE__, "State out-of-sync, update on " + (*listPtr).ProcessName + "/" + oam.itoa((*listPtr).state) + "/" + oam.itoa((*listPtr).processID), LOG_TYPE_DEBUG);
|
|
|
|
aMonitor.updateProcessInfo((*listPtr).ProcessName, (*listPtr).state, (*listPtr).processID);
|
|
break;
|
|
}
|
|
|
|
sleep(1);
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
|
|
//Handle died or out of sync process if in the right state
|
|
if ( (*listPtr).state == oam::MAN_OFFLINE )
|
|
//skip
|
|
continue;
|
|
|
|
//log.writeLog(__LINE__, "check status " + (*listPtr).ProcessName + "/" + oam.itoa((*listPtr).processID) + " " + oam.itoa(kill((*listPtr).processID, 0)) + " " + oam.itoa((*listPtr).state) , LOG_TYPE_CRITICAL);
|
|
if ( ( (kill((*listPtr).processID, 0)) != 0 && (*listPtr).state == oam::ACTIVE ) ||
|
|
( (kill((*listPtr).processID, 0)) != 0 && (*listPtr).state == oam::STANDBY ) ||
|
|
( (kill((*listPtr).processID, 0)) != 0 && (*listPtr).state == oam::MAN_INIT ) ||
|
|
( (kill((*listPtr).processID, 0)) != 0 && (*listPtr).state == oam::BUSY_INIT ) ||
|
|
( (kill((*listPtr).processID, 0)) != 0 && (*listPtr).state == oam::AUTO_INIT &&
|
|
(*listPtr).processID != 0 ) ||
|
|
( (*listPtr).state == oam::ACTIVE && (*listPtr).processID == 0 ) )
|
|
{
|
|
log.writeLog(__LINE__, "*****MariaDB ColumnStore Process Restarting: " + (*listPtr).ProcessName + ", old PID = " + oam.itoa((*listPtr).processID), LOG_TYPE_CRITICAL);
|
|
|
|
if ( (*listPtr).dieCounter >= processRestartCount ||
|
|
processRestartCount == 0)
|
|
{
|
|
// don't restart it
|
|
config->buildList((*listPtr).ProcessModuleType,
|
|
(*listPtr).ProcessName,
|
|
(*listPtr).ProcessLocation,
|
|
(*listPtr).ProcessArgs,
|
|
(*listPtr).launchID,
|
|
0,
|
|
oam::AUTO_OFFLINE,
|
|
(*listPtr).BootLaunch,
|
|
(*listPtr).RunType,
|
|
(*listPtr).DepProcessName,
|
|
(*listPtr).DepModuleName,
|
|
(*listPtr).LogFile);
|
|
|
|
//Set the alarm
|
|
aMonitor.sendAlarm((*listPtr).ProcessName, PROCESS_DOWN_AUTO, SET);
|
|
|
|
//Update ProcessConfig file
|
|
aMonitor.updateProcessInfo((*listPtr).ProcessName, oam::AUTO_OFFLINE, 0);
|
|
|
|
//Log this event
|
|
if ( processRestartCount == 0)
|
|
log.writeLog(__LINE__, "*****Process not restarted, restart count set to 0: " + (*listPtr).ProcessName, LOG_TYPE_CRITICAL);
|
|
else
|
|
log.writeLog(__LINE__, "*****Process continually dying, stopped trying to restore it: " + (*listPtr).ProcessName, LOG_TYPE_CRITICAL);
|
|
|
|
//setModule status to degraded
|
|
try
|
|
{
|
|
bool degraded;
|
|
int moduleStatus;
|
|
oam.getModuleStatus(config->moduleName(), moduleStatus, degraded);
|
|
|
|
if ( moduleStatus == oam::ACTIVE)
|
|
{
|
|
try
|
|
{
|
|
oam.setModuleStatus(config->moduleName(), oam::DEGRADED);
|
|
}
|
|
catch (exception& ex)
|
|
{
|
|
string error = ex.what();
|
|
log.writeLog(__LINE__, "EXCEPTION ERROR on setModuleStatus: " + error, LOG_TYPE_ERROR);
|
|
}
|
|
catch (...)
|
|
{
|
|
log.writeLog(__LINE__, "EXCEPTION ERROR on setModuleStatus: Caught unknown exception!", LOG_TYPE_ERROR);
|
|
}
|
|
|
|
}
|
|
}
|
|
catch (...)
|
|
{}
|
|
|
|
// check if Mdoule failover is needed due to process outage
|
|
aMonitor.checkModuleFailover((*listPtr).ProcessName);
|
|
|
|
//check the db health
|
|
if (DBFunctionalMonitorFlag == "y" )
|
|
{
|
|
log.writeLog(__LINE__, "Call the check DB Functional API", LOG_TYPE_DEBUG);
|
|
|
|
try
|
|
{
|
|
oam.checkDBFunctional();
|
|
log.writeLog(__LINE__, "check DB Functional passed", LOG_TYPE_DEBUG);
|
|
}
|
|
catch (...)
|
|
{
|
|
log.writeLog(__LINE__, "check DB Functional FAILED", LOG_TYPE_ERROR);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
time_t cal;
|
|
time (&cal);
|
|
|
|
if ( (cal - (*listPtr).currentTime) > (int) processRestartPeriod )
|
|
(*listPtr).dieCounter = 0;
|
|
else
|
|
++(*listPtr).dieCounter;
|
|
|
|
int initStatus = oam::AUTO_INIT;
|
|
|
|
if ( (*listPtr).RunType == oam::ACTIVE_STANDBY && runStandby)
|
|
initStatus = oam::STANDBY;
|
|
|
|
//record the process information into processList
|
|
config->buildList((*listPtr).ProcessModuleType,
|
|
(*listPtr).ProcessName,
|
|
(*listPtr).ProcessLocation,
|
|
(*listPtr).ProcessArgs,
|
|
(*listPtr).launchID,
|
|
0,
|
|
oam::AUTO_OFFLINE,
|
|
(*listPtr).BootLaunch,
|
|
(*listPtr).RunType,
|
|
(*listPtr).DepProcessName,
|
|
(*listPtr).DepModuleName,
|
|
(*listPtr).LogFile);
|
|
|
|
//Set the alarm
|
|
aMonitor.sendAlarm((*listPtr).ProcessName, PROCESS_DOWN_AUTO, SET);
|
|
|
|
int i = 0;
|
|
string restartStatus;
|
|
|
|
for ( ; i < 10 ; i++ )
|
|
{
|
|
//stop the process first to make sure it's gone
|
|
aMonitor.stopProcess((*listPtr).processID,
|
|
(*listPtr).ProcessName,
|
|
(*listPtr).ProcessLocation,
|
|
oam::FORCEFUL,
|
|
false);
|
|
|
|
//Start the process
|
|
(*listPtr).processID = aMonitor.startProcess( (*listPtr).ProcessModuleType,
|
|
(*listPtr).ProcessName,
|
|
(*listPtr).ProcessLocation,
|
|
(*listPtr).ProcessArgs,
|
|
(*listPtr).launchID,
|
|
(*listPtr).BootLaunch,
|
|
(*listPtr).RunType,
|
|
(*listPtr).DepProcessName,
|
|
(*listPtr).DepModuleName,
|
|
(*listPtr).LogFile,
|
|
initStatus);
|
|
|
|
// StorageManager doesn't send the "I'm online" msg to Proc*.
|
|
// Just mark it active for now. TODO: make it use the ping fcn in IDB* instead.
|
|
if (listPtr->ProcessName == "StorageManager")
|
|
oam.setProcessStatus("StorageManager", boost::get<0>(oam.getModuleInfo()),
|
|
oam::ACTIVE, listPtr->processID);
|
|
|
|
if ( (*listPtr).processID == oam::API_FAILURE )
|
|
{
|
|
// restart hard failure
|
|
restartStatus = " restart failed with hard failure, don't retry!!";
|
|
(*listPtr).processID = 0;
|
|
|
|
// check if Module failover is needed due to process outage
|
|
aMonitor.checkModuleFailover((*listPtr).ProcessName);
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
if ( (*listPtr).processID != oam::API_MINOR_FAILURE )
|
|
{
|
|
//restarted successful
|
|
//Inform Process Manager that Process restart
|
|
aMonitor.processRestarted( (*listPtr).ProcessName, false);
|
|
break;
|
|
}
|
|
}
|
|
// restart failed with minor error, sleep and try
|
|
sleep(5);
|
|
}
|
|
|
|
if ( i == 10 || (*listPtr).processID == oam::API_FAILURE)
|
|
{
|
|
//setModule status to degraded
|
|
try
|
|
{
|
|
bool degraded;
|
|
int moduleStatus;
|
|
oam.getModuleStatus(config->moduleName(), moduleStatus, degraded);
|
|
|
|
if ( moduleStatus == oam::ACTIVE)
|
|
{
|
|
try
|
|
{
|
|
oam.setModuleStatus(config->moduleName(), oam::DEGRADED);
|
|
}
|
|
catch (exception& ex)
|
|
{
|
|
string error = ex.what();
|
|
log.writeLog(__LINE__, "EXCEPTION ERROR on setModuleStatus: " + error, LOG_TYPE_ERROR);
|
|
}
|
|
catch (...)
|
|
{
|
|
log.writeLog(__LINE__, "EXCEPTION ERROR on setModuleStatus: Caught unknown exception!", LOG_TYPE_ERROR);
|
|
}
|
|
|
|
}
|
|
}
|
|
catch (...)
|
|
{}
|
|
|
|
//check the db health
|
|
if (DBFunctionalMonitorFlag == "y" )
|
|
{
|
|
log.writeLog(__LINE__, "Call the check DB Functional API", LOG_TYPE_DEBUG);
|
|
|
|
try
|
|
{
|
|
oam.checkDBFunctional();
|
|
log.writeLog(__LINE__, "check DB Functional passed", LOG_TYPE_DEBUG);
|
|
}
|
|
catch (...)
|
|
{
|
|
log.writeLog(__LINE__, "check DB Functional FAILED", LOG_TYPE_ERROR);
|
|
}
|
|
}
|
|
}
|
|
|
|
if ( i == 10 )
|
|
{
|
|
// restart timeout
|
|
restartStatus = " restart failed after 10 retries";
|
|
(*listPtr).processID = 0;
|
|
}
|
|
else
|
|
{
|
|
restartStatus = " restarted successfully!!";
|
|
|
|
//Inform Process Manager that Process restart
|
|
aMonitor.processRestarted( (*listPtr).ProcessName, false);
|
|
}
|
|
|
|
//Log this event
|
|
log.writeLog(__LINE__, "MariaDB ColumnStore Process " + (*listPtr).ProcessName + restartStatus, LOG_TYPE_INFO);
|
|
}
|
|
}
|
|
}
|
|
|
|
delayCount++;
|
|
|
|
if ( delayCount > 2 )
|
|
delayCount = 0;
|
|
|
|
sleep(5);
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/******************************************************************************************
|
|
* @brief sigHupHandler
|
|
*
|
|
* purpose: Hanlder SIGHUP signal and update internal DB
|
|
*
|
|
******************************************************************************************/
|
|
static void sigHupHandler(int sig)
|
|
{
|
|
MonitorLog log;
|
|
MonitorConfig config;
|
|
ProcessMonitor aMonitor(config, log);
|
|
log.writeLog(__LINE__, "SIGHUP Thread started ..", LOG_TYPE_DEBUG);
|
|
|
|
aMonitor.updateConfig();
|
|
|
|
}
|
|
|
|
static int PROCSTATshmsize = 0;
|
|
shmProcessStatus* fShmProcessStatus = 0;
|
|
boost::interprocess::shared_memory_object fProcStatShmobj;
|
|
boost::interprocess::mapped_region fProcStatMapreg;
|
|
|
|
int fmoduleNumber = 0;
|
|
int extDeviceNumber = 0;
|
|
int NICNumber = 0;
|
|
int dbrootNumber = 0;
|
|
int processNumber = 0;
|
|
|
|
boost::interprocess::shared_memory_object fSysStatShmobj;
|
|
boost::interprocess::mapped_region fSysStatMapreg;
|
|
|
|
void* processStatusMSG(messageqcpp::IOSocket* fIos);
|
|
|
|
processStatusList* aPtr;
|
|
SystemProcessConfig systemprocessconfig;
|
|
ModuleTypeConfig moduletypeconfig;
|
|
SystemModuleTypeConfig systemModuleTypeConfig;
|
|
SystemExtDeviceConfig systemextdeviceconfig;
|
|
|
|
std::vector<string> moduleDisableStateList;
|
|
std::vector<string> hostNameList;
|
|
std::vector<string> ipaddrNameList;
|
|
std::vector<string> moduleNameList;
|
|
std::vector<string> extDeviceNameList;
|
|
|
|
shmDeviceStatus* fShmNICStatus = 0;
|
|
shmDeviceStatus* fShmDbrootStatus = 0;
|
|
shmDeviceStatus* fShmExtDeviceStatus = 0;
|
|
shmDeviceStatus* fShmSystemStatus = 0;
|
|
|
|
processStatusList fstatusListPtr;
|
|
|
|
processStatusList* statusListPtr()
|
|
{
|
|
return &fstatusListPtr;
|
|
}
|
|
|
|
|
|
/******************************************************************************************
|
|
* @brief statusControlThread
|
|
*
|
|
* purpose: Setup Status Shared-Memory table and process request to get and set
|
|
* into the Status Shared-Memory table
|
|
*
|
|
******************************************************************************************/
|
|
static void* statusControlThread(void*)
|
|
{
|
|
MonitorLog log;
|
|
MonitorConfig config;
|
|
ProcessMonitor aMonitor(config, log);
|
|
Oam oam;
|
|
BRM::ShmKeys fShmKeys;
|
|
|
|
log.writeLog(__LINE__, "statusControlThread Thread started ..", LOG_TYPE_DEBUG);
|
|
|
|
//
|
|
//Read ProcessConfig file to get process list and build Status List
|
|
//
|
|
try
|
|
{
|
|
oam.getProcessConfig(systemprocessconfig);
|
|
}
|
|
catch (exception& ex)
|
|
{
|
|
string error = ex.what();
|
|
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessConfig: " + error, LOG_TYPE_ERROR);
|
|
}
|
|
catch (...)
|
|
{
|
|
// log.writeLog(__LINE__, "EXCEPTION ERROR on getProcessConfig: Caught unknown exception!", LOG_TYPE_ERROR);
|
|
}
|
|
|
|
try
|
|
{
|
|
oam.getSystemConfig(systemModuleTypeConfig);
|
|
}
|
|
catch (exception& ex)
|
|
{
|
|
string error = ex.what();
|
|
// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
|
|
}
|
|
catch (...)
|
|
{
|
|
// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
|
|
}
|
|
|
|
// build status list
|
|
|
|
for ( unsigned int i = 0 ; i < systemModuleTypeConfig.moduletypeconfig.size(); i++)
|
|
{
|
|
int moduleCount = systemModuleTypeConfig.moduletypeconfig[i].ModuleCount;
|
|
|
|
if ( moduleCount == 0 )
|
|
// skip of no modules configured
|
|
continue;
|
|
|
|
// dm/um/pm
|
|
string systemModuleType = systemModuleTypeConfig.moduletypeconfig[i].ModuleType;
|
|
|
|
fmoduleNumber = fmoduleNumber + moduleCount;
|
|
|
|
// store ModuleNames / HostNames and IP Addresses (NIC)
|
|
DeviceNetworkList::iterator pt = systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin();
|
|
|
|
for ( ; pt != systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++)
|
|
{
|
|
moduleNameList.push_back((*pt).DeviceName);
|
|
moduleDisableStateList.push_back((*pt).DisableState);
|
|
|
|
HostConfigList::iterator pt1 = (*pt).hostConfigList.begin();
|
|
|
|
for ( ; pt1 != (*pt).hostConfigList.end() ; pt1++ )
|
|
{
|
|
ipaddrNameList.push_back((*pt1).IPAddr);
|
|
hostNameList.push_back((*pt1).HostName);
|
|
}
|
|
}
|
|
|
|
NICNumber = hostNameList.size();
|
|
string OAMParentModuleType = config.OAMParentName().substr(0, 2);
|
|
|
|
pt = systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin();
|
|
|
|
for ( ; pt != systemModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; pt++)
|
|
{
|
|
for ( unsigned int j = 0; j < systemprocessconfig.processconfig.size(); j++)
|
|
{
|
|
//skip if both BootLaunch and LaunchID are 0
|
|
if ( systemprocessconfig.processconfig[j].BootLaunch == 0 &&
|
|
systemprocessconfig.processconfig[j].LaunchID == 0 )
|
|
continue;
|
|
|
|
// "ChildOAMModule" "ParentOAMModule" dm/um/pm
|
|
string processModuleType = systemprocessconfig.processconfig[j].ModuleType;
|
|
|
|
if (processModuleType == systemModuleType
|
|
|| ( processModuleType == "um" &&
|
|
systemModuleType == "pm" && PMwithUM == "y")
|
|
|| processModuleType == "ChildExtOAMModule"
|
|
|| (processModuleType == "ChildOAMModule" )
|
|
|| (processModuleType == "ParentOAMModule" && systemModuleType == OAMParentModuleType) )
|
|
{
|
|
if ( processModuleType == "um" &&
|
|
systemModuleType == "pm" && PMwithUM == "y" &&
|
|
systemprocessconfig.processconfig[j].ProcessName == "DMLProc" )
|
|
continue;
|
|
|
|
|
|
if ( processModuleType == "um" &&
|
|
systemModuleType == "pm" && PMwithUM == "y" &&
|
|
systemprocessconfig.processconfig[j].ProcessName == "DDLProc" )
|
|
continue;
|
|
|
|
processstatus procstat;
|
|
procstat.ProcessName = systemprocessconfig.processconfig[j].ProcessName;
|
|
procstat.ModuleName = (*pt).DeviceName;
|
|
procstat.tableIndex = processNumber;
|
|
fstatusListPtr.push_back(procstat);
|
|
processNumber++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
aPtr = statusListPtr();
|
|
|
|
//
|
|
//Allocate Shared Memory for storing Process Status Data
|
|
//
|
|
|
|
string shmLocation = "/dev/shm/";
|
|
|
|
PROCSTATshmsize = MAX_PROCESS * sizeof(shmProcessStatus);
|
|
bool memInit = true;
|
|
#if 0
|
|
int shmid = shmget(fShmKeys.PROCESSSTATUS_SYSVKEY, PROCSTATshmsize, IPC_EXCL | IPC_CREAT | 0666);
|
|
|
|
if (shmid == -1)
|
|
{
|
|
// table already exist
|
|
memInit = false;
|
|
shmid = shmget(fShmKeys.PROCESSSTATUS_SYSVKEY, PROCSTATshmsize, 0666);
|
|
|
|
if (shmid == -1)
|
|
{
|
|
log.writeLog(__LINE__, "*****ProcessStatusTable shmget failed.", LOG_TYPE_ERROR);
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
fShmProcessStatus = static_cast<struct shmProcessStatus*>(shmat(shmid, NULL, 0));
|
|
#endif
|
|
string keyName = BRM::ShmKeys::keyToName(fShmKeys.PROCESSSTATUS_SYSVKEY);
|
|
memInit = getshm(keyName, PROCSTATshmsize, fProcStatShmobj);
|
|
|
|
bi::mapped_region region(fProcStatShmobj, bi::read_write);
|
|
fProcStatMapreg.swap(region);
|
|
fShmProcessStatus = static_cast<shmProcessStatus*>(fProcStatMapreg.get_address());
|
|
|
|
if (fShmProcessStatus == 0)
|
|
{
|
|
log.writeLog(__LINE__, "*****ProcessStatusTable shmat failed.", LOG_TYPE_CRITICAL);
|
|
exit(1);
|
|
}
|
|
|
|
//Initialize Shared memory
|
|
if (memInit)
|
|
{
|
|
memset(fShmProcessStatus, 0, PROCSTATshmsize);
|
|
|
|
for ( int i = 0; i < processNumber ; ++i)
|
|
{
|
|
fShmProcessStatus[i].ProcessOpState = oam::INITIAL;
|
|
}
|
|
|
|
log.writeLog(__LINE__, "Process Status shared Memory allocated and Initialized", LOG_TYPE_DEBUG);
|
|
}
|
|
|
|
//
|
|
//Allocate Shared Memory for storing System/Module Status Data
|
|
//
|
|
fmoduleNumber++; //add 1 to cover system status entry
|
|
|
|
static const int SYSTEMSTATshmsize = MAX_MODULE * sizeof(shmDeviceStatus);
|
|
memInit = true;
|
|
#if 0
|
|
shmid = shmget(fShmKeys.SYSTEMSTATUS_SYSVKEY, SYSTEMSTATshmsize, IPC_EXCL | IPC_CREAT | 0666);
|
|
|
|
if (shmid == -1)
|
|
{
|
|
// table already exist
|
|
memInit = false;
|
|
shmid = shmget(fShmKeys.SYSTEMSTATUS_SYSVKEY, SYSTEMSTATshmsize, 0666);
|
|
|
|
if (shmid == -1)
|
|
{
|
|
log.writeLog(__LINE__, "*****SystemStatusTable shmget failed.", LOG_TYPE_ERROR);
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
fShmSystemStatus = static_cast<struct shmDeviceStatus*>(shmat(shmid, NULL, 0));
|
|
#endif
|
|
keyName = BRM::ShmKeys::keyToName(fShmKeys.SYSTEMSTATUS_SYSVKEY);
|
|
memInit = getshm(keyName, SYSTEMSTATshmsize, fSysStatShmobj);
|
|
|
|
bi::mapped_region region2(fSysStatShmobj, bi::read_write);
|
|
fSysStatMapreg.swap(region2);
|
|
fShmSystemStatus = static_cast<shmDeviceStatus*>(fSysStatMapreg.get_address());
|
|
|
|
if (fShmSystemStatus == 0)
|
|
{
|
|
log.writeLog(__LINE__, "*****SystemStatusTable shmat failed.", LOG_TYPE_CRITICAL);
|
|
exit(1);
|
|
}
|
|
|
|
//Initialize Shared memory
|
|
if (memInit)
|
|
{
|
|
// Init System/Module Status Memory
|
|
memset(fShmSystemStatus, 0, SYSTEMSTATshmsize);
|
|
|
|
//set system status
|
|
memcpy(fShmSystemStatus[0].Name, "system", sizeof("system"));
|
|
|
|
if (runStandby)
|
|
{
|
|
try
|
|
{
|
|
SystemStatus systemstatus;
|
|
oam.getSystemStatus(systemstatus);
|
|
fShmSystemStatus[0].OpState = systemstatus.SystemOpState;
|
|
memcpy(fShmSystemStatus[0].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE);
|
|
}
|
|
catch (...)
|
|
{
|
|
fShmSystemStatus[0].OpState = oam::DOWN;
|
|
}
|
|
}
|
|
else
|
|
fShmSystemStatus[0].OpState = oam::DOWN;
|
|
|
|
//set module status
|
|
for ( int i = 1; i < fmoduleNumber ; ++i)
|
|
{
|
|
memcpy(fShmSystemStatus[i].Name, moduleNameList[i - 1].c_str(), NAMESIZE);
|
|
|
|
if (runStandby)
|
|
{
|
|
try
|
|
{
|
|
int opState;
|
|
bool degraded;
|
|
oam.getModuleStatus(moduleNameList[i - 1], opState, degraded);
|
|
fShmSystemStatus[i].OpState = opState;
|
|
memcpy(fShmSystemStatus[i].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE);
|
|
}
|
|
catch (...)
|
|
{
|
|
fShmSystemStatus[i].OpState = oam::INITIAL;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if ( moduleDisableStateList[i - 1] == oam::MANDISABLEDSTATE )
|
|
fShmSystemStatus[i].OpState = oam::MAN_DISABLED;
|
|
else if ( moduleDisableStateList[i - 1] == oam::AUTODISABLEDSTATE )
|
|
fShmSystemStatus[i].OpState = oam::AUTO_DISABLED;
|
|
else
|
|
fShmSystemStatus[i].OpState = oam::INITIAL;
|
|
}
|
|
}
|
|
|
|
log.writeLog(__LINE__, "System/Module Status shared Memory allocated and Initialized", LOG_TYPE_DEBUG);
|
|
}
|
|
|
|
//
|
|
//Allocate Shared Memory for storing NIC Status Data
|
|
//
|
|
boost::interprocess::shared_memory_object fNICStatShmobj;
|
|
static const int NICSTATshmsize = (MAX_MODULE * MAX_NIC) * sizeof(shmDeviceStatus);
|
|
|
|
keyName = BRM::ShmKeys::keyToName(fShmKeys.NICSTATUS_SYSVKEY);
|
|
memInit = getshm(keyName, NICSTATshmsize, fNICStatShmobj);
|
|
|
|
bi::mapped_region fNICStatMapreg(fNICStatShmobj, bi::read_write);
|
|
fShmNICStatus = static_cast<shmDeviceStatus*>(fNICStatMapreg.get_address());
|
|
|
|
if (fShmNICStatus == 0)
|
|
{
|
|
log.writeLog(__LINE__, "*****NICStatusTable shmat failed.", LOG_TYPE_CRITICAL);
|
|
exit(1);
|
|
}
|
|
|
|
//Initialize Shared memory
|
|
if (memInit)
|
|
{
|
|
// Init NIC Status Memory
|
|
memset(fShmNICStatus, 0, NICSTATshmsize);
|
|
|
|
for ( int i = 0; i < NICNumber ; ++i)
|
|
{
|
|
fShmNICStatus[i].OpState = oam::INITIAL;
|
|
memcpy(fShmNICStatus[i].Name, hostNameList[i].c_str(), NAMESIZE);
|
|
}
|
|
|
|
log.writeLog(__LINE__, "NIC Status shared Memory allocated and Initialized", LOG_TYPE_DEBUG);
|
|
}
|
|
|
|
//
|
|
//Allocate Shared Memory for storing External Device Status Data
|
|
//
|
|
|
|
try
|
|
{
|
|
oam.getSystemConfig(systemextdeviceconfig);
|
|
}
|
|
catch (exception& ex)
|
|
{
|
|
string error = ex.what();
|
|
// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
|
|
}
|
|
catch (...)
|
|
{
|
|
// log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
|
|
}
|
|
|
|
for ( unsigned int i = 0 ; i < systemextdeviceconfig.Count; i++)
|
|
{
|
|
if ( systemextdeviceconfig.extdeviceconfig[i].Name == oam::UnassignedName ||
|
|
systemextdeviceconfig.extdeviceconfig[i].Name.empty() )
|
|
continue;
|
|
|
|
extDeviceNameList.push_back(systemextdeviceconfig.extdeviceconfig[i].Name);
|
|
extDeviceNumber++;
|
|
}
|
|
|
|
boost::interprocess::shared_memory_object fExtStatShmobj;
|
|
static const int EXTDEVICESTATshmsize = MAX_EXT_DEVICE * sizeof(shmDeviceStatus);
|
|
keyName = BRM::ShmKeys::keyToName(fShmKeys.SWITCHSTATUS_SYSVKEY);
|
|
memInit = getshm(keyName, EXTDEVICESTATshmsize, fExtStatShmobj);
|
|
|
|
bi::mapped_region fExtStatMapreg(fExtStatShmobj, bi::read_write);
|
|
fShmExtDeviceStatus = static_cast<shmDeviceStatus*>(fExtStatMapreg.get_address());
|
|
|
|
if (fShmExtDeviceStatus == 0)
|
|
{
|
|
log.writeLog(__LINE__, "*****ExtDeviceStatusTable shmat failed.", LOG_TYPE_CRITICAL);
|
|
exit(1);
|
|
}
|
|
|
|
//Initialize Shared memory
|
|
if (memInit)
|
|
{
|
|
// Init Ext Device Status Memory
|
|
memset(fShmExtDeviceStatus, 0, EXTDEVICESTATshmsize);
|
|
|
|
for ( int i = 0; i < extDeviceNumber ; ++i)
|
|
{
|
|
fShmExtDeviceStatus[i].OpState = oam::INITIAL;
|
|
memcpy(fShmExtDeviceStatus[i].Name, extDeviceNameList[i].c_str(), NAMESIZE);
|
|
}
|
|
|
|
log.writeLog(__LINE__, "Ext Device Status shared Memory allocated and Initialized", LOG_TYPE_DEBUG);
|
|
}
|
|
|
|
//
|
|
//Allocate Shared Memory for storing DBRoot Status Data
|
|
//
|
|
|
|
string DBRootStorageType;
|
|
|
|
try
|
|
{
|
|
oam.getSystemConfig("DBRootStorageType", DBRootStorageType);
|
|
}
|
|
catch (...) {}
|
|
|
|
std::vector<string>dbrootList;
|
|
|
|
if ( DBRootStorageType == "external" ||
|
|
DataRedundancyConfig == "y")
|
|
{
|
|
//get system dbroots
|
|
DBRootConfigList dbrootConfigList;
|
|
|
|
try
|
|
{
|
|
oam.getSystemDbrootConfig(dbrootConfigList);
|
|
}
|
|
catch (exception& e)
|
|
{
|
|
log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemDbrootConfig: Caught unknown exception!", LOG_TYPE_ERROR);
|
|
}
|
|
|
|
DBRootConfigList::iterator pt = dbrootConfigList.begin();
|
|
|
|
for ( ; pt != dbrootConfigList.end() ; pt++)
|
|
{
|
|
dbrootList.push_back(oam.itoa(*pt));
|
|
dbrootNumber++;
|
|
}
|
|
}
|
|
|
|
boost::interprocess::shared_memory_object fDbrootShmobj;
|
|
static const int DBROOTSTATshmsize = MAX_DBROOT * sizeof(shmDeviceStatus);
|
|
keyName = BRM::ShmKeys::keyToName(fShmKeys.DBROOTSTATUS_SYSVKEY);
|
|
memInit = getshm(keyName, DBROOTSTATshmsize, fDbrootShmobj);
|
|
|
|
bi::mapped_region fdDbrootStatMapreg(fDbrootShmobj, bi::read_write);
|
|
fShmDbrootStatus = static_cast<shmDeviceStatus*>(fdDbrootStatMapreg.get_address());
|
|
|
|
if (fShmDbrootStatus == 0)
|
|
{
|
|
log.writeLog(__LINE__, "*****DbrootStatusTable shmat failed.", LOG_TYPE_CRITICAL);
|
|
exit(1);
|
|
}
|
|
|
|
//Initialize Shared memory
|
|
if (memInit)
|
|
{
|
|
// Init DBRoot Status Memory
|
|
memset(fShmDbrootStatus, 0, DBROOTSTATshmsize);
|
|
|
|
for ( int i = 0; i < dbrootNumber ; ++i)
|
|
{
|
|
fShmDbrootStatus[i].OpState = oam::INITIAL;
|
|
memcpy(fShmDbrootStatus[i].Name, dbrootList[i].c_str(), NAMESIZE);
|
|
}
|
|
|
|
log.writeLog(__LINE__, "Dbroot Status shared Memory allocated and Initialized", LOG_TYPE_DEBUG);
|
|
}
|
|
|
|
//Set mainResumeFlag, to start up main thread
|
|
|
|
mainResumeFlag = true;
|
|
|
|
string portName = "ProcStatusControl";
|
|
|
|
if (runStandby)
|
|
{
|
|
portName = "ProcStatusControlStandby";
|
|
processStatusList* aPtr = statusListPtr();
|
|
updateShareMemory(aPtr);
|
|
}
|
|
|
|
//
|
|
//Now wait for Process Status Get and Set request
|
|
//
|
|
|
|
//read and cleanup port before trying to use
|
|
try
|
|
{
|
|
Config* sysConfig = Config::makeConfig();
|
|
string port = sysConfig->getConfig(portName, "Port");
|
|
string cmd = "fuser -k " + port + "/tcp >/dev/null 2>&1";
|
|
|
|
system(cmd.c_str());
|
|
}
|
|
catch (...)
|
|
{
|
|
}
|
|
|
|
log.writeLog(__LINE__, "statusControlThread Thread reading " + portName + " port", LOG_TYPE_DEBUG);
|
|
|
|
IOSocket* fIos;
|
|
MessageQueueServer* mqs;
|
|
int standbyUpdateCount = 0;
|
|
|
|
mqs = new MessageQueueServer(portName);
|
|
struct timespec ts = { 1, 0 };
|
|
|
|
for (;;)
|
|
{
|
|
if (!runStandby && portName == "ProcStatusControlStandby")
|
|
{
|
|
portName = "ProcStatusControl";
|
|
delete mqs;
|
|
mqs = new MessageQueueServer(portName);
|
|
|
|
log.writeLog(__LINE__, "statusControlThread Thread reading " + portName + " port", LOG_TYPE_DEBUG);
|
|
|
|
processStatusList* aPtr = statusListPtr();
|
|
updateShareMemory(aPtr);
|
|
}
|
|
|
|
if (runStandby && portName == "ProcStatusControl")
|
|
{
|
|
portName = "ProcStatusControlStandby";
|
|
delete mqs;
|
|
mqs = new MessageQueueServer(portName);
|
|
|
|
log.writeLog(__LINE__, "statusControlThread Thread reading " + portName + " port", LOG_TYPE_DEBUG);
|
|
}
|
|
|
|
fIos = NULL;
|
|
try
|
|
{
|
|
//log.writeLog(__LINE__, "***before accept", LOG_TYPE_DEBUG);
|
|
fIos = new IOSocket();
|
|
*fIos = mqs->accept(&ts);
|
|
|
|
if ( fIos->isOpen() )
|
|
{
|
|
//log.writeLog(__LINE__, "***before create thread", LOG_TYPE_DEBUG);
|
|
pthread_t messagethread;
|
|
int status = pthread_create (&messagethread, NULL, (void*(*)(void*))&processStatusMSG, fIos);
|
|
|
|
//log.writeLog(__LINE__, "***after create thread", LOG_TYPE_DEBUG);
|
|
|
|
if ( status != 0 )
|
|
{
|
|
log.writeLog(__LINE__, "messagethread: pthread_create failed, return status = " + oam.itoa(status), LOG_TYPE_ERROR);
|
|
delete fIos;
|
|
}
|
|
}
|
|
else
|
|
delete fIos;
|
|
|
|
}
|
|
catch (...)
|
|
{
|
|
if (fIos)
|
|
delete fIos;
|
|
}
|
|
|
|
if ( runStandby )
|
|
{
|
|
standbyUpdateCount++;
|
|
|
|
if ( standbyUpdateCount >= 3 )
|
|
{
|
|
//processStatusList* aPtr = statusListPtr();
|
|
updateShareMemory(aPtr);
|
|
standbyUpdateCount = 0;
|
|
}
|
|
}
|
|
} // end of for loop
|
|
return NULL;
|
|
}
|
|
|
|
/******************************************************************************************
|
|
* @brief processStatusMSG
|
|
*
|
|
* purpose: Process the status message
|
|
*
|
|
******************************************************************************************/
|
|
void* processStatusMSG(messageqcpp::IOSocket* cfIos)
|
|
{
|
|
messageqcpp::IOSocket* fIos = cfIos;
|
|
|
|
pthread_t ThreadId;
|
|
ThreadId = pthread_self();
|
|
|
|
MonitorLog log;
|
|
MonitorConfig config;
|
|
ProcessMonitor aMonitor(config, log);
|
|
Oam oam;
|
|
|
|
ByteStream* msg;
|
|
msg = new ByteStream();
|
|
|
|
//log.writeLog(__LINE__, "***start create thread", LOG_TYPE_DEBUG);
|
|
struct timespec ts = { 20, 0 };
|
|
|
|
try
|
|
{
|
|
*msg = fIos->read(&ts);
|
|
}
|
|
catch (exception& ex)
|
|
{
|
|
string error = ex.what();
|
|
// log.writeLog(__LINE__, "***read error, close create thread: " + error, LOG_TYPE_DEBUG);
|
|
fIos->close();
|
|
delete fIos;
|
|
delete msg;
|
|
pthread_detach (ThreadId);
|
|
pthread_exit(0);
|
|
}
|
|
catch (...)
|
|
{
|
|
// log.writeLog(__LINE__, "***read error, close create thread", LOG_TYPE_DEBUG);
|
|
fIos->close();
|
|
delete fIos;
|
|
delete msg;
|
|
pthread_detach (ThreadId);
|
|
pthread_exit(0);
|
|
}
|
|
|
|
if (msg->length() <= 0)
|
|
{
|
|
// log.writeLog(__LINE__, "***0 bytes, close create thread", LOG_TYPE_DEBUG);
|
|
fIos->close();
|
|
delete fIos;
|
|
delete msg;
|
|
pthread_detach (ThreadId);
|
|
pthread_exit(0);
|
|
}
|
|
|
|
ByteStream::byte requestType;
|
|
*msg >> requestType;
|
|
//log.writeLog(__LINE__, "statusControl: Msg received, requestType = " + oam.itoa(requestType), LOG_TYPE_DEBUG);
|
|
|
|
switch (requestType)
|
|
{
|
|
case GET_PROC_STATUS:
|
|
{
|
|
std::string moduleName;
|
|
std::string processName;
|
|
|
|
ByteStream::byte state;
|
|
ByteStream::quadbyte PID;
|
|
std::string changeDate;
|
|
ByteStream ackmsg;
|
|
|
|
*msg >> moduleName;
|
|
*msg >> processName;
|
|
|
|
processStatusList::iterator listPtr;
|
|
//processStatusList* aPtr = statusListPtr();
|
|
listPtr = aPtr->begin();
|
|
|
|
int shmIndex = 0;
|
|
|
|
for (; listPtr != aPtr->end(); ++listPtr)
|
|
{
|
|
if ((*listPtr).ProcessName == processName &&
|
|
(*listPtr).ModuleName == moduleName)
|
|
{
|
|
shmIndex = (*listPtr).tableIndex;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (listPtr == aPtr->end())
|
|
{
|
|
// not in list
|
|
// log.writeLog(__LINE__, "statusControl: GET_PROC_STATUS: Process not valid: " + processName + " / " + moduleName, LOG_TYPE_DEBUG);
|
|
ackmsg << (ByteStream::byte) API_FAILURE;
|
|
fIos->write(ackmsg);
|
|
break;
|
|
}
|
|
|
|
//get table info
|
|
state = fShmProcessStatus[shmIndex].ProcessOpState;
|
|
PID = fShmProcessStatus[shmIndex].ProcessID;
|
|
changeDate = fShmProcessStatus[shmIndex].StateChangeDate;
|
|
|
|
ackmsg << (ByteStream::byte) API_SUCCESS;
|
|
ackmsg << state;
|
|
ackmsg << PID;
|
|
ackmsg << changeDate;
|
|
fIos->write(ackmsg);
|
|
}
|
|
break;
|
|
|
|
case SET_PROC_STATUS:
|
|
{
|
|
std::string moduleName;
|
|
std::string processName;
|
|
ByteStream::byte state;
|
|
ByteStream::quadbyte PID;
|
|
std::string shmName;
|
|
char charName[NAMESIZE];
|
|
|
|
*msg >> moduleName;
|
|
*msg >> processName;
|
|
*msg >> state;
|
|
*msg >> PID;
|
|
|
|
if (!runStandby)
|
|
{
|
|
ByteStream ackmsg;
|
|
ackmsg << (ByteStream::byte) requestType;
|
|
fIos->write(ackmsg);
|
|
}
|
|
|
|
log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set Process " + moduleName + "/" + processName + " State = " + oamState[state], LOG_TYPE_DEBUG);
|
|
|
|
processStatusList::iterator listPtr;
|
|
//processStatusList* aPtr = statusListPtr();
|
|
listPtr = aPtr->begin();
|
|
|
|
int shmIndex = 0;
|
|
|
|
for (; listPtr != aPtr->end(); ++listPtr)
|
|
{
|
|
if ((*listPtr).ProcessName == processName &&
|
|
(*listPtr).ModuleName == moduleName)
|
|
{
|
|
shmIndex = (*listPtr).tableIndex;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (listPtr == aPtr->end())
|
|
{
|
|
// not in list
|
|
log.writeLog(__LINE__, "statusControl: SET_PROC_STATUS: Process not valid: " + moduleName + "/" + processName, LOG_TYPE_DEBUG);
|
|
break;
|
|
}
|
|
|
|
//check and process for Active/Standby process run-type
|
|
if ( state == oam::ACTIVE )
|
|
{
|
|
|
|
std::string moduleType = moduleName.substr(0, 2);
|
|
|
|
for ( unsigned int i = 0 ; i < systemprocessconfig.processconfig.size(); i++)
|
|
{
|
|
if ( systemprocessconfig.processconfig[i].ModuleType == moduleType &&
|
|
systemprocessconfig.processconfig[i].ProcessName == processName )
|
|
{
|
|
if ( systemprocessconfig.processconfig[i].RunType == oam::ACTIVE_STANDBY )
|
|
{
|
|
// process is ACTIVE_STANDBY run-state, get Module run-type and state
|
|
try
|
|
{
|
|
oam.getSystemConfig(moduleType, moduletypeconfig);
|
|
|
|
if ( moduletypeconfig.RunType == oam::ACTIVE_STANDBY )
|
|
{
|
|
for ( int i = 1; i < fmoduleNumber; ++i)
|
|
{
|
|
memcpy(charName, fShmSystemStatus[i].Name, NAMESIZE);
|
|
shmName = charName;
|
|
|
|
if ( moduleName == shmName )
|
|
{
|
|
if ( fShmSystemStatus[i].OpState == oam::STANDBY )
|
|
{
|
|
//set current state to STANDBY
|
|
state = oam::STANDBY;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
catch (exception& ex)
|
|
{
|
|
string error = ex.what();
|
|
// log.writeLog(__LINE__, "statusControl: EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
|
|
break;
|
|
}
|
|
catch (...)
|
|
{
|
|
// log.writeLog(__LINE__, "statusControl: EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
// not oam::ACTIVE/STANDBY
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// invalid state change ACTIVE TO MAN_INIT / AUTO_INIT
|
|
if ( fShmProcessStatus[shmIndex].ProcessOpState == oam::ACTIVE )
|
|
{
|
|
if ( state == oam::MAN_INIT || state == oam::AUTO_INIT )
|
|
{
|
|
log.writeLog(__LINE__, "statusControl: " + moduleName + "/" + processName + " Current State = ACTIVE, invalid update request to " + oamState[state], LOG_TYPE_DEBUG);
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!utils::is_nonnegative(PID))
|
|
PID = 0;
|
|
|
|
log.writeLog(__LINE__, "statusControl: Set Process " + moduleName + "/" + processName + + " State = " + oamState[state] + " PID = " + oam.itoa(PID), LOG_TYPE_DEBUG);
|
|
|
|
//update table
|
|
if ( state < PID_UPDATE )
|
|
fShmProcessStatus[shmIndex].ProcessOpState = state;
|
|
|
|
if ( PID != 1 )
|
|
fShmProcessStatus[shmIndex].ProcessID = PID;
|
|
|
|
memcpy(fShmProcessStatus[shmIndex].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE);
|
|
|
|
//if DMLProc set to BUSY_INIT, set system state to BUSY_INIT
|
|
if ( processName == "DMLProc" && state == oam::BUSY_INIT )
|
|
{
|
|
fShmSystemStatus[0].OpState = state;
|
|
memcpy(fShmSystemStatus[0].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE);
|
|
log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set System State = " + oamState[state], LOG_TYPE_DEBUG);
|
|
}
|
|
|
|
//if DMLProc set to ACTIVE, set system state to ACTIVE if in an INIT state
|
|
if ( processName == "DMLProc" && state == oam::ACTIVE )
|
|
{
|
|
if ( fShmSystemStatus[0].OpState == oam::BUSY_INIT ||
|
|
fShmSystemStatus[0].OpState == oam::MAN_INIT ||
|
|
fShmSystemStatus[0].OpState == oam::AUTO_INIT )
|
|
{
|
|
fShmSystemStatus[0].OpState = state;
|
|
memcpy(fShmSystemStatus[0].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE);
|
|
log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set System State = " + oamState[state], LOG_TYPE_DEBUG);
|
|
}
|
|
|
|
BRM::DBRM dbrm;
|
|
dbrm.setSystemQueryReady(true);
|
|
}
|
|
|
|
}
|
|
break;
|
|
|
|
case GET_ALL_PROC_STATUS:
|
|
{
|
|
ByteStream ackmsg;
|
|
ByteStream::byte state;
|
|
ByteStream::quadbyte PID;
|
|
std::string changeDate;
|
|
std::string processName;
|
|
std::string moduleName;
|
|
|
|
processStatusList::iterator listPtr;
|
|
////processStatusList* aPtr = statusListPtr();
|
|
|
|
ackmsg << (ByteStream::quadbyte) aPtr->size();
|
|
|
|
for ( unsigned int i = 0 ; i < systemModuleTypeConfig.moduletypeconfig.size(); i++)
|
|
{
|
|
int moduleCount = systemModuleTypeConfig.moduletypeconfig[i].ModuleCount;
|
|
|
|
if ( moduleCount == 0 )
|
|
// skip of no modules configured
|
|
continue;
|
|
|
|
string moduleType = systemModuleTypeConfig.moduletypeconfig[i].ModuleType;
|
|
|
|
listPtr = aPtr->begin();
|
|
|
|
for (; listPtr != aPtr->end(); ++listPtr)
|
|
{
|
|
moduleName = (*listPtr).ModuleName;
|
|
|
|
if ( moduleName.find(moduleType) != string::npos )
|
|
{
|
|
processName = (*listPtr).ProcessName;
|
|
int shmIndex = (*listPtr).tableIndex;
|
|
state = fShmProcessStatus[shmIndex].ProcessOpState;
|
|
PID = fShmProcessStatus[shmIndex].ProcessID;
|
|
changeDate = fShmProcessStatus[shmIndex].StateChangeDate;
|
|
|
|
ackmsg << processName;
|
|
ackmsg << moduleName;
|
|
ackmsg << state;
|
|
ackmsg << PID;
|
|
ackmsg << changeDate;
|
|
}
|
|
}
|
|
}
|
|
|
|
fIos->write(ackmsg);
|
|
}
|
|
break;
|
|
|
|
case GET_PROC_STATUS_BY_PID:
|
|
{
|
|
std::string moduleName;
|
|
std::string processName;
|
|
|
|
ByteStream ackmsg;
|
|
ByteStream::byte state;
|
|
ByteStream::quadbyte PID;
|
|
|
|
*msg >> moduleName;
|
|
*msg >> PID;
|
|
|
|
processStatusList::iterator listPtr;
|
|
//processStatusList* aPtr = statusListPtr();
|
|
listPtr = aPtr->begin();
|
|
|
|
int shmIndex = 0;
|
|
|
|
for (; listPtr != aPtr->end(); ++listPtr)
|
|
{
|
|
if ((*listPtr).ModuleName == moduleName)
|
|
{
|
|
shmIndex = (*listPtr).tableIndex;
|
|
|
|
//get PID
|
|
if ( PID == (ByteStream::quadbyte) fShmProcessStatus[shmIndex].ProcessID)
|
|
{
|
|
// match found, get state
|
|
state = fShmProcessStatus[shmIndex].ProcessOpState;
|
|
//get process name
|
|
processName = (*listPtr).ProcessName;
|
|
|
|
ackmsg << (ByteStream::byte) API_SUCCESS;
|
|
ackmsg << state;
|
|
ackmsg << processName;
|
|
fIos->write(ackmsg);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (listPtr == aPtr->end())
|
|
{
|
|
// not in list
|
|
ackmsg << (ByteStream::byte) API_FAILURE;
|
|
fIos->write(ackmsg);
|
|
// log.writeLog(__LINE__, "statusControl: GET_PROC_STATUS_BY_PID: PID not valid: " + oam.itoa(PID) + " / " + moduleName);
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case GET_SYSTEM_STATUS:
|
|
{
|
|
ByteStream ackmsg;
|
|
ByteStream::byte state;
|
|
std::string name;
|
|
std::string changeDate;
|
|
ByteStream::byte systemStatusOnly;
|
|
|
|
*msg >> systemStatusOnly;
|
|
|
|
if ( systemStatusOnly == 1 )
|
|
{
|
|
for (int j = 0 ; j < fmoduleNumber; ++j)
|
|
{
|
|
name = fShmSystemStatus[j].Name;
|
|
|
|
if ( name.find("system") != string::npos )
|
|
{
|
|
state = fShmSystemStatus[j].OpState;
|
|
changeDate = fShmSystemStatus[j].StateChangeDate;
|
|
|
|
ackmsg << name;
|
|
ackmsg << state;
|
|
ackmsg << changeDate;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ackmsg << (ByteStream::byte) fmoduleNumber;
|
|
|
|
for (int j = 0 ; j < fmoduleNumber; ++j)
|
|
{
|
|
name = fShmSystemStatus[j].Name;
|
|
|
|
if ( name.find("system") != string::npos )
|
|
{
|
|
state = fShmSystemStatus[j].OpState;
|
|
changeDate = fShmSystemStatus[j].StateChangeDate;
|
|
|
|
ackmsg << name;
|
|
ackmsg << state;
|
|
ackmsg << changeDate;
|
|
}
|
|
}
|
|
|
|
for ( unsigned int i = 0 ; i < systemModuleTypeConfig.moduletypeconfig.size(); i++)
|
|
{
|
|
int moduleCount = systemModuleTypeConfig.moduletypeconfig[i].ModuleCount;
|
|
|
|
if ( moduleCount == 0 )
|
|
// skip of no modules configured
|
|
continue;
|
|
|
|
string moduleType = systemModuleTypeConfig.moduletypeconfig[i].ModuleType;
|
|
|
|
for (int j = 0 ; j < fmoduleNumber; ++j)
|
|
{
|
|
name = fShmSystemStatus[j].Name;
|
|
|
|
if ( name.find(moduleType) != string::npos )
|
|
{
|
|
state = fShmSystemStatus[j].OpState;
|
|
changeDate = fShmSystemStatus[j].StateChangeDate;
|
|
|
|
ackmsg << name;
|
|
ackmsg << state;
|
|
ackmsg << changeDate;
|
|
}
|
|
}
|
|
}
|
|
|
|
ackmsg << (ByteStream::byte) extDeviceNumber;
|
|
|
|
for (int i = 0 ; i < extDeviceNumber; ++i)
|
|
{
|
|
name = fShmExtDeviceStatus[i].Name;
|
|
state = fShmExtDeviceStatus[i].OpState;
|
|
changeDate = fShmExtDeviceStatus[i].StateChangeDate;
|
|
|
|
ackmsg << name;
|
|
ackmsg << state;
|
|
ackmsg << changeDate;
|
|
}
|
|
|
|
ackmsg << (ByteStream::byte) NICNumber;
|
|
|
|
for (int i = 0 ; i < NICNumber; ++i)
|
|
{
|
|
name = fShmNICStatus[i].Name;
|
|
state = fShmNICStatus[i].OpState;
|
|
changeDate = fShmNICStatus[i].StateChangeDate;
|
|
|
|
ackmsg << name;
|
|
ackmsg << state;
|
|
ackmsg << changeDate;
|
|
}
|
|
|
|
ackmsg << (ByteStream::byte) dbrootNumber;
|
|
|
|
for (int i = 0 ; i < dbrootNumber; ++i)
|
|
{
|
|
name = fShmDbrootStatus[i].Name;
|
|
state = fShmDbrootStatus[i].OpState;
|
|
changeDate = fShmDbrootStatus[i].StateChangeDate;
|
|
|
|
ackmsg << name;
|
|
ackmsg << state;
|
|
ackmsg << changeDate;
|
|
}
|
|
}
|
|
|
|
fIos->write(ackmsg);
|
|
}
|
|
break;
|
|
|
|
case SET_SYSTEM_STATUS:
|
|
{
|
|
ByteStream::byte state;
|
|
*msg >> state;
|
|
fShmSystemStatus[0].OpState = state;
|
|
memcpy(fShmSystemStatus[0].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE);
|
|
log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set System State = " + oamState[state], LOG_TYPE_DEBUG);
|
|
|
|
if (!runStandby)
|
|
{
|
|
ByteStream ackmsg;
|
|
ackmsg << (ByteStream::byte) requestType;
|
|
fIos->write(ackmsg);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case SET_MODULE_STATUS:
|
|
{
|
|
ByteStream::byte state;
|
|
std::string moduleName;
|
|
std::string shmName;
|
|
char charName[NAMESIZE];
|
|
|
|
*msg >> moduleName;
|
|
*msg >> state;
|
|
|
|
if (!runStandby)
|
|
{
|
|
ByteStream ackmsg;
|
|
ackmsg << (ByteStream::byte) requestType;
|
|
fIos->write(ackmsg);
|
|
}
|
|
|
|
log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set Module " + moduleName + " State = " + oamState[state], LOG_TYPE_DEBUG);
|
|
|
|
//Handle Module RunType of ACTIVE_STANDBY
|
|
string moduletype = moduleName.substr(0, MAX_MODULE_TYPE_SIZE);
|
|
string moduleID = moduleName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE);
|
|
|
|
try
|
|
{
|
|
oam.getSystemConfig(moduletype, moduletypeconfig);
|
|
}
|
|
catch (exception& ex)
|
|
{
|
|
string error = ex.what();
|
|
log.writeLog(__LINE__, "statusControl: EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
|
|
break;
|
|
}
|
|
catch (...)
|
|
{
|
|
// log.writeLog(__LINE__, "statusControl: EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
|
|
break;
|
|
}
|
|
|
|
if ( moduletypeconfig.RunType == oam::ACTIVE_STANDBY )
|
|
{
|
|
if ( state == oam::ACTIVE )
|
|
{
|
|
for ( int i = 1; i < fmoduleNumber; ++i)
|
|
{
|
|
memcpy(charName, fShmSystemStatus[i].Name, NAMESIZE);
|
|
shmName = charName;
|
|
string othermoduletype = shmName.substr(0, MAX_MODULE_TYPE_SIZE);
|
|
string othermoduleID = shmName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE);
|
|
|
|
if ( moduletype == othermoduletype &&
|
|
moduleID != othermoduleID )
|
|
{
|
|
if ( fShmSystemStatus[i].OpState == oam::ACTIVE )
|
|
{
|
|
//found one, set current state to STANDBY
|
|
state = oam::STANDBY;
|
|
|
|
//set ACTIVE_STANDBY process to STANDBY state
|
|
try
|
|
{
|
|
oam.getProcessConfig(systemprocessconfig);
|
|
|
|
for ( unsigned int i = 0 ; i < systemprocessconfig.processconfig.size(); i++)
|
|
{
|
|
if ( systemprocessconfig.processconfig[i].ModuleType == moduletype &&
|
|
systemprocessconfig.processconfig[i].RunType == oam::ACTIVE_STANDBY )
|
|
{
|
|
|
|
processStatusList::iterator listPtr;
|
|
//processStatusList* aPtr = statusListPtr();
|
|
listPtr = aPtr->begin();
|
|
|
|
for (; listPtr != aPtr->end(); ++listPtr)
|
|
{
|
|
if ( systemprocessconfig.processconfig[i].ProcessName == (*listPtr).ProcessName &&
|
|
moduleName == (*listPtr).ModuleName )
|
|
{
|
|
int shmIndex = (*listPtr).tableIndex;
|
|
fShmProcessStatus[shmIndex].ProcessOpState = oam::STANDBY;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
catch (exception& ex)
|
|
{
|
|
string error = ex.what();
|
|
// log.writeLog(__LINE__, "statusControl: EXCEPTION ERROR on getProcessConfig: " + error, LOG_TYPE_ERROR);
|
|
}
|
|
catch (...)
|
|
{
|
|
// log.writeLog(__LINE__, "statusControl: EXCEPTION ERROR on getProcessConfig: Caught unknown exception!", LOG_TYPE_ERROR);
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
//check to see if a STANDBY Mate needs to go ACTIVE
|
|
if ( state == oam::DOWN || state == oam::MAN_OFFLINE
|
|
|| state == oam::FAILED)
|
|
{
|
|
for ( int i = 1; i < fmoduleNumber; ++i)
|
|
{
|
|
memcpy(charName, fShmSystemStatus[i].Name, NAMESIZE);
|
|
shmName = charName;
|
|
string othermoduletype = shmName.substr(0, MAX_MODULE_TYPE_SIZE);
|
|
string othermoduleID = shmName.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE);
|
|
|
|
if ( moduletype == othermoduletype &&
|
|
moduleID != othermoduleID )
|
|
{
|
|
if ( fShmSystemStatus[i].OpState == oam::STANDBY )
|
|
{
|
|
//found one, set it to ACTIVE
|
|
fShmSystemStatus[i].OpState = oam::ACTIVE;
|
|
memcpy(fShmSystemStatus[i].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE);
|
|
|
|
//set ACTIVE_STANDBY process to ACTIVE state
|
|
try
|
|
{
|
|
oam.getProcessConfig(systemprocessconfig);
|
|
|
|
for ( unsigned int i = 0 ; i < systemprocessconfig.processconfig.size(); i++)
|
|
{
|
|
if ( systemprocessconfig.processconfig[i].ModuleType == moduletype &&
|
|
systemprocessconfig.processconfig[i].RunType == oam::ACTIVE_STANDBY )
|
|
{
|
|
|
|
processStatusList::iterator listPtr;
|
|
//processStatusList* aPtr = statusListPtr();
|
|
listPtr = aPtr->begin();
|
|
|
|
for (; listPtr != aPtr->end(); ++listPtr)
|
|
{
|
|
if ( systemprocessconfig.processconfig[i].ProcessName == (*listPtr).ProcessName &&
|
|
shmName == (*listPtr).ModuleName )
|
|
{
|
|
int shmIndex = (*listPtr).tableIndex;
|
|
fShmProcessStatus[shmIndex].ProcessOpState = oam::ACTIVE;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
catch (exception& ex)
|
|
{
|
|
string error = ex.what();
|
|
// log.writeLog(__LINE__, "statusControl: EXCEPTION ERROR on getProcessConfig: " + error, LOG_TYPE_ERROR);
|
|
}
|
|
catch (...)
|
|
{
|
|
// log.writeLog(__LINE__, "statusControl: EXCEPTION ERROR on getProcessConfig: Caught unknown exception!", LOG_TYPE_ERROR);
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
//set current Module state
|
|
int i = 1;
|
|
|
|
for ( ; i < fmoduleNumber; ++i)
|
|
{
|
|
memcpy(charName, fShmSystemStatus[i].Name, NAMESIZE);
|
|
shmName = charName;
|
|
|
|
if ( moduleName == shmName )
|
|
{
|
|
fShmSystemStatus[i].OpState = state;
|
|
memcpy(fShmSystemStatus[i].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE);
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ( i == fmoduleNumber)
|
|
{
|
|
// not in list
|
|
log.writeLog(__LINE__, "statusControl: SET_MODULE_STATUS: Module not valid: " + moduleName, LOG_TYPE_ERROR);
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case SET_EXT_DEVICE_STATUS:
|
|
{
|
|
ByteStream::byte state;
|
|
std::string name;
|
|
std::string shmName;
|
|
char charName[NAMESIZE];
|
|
|
|
*msg >> name;
|
|
*msg >> state;
|
|
|
|
if (!runStandby)
|
|
{
|
|
ByteStream ackmsg;
|
|
ackmsg << (ByteStream::byte) requestType;
|
|
fIos->write(ackmsg);
|
|
}
|
|
|
|
log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set Ext Device " + name + " State = " + oamState[state], LOG_TYPE_DEBUG);
|
|
|
|
int i = 0;
|
|
|
|
for ( ; i < extDeviceNumber; ++i)
|
|
{
|
|
memcpy(charName, fShmExtDeviceStatus[i].Name, NAMESIZE);
|
|
shmName = charName;
|
|
|
|
if ( name == shmName )
|
|
{
|
|
fShmExtDeviceStatus[i].OpState = state;
|
|
memcpy(fShmExtDeviceStatus[i].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE);
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ( i == extDeviceNumber)
|
|
{
|
|
// not in list
|
|
log.writeLog(__LINE__, "statusControl: SET_SWITCH_STATUS: Switch not valid: " + name, LOG_TYPE_ERROR);
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case SET_DBROOT_STATUS:
|
|
{
|
|
ByteStream::byte state;
|
|
std::string name;
|
|
std::string shmName;
|
|
char charName[NAMESIZE];
|
|
|
|
*msg >> name;
|
|
*msg >> state;
|
|
|
|
if (!runStandby)
|
|
{
|
|
ByteStream ackmsg;
|
|
ackmsg << (ByteStream::byte) requestType;
|
|
fIos->write(ackmsg);
|
|
}
|
|
|
|
log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set DBroot " + name + " State = " + oamState[state], LOG_TYPE_DEBUG);
|
|
|
|
if ( dbrootNumber == 0 )
|
|
{
|
|
// no dbroots setup in shared memory, must be internal
|
|
log.writeLog(__LINE__, "statusControl: SET_DBROOT_STATUS: DBroot not valid: " + name, LOG_TYPE_ERROR);
|
|
break;
|
|
}
|
|
|
|
int i = 0;
|
|
|
|
for ( ; i < dbrootNumber; ++i)
|
|
{
|
|
memcpy(charName, fShmDbrootStatus[i].Name, NAMESIZE);
|
|
shmName = charName;
|
|
|
|
if ( name == shmName )
|
|
{
|
|
fShmDbrootStatus[i].OpState = state;
|
|
memcpy(fShmDbrootStatus[i].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE);
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ( i == dbrootNumber)
|
|
{
|
|
// not in list
|
|
log.writeLog(__LINE__, "statusControl: SET_DBROOT_STATUS: DBroot not valid: " + name, LOG_TYPE_ERROR);
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case SET_NIC_STATUS:
|
|
{
|
|
ByteStream::byte state;
|
|
std::string hostName;
|
|
std::string shmName;
|
|
char charName[NAMESIZE];
|
|
|
|
*msg >> hostName;
|
|
*msg >> state;
|
|
|
|
if (!runStandby)
|
|
{
|
|
ByteStream ackmsg;
|
|
ackmsg << (ByteStream::byte) requestType;
|
|
fIos->write(ackmsg);
|
|
}
|
|
|
|
log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set NIC " + hostName + " State = " + oamState[state], LOG_TYPE_DEBUG);
|
|
|
|
int i = 0;
|
|
|
|
for ( ; i < NICNumber; ++i)
|
|
{
|
|
memcpy(charName, fShmNICStatus[i].Name, NAMESIZE);
|
|
shmName = charName;
|
|
|
|
if ( hostName == shmName )
|
|
{
|
|
fShmNICStatus[i].OpState = state;
|
|
memcpy(fShmNICStatus[i].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE);
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ( i == NICNumber)
|
|
{
|
|
// not in list
|
|
log.writeLog(__LINE__, "statusControl: SET_NIC_STATUS: NIC not valid: " + hostName, LOG_TYPE_ERROR);
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case ADD_MODULE:
|
|
{
|
|
ByteStream ackmsg;
|
|
ByteStream::byte moduleCount, nicCount;
|
|
oam::DeviceNetworkConfig devicenetworkconfig;
|
|
oam::DeviceNetworkList devicenetworklist;
|
|
string value;
|
|
MonitorConfig currentConfig;
|
|
|
|
*msg >> moduleCount;
|
|
|
|
for (int i = 0; i < moduleCount; i++)
|
|
{
|
|
*msg >> value;
|
|
devicenetworkconfig.DeviceName = value;
|
|
devicenetworklist.push_back(devicenetworkconfig);
|
|
}
|
|
|
|
log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Add Module");
|
|
|
|
string moduleType = devicenetworkconfig.DeviceName.substr(0, MAX_MODULE_TYPE_SIZE);
|
|
string OAMParentModuleType = currentConfig.OAMParentName().substr(0, 2);
|
|
|
|
// add to module status shared memory
|
|
DeviceNetworkList::iterator pt = devicenetworklist.begin();
|
|
|
|
for ( ; pt != devicenetworklist.end() ; pt++)
|
|
{
|
|
moduleNameList.push_back((*pt).DeviceName);
|
|
|
|
string moduleName = (*pt).DeviceName;
|
|
memcpy(fShmSystemStatus[fmoduleNumber].Name, moduleName.c_str(), NAMESIZE);
|
|
fShmSystemStatus[fmoduleNumber].OpState = oam::MAN_DISABLED;
|
|
memcpy(fShmSystemStatus[fmoduleNumber].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE);
|
|
fmoduleNumber++;
|
|
}
|
|
|
|
// add to NIC status shared memory
|
|
*msg >> nicCount;
|
|
|
|
for (int i = 0; i < nicCount; i++)
|
|
{
|
|
*msg >> value;
|
|
memcpy(fShmNICStatus[NICNumber].Name, value.c_str(), NAMESIZE);
|
|
fShmNICStatus[NICNumber].OpState = oam::INITIAL;
|
|
memcpy(fShmNICStatus[NICNumber].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE);
|
|
NICNumber++;
|
|
}
|
|
|
|
processStatusList::iterator listPtr;
|
|
listPtr = aPtr->begin();
|
|
|
|
// add to process status shared memory
|
|
pt = devicenetworklist.begin();
|
|
|
|
for ( ; pt != devicenetworklist.end() ; pt++)
|
|
{
|
|
for ( unsigned int j = 0; j < systemprocessconfig.processconfig.size(); j++)
|
|
{
|
|
//skip if both BootLaunch and LaunchID are 0
|
|
if ( systemprocessconfig.processconfig[j].BootLaunch == 0 &&
|
|
systemprocessconfig.processconfig[j].LaunchID == 0 )
|
|
continue;
|
|
|
|
// "ChildOAMModule" "ParentOAMModule" dm/um/pm
|
|
string processModuleType = systemprocessconfig.processconfig[j].ModuleType;
|
|
|
|
if (processModuleType == moduleType
|
|
|| ( processModuleType == "um" &&
|
|
moduleType == "pm" && PMwithUM == "y")
|
|
|| processModuleType == "ChildExtOAMModule"
|
|
|| (processModuleType == "ChildOAMModule" )
|
|
|| (processModuleType == "ParentOAMModule" && moduleType == OAMParentModuleType) )
|
|
{
|
|
if ( processModuleType == "um" &&
|
|
moduleType == "pm" && PMwithUM == "y" &&
|
|
systemprocessconfig.processconfig[j].ProcessName == "DMLProc" )
|
|
continue;
|
|
|
|
if ( processModuleType == "um" &&
|
|
moduleType == "pm" && PMwithUM == "y" &&
|
|
systemprocessconfig.processconfig[j].ProcessName == "DDLProc" )
|
|
continue;
|
|
|
|
processstatus procstat;
|
|
procstat.ProcessName = systemprocessconfig.processconfig[j].ProcessName;
|
|
procstat.ModuleName = (*pt).DeviceName;
|
|
procstat.tableIndex = processNumber;
|
|
fstatusListPtr.push_back(procstat);
|
|
|
|
fShmProcessStatus[processNumber].ProcessOpState = oam::MAN_OFFLINE;
|
|
fShmProcessStatus[processNumber].ProcessID = 0;
|
|
memcpy(fShmProcessStatus[processNumber].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE);
|
|
processNumber++;
|
|
}
|
|
}
|
|
}
|
|
|
|
ackmsg << (ByteStream::byte) API_SUCCESS;
|
|
fIos->write(ackmsg);
|
|
|
|
try
|
|
{
|
|
oam.getSystemConfig(systemModuleTypeConfig);
|
|
}
|
|
catch (exception& ex)
|
|
{
|
|
string error = ex.what();
|
|
log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
|
|
}
|
|
catch (...)
|
|
{
|
|
log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case REMOVE_MODULE:
|
|
{
|
|
ByteStream ackmsg;
|
|
ByteStream::byte moduleCount;
|
|
oam::DeviceNetworkConfig devicenetworkconfig;
|
|
oam::DeviceNetworkList devicenetworklist;
|
|
string value;
|
|
std::string shmName;
|
|
char charName[NAMESIZE];
|
|
|
|
*msg >> moduleCount;
|
|
|
|
for (int i = 0; i < moduleCount; i++)
|
|
{
|
|
*msg >> value;
|
|
devicenetworkconfig.DeviceName = value;
|
|
devicenetworklist.push_back(devicenetworkconfig);
|
|
}
|
|
|
|
log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Remove Module");
|
|
|
|
// remove from module status shared memory
|
|
DeviceNetworkList::iterator pt = devicenetworklist.begin();
|
|
|
|
for ( ; pt != devicenetworklist.end() ; pt++)
|
|
{
|
|
string moduleName = (*pt).DeviceName;
|
|
|
|
for ( int j = 0 ; j < fmoduleNumber ; j++ )
|
|
{
|
|
memcpy(charName, fShmSystemStatus[j].Name, NAMESIZE);
|
|
shmName = charName;
|
|
|
|
if ( moduleName == shmName )
|
|
{
|
|
for ( int k = j + 1 ; k < fmoduleNumber ; k++)
|
|
{
|
|
string name = fShmSystemStatus[k].Name;
|
|
int state = fShmSystemStatus[k].OpState;
|
|
string changeDate = fShmSystemStatus[k].StateChangeDate;
|
|
|
|
memcpy(fShmSystemStatus[j].Name, name.c_str(), NAMESIZE);
|
|
fShmSystemStatus[j].OpState = state;
|
|
memcpy(fShmSystemStatus[j].StateChangeDate, changeDate.c_str(), DATESIZE);
|
|
}
|
|
|
|
fmoduleNumber--;
|
|
}
|
|
}
|
|
}
|
|
|
|
// remove from process status shared memory
|
|
pt = devicenetworklist.begin();
|
|
|
|
for ( ; pt != devicenetworklist.end() ; pt++)
|
|
{
|
|
string moduleName = (*pt).DeviceName;
|
|
|
|
processStatusList::iterator listPtr;
|
|
//processStatusList* aPtr = statusListPtr();
|
|
listPtr = aPtr->begin();
|
|
|
|
for (; listPtr != aPtr->end(); )
|
|
{
|
|
if ( moduleName == (*listPtr).ModuleName )
|
|
aPtr->erase(listPtr);
|
|
else
|
|
++listPtr;
|
|
}
|
|
}
|
|
|
|
ackmsg << (ByteStream::byte) API_SUCCESS;
|
|
fIos->write(ackmsg);
|
|
|
|
try
|
|
{
|
|
oam.getSystemConfig(systemModuleTypeConfig);
|
|
}
|
|
catch (exception& ex)
|
|
{
|
|
string error = ex.what();
|
|
log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: " + error, LOG_TYPE_ERROR);
|
|
}
|
|
catch (...)
|
|
{
|
|
log.writeLog(__LINE__, "EXCEPTION ERROR on getSystemConfig: Caught unknown exception!", LOG_TYPE_ERROR);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case ADD_EXT_DEVICE:
|
|
{
|
|
ByteStream ackmsg;
|
|
string device;
|
|
|
|
log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Add External Device");
|
|
|
|
*msg >> device;
|
|
|
|
fShmExtDeviceStatus[extDeviceNumber].OpState = oam::INITIAL;
|
|
memcpy(fShmExtDeviceStatus[extDeviceNumber].Name, device.c_str(), NAMESIZE);
|
|
extDeviceNumber++;
|
|
|
|
if (!runStandby)
|
|
{
|
|
ackmsg << (ByteStream::byte) ADD_EXT_DEVICE;
|
|
fIos->write(ackmsg);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case REMOVE_EXT_DEVICE:
|
|
{
|
|
ByteStream ackmsg;
|
|
string device;
|
|
std::string shmName;
|
|
char charName[NAMESIZE];
|
|
|
|
log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Remove External Device");
|
|
|
|
*msg >> device;
|
|
|
|
for ( int j = 0 ; j < extDeviceNumber ; j++ )
|
|
{
|
|
memcpy(charName, fShmExtDeviceStatus[j].Name, NAMESIZE);
|
|
shmName = charName;
|
|
|
|
if ( device == shmName )
|
|
{
|
|
for ( int k = j + 1 ; k < extDeviceNumber ; k++)
|
|
{
|
|
string name = fShmExtDeviceStatus[k].Name;
|
|
int state = fShmExtDeviceStatus[k].OpState;
|
|
string changeDate = fShmExtDeviceStatus[k].StateChangeDate;
|
|
|
|
memcpy(fShmExtDeviceStatus[j].Name, name.c_str(), NAMESIZE);
|
|
fShmExtDeviceStatus[j].OpState = state;
|
|
memcpy(fShmExtDeviceStatus[j].StateChangeDate, changeDate.c_str(), DATESIZE);
|
|
}
|
|
|
|
extDeviceNumber--;
|
|
}
|
|
}
|
|
|
|
if (!runStandby)
|
|
{
|
|
ackmsg << (ByteStream::byte) REMOVE_EXT_DEVICE;
|
|
fIos->write(ackmsg);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case ADD_DBROOT:
|
|
{
|
|
ByteStream ackmsg;
|
|
string device;
|
|
|
|
log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Add DBRoot");
|
|
|
|
*msg >> device;
|
|
|
|
fShmDbrootStatus[dbrootNumber].OpState = oam::INITIAL;
|
|
memcpy(fShmDbrootStatus[dbrootNumber].Name, device.c_str(), NAMESIZE);
|
|
memcpy(fShmDbrootStatus[dbrootNumber].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE);
|
|
|
|
dbrootNumber++;
|
|
|
|
if (!runStandby)
|
|
{
|
|
ackmsg << (ByteStream::byte) ADD_DBROOT;
|
|
fIos->write(ackmsg);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case REMOVE_DBROOT:
|
|
{
|
|
ByteStream ackmsg;
|
|
string device;
|
|
std::string shmName;
|
|
char charName[NAMESIZE];
|
|
|
|
log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Remove DBRoot");
|
|
|
|
*msg >> device;
|
|
|
|
for ( int j = 0 ; j < dbrootNumber ; j++ )
|
|
{
|
|
memcpy(charName, fShmDbrootStatus[j].Name, NAMESIZE);
|
|
shmName = charName;
|
|
|
|
if ( device == shmName )
|
|
{
|
|
for ( int k = j + 1 ; k < dbrootNumber ; k++)
|
|
{
|
|
string name = fShmDbrootStatus[k].Name;
|
|
int state = fShmDbrootStatus[k].OpState;
|
|
string changeDate = fShmDbrootStatus[k].StateChangeDate;
|
|
|
|
memcpy(fShmDbrootStatus[j].Name, name.c_str(), NAMESIZE);
|
|
fShmDbrootStatus[j].OpState = state;
|
|
memcpy(fShmDbrootStatus[j].StateChangeDate, changeDate.c_str(), DATESIZE);
|
|
}
|
|
|
|
dbrootNumber--;
|
|
}
|
|
}
|
|
|
|
if (!runStandby)
|
|
{
|
|
ackmsg << (ByteStream::byte) REMOVE_DBROOT;
|
|
fIos->write(ackmsg);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case GET_SHARED_MEM:
|
|
{
|
|
ByteStream ackmsg;
|
|
ByteStream::byte type;
|
|
|
|
*msg >> type;
|
|
|
|
switch (type)
|
|
{
|
|
case 1:
|
|
{
|
|
log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: GET_SHARED_MEM for process");
|
|
|
|
ByteStream::byte processNumber;
|
|
|
|
*msg >> processNumber;
|
|
|
|
ackmsg << (ByteStream::byte) GET_SHARED_MEM;
|
|
|
|
for ( int i = 0 ; i < processNumber ; i++ )
|
|
{
|
|
ackmsg << (ByteStream::quadbyte) fShmProcessStatus[i].ProcessID;
|
|
ackmsg << fShmProcessStatus[i].ProcessOpState;
|
|
}
|
|
|
|
fIos->write(ackmsg);
|
|
|
|
break;
|
|
}
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
|
|
} // end of switch
|
|
|
|
//log.writeLog(__LINE__, "***end, close create thread", LOG_TYPE_DEBUG);
|
|
fIos->close();
|
|
delete fIos;
|
|
delete msg;
|
|
pthread_detach (ThreadId);
|
|
pthread_exit(0);
|
|
return NULL;
|
|
}
|
|
|
|
/******************************************************************************************
|
|
* @brief updateShareMemory
|
|
*
|
|
* purpose: Get and update shared memory from Parent OAM module
|
|
*
|
|
******************************************************************************************/
|
|
void updateShareMemory(processStatusList* aPtr)
|
|
{
|
|
MonitorLog log;
|
|
MonitorConfig config;
|
|
ProcessMonitor aMonitor(config, log);
|
|
Oam oam;
|
|
|
|
// log.writeLog(__LINE__, "Get Process Status shared Memory from Active OAM", LOG_TYPE_DEBUG);
|
|
|
|
SystemProcessStatus systemprocessstatus;
|
|
ProcessStatus processstatus;
|
|
|
|
processStatusList::iterator listPtr;
|
|
listPtr = aPtr->begin();
|
|
|
|
try
|
|
{
|
|
oam.getProcessStatus(systemprocessstatus);
|
|
|
|
for ( unsigned int i = 0 ; i < systemprocessstatus.processstatus.size(); i++)
|
|
{
|
|
int shmIndex = 0;
|
|
|
|
for (; listPtr != aPtr->end(); ++listPtr)
|
|
{
|
|
if ((*listPtr).ProcessName == systemprocessstatus.processstatus[i].ProcessName &&
|
|
(*listPtr).ModuleName == systemprocessstatus.processstatus[i].Module)
|
|
{
|
|
shmIndex = (*listPtr).tableIndex;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (listPtr == aPtr->end())
|
|
continue;
|
|
|
|
//update table
|
|
fShmProcessStatus[shmIndex].ProcessOpState = systemprocessstatus.processstatus[i].ProcessOpState;
|
|
fShmProcessStatus[shmIndex].ProcessID = systemprocessstatus.processstatus[i].ProcessID;
|
|
string stime = systemprocessstatus.processstatus[i].StateChangeDate ;
|
|
memcpy(fShmProcessStatus[shmIndex].StateChangeDate, stime.c_str(), DATESIZE);
|
|
}
|
|
|
|
// log.writeLog(__LINE__, "Process Status shared Memory Initialized from Active OAM Module", LOG_TYPE_DEBUG);
|
|
}
|
|
catch (...)
|
|
{
|
|
return;
|
|
}
|
|
|
|
// log.writeLog(__LINE__, "Get System Status shared Memory from Active OAM", LOG_TYPE_DEBUG);
|
|
|
|
SystemStatus systemstatus;
|
|
|
|
try
|
|
{
|
|
oam.getSystemStatus(systemstatus, false);
|
|
fShmSystemStatus[0].OpState = systemstatus.SystemOpState;
|
|
string stime = systemstatus.systemmodulestatus.modulestatus[0].StateChangeDate ;
|
|
memcpy(fShmSystemStatus[0].StateChangeDate, stime.c_str(), DATESIZE);
|
|
}
|
|
catch (...)
|
|
{
|
|
return;
|
|
}
|
|
|
|
// log.writeLog(__LINE__, "Get Module Status shared Memory from Active OAM", LOG_TYPE_DEBUG);
|
|
|
|
std::string shmName;
|
|
char charName[NAMESIZE];
|
|
|
|
for ( unsigned int i = 0 ; i < systemstatus.systemmodulestatus.modulestatus.size(); i++)
|
|
{
|
|
if ( systemstatus.systemmodulestatus.modulestatus[i].Module.empty() )
|
|
// end of list
|
|
break;
|
|
|
|
int j = 1;
|
|
|
|
for ( ; j < fmoduleNumber; ++j)
|
|
{
|
|
memcpy(charName, fShmSystemStatus[j].Name, NAMESIZE);
|
|
shmName = charName;
|
|
|
|
if ( systemstatus.systemmodulestatus.modulestatus[i].Module == shmName )
|
|
{
|
|
fShmSystemStatus[j].OpState = systemstatus.systemmodulestatus.modulestatus[i].ModuleOpState;
|
|
string stime = systemstatus.systemmodulestatus.modulestatus[i].StateChangeDate ;
|
|
memcpy(fShmSystemStatus[j].StateChangeDate, stime.c_str(), DATESIZE);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// vim:ts=4 sw=4:
|
|
|